author    Martin Matuska <mm@FreeBSD.org>  2023-11-22 11:43:59 +0000
committer Martin Matuska <mm@FreeBSD.org>  2023-11-22 11:43:59 +0000
commit    f7f5c2419ea7e66460fe33bc6a5588842fd5312c (patch)
tree      d4c00729b739df53225287e60dc2569f8b771af9 /sys/contrib
parent    0c0d524e5e6cf5deafb24c6be2c3e6461272e055 (diff)
parent    55dd24c4ccee2da61d5396289ef560f9b7bc6a68 (diff)
download  src-f7f5c2419ea7e66460fe33bc6a5588842fd5312c.tar.gz
download  src-f7f5c2419ea7e66460fe33bc6a5588842fd5312c.zip
Diffstat (limited to 'sys/contrib')
-rw-r--r--  sys/contrib/openzfs/.gitignore | 1
-rw-r--r--  sys/contrib/openzfs/META | 4
-rwxr-xr-x  sys/contrib/openzfs/cmd/arc_summary | 2
-rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c | 96
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 | 9
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1 | 1
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_iter.c | 33
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_util.h | 4
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_vdev.c | 43
-rw-r--r--  sys/contrib/openzfs/config/Rules.am | 1
-rw-r--r--  sys/contrib/openzfs/config/kernel-fsync-bdev.m4 | 36
-rw-r--r--  sys/contrib/openzfs/config/kernel-generic_fillattr.m4 | 39
-rw-r--r--  sys/contrib/openzfs/config/kernel-inode-times.m4 | 43
-rw-r--r--  sys/contrib/openzfs/config/kernel.m4 | 2
-rw-r--r--  sys/contrib/openzfs/configure.ac | 1
-rw-r--r--  sys/contrib/openzfs/contrib/debian/changelog.in (renamed from sys/contrib/openzfs/contrib/debian/changelog) | 6
-rw-r--r--  sys/contrib/openzfs/contrib/debian/control | 1
-rw-r--r--  sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install | 2
-rw-r--r--  sys/contrib/openzfs/include/libzfs.h | 9
-rw-r--r--  sys/contrib/openzfs/include/libzutil.h | 2
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/spl/sys/mutex.h | 1
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h | 18
-rw-r--r--  sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h | 6
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h | 2
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h | 21
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/uio.h | 8
-rw-r--r--  sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h | 2
-rw-r--r--  sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h | 5
-rw-r--r--  sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h | 13
-rw-r--r--  sys/contrib/openzfs/include/sys/arc.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/arc_impl.h | 1
-rw-r--r--  sys/contrib/openzfs/include/sys/spa.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/txg_impl.h | 3
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_impl.h | 5
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_raidz_impl.h | 4
-rw-r--r--  sys/contrib/openzfs/include/sys/zfs_context.h | 2
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs.abi | 4
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_util.c | 193
-rw-r--r--  sys/contrib/openzfs/lib/libzpool/kernel.c | 9
-rw-r--r--  sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c | 5
-rw-r--r--  sys/contrib/openzfs/man/Makefile.am | 1
-rw-r--r--  sys/contrib/openzfs/man/man4/zfs.4 | 64
-rw-r--r--  sys/contrib/openzfs/man/man7/zpool-features.7 | 9
-rw-r--r--  sys/contrib/openzfs/man/man8/.gitignore | 1
-rw-r--r--  sys/contrib/openzfs/man/man8/zfs_prepare_disk.8.in | 70
-rw-r--r--  sys/contrib/openzfs/module/Kbuild.in | 4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c | 76
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c | 62
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c | 34
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/arc_os.c | 51
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c | 29
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c | 29
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c | 18
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c | 11
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c | 5
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c | 6
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c | 2
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c | 7
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 18
-rw-r--r--  sys/contrib/openzfs/module/zfs/arc.c | 98
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_tx.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_pool.c | 14
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_config.c | 17
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_queue.c | 7
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 5
-rw-r--r--  sys/contrib/openzfs/module/zfs/zil.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 89
-rw-r--r--  sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in | 90
-rw-r--r--  sys/contrib/openzfs/scripts/Makefile.am | 2
-rwxr-xr-x  sys/contrib/openzfs/scripts/zfs_prepare_disk | 17
-rw-r--r--  sys/contrib/openzfs/tests/runfiles/linux.run | 8
-rwxr-xr-x  sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in | 1
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg | 9
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib | 21
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg | 3
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am | 1
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh | 4
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh | 5
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh | 10
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg | 4
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh | 41
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh | 192
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh | 6
85 files changed, 1370 insertions(+), 426 deletions(-)
diff --git a/sys/contrib/openzfs/.gitignore b/sys/contrib/openzfs/.gitignore
index 47d17ae16d34..a2cb92dd5406 100644
--- a/sys/contrib/openzfs/.gitignore
+++ b/sys/contrib/openzfs/.gitignore
@@ -83,6 +83,7 @@
modules.order
Makefile
Makefile.in
+changelog
*.patch
*.orig
*.tmp
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
index 0d7df10d47db..5868838a26df 100644
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.2.0
+Version: 2.2.1
Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
-Linux-Maximum: 6.5
+Linux-Maximum: 6.6
Linux-Minimum: 3.10
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary
index 426e0207052d..9c69ec4f8ccc 100755
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/arc_summary
@@ -711,7 +711,7 @@ def section_archits(kstats_dict):
pd_total = int(arc_stats['prefetch_data_hits']) +\
int(arc_stats['prefetch_data_iohits']) +\
int(arc_stats['prefetch_data_misses'])
- prt_2('ARC prefetch metadata accesses:', f_perc(pd_total, all_accesses),
+ prt_2('ARC prefetch data accesses:', f_perc(pd_total, all_accesses),
f_hits(pd_total))
pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']),
('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']),
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
index 2f040ff7582c..9636c99fc85f 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -24,6 +24,7 @@
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016, 2017, Intel Corporation.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ * Copyright (c) 2023, Klara Inc.
*/
/*
@@ -147,6 +148,17 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
}
/*
+ * Write an array of strings to the zed log
+ */
+static void lines_to_zed_log_msg(char **lines, int lines_cnt)
+{
+ int i;
+ for (i = 0; i < lines_cnt; i++) {
+ zed_log_msg(LOG_INFO, "%s", lines[i]);
+ }
+}
+
+/*
* Two stage replace on Linux
* since we get disk notifications
* we can wait for partitioned disk slice to show up!
@@ -193,14 +205,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
uint64_t is_spare = 0;
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
char rawpath[PATH_MAX], fullpath[PATH_MAX];
- char devpath[PATH_MAX];
+ char pathbuf[PATH_MAX];
int ret;
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
boolean_t is_sd = B_FALSE;
boolean_t is_mpath_wholedisk = B_FALSE;
uint_t c;
vdev_stat_t *vs;
+ char **lines = NULL;
+ int lines_cnt = 0;
+ /*
+ * Get the persistent path, typically under the '/dev/disk/by-id' or
+ * '/dev/disk/by-vdev' directories. Note that this path can change
+ * when a vdev is replaced with a new disk.
+ */
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
return;
@@ -357,15 +376,17 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
- if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
+ if (realpath(rawpath, pathbuf) == NULL && !is_mpath_wholedisk) {
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
rawpath, strerror(errno));
- (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
- &newstate);
+ int err = zpool_vdev_online(zhp, fullpath,
+ ZFS_ONLINE_FORCEFAULT, &newstate);
- zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
- fullpath, libzfs_error_description(g_zfshdl));
+ zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s) "
+ "err %d, new state %d",
+ fullpath, libzfs_error_description(g_zfshdl), err,
+ err ? (int)newstate : 0);
return;
}
@@ -383,6 +404,22 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
if (is_mpath_wholedisk) {
/* Don't label device mapper or multipath disks. */
+ zed_log_msg(LOG_INFO,
+ " it's a multipath wholedisk, don't label");
+ if (zpool_prepare_disk(zhp, vdev, "autoreplace", &lines,
+ &lines_cnt) != 0) {
+ zed_log_msg(LOG_INFO,
+ " zpool_prepare_disk: could not "
+ "prepare '%s' (%s)", fullpath,
+ libzfs_error_description(g_zfshdl));
+ if (lines_cnt > 0) {
+ zed_log_msg(LOG_INFO,
+ " zfs_prepare_disk output:");
+ lines_to_zed_log_msg(lines, lines_cnt);
+ }
+ libzfs_free_str_array(lines, lines_cnt);
+ return;
+ }
} else if (!labeled) {
/*
* we're auto-replacing a raw disk, so label it first
@@ -399,16 +436,24 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* to trigger a ZFS fault for the device (and any hot spare
* replacement).
*/
- leafname = strrchr(devpath, '/') + 1;
+ leafname = strrchr(pathbuf, '/') + 1;
/*
* If this is a request to label a whole disk, then attempt to
* write out the label.
*/
- if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
- zed_log_msg(LOG_INFO, " zpool_label_disk: could not "
+ if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
+ vdev, "autoreplace", &lines, &lines_cnt) != 0) {
+ zed_log_msg(LOG_WARNING,
+ " zpool_prepare_and_label_disk: could not "
"label '%s' (%s)", leafname,
libzfs_error_description(g_zfshdl));
+ if (lines_cnt > 0) {
+ zed_log_msg(LOG_INFO,
+ " zfs_prepare_disk output:");
+ lines_to_zed_log_msg(lines, lines_cnt);
+ }
+ libzfs_free_str_array(lines, lines_cnt);
(void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_FORCEFAULT, &newstate);
@@ -431,7 +476,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
sizeof (device->pd_physpath));
list_insert_tail(&g_device_list, device);
- zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
+ zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)",
leafname, (u_longlong_t)guid);
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
@@ -454,8 +499,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
if (!found) {
/* unexpected partition slice encountered */
- zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
- fullpath);
+ zed_log_msg(LOG_WARNING, "labeled disk %s was "
+ "unexpected here", fullpath);
(void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_FORCEFAULT, &newstate);
return;
@@ -464,10 +509,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
physpath, (u_longlong_t)guid);
- (void) snprintf(devpath, sizeof (devpath), "%s%s",
- DEV_BYID_PATH, new_devid);
+ /*
+ * Paths that begin with '/dev/disk/by-id/' will change and so
+ * they must be updated before calling zpool_vdev_attach().
+ */
+ if (strncmp(path, DEV_BYID_PATH, strlen(DEV_BYID_PATH)) == 0) {
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s%s",
+ DEV_BYID_PATH, new_devid);
+ zed_log_msg(LOG_INFO, " zpool_label_disk: path '%s' "
+ "replaced by '%s'", path, pathbuf);
+ path = pathbuf;
+ }
}
+ libzfs_free_str_array(lines, lines_cnt);
+
/*
* Construct the root vdev to pass to zpool_vdev_attach(). While adding
* the entire vdev structure is harmless, we construct a reduced set of
@@ -506,9 +562,11 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* Wait for udev to verify the links exist, then auto-replace
* the leaf disk at same physical location.
*/
- if (zpool_label_disk_wait(path, 3000) != 0) {
- zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
- "disk %s is missing", path);
+ if (zpool_label_disk_wait(path, DISK_LABEL_WAIT) != 0) {
+ zed_log_msg(LOG_WARNING, "zfs_mod: pool '%s', after labeling "
+ "replacement disk, the expected disk partition link '%s' "
+ "is missing after waiting %u ms",
+ zpool_get_name(zhp), path, DISK_LABEL_WAIT);
nvlist_free(nvroot);
return;
}
@@ -523,7 +581,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
B_TRUE, B_FALSE);
}
- zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
+ zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)",
fullpath, path, (ret == 0) ? "no errors" :
libzfs_error_description(g_zfshdl));
@@ -621,7 +679,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
dp->dd_prop, path);
dp->dd_found = B_TRUE;
- /* pass the new devid for use by replacing code */
+ /* pass the new devid for use by auto-replacing code */
if (dp->dd_new_devid != NULL) {
(void) nvlist_add_string(nvl, "new_devid",
dp->dd_new_devid);
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
index fec73a269a78..6d60e643593b 100644
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
@@ -1,6 +1,9 @@
# Features which are supported by GRUB2
+allocation_classes
async_destroy
+block_cloning
bookmarks
+device_rebuild
embedded_data
empty_bpobj
enabled_txg
@@ -9,6 +12,12 @@ filesystem_limits
hole_birth
large_blocks
livelist
+log_spacemap
lz4_compress
+project_quota
+resilver_defer
spacemap_histogram
+spacemap_v2
+userobj_accounting
+zilsaxattr
zpool_checkpoint
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1
index 162ff32a7803..125c578344f9 100644
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfsonosx-1.8.1
@@ -6,7 +6,6 @@ edonr
embedded_data
empty_bpobj
enabled_txg
-encryption
extensible_dataset
filesystem_limits
hole_birth
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
index 7c6549b0ae54..506b529dce48 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c
@@ -443,37 +443,22 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
{
int rc;
char *argv[2] = {cmd};
- char *env[5] = {(char *)"PATH=/bin:/sbin:/usr/bin:/usr/sbin"};
+ char **env;
char **lines = NULL;
int lines_cnt = 0;
int i;
- /* Setup our custom environment variables */
- rc = asprintf(&env[1], "VDEV_PATH=%s",
- data->path ? data->path : "");
- if (rc == -1) {
- env[1] = NULL;
+ env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath,
+ data->vdev_enc_sysfs_path, NULL, NULL);
+ if (env == NULL)
goto out;
- }
-
- rc = asprintf(&env[2], "VDEV_UPATH=%s",
- data->upath ? data->upath : "");
- if (rc == -1) {
- env[2] = NULL;
- goto out;
- }
-
- rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s",
- data->vdev_enc_sysfs_path ?
- data->vdev_enc_sysfs_path : "");
- if (rc == -1) {
- env[3] = NULL;
- goto out;
- }
/* Run the command */
rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines,
&lines_cnt);
+
+ zpool_vdev_script_free_env(env);
+
if (rc != 0)
goto out;
@@ -485,10 +470,6 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
out:
if (lines != NULL)
libzfs_free_str_array(lines, lines_cnt);
-
- /* Start with i = 1 since env[0] was statically allocated */
- for (i = 1; i < ARRAY_SIZE(env); i++)
- free(env[i]);
}
/*
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_util.h b/sys/contrib/openzfs/cmd/zpool/zpool_util.h
index b35dea0cd449..db8e631dc6be 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_util.h
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_util.h
@@ -126,6 +126,10 @@ vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv,
void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
+void free_vdev_cmd_data(vdev_cmd_data_t *data);
+
+int vdev_run_cmd_simple(char *path, char *cmd);
+
int check_device(const char *path, boolean_t force,
boolean_t isspare, boolean_t iswholedisk);
boolean_t check_sector_size_database(char *path, int *sector_size);
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
index 99a521aa2a28..3d0fc089c32f 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
@@ -936,6 +936,15 @@ zero_label(const char *path)
return (0);
}
+static void
+lines_to_stderr(char *lines[], int lines_cnt)
+{
+ int i;
+ for (i = 0; i < lines_cnt; i++) {
+ fprintf(stderr, "%s\n", lines[i]);
+ }
+}
+
/*
* Go through and find any whole disks in the vdev specification, labelling them
* as appropriate. When constructing the vdev spec, we were unable to open this
@@ -947,7 +956,7 @@ zero_label(const char *path)
* need to get the devid after we label the disk.
*/
static int
-make_disks(zpool_handle_t *zhp, nvlist_t *nv)
+make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing)
{
nvlist_t **child;
uint_t c, children;
@@ -1032,6 +1041,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
*/
if (!is_exclusive && !is_spare(NULL, udevpath)) {
char *devnode = strrchr(devpath, '/') + 1;
+ char **lines = NULL;
+ int lines_cnt = 0;
ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
if (ret == 0) {
@@ -1043,9 +1054,27 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
/*
* When labeling a pool the raw device node name
* is provided as it appears under /dev/.
+ *
+ * Note that 'zhp' will be NULL when we're creating a
+ * pool.
*/
- if (zpool_label_disk(g_zfs, zhp, devnode) == -1)
+ if (zpool_prepare_and_label_disk(g_zfs, zhp, devnode,
+ nv, zhp == NULL ? "create" :
+ replacing ? "replace" : "add", &lines,
+ &lines_cnt) != 0) {
+ (void) fprintf(stderr,
+ gettext(
+ "Error preparing/labeling disk.\n"));
+ if (lines_cnt > 0) {
+ (void) fprintf(stderr,
+ gettext("zfs_prepare_disk output:\n"));
+ lines_to_stderr(lines, lines_cnt);
+ }
+
+ libzfs_free_str_array(lines, lines_cnt);
return (-1);
+ }
+ libzfs_free_str_array(lines, lines_cnt);
/*
* Wait for udev to signal the device is available
@@ -1082,19 +1111,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
}
for (c = 0; c < children; c++)
- if ((ret = make_disks(zhp, child[c])) != 0)
+ if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
- if ((ret = make_disks(zhp, child[c])) != 0)
+ if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
- if ((ret = make_disks(zhp, child[c])) != 0)
+ if ((ret = make_disks(zhp, child[c], replacing)) != 0)
return (ret);
return (0);
@@ -1752,7 +1781,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
return (NULL);
}
- if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
+ if (!flags.dryrun && make_disks(zhp, newroot, B_FALSE) != 0) {
nvlist_free(newroot);
return (NULL);
}
@@ -1873,7 +1902,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
/*
* Run through the vdev specification and label any whole disks found.
*/
- if (!dryrun && make_disks(zhp, newroot) != 0) {
+ if (!dryrun && make_disks(zhp, newroot, replacing) != 0) {
nvlist_free(newroot);
return (NULL);
}
diff --git a/sys/contrib/openzfs/config/Rules.am b/sys/contrib/openzfs/config/Rules.am
index abb4ced33233..7c266964f3f3 100644
--- a/sys/contrib/openzfs/config/Rules.am
+++ b/sys/contrib/openzfs/config/Rules.am
@@ -33,6 +33,7 @@ AM_CPPFLAGS += -D_REENTRANT
AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
+AM_CPPFLAGS += -DZFSEXECDIR=\"$(zfsexecdir)\"
AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\"
diff --git a/sys/contrib/openzfs/config/kernel-fsync-bdev.m4 b/sys/contrib/openzfs/config/kernel-fsync-bdev.m4
new file mode 100644
index 000000000000..c47e236f705f
--- /dev/null
+++ b/sys/contrib/openzfs/config/kernel-fsync-bdev.m4
@@ -0,0 +1,36 @@
+dnl #
+dnl # 6.6 API change,
+dnl # fsync_bdev was removed in favor of sync_blockdev
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYNC_BDEV], [
+ ZFS_LINUX_TEST_SRC([fsync_bdev], [
+ #include <linux/blkdev.h>
+ ],[
+ fsync_bdev(NULL);
+ ])
+
+ ZFS_LINUX_TEST_SRC([sync_blockdev], [
+ #include <linux/blkdev.h>
+ ],[
+ sync_blockdev(NULL);
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SYNC_BDEV], [
+ AC_MSG_CHECKING([whether fsync_bdev() exists])
+ ZFS_LINUX_TEST_RESULT([fsync_bdev], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_FSYNC_BDEV, 1,
+ [fsync_bdev() is declared in include/blkdev.h])
+ ],[
+ AC_MSG_CHECKING([whether sync_blockdev() exists])
+ ZFS_LINUX_TEST_RESULT([sync_blockdev], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_SYNC_BLOCKDEV, 1,
+ [sync_blockdev() is declared in include/blkdev.h])
+ ],[
+ ZFS_LINUX_TEST_ERROR(
+ [neither fsync_bdev() nor sync_blockdev() exist])
+ ])
+ ])
+])
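
The check above defines at most one of HAVE_FSYNC_BDEV or HAVE_SYNC_BLOCKDEV. A minimal sketch of how compat code typically consumes these results (the wrapper name zfs_sync_bdev_sketch is hypothetical and not part of this change):

    /*
     * Sketch: select the available block-device sync primitive based on
     * the configure results above. fsync_bdev() was removed in Linux 6.6
     * in favor of sync_blockdev().
     */
    #include <linux/blkdev.h>

    static inline int
    zfs_sync_bdev_sketch(struct block_device *bdev)
    {
    #if defined(HAVE_FSYNC_BDEV)
    	return (fsync_bdev(bdev));
    #elif defined(HAVE_SYNC_BLOCKDEV)
    	return (sync_blockdev(bdev));
    #else
    #error "neither fsync_bdev() nor sync_blockdev() detected"
    #endif
    }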
diff --git a/sys/contrib/openzfs/config/kernel-generic_fillattr.m4 b/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
index 02dee4d4c000..f5323f0dcb9f 100644
--- a/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
+++ b/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
@@ -7,6 +7,10 @@ dnl #
dnl # 6.3 API
dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
dnl #
+dnl # 6.6 API
+dnl # generic_fillattr() now takes u32 as second argument, representing a
+dnl # request_mask for statx
+dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
#include <linux/fs.h>
@@ -25,22 +29,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
struct kstat *k = NULL;
generic_fillattr(idmap, in, k);
])
+
+ ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap_reqmask], [
+ #include <linux/fs.h>
+ ],[
+ struct mnt_idmap *idmap = NULL;
+ struct inode *in = NULL;
+ struct kstat *k = NULL;
+ generic_fillattr(idmap, 0, in, k);
+ ])
])
AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
- AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
- ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
+ AC_MSG_CHECKING(
+ [whether generic_fillattr requires struct mnt_idmap* and request_mask])
+ ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap_reqmask], [
AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
- [generic_fillattr requires struct mnt_idmap*])
+ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1,
+ [generic_fillattr requires struct mnt_idmap* and u32 request_mask])
],[
- AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
- ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
+ AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
+ ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
- [generic_fillattr requires struct user_namespace*])
+ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
+ [generic_fillattr requires struct mnt_idmap*])
],[
- AC_MSG_RESULT([no])
+ AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
+ ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
+ [generic_fillattr requires struct user_namespace*])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
])
])
])
diff --git a/sys/contrib/openzfs/config/kernel-inode-times.m4 b/sys/contrib/openzfs/config/kernel-inode-times.m4
index 9c016c790081..412e13b47df5 100644
--- a/sys/contrib/openzfs/config/kernel-inode-times.m4
+++ b/sys/contrib/openzfs/config/kernel-inode-times.m4
@@ -27,6 +27,31 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
memset(&ip, 0, sizeof(ip));
ts = ip.i_mtime;
])
+
+ dnl #
+ dnl # 6.6 API change
+ dnl # i_ctime no longer directly accessible, must use
+ dnl # inode_get_ctime(ip), inode_set_ctime*(ip) to
+ dnl # read/write.
+ dnl #
+ ZFS_LINUX_TEST_SRC([inode_get_ctime], [
+ #include <linux/fs.h>
+ ],[
+ struct inode ip;
+
+ memset(&ip, 0, sizeof(ip));
+ inode_get_ctime(&ip);
+ ])
+
+ ZFS_LINUX_TEST_SRC([inode_set_ctime_to_ts], [
+ #include <linux/fs.h>
+ ],[
+ struct inode ip;
+ struct timespec64 ts;
+
+ memset(&ip, 0, sizeof(ip));
+ inode_set_ctime_to_ts(&ip, ts);
+ ])
])
AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
@@ -47,4 +72,22 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1,
[inode->i_*time's are timespec64])
])
+
+ AC_MSG_CHECKING([whether inode_get_ctime() exists])
+ ZFS_LINUX_TEST_RESULT([inode_get_ctime], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_INODE_GET_CTIME, 1,
+ [inode_get_ctime() exists in linux/fs.h])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+
+ AC_MSG_CHECKING([whether inode_set_ctime_to_ts() exists])
+ ZFS_LINUX_TEST_RESULT([inode_set_ctime_to_ts], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_INODE_SET_CTIME_TO_TS, 1,
+ [inode_set_ctime_to_ts() exists in linux/fs.h])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index df194ec72207..056517a841f2 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -162,6 +162,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_RECLAIMED
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
+ ZFS_AC_KERNEL_SRC_SYNC_BDEV
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -303,6 +304,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_RECLAIMED
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_COPY_SPLICE_READ
+ ZFS_AC_KERNEL_SYNC_BDEV
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac
index 4c75616e4299..f31fe1db81e4 100644
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS
AC_CONFIG_FILES([
contrib/debian/rules
+ contrib/debian/changelog
Makefile
include/Makefile
lib/libzfs/libzfs.pc
diff --git a/sys/contrib/openzfs/contrib/debian/changelog b/sys/contrib/openzfs/contrib/debian/changelog.in
index ba42ea59fa8d..525519a73d08 100644
--- a/sys/contrib/openzfs/contrib/debian/changelog
+++ b/sys/contrib/openzfs/contrib/debian/changelog.in
@@ -1,3 +1,9 @@
+openzfs-linux (@VERSION@-1) unstable; urgency=low
+
+ * OpenZFS @VERSION@ is tagged.
+
+ -- Umer Saleem <usaleem@ixsystems.com> Wed, 15 Nov 2023 15:00:00 +0500
+
openzfs-linux (2.2.0-0) unstable; urgency=low
* OpenZFS 2.2.0 is tagged.
diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control
index f4e97fe16145..98beb900d0fa 100644
--- a/sys/contrib/openzfs/contrib/debian/control
+++ b/sys/contrib/openzfs/contrib/debian/control
@@ -197,7 +197,6 @@ Recommends: openzfs-zfs-zed, openzfs-zfsutils (>= ${source:Version}), ${linux:Re
Suggests: debhelper
Breaks: spl-dkms (<< 0.8.0~rc1)
Replaces: spl-dkms, zfs-dkms
-Conflicts: zfs-dkms
Provides: openzfs-zfs-modules
Description: OpenZFS filesystem kernel modules for Linux
OpenZFS is a storage platform that encompasses the functionality of
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
index fa05401bc168..741014398ade 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
@@ -34,6 +34,7 @@ usr/bin/zvol_wait
usr/lib/modules-load.d/ lib/
usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb
+usr/lib/zfs-linux/zfs_prepare_disk
usr/sbin/arc_summary
usr/sbin/arcstat
usr/sbin/dbufstat
@@ -87,6 +88,7 @@ usr/share/man/man8/zfs-wait.8
usr/share/man/man8/zfs-zone.8
usr/share/man/man8/zfs.8
usr/share/man/man8/zfs_ids_to_path.8
+usr/share/man/man8/zfs_prepare_disk.8
usr/share/man/man7/zfsconcepts.7
usr/share/man/man7/zfsprops.7
usr/share/man/man8/zgenhostid.8
diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h
index 6c3669273786..4adfa38e87be 100644
--- a/sys/contrib/openzfs/include/libzfs.h
+++ b/sys/contrib/openzfs/include/libzfs.h
@@ -326,6 +326,15 @@ _LIBZFS_H nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
boolean_t *, boolean_t *, boolean_t *);
_LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *,
const char *);
+_LIBZFS_H int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
+ const char *prepare_str, char **lines[], int *lines_cnt);
+_LIBZFS_H int zpool_prepare_and_label_disk(libzfs_handle_t *hdl,
+ zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str,
+ char **lines[], int *lines_cnt);
+_LIBZFS_H char ** zpool_vdev_script_alloc_env(const char *pool_name,
+ const char *vdev_path, const char *vdev_upath,
+ const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val);
+_LIBZFS_H void zpool_vdev_script_free_env(char **env);
_LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp,
const char *path);
diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h
index 237ff976ba62..053b1ed4b52a 100644
--- a/sys/contrib/openzfs/include/libzutil.h
+++ b/sys/contrib/openzfs/include/libzutil.h
@@ -34,7 +34,7 @@ extern "C" {
#endif
/*
- * Default wait time for a device name to be created.
+ * Default wait time in milliseconds for a device name to be created.
*/
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
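
The constant is in milliseconds. The zfs_mod.c hunk above now passes it to zpool_label_disk_wait(); a hedged sketch of that call (the error handling is illustrative):

    /*
     * Sketch: wait up to DISK_LABEL_WAIT (30,000 ms) for udev to create
     * the expected device link before proceeding.
     */
    if (zpool_label_disk_wait(path, DISK_LABEL_WAIT) != 0) {
    	/* partition link did not appear within 30 seconds */
    	return (ENODEV);
    }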
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mutex.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mutex.h
index e757d12c1502..8cfe56c75309 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mutex.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mutex.h
@@ -64,6 +64,7 @@ typedef enum {
} while (0)
#define mutex_destroy(lock) sx_destroy(lock)
#define mutex_enter(lock) sx_xlock(lock)
+#define mutex_enter_interruptible(lock) sx_xlock_sig(lock)
#define mutex_enter_nested(lock, type) sx_xlock(lock)
#define mutex_tryenter(lock) sx_try_xlock(lock)
#define mutex_exit(lock) sx_xunlock(lock)
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
index 30579b391711..b23a939b3aa7 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/taskq.h
@@ -30,9 +30,9 @@
#include <sys/types.h>
#include <sys/proc.h>
+#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/thread.h>
-#include <sys/ck.h>
#ifdef __cplusplus
extern "C" {
@@ -48,16 +48,16 @@ typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *);
typedef struct taskq_ent {
- struct task tqent_task;
- struct timeout_task tqent_timeout_task;
+ union {
+ struct task tqent_task;
+ struct timeout_task tqent_timeout_task;
+ };
task_func_t *tqent_func;
void *tqent_arg;
- taskqid_t tqent_id;
- CK_LIST_ENTRY(taskq_ent) tqent_hash;
- uint8_t tqent_type;
- uint8_t tqent_registered;
- uint8_t tqent_cancelled;
- volatile uint32_t tqent_rc;
+ taskqid_t tqent_id;
+ LIST_ENTRY(taskq_ent) tqent_hash;
+ uint_t tqent_type;
+ volatile uint_t tqent_rc;
} taskq_ent_t;
/*
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
index e156ed41c28c..aea8bd5ed22c 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
@@ -461,10 +461,16 @@ zpl_is_32bit_api(void)
* 6.3 API change
* generic_fillattr() first arg is changed to struct mnt_idmap *
*
+ * 6.6 API change
+ * generic_fillattr() gets new second arg request_mask, a u32 type
+ *
*/
#ifdef HAVE_GENERIC_FILLATTR_IDMAP
#define zpl_generic_fillattr(idmap, ip, sp) \
generic_fillattr(idmap, ip, sp)
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+#define zpl_generic_fillattr(idmap, rqm, ip, sp) \
+ generic_fillattr(idmap, rqm, ip, sp)
#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
#define zpl_generic_fillattr(user_ns, ip, sp) \
generic_fillattr(user_ns, ip, sp)
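
On 6.6+ kernels the statx request_mask must now be forwarded to generic_fillattr(). A hedged caller sketch keyed off the same define (the function name is hypothetical; compare the zfs_getattr_fast() prototype change later in this diff):

    /*
     * Sketch: invoke the compat macro under both arities. With
     * HAVE_GENERIC_FILLATTR_IDMAP_REQMASK (6.6+) the u32 request_mask
     * is passed through; older kernels use the three-argument form.
     */
    static int
    zpl_getattr_sketch(zidmap_t *idmap, u32 request_mask, struct inode *ip,
        struct kstat *stat)
    {
    #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
    	zpl_generic_fillattr(idmap, request_mask, ip, stat);
    #else
    	zpl_generic_fillattr(idmap, ip, stat);
    #endif
    	return (0);
    }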
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h
index 20eeadc46e10..82d50b6034c4 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h
@@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
uint32_t skm_refill; /* Batch refill size */
struct spl_kmem_cache *skm_cache; /* Owned by cache */
unsigned int skm_cpu; /* Owned by cpu */
- void *skm_objs[0]; /* Object pointers */
+ void *skm_objs[]; /* Object pointers */
} spl_kmem_magazine_t;
typedef struct spl_kmem_obj {
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
index 6b61c59c48e2..b4eaa0266d20 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/mutex.h
@@ -128,7 +128,6 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
#define NESTED_SINGLE 1
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define mutex_enter_nested(mp, subclass) \
{ \
ASSERT3P(mutex_owner(mp), !=, current); \
@@ -137,16 +136,22 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \
}
-#else /* CONFIG_DEBUG_LOCK_ALLOC */
-#define mutex_enter_nested(mp, subclass) \
-{ \
+
+#define mutex_enter_interruptible(mp) \
+/* CSTYLED */ \
+({ \
+ int _rc_; \
+ \
ASSERT3P(mutex_owner(mp), !=, current); \
spl_mutex_lockdep_off_maybe(mp); \
- mutex_lock(MUTEX(mp)); \
+ _rc_ = mutex_lock_interruptible(MUTEX(mp)); \
spl_mutex_lockdep_on_maybe(mp); \
- spl_mutex_set_owner(mp); \
-}
-#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+ if (!_rc_) { \
+ spl_mutex_set_owner(mp); \
+ } \
+ \
+ _rc_; \
+})
#define mutex_enter(mp) mutex_enter_nested((mp), 0)
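
Unlike mutex_enter(), mutex_enter_interruptible() can fail when the sleeping task catches a signal, so its return value must be checked. A hedged caller sketch (the function name is illustrative):

    /*
     * Sketch: a non-zero return from mutex_enter_interruptible() means
     * the lock was NOT acquired, so the caller must bail out without
     * calling mutex_exit().
     */
    static int
    locked_op_sketch(kmutex_t *mp)
    {
    	if (mutex_enter_interruptible(mp) != 0)
    		return (EINTR);

    	/* ... critical section ... */

    	mutex_exit(mp);
    	return (0);
    }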
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
index cce097e16fbc..a4b600004c9f 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
size_t uio_skip;
struct request *rq;
-
- /*
- * Used for saving rq_for_each_segment() state between calls
- * to zfs_uiomove_bvec_rq().
- */
- struct req_iterator iter;
- struct bio_vec bv;
} zfs_uio_t;
@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
} else {
uio->uio_bvec = NULL;
uio->uio_iovcnt = 0;
- memset(&uio->iter, 0, sizeof (uio->iter));
}
uio->uio_loffset = io_offset(bio, rq);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
index b4d5db21f5e5..220466550258 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
+extern int zfs_bclone_enabled;
+
/*
 * This structure emulates the vfs_t from other platforms. Its purpose
* is to facilitate the handling of mount options and minimize structural
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h
index 7a1db7deeec8..830c76e5743a 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h
@@ -56,7 +56,12 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
cred_t *cr, int flags);
extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip,
+ struct kstat *sp);
+#else
extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
+#endif
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
zidmap_t *mnt_ns);
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
index 0bd20f64897d..9b729be6d74d 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
@@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
/* zpl_super.c */
-extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
+extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);
extern const struct super_operations zpl_super_operations;
extern const struct export_operations zpl_export_operations;
@@ -263,4 +263,15 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia)
#endif
+#ifdef HAVE_INODE_GET_CTIME
+#define zpl_inode_get_ctime(ip) inode_get_ctime(ip)
+#else
+#define zpl_inode_get_ctime(ip) (ip->i_ctime)
+#endif
+#ifdef HAVE_INODE_SET_CTIME_TO_TS
+#define zpl_inode_set_ctime_to_ts(ip, ts) inode_set_ctime_to_ts(ip, ts)
+#else
+#define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts)
+#endif
+
#endif /* _SYS_ZPL_H */
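
A minimal sketch of using the new ctime wrappers so the same code builds before and after the Linux 6.6 accessor change (the helper name is hypothetical):

    /*
     * Sketch: touch an inode's ctime only through the compat macros,
     * which expand to direct i_ctime access on pre-6.6 kernels.
     */
    static void
    update_ctime_sketch(struct inode *ip)
    {
    	struct timespec64 now = current_time(ip);
    	struct timespec64 cur = zpl_inode_get_ctime(ip);

    	if (timespec64_compare(&cur, &now) != 0)
    		zpl_inode_set_ctime_to_ts(ip, now);
    }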
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
index 9d67dab06ca3..05307aab99e3 100644
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
-typedef void arc_prune_func_t(int64_t bytes, void *priv);
+typedef void arc_prune_func_t(uint64_t bytes, void *priv);
/* Shared module parameters */
extern uint_t zfs_arc_average_blocksize;
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
index adff42c55d05..defebe3b2fbb 100644
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -1065,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
-extern void arc_prune_async(uint64_t);
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void);
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index b90855687411..87ddbd90e170 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -837,7 +837,7 @@ extern kmutex_t spa_namespace_lock;
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
extern void spa_config_load(void);
-extern nvlist_t *spa_all_configs(uint64_t *);
+extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
diff --git a/sys/contrib/openzfs/include/sys/txg_impl.h b/sys/contrib/openzfs/include/sys/txg_impl.h
index 45fde2e1f351..8ab7969b25be 100644
--- a/sys/contrib/openzfs/include/sys/txg_impl.h
+++ b/sys/contrib/openzfs/include/sys/txg_impl.h
@@ -73,8 +73,7 @@ struct tx_cpu {
kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
- char tc_pad[8]; /* pad to fill 3 cache lines */
-};
+} ____cacheline_aligned;
/*
* The tx_state structure maintains the state information about the different
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index ad9dc3aefd8e..3f2312c23438 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -131,7 +131,10 @@ typedef const struct vdev_ops {
* Virtual device properties
*/
typedef union vdev_queue_class {
- list_t vqc_list;
+ struct {
+ ulong_t vqc_list_numnodes;
+ list_t vqc_list;
+ };
avl_tree_t vqc_tree;
} vdev_queue_class_t;
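
The list-backed queue class now carries its own element count next to the list. A hedged sketch of the bookkeeping this enables (function names are illustrative; the assumption is that vdev_queue.c keeps vqc_list_numnodes in step with vqc_list):

    /*
     * Sketch: maintaining the counter alongside the list makes the queue
     * depth of a list-backed class an O(1) read instead of an O(n) walk.
     */
    static void
    vqc_list_add_sketch(vdev_queue_class_t *qc, zio_t *zio)
    {
    	list_insert_tail(&qc->vqc_list, zio);
    	qc->vqc_list_numnodes++;
    }

    static ulong_t
    vqc_list_length_sketch(vdev_queue_class_t *qc)
    {
    	return (qc->vqc_list_numnodes);
    }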
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
index c1037fa12e30..73c26dff1e0e 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
@@ -130,7 +130,7 @@ typedef struct raidz_row {
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
#endif
- raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
+ raidz_col_t rr_col[]; /* Flexible array of I/O columns */
} raidz_row_t;
typedef struct raidz_map {
@@ -139,7 +139,7 @@ typedef struct raidz_map {
int rm_nskip; /* RAIDZ sectors skipped for padding */
int rm_skipstart; /* Column index of padding start */
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
- raidz_row_t *rm_row[0]; /* flexible array of rows */
+ raidz_row_t *rm_row[]; /* flexible array of rows */
} raidz_map_t;
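
Replacing the GNU zero-length arrays with C99 flexible array members does not change how these structures are sized. A hedged sketch of the standard allocation pattern (the function name is illustrative):

    /*
     * Sketch: offsetof() sizes the trailing flexible array member
     * exactly as it did for the old rm_row[0] form.
     */
    static raidz_map_t *
    raidz_map_alloc_sketch(int rows)
    {
    	return (kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
    	    KM_SLEEP));
    }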
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h
index 6a337b49edf3..750ca612b962 100644
--- a/sys/contrib/openzfs/include/sys/zfs_context.h
+++ b/sys/contrib/openzfs/include/sys/zfs_context.h
@@ -274,11 +274,13 @@ typedef struct kmutex {
extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
extern void mutex_destroy(kmutex_t *mp);
extern void mutex_enter(kmutex_t *mp);
+extern int mutex_enter_check_return(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp);
#define NESTED_SINGLE 1
#define mutex_enter_nested(mp, class) mutex_enter(mp)
+#define mutex_enter_interruptible(mp) mutex_enter_check_return(mp)
/*
* RW locks
*/
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index 8658d39e28fc..2d612a16b227 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -515,6 +515,8 @@
<elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -562,6 +564,8 @@
<elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
index b94abea3d581..fdd1975fa677 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
@@ -2071,3 +2071,196 @@ printf_color(const char *color, const char *format, ...)
return (rc);
}
+
+/* PATH + 5 env vars + a NULL entry = 7 */
+#define ZPOOL_VDEV_SCRIPT_ENV_COUNT 7
+
+/*
+ * There are a few places where ZFS will call external scripts (like the
+ * scripts in zpool.d/ and `zfs_prepare_disk`). These scripts are called with
+ * a reduced $PATH and some vdev-specific environment vars set. This function
+ * will allocate and populate the environment variable array that is passed to
+ * these scripts. The user must free the array with
+ * zpool_vdev_script_free_env() when done.
+ *
+ * The following env vars will be set (but value could be blank):
+ *
+ * POOL_NAME
+ * VDEV_PATH
+ * VDEV_UPATH
+ * VDEV_ENC_SYSFS_PATH
+ *
+ * In addition, an optional environment variable named by 'opt_key' can be
+ * set to 'opt_val'.
+ *
+ * Returns allocated env[] array on success, NULL otherwise.
+ */
+char **
+zpool_vdev_script_alloc_env(const char *pool_name,
+ const char *vdev_path, const char *vdev_upath,
+ const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val)
+{
+ char **env = NULL;
+ int rc;
+
+ env = calloc(ZPOOL_VDEV_SCRIPT_ENV_COUNT, sizeof (*env));
+ if (!env)
+ return (NULL);
+
+ env[0] = strdup("PATH=/bin:/sbin:/usr/bin:/usr/sbin");
+ if (!env[0])
+ goto error;
+
+ /* Setup our custom environment variables */
+ rc = asprintf(&env[1], "POOL_NAME=%s", pool_name ? pool_name : "");
+ if (rc == -1) {
+ env[1] = NULL;
+ goto error;
+ }
+
+ rc = asprintf(&env[2], "VDEV_PATH=%s", vdev_path ? vdev_path : "");
+ if (rc == -1) {
+ env[2] = NULL;
+ goto error;
+ }
+
+ rc = asprintf(&env[3], "VDEV_UPATH=%s", vdev_upath ? vdev_upath : "");
+ if (rc == -1) {
+ env[3] = NULL;
+ goto error;
+ }
+
+ rc = asprintf(&env[4], "VDEV_ENC_SYSFS_PATH=%s",
+ vdev_enc_sysfs_path ? vdev_enc_sysfs_path : "");
+ if (rc == -1) {
+ env[4] = NULL;
+ goto error;
+ }
+
+ if (opt_key != NULL) {
+ rc = asprintf(&env[5], "%s=%s", opt_key,
+ opt_val ? opt_val : "");
+ if (rc == -1) {
+ env[5] = NULL;
+ goto error;
+ }
+ }
+
+ return (env);
+
+error:
+ for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
+ free(env[i]);
+
+ free(env);
+
+ return (NULL);
+}
+
+/*
+ * Free the env[] array that was allocated by zpool_vdev_script_alloc_env().
+ */
+void
+zpool_vdev_script_free_env(char **env)
+{
+ for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
+ free(env[i]);
+
+ free(env);
+}
+
+/*
+ * Prepare a disk by (optionally) running a program before labeling the disk.
+ * This can be useful for installing disk firmware or doing some pre-flight
+ * checks on the disk before it becomes part of the pool. The program run is
+ * located at ZFSEXECDIR/zfs_prepare_disk
+ * (e.g. /usr/local/libexec/zfs/zfs_prepare_disk).
+ *
+ * Return 0 on success, non-zero on failure.
+ */
+int
+zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
+ const char *prepare_str, char **lines[], int *lines_cnt)
+{
+ const char *script_path = ZFSEXECDIR "/zfs_prepare_disk";
+ const char *pool_name;
+ int rc = 0;
+
+ /* Path to script and a NULL entry */
+ char *argv[2] = {(char *)script_path};
+ char **env = NULL;
+ const char *path = NULL, *enc_sysfs_path = NULL;
+ char *upath;
+ *lines_cnt = 0;
+
+ if (access(script_path, X_OK) != 0) {
+ /* No script, nothing to do */
+ return (0);
+ }
+
+ (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH, &path);
+ (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+ &enc_sysfs_path);
+
+ upath = zfs_get_underlying_path(path);
+ pool_name = zhp ? zpool_get_name(zhp) : NULL;
+
+ env = zpool_vdev_script_alloc_env(pool_name, path, upath,
+ enc_sysfs_path, "VDEV_PREPARE", prepare_str);
+
+ free(upath);
+
+ if (env == NULL) {
+ return (ENOMEM);
+ }
+
+ rc = libzfs_run_process_get_stdout(script_path, argv, env, lines,
+ lines_cnt);
+
+ zpool_vdev_script_free_env(env);
+
+ return (rc);
+}
+
+/*
+ * Optionally run a script and then label a disk. The script can be used to
+ * prepare a disk for inclusion into the pool. For example, it might update
+ * the disk's firmware or check its health.
+ *
+ * The 'name' provided is the short name, stripped of any leading
+ * /dev path, and is passed to zpool_label_disk. vdev_nv is the nvlist for
+ * the vdev. prepare_str is a string that gets passed as the VDEV_PREPARE
+ * env variable to the script.
+ *
+ * The following env vars are passed to the script:
+ *
+ * POOL_NAME: The pool name (blank during zpool create)
+ * VDEV_PREPARE: Reason why the disk is being prepared for inclusion:
+ * "create", "add", "replace", or "autoreplace"
+ * VDEV_PATH: Path to the disk
+ * VDEV_UPATH: One of the 'underlying paths' to the disk. This is
+ * useful for DM devices.
+ * VDEV_ENC_SYSFS_PATH: Path to the disk's enclosure sysfs path, if available.
+ *
+ * Note, some of these values can be blank.
+ *
+ * Return 0 on success, non-zero otherwise.
+ */
+int
+zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp,
+ const char *name, nvlist_t *vdev_nv, const char *prepare_str,
+ char **lines[], int *lines_cnt)
+{
+ int rc;
+ char vdev_path[MAXPATHLEN];
+ (void) snprintf(vdev_path, sizeof (vdev_path), "%s/%s", DISK_ROOT,
+ name);
+
+ /* zhp will be NULL when creating a pool */
+ rc = zpool_prepare_disk(zhp, vdev_nv, prepare_str, lines, lines_cnt);
+ if (rc != 0)
+ return (rc);
+
+ rc = zpool_label_disk(hdl, zhp, name);
+ return (rc);
+}
diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c
index a9b9bf4c2ce5..ffad7fc02bc9 100644
--- a/sys/contrib/openzfs/lib/libzpool/kernel.c
+++ b/sys/contrib/openzfs/lib/libzpool/kernel.c
@@ -206,6 +206,15 @@ mutex_enter(kmutex_t *mp)
}
int
+mutex_enter_check_return(kmutex_t *mp)
+{
+ int error = pthread_mutex_lock(&mp->m_lock);
+ if (error == 0)
+ mp->m_owner = pthread_self();
+ return (error);
+}
+
+int
mutex_tryenter(kmutex_t *mp)
{
int error = pthread_mutex_trylock(&mp->m_lock);
diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
index 8b64369dc29f..44ed697dd490 100644
--- a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
+++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c
@@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
* Wait up to timeout_ms for udev to set up the device node. The device is
* considered ready when libudev determines it has been initialized, all of
* the device links have been verified to exist, and it has been allowed to
- * settle. At this point the device the device can be accessed reliably.
- * Depending on the complexity of the udev rules this process could take
- * several seconds.
+ * settle. At this point the device can be accessed reliably. Depending on
+ * the complexity of the udev rules this process could take several seconds.
*/
int
zpool_label_disk_wait(const char *path, int timeout_ms)
diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am
index 36c1aede106e..45156571eec3 100644
--- a/sys/contrib/openzfs/man/Makefile.am
+++ b/sys/contrib/openzfs/man/Makefile.am
@@ -62,6 +62,7 @@ dist_man_MANS = \
%D%/man8/zfs-userspace.8 \
%D%/man8/zfs-wait.8 \
%D%/man8/zfs_ids_to_path.8 \
+ %D%/man8/zfs_prepare_disk.8 \
%D%/man8/zgenhostid.8 \
%D%/man8/zinject.8 \
%D%/man8/zpool.8 \
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 71a3e67ee67e..4ec52a2fb653 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -1137,6 +1137,11 @@ Selecting any option other than
results in vector instructions
from the respective CPU instruction set being used.
.
+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable the experimental block cloning feature.
+If this setting is 0, then even if feature@block_cloning is enabled,
+attempts to clone blocks will act as though the feature is disabled.
+.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.
.Pp
@@ -2172,7 +2177,7 @@ if a volatile out-of-order write cache is enabled.
Disable intent logging replay.
Can be disabled for recovery from corrupted ZIL.
.
-.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768 KiB Pc Pq u64
+.It Sy zil_slog_bulk Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by single active ZIL writer.
@@ -2317,6 +2322,63 @@ If
.Sy zvol_threads
to the number of CPUs present or 32 (whichever is greater).
.
+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
+The number of threads per zvol to use for queuing IO requests.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+If
+.Sy 0
+(the default) then internally set
+.Sy zvol_blk_mq_threads
+to the number of CPUs present.
+.
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Set to
+.Sy 1
+to use the
+.Li blk-mq
+API for zvols.
+Set to
+.Sy 0
+(the default) to use the legacy zvol APIs.
+This setting can give better or worse zvol performance depending on
+the workload.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+.
+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
+If
+.Sy zvol_use_blk_mq
+is enabled, then process this number of
+.Sy volblocksize Ns -sized blocks per zvol thread.
+This tunable can be used to favor better performance for zvol reads (lower
+values) or writes (higher values).
+If set to
+.Sy 0 ,
+then the zvol layer will process the maximum number of blocks
+per thread that it can.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+.
+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
+The queue_depth value for the zvol
+.Li blk-mq
+interface.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+If
+.Sy 0
+(the default) then use the kernel's default queue depth.
+Values are clamped to the kernel's
+.Dv BLKDEV_MIN_RQ
+and
+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
+limits.
+.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when
.Sy volmode Ns = Ns Sy default :
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index b901ce6c2935..8ca4bd927b24 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -219,8 +219,11 @@ to the end of the line is ignored.
.Bd -literal -compact -offset 4n
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
# Features which are supported by GRUB2
+allocation_classes
async_destroy
+block_cloning
bookmarks
+device_rebuild
embedded_data
empty_bpobj
enabled_txg
@@ -229,8 +232,14 @@ filesystem_limits
hole_birth
large_blocks
livelist
+log_spacemap
lz4_compress
+project_quota
+resilver_defer
spacemap_histogram
+spacemap_v2
+userobj_accounting
+zilsaxattr
zpool_checkpoint
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
diff --git a/sys/contrib/openzfs/man/man8/.gitignore b/sys/contrib/openzfs/man/man8/.gitignore
index f2fc702147e9..a468f9cbf9d3 100644
--- a/sys/contrib/openzfs/man/man8/.gitignore
+++ b/sys/contrib/openzfs/man/man8/.gitignore
@@ -1,2 +1,3 @@
/zed.8
/zfs-mount-generator.8
+/zfs_prepare_disk.8
diff --git a/sys/contrib/openzfs/man/man8/zfs_prepare_disk.8.in b/sys/contrib/openzfs/man/man8/zfs_prepare_disk.8.in
new file mode 100644
index 000000000000..2a741531e415
--- /dev/null
+++ b/sys/contrib/openzfs/man/man8/zfs_prepare_disk.8.in
@@ -0,0 +1,70 @@
+.\"
+.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
+.\" Copyright (C) 2023 Lawrence Livermore National Security, LLC.
+.\" Refer to the OpenZFS git commit log for authoritative copyright attribution.
+.\"
+.\" The contents of this file are subject to the terms of the
+.\" Common Development and Distribution License Version 1.0 (CDDL-1.0).
+.\" You can obtain a copy of the license from the top-level file
+.\" "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
+.\" You may not use this file except in compliance with the license.
+.\"
+.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
+.\"
+.Dd August 30, 2023
+.Dt ZFS_PREPARE_DISK 8
+.Os
+.
+.Sh NAME
+.Nm zfs_prepare_disk
+.Nd special script that gets run before bringing a disk into a pool
+.Sh DESCRIPTION
+.Nm
+is an optional script that gets called by libzfs before bringing a disk into a
+pool.
+It can be modified by the user to run whatever commands are necessary to prepare
+a disk for inclusion into the pool.
+For example, users can add lines to
+.Nm zfs_prepare_disk
+to do things like update the drive's firmware or check the drive's health.
+.Nm zfs_prepare_disk
+is optional and can be removed if not needed.
+libzfs will look for the script at @zfsexecdir@/zfs_prepare_disk.
+.
+.Ss Properties
+.Nm zfs_prepare_disk
+will be passed the following environment variables:
+.Pp
+.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH"
+.
+.It Nm POOL_NAME
+.No Name of the pool
+.It Nm VDEV_PATH
+.No Path to the disk (like /dev/sda)
+.It Nm VDEV_PREPARE
+.No Reason why the disk is being prepared for inclusion
+('create', 'add', 'replace', or 'autoreplace').
+This can be useful if you only want the script to be run under certain actions.
+.It Nm VDEV_UPATH
+.No Path to one of the underlying devices for the
+disk.
+For a multipath device, this is one of the underlying /dev/sd* paths.
+If the device is not a device mapper device, then
+.Nm VDEV_UPATH
+is the same as
+.Nm VDEV_PATH .
+.It Nm VDEV_ENC_SYSFS_PATH
+.No The disk's enclosure sysfs path, if available
+.El
+.Pp
+Note that some of these variables may have a blank value.
+.Nm POOL_NAME
+is blank at pool creation time, for example.
+.Sh ENVIRONMENT
+.Nm zfs_prepare_disk
+runs with a limited $PATH.
+.Sh EXIT STATUS
+.Nm zfs_prepare_disk
+should return 0 on success, non-zero otherwise.
+If non-zero is returned, the disk will not be included in the pool.
+.
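
For orientation, the sketch below shows how a caller in the spirit of libzfs could export the documented variables and honor the exit status. It is illustrative only: the function name, error handling, and the install path (here /usr/libexec/zfs, normally the build-time @zfsexecdir@ substitution) are assumptions, not the real libzfs code.

#include <stdlib.h>

/* Hypothetical caller of zfs_prepare_disk; not the libzfs implementation. */
static int
prepare_disk(const char *pool, const char *vdev_path, const char *reason)
{
	/* POOL_NAME may be blank, e.g. at pool creation time. */
	setenv("POOL_NAME", pool != NULL ? pool : "", 1);
	setenv("VDEV_PATH", vdev_path, 1);
	setenv("VDEV_PREPARE", reason, 1); /* create/add/replace/autoreplace */

	/* A non-zero exit status keeps the disk out of the pool. */
	if (system("/usr/libexec/zfs/zfs_prepare_disk") != 0)
		return (-1);
	return (0);
}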
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index c132171592a8..b9c284a24418 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -488,6 +488,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
+UBSAN_SANITIZE_zap_leaf.o := n
+UBSAN_SANITIZE_zap_micro.o := n
+UBSAN_SANITIZE_sa.o := n
+
# Suppress incorrect warnings from versions of objtool which are not
# aware of x86 EVEX prefix instructions used for AVX512.
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
index b31810d57f59..3fba5ed3c228 100644
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_taskq.c
@@ -30,8 +30,6 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/ck.h>
-#include <sys/epoch.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lock.h>
@@ -66,11 +64,9 @@ taskq_t *dynamic_taskq = NULL;
proc_t *system_proc;
-extern int uma_align_cache;
-
static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures");
-static CK_LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
+static LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
static unsigned long tqenthash;
static unsigned long tqenthashlock;
static struct sx *tqenthashtbl_lock;
@@ -80,8 +76,8 @@ static taskqid_t tqidnext;
#define TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
#define TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
+#define NORMAL_TASK 0
#define TIMEOUT_TASK 1
-#define NORMAL_TASK 2
static void
system_taskq_init(void *arg)
@@ -121,7 +117,7 @@ system_taskq_fini(void *arg)
for (i = 0; i < tqenthashlock + 1; i++)
sx_destroy(&tqenthashtbl_lock[i]);
for (i = 0; i < tqenthash + 1; i++)
- VERIFY(CK_LIST_EMPTY(&tqenthashtbl[i]));
+ VERIFY(LIST_EMPTY(&tqenthashtbl[i]));
free(tqenthashtbl_lock, M_TASKQ);
free(tqenthashtbl, M_TASKQ);
}
@@ -162,27 +158,27 @@ taskq_lookup(taskqid_t tqid)
{
taskq_ent_t *ent = NULL;
- sx_xlock(TQIDHASHLOCK(tqid));
- CK_LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) {
+ if (tqid == 0)
+ return (NULL);
+ sx_slock(TQIDHASHLOCK(tqid));
+ LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) {
if (ent->tqent_id == tqid)
break;
}
if (ent != NULL)
refcount_acquire(&ent->tqent_rc);
- sx_xunlock(TQIDHASHLOCK(tqid));
+ sx_sunlock(TQIDHASHLOCK(tqid));
return (ent);
}
static taskqid_t
taskq_insert(taskq_ent_t *ent)
{
- taskqid_t tqid;
+ taskqid_t tqid = __taskq_genid();
- tqid = __taskq_genid();
ent->tqent_id = tqid;
- ent->tqent_registered = B_TRUE;
sx_xlock(TQIDHASHLOCK(tqid));
- CK_LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash);
+ LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash);
sx_xunlock(TQIDHASHLOCK(tqid));
return (tqid);
}
@@ -192,13 +188,14 @@ taskq_remove(taskq_ent_t *ent)
{
taskqid_t tqid = ent->tqent_id;
- if (!ent->tqent_registered)
+ if (tqid == 0)
return;
-
sx_xlock(TQIDHASHLOCK(tqid));
- CK_LIST_REMOVE(ent, tqent_hash);
+ if (ent->tqent_id != 0) {
+ LIST_REMOVE(ent, tqent_hash);
+ ent->tqent_id = 0;
+ }
sx_xunlock(TQIDHASHLOCK(tqid));
- ent->tqent_registered = B_FALSE;
}
static void
@@ -285,21 +282,22 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
int rc;
taskq_ent_t *ent;
- if (tid == 0)
- return (0);
-
if ((ent = taskq_lookup(tid)) == NULL)
return (0);
- ent->tqent_cancelled = B_TRUE;
- if (ent->tqent_type == TIMEOUT_TASK) {
+ if (ent->tqent_type == NORMAL_TASK) {
+ rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
+ if (rc == EBUSY)
+ taskqueue_drain(tq->tq_queue, &ent->tqent_task);
+ } else {
rc = taskqueue_cancel_timeout(tq->tq_queue,
&ent->tqent_timeout_task, &pend);
- } else
- rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
- if (rc == EBUSY) {
- taskqueue_drain(tq->tq_queue, &ent->tqent_task);
- } else if (pend) {
+ if (rc == EBUSY) {
+ taskqueue_drain_timeout(tq->tq_queue,
+ &ent->tqent_timeout_task);
+ }
+ }
+ if (pend) {
/*
* Tasks normally free themselves when run, but here the task
* was cancelled so it did not free itself.
@@ -312,12 +310,13 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
}
static void
-taskq_run(void *arg, int pending __unused)
+taskq_run(void *arg, int pending)
{
taskq_ent_t *task = arg;
- if (!task->tqent_cancelled)
- task->tqent_func(task->tqent_arg);
+ if (pending == 0)
+ return;
+ task->tqent_func(task->tqent_arg);
taskq_free(task);
}
@@ -345,7 +344,6 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
task->tqent_func = func;
task->tqent_arg = arg;
task->tqent_type = TIMEOUT_TASK;
- task->tqent_cancelled = B_FALSE;
refcount_init(&task->tqent_rc, 1);
tqid = taskq_insert(task);
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
@@ -379,7 +377,6 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
refcount_init(&task->tqent_rc, 1);
task->tqent_func = func;
task->tqent_arg = arg;
- task->tqent_cancelled = B_FALSE;
task->tqent_type = NORMAL_TASK;
tqid = taskq_insert(task);
TASK_INIT(&task->tqent_task, prio, taskq_run, task);
@@ -388,10 +385,12 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
}
static void
-taskq_run_ent(void *arg, int pending __unused)
+taskq_run_ent(void *arg, int pending)
{
taskq_ent_t *task = arg;
+ if (pending == 0)
+ return;
task->tqent_func(task->tqent_arg);
}
@@ -406,8 +405,6 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
* can go at the front of the queue.
*/
prio = !!(flags & TQ_FRONT);
- task->tqent_cancelled = B_FALSE;
- task->tqent_registered = B_FALSE;
task->tqent_id = 0;
task->tqent_func = func;
task->tqent_arg = arg;
@@ -427,12 +424,13 @@ taskq_wait_id(taskq_t *tq, taskqid_t tid)
{
taskq_ent_t *ent;
- if (tid == 0)
- return;
if ((ent = taskq_lookup(tid)) == NULL)
return;
- taskqueue_drain(tq->tq_queue, &ent->tqent_task);
+ if (ent->tqent_type == NORMAL_TASK)
+ taskqueue_drain(tq->tq_queue, &ent->tqent_task);
+ else
+ taskqueue_drain_timeout(tq->tq_queue, &ent->tqent_timeout_task);
taskq_free(ent);
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 12f16edb1e2b..92696c0bf1ae 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -52,11 +52,6 @@
#include <sys/vm.h>
#include <sys/vmmeter.h>
-#if __FreeBSD_version >= 1300139
-static struct sx arc_vnlru_lock;
-static struct vnode *arc_vnlru_marker;
-#endif
-
extern struct vfsops zfs_vfsops;
uint_t zfs_arc_free_target = 0;
@@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
return (MAX(allmem * 5 / 8, size));
}
-/*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *arg)
-{
- uint64_t nr_scan = (uintptr_t)arg;
-
-#ifndef __ILP32__
- if (nr_scan > INT_MAX)
- nr_scan = INT_MAX;
-#endif
-
-#if __FreeBSD_version >= 1300139
- sx_xlock(&arc_vnlru_lock);
- vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
- sx_xunlock(&arc_vnlru_lock);
-#else
- vnlru_free(nr_scan, &zfs_vfsops);
-#endif
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference. This provides a mechanism to ensure the ARC can
- * honor the metadata limit and reclaim otherwise pinned ARC buffers. This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread(). A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-void
-arc_prune_async(uint64_t adjust)
-{
-
-#ifndef __LP64__
- if (adjust > UINTPTR_MAX)
- adjust = UINTPTR_MAX;
-#endif
- taskq_dispatch(arc_prune_taskq, arc_prune_task,
- (void *)(intptr_t)adjust, TQ_SLEEP);
- ARCSTAT_BUMP(arcstat_prune);
-}
-
uint64_t
arc_all_memory(void)
{
@@ -228,10 +176,6 @@ arc_lowmem_init(void)
{
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
EVENTHANDLER_PRI_FIRST);
-#if __FreeBSD_version >= 1300139
- arc_vnlru_marker = vnlru_alloc_marker();
- sx_init(&arc_vnlru_lock, "arc vnlru lock");
-#endif
}
void
@@ -239,12 +183,6 @@ arc_lowmem_fini(void)
{
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
-#if __FreeBSD_version >= 1300139
- if (arc_vnlru_marker != NULL) {
- vnlru_free_marker(arc_vnlru_marker);
- sx_destroy(&arc_vnlru_lock);
- }
-#endif
}
void
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index 8969fd6a54bd..23b8da184535 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -89,7 +89,7 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
-int zfs_bclone_enabled;
+int zfs_bclone_enabled = 0;
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
&zfs_bclone_enabled, 0, "Enable block cloning");
@@ -2074,6 +2074,26 @@ zfs_vnodes_adjust_back(void)
#endif
}
+#if __FreeBSD_version >= 1300139
+static struct sx zfs_vnlru_lock;
+static struct vnode *zfs_vnlru_marker;
+#endif
+static arc_prune_t *zfs_prune;
+
+static void
+zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
+{
+ if (nr_to_scan > INT_MAX)
+ nr_to_scan = INT_MAX;
+#if __FreeBSD_version >= 1300139
+ sx_xlock(&zfs_vnlru_lock);
+ vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
+ sx_xunlock(&zfs_vnlru_lock);
+#else
+ vnlru_free(nr_to_scan, &zfs_vfsops);
+#endif
+}
+
void
zfs_init(void)
{
@@ -2100,11 +2120,23 @@ zfs_init(void)
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
+
+#if __FreeBSD_version >= 1300139
+ zfs_vnlru_marker = vnlru_alloc_marker();
+ sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
+#endif
+ zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
}
void
zfs_fini(void)
{
+ arc_remove_prune_callback(zfs_prune);
+#if __FreeBSD_version >= 1300139
+ vnlru_free_marker(zfs_vnlru_marker);
+ sx_destroy(&zfs_vnlru_lock);
+#endif
+
taskq_destroy(zfsvfs_taskq);
zfsctl_fini();
zfs_znode_fini();
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index 29a8802b8367..43ed087e2dbb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -489,56 +489,5 @@ arc_unregister_hotplug(void)
}
#endif /* _KERNEL */
-/*
- * Helper function for arc_prune_async() it is responsible for safely
- * handling the execution of a registered arc_prune_func_t.
- */
-static void
-arc_prune_task(void *ptr)
-{
- arc_prune_t *ap = (arc_prune_t *)ptr;
- arc_prune_func_t *func = ap->p_pfunc;
-
- if (func != NULL)
- func(ap->p_adjust, ap->p_private);
-
- zfs_refcount_remove(&ap->p_refcnt, func);
-}
-
-/*
- * Notify registered consumers they must drop holds on a portion of the ARC
- * buffered they reference. This provides a mechanism to ensure the ARC can
- * honor the metadata limit and reclaim otherwise pinned ARC buffers. This
- * is analogous to dnlc_reduce_cache() but more generic.
- *
- * This operation is performed asynchronously so it may be safely called
- * in the context of the arc_reclaim_thread(). A reference is taken here
- * for each registered arc_prune_t and the arc_prune_task() is responsible
- * for releasing it once the registered arc_prune_func_t has completed.
- */
-void
-arc_prune_async(uint64_t adjust)
-{
- arc_prune_t *ap;
-
- mutex_enter(&arc_prune_mtx);
- for (ap = list_head(&arc_prune_list); ap != NULL;
- ap = list_next(&arc_prune_list, ap)) {
-
- if (zfs_refcount_count(&ap->p_refcnt) >= 2)
- continue;
-
- zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
- ap->p_adjust = adjust;
- if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
- ap, TQ_SLEEP) == TASKQID_INVALID) {
- zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
- continue;
- }
- ARCSTAT_BUMP(arcstat_prune);
- }
- mutex_exit(&arc_prune_mtx);
-}
-
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
"Limit on number of pages that ARC shrinker can reclaim at once");
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index 02cb379ea840..94e25fa0ae8f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -522,7 +522,7 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
ip->i_blkbits = SPA_MINBLOCKSHIFT;
ip->i_atime = now;
ip->i_mtime = now;
- ip->i_ctime = now;
+ zpl_inode_set_ctime_to_ts(ip, now);
ip->i_fop = fops;
ip->i_op = ops;
#if defined(IOP_XATTR)
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
index 3efd4ab159c6..c2ed67c438c6 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
this_seg_start = orig_loffset;
rq_for_each_segment(bv, rq, iter) {
- if (uio->iter.bio) {
- /*
- * If uio->iter.bio is present, then we know we've saved
- * uio->iter from a previous call to this function, and
- * we can skip ahead in this rq_for_each_segment() loop
- * to where we last left off. That way, we don't need
- * to iterate over tons of segments we've already
- * processed - we can just restore the "saved state".
- */
- iter = uio->iter;
- bv = uio->bv;
- this_seg_start = uio->uio_loffset;
- memset(&uio->iter, 0, sizeof (uio->iter));
- continue;
- }
-
/*
* Lookup what the logical offset of the last byte of this
* segment is.
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
copied = 1; /* We copied some data */
}
- if (n == 0) {
- /*
- * All done copying. Save our 'iter' value to the uio.
- * This allows us to "save our state" and skip ahead in
- * the rq_for_each_segment() loop the next time we call
- * call zfs_uiomove_bvec_rq() on this uio (which we
- * will be doing for any remaining data in the uio).
- */
- uio->iter = iter; /* make a copy of the struct data */
- uio->bv = bv;
- return (0);
- }
-
this_seg_start = this_seg_end + 1;
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index a1db5c57c18b..2792bc027213 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1488,7 +1488,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
* read-only flag, pretend it was set, as done for snapshots.
*/
if (!canwrite)
- vfs->vfs_readonly = true;
+ vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 33baac9db06b..b464f615cdd3 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -1652,7 +1652,12 @@ out:
* RETURN: 0 (always succeeds)
*/
int
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
+ struct kstat *sp)
+#else
zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+#endif
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
@@ -1665,7 +1670,11 @@ zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
mutex_enter(&zp->z_lock);
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+ zpl_generic_fillattr(user_ns, request_mask, ip, sp);
+#else
zpl_generic_fillattr(user_ns, ip, sp);
+#endif
/*
* +1 link count for root inode with visible '.zfs' directory.
*/
@@ -2442,8 +2451,8 @@ top:
if (mask & (ATTR_CTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
- ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
- ZTOI(zp));
+ zpl_inode_set_ctime_to_ts(ZTOI(zp),
+ zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
}
@@ -3648,6 +3657,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
caddr_t va;
int err = 0;
uint64_t mtime[2], ctime[2];
+ inode_timespec_t tmp_ctime;
sa_bulk_attr_t bulk[3];
int cnt = 0;
struct address_space *mapping;
@@ -3812,7 +3822,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
/* Preserve the mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
- ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+ tmp_ctime = zpl_inode_get_ctime(ip);
+ ZFS_TIME_ENCODE(&tmp_ctime, ctime);
zp->z_atime_dirty = B_FALSE;
zp->z_seq++;
@@ -3862,6 +3873,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
zfsvfs_t *zfsvfs = ITOZSB(ip);
dmu_tx_t *tx;
uint64_t mode, atime[2], mtime[2], ctime[2];
+ inode_timespec_t tmp_ctime;
sa_bulk_attr_t bulk[4];
int error = 0;
int cnt = 0;
@@ -3908,7 +3920,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
/* Preserve the mode, mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_atime, atime);
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
- ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+ tmp_ctime = zpl_inode_get_ctime(ip);
+ ZFS_TIME_ENCODE(&tmp_ctime, ctime);
mode = ip->i_mode;
zp->z_mode = mode;
@@ -4058,8 +4071,8 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
- if ((vm_flags & VM_WRITE) && (zp->z_pflags &
- (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
+ if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
+ (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
@@ -4229,4 +4242,8 @@ EXPORT_SYMBOL(zfs_map);
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
+
#endif
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
index 52c8e51df659..f71026da83cb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -542,6 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
uint64_t links;
uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2];
+ inode_timespec_t tmp_ctime;
uint64_t projid = ZFS_DEFAULT_PROJID;
sa_bulk_attr_t bulk[12];
int count = 0;
@@ -615,7 +616,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
ZFS_TIME_DECODE(&ip->i_atime, atime);
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
- ZFS_TIME_DECODE(&ip->i_ctime, ctime);
+ ZFS_TIME_DECODE(&tmp_ctime, ctime);
+ zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
ZFS_TIME_DECODE(&zp->z_btime, btime);
ip->i_ino = zp->z_id;
@@ -1195,6 +1197,7 @@ zfs_rezget(znode_t *zp)
uint64_t gen;
uint64_t z_uid, z_gid;
uint64_t atime[2], mtime[2], ctime[2], btime[2];
+ inode_timespec_t tmp_ctime;
uint64_t projid = ZFS_DEFAULT_PROJID;
znode_hold_t *zh;
@@ -1289,7 +1292,8 @@ zfs_rezget(znode_t *zp)
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
- ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
+ ZFS_TIME_DECODE(&tmp_ctime, ctime);
+ zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
ZFS_TIME_DECODE(&zp->z_btime, btime);
if ((uint32_t)gen != ZTOI(zp)->i_generation) {
@@ -1397,7 +1401,7 @@ zfs_zinactive(znode_t *zp)
boolean_t
zfs_relatime_need_update(const struct inode *ip)
{
- inode_timespec_t now;
+ inode_timespec_t now, tmp_ctime;
gethrestime(&now);
/*
@@ -1408,7 +1412,8 @@ zfs_relatime_need_update(const struct inode *ip)
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
return (B_TRUE);
- if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
+ tmp_ctime = zpl_inode_get_ctime(ip);
+ if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
return (B_TRUE);
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
@@ -1434,7 +1439,7 @@ void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
uint64_t ctime[2])
{
- inode_timespec_t now;
+ inode_timespec_t now, tmp_ctime;
gethrestime(&now);
@@ -1451,7 +1456,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_CTIME) {
ZFS_TIME_ENCODE(&now, ctime);
- ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
+ ZFS_TIME_DECODE(&tmp_ctime, ctime);
+ zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
if (ZTOZSB(zp)->z_use_fuids)
zp->z_pflags |= ZFS_ARCHIVE;
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
index 7786444fea35..8ee7fcecc7b7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@@ -124,6 +124,8 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+ generic_fillattr(user_ns, request_mask, ip, stat);
#else
(void) user_ns;
#endif
@@ -435,6 +437,8 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, ip, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+ generic_fillattr(user_ns, request_mask, ip, stat);
#else
(void) user_ns;
#endif
@@ -609,6 +613,8 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
generic_fillattr(user_ns, path->dentry->d_inode, stat);
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
+ generic_fillattr(user_ns, request_mask, ip, stat);
#else
(void) user_ns;
#endif
@@ -623,7 +629,10 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
if (error == 0) {
-#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+ error = -zfs_getattr_fast(user_ns, request_mask, ZTOI(dzp),
+ stat);
+#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
#else
error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
index c47fe99dacff..139c51cf46df 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
@@ -31,6 +31,8 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
+int zfs_bclone_enabled = 0;
+
/*
* Clone part of a file via block cloning.
*
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
fstrans_cookie_t cookie;
int err;
+ if (!zfs_bclone_enabled)
+ return (-EOPNOTSUPP);
+
if (!spa_feature_is_enabled(
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
return (-EOPNOTSUPP);
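
The ordering of the new check matters: the module-wide zfs_bclone_enabled tunable is consulted before the pool feature, so a disabled tunable short-circuits without touching pool state. A condensed restatement of the gate (the helper name is invented; the identifiers are the ones used above):

/* Sketch of the two-level block-cloning gate; not the actual function. */
static int
clone_allowed(struct inode *dst_i)
{
	if (!zfs_bclone_enabled)		/* module-wide switch */
		return (-EOPNOTSUPP);
	if (!spa_feature_is_enabled(
	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
		return (-EOPNOTSUPP);		/* pool feature not enabled */
	return (0);	/* callers may fall back to an ordinary copy */
}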
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
index 5f5ad186a61c..96f65b9e94e2 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@@ -435,7 +435,9 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
* XXX query_flags currently ignored.
*/
-#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+ error = -zfs_getattr_fast(user_ns, request_mask, ip, stat);
+#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
error = -zfs_getattr_fast(user_ns, ip, stat);
#else
error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
@@ -774,7 +776,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
return (-EMLINK);
crhold(cr);
- ip->i_ctime = current_time(ip);
+ zpl_inode_set_ctime_to_ts(ip, current_time(ip));
/* Must have an existing ref, so igrab() cannot return NULL */
VERIFY3P(igrab(ip), !=, NULL);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index ad52a11aada0..d98d32c1f9fb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -375,7 +375,7 @@ zpl_kill_sb(struct super_block *sb)
}
void
-zpl_prune_sb(int64_t nr_to_scan, void *arg)
+zpl_prune_sb(uint64_t nr_to_scan, void *arg)
{
struct super_block *sb = (struct super_block *)arg;
int objects = 0;
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index 96d85991811e..4e4f5210f85d 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -513,7 +513,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
error = -zfs_write_simple(xzp, value, size, pos, NULL);
out:
if (error == 0) {
- ip->i_ctime = current_time(ip);
+ zpl_inode_set_ctime_to_ts(ip, current_time(ip));
zfs_mark_inode_dirty(ip);
}
@@ -1011,7 +1011,8 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
*/
if (ip->i_mode != mode) {
ip->i_mode = ITOZ(ip)->z_mode = mode;
- ip->i_ctime = current_time(ip);
+ zpl_inode_set_ctime_to_ts(ip,
+ current_time(ip));
zfs_mark_inode_dirty(ip);
}
@@ -1170,7 +1171,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir)
return (PTR_ERR(acl));
if (!acl) {
ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
- ip->i_ctime = current_time(ip);
+ zpl_inode_set_ctime_to_ts(ip, current_time(ip));
zfs_mark_inode_dirty(ip);
return (0);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 76521c95911e..f94ce69fb9e2 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -873,7 +873,13 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) {
case BLKFLSBUF:
+#ifdef HAVE_FSYNC_BDEV
fsync_bdev(bdev);
+#elif defined(HAVE_SYNC_BLOCKDEV)
+ sync_blockdev(bdev);
+#else
+#error "Neither fsync_bdev() nor sync_blockdev() found"
+#endif
invalidate_bdev(bdev);
rw_enter(&zv->zv_suspend_lock, RW_READER);
@@ -1620,6 +1626,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
module_param(zvol_volmode, uint, 0644);
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+#ifdef HAVE_BLK_MQ
+module_param(zvol_blk_mq_queue_depth, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
+
+module_param(zvol_use_blk_mq, uint, 0644);
+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
+
+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
+ "Process volblocksize blocks per thread");
+#endif
+
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index b5946e7604c0..dfea15b74394 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -886,6 +886,8 @@ static void l2arc_do_free_on_write(void);
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
boolean_t state_only);
+static void arc_prune_async(uint64_t adjust);
+
#define l2arc_hdr_arcstats_increment(hdr) \
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
#define l2arc_hdr_arcstats_decrement(hdr) \
@@ -1364,7 +1366,7 @@ arc_buf_is_shared(arc_buf_t *buf)
abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) &&
buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd));
IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
- IMPLY(shared, ARC_BUF_SHARED(buf));
+ EQUIV(shared, ARC_BUF_SHARED(buf));
IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
/*
@@ -1998,7 +2000,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
IMPLY(encrypted, HDR_ENCRYPTED(hdr));
IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
- IMPLY(encrypted, !ARC_BUF_SHARED(buf));
+ IMPLY(encrypted, !arc_buf_is_shared(buf));
/*
* If the caller wanted encrypted data we just need to copy it from
@@ -2066,7 +2068,9 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
}
if (hdr_compressed == compressed) {
- if (!arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
+ ASSERT(arc_buf_is_shared(buf));
+ } else {
abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd,
arc_buf_size(buf));
}
@@ -2078,7 +2082,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
* If the buf is sharing its data with the hdr, unlink it and
* allocate a new data buffer for the buf.
*/
- if (arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
ASSERT(ARC_BUF_COMPRESSED(buf));
/* We need to give the buf its own b_data */
@@ -2090,6 +2094,8 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
/* Previously overhead was 0; just add new overhead */
ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr));
} else if (ARC_BUF_COMPRESSED(buf)) {
+ ASSERT(!arc_buf_is_shared(buf));
+
/* We need to reallocate the buf's b_data */
arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr),
buf);
@@ -2217,7 +2223,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
- if (arc_buf_is_shared(buf))
+ if (ARC_BUF_SHARED(buf))
continue;
(void) zfs_refcount_add_many(&state->arcs_esize[type],
arc_buf_size(buf), buf);
@@ -2256,7 +2262,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
- if (arc_buf_is_shared(buf))
+ if (ARC_BUF_SHARED(buf))
continue;
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
arc_buf_size(buf), buf);
@@ -2481,7 +2487,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
* add to the refcount if the arc_buf_t is
* not shared.
*/
- if (arc_buf_is_shared(buf))
+ if (ARC_BUF_SHARED(buf))
continue;
(void) zfs_refcount_add_many(
@@ -2537,7 +2543,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
* add to the refcount if the arc_buf_t is
* not shared.
*/
- if (arc_buf_is_shared(buf))
+ if (ARC_BUF_SHARED(buf))
continue;
(void) zfs_refcount_remove_many(
@@ -3061,9 +3067,10 @@ arc_buf_destroy_impl(arc_buf_t *buf)
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
- if (arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
} else {
+ ASSERT(!arc_buf_is_shared(buf));
uint64_t size = arc_buf_size(buf);
arc_free_data_buf(hdr, buf->b_data, size, buf);
ARCSTAT_INCR(arcstat_overhead_size, -size);
@@ -3104,9 +3111,9 @@ arc_buf_destroy_impl(arc_buf_t *buf)
*/
if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
/* Only one buf can be shared at once */
- VERIFY(!arc_buf_is_shared(lastbuf));
+ ASSERT(!arc_buf_is_shared(lastbuf));
/* hdr is uncompressed so can't have compressed buf */
- VERIFY(!ARC_BUF_COMPRESSED(lastbuf));
+ ASSERT(!ARC_BUF_COMPRESSED(lastbuf));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
arc_hdr_free_abd(hdr, B_FALSE);
@@ -5863,12 +5870,9 @@ top:
* 3. This buffer isn't currently writing to the L2ARC.
* 4. The L2ARC entry wasn't evicted, which may
* also have invalidated the vdev.
- * 5. This isn't prefetch or l2arc_noprefetch is 0.
*/
if (HDR_HAS_L2HDR(hdr) &&
- !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
- !(l2arc_noprefetch &&
- (*arc_flags & ARC_FLAG_PREFETCH))) {
+ !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr)) {
l2arc_read_callback_t *cb;
abd_t *abd;
uint64_t asize;
@@ -6049,6 +6053,56 @@ arc_remove_prune_callback(arc_prune_t *p)
}
/*
+ * Helper function for arc_prune_async(); it is responsible for safely
+ * handling the execution of a registered arc_prune_func_t.
+ */
+static void
+arc_prune_task(void *ptr)
+{
+ arc_prune_t *ap = (arc_prune_t *)ptr;
+ arc_prune_func_t *func = ap->p_pfunc;
+
+ if (func != NULL)
+ func(ap->p_adjust, ap->p_private);
+
+ zfs_refcount_remove(&ap->p_refcnt, func);
+}
+
+/*
+ * Notify registered consumers they must drop holds on a portion of the ARC
+ * buffers they reference. This provides a mechanism to ensure the ARC can
+ * honor the metadata limit and reclaim otherwise pinned ARC buffers.
+ *
+ * This operation is performed asynchronously so it may be safely called
+ * in the context of the arc_reclaim_thread(). A reference is taken here
+ * for each registered arc_prune_t and the arc_prune_task() is responsible
+ * for releasing it once the registered arc_prune_func_t has completed.
+ */
+static void
+arc_prune_async(uint64_t adjust)
+{
+ arc_prune_t *ap;
+
+ mutex_enter(&arc_prune_mtx);
+ for (ap = list_head(&arc_prune_list); ap != NULL;
+ ap = list_next(&arc_prune_list, ap)) {
+
+ if (zfs_refcount_count(&ap->p_refcnt) >= 2)
+ continue;
+
+ zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
+ ap->p_adjust = adjust;
+ if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
+ ap, TQ_SLEEP) == TASKQID_INVALID) {
+ zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
+ continue;
+ }
+ ARCSTAT_BUMP(arcstat_prune);
+ }
+ mutex_exit(&arc_prune_mtx);
+}
+
+/*
* Notify the arc that a block was freed, and thus will never be used again.
*/
void
@@ -6189,7 +6243,7 @@ arc_release(arc_buf_t *buf, const void *tag)
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
VERIFY3S(remove_reference(hdr, tag), >, 0);
- if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) {
+ if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) {
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
ASSERT(ARC_BUF_LAST(buf));
}
@@ -6206,9 +6260,9 @@ arc_release(arc_buf_t *buf, const void *tag)
* If the current arc_buf_t and the hdr are sharing their data
* buffer, then we must stop sharing that block.
*/
- if (arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
- VERIFY(!arc_buf_is_shared(lastbuf));
+ ASSERT(!arc_buf_is_shared(lastbuf));
/*
* First, sever the block sharing relationship between
@@ -6241,7 +6295,7 @@ arc_release(arc_buf_t *buf, const void *tag)
*/
ASSERT(arc_buf_is_shared(lastbuf) ||
arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
- ASSERT(!ARC_BUF_SHARED(buf));
+ ASSERT(!arc_buf_is_shared(buf));
}
ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
@@ -6335,9 +6389,10 @@ arc_write_ready(zio_t *zio)
arc_cksum_free(hdr);
arc_buf_unwatch(buf);
if (hdr->b_l1hdr.b_pabd != NULL) {
- if (arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
arc_unshare_buf(hdr, buf);
} else {
+ ASSERT(!arc_buf_is_shared(buf));
arc_hdr_free_abd(hdr, B_FALSE);
}
}
@@ -6636,9 +6691,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
* The hdr will remain with a NULL data pointer and the
* buf will take sole ownership of the block.
*/
- if (arc_buf_is_shared(buf)) {
+ if (ARC_BUF_SHARED(buf)) {
arc_unshare_buf(hdr, buf);
} else {
+ ASSERT(!arc_buf_is_shared(buf));
arc_hdr_free_abd(hdr, B_FALSE);
}
VERIFY3P(buf->b_data, !=, NULL);
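
The arc_prune_async() loop moved into arc.c above keeps a simple invariant worth spelling out: the refcount guard allows at most one outstanding prune task per callback, the reference is taken before dispatch, and it is dropped either by the task itself or immediately when dispatch fails. A reduced model of that pattern, using the same identifiers with the loop and locking elided:

/* Reduced model of the take-ref/dispatch/undo-on-failure pattern above. */
static void
dispatch_one_prune(arc_prune_t *ap, uint64_t adjust)
{
	if (zfs_refcount_count(&ap->p_refcnt) >= 2)
		return;		/* a prune task is already pending */
	zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
	ap->p_adjust = adjust;
	if (taskq_dispatch(arc_prune_taskq, arc_prune_task, ap,
	    TQ_SLEEP) == TASKQID_INVALID)
		zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
}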
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
index 0eb8c17e331a..8451b5082e86 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -210,10 +210,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
dmu_buf_impl_t *db;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- db = dbuf_hold_level(dn, level, blkid, FTAG);
+ err = dbuf_hold_impl(dn, level, blkid, TRUE, FALSE, FTAG, &db);
rw_exit(&dn->dn_struct_rwlock);
- if (db == NULL)
- return (SET_ERROR(EIO));
+ if (err == ENOENT)
+ return (0);
+ if (err != 0)
+ return (err);
/*
* PARTIAL_FIRST allows caching for uncacheable blocks. It will
* be cleared after dmu_buf_will_dirty() call dbuf_read() again.
diff --git a/sys/contrib/openzfs/module/zfs/dsl_pool.c b/sys/contrib/openzfs/module/zfs/dsl_pool.c
index 9120fef93c74..17b971248283 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_pool.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_pool.c
@@ -965,18 +965,18 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
uint64_t delay_min_bytes =
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
- mutex_enter(&dp->dp_lock);
- uint64_t dirty = dp->dp_dirty_total;
- mutex_exit(&dp->dp_lock);
-
- return (dirty > delay_min_bytes);
+	/*
+	 * We are not taking the dp_lock here and in a few other places,
+	 * since torn reads are unlikely: on 64-bit systems due to register
+	 * size, and on 32-bit systems due to memory constraints.  Pool-wide
+	 * locks in a hot path may be too expensive, and we do not need a
+	 * precise result here.
+	 */
+ return (dp->dp_dirty_total > delay_min_bytes);
}
static boolean_t
dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
{
- ASSERT(MUTEX_HELD(&dp->dp_lock));
-
uint64_t dirty_min_bytes =
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
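
The comment above leans on loads of a naturally aligned 64-bit word not tearing on the platforms ZFS supports. A standalone illustration of the same pattern in portable C11 (not ZFS code; the kernel build uses plain loads where this uses an explicit relaxed atomic):

#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint64_t dirty_total;	/* stands in for dp_dirty_total */

/* Lock-free threshold check: tolerates staleness, not torn reads. */
static int
need_dirty_delay(uint64_t delay_min_bytes)
{
	return (atomic_load_explicit(&dirty_total,
	    memory_order_relaxed) > delay_min_bytes);
}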
diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c
index 636c04d9f785..a77874ea0dd3 100644
--- a/sys/contrib/openzfs/module/zfs/spa_config.c
+++ b/sys/contrib/openzfs/module/zfs/spa_config.c
@@ -367,23 +367,24 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
* So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
* information for all pool visible within the zone.
*/
-nvlist_t *
-spa_all_configs(uint64_t *generation)
+int
+spa_all_configs(uint64_t *generation, nvlist_t **pools)
{
- nvlist_t *pools;
spa_t *spa = NULL;
if (*generation == spa_config_generation)
- return (NULL);
+ return (SET_ERROR(EEXIST));
- pools = fnvlist_alloc();
+ int error = mutex_enter_interruptible(&spa_namespace_lock);
+ if (error)
+ return (SET_ERROR(EINTR));
- mutex_enter(&spa_namespace_lock);
+ *pools = fnvlist_alloc();
while ((spa = spa_next(spa)) != NULL) {
if (INGLOBALZONE(curproc) ||
zone_dataset_visible(spa_name(spa), NULL)) {
mutex_enter(&spa->spa_props_lock);
- fnvlist_add_nvlist(pools, spa_name(spa),
+ fnvlist_add_nvlist(*pools, spa_name(spa),
spa->spa_config);
mutex_exit(&spa->spa_props_lock);
}
@@ -391,7 +392,7 @@ spa_all_configs(uint64_t *generation)
*generation = spa_config_generation;
mutex_exit(&spa_namespace_lock);
- return (pools);
+ return (0);
}
void
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index 08d918467d03..092b3f375be0 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -273,8 +273,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
{
zio_priority_t p = zio->io_priority;
vq->vq_cqueued |= 1U << p;
- if (vdev_queue_class_fifo(p))
+ if (vdev_queue_class_fifo(p)) {
list_insert_tail(&vq->vq_class[p].vqc_list, zio);
+ vq->vq_class[p].vqc_list_numnodes++;
+ }
else
avl_add(&vq->vq_class[p].vqc_tree, zio);
}
@@ -288,6 +290,7 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
list_t *list = &vq->vq_class[p].vqc_list;
list_remove(list, zio);
empty = list_is_empty(list);
+ vq->vq_class[p].vqc_list_numnodes--;
} else {
avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
avl_remove(tree, zio);
@@ -1069,7 +1072,7 @@ vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
{
vdev_queue_t *vq = &vd->vdev_queue;
if (vdev_queue_class_fifo(p))
- return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
+ return (vq->vq_class[p].vqc_list_numnodes);
else
return (avl_numnodes(&vq->vq_class[p].vqc_tree));
}
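
Worth noting: the old vdev_queue_class_length() returned list_is_empty(...) == 0, i.e. only 0 or 1, where callers expect a queue length; the new vqc_list_numnodes counter, updated at insert and remove, yields a true O(1) count. A toy version of the idea (illustrative types, not the ZFS list API):

#include <stddef.h>

struct node {
	struct node *next;
};

struct counted_list {
	struct node *head;
	size_t numnodes;	/* maintained on every insert/remove */
};

static void
cl_insert_head(struct counted_list *l, struct node *n)
{
	n->next = l->head;
	l->head = n;
	l->numnodes++;
}

static size_t
cl_length(const struct counted_list *l)
{
	return (l->numnodes);	/* O(1): no list walk, no 0/1 flag */
}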
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index f91a2f3bbca5..2738385e260b 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1582,8 +1582,9 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
nvlist_t *configs;
int error;
- if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
- return (SET_ERROR(EEXIST));
+ error = spa_all_configs(&zc->zc_cookie, &configs);
+ if (error)
+ return (error);
error = put_nvlist(zc, configs);
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 18c6cbf028b3..a11886136994 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -145,7 +145,7 @@ static int zil_nocacheflush = 0;
* Any writes above that will be executed with lower (asynchronous) priority
* to limit potential SLOG device abuse by single active ZIL writer.
*/
-static uint64_t zil_slog_bulk = 768 * 1024;
+static uint64_t zil_slog_bulk = 64 * 1024 * 1024;
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 3b3b40fa73d8..a719e5492323 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -158,23 +158,22 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
- /*
- * For small buffers, we want a cache for each multiple of
- * SPA_MINBLOCKSIZE. For larger buffers, we want a cache
- * for each quarter-power of 2.
- */
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
- size_t p2 = size;
- size_t align = 0;
- size_t data_cflags, cflags;
-
- data_cflags = KMC_NODEBUG;
- cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
- KMC_NODEBUG : 0;
+ size_t align, cflags, data_cflags;
+ char name[32];
+ /*
+	 * Create a cache for each half-power-of-2 size, starting from
+	 * SPA_MINBLOCKSIZE.  This should give a memory space efficiency
+	 * of ~7/8, sufficient for the transient allocations that mostly
+	 * use these caches.
+ */
+ size_t p2 = size;
while (!ISP2(p2))
p2 &= p2 - 1;
+ if (!IS_P2ALIGNED(size, p2 / 2))
+ continue;
#ifndef _KERNEL
/*
@@ -185,47 +184,37 @@ zio_init(void)
*/
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue;
- /*
- * Here's the problem - on 4K native devices in userland on
- * Linux using O_DIRECT, buffers must be 4K aligned or I/O
- * will fail with EINVAL, causing zdb (and others) to coredump.
- * Since userland probably doesn't need optimized buffer caches,
- * we just force 4K alignment on everything.
- */
- align = 8 * SPA_MINBLOCKSIZE;
-#else
- if (size < PAGESIZE) {
- align = SPA_MINBLOCKSIZE;
- } else if (IS_P2ALIGNED(size, p2 >> 2)) {
- align = PAGESIZE;
- }
#endif
- if (align != 0) {
- char name[36];
- if (cflags == data_cflags) {
- /*
- * Resulting kmem caches would be identical.
- * Save memory by creating only one.
- */
- (void) snprintf(name, sizeof (name),
- "zio_buf_comb_%lu", (ulong_t)size);
- zio_buf_cache[c] = kmem_cache_create(name,
- size, align, NULL, NULL, NULL, NULL, NULL,
- cflags);
- zio_data_buf_cache[c] = zio_buf_cache[c];
- continue;
- }
- (void) snprintf(name, sizeof (name), "zio_buf_%lu",
- (ulong_t)size);
- zio_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, NULL, cflags);
-
- (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
- (ulong_t)size);
- zio_data_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, NULL, data_cflags);
+ if (IS_P2ALIGNED(size, PAGESIZE))
+ align = PAGESIZE;
+ else
+ align = 1 << (highbit64(size ^ (size - 1)) - 1);
+
+ cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
+ KMC_NODEBUG : 0;
+ data_cflags = KMC_NODEBUG;
+ if (cflags == data_cflags) {
+ /*
+ * Resulting kmem caches would be identical.
+ * Save memory by creating only one.
+ */
+ (void) snprintf(name, sizeof (name),
+ "zio_buf_comb_%lu", (ulong_t)size);
+ zio_buf_cache[c] = kmem_cache_create(name, size, align,
+ NULL, NULL, NULL, NULL, NULL, cflags);
+ zio_data_buf_cache[c] = zio_buf_cache[c];
+ continue;
}
+ (void) snprintf(name, sizeof (name), "zio_buf_%lu",
+ (ulong_t)size);
+ zio_buf_cache[c] = kmem_cache_create(name, size, align,
+ NULL, NULL, NULL, NULL, NULL, cflags);
+
+ (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
+ (ulong_t)size);
+ zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
+ NULL, NULL, NULL, NULL, NULL, data_cflags);
}
while (--c != 0) {
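
The new alignment expression is terse enough to deserve a check: size ^ (size - 1) sets every bit up to and including the lowest set bit of size, so 1 << (highbit64(...) - 1) is exactly the largest power of two dividing size (equivalently size & -size). A throwaway user-space verification, with highbit64() reimplemented locally to match its 1-indexed kernel semantics:

#include <stdio.h>
#include <stdint.h>

/* Local stand-in for ZFS's highbit64(): 1-indexed highest set bit. */
static int
highbit64(uint64_t v)
{
	int h = 0;
	while (v != 0) {
		h++;
		v >>= 1;
	}
	return (h);
}

int
main(void)
{
	uint64_t sizes[] = { 4096, 12288, 24576, 131072 };
	for (int i = 0; i < 4; i++) {
		uint64_t size = sizes[i];
		uint64_t align = 1ULL << (highbit64(size ^ (size - 1)) - 1);
		/* Prints 4096, 4096, 8192, 131072: the lowest set bit. */
		printf("size %8ju -> align %6ju\n",
		    (uintmax_t)size, (uintmax_t)align);
	}
	return (0);
}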
diff --git a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
index 23c3ed6ff408..d56967d7a8b1 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs-dkms.spec.in
@@ -24,6 +24,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildArch: noarch
Requires: dkms >= 2.2.0.3
+Requires(pre): dkms >= 2.2.0.3
Requires(post): dkms >= 2.2.0.3
Requires(preun): dkms >= 2.2.0.3
Requires: gcc, make, perl, diffutils
@@ -68,9 +69,92 @@ fi
%defattr(-,root,root)
/usr/src/%{module}-%{version}
+%pre
+echo "Running pre installation script: $0. Parameters: $*"
+# We don't want any other versions lingering around in dkms.
+# Tests with 'dnf' showed that in case of a reinstall or upgrade,
+# the preun scriptlet removed the version we were trying to install.
+# Because of this, find all zfs dkms sources in /var/lib/dkms and
+# remove them if we find a matching version registered in dkms.
+
+dkms_root=/var/lib/dkms
+if [ -d ${dkms_root}/%{module} ]; then
+ cd ${dkms_root}/%{module}
+ for x in [[:digit:]]*; do
+ [ -d "$x" ] || continue
+ otherver="$x"
+ opath="${dkms_root}/%{module}/${otherver}"
+ if [ "$otherver" != %{version} ]; then
+			# This is a workaround for a broken 'dkms status' that we caused in a previous version.
+			# One day it may no longer be needed, but it does not hurt to keep it.
+ if dkms status -m %{module} -v "$otherver" 2>&1 | grep "${opath}/source/dkms.conf does not exist"
+ then
+ echo "ERROR: dkms status is broken!" >&2
+ if [ -L "${opath}/source" -a ! -d "${opath}/source" ]
+ then
+ echo "Trying to fix it by removing the symlink: ${opath}/source" >&2
+ echo "You should manually remove ${opath}" >&2
+ rm -f "${opath}/source" || echo "Removal failed!" >&2
+ fi
+ fi
+ if [ `dkms status -m %{module} -v "$otherver" | grep -c %{module}` -gt 0 ]; then
+ echo "Removing old %{module} dkms modules version $otherver from all kernels."
+ dkms remove -m %{module} -v "$otherver" --all ||:
+ fi
+ fi
+ done
+fi
+
+# Uninstall this version of zfs dkms modules before installation of the package.
+if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
+ echo "Removing %{module} dkms modules version %{version} from all kernels."
+ dkms remove -m %{module} -v %{version} --all ||:
+fi
+
+%post
+echo "Running post installation script: $0. Parameters: $*"
+# Add the module to dkms, as recommended in the dkms man page.
+# This is generally rpm specific.
+# But it may also help if we have a broken 'dkms status':
+# if the sources are available and only the symlink pointing
+# to them is missing, this will resolve the situation.
+echo "Adding %{module} dkms modules version %{version} to dkms."
+dkms add -m %{module} -v %{version} %{!?not_rpm:--rpm_safe_upgrade} ||:
+
+# After installing the package, run 'dkms install' for this zfs version on the
+# current kernel.  Force the overwriting of old modules to avoid diff warnings
+# in 'dkms status', to overwrite newer versions in case of a downgrade, and to
+# handle backed-up versions that may have been restored before.
+echo "Installing %{module} dkms modules version %{version} for the current kernel."
+dkms install --force -m %{module} -v %{version} ||:
+
%preun
-dkms remove -m %{module} -v %{version} --all
+dkms_root="/var/lib/dkms/%{module}/%{version}"
+echo "Running pre uninstall script: $0. Parameters: $*"
+# In case of an upgrade we do nothing.  See the comment in the %pre hook above.
+if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
+ echo "This is an upgrade. Skipping pre uninstall action."
+ exit 0
+fi
+
+# Check whether we are uninstalling the package.  In that case, remove the dkms modules.
+# '0' is the value of the first parameter for rpm packages.
+# 'remove' or 'purge' are the possible values for deb packages.
+if [ "$1" = "0" -o "$1" = "remove" -o "$1" = "purge" ] ; then
+ if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
+ echo "Removing %{module} dkms modules version %{version} from all kernels."
+ dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade} && exit 0
+ fi
+ # If removing the modules failed, it might be because of the broken 'dkms status'.
+ if dkms status -m %{module} -v %{version} 2>&1 | grep "${dkms_root}/source/dkms.conf does not exist"
+ then
+ echo "ERROR: dkms status is broken!" >&2
+ echo "You should manually remove ${dkms_root}" >&2
+ echo "WARNING: installed modules in /lib/modules/`uname -r`/extra could not be removed automatically!" >&2
+ fi
+else
+ echo "Script parameter $1 did not match any removal condition."
+fi
-%posttrans
-/usr/lib/dkms/common.postinst %{module} %{version}
+exit 0
diff --git a/sys/contrib/openzfs/scripts/Makefile.am b/sys/contrib/openzfs/scripts/Makefile.am
index 95640727ac6a..b43bf97dbdf4 100644
--- a/sys/contrib/openzfs/scripts/Makefile.am
+++ b/sys/contrib/openzfs/scripts/Makefile.am
@@ -20,6 +20,8 @@ scripts_scripts = \
if CONFIG_USER
dist_scripts_SCRIPTS = $(scripts_scripts)
+dist_zfsexec_SCRIPTS = \
+ %D%/zfs_prepare_disk
else
dist_noinst_SCRIPTS += $(scripts_scripts)
endif
diff --git a/sys/contrib/openzfs/scripts/zfs_prepare_disk b/sys/contrib/openzfs/scripts/zfs_prepare_disk
new file mode 100755
index 000000000000..02aa9f8a7728
--- /dev/null
+++ b/sys/contrib/openzfs/scripts/zfs_prepare_disk
@@ -0,0 +1,17 @@
+#!/bin/sh
+#
+# This is an optional helper script that is automatically called by libzfs
+# before a disk is about to be added into the pool. It can be modified by
+# the user to run whatever commands are necessary to prepare a disk for
+# inclusion into the pool. For example, users can add lines to this
+# script to do things like update the drive's firmware or check the drive's
+# health. The script is optional and can be removed if it is not needed.
+#
+# See the zfs_prepare_disk(8) man page for details.
+#
+# Example:
+#
+# echo "Prepare disk $VDEV_PATH ($VDEV_UPATH) for $VDEV_PREPARE in $POOL_NAME"
+#
+
+exit 0
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 2252e46df3a8..8bc55a1b4b47 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -122,10 +122,10 @@ tags = ['functional', 'fallocate']
[tests/functional/fault:Linux]
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
- 'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
- 'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
- 'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
- 'zpool_status_-s']
+ 'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
+ 'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
+ 'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
+ 'scrub_after_resilver', 'zpool_status_-s']
tags = ['functional', 'fault']
[tests/functional/features/large_dnode:Linux]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 5d1360380de5..4608e87522a3 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -328,6 +328,7 @@ if os.environ.get('CI') == 'true':
'fault/auto_online_001_pos': ['SKIP', ci_reason],
'fault/auto_online_002_pos': ['SKIP', ci_reason],
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
+ 'fault/auto_replace_002_pos': ['SKIP', ci_reason],
'fault/auto_spare_ashift': ['SKIP', ci_reason],
'fault/auto_spare_shared': ['SKIP', ci_reason],
'procfs/pool_state': ['SKIP', ci_reason],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index fa545e06bbf3..648f2203dfba 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -130,12 +130,14 @@ export SYSTEM_FILES_LINUX='attr
chattr
exportfs
fallocate
+ flock
free
getfattr
groupadd
groupdel
groupmod
hostid
+ logger
losetup
lsattr
lsblk
@@ -145,21 +147,20 @@ export SYSTEM_FILES_LINUX='attr
md5sum
mkswap
modprobe
+ mountpoint
mpstat
nsenter
parted
perf
setfattr
+ setpriv
sha256sum
udevadm
unshare
useradd
userdel
usermod
- setpriv
- mountpoint
- flock
- logger'
+ wipefs'
export ZFS_FILES='zdb
zfs
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
index 844caa17d8ed..b4d2b91dd476 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -37,6 +37,12 @@
. ${STF_SUITE}/include/math.shlib
. ${STF_SUITE}/include/blkdev.shlib
+# On AlmaLinux 9 we will see $PWD = '.' instead of the full path. This causes
+# some tests to fail. Fix it up here.
+if [ "$PWD" = "." ] ; then
+ PWD="$(readlink -f $PWD)"
+fi
+
#
# Apply constrained path when available. This is required since the
# PATH may have been modified by sudo's secure_path behavior.
@@ -3334,6 +3340,21 @@ function set_tunable_impl
esac
}
+function save_tunable
+{
+ [[ ! -d $TEST_BASE_DIR ]] && return 1
+ [[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
+	echo "$(get_tunable "$1")" > "$TEST_BASE_DIR/tunable-$1"
+}
+
+function restore_tunable
+{
+ [[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
+	val="$(cat "$TEST_BASE_DIR/tunable-$1")"
+	set_tunable64 "$1" "$val"
+	rm "$TEST_BASE_DIR/tunable-$1"
+}
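+# Typical usage, as in the block_cloning setup/cleanup scripts later in
+# this change:
+#
+#	save_tunable BCLONE_ENABLED	# setup: remember the current value
+#	set_tunable32 BCLONE_ENABLED 1	# setup: enable it for this test run
+#	...
+#	restore_tunable BCLONE_ENABLED	# cleanup: put the saved value back
+#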
+
#
# Get a global system tunable
#
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index 8010a9451597..a0edad14d028 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -89,7 +89,8 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
-VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED
+VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
+BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index 158401e078aa..87b50f59ca7a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -1431,6 +1431,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/fault/auto_online_001_pos.ksh \
functional/fault/auto_online_002_pos.ksh \
functional/fault/auto_replace_001_pos.ksh \
+ functional/fault/auto_replace_002_pos.ksh \
functional/fault/auto_spare_001_pos.ksh \
functional/fault/auto_spare_002_pos.ksh \
functional/fault/auto_spare_ashift.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
index 7ac13adb6325..b985445a5d12 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
@@ -31,4 +31,8 @@ verify_runnable "global"
default_cleanup_noexit
+if tunable_exists BCLONE_ENABLED ; then
+ log_must restore_tunable BCLONE_ENABLED
+fi
+
log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
index 512f5a0644df..58441bf8f3ad 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
@@ -33,4 +33,9 @@ fi
verify_runnable "global"
+if tunable_exists BCLONE_ENABLED ; then
+ log_must save_tunable BCLONE_ENABLED
+ log_must set_tunable32 BCLONE_ENABLED 1
+fi
+
log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh
index c35ca8e8c92c..c7c133a219cd 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_007_pos.ksh
@@ -34,6 +34,7 @@
# STRATEGY:
# 1. Create a pool with a known feature set.
# 2. Verify only those features are active/enabled.
+# 3. Repeat steps 1-2 for every known feature set.
#
verify_runnable "global"
@@ -47,8 +48,11 @@ log_onexit cleanup
log_assert "creates a pool with a specified feature set enabled"
-log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
-check_feature_set $TESTPOOL compat-2020
-log_must zpool destroy -f $TESTPOOL
+for compat in "$ZPOOL_COMPAT_DIR"/*
+do
+ log_must zpool create -f -o compatibility="${compat##*/}" $TESTPOOL $DISKS
+ check_feature_set $TESTPOOL "${compat##*/}"
+ log_must zpool destroy -f $TESTPOOL
+done
log_pass "creates a pool with a specified feature set enabled"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
index e98b5e8b2214..9c76a8780b4a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/misc.cfg
@@ -29,7 +29,7 @@
#
if is_linux; then
- # these are the set of setable ZFS properties
+ # these are the set of settable ZFS properties
PROP_NAMES="\
acltype atime \
checksum compression devices \
@@ -81,7 +81,7 @@ elif is_freebsd; then
hidden"
else
- # these are the set of setable ZFS properties
+ # these are the set of settable ZFS properties
PROP_NAMES="\
aclinherit aclmode atime \
checksum compression devices \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
index 081e6c18430d..ae56ee9919bf 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
@@ -34,13 +34,14 @@
# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
# This creates keys ID_VDEV and ID_VDEV_PATH and sets phys_path="scsidebug".
# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
-# 3. Export a pool
+# 3. Export the pool
# 4. Wipe and offline the scsi_debug disk
-# 5. Import pool with missing disk
+# 5. Import the pool with missing disk
# 6. Re-online the wiped scsi_debug disk
-# 7. Verify the ZED detects the new unused disk and adds it back to the pool
+# 7. Verify ZED detects the new blank disk and replaces the missing vdev
+# 8. Verify that the scsi_debug disk was re-partitioned
#
-# Creates a raidz1 zpool using persistent disk path names
+# Creates a raidz1 zpool using persistent /dev/disk/by-vdev path names
# (i.e., not /dev/sdc)
#
# Auto-replace is opt in, and matches by phys_path.
@@ -83,11 +84,27 @@ log_must zpool create -f $TESTPOOL raidz1 $SD_DEVICE $DISK1 $DISK2 $DISK3
log_must zpool set autoreplace=on $TESTPOOL
# Add some data to the pool
-log_must mkfile $FSIZE /$TESTPOOL/data
+log_must zfs create $TESTPOOL/fs
+log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z
log_must zpool export $TESTPOOL
+# Record the partition UUID for later comparison
+part_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
+ --value /dev/disk/by-id/$SD_DEVICE_ID)
+[[ -z "$part_uuid" ]] || log_note original disk GPT uuid ${part_uuid}
+
+#
# Wipe and offline the disk
+#
+# Note that it is not enough to zero the disk to expunge the partitions.
+# You also need to inform the kernel (e.g., 'hdparm -z' or 'partprobe').
+#
+# Using partprobe is overkill and hdparm is not as common as wipefs. So
+# we use wipefs which lets the kernel know the partition was removed
+# from the device (i.e., calls BLKRRPART ioctl).
+#
log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
+log_must /usr/sbin/wipefs -a /dev/disk/by-id/$SD_DEVICE_ID
remove_disk $SD
block_device_wait
@@ -106,4 +123,18 @@ log_must wait_replacing $TESTPOOL 60
# Validate auto-replace was successful
log_must check_state $TESTPOOL "" "ONLINE"
+#
+# Confirm the partition UUID changed so we know the new disk was relabeled
+#
+# Note: some older versions of udevadm don't support the "--property"
+# option, so we skip this check when it is not supported.
+#
+if [ ! -z "$part_uuid" ]; then
+ new_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
+ --value /dev/disk/by-id/$SD_DEVICE_ID)
+ log_note new disk GPT uuid ${new_uuid}
+ [[ "$part_uuid" = "$new_uuid" ]] && \
+ log_fail "The new disk was not relabeled as expected"
+fi
+
log_pass "Auto-replace test successful"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh
new file mode 100755
index 000000000000..2259e604317b
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_replace_002_pos.ksh
@@ -0,0 +1,192 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright (c) 2023 by Klara, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Replace Test.
+# Verifies that auto-replace works with by-id paths.
+#
+# STRATEGY:
+# 1. Update /etc/zfs/vdev_id.conf with scsidebug alias for a persistent path.
+# This creates keys ID_VDEV and ID_VDEV_PATH and sets phys_path="scsidebug".
+# 2. Create a pool and set autoreplace=on (auto-replace is opt-in)
+# 3. Export the pool
+# 4. Wipe and offline the scsi_debug disk
+# 5. Import the pool with missing disk
+# 6. Re-online the wiped scsi_debug disk with a new serial number
+# 7. Verify ZED detects the new blank disk and replaces the missing vdev
+# 8. Verify that the scsi_debug disk was re-partitioned
+#
+# Creates a raidz1 zpool using persistent /dev/disk/by-id path names
+#
+# Auto-replace is opt in, and matches by phys_path.
+#
+
+verify_runnable "both"
+
+if ! is_physical_device $DISKS; then
+ log_unsupported "Unsupported disks for this test."
+fi
+
+function cleanup
+{
+ zpool status $TESTPOOL
+ destroy_pool $TESTPOOL
+ sed -i '/alias scsidebug/d' $VDEVID_CONF
+ unload_scsi_debug
+}
+
+#
+# Wait until a vdev transitions to its replacement vdev
+#
+# Return 0 when vdev reaches expected state, 1 on timeout.
+#
+# Note: index +2 is to skip over root and raidz-0 vdevs
+#
+function wait_vdev_online # pool index oldguid timeout
+{
+ typeset pool=$1
+ typeset -i index=$2+2
+ typeset guid=$3
+ typeset timeout=${4:-60}
+ typeset -i i=0
+
+ while [[ $i -lt $timeout ]]; do
+ vdev_guids=( $(zpool get -H -o value guid $pool all-vdevs) )
+
+ if [ "${vdev_guids[$index]}" != "${guid}" ]; then
+ log_note "new vdev[$((index-2))]: ${vdev_guids[$index]}, replacing ${guid}"
+ return 0
+ fi
+
+ i=$((i+1))
+ sleep 1
+ done
+
+ return 1
+}
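+
+# Example (used below): wait up to 45 seconds for vdev 0 of $TESTPOOL to be
+# replaced by a vdev whose guid differs from the recorded $vdev_guid:
+#
+#	log_must wait_vdev_online $TESTPOOL 0 $vdev_guid 45
+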
+log_assert "automated auto-replace with by-id paths"
+log_onexit cleanup
+
+load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
+SD=$(get_debug_device)
+SD_DEVICE_ID=$(get_persistent_disk_name $SD)
+SD_HOST=$(get_scsi_host $SD)
+
+# Register vdev_id alias for scsi_debug device to create a persistent path
+echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
+block_device_wait
+
+SD_DEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | \
+ awk -F'=' '/ID_VDEV=/ {print $2; exit}')
+[ -z "$SD_DEVICE" ] && log_fail "vdev rule was not registered properly"
+
+log_must zpool events -c
+log_must zpool create -f $TESTPOOL raidz1 $SD_DEVICE_ID $DISK1 $DISK2 $DISK3
+
+vdev_guid=$(zpool get guid -H -o value $TESTPOOL $SD_DEVICE_ID)
+log_note original vdev guid ${vdev_guid}
+
+# Auto-replace is opt-in so need to set property
+log_must zpool set autoreplace=on $TESTPOOL
+
+# Add some data to the pool
+log_must zfs create $TESTPOOL/fs
+log_must fill_fs /$TESTPOOL/fs 4 100 4096 512 Z
+log_must zpool export $TESTPOOL
+
+# Record the partition UUID for later comparison
+part_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
+ --value /dev/disk/by-id/$SD_DEVICE_ID)
+[[ -z "$part_uuid" ]] || log_note original disk GPT uuid ${part_uuid}
+
+#
+# Wipe and offline the disk
+#
+# Note that it is not enough to zero the disk to expunge the partitions.
+# You also need to inform the kernel (e.g., 'hdparm -z' or 'partprobe').
+#
+# Using partprobe is overkill and hdparm is not as common as wipefs. So
+# we use wipefs which lets the kernel know the partition was removed
+# from the device (i.e., calls BLKRRPART ioctl).
+#
+log_must dd if=/dev/zero of=/dev/disk/by-id/$SD_DEVICE_ID bs=1M count=$SDSIZE
+log_must /usr/sbin/wipefs -a /dev/disk/by-id/$SD_DEVICE_ID
+remove_disk $SD
+block_device_wait
+
+# Re-import pool with drive missing
+log_must zpool import $TESTPOOL
+log_must check_state $TESTPOOL "" "DEGRADED"
+block_device_wait
+
+#
+# Online an empty disk in the same physical location, with a different by-id
+# symlink. We use vpd_use_hostno to make sure the underlying serial number
+# changes for the new disk which in turn gives us a different by-id path.
+#
+# The original names were something like:
+# /dev/disk/by-id/scsi-SLinux_scsi_debug_16000-part1
+# /dev/disk/by-id/wwn-0x33333330000007d0-part1
+#
+# This newly inserted disk will have different links, like:
+#   /dev/disk/by-id/scsi-SLinux_scsi_debug_2000-part1
+#   /dev/disk/by-id/wwn-0x3333333000003e80-part1
+#
+echo '0' > /sys/bus/pseudo/drivers/scsi_debug/vpd_use_hostno
+
+insert_disk $SD $SD_HOST
+
+# Make sure the physical path points to the same scsi_debug device
+SD_DEVICE_ID=$(get_persistent_disk_name $SD)
+echo "alias scsidebug /dev/disk/by-id/$SD_DEVICE_ID" >>$VDEVID_CONF
+block_device_wait
+
+# Wait for the new disk to be online and replaced
+log_must wait_vdev_online $TESTPOOL 0 $vdev_guid 45
+log_must wait_replacing $TESTPOOL 45
+
+# Validate auto-replace was successful
+log_must check_state $TESTPOOL "" "ONLINE"
+
+#
+# Confirm the partition UUID changed so we know the new disk was relabeled
+#
+# Note: some older versions of udevadm don't support the "--property"
+# option, so we skip this check when it is not supported.
+#
+if [ ! -z "$part_uuid" ]; then
+ new_uuid=$(udevadm info --query=property --property=ID_PART_TABLE_UUID \
+ --value /dev/disk/by-id/$SD_DEVICE_ID)
+ log_note new disk GPT uuid ${new_uuid}
+ [[ "$part_uuid" = "$new_uuid" ]] && \
+ log_fail "The new disk was not relabeled as expected"
+fi
+
+log_pass "automated auto-replace with by-id paths"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
index 7a02eb68abda..bae876379177 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/procfs/pool_state.ksh
@@ -141,7 +141,11 @@ remove_disk $SDISK
# background since the command will hang when the pool gets suspended. The
# command will resume and exit after we restore the missing disk later on.
zpool scrub $TESTPOOL2 &
-sleep 3 # Give the scrub some time to run before we check if it fails
+# Once we trigger the zpool scrub, all zpool/zfs commands get stuck for 180
+# seconds. After those 180 seconds the zpool/zfs commands start executing
+# again, but it takes a few more seconds (~10s) for the status to update.
+# Hence we sleep for 200 seconds (180 + 10, plus some margin) so that we
+# read the correct status.
+sleep 200 # Give the scrub some time to run before we check if it fails
log_must check_all $TESTPOOL2 "SUSPENDED"