author     Martin Matuska <mm@FreeBSD.org>  2026-04-18 22:21:01 +0000
committer  Martin Matuska <mm@FreeBSD.org>  2026-04-18 22:22:45 +0000
commit     d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15 (patch)
tree       91076f59280743f162f469d55e3a9a3fae488f38 /sys
parent     1c50cb1d75625bea2ba928697ea1cbafa29dd245 (diff)
parent     1644e2ffd2640fa3e2c191ceaf048a5fc8399493 (diff)
Diffstat (limited to 'sys')
-rw-r--r--  sys/contrib/openzfs/.github/workflows/checkstyle.yaml | 7
-rw-r--r--  sys/contrib/openzfs/.github/workflows/codeql.yml | 2
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py | 33
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh | 21
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh | 8
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh | 22
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh | 11
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh | 22
-rwxr-xr-x  sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh | 4
-rw-r--r--  sys/contrib/openzfs/.github/workflows/smatch.yml | 4
-rw-r--r--  sys/contrib/openzfs/.github/workflows/zfs-arm.yml | 40
-rw-r--r--  sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml | 23
-rw-r--r--  sys/contrib/openzfs/.github/workflows/zfs-qemu.yml | 42
-rw-r--r--  sys/contrib/openzfs/.github/workflows/zloop.yml | 7
-rw-r--r--  sys/contrib/openzfs/AUTHORS | 1
-rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c | 4
-rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c | 146
-rw-r--r--  sys/contrib/openzfs/cmd/zfs/zfs_main.c | 7
-rw-r--r--  sys/contrib/openzfs/cmd/zinject/translate.c | 35
-rw-r--r--  sys/contrib/openzfs/cmd/zinject/zinject.c | 4
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_main.c | 21
-rw-r--r--  sys/contrib/openzfs/cmd/zpool/zpool_vdev.c | 234
-rw-r--r--  sys/contrib/openzfs/cmd/ztest.c | 11
-rw-r--r--  sys/contrib/openzfs/config/deb.am | 20
-rw-r--r--  sys/contrib/openzfs/config/kernel-copy-from-user-inatomic.m4 | 30
-rw-r--r--  sys/contrib/openzfs/config/kernel.m4 | 2
-rw-r--r--  sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install | 1
-rw-r--r--  sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py | 4
-rw-r--r--  sys/contrib/openzfs/contrib/pyzfs/setup.py.in | 4
-rw-r--r--  sys/contrib/openzfs/include/libzfs.h | 2
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h | 73
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 1
-rw-r--r--  sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h | 4
-rw-r--r--  sys/contrib/openzfs/include/os/linux/spl/sys/zone.h | 58
-rw-r--r--  sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h | 14
-rw-r--r--  sys/contrib/openzfs/include/sys/dbuf.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu.h | 6
-rw-r--r--  sys/contrib/openzfs/include/sys/dmu_zfetch.h | 1
-rw-r--r--  sys/contrib/openzfs/include/sys/dnode.h | 13
-rw-r--r--  sys/contrib/openzfs/include/sys/dsl_crypt.h | 3
-rw-r--r--  sys/contrib/openzfs/include/sys/fs/zfs.h | 8
-rw-r--r--  sys/contrib/openzfs/include/sys/spa.h | 2
-rw-r--r--  sys/contrib/openzfs/include/sys/spa_impl.h | 1
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_draid.h | 7
-rw-r--r--  sys/contrib/openzfs/include/sys/vdev_raidz_impl.h | 1
-rw-r--r--  sys/contrib/openzfs/include/zfeature_common.h | 1
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs.abi | 43
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c | 52
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c | 10
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_pool.c | 170
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_status.c | 43
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/libzfs_util.c | 6
-rw-r--r--  sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c | 19
-rw-r--r--  sys/contrib/openzfs/man/Makefile.am | 16
-rw-r--r--  sys/contrib/openzfs/man/man1/dbufstat.1 | 233
-rw-r--r--  sys/contrib/openzfs/man/man4/zfs.4 | 11
-rw-r--r--  sys/contrib/openzfs/man/man7/vdevprops.7 | 26
-rw-r--r--  sys/contrib/openzfs/man/man7/zfsprops.7 | 92
-rw-r--r--  sys/contrib/openzfs/man/man7/zpool-features.7 | 27
-rw-r--r--  sys/contrib/openzfs/man/man7/zpoolconcepts.7 | 36
-rw-r--r--  sys/contrib/openzfs/man/man8/zfs-load-key.8 | 9
-rw-r--r--  sys/contrib/openzfs/man/man8/zfs-set.8 | 20
-rw-r--r--  sys/contrib/openzfs/man/man8/zfs-zone.8 | 15
-rw-r--r--  sys/contrib/openzfs/man/man8/zinject.8 | 10
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-create.8 | 35
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-list.8 | 4
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-offline.8 | 7
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-resilver.8 | 3
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-scrub.8 | 4
-rw-r--r--  sys/contrib/openzfs/man/man8/zpool-status.8 | 4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c | 8
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c | 4
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c | 43
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c | 84
-rw-r--r--  sys/contrib/openzfs/module/os/linux/spl/spl-zone.c | 413
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c | 50
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c | 57
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c | 4
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c | 240
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c | 39
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c | 509
-rw-r--r--  sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zcommon/zfeature_common.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zcommon/zfs_prop.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zcommon/zpool_prop.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/abd.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/dbuf.c | 78
-rw-r--r--  sys/contrib/openzfs/module/zfs/ddt_log.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu.c | 79
-rw-r--r--  sys/contrib/openzfs/module/zfs/dmu_zfetch.c | 69
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/dnode_sync.c | 105
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_crypt.c | 15
-rw-r--r--  sys/contrib/openzfs/module/zfs/dsl_deleg.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa.c | 58
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_errlog.c | 2
-rw-r--r--  sys/contrib/openzfs/module/zfs/spa_log_spacemap.c | 8
-rw-r--r--  sys/contrib/openzfs/module/zfs/space_map.c | 3
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev.c | 68
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_draid.c | 423
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_label.c | 23
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_mirror.c | 13
-rw-r--r--  sys/contrib/openzfs/module/zfs/vdev_raidz.c | 65
-rw-r--r--  sys/contrib/openzfs/module/zfs/zfs_ioctl.c | 318
-rw-r--r--  sys/contrib/openzfs/module/zfs/zio.c | 6
-rw-r--r--  sys/contrib/openzfs/module/zfs/zvol.c | 51
-rw-r--r--  sys/contrib/openzfs/rpm/generic/zfs.spec.in | 6
-rwxr-xr-x  sys/contrib/openzfs/scripts/spdxcheck.pl | 1
-rw-r--r--  sys/contrib/openzfs/tests/runfiles/common.run | 40
-rw-r--r--  sys/contrib/openzfs/tests/runfiles/linux.run | 7
-rw-r--r--  sys/contrib/openzfs/tests/runfiles/sanity.run | 6
-rwxr-xr-x  sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in | 6
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore | 1
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am | 2
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/cmd/clone_after_trunc.c | 117
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg | 2
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am | 46
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_trunc.ksh | 31
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_userprop.ksh | 72
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_005_pos.ksh | 149
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_errinfo_001_neg.ksh | 103
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg | 2
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg | 1
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh | 84
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh | 63
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh | 5
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_draid_fgroups.ksh | 163
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib | 65
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh | 141
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh | 157
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh | 152
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_width.ksh | 91
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh | 3
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/cleanup.ksh | 46
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/setup.ksh | 99
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid.cfg | 33
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_001_pos.ksh | 85
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_002_pos.ksh | 83
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_003_pos.ksh | 100
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_004_pos.ksh | 91
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_005_neg.ksh | 72
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_006_pos.ksh | 109
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_007_pos.ksh | 110
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_008_pos.ksh | 128
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_009_pos.ksh | 149
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_010_pos.ksh | 157
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_011_neg.ksh | 153
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_012_pos.ksh | 120
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_013_pos.ksh | 122
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_014_pos.ksh | 116
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_015_pos.ksh | 114
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_016_pos.ksh | 132
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_017_neg.ksh | 125
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_018_pos.ksh | 129
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_019_neg.ksh | 141
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_020_neg.ksh | 171
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_021_neg.ksh | 109
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_022_neg.ksh | 154
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_023_pos.ksh | 131
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_024_neg.ksh | 144
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_025_pos.ksh | 102
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_026_pos.ksh | 112
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_027_pos.ksh | 103
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_028_neg.ksh | 103
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_029_neg.ksh | 120
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_030_pos.ksh | 183
-rwxr-xr-x  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_031_pos.ksh | 110
-rw-r--r--  sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_common.kshlib | 237
-rw-r--r--  sys/modules/zfs/zfs_config.h | 4
-rw-r--r--  sys/modules/zfs/zfs_gitrev.h | 2
170 files changed, 9623 insertions, 846 deletions
diff --git a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
index a01a4fe8587c..ddcc2b8581fa 100644
--- a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
+++ b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
@@ -56,9 +56,10 @@ jobs:
- name: Prepare artifacts
if: failure() && steps.CheckABI.outcome == 'failure'
run: |
- find -name *.abi | tar -cf abi_files.tar -T -
- - uses: actions/upload-artifact@v4
+ find -name *.abi | tar -cjf abi_files.tar.bz2 -T -
+ - uses: actions/upload-artifact@v7
if: failure() && steps.CheckABI.outcome == 'failure'
with:
name: New ABI files (use only if you're sure about interface changes)
- path: abi_files.tar
+ path: abi_files.tar.bz2
+ archive: false
diff --git a/sys/contrib/openzfs/.github/workflows/codeql.yml b/sys/contrib/openzfs/.github/workflows/codeql.yml
index e975d7dd00b9..689fe71fddc3 100644
--- a/sys/contrib/openzfs/.github/workflows/codeql.yml
+++ b/sys/contrib/openzfs/.github/workflows/codeql.yml
@@ -28,7 +28,7 @@ jobs:
echo "MAKEFLAGS=-j$(nproc)" >> $GITHUB_ENV
- name: Checkout repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
index 059d6ad3872b..b1910ab630af 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
+++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
@@ -3,13 +3,16 @@
"""
Determine the CI type based on the change list and commit message.
-Prints "quick" if (explicity required by user):
+Output format: "<type> <source>" where source is "manual" (from
+ZFS-CI-Type commit tag) or "auto" (from file change heuristics).
+
+Prints "quick manual" if:
- the *last* commit message contains 'ZFS-CI-Type: quick'
-or if (heuristics):
+or "quick auto" if (heuristics):
- the files changed are not in the list of specified directories, and
- all commit messages do not contain 'ZFS-CI-Type: (full|linux|freebsd)'
-Otherwise prints "full".
+Otherwise prints "full auto" (or "<type> manual" if explicitly requested).
"""
import sys
@@ -58,9 +61,10 @@ if __name__ == '__main__':
head, base = sys.argv[1:3]
- def output_type(type, reason):
- print(f'{prog}: will run {type} CI: {reason}', file=sys.stderr)
- print(type)
+ def output_type(type, source, reason):
+ print(f'{prog}: will run {type} CI ({source}): {reason}',
+ file=sys.stderr)
+ print(f'{type} {source}')
sys.exit(0)
# check last (HEAD) commit message
@@ -70,7 +74,8 @@ if __name__ == '__main__':
for line in last_commit_message_raw.stdout.decode().splitlines():
if line.strip().lower() == 'zfs-ci-type: quick':
- output_type('quick', f'requested by HEAD commit {head}')
+ output_type('quick', 'manual',
+ f'requested by HEAD commit {head}')
# check all commit messages
all_commit_message_raw = subprocess.run([
@@ -84,11 +89,14 @@ if __name__ == '__main__':
if line.startswith('ZFS-CI-Commit:'):
commit_ref = line.lstrip('ZFS-CI-Commit:').rstrip()
if line.strip().lower() == 'zfs-ci-type: freebsd':
- output_type('freebsd', f'requested by commit {commit_ref}')
+ output_type('freebsd', 'manual',
+ f'requested by commit {commit_ref}')
if line.strip().lower() == 'zfs-ci-type: linux':
- output_type('linux', f'requested by commit {commit_ref}')
+ output_type('linux', 'manual',
+ f'requested by commit {commit_ref}')
if line.strip().lower() == 'zfs-ci-type: full':
- output_type('full', f'requested by commit {commit_ref}')
+ output_type('full', 'manual',
+ f'requested by commit {commit_ref}')
# check changed files
changed_files_raw = subprocess.run([
@@ -104,9 +112,10 @@ if __name__ == '__main__':
for r in FULL_RUN_REGEX:
if r.match(f):
output_type(
- 'full',
+ 'full', 'auto',
f'changed file "{f}" matches pattern "{r.pattern}"'
)
# catch-all
- output_type('quick', 'no changed file matches full CI patterns')
+ output_type('quick', 'auto',
+ 'no changed file matches full CI patterns')
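The script now emits two whitespace-separated fields ("<type> <source>") instead of a single word, so callers have to split the output; the zfs-qemu.yml change further down in this commit does exactly that with a bash read. A minimal sketch of consuming the new format, assuming illustrative $head/$base refs and step wiring that are not part of the patch:

    # Split the two-field output; variable names are illustrative.
    read ci_type ci_source <<< "$(python3 .github/workflows/scripts/generate-ci-type.py "$head" "$base")"
    echo "ci_type=$ci_type ci_source=$ci_source"   # e.g. "ci_type=quick ci_source=auto"
    if [ "$ci_source" = "manual" ]; then
        echo "CI scope was pinned by a ZFS-CI-Type commit tag; overrides are skipped"
    fi
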
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
index 3d111561272d..9d6cc3c6d3e2 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
@@ -6,6 +6,27 @@
set -eu
+# The default runner has a bunch of development tools and other things
+# that we do not need. Remove them here to free up a total of 35GB.
+#
+# First remove packages - this frees up ~10GB
+echo "Disk space before purge:"
+df -h /
+sudo docker image prune --all --force
+sudo docker builder prune -a
+unneeded="microsoft-edge-stable|azure-cli|google-cloud|google-chrome-stable|"\
+"temurin|llvm|firefox|mysql-server|snapd|android|dotnet|haskell|ghcup|"\
+"powershell|julia|swift|miniconda|chromium"
+sudo apt-get -y remove $(dpkg-query -f '${binary:Package}\n' -W | grep -E "'$unneeded'")
+sudo apt-get -y autoremove
+
+# Next, remove unneeded files in /usr. This frees up an additional 25GB.
+sudo rm -fr /usr/local/lib/android /usr/share/dotnet /usr/local/.ghcup \
+ /usr/share/swift /usr/local/share/powershell /usr/local/julia* \
+ /usr/share/miniconda /usr/local/share/chromium
+echo "Disk space after:"
+df -h /
+
# The default 'azure.archive.ubuntu.com' mirrors can be really slow.
# Prioritize the official Ubuntu mirrors.
#
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
index 7cf33a9c49ea..3d78885a9ca3 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
@@ -96,8 +96,8 @@ case "$OS" in
KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
NIC="rtl8139"
;;
- freebsd14-3r)
- FreeBSD="14.3-RELEASE"
+ freebsd14-4r)
+ FreeBSD="14.4-RELEASE"
OSNAME="FreeBSD $FreeBSD"
OSv="freebsd14.0"
URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
@@ -111,8 +111,8 @@ case "$OS" in
KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"
NIC="rtl8139"
;;
- freebsd14-3s)
- FreeBSD="14.3-STABLE"
+ freebsd14-4s)
+ FreeBSD="14.4-STABLE"
OSNAME="FreeBSD $FreeBSD"
OSv="freebsd14.0"
URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
index c8e1a015abd9..6a83ef45fd26 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
@@ -3,8 +3,11 @@
######################################################################
# 3) install dependencies for compiling and loading
#
-# $1: OS name (like 'fedora41')
-# $2: (optional) Experimental Fedora kernel version, like "6.14" to
+# qemu-3-deps-vm.sh [--poweroff] OS_NAME [FEDORA_VERSION]
+#
+# --poweroff: Power off the VM after installing dependencies
+# OS_NAME: OS name (like 'fedora41')
+# FEDORA_VERSION: (optional) Experimental Fedora kernel version, like "6.14" to
# install instead of Fedora defaults.
######################################################################
@@ -153,6 +156,12 @@ function install_fedora_experimental_kernel {
sudo dnf -y copr disable @kernel-vanilla/mainline
}
+POWEROFF=""
+if [ "$1" == "--poweroff" ] ; then
+ POWEROFF=1
+ shift
+fi
+
# Install dependencies
case "$1" in
almalinux8)
@@ -212,6 +221,11 @@ case "$1" in
sudo apt-get install -yq linux-tools-common libtirpc-dev \
linux-modules-extra-$(uname -r)
sudo apt-get install -yq dh-sequence-dkms
+
+ # Need 'build-essential' explicitly for ARM builder
+ # https://github.com/actions/runner-images/issues/9946
+ sudo apt-get install -yq build-essential
+
echo "##[endgroup]"
echo "##[group]Delete Ubuntu OpenZFS modules"
for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done
@@ -306,5 +320,7 @@ esac
# reset cloud-init configuration and poweroff
sudo cloud-init clean --logs
-sleep 2 && sudo poweroff &
+if [ "$POWEROFF" == "1" ] ; then
+ sleep 2 && sudo poweroff &
+fi
exit 0
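With the new flag the script no longer powers the machine off unconditionally; the caller decides. A short usage sketch based only on the header comment above (the OS names are just examples):

    # Provisioning inside a disposable VM that should shut down afterwards:
    .github/workflows/scripts/qemu-3-deps-vm.sh --poweroff fedora41
    # Provisioning directly on a runner that must stay up (the ARM job does this):
    .github/workflows/scripts/qemu-3-deps-vm.sh ubuntu24
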
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
index 38255cf39966..bbfa2ec85b8f 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
@@ -350,7 +350,16 @@ fi
# save some sysinfo
uname -a > /var/tmp/uname.txt
-cd $HOME/zfs
+# Check if we're running this script from within a VM or on the runner itself.
+# Most of the time we will be running in a VM, but the ARM builder actually
+# runs this script on the runner. If we happen to be running on the ARM
+# runner, we will start in the ZFS source directory. If we're running on a VM
+# then we'll just start in our home directory, and will need to 'cd' into our
+# source directory.
+if [ ! -e META ] ; then
+ cd $HOME/zfs
+fi
+
export PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin"
extra=""
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh
index 98a5c24c2521..5e18f4bf49c1 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh
@@ -13,16 +13,29 @@ source env.txt
mkdir -p $RESPATH
+TARNAME=qemu-$OS
+
# check if building the module has failed
if [ -z ${VMs:-} ]; then
cd $RESPATH
echo ":exclamation: ZFS module didn't build successfully :exclamation:" \
| tee summary.txt | tee /tmp/summary.txt
cp /var/tmp/*.txt .
- tar cf /tmp/qemu-$OS.tar -C $RESPATH -h . || true
+
+ # rename /var/tmp/test_results to /var/tmp/qemu-$OS
+ mv $RESPATH $(dirname $RESPATH)/$TARNAME
+ tar cjf /tmp/$TARNAME.tar.bz2 -C $(dirname $RESPATH) -h $TARNAME || true
+ # move it back to /var/tmp/test_results (needed for next script)
+ mv $(dirname $RESPATH)/$TARNAME $RESPATH
+
exit 0
fi
+if ! grep -q vm /etc/hosts ; then
+ echo "No vm* hostnames, VMs probably didn't startup"
+ exit 0
+fi
+
# build was okay
BASE="$HOME/work/zfs/zfs"
MERGE="$BASE/.github/workflows/scripts/merge_summary.awk"
@@ -121,4 +134,9 @@ if [ ! -s uname.txt ]; then
fi
# artifact ready now
-tar cf /tmp/qemu-$OS.tar -C $RESPATH -h . || true
+#
+# rename /var/tmp/test_results to /var/tmp/qemu-$OS
+mv $RESPATH $(dirname $RESPATH)/$TARNAME
+tar cjf /tmp/$TARNAME.tar.bz2 -C $(dirname $RESPATH) -h $TARNAME || true
+# move it back to /var/tmp/test_results (needed for next script)
+mv $(dirname $RESPATH)/$TARNAME $RESPATH
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
index 737dda01b565..1200f4de2b94 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-9-summary-page.sh
@@ -33,7 +33,9 @@ function send2github() {
# first call, generate all summaries
if [ ! -f out-1.md ]; then
logfile="1"
- for tarfile in Logs-functional-*/qemu-*.tar; do
+ # The bz2 files are put into directories with the same name, like:
+ # "qemu-debian12.tar.bz2/qemu-debian12.tar.bz2"
+ for tarfile in qemu-*.tar.bz2/qemu-*.tar.bz2; do
rm -rf vm* *.txt
if [ ! -s "$tarfile" ]; then
output "\n## Functional Tests: unknown\n"
diff --git a/sys/contrib/openzfs/.github/workflows/smatch.yml b/sys/contrib/openzfs/.github/workflows/smatch.yml
index ac6454244f93..305a1f0179bd 100644
--- a/sys/contrib/openzfs/.github/workflows/smatch.yml
+++ b/sys/contrib/openzfs/.github/workflows/smatch.yml
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: Checkout smatch
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
repository: error27/smatch
ref: master
@@ -26,7 +26,7 @@ jobs:
cd $GITHUB_WORKSPACE/smatch
make -j$(nproc)
- name: Checkout OpenZFS
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
path: zfs
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-arm.yml b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
new file mode 100644
index 000000000000..6039e4736c42
--- /dev/null
+++ b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
@@ -0,0 +1,40 @@
+name: zfs-arm
+
+on:
+ push:
+ pull_request:
+ workflow_dispatch:
+
+jobs:
+ zfs-arm:
+ name: ZFS ARM build
+ runs-on: ubuntu-24.04-arm
+ steps:
+ - uses: actions/checkout@v6
+ with:
+ fetch-depth: 0
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ timeout-minutes: 20
+ run: |
+ sudo apt-get -y remove firefox || true
+ .github/workflows/scripts/qemu-3-deps-vm.sh ubuntu24
+
+ # We're running the VM scripts locally on the runner, so need to fix
+ # up hostnames to make it work.
+ for ((i=0; i<=3; i++)); do
+ echo "127.0.0.1 vm$i" | sudo tee -a /etc/hosts
+ done
+ - name: Build modules
+ timeout-minutes: 30
+ run: |
+ .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24
+
+ # Quick sanity test since we're not running the full ZTS
+ sudo modprobe zfs
+ sudo dmesg | grep -i zfs
+ truncate -s 100M file
+ sudo zpool create tank ./file
+ zpool status
+
+ echo "Built ZFS successfully on ARM"
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
index 537223586366..88d85a06d975 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
@@ -61,7 +61,7 @@ jobs:
os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43']
runs-on: ubuntu-24.04
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
@@ -73,7 +73,7 @@ jobs:
- name: Install dependencies
run: |
- .github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }}
+ .github/workflows/scripts/qemu-3-deps.sh --poweroff ${{ matrix.os }}
- name: Build modules or Test repo
run: |
@@ -104,17 +104,18 @@ jobs:
run: |
rsync -a zfs@vm0:/tmp/repo /tmp || true
.github/workflows/scripts/replace-dupes-with-symlinks.sh /tmp/repo
- tar -cf ${{ matrix.os }}-repo.tar -C /tmp repo
+ tar -cjf ${{ matrix.os }}-repo.tar.bz2 -C /tmp repo
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
id: artifact-upload
if: always()
with:
name: ${{ matrix.os }}-repo
- path: ${{ matrix.os }}-repo.tar
+ path: ${{ matrix.os }}-repo.tar.bz2
compression-level: 0
retention-days: 2
if-no-files-found: ignore
+ archive: false
combine_repos:
if: always()
@@ -122,16 +123,16 @@ jobs:
name: "Results"
runs-on: ubuntu-latest
steps:
- - uses: actions/download-artifact@v4
+ - uses: actions/download-artifact@v8
id: artifact-download
if: always()
- name: Test Summary
if: always()
run: |
- for i in $(find . -type f -iname "*.tar") ; do
+ for i in $(find . -type f -iname "*.tar.bz2") ; do
tar -xf $i -C /tmp
done
- tar -cf all-repo.tar -C /tmp repo
+ tar -cjf all-repo.tar.bz2 -C /tmp repo
# If we're installing from a repo, print out the summary of the versions
# that got installed using Markdown.
@@ -146,12 +147,12 @@ jobs:
done
fi
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
id: artifact-upload2
if: always()
with:
name: all-repo
- path: all-repo.tar
- compression-level: 0
+ path: all-repo.tar.bz2
retention-days: 5
if-no-files-found: ignore
+ archive: false
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
index a9615abb68d7..f83b319a331f 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
@@ -28,19 +28,20 @@ jobs:
test_os: ${{ steps.os.outputs.os }}
ci_type: ${{ steps.os.outputs.ci_type }}
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Generate OS config and CI type
id: os
run: |
ci_type="default"
+ ci_source="auto"
# determine CI type when running on PR
if ${{ github.event_name == 'pull_request' }}; then
head=${{ github.event.pull_request.head.sha }}
base=${{ github.event.pull_request.base.sha }}
- ci_type=$(python3 .github/workflows/scripts/generate-ci-type.py $head $base)
+ read ci_type ci_source <<< "$(python3 .github/workflows/scripts/generate-ci-type.py $head $base)"
fi
case "$ci_type" in
@@ -51,14 +52,27 @@ jobs:
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "ubuntu22", "ubuntu24"]'
;;
freebsd)
- os_selection='["freebsd13-5r", "freebsd14-3r", "freebsd13-5s", "freebsd14-3s", "freebsd15-0s", "freebsd16-0c"]'
+ os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-0s", "freebsd16-0c"]'
;;
*)
# default list
- os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "freebsd14-3r", "freebsd15-0s", "freebsd16-0c", "ubuntu22", "ubuntu24"]'
+ os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "freebsd14-4r", "freebsd15-0s", "freebsd16-0c", "ubuntu22", "ubuntu24"]'
;;
esac
+ # Repository-level override for OS selection.
+ # Set vars.ZTS_OS_OVERRIDE in repo settings to restrict targets
+ # (e.g. '["debian13"]' or '["debian13", "fedora42"]').
+ # Manual ZFS-CI-Type in commit messages bypasses the override.
+ if [ -n "${{ vars.ZTS_OS_OVERRIDE }}" ] && [ "$ci_source" != "manual" ]; then
+ override='${{ vars.ZTS_OS_OVERRIDE }}'
+ if echo "$override" | jq -e 'type == "array"' >/dev/null 2>&1; then
+ os_selection="$override"
+ else
+ echo "::warning::Invalid ZTS_OS_OVERRIDE, using default"
+ fi
+ fi
+
if ${{ github.event.inputs.fedora_kernel_ver != '' }}; then
# They specified a custom kernel version for Fedora.
# Use only Fedora runners.
@@ -84,13 +98,13 @@ jobs:
# debian: debian12, debian13, ubuntu22, ubuntu24
# misc: archlinux, tumbleweed
# FreeBSD variants of november 2025:
- # FreeBSD Release: freebsd13-5r, freebsd14-3r, freebsd15-0r
- # FreeBSD Stable: freebsd13-5s, freebsd14-3s, freebsd15-0s
+ # FreeBSD Release: freebsd13-5r, freebsd14-4r, freebsd15-0r
+ # FreeBSD Stable: freebsd13-5s, freebsd14-4s, freebsd15-0s
# FreeBSD Current: freebsd16-0c
os: ${{ fromJson(needs.test-config.outputs.test_os) }}
runs-on: ubuntu-24.04
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
@@ -104,7 +118,7 @@ jobs:
- name: Install dependencies
timeout-minutes: 60
- run: .github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }} ${{ github.event.inputs.fedora_kernel_ver }}
+ run: .github/workflows/scripts/qemu-3-deps.sh --poweroff ${{ matrix.os }} ${{ github.event.inputs.fedora_kernel_ver }}
- name: Build modules
timeout-minutes: 30
@@ -125,12 +139,13 @@ jobs:
timeout-minutes: 10
run: .github/workflows/scripts/qemu-7-prepare.sh
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
id: artifact-upload
if: always()
with:
name: Logs-functional-${{ matrix.os }}
- path: /tmp/qemu-${{ matrix.os }}.tar
+ path: /tmp/qemu-${{ matrix.os }}.tar.bz2
+ archive: false
if-no-files-found: ignore
- name: Test Summary
@@ -144,10 +159,10 @@ jobs:
needs: [ qemu-vm ]
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
- - uses: actions/download-artifact@v4
+ - uses: actions/download-artifact@v8
- name: Generating summary
run: .github/workflows/scripts/qemu-9-summary-page.sh
- name: Generating summary...
@@ -186,7 +201,8 @@ jobs:
run: .github/workflows/scripts/qemu-9-summary-page.sh 18
- name: Generating summary...
run: .github/workflows/scripts/qemu-9-summary-page.sh 19
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
with:
name: Summary Files
path: out-*
+ archive: true
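ZTS_OS_OVERRIDE must be a JSON array or the workflow warns and falls back to the default matrix, and it is ignored whenever the CI type was pinned manually via ZFS-CI-Type. One way to set it, assuming a GitHub CLI recent enough to manage Actions repository variables (the repository settings UI works as well):

    # Restrict CI to two targets:
    gh variable set ZTS_OS_OVERRIDE --body '["debian13", "fedora42"]'
    # Remove the override to restore the full default matrix:
    gh variable delete ZTS_OS_OVERRIDE
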
diff --git a/sys/contrib/openzfs/.github/workflows/zloop.yml b/sys/contrib/openzfs/.github/workflows/zloop.yml
index 4ae3ccdc5484..7f76a670af95 100644
--- a/sys/contrib/openzfs/.github/workflows/zloop.yml
+++ b/sys/contrib/openzfs/.github/workflows/zloop.yml
@@ -15,7 +15,7 @@ jobs:
WORK_DIR: /mnt/zloop
CORE_DIR: /mnt/zloop/cores
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Install dependencies
@@ -41,6 +41,7 @@ jobs:
sudo modprobe zfs
- name: Tests
run: |
+ [ -r /etc/hostid ] && [ -s /etc/hostid ] || sudo zgenhostid -f
sudo truncate -s 256G /mnt/vdev
sudo zpool create cipool -m $WORK_DIR -O compression=on -o autotrim=on /mnt/vdev
sudo /usr/share/zfs/zloop.sh -t 600 -I 6 -l -m 1 -c $CORE_DIR -f $WORK_DIR -- -T 120 -P 60
@@ -60,7 +61,7 @@ jobs:
if: failure()
run: |
cat $CORE_DIR/*/ztest.zdb
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
if: failure()
with:
name: Logs
@@ -68,7 +69,7 @@ jobs:
/mnt/zloop/*/
!/mnt/zloop/cores/*/vdev/
if-no-files-found: ignore
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v7
if: failure()
with:
name: Pool files
diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS
index 7174b7d66d17..11aea0171651 100644
--- a/sys/contrib/openzfs/AUTHORS
+++ b/sys/contrib/openzfs/AUTHORS
@@ -150,6 +150,7 @@ CONTRIBUTORS:
Chris Siden <chris.siden@delphix.com>
Chris Siebenmann <cks.github@cs.toronto.edu>
Christer Ekholm <che@chrekh.se>
+ Christos Longros <chris.longros@gmail.com>
Christian Kohlschütter <christian@kohlschutter.com>
Christian Neukirchen <chneukirchen@gmail.com>
Christian Schwarz <me@cschwarz.com>
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
index 206caa16baa6..48597d9c3856 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_diagnosis.c
@@ -48,8 +48,6 @@
#define DEFAULT_CHECKSUM_T 600 /* seconds */
#define DEFAULT_IO_N 10 /* events */
#define DEFAULT_IO_T 600 /* seconds */
-#define DEFAULT_SLOW_IO_N 10 /* events */
-#define DEFAULT_SLOW_IO_T 30 /* seconds */
#define CASE_GC_TIMEOUT_SECS 43200 /* 12 hours */
@@ -726,7 +724,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
*/
if (isresource) {
zfs_stats.resource_drops.fmds_value.ui64++;
- fmd_hdl_debug(hdl, "discarding '%s for vdev %llu",
+ fmd_hdl_debug(hdl, "discarding '%s' for vdev %llu",
class, vdev_guid);
return;
}
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
index d68272bea731..8aabf6d3bf75 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
@@ -100,12 +100,16 @@ find_pool(zpool_handle_t *zhp, void *data)
* Find a vdev within a tree with a matching GUID.
*/
static nvlist_t *
-find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
+find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid,
+ uint64_t *parent_guid)
{
- uint64_t guid;
+ uint64_t guid, saved_parent_guid;
nvlist_t **child;
uint_t c, children;
- nvlist_t *ret;
+ nvlist_t *ret = NULL;
+
+ if (parent_guid != NULL)
+ saved_parent_guid = *parent_guid;
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == search_guid) {
@@ -119,8 +123,9 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
- return (ret);
+ if ((ret = find_vdev(zhdl, child[c], search_guid,
+ parent_guid)) != NULL)
+ goto out;
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
@@ -128,8 +133,9 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
- return (ret);
+ if ((ret = find_vdev(zhdl, child[c], search_guid,
+ parent_guid)) != NULL)
+ goto out;
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
@@ -137,11 +143,18 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
- return (ret);
+ if ((ret = find_vdev(zhdl, child[c], search_guid,
+ parent_guid)) != NULL)
+ goto out;
}
return (NULL);
+out:
+ /* If parent_guid was set, don't reset it. */
+ if (ret != NULL && parent_guid != NULL &&
+ saved_parent_guid == *parent_guid)
+ *parent_guid = guid;
+ return (ret);
}
static int
@@ -203,11 +216,12 @@ find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid)
}
/*
- * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
+ * Given a (pool, vdev) GUID pair, find the matching pool, vdev and
+ * its top_guid.
*/
static zpool_handle_t *
-find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
- nvlist_t **vdevp)
+find_by_guid_impl(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
+ nvlist_t **vdevp, uint64_t *top_guid)
{
find_cbdata_t cb;
zpool_handle_t *zhp;
@@ -228,8 +242,11 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
return (NULL);
}
+ if (top_guid)
+ *top_guid = 0;
if (vdev_guid != 0) {
- if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid)) == NULL) {
+ if ((*vdevp = find_vdev(zhdl, nvroot, vdev_guid,
+ top_guid)) == NULL) {
zpool_close(zhp);
return (NULL);
}
@@ -239,6 +256,101 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
}
/*
+ * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
+ */
+static zpool_handle_t *
+find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
+ nvlist_t **vdevp)
+{
+ return (find_by_guid_impl(zhdl, pool_guid, vdev_guid, vdevp, NULL));
+}
+
+/*
+ * Given a (pool, vdev) GUID pair, count the number of faulted vdevs in
+ * its top vdev and return TRUE if the number of failures at i-th device
+ * index in each dRAID failure group equals to the number of failure groups,
+ * which means it's the domain failure, and the vdev is one of those faults.
+ * Otherwise, return FALSE.
+ */
+static boolean_t
+is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,
+ uint64_t pool_guid, uint64_t vdev_guid)
+{
+ uint64_t guid, top_guid;
+ uint64_t children;
+ nvlist_t *nvtop, *vdev, **child;
+ vdev_stat_t *vs;
+ uint_t i, c, vdev_i = UINT_MAX, width, *nfaults_map = NULL;
+ boolean_t res = B_FALSE;
+
+ for (int try = 0; try < 4; try++) {
+ if (find_by_guid_impl(zhdl, pool_guid, vdev_guid, &vdev,
+ &top_guid) == NULL)
+ return (B_FALSE);
+
+ if (find_by_guid_impl(zhdl, pool_guid, top_guid, &nvtop,
+ NULL) == NULL)
+ return (B_FALSE);
+
+ if (nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN,
+ &child, &width) != 0)
+ return (B_FALSE);
+
+ if (nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_DRAID_NCHILDREN,
+ &children) != 0) /* not dRAID */
+ return (B_FALSE);
+
+ if (width == children) /* dRAID without failure domains */
+ return (B_FALSE);
+
+ if (nfaults_map == NULL)
+ nfaults_map = fmd_hdl_alloc(hdl,
+ children * sizeof (*nfaults_map), FMD_SLEEP);
+ memset(nfaults_map, 0, children * sizeof (*nfaults_map));
+
+ for (c = 0; c < width; c++) {
+ nvlist_lookup_uint64_array(child[c],
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &i);
+
+ if (vs->vs_state != VDEV_STATE_HEALTHY)
+ nfaults_map[c % children]++;
+
+ if (vs->vs_state != VDEV_STATE_HEALTHY &&
+ nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
+ &guid) == 0 && guid == vdev_guid)
+ vdev_i = (c % children);
+ }
+
+ for (c = 0; c < children; c++) {
+ if (c == vdev_i &&
+ nfaults_map[c] == (width / children)) {
+ res = B_TRUE;
+ break;
+ }
+ }
+
+ if (res)
+ break;
+
+ /*
+ * No rush with starting resilver, it can be domain failure,
+ * in which case we need to wait a little to allow more devices
+ * to get into faulted state so that we could detect that
+ * it's the domain failure indeed.
+ */
+ sleep(5);
+ }
+
+ fmd_hdl_free(hdl, nfaults_map, children * sizeof (*nfaults_map));
+
+ if (res)
+ fmd_hdl_debug(hdl, "vdev %llu belongs to draid fdomain failure",
+ vdev_guid);
+
+ return (res);
+}
+
+/*
* Given a vdev, attempt to replace it with every known spare until one
* succeeds or we run out of devices to try.
* Return whether we were successful or not in replacing the device.
@@ -446,6 +558,14 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
return;
/*
+ * Resilvering domain failures can take a lot of computing and
+ * I/O bandwidth resources, only to be wasted when the failed
+ * domain component (for example enclosure) is replaced.
+ */
+ if (is_draid_fdomain_failure(hdl, zhdl, pool_guid, vdev_guid))
+ return;
+
+ /*
* If state removed is requested for already removed vdev,
* its a loopback event from spa_async_remove(). Just
* ignore it.
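A worked example of the heuristic above, with made-up numbers: for a 12-disk dRAID top-level vdev whose ZPOOL_CONFIG_DRAID_NCHILDREN is 4, width / children is 3, i.e. three failure groups of four disks. A child at index c maps to slot c % 4 of its group, so faults on children 2, 6 and 10 all land in slot 2 and drive nfaults_map[2] to 3. That equals the group count, so the fault is classified as a failure-domain failure and the retire agent skips attaching a distributed spare instead of resilvering data that will come back once the shared component (for example an enclosure) is replaced.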
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 48e563181fda..631ddda5c6e6 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -32,6 +32,7 @@
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright 2026 Oxide Computer Company
*/
#include <assert.h>
@@ -8739,12 +8740,6 @@ zfs_do_change_key(int argc, char **argv)
}
}
- if (inheritkey && !nvlist_empty(props)) {
- (void) fprintf(stderr,
- gettext("Properties not allowed for inheriting\n"));
- usage(B_FALSE);
- }
-
argc -= optind;
argv += optind;
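Dropping the check means zfs change-key no longer rejects -o properties combined with -i (inherit). Judging by the new zfs_change-key_userprop.ksh test in this commit, the intended use is attaching user properties during a key change; whether a given property is actually accepted depends on the libzfs_crypto.c changes elsewhere in the diff. A purely hypothetical invocation shape, with a made-up property name and dataset:

    # Hypothetical: record a user property while reverting to the parent's key.
    zfs change-key -i -o com.example:key-rotated=2026-04-18 tank/enc/child
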
diff --git a/sys/contrib/openzfs/cmd/zinject/translate.c b/sys/contrib/openzfs/cmd/zinject/translate.c
index cd157b5928f6..8d8e3870ec4b 100644
--- a/sys/contrib/openzfs/cmd/zinject/translate.c
+++ b/sys/contrib/openzfs/cmd/zinject/translate.c
@@ -165,22 +165,28 @@ initialize_range(err_type_t type, int level, char *range,
record->zi_start = 0;
record->zi_end = -1ULL;
} else {
- char *end;
+ char *comma;
+ int error;
- /* XXX add support for suffixes */
- record->zi_start = strtoull(range, &end, 10);
+ comma = strchr(range, ',');
+ if (comma != NULL)
+ *comma = '\0';
+ error = zfs_nicestrtonum(g_zfs, range,
+ &record->zi_start);
- if (*end == '\0')
- record->zi_end = record->zi_start + 1;
- else if (*end == ',')
- record->zi_end = strtoull(end + 1, &end, 10);
+ if (comma != NULL)
+ *comma = ',';
- if (*end != '\0') {
- (void) fprintf(stderr, "invalid range '%s': must be "
- "a numeric range of the form 'start[,end]'\n",
- range);
- return (-1);
+ if (error != 0)
+ goto bad_range;
+
+ if (comma != NULL) {
+ if (zfs_nicestrtonum(g_zfs, comma + 1,
+ &record->zi_end) != 0)
+ goto bad_range;
+ } else {
+ record->zi_end = record->zi_start + 1;
}
}
@@ -213,6 +219,11 @@ initialize_range(err_type_t type, int level, char *range,
record->zi_level = level;
return (0);
+
+bad_range:
+ (void) fprintf(stderr, "invalid range '%s': must be of the form "
+ "'start[,end]'\n", range);
+ return (-1);
}
int
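Because the range now goes through zfs_nicestrtonum(), the zinject -r argument accepts size suffixes, matching the updated usage text in zinject.c below. A hedged sketch of the syntax only; the pool, file path and error type are placeholders:

    # Inject errors over a byte range given with suffixes:
    zinject -t data -e io -r 4K,1M /tank/fs/testfile
    # A single value still means end = start + 1:
    zinject -t data -e io -r 128K /tank/fs/testfile
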
diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c
index c2f646f2567d..37ff92a816f4 100644
--- a/sys/contrib/openzfs/cmd/zinject/zinject.c
+++ b/sys/contrib/openzfs/cmd/zinject/zinject.c
@@ -389,7 +389,9 @@ usage(void)
"0.\n"
"\t\t-m\tAutomatically remount underlying filesystem.\n"
"\t\t-r\tInject error over a particular logical range of an\n"
- "\t\t\tobject. Will be translated to the appropriate blkid\n"
+ "\t\t\tobject, specified as 'start[,end]'. Numeric\n"
+ "\t\t\tsuffixes (K, M, G, T, P, E) are accepted.\n"
+ "\t\t\tWill be translated to the appropriate blkid\n"
"\t\t\trange according to the object's properties.\n"
"\t\t-a\tFlush the ARC cache. Can be specified without any\n"
"\t\t\tassociated object.\n"
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index eb76a21352ab..3ed7babc1ca9 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -3528,6 +3528,11 @@ show_import(nvlist_t *config, boolean_t report_error)
"accessed by another system.\n"));
break;
+ case ZPOOL_STATUS_FAULTED_FDOM_R:
+ (void) printf_color(ANSI_YELLOW, gettext("One or more failure "
+ " domains are faulted.\n"));
+ break;
+
case ZPOOL_STATUS_FAULTED_DEV_R:
case ZPOOL_STATUS_FAULTED_DEV_NR:
(void) printf_color(ANSI_YELLOW, gettext("One or more devices "
@@ -8063,7 +8068,7 @@ zpool_do_online(int argc, char **argv)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
(void) fprintf(stderr, gettext("failed to open pool "
- "\"%s\""), poolname);
+ "\"%s\"\n"), poolname);
return (1);
}
@@ -8207,7 +8212,7 @@ zpool_do_offline(int argc, char **argv)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL) {
(void) fprintf(stderr, gettext("failed to open pool "
- "\"%s\""), poolname);
+ "\"%s\"\n"), poolname);
return (1);
}
@@ -10720,6 +10725,18 @@ print_status_reason(zpool_handle_t *zhp, status_cbdata_t *cbp,
"or use 'zpool clear' to mark the device\n\trepaired.\n"));
break;
+ case ZPOOL_STATUS_FAULTED_FDOM_R:
+ (void) snprintf(status, ST_SIZE,
+ gettext("One or more failure domains are faulted. "
+ "The storage devices may be\n\tintact. Sufficient "
+ "replicas exist for the pool to continue functioning\n\t"
+ "in a degraded state.\n"));
+ (void) snprintf(action, AC_SIZE,
+ gettext("Replace the faulted domain device, "
+ "or use 'zpool clear' to mark domain\n\tstorage devices "
+ "repaired.\n"));
+ break;
+
case ZPOOL_STATUS_FAULTED_DEV_NR:
(void) snprintf(status, ST_SIZE,
gettext("One or more devices are "
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
index d1e9ef76dc10..583c9646ad7c 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c
@@ -1323,36 +1323,43 @@ is_grouping(const char *type, int *mindev, int *maxdev)
* Extract the configuration parameters encoded in the dRAID type and
* use them to generate a dRAID configuration. The expected format is:
*
- * draid[<parity>][:<data><d|D>][:<children><c|C>][:<spares><s|S>]
+ * draid[<parity>][:<data>d][:<children>c][:<spares>s][:<width>w]
*
* The intent is to be able to generate a good configuration when no
* additional information is provided. The only mandatory component
* of the 'type' is the 'draid' prefix. If a value is not provided
* then reasonable defaults are used. The optional components may
- * appear in any order but the d/s/c suffix is required.
+ * appear in any order but the d/s/c/w suffix is required.
*
* Valid inputs:
* - data: number of data devices per group (1-255)
- * - parity: number of parity blocks per group (1-3)
- * - spares: number of distributed spare (0-100)
- * - children: total number of devices (1-255)
+ * - parity: number of parity devices per group (1-3)
+ * - children: total number of devices in slice (1-255)
+ * - width: total number of devices, multiple of children (1-255 for now)
+ * - spares: number of distributed spare devices (0-100)
*
* Examples:
* - zpool create tank draid <devices...>
* - zpool create tank draid2:8d:51c:2s <devices...>
+ * - zpool create tank draid2:8d:12c:96w:8s <devices...>
*/
static int
-draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
+draid_config_by_type(nvlist_t *nv, const char *type, uint64_t width,
+ int nfgroup, int nfdomain)
{
uint64_t nparity;
uint64_t nspares = 0;
uint64_t ndata = UINT64_MAX;
uint64_t ngroups = 1;
+ uint64_t children = 0;
long value;
if (strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0)
return (EINVAL);
+ if (nfgroup && nfdomain) /* must be only one of two or none */
+ return (EINVAL);
+
nparity = (uint64_t)get_parity(type);
if (nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) {
fprintf(stderr,
@@ -1376,24 +1383,35 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
return (EINVAL);
}
- /* Expected non-zero value with c/d/s suffix */
+ /* Expected non-zero value with c/d/s/w suffix */
value = strtol(p, &end, 10);
char suffix = tolower(*end);
if (errno != 0 ||
- (suffix != 'c' && suffix != 'd' && suffix != 's')) {
+ (suffix != 'c' && suffix != 'd' && suffix != 's' &&
+ suffix != 'w')) {
(void) fprintf(stderr, gettext("invalid dRAID "
- "syntax; expected [:<number><c|d|s>] not '%s'\n"),
- type);
+ "syntax; expected [:<number><c|d|s|w>], "
+ "not '%s'\n"), type);
return (EINVAL);
}
if (suffix == 'c') {
- if ((uint64_t)value != children) {
+ if ((uint64_t)value > width ||
+ width % (uint64_t)value != 0) {
fprintf(stderr,
- gettext("invalid number of dRAID children; "
+ gettext("invalid number of dRAID disks; "
+ "multiple of %llu required but %llu "
+ "provided\n"), (u_longlong_t)value,
+ (u_longlong_t)width);
+ return (EINVAL);
+ }
+ children = value;
+ } else if (suffix == 'w') {
+ if ((uint64_t)value != width) {
+ fprintf(stderr,
+ gettext("invalid number of dRAID disks; "
"%llu required but %llu provided\n"),
- (u_longlong_t)value,
- (u_longlong_t)children);
+ (u_longlong_t)value, (u_longlong_t)width);
return (EINVAL);
}
} else if (suffix == 'd') {
@@ -1405,29 +1423,60 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
}
}
+ if (!children && nfgroup)
+ children = width / nfgroup;
+ if (!children && nfdomain)
+ children = nfdomain;
+ if (!children)
+ children = width;
+
+ int fgrps = width / children;
+
+ if (fgrps == 1 && (nfgroup || nfdomain)) {
+ fprintf(stderr, gettext("failure domains are not set "
+ "in dRAID vdev descriptor\n"));
+ return (EINVAL);
+ }
+
+ if (fgrps > 1 && nfgroup && fgrps != nfgroup) {
+ fprintf(stderr, gettext("invalid number of failure groups "
+ "%d, must be %d\n"), nfgroup, fgrps);
+ return (EINVAL);
+ }
+
+ if (fgrps > 1 && nfdomain && nfdomain != children) {
+ fprintf(stderr, gettext("invalid number of failure domains "
+ "%d, must be %lu\n"), nfdomain, children);
+ return (EINVAL);
+ }
+
+ int nspare = nspares / fgrps;
+ if (nspares % fgrps)
+ nspare++;
+
/*
* When a specific number of data disks is not provided limit a
* redundancy group to 8 data disks. This value was selected to
* provide a reasonable tradeoff between capacity and performance.
*/
if (ndata == UINT64_MAX) {
- if (children > nspares + nparity) {
- ndata = MIN(children - nspares - nparity, 8);
+ if (children > (nspare + nparity)) {
+ ndata = MIN(children - nspare - nparity, 8);
} else {
- fprintf(stderr, gettext("request number of "
- "distributed spares %llu and parity level %llu\n"
+ fprintf(stderr, gettext("requested number of "
+ "distributed spares %llu and parity level %llu "
"leaves no disks available for data\n"),
- (u_longlong_t)nspares, (u_longlong_t)nparity);
+ (u_longlong_t)nspare, (u_longlong_t)nparity);
return (EINVAL);
}
}
/* Verify the maximum allowed group size is never exceeded. */
- if (ndata == 0 || (ndata + nparity > children - nspares)) {
+ if (ndata == 0 || (ndata + nparity > children - nspare)) {
fprintf(stderr, gettext("requested number of dRAID data "
- "disks per group %llu is too high,\nat most %llu disks "
+ "disks %llu per group is too high,\nat most %llu disks "
"are available for data\n"), (u_longlong_t)ndata,
- (u_longlong_t)(children - nspares - nparity));
+ (u_longlong_t)(children - nspare - nparity));
return (EINVAL);
}
@@ -1435,7 +1484,7 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
* Verify the requested number of spares can be satisfied.
* An arbitrary limit of 100 distributed spares is applied.
*/
- if (nspares > 100 || nspares > (children - (ndata + nparity))) {
+ if (nspare > 100 || nspare > (children - (ndata + nparity))) {
fprintf(stderr,
gettext("invalid number of dRAID spares %llu; additional "
"disks would be required\n"), (u_longlong_t)nspares);
@@ -1443,14 +1492,14 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
}
/* Verify the requested number children is sufficient. */
- if (children < (ndata + nparity + nspares)) {
+ if (children < (ndata + nparity + nspare)) {
fprintf(stderr, gettext("%llu disks were provided, but at "
"least %llu disks are required for this config\n"),
(u_longlong_t)children,
- (u_longlong_t)(ndata + nparity + nspares));
+ (u_longlong_t)(ndata + nparity + nspare));
}
- if (children > VDEV_DRAID_MAX_CHILDREN) {
+ if (width > VDEV_DRAID_MAX_CHILDREN) {
fprintf(stderr, gettext("%llu disks were provided, but "
"dRAID only supports up to %u disks"),
(u_longlong_t)children, VDEV_DRAID_MAX_CHILDREN);
@@ -1459,16 +1508,36 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
/*
* Calculate the minimum number of groups required to fill a slice.
* This is the LCM of the stripe width (ndata + nparity) and the
- * number of data drives (children - nspares).
+ * number of data drives (children - nspare).
+ *
+ * In case of failure domains, some failure groups may have less
+ * number of spares than others, so they will have different number
+ * of ngroups.
*/
- while (ngroups * (ndata + nparity) % (children - nspares) != 0)
+ uint64_t ndisks1 = children - (nspares / fgrps);
+ uint64_t ndisks2 = (nspares % fgrps) ? ndisks1 - 1 : ndisks1;
+ while (ngroups * (ndata + nparity) % ndisks2 != 0 ||
+ (ndisks1 != ndisks2 &&
+ ((ngroups + 1) * (ndata + nparity) % ndisks1) != 0))
+ ngroups++;
+
+ /* Keep bigger valude of ngroups for the next calculation. */
+ if (ndisks1 != ndisks2)
ngroups++;
+ /*
+ * Total ngroups in all failure groups. The failure groups with
+ * additional spare (nspares % fgrps) have one less ngroups.
+ */
+ ngroups = (ngroups - 1) * (nspares % fgrps) +
+ ngroups * (fgrps - (nspares % fgrps));
+
/* Store the basic dRAID configuration. */
fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, ndata);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
+ fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NCHILDREN, children);
return (0);
}
@@ -1606,10 +1675,41 @@ construct_spec(nvlist_t *props, int argc, char **argv)
nlogs++;
}
+ int nfdomain = 0, nfgroup = 0;
+ int fdndev = 0, fgndev = 0;
+ int fdndev_prev = 0, fgndev_prev = 0;
+
for (c = 1; c < argc; c++) {
if (is_grouping(argv[c], NULL, NULL) != NULL)
break;
+ if (strcmp(argv[c], "fgroup") == 0 ||
+ strcmp(argv[c], "failure_group") == 0) {
+ if (fgndev_prev &&
+ fgndev_prev != fgndev)
+ break;
+ fgndev_prev = fgndev;
+ fgndev = 0;
+ nfgroup++;
+ continue;
+ }
+
+ if (strcmp(argv[c], "fdomain") == 0 ||
+ strcmp(argv[c], "failure_domain") == 0) {
+ if (fdndev_prev &&
+ fdndev_prev != fdndev)
+ break;
+ fdndev_prev = fdndev;
+ fdndev = 0;
+ nfdomain++;
+ continue;
+ }
+
+ if (nfgroup)
+ fgndev++;
+ if (nfdomain)
+ fdndev++;
+
children++;
child = realloc(child,
children * sizeof (nvlist_t *));
@@ -1647,6 +1747,81 @@ construct_spec(nvlist_t *props, int argc, char **argv)
goto spec_out;
}
+ if ((nfdomain || nfgroup) &&
+ strcmp(type, VDEV_TYPE_DRAID) != 0) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s is not dRAID and cannot "
+ "have failure domains\n"), argv[0]);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ goto spec_out;
+ }
+
+ if (nfgroup && nfdomain) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s has mixed configuration "
+ "of %d failure groups and %d failure "
+ "domains, it must have either fgroups or "
+ "fdomains, not both\n"), argv[0],
+ nfgroup, nfdomain);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ goto spec_out;
+ }
+
+ if (nfgroup == 1 || nfdomain == 1) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s has only one failure %s "
+ "configured, it must be more than one\n"),
+ argv[0], nfgroup ? "group" : "domain");
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ goto spec_out;
+ }
+
+ if (fgndev_prev != fgndev) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s has different number of "
+ "devices in failure group %d than in "
+ "previous group: %d != %d\n"), argv[0],
+ nfgroup, fgndev, fgndev_prev);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ goto spec_out;
+ }
+
+ if (fdndev_prev != fdndev) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s has different number of "
+ "devices in failure domain %d than in "
+ "previous domain: %d != %d\n"), argv[0],
+ nfdomain, fdndev, fdndev_prev);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ goto spec_out;
+ }
+
+ if (nfdomain) {
+ /* Put children in the right order */
+ nvlist_t **ch = NULL;
+ ch = realloc(ch,
+ children * sizeof (nvlist_t *));
+ if (ch == NULL)
+ zpool_no_memory();
+ int dlen = children / nfdomain;
+ int i = 0;
+ for (int g = 0; g < dlen; g++)
+ for (int d = 0; d < nfdomain; d++)
+ ch[i++] = child[g + (d * dlen)];
+ free(child);
+ child = ch;
+ }
+
argc -= c;
argv += c;
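The fdomain reordering above is easiest to see with a tiny example. Assuming a hypothetical spec of the form "draid1:... fdomain A B fdomain C D fdomain E F" (device names are placeholders), nfdomain is 3 and dlen is 2; the standalone sketch below replays the same loop and emits A C E B D F, i.e. each consecutive run of nfdomain children takes one device from each domain.

#include <stdio.h>

int
main(void)
{
	/* Children as listed on the command line, domain by domain. */
	const char *child[] = { "A", "B", "C", "D", "E", "F" };
	int children = 6, nfdomain = 3;
	int dlen = children / nfdomain;

	/* Same index walk as the patch: interleave one device per domain. */
	for (int g = 0; g < dlen; g++)
		for (int d = 0; d < nfdomain; d++)
			printf("%s ", child[g + (d * dlen)]);
	printf("\n");	/* prints: A C E B D F */
	return (0);
}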
@@ -1692,7 +1867,8 @@ construct_spec(nvlist_t *props, int argc, char **argv)
}
if (strcmp(type, VDEV_TYPE_DRAID) == 0) {
if (draid_config_by_type(nv,
- fulltype, children) != 0) {
+ fulltype, children, nfgroup,
+ nfdomain) != 0) {
for (c = 0; c < children; c++)
nvlist_free(child[c]);
free(child);
diff --git a/sys/contrib/openzfs/cmd/ztest.c b/sys/contrib/openzfs/cmd/ztest.c
index bab7e32db414..ece9acbaa459 100644
--- a/sys/contrib/openzfs/cmd/ztest.c
+++ b/sys/contrib/openzfs/cmd/ztest.c
@@ -3038,7 +3038,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
VERIFY3U(ENOENT, ==,
- spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
+ spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL, NULL));
fnvlist_free(nvroot);
/*
@@ -3046,7 +3046,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1);
VERIFY3U(ENOENT, ==,
- spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
+ spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL, NULL));
fnvlist_free(nvroot);
/*
@@ -3056,7 +3056,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
(void) pthread_rwlock_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1);
VERIFY3U(EEXIST, ==,
- spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
+ spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL, NULL));
fnvlist_free(nvroot);
/*
@@ -3208,7 +3208,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
props = fnvlist_alloc();
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
- VERIFY0(spa_create(name, nvroot, props, NULL, NULL));
+ VERIFY0(spa_create(name, nvroot, props, NULL, NULL, NULL));
fnvlist_free(nvroot);
fnvlist_free(props);
@@ -8686,7 +8686,8 @@ ztest_init(ztest_shared_t *zs)
free(buf);
}
- VERIFY0(spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
+ VERIFY0(spa_create(ztest_opts.zo_pool, nvroot, props,
+ NULL, NULL, NULL));
fnvlist_free(nvroot);
fnvlist_free(props);
diff --git a/sys/contrib/openzfs/config/deb.am b/sys/contrib/openzfs/config/deb.am
index 1c04296613ca..fbecc475130e 100644
--- a/sys/contrib/openzfs/config/deb.am
+++ b/sys/contrib/openzfs/config/deb.am
@@ -93,17 +93,17 @@ debian:
cp -r contrib/debian debian; chmod +x debian/rules;
native-deb-utils: native-deb-local debian
- while [ -f debian/deb-build.lock ]; do sleep 1; done; \
- echo "native-deb-utils" > debian/deb-build.lock; \
- cp contrib/debian/control debian/control; \
- $(DPKGBUILD) -b -rfakeroot -us -uc; \
- $(RM) -f debian/deb-build.lock
+ while [ -f debian/deb-build.lock ]; do sleep 1; done && \
+ echo "native-deb-utils" > debian/deb-build.lock && \
+ trap '$(RM) -f debian/deb-build.lock' EXIT && \
+ cp contrib/debian/control debian/control && \
+ $(DPKGBUILD) -b -rfakeroot -us -uc
native-deb-kmod: native-deb-local debian
- while [ -f debian/deb-build.lock ]; do sleep 1; done; \
- echo "native-deb-kmod" > debian/deb-build.lock; \
- sh scripts/make_gitrev.sh; \
- fakeroot debian/rules override_dh_binary-modules; \
- $(RM) -f debian/deb-build.lock
+ while [ -f debian/deb-build.lock ]; do sleep 1; done && \
+ echo "native-deb-kmod" > debian/deb-build.lock && \
+ trap '$(RM) -f debian/deb-build.lock' EXIT && \
+ sh scripts/make_gitrev.sh && \
+ fakeroot debian/rules override_dh_binary-modules
native-deb: native-deb-utils native-deb-kmod
diff --git a/sys/contrib/openzfs/config/kernel-copy-from-user-inatomic.m4 b/sys/contrib/openzfs/config/kernel-copy-from-user-inatomic.m4
deleted file mode 100644
index ed8ab95a30c3..000000000000
--- a/sys/contrib/openzfs/config/kernel-copy-from-user-inatomic.m4
+++ /dev/null
@@ -1,30 +0,0 @@
-dnl # SPDX-License-Identifier: CDDL-1.0
-dnl #
-dnl # On certain architectures `__copy_from_user_inatomic`
-dnl # is a GPL exported variable and cannot be used by OpenZFS.
-dnl #
-
-dnl #
-dnl # Checking if `__copy_from_user_inatomic` is available.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC], [
- ZFS_LINUX_TEST_SRC([__copy_from_user_inatomic], [
- #include <linux/uaccess.h>
- ], [
- int result __attribute__ ((unused)) = __copy_from_user_inatomic(NULL, NULL, 0);
- ], [], [ZFS_META_LICENSE])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC], [
- AC_MSG_CHECKING([whether __copy_from_user_inatomic is available])
- ZFS_LINUX_TEST_RESULT([__copy_from_user_inatomic_license], [
- AC_MSG_RESULT(yes)
- ], [
- AC_MSG_RESULT(no)
- AC_MSG_ERROR([
- *** The `__copy_from_user_inatomic()` Linux kernel function is
- *** incompatible with the CDDL license and will prevent the module
- *** linking stage from succeeding. OpenZFS cannot be compiled.
- ])
- ])
-])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index 3389c5359834..e1dba34e5dbe 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -120,7 +120,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_ADD_DISK
ZFS_AC_KERNEL_SRC_KTHREAD
ZFS_AC_KERNEL_SRC_ZERO_PAGE
- ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_SRC_IDMAP_MNT_API
ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS
ZFS_AC_KERNEL_SRC_IATTR_VFSID
@@ -245,7 +244,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_ADD_DISK
ZFS_AC_KERNEL_KTHREAD
ZFS_AC_KERNEL_ZERO_PAGE
- ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_IDMAP_MNT_API
ZFS_AC_KERNEL_IDMAP_NO_USERNS
ZFS_AC_KERNEL_IATTR_VFSID
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
index 5e7cdb98571c..fa203185316f 100644
--- a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.install
@@ -42,6 +42,7 @@ usr/bin/dbufstat usr/sbin
usr/bin/zilstat
usr/share/zfs/compatibility.d/
usr/share/bash-completion/completions
+usr/share/man/man1/dbufstat.1
usr/share/man/man1/zarcsummary.1
usr/share/man/man1/zarcstat.1
usr/share/man/man1/zhack.1
diff --git a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
index 95c9a6738289..4d52502bd217 100644
--- a/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
+++ b/sys/contrib/openzfs/contrib/pyzfs/libzfs_core/_constants.py
@@ -105,6 +105,10 @@ zfs_errno = enum_with_offset(1024, [
'ZFS_ERR_RESUME_EXISTS',
'ZFS_ERR_CRYPTO_NOTSUP',
'ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS',
+ 'ZFS_ERR_ASHIFT_MISMATCH',
+ 'ZFS_ERR_STREAM_LARGE_MICROZAP',
+ 'ZFS_ERR_TOO_MANY_SITOUTS',
+ 'ZFS_ERR_NO_USER_NS_SUPPORT',
],
{}
)
diff --git a/sys/contrib/openzfs/contrib/pyzfs/setup.py.in b/sys/contrib/openzfs/contrib/pyzfs/setup.py.in
index 43a1accfaf02..55640ec0f0d4 100644
--- a/sys/contrib/openzfs/contrib/pyzfs/setup.py.in
+++ b/sys/contrib/openzfs/contrib/pyzfs/setup.py.in
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
#
# Copyright 2015 ClusterHQ
#
@@ -25,11 +26,10 @@ setup(
author="ClusterHQ",
author_email="support@clusterhq.com",
url="http://pyzfs.readthedocs.org",
- license="Apache License, Version 2.0",
+ license="Apache-2.0",
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
- "License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h
index a9a31c90ae74..e936f4280ff3 100644
--- a/sys/contrib/openzfs/include/libzfs.h
+++ b/sys/contrib/openzfs/include/libzfs.h
@@ -160,6 +160,7 @@ typedef enum zfs_error {
EZFS_SHAREFAILED, /* filesystem share failed */
EZFS_RAIDZ_EXPAND_IN_PROGRESS, /* a raidz is currently expanding */
EZFS_ASHIFT_MISMATCH, /* can't add vdevs with different ashifts */
+ EZFS_NO_USER_NS_SUPPORT, /* kernel built without CONFIG_USER_NS */
EZFS_UNKNOWN
} zfs_error_t;
@@ -443,6 +444,7 @@ typedef enum {
* checksum errors) has been lost.
*/
ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */
+ ZPOOL_STATUS_FAULTED_FDOM_R, /* faulted fdomain with replicas */
ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */
/*
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
index cfe63946706b..12c80b39dfac 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/zone.h
@@ -31,6 +31,7 @@
#define _OPENSOLARIS_SYS_ZONE_H_
#include <sys/jail.h>
+#include <sys/errno.h>
/*
* Macros to help with zone visibility restrictions.
@@ -65,4 +66,76 @@ extern int zone_dataset_visible(const char *, int *);
*/
extern uint32_t zone_get_hostid(void *);
+/*
+ * Operations that can be authorized via zoned_uid delegation.
+ * Shared with Linux; on FreeBSD these are defined but the check
+ * always returns NOT_APPLICABLE (no user namespace support).
+ */
+typedef enum zone_uid_op {
+ ZONE_OP_CREATE,
+ ZONE_OP_SNAPSHOT,
+ ZONE_OP_CLONE,
+ ZONE_OP_DESTROY,
+ ZONE_OP_RENAME,
+ ZONE_OP_SETPROP
+} zone_uid_op_t;
+
+typedef enum zone_admin_result {
+ ZONE_ADMIN_NOT_APPLICABLE,
+ ZONE_ADMIN_ALLOWED,
+ ZONE_ADMIN_DENIED
+} zone_admin_result_t;
+
+/*
+ * FreeBSD stub: zoned_uid delegation is not applicable (no user namespaces).
+ * Always returns NOT_APPLICABLE so callers fall through to existing
+ * jail-based permission checks.
+ */
+static inline zone_admin_result_t
+zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
+ const char *aux_dataset)
+{
+ (void) dataset, (void) op, (void) aux_dataset;
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+}
+
+/*
+ * Callback type for looking up zoned_uid property.
+ */
+typedef uid_t (*zone_get_zoned_uid_fn_t)(const char *dataset,
+ char *root_out, size_t root_size);
+
+/*
+ * FreeBSD stubs: zoned_uid attach/detach require user namespaces
+ * which FreeBSD does not have. Return ENXIO (consistent with the
+ * Linux fallback when CONFIG_USER_NS is not defined).
+ */
+static inline int
+zone_dataset_attach_uid(struct ucred *cred, const char *dataset, uid_t uid)
+{
+ (void) cred, (void) dataset, (void) uid;
+ return (ENXIO);
+}
+
+static inline int
+zone_dataset_detach_uid(struct ucred *cred, const char *dataset, uid_t uid)
+{
+ (void) cred, (void) dataset, (void) uid;
+ return (ENXIO);
+}
+
+/*
+ * FreeBSD stubs: no-op since zoned_uid delegation requires user namespaces.
+ */
+static inline void
+zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
+{
+ (void) fn;
+}
+
+static inline void
+zone_unregister_zoned_uid_callback(void)
+{
+}
+
#endif /* !_OPENSOLARIS_SYS_ZONE_H_ */
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index 3ed311d49cc6..62e78d82bcb9 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -71,6 +71,7 @@ struct zfsvfs {
boolean_t z_utf8; /* utf8-only */
int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
+ boolean_t z_relatime; /* enable relatime mount option */
boolean_t z_unmounted; /* unmounted */
zfs_teardown_lock_t z_teardown_lock;
zfs_teardown_inactive_lock_t z_teardown_inactive_lock;
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
index 15e3affba0e8..9f8667536e30 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h
@@ -174,11 +174,13 @@ zfs_exit(zfsvfs_t *zfsvfs, const char *tag)
(tp)->tv_nsec = (long)(stmp)[1]; \
}
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
- if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
+ if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY) && \
+ (!(zfsvfs)->z_relatime || zfs_relatime_need_update(zp))) \
zfs_tstamp_update_setup_ext(zp, ACCESSED, NULL, NULL, B_FALSE);
extern void zfs_tstamp_update_setup_ext(struct znode *,
uint_t, uint64_t [2], uint64_t [2], boolean_t have_tx);
+extern boolean_t zfs_relatime_need_update(const struct znode *);
extern void zfs_znode_free(struct znode *);
extern zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE];
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h b/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
index 4e75202fbdde..2933c5a5c635 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/zone.h
@@ -42,10 +42,68 @@ extern int zone_dataset_attach(cred_t *, const char *, int);
extern int zone_dataset_detach(cred_t *, const char *, int);
/*
+ * Attach the given dataset to all user namespaces owned by the given UID.
+ */
+extern int zone_dataset_attach_uid(cred_t *, const char *, uid_t);
+
+/*
+ * Detach the given dataset from UID-based zoning.
+ */
+extern int zone_dataset_detach_uid(cred_t *, const char *, uid_t);
+
+/*
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *dataset, int *write);
+/*
+ * Operations that can be authorized via zoned_uid delegation.
+ * Used by zone_dataset_admin_check() to apply operation-specific constraints.
+ */
+typedef enum zone_uid_op {
+ ZONE_OP_CREATE, /* Create child dataset */
+ ZONE_OP_SNAPSHOT, /* Create snapshot */
+ ZONE_OP_CLONE, /* Clone from snapshot */
+ ZONE_OP_DESTROY, /* Destroy dataset/snapshot */
+ ZONE_OP_RENAME, /* Rename (both src and dst checked) */
+ ZONE_OP_SETPROP /* Set properties */
+} zone_uid_op_t;
+
+/*
+ * Result of admin authorization check for zoned_uid delegation.
+ */
+typedef enum zone_admin_result {
+ ZONE_ADMIN_NOT_APPLICABLE, /* In global zone, use normal checks */
+ ZONE_ADMIN_ALLOWED, /* Authorized via zoned_uid */
+ ZONE_ADMIN_DENIED /* In user ns but not authorized */
+} zone_admin_result_t;
+
+/*
+ * Check if a dataset operation is authorized via zoned_uid delegation.
+ * For ZONE_OP_RENAME and ZONE_OP_CLONE, aux_dataset provides the
+ * second dataset (destination for rename, origin for clone).
+ * Returns ZONE_ADMIN_ALLOWED if authorized, ZONE_ADMIN_DENIED if in a
+ * user namespace but not authorized, or ZONE_ADMIN_NOT_APPLICABLE if
+ * in the global zone (caller should use normal permission checks).
+ */
+extern zone_admin_result_t zone_dataset_admin_check(const char *dataset,
+ zone_uid_op_t op, const char *aux_dataset);
+
+/*
+ * Callback type for looking up zoned_uid property.
+ * Returns the zoned_uid value if found, 0 if not set or on error.
+ * If root_out is non-NULL, copies the delegation root dataset name.
+ */
+typedef uid_t (*zone_get_zoned_uid_fn_t)(const char *dataset,
+ char *root_out, size_t root_size);
+
+/*
+ * Register/unregister the zoned_uid property lookup callback.
+ * Called by ZFS module during init/fini.
+ */
+extern void zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn);
+extern void zone_unregister_zoned_uid_callback(void);
+
int spl_zone_init(void);
void spl_zone_fini(void);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
index ab46d5f8ca08..123ea71b4de9 100644
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2026, TrueNAS.
*/
#ifndef _SYS_FS_ZFS_VFSOPS_H
@@ -73,11 +74,6 @@ typedef struct vfs {
kmutex_t vfs_mntpt_lock;
} vfs_t;
-typedef struct zfs_mnt {
- const char *mnt_osname; /* Objset name */
- char *mnt_data; /* Raw mount options */
-} zfs_mnt_t;
-
struct zfsvfs {
vfs_t *z_vfs; /* generic fs struct */
struct super_block *z_sb; /* generic super_block */
@@ -245,11 +241,15 @@ extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
extern void zfsvfs_free(zfsvfs_t *zfsvfs);
extern int zfs_check_global_label(const char *dsname, const char *hexsl);
+extern vfs_t *zfsvfs_vfs_alloc(void);
+extern void zfsvfs_vfs_free(vfs_t *vfsp);
+
extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
-extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent);
+extern int zfs_domount(struct super_block *sb, const char *osname,
+ vfs_t *mntopts, int silent);
extern void zfs_preumount(struct super_block *sb);
extern int zfs_umount(struct super_block *sb);
-extern int zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm);
+extern int zfs_remount(struct super_block *sb, vfs_t *mntopts, int flags);
extern int zfs_statvfs(struct inode *ip, struct kstatfs *statp);
extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp);
extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan,
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index baf3b1508335..0b379ee79caa 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -411,6 +411,8 @@ void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type,
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
+void dbuf_evict_range(struct dnode *dn, uint64_t start_blkid,
+ uint64_t end_blkid);
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
diff --git a/sys/contrib/openzfs/include/sys/dmu.h b/sys/contrib/openzfs/include/sys/dmu.h
index bb623e404955..7db5a04c8209 100644
--- a/sys/contrib/openzfs/include/sys/dmu.h
+++ b/sys/contrib/openzfs/include/sys/dmu.h
@@ -963,6 +963,12 @@ void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);
int dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size);
+void dmu_prefetch_stream(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t len, boolean_t start_now);
+void dmu_prefetch_stream_by_dnode(dnode_t *dn, uint64_t offset,
+ uint64_t len, boolean_t start_now);
+void dmu_evict_range(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t len);
typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */
diff --git a/sys/contrib/openzfs/include/sys/dmu_zfetch.h b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
index a5ddd28026ce..697f4d1f1933 100644
--- a/sys/contrib/openzfs/include/sys/dmu_zfetch.h
+++ b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
@@ -79,6 +79,7 @@ void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
+boolean_t dmu_zfetch_prime(zfetch_t *, uint64_t, uint64_t);
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t,
diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h
index 8bd1db5b7165..b9ded73efe33 100644
--- a/sys/contrib/openzfs/include/sys/dnode.h
+++ b/sys/contrib/openzfs/include/sys/dnode.h
@@ -652,6 +652,19 @@ extern dnode_sums_t dnode_sums;
#endif
+/*
+ * Assert that we are not modifying the range tree for the syncing TXG from
+ * a non-syncing thread. We verify that either the transaction group is
+ * strictly newer than the one currently syncing (meaning it's being modified
+ * in open context), OR the current thread is the sync thread itself. If this
+ * triggers, it indicates a race where dn_free_ranges is being modified while
+ * dnode_sync() may be iterating over it.
+ */
+#define FREE_RANGE_VERIFY(tx, dn) \
+ ASSERT((tx)->tx_txg > spa_syncing_txg((dn)->dn_objset->os_spa) || \
+ dmu_objset_pool((dn)->dn_objset)->dp_tx.tx_sync_thread == \
+ curthread)
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/contrib/openzfs/include/sys/dsl_crypt.h b/sys/contrib/openzfs/include/sys/dsl_crypt.h
index 1a088b8f3d3d..14413072a592 100644
--- a/sys/contrib/openzfs/include/sys/dsl_crypt.h
+++ b/sys/contrib/openzfs/include/sys/dsl_crypt.h
@@ -200,7 +200,8 @@ void dsl_crypto_recv_raw_key_sync(struct dsl_dataset *ds,
int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj,
dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key);
-int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp,
+ nvlist_t *userprops);
int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index ba79a674e73f..fcef32ecf9f7 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -204,6 +204,7 @@ typedef enum {
ZFS_PROP_DEFAULTGROUPOBJQUOTA,
ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
ZFS_PROP_SNAPSHOTS_CHANGED_NSECS,
+ ZFS_PROP_ZONED_UID,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -473,6 +474,8 @@ typedef enum {
VDEV_PROP_AUTOSIT,
VDEV_PROP_SLOW_IO_EVENTS,
VDEV_PROP_SCHEDULER,
+ VDEV_PROP_FDOMAIN,
+ VDEV_PROP_FGROUP,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -951,6 +954,9 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
+#define ZPOOL_CONFIG_CREATE_INFO "create_info" /* not stored on disk */
+#define ZPOOL_CREATE_INFO_VDEV "create_err_vdev"
+#define ZPOOL_CREATE_INFO_POOL "create_err_pool"
#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */
#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */
#define ZPOOL_CONFIG_ENABLED_FEAT "enabled_feat" /* not stored on disk */
@@ -1007,6 +1013,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_DRAID_NDATA "draid_ndata"
#define ZPOOL_CONFIG_DRAID_NSPARES "draid_nspares"
#define ZPOOL_CONFIG_DRAID_NGROUPS "draid_ngroups"
+#define ZPOOL_CONFIG_DRAID_NCHILDREN "draid_nchildren"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
@@ -1782,6 +1789,7 @@ typedef enum {
ZFS_ERR_ASHIFT_MISMATCH,
ZFS_ERR_STREAM_LARGE_MICROZAP,
ZFS_ERR_TOO_MANY_SITOUTS,
+ ZFS_ERR_NO_USER_NS_SUPPORT,
} zfs_errno_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
index 0de8a1867a12..3191ccc29f19 100644
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -777,7 +777,7 @@ extern int spa_open_rewind(const char *pool, spa_t **, const void *tag,
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
- nvlist_t *zplprops, struct dsl_crypto_params *dcp);
+ nvlist_t *zplprops, struct dsl_crypto_params *dcp, nvlist_t **errinfo);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h
index 62cf196eeaa4..090b787c0fee 100644
--- a/sys/contrib/openzfs/include/sys/spa_impl.h
+++ b/sys/contrib/openzfs/include/sys/spa_impl.h
@@ -229,6 +229,7 @@ struct spa {
nvlist_t *spa_config_syncing; /* currently syncing config */
nvlist_t *spa_config_splitting; /* config for splitting */
nvlist_t *spa_load_info; /* info and errors from load */
+ nvlist_t *spa_create_info; /* info from create */
uint64_t spa_config_txg; /* txg of last config change */
uint32_t spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
diff --git a/sys/contrib/openzfs/include/sys/vdev_draid.h b/sys/contrib/openzfs/include/sys/vdev_draid.h
index e923092a39ad..e51a1a59f00b 100644
--- a/sys/contrib/openzfs/include/sys/vdev_draid.h
+++ b/sys/contrib/openzfs/include/sys/vdev_draid.h
@@ -68,9 +68,10 @@ typedef struct vdev_draid_config {
*/
uint64_t vdc_ndata; /* # of data devices in group */
uint64_t vdc_nparity; /* # of parity devices in group */
- uint64_t vdc_nspares; /* # of distributed spares */
+ uint64_t vdc_nspares; /* # of distributed spares in slice */
uint64_t vdc_children; /* # of children */
uint64_t vdc_ngroups; /* # groups per slice */
+ uint64_t vdc_width; /* # multiple of children */
/*
* Immutable derived constants.
@@ -103,7 +104,9 @@ extern nvlist_t *vdev_draid_read_config_spare(vdev_t *);
/* Functions for dRAID distributed spares. */
extern vdev_t *vdev_draid_spare_get_child(vdev_t *, uint64_t);
extern vdev_t *vdev_draid_spare_get_parent(vdev_t *);
-extern int vdev_draid_spare_create(nvlist_t *, vdev_t *, uint64_t *, uint64_t);
+extern int vdev_draid_spare_create(nvlist_t *, vdev_t *, uint64_t *, uint64_t *,
+ uint64_t);
+extern boolean_t vdev_draid_fail_domain_allowed(vdev_t *);
#ifdef __cplusplus
}
diff --git a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
index 55885dbd706f..d5fb694eda8d 100644
--- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
@@ -119,6 +119,7 @@ typedef struct raidz_col {
uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
uint8_t rc_force_repair:1; /* Write good data to this column */
uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
+ uint8_t rc_tgt_is_dspare:1; /* The target is draid spare vdev */
uint8_t rc_latency_outlier:1; /* Latency outlier for this device */
int rc_shadow_devidx; /* for double write during expansion */
int rc_shadow_error; /* for double write during expansion */
diff --git a/sys/contrib/openzfs/include/zfeature_common.h b/sys/contrib/openzfs/include/zfeature_common.h
index 56382ca85b55..64606de226b0 100644
--- a/sys/contrib/openzfs/include/zfeature_common.h
+++ b/sys/contrib/openzfs/include/zfeature_common.h
@@ -90,6 +90,7 @@ typedef enum spa_feature {
SPA_FEATURE_DYNAMIC_GANG_HEADER,
SPA_FEATURE_BLOCK_CLONING_ENDIAN,
SPA_FEATURE_PHYSICAL_REWRITE,
+ SPA_FEATURE_DRAID_FAIL_DOMAINS,
SPA_FEATURES
} spa_feature_t;
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index bed2c7979a1b..6349fca09bcb 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -690,7 +690,7 @@
<elf-symbol name='fletcher_4_superscalar4_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
- <elf-symbol name='spa_feature_table' size='2632' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='spa_feature_table' size='2688' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='544' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -2292,7 +2292,8 @@
<enumerator name='ZFS_PROP_DEFAULTGROUPOBJQUOTA' value='104'/>
<enumerator name='ZFS_PROP_DEFAULTPROJECTOBJQUOTA' value='105'/>
<enumerator name='ZFS_PROP_SNAPSHOTS_CHANGED_NSECS' value='106'/>
- <enumerator name='ZFS_NUM_PROPS' value='107'/>
+ <enumerator name='ZFS_PROP_ZONED_UID' value='107'/>
+ <enumerator name='ZFS_NUM_PROPS' value='108'/>
</enum-decl>
<typedef-decl name='zfs_prop_t' type-id='4b000d60' id='58603c44'/>
<enum-decl name='zprop_source_t' naming-typedef-id='a2256d42' id='5903f80e'>
@@ -6202,18 +6203,19 @@
<enumerator name='ZPOOL_STATUS_UNSUP_FEAT_READ' value='18'/>
<enumerator name='ZPOOL_STATUS_UNSUP_FEAT_WRITE' value='19'/>
<enumerator name='ZPOOL_STATUS_FAULTED_DEV_R' value='20'/>
- <enumerator name='ZPOOL_STATUS_FAULTED_DEV_NR' value='21'/>
- <enumerator name='ZPOOL_STATUS_VERSION_OLDER' value='22'/>
- <enumerator name='ZPOOL_STATUS_FEAT_DISABLED' value='23'/>
- <enumerator name='ZPOOL_STATUS_RESILVERING' value='24'/>
- <enumerator name='ZPOOL_STATUS_OFFLINE_DEV' value='25'/>
- <enumerator name='ZPOOL_STATUS_REMOVED_DEV' value='26'/>
- <enumerator name='ZPOOL_STATUS_REBUILDING' value='27'/>
- <enumerator name='ZPOOL_STATUS_REBUILD_SCRUB' value='28'/>
- <enumerator name='ZPOOL_STATUS_NON_NATIVE_ASHIFT' value='29'/>
- <enumerator name='ZPOOL_STATUS_COMPATIBILITY_ERR' value='30'/>
- <enumerator name='ZPOOL_STATUS_INCOMPATIBLE_FEAT' value='31'/>
- <enumerator name='ZPOOL_STATUS_OK' value='32'/>
+ <enumerator name='ZPOOL_STATUS_FAULTED_FDOM_R' value='21'/>
+ <enumerator name='ZPOOL_STATUS_FAULTED_DEV_NR' value='22'/>
+ <enumerator name='ZPOOL_STATUS_VERSION_OLDER' value='23'/>
+ <enumerator name='ZPOOL_STATUS_FEAT_DISABLED' value='24'/>
+ <enumerator name='ZPOOL_STATUS_RESILVERING' value='25'/>
+ <enumerator name='ZPOOL_STATUS_OFFLINE_DEV' value='26'/>
+ <enumerator name='ZPOOL_STATUS_REMOVED_DEV' value='27'/>
+ <enumerator name='ZPOOL_STATUS_REBUILDING' value='28'/>
+ <enumerator name='ZPOOL_STATUS_REBUILD_SCRUB' value='29'/>
+ <enumerator name='ZPOOL_STATUS_NON_NATIVE_ASHIFT' value='30'/>
+ <enumerator name='ZPOOL_STATUS_COMPATIBILITY_ERR' value='31'/>
+ <enumerator name='ZPOOL_STATUS_INCOMPATIBLE_FEAT' value='32'/>
+ <enumerator name='ZPOOL_STATUS_OK' value='33'/>
</enum-decl>
<typedef-decl name='zpool_status_t' type-id='5e770b40' id='d3dd6294'/>
<enum-decl name='zpool_compat_status_t' naming-typedef-id='901b78d1' id='20676925'>
@@ -6285,7 +6287,9 @@
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='54'/>
<enumerator name='VDEV_PROP_SCHEDULER' value='55'/>
- <enumerator name='VDEV_NUM_PROPS' value='56'/>
+ <enumerator name='VDEV_PROP_FDOMAIN' value='56'/>
+ <enumerator name='VDEV_PROP_FGROUP' value='57'/>
+ <enumerator name='VDEV_NUM_PROPS' value='58'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
@@ -6569,7 +6573,8 @@
<enumerator name='SPA_FEATURE_DYNAMIC_GANG_HEADER' value='44'/>
<enumerator name='SPA_FEATURE_BLOCK_CLONING_ENDIAN' value='45'/>
<enumerator name='SPA_FEATURE_PHYSICAL_REWRITE' value='46'/>
- <enumerator name='SPA_FEATURES' value='47'/>
+ <enumerator name='SPA_FEATURE_DRAID_FAIL_DOMAINS' value='47'/>
+ <enumerator name='SPA_FEATURES' value='48'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
@@ -9948,8 +9953,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
- <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='21056' id='fd43354e'>
- <subrange length='47' type-id='7359adad' id='8f8900fe'/>
+ <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='21504' id='bd288d11'>
+ <subrange length='48' type-id='7359adad' id='8f6d2a81'/>
</array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/>
@@ -10010,7 +10015,7 @@
<pointer-type-def type-id='c5c76c9c' size-in-bits='64' id='b7f9d8e6'/>
<qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/>
<pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/>
- <var-decl name='spa_feature_table' type-id='fd43354e' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
+ <var-decl name='spa_feature_table' type-id='bd288d11' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
<var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
<function-decl name='tsearch' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='eaa32e2f'/>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
index f461ad41405b..b302718edfa2 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_crypto.c
@@ -17,6 +17,7 @@
/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
* Copyright 2020 Joyent, Inc.
+ * Copyright 2026 Oxide Computer Company
*/
#include <sys/fs/zfs.h>
@@ -1536,34 +1537,51 @@ error:
static int
zfs_crypto_verify_rewrap_nvlist(zfs_handle_t *zhp, nvlist_t *props,
- nvlist_t **props_out, char *errbuf)
+ boolean_t inheritkey, nvlist_t **props_out, char *errbuf)
{
int ret;
nvpair_t *elem = NULL;
- zfs_prop_t prop;
nvlist_t *new_props = NULL;
- new_props = fnvlist_alloc();
-
/*
* loop through all provided properties, we should only have
- * keyformat, keylocation and pbkdf2iters. The actual validation of
- * values is done by zfs_valid_proplist().
+ * keyformat, keylocation and pbkdf2iters, and user properties.
+ * The actual validation of values is done by zfs_valid_proplist().
*/
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
const char *propname = nvpair_name(elem);
- prop = zfs_name_to_prop(propname);
- switch (prop) {
+ switch (zfs_name_to_prop(propname)) {
case ZFS_PROP_PBKDF2_ITERS:
case ZFS_PROP_KEYFORMAT:
case ZFS_PROP_KEYLOCATION:
+ if (inheritkey) {
+ ret = EINVAL;
+ zfs_error_aux(zhp->zfs_hdl,
+ dgettext(TEXT_DOMAIN,
+ "Only user properties may be set with "
+ "'zfs change-key -i'"));
+ goto error;
+ }
break;
+ case ZPROP_INVAL:
+ if (zfs_prop_user(propname))
+ break;
+ zfs_fallthrough;
default:
ret = EINVAL;
- zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
- "Only keyformat, keylocation and pbkdf2iters may "
- "be set with this command."));
+ if (inheritkey) {
+ zfs_error_aux(zhp->zfs_hdl,
+ dgettext(TEXT_DOMAIN,
+ "Only user properties may be set with "
+ "'zfs change-key -i'"));
+ } else {
+ zfs_error_aux(zhp->zfs_hdl,
+ dgettext(TEXT_DOMAIN,
+ "Only keyformat, keylocation, pbkdf2iters, "
+ "and user properties may be set with this "
+ "command."));
+ }
goto error;
}
}
@@ -1642,17 +1660,17 @@ zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey)
goto error;
}
+ /* validate the provided properties */
+ ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, inheritkey,
+ &props, errbuf);
+ if (ret != 0)
+ goto error;
+
/*
* If the user wants to use the inheritkey variant of this function
* we don't need to collect any crypto arguments.
*/
if (!inheritkey) {
- /* validate the provided properties */
- ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, &props,
- errbuf);
- if (ret != 0)
- goto error;
-
/*
* Load keyformat and keylocation from the nvlist. Fetch from
* the dataset properties if not specified.
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
index bf276a3aa916..e5a7ca9ba3f5 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
@@ -3347,9 +3347,13 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
/* we are in a non-global zone, but parent is in the global zone */
if (getzoneid() != GLOBAL_ZONEID && !is_zoned) {
- (void) zfs_standard_error(hdl, EPERM, errbuf);
- zfs_close(zhp);
- return (-1);
+ uint64_t zoned_uid = zfs_prop_get_int(zhp, ZFS_PROP_ZONED_UID);
+ if (zoned_uid == 0) {
+ (void) zfs_standard_error(hdl, EPERM, errbuf);
+ zfs_close(zhp);
+ return (-1);
+ }
+ /* zoned_uid set - let kernel decide */
}
/* make sure parent is a filesystem */
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index 49d5c3d5910a..7cd6a768084b 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -1510,11 +1510,17 @@ zpool_has_draid_vdev(nvlist_t *nvroot)
*/
static char *
zpool_draid_name(char *name, int len, uint64_t data, uint64_t parity,
- uint64_t spares, uint64_t children)
+ uint64_t spares, uint64_t children, uint64_t width)
{
- snprintf(name, len, "%s%llu:%llud:%lluc:%llus",
- VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
- (u_longlong_t)children, (u_longlong_t)spares);
+ if (children < width)
+ snprintf(name, len, "%s%llu:%llud:%lluc:%lluw:%llus",
+ VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
+ (u_longlong_t)children, (u_longlong_t)width,
+ (u_longlong_t)spares);
+ else
+ snprintf(name, len, "%s%llu:%llud:%lluc:%llus",
+ VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
+ (u_longlong_t)children, (u_longlong_t)spares);
return (name);
}
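For reference, the two formats produced by the updated zpool_draid_name() look like this; the numbers are hypothetical and the sketch is only meant to show when the "w" (width) component appears.

#include <stdio.h>

int
main(void)
{
	char buf[64];

	/* children == width: the classic form, e.g. draid1:4d:8c:1s */
	(void) snprintf(buf, sizeof (buf), "draid%d:%dd:%dc:%ds",
	    1, 4, 8, 1);
	(void) puts(buf);

	/*
	 * children < width: the width is appended as well,
	 * e.g. draid1:4d:8c:16w:1s
	 */
	(void) snprintf(buf, sizeof (buf), "draid%d:%dd:%dc:%dw:%ds",
	    1, 4, 8, 16, 1);
	(void) puts(buf);
	return (0);
}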
@@ -1536,6 +1542,50 @@ zpool_is_draid_spare(const char *name)
return (B_FALSE);
}
+
+/*
+ * Extract device-specific error information from a failed pool creation.
+ * If the kernel returned ZPOOL_CONFIG_CREATE_INFO in the ioctl output,
+ * set an appropriate error aux message identifying the problematic device.
+ */
+static int
+zpool_create_info(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+ nvlist_t *outnv = NULL;
+ nvlist_t *info = NULL;
+ const char *vdev = NULL;
+ const char *pname = NULL;
+
+ if (zc->zc_nvlist_dst_size == 0)
+ return (ENOENT);
+
+ if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst,
+ zc->zc_nvlist_dst_size, &outnv, 0) != 0 || outnv == NULL)
+ return (EINVAL);
+
+ if (nvlist_lookup_nvlist(outnv, ZPOOL_CONFIG_CREATE_INFO, &info) != 0) {
+ nvlist_free(outnv);
+ return (EINVAL);
+ }
+
+ if (nvlist_lookup_string(info, ZPOOL_CREATE_INFO_VDEV, &vdev) != 0) {
+ nvlist_free(outnv);
+ return (EINVAL);
+ }
+
+ if (nvlist_lookup_string(info, ZPOOL_CREATE_INFO_POOL, &pname) == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device '%s' is part of active pool '%s'"),
+ vdev, pname);
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device '%s' is in use"), vdev);
+ }
+
+ nvlist_free(outnv);
+ return (0);
+}
+
/*
* Create the named pool, using the provided vdev list. It is assumed
* that the consumer has already validated the contents of the nvlist, so we
@@ -1615,16 +1665,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zcmd_write_src_nvlist(hdl, &zc, zc_props);
(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+ zcmd_alloc_dst_nvlist(hdl, &zc, 4096);
if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
-
- zcmd_free_nvlists(&zc);
- nvlist_free(zc_props);
- nvlist_free(zc_fsprops);
- nvlist_free(hidden_args);
- if (wkeydata != NULL)
- free(wkeydata);
-
switch (errno) {
case EBUSY:
/*
@@ -1634,11 +1677,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
* label. This can also happen under if the device is
* part of an active md or lvm device.
*/
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more vdevs refer to the same device, or "
- "one of\nthe devices is part of an active md or "
- "lvm device"));
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ if (zpool_create_info(hdl, &zc) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more vdevs refer to the same "
+ "device, or one of\nthe devices is "
+ "part of an active md or lvm device"));
+ }
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ break;
case ERANGE:
/*
@@ -1653,7 +1699,8 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"record size invalid"));
- return (zfs_error(hdl, EZFS_BADPROP, errbuf));
+ ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
+ break;
case EOVERFLOW:
/*
@@ -1672,12 +1719,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
"one or more devices is less than the "
"minimum size (%s)"), buf);
}
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ break;
case ENOSPC:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more devices is out of space"));
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ break;
case EINVAL:
if (zpool_has_draid_vdev(nvroot) &&
@@ -1685,24 +1734,32 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dRAID vdevs are unsupported by the "
"kernel"));
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
} else {
- return (zpool_standard_error(hdl, errno,
- errbuf));
+ ret = zpool_standard_error(hdl, errno, errbuf);
}
+ break;
case ENXIO:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more devices could not be opened"));
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ if (zpool_create_info(hdl, &zc) == 0) {
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices could not be "
+ "opened"));
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ }
+ break;
case EDOM:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"block size out of range or does not match"));
- return (zfs_error(hdl, EZFS_BADDEV, errbuf));
+ ret = zfs_error(hdl, EZFS_BADDEV, errbuf);
+ break;
default:
- return (zpool_standard_error(hdl, errno, errbuf));
+ ret = zpool_standard_error(hdl, errno, errbuf);
+ break;
}
}
@@ -3571,10 +3628,53 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
zfs_cmd_t zc = {"\0"};
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
+ nvlist_t *vdev_nv;
+ boolean_t avail_spare, l2cache;
+ char *vdev_name;
+ char guid_str[21]; /* 64-bit num + '\0' */
+ boolean_t is_draid_spare = B_FALSE;
+ const char *vdev_type;
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
+ snprintf(guid_str, sizeof (guid_str), "%llu", (u_longlong_t)guid);
+ if ((vdev_nv = zpool_find_vdev(zhp, guid_str, &avail_spare,
+ &l2cache, NULL)) == NULL)
+ return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
+
+ vdev_name = zpool_vdev_name(hdl, zhp, vdev_nv, 0);
+ if (vdev_name != NULL) {
+ /*
+ * We have the actual vdev name, so use that instead of the GUID
+ * in any error messages.
+ */
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot fault %s"), vdev_name);
+ free(vdev_name);
+ }
+
+ /*
+ * Spares (traditional or draid) cannot be faulted by libzfs, except:
+ *
+ * - Any spare type that exceeds its error limits can be faulted (aux =
+ * VDEV_AUX_ERR_EXCEEDED). This is only used by zed.
+ *
+ * - Traditional spares that are active can be force faulted.
+ */
+ if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_TYPE, &vdev_type) == 0)
+ if (strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0)
+ is_draid_spare = B_TRUE;
+
+ /*
+ * If vdev is a spare that is not being used, or is a dRAID spare (in
+ * use or not), then don't allow it to be force-faulted. However, an
+ * in-use dRAID spare can be faulted by ZED if it sees too many errors
+ * (aux = VDEV_AUX_ERR_EXCEEDED).
+ */
+ if (avail_spare || (is_draid_spare && aux != VDEV_AUX_ERR_EXCEEDED))
+ return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
+
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
zc.zc_cookie = VDEV_STATE_FAULTED;
@@ -4650,12 +4750,12 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
* If it's a dRAID device, we add parity, groups, and spares.
*/
if (strcmp(path, VDEV_TYPE_DRAID) == 0) {
- uint64_t ndata, nparity, nspares;
+ uint64_t ndata, nparity, nspares, children;
nvlist_t **child;
- uint_t children;
+ uint_t width;
verify(nvlist_lookup_nvlist_array(nv,
- ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
+ ZPOOL_CONFIG_CHILDREN, &child, &width) == 0);
nparity = fnvlist_lookup_uint64(nv,
ZPOOL_CONFIG_NPARITY);
ndata = fnvlist_lookup_uint64(nv,
@@ -4663,8 +4763,12 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
nspares = fnvlist_lookup_uint64(nv,
ZPOOL_CONFIG_DRAID_NSPARES);
+ if (nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_DRAID_NCHILDREN, &children) != 0)
+ children = width;
+
path = zpool_draid_name(buf, sizeof (buf), ndata,
- nparity, nspares, children);
+ nparity, nspares, children, width);
}
/*
@@ -5588,6 +5692,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
+ case VDEV_PROP_FDOMAIN:
+ case VDEV_PROP_FGROUP:
if (intval == UINT64_MAX) {
(void) strlcpy(buf, "-", len);
} else {
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
index a589ca6896f0..d39172f45008 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c
@@ -154,8 +154,12 @@ vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc, void *arg)
/*
* Detect whether any leaf devices have seen errors or could not be opened.
+ * Returns:
+ * - EDOM if a failure domain in a dRAID vdev is down
+ * - ENXIO if any device is problematic
+ * - 0 (zero) otherwise
*/
-static boolean_t
+static int
find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *),
void *arg, boolean_t ignore_replacing)
{
@@ -172,22 +176,41 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *),
const char *type = fnvlist_lookup_string(vdev,
ZPOOL_CONFIG_TYPE);
if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
- return (B_FALSE);
+ return (0);
}
if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
&children) == 0) {
+
+ uint64_t fgrp_children = 0;
+ (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_DRAID_NCHILDREN,
+ &fgrp_children);
+
+ for (c = 0; c < fgrp_children; c++) {
+ int nfgrps = children / fgrp_children;
+ int nfaults = 0;
+ for (int g = 0; g < nfgrps; g++) {
+ if (find_vdev_problem(child[c +
+ (g * fgrp_children)], func, arg,
+ ignore_replacing))
+ nfaults++;
+ }
+ if (nfaults == nfgrps)
+ return (EDOM);
+ }
+
for (c = 0; c < children; c++) {
- if (find_vdev_problem(child[c], func, arg,
- ignore_replacing))
- return (B_TRUE);
+ int res;
+ if ((res = find_vdev_problem(child[c], func, arg,
+ ignore_replacing)))
+ return (res);
}
} else {
uint_t vsc;
vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array(
vdev, ZPOOL_CONFIG_VDEV_STATS, &vsc);
if (func(vs, vsc, arg) != 0)
- return (B_TRUE);
+ return (ENXIO);
}
/*
@@ -198,11 +221,11 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *),
for (c = 0; c < children; c++) {
if (find_vdev_problem(child[c], func, arg,
ignore_replacing))
- return (B_TRUE);
+ return (ENXIO);
}
}
- return (B_FALSE);
+ return (0);
}
/*
@@ -406,6 +429,10 @@ check_status(nvlist_t *config, boolean_t isimport,
/*
* Missing devices in a replicated config.
*/
+ if (find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE) == EDOM)
+ return (ZPOOL_STATUS_FAULTED_FDOM_R);
+ if (find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE) == EDOM)
+ return (ZPOOL_STATUS_FAULTED_FDOM_R);
if (find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE))
return (ZPOOL_STATUS_FAULTED_DEV_R);
if (find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE))
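The failure-domain scan added to find_vdev_problem() checks the same child index across every group. A standalone sketch with hypothetical sizes (12 leaf vdevs and a ZPOOL_CONFIG_DRAID_NCHILDREN of 4, hence three groups) shows when EDOM, and therefore ZPOOL_STATUS_FAULTED_FDOM_R, would be reported; it is an illustration only, not part of the patch.

#include <stdio.h>

int
main(void)
{
	int children = 12, fgrp_children = 4;
	int nfgrps = children / fgrp_children;	/* 3 */
	/* 1 marks a leaf with a problem; index 1 is bad in every group. */
	int bad[12] = { 0, 1, 0, 0,  0, 1, 0, 0,  0, 1, 0, 0 };

	for (int c = 0; c < fgrp_children; c++) {
		int nfaults = 0;
		for (int g = 0; g < nfgrps; g++)
			nfaults += bad[c + (g * fgrp_children)];
		if (nfaults == nfgrps)
			printf("index %d bad in every group -> EDOM\n", c);
	}
	return (0);
}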
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
index 021a1d8a407d..d886bdb97864 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_util.c
@@ -324,6 +324,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_ASHIFT_MISMATCH:
return (dgettext(TEXT_DOMAIN, "adding devices with "
"different physical sector sizes is not allowed"));
+ case EZFS_NO_USER_NS_SUPPORT:
+ return (dgettext(TEXT_DOMAIN, "kernel was built without "
+ "user namespace support (CONFIG_USER_NS)"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -517,6 +520,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_NOT_USER_NAMESPACE:
zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
break;
+ case ZFS_ERR_NO_USER_NS_SUPPORT:
+ zfs_verror(hdl, EZFS_NO_USER_NS_SUPPORT, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, "%s", zfs_strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
index aef169a3f880..f87cca4d09f4 100644
--- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
+++ b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_pool_os.c
@@ -264,10 +264,25 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
return (zfs_error(hdl, EZFS_NOCAP, errbuf));
}
+ /*
+ * Disks of the same capacity may have different sector sizes
+ * (512 or 4K). To keep the same start_block and slice_size on
+ * such disks, divide NEW_START_BLOCK and EFI_MIN_RESV_SIZE by
+ * the (efi_lbasize / DEV_BSIZE) coefficient.
+ */
+ uint64_t coeff = vtoc->efi_lbasize / DEV_BSIZE;
+
+ /* This probably should never be the case, but who knows. */
+ if (((NEW_START_BLOCK * DEV_BSIZE) % vtoc->efi_lbasize) ||
+ ((EFI_MIN_RESV_SIZE * DEV_BSIZE) % vtoc->efi_lbasize))
+ coeff = 1;
+
slice_size = vtoc->efi_last_u_lba + 1;
- slice_size -= EFI_MIN_RESV_SIZE;
+ slice_size -= (EFI_MIN_RESV_SIZE / coeff);
if (start_block == MAXOFFSET_T)
start_block = NEW_START_BLOCK;
+ if (start_block == NEW_START_BLOCK)
+ start_block /= coeff;
slice_size -= start_block;
slice_size = P2ALIGN_TYPED(slice_size, PARTITION_END_ALIGNMENT,
uint64_t);
@@ -298,7 +313,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
vtoc->efi_parts[8].p_start = slice_size + start_block;
- vtoc->efi_parts[8].p_size = resv;
+ vtoc->efi_parts[8].p_size = resv / coeff;
vtoc->efi_parts[8].p_tag = V_RESERVED;
rval = efi_write(fd, vtoc);
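A quick arithmetic check of the coefficient above, assuming DEV_BSIZE is 512 and NEW_START_BLOCK is 2048 512-byte blocks (1 MiB); both values are assumptions for illustration, not taken from this patch.

#include <stdio.h>

int
main(void)
{
	unsigned long long dev_bsize = 512;		/* assumed */
	unsigned long long efi_lbasize = 4096;		/* 4K-sector disk */
	unsigned long long new_start_block = 2048;	/* assumed: 1 MiB in 512B blocks */

	unsigned long long coeff = efi_lbasize / dev_bsize;	/* 8 */
	printf("coeff = %llu, start block on 4K disk = %llu (%llu bytes)\n",
	    coeff, new_start_block / coeff,
	    (new_start_block / coeff) * efi_lbasize);	/* 256, still 1 MiB */
	return (0);
}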
diff --git a/sys/contrib/openzfs/man/Makefile.am b/sys/contrib/openzfs/man/Makefile.am
index 5eb7fd95d3c8..cf38aa70c449 100644
--- a/sys/contrib/openzfs/man/Makefile.am
+++ b/sys/contrib/openzfs/man/Makefile.am
@@ -3,6 +3,7 @@ dist_noinst_man_MANS = \
%D%/man1/cstyle.1
dist_man_MANS = \
+ %D%/man1/dbufstat.1 \
%D%/man1/zilstat.1 \
%D%/man1/zarcsummary.1 \
%D%/man1/zarcstat.1 \
@@ -130,17 +131,18 @@ SUBSTFILES += $(nodist_man_MANS)
MANFILES = $(dist_noinst_man_MANS) $(dist_man_MANS) $(nodist_man_MANS)
-PHONY += mancheck
-
mancheck_verbose = $(mancheck_verbose_@AM_V@)
mancheck_verbose_ = $(mancheck_verbose_@AM_DEFAULT_V@)
-mancheck_verbose_0 = @echo MANCHECK $(_MTGT);
+mancheck_verbose_0 = @echo MANCHECK $<;
+
+MANCHECK_TARGETS = $(foreach manfile, $(MANFILES), $(addprefix mancheck-,$(manfile)))
+
+PHONY += $(MANCHECK_TARGETS) mancheck
-_MTGT = $(subst ^,/,$(subst mancheck-,,$@))
-mancheck-%:
- $(mancheck_verbose)scripts/mancheck.sh $(_MTGT)
+$(MANCHECK_TARGETS): mancheck-%: %
+ $(mancheck_verbose)scripts/mancheck.sh $<
-mancheck: $(foreach manfile, $(MANFILES), $(addprefix mancheck-,$(subst /,^,$(manfile))))
+mancheck: $(MANCHECK_TARGETS)
CHECKS += mancheck
diff --git a/sys/contrib/openzfs/man/man1/dbufstat.1 b/sys/contrib/openzfs/man/man1/dbufstat.1
new file mode 100644
index 000000000000..311af5e76a98
--- /dev/null
+++ b/sys/contrib/openzfs/man/man1/dbufstat.1
@@ -0,0 +1,233 @@
+.\" SPDX-License-Identifier: CDDL-1.0
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright (C) 2013 Lawrence Livermore National Security, LLC.
+.\"
+.Dd March 19, 2026
+.Dt DBUFSTAT 1
+.Os
+.
+.Sh NAME
+.Nm dbufstat
+.Nd display statistics for DMU buffer cache
+.Sh SYNOPSIS
+.Nm
+.Op Fl bdhnrtvx
+.Op Fl f Ar field Ns Op , Ns Ar field Ns …
+.Op Fl F Ar field Ns = Ns Ar value Ns Op , Ns …
+.Op Fl i Ar file
+.Op Fl o Ar file
+.Op Fl s Ar separator
+.
+.Sh DESCRIPTION
+.Nm
+displays statistics about the DMU (Data Management Unit) buffer
+cache, also known as the dbuf cache.
+This information is read from the
+.Sy dbufs
+kstat
+.Po Pa /proc/spl/kstat/zfs/dbufs
+on Linux and
+.Sy kstat.zfs.misc.dbufs
+on FreeBSD
+.Pc .
+.Pp
+.Nm
+can display data in three modes:
+.Bl -tag -width "-b"
+.It Fl b
+Display a table of information for each individual dbuf
+(buffer).
+This is the most detailed view.
+.It Fl d
+Display a table of information for each dnode
+(the default mode).
+Multiple dbufs belonging to the same dnode are
+aggregated.
+.It Fl t
+Display a table of information for each dnode type.
+Multiple dnodes of the same type are aggregated.
+.El
+.Pp
+The following fields are available for display.
+Not all fields are compatible with all modes; use
+.Fl v
+to see which fields are incompatible with each mode.
+.Bl -tag -compact -offset Ds -width "indirect"
+.It Sy pool
+Pool name.
+.It Sy objset
+Dataset identification number.
+.It Sy object
+Object number.
+.It Sy level
+Indirection level of buffer.
+.It Sy blkid
+Block number of buffer.
+.It Sy offset
+Offset in object of buffer.
+.It Sy dbsize
+Size of buffer.
+.It Sy usize
+Size of attached user data.
+.It Sy meta
+Whether this buffer is metadata.
+.It Sy state
+State of buffer (read, cached, etc).
+.It Sy dbholds
+Number of holds on buffer.
+.It Sy dbc
+Whether this buffer is in the dbuf cache.
+.It Sy list
+Which ARC list contains this buffer.
+.It Sy atype
+ARC header type (data or metadata).
+.It Sy flags
+ARC read flags.
+.It Sy count
+ARC data count.
+.It Sy asize
+Size of this ARC buffer.
+.It Sy access
+Time this ARC buffer was last accessed.
+.It Sy mru
+Hits while on the ARC's MRU list.
+.It Sy gmru
+Hits while on the ARC's MRU ghost list.
+.It Sy mfu
+Hits while on the ARC's MFU list.
+.It Sy gmfu
+Hits while on the ARC's MFU ghost list.
+.It Sy l2
+Hits while on the L2ARC.
+.It Sy l2_dattr
+L2ARC disk address/offset.
+.It Sy l2_asize
+L2ARC allocated size (depends on compression).
+.It Sy l2_comp
+L2ARC compression algorithm for buffer.
+.It Sy aholds
+Number of holds on this ARC buffer.
+.It Sy dtype
+Dnode type.
+.It Sy btype
+Bonus buffer type.
+.It Sy data_bs
+Data block size.
+.It Sy meta_bs
+Metadata block size.
+.It Sy bsize
+Bonus buffer size.
+.It Sy lvls
+Number of indirection levels.
+.It Sy dholds
+Number of holds on dnode.
+.It Sy blocks
+Number of allocated blocks.
+.It Sy dsize
+Size of dnode.
+.It Sy cached
+Bytes cached for all blocks.
+.It Sy direct
+Bytes cached for direct blocks.
+.It Sy indirect
+Bytes cached for indirect blocks.
+.It Sy bonus
+Bytes cached for bonus buffer.
+.It Sy spill
+Bytes cached for spill block.
+.El
+.
+.Sh OPTIONS
+.Bl -tag -width "-F filter"
+.It Fl b
+Display information for each individual dbuf.
+.It Fl d
+Display information for each dnode (default).
+.It Fl f Ar field Ns Op , Ns Ar field Ns …
+Display only the specified fields.
+.It Fl F Ar field Ns = Ns Ar value Ns Op , Ns …
+Filter output by field value or regular expression.
+Multiple filters may be specified as a comma-separated list.
+.It Fl h
+Display a help message.
+.It Fl i Ar file
+Redirect input from the specified file instead of the
+.Sy dbufs
+kstat.
+.It Fl n
+Exclude the header from output.
+.It Fl o Ar file
+Redirect output to the specified file instead of standard output.
+.It Fl r
+Display raw values.
+Specify twice to also display numeric type and compression
+identifiers instead of names.
+.It Fl s Ar separator
+Override the default field separator (two spaces) with a
+custom character or string.
+.It Fl t
+Display information for each dnode type.
+.It Fl v
+List all possible field headers, definitions, and
+incompatibilities for each mode.
+.It Fl x
+Display extended statistics.
+.El
+.
+.Sh EXAMPLES
+Display per-dnode statistics (default mode):
+.Bd -literal -compact -offset Ds
+# dbufstat -d
+ pool objset object dtype cached
+ zroot 30 34466 DMU_OT_DIRECTORY_CONTENTS 240K
+ zroot 30 0 DMU_OT_DNODE 14M
+ zroot 30 36343 DMU_OT_DIRECTORY_CONTENTS 1024
+ zroot 30 32921 DMU_OT_PLAIN_FILE_CONTENTS 2.5K
+.Ed
+.Pp
+Display per-type statistics:
+.Bd -literal -compact -offset Ds
+# dbufstat -t
+ pool objset dtype cached
+ zroot 30 DMU_OT_DIRECTORY_CONTENTS 1.9M
+ zroot 30 DMU_OT_DNODE 14M
+ zroot 30 DMU_OT_PLAIN_FILE_CONTENTS 8.4M
+ zroot 0 DMU_OT_DSL_PROPS 2.6M
+.Ed
+.Pp
+Display per-buffer statistics:
+.Bd -literal -compact -offset Ds
+# dbufstat -b
+ pool objset object level blkid offset dbsize
+ zroot 30 34466 1 0 0 128K
+ zroot 30 0 0 292 4.6M 16K
+ zroot 30 34466 0 8 128K 16K
+ zroot 0 4 0 0 0 128K
+.Ed
+.Pp
+Display specific fields per dnode:
+.Dl # Nm dbufstat Fl d Fl f Ar pool,object,objset,dsize,cached
+.Pp
+Display per-type statistics with comma-separated output:
+.Dl # Nm dbufstat Fl t Fl s Qq ,
+.Pp
+Display extended per-buffer statistics with filters:
+.Dl # Nm dbufstat Fl bx Fl F Ar dbc=1,pool=zroot
+.
+.Sh SEE ALSO
+.Xr zarcstat 1 ,
+.Xr zfs 8 ,
+.Xr zpool-iostat 8
+.
+.Sh AUTHORS
+This man page was written by
+.An Christos Longros Aq Mt chris.longros@gmail.com .
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index 5ec9d045ed0d..82b0a890e0b4 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -1008,6 +1008,17 @@ Value of 4 means parity with page cache.
The target number of bytes the ARC should leave as free memory on the system.
If zero, equivalent to the bigger of
.Sy 512 KiB No and Sy all_system_memory/64 .
+.It Sy zfs_arc_free_target Pq uint
+Desired number of free pages below which the ARC
+triggers reclaim.
+Initialized at boot to the kernel's
+.Va vm.v_free_target
+value and can be adjusted at runtime.
+This parameter is
+.Fx Ns -specific
+and uses pages, unlike the Linux-specific
+.Sy zfs_arc_sys_free ,
+which is measured in bytes.
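+.Pp
+For example, the current value can be inspected or adjusted at runtime on
+.Fx
+with
+.Xr sysctl 8 :
+.Dl # sysctl vfs.zfs.arc_free_target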
.
.It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint
Rate limit checksum events to this many per second.
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7
index 3b65a52ae630..bb5fec65eff6 100644
--- a/sys/contrib/openzfs/man/man7/vdevprops.7
+++ b/sys/contrib/openzfs/man/man7/vdevprops.7
@@ -64,7 +64,7 @@ The values of non-numeric properties are case sensitive and must be lowercase.
The following native properties consist of read-only statistics about the
vdev.
These properties can not be changed.
-.Bl -tag -width "fragmentation"
+.Bl -tag -width "failure_domain"
.It Sy capacity
Percentage of vdev space used
.It Sy state
@@ -89,6 +89,14 @@ How much this vdev can expand by
Percent of fragmentation in this vdev
.It Sy parity
The level of parity for this vdev
+.It Sy failure_domain
+Failure domain id of this child vdev in
+.Sy dRAID
+vdev with failure domains feature
+.It Sy failure_group
+Failure group id of this child vdev in
+.Sy dRAID
+vdev with failure domains feature
.It Sy devid
The device id for this vdev
.It Sy physpath
@@ -114,7 +122,7 @@ threshold in milliseconds
For
.Sy RAIDZ
and
-.Sy DRAID
+.Sy dRAID
configurations, this value also represents the number of times the vdev was
identified as an outlier and excluded from participating in read I/O operations.
.It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops
@@ -142,7 +150,17 @@ If the property is only set on the top-level vdev, this value will be used.
The value of these properties do not persist across vdev replacement.
For this reason, it is advisable to set the property on the top-level vdev -
not on the leaf vdev itself.
+The
+.Sy slow_io_n
+and
+.Sy slow_io_t
+properties must be manually set to enable slow I/O diagnosis;
+there are no built-in defaults.
The default values for
+.Sy checksum_n , checksum_t , io_n ,
+and
+.Sy io_t
+on
.Sy OpenZFS on Linux
are 10 errors in 600 seconds.
For
@@ -166,7 +184,7 @@ failfast.
Only valid for
.Sy RAIDZ
and
-.Sy DRAID
+.Sy dRAID
vdevs.
True when a slow disk outlier was detected and the vdev is currently in a sit
out state.
@@ -180,7 +198,7 @@ data will be reconstructed as needed from parity.
Only valid for
.Sy RAIDZ
and
-.Sy DRAID
+.Sy dRAID
vdevs.
If set, this enables the kernel-level slow disk detection logic.
This logic automatically causes any vdevs that are significant negative
diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7
index 448a7ec05cc3..183e6ea95745 100644
--- a/sys/contrib/openzfs/man/man7/zfsprops.7
+++ b/sys/contrib/openzfs/man/man7/zfsprops.7
@@ -2112,6 +2112,98 @@ for more information.
Zoning is a
Linux
feature and this property is not available on other platforms.
+.It Sy zoned_uid Ns = Ns Ar uid
+Delegates dataset visibility and administration to all user namespaces
+owned by the specified UID.
+This property enables rootless container support with native ZFS storage.
+For example, setting
+.Sy zoned_uid Ns = Ns 1000
+allows user 1000's rootless Podman containers to use ZFS for storage layers.
+This is a Linux-only feature.
+.Pp
+Authorization uses an additive three-layer model where all layers must pass:
+.Bl -tag -width "L2 (capability tier)" -compact
+.It Sy L0 (authentication)
+The user namespace owner UID must match the
+.Sy zoned_uid
+value.
+.It Sy L1 (dsl_deleg)
+The pool administrator must grant per-operation permissions on the
+delegation root using
+.Xr zfs-allow 8 .
+When pool delegation is OFF
+.Pq Nm zpool Cm set Sy delegation Ns = Ns Sy off ,
+all write operations are denied regardless of capabilities.
+.It Sy L2 (capability tier)
+Linux capabilities within the user namespace determine the permitted
+operation class:
+.Sy CAP_FOWNER
+for non-destructive operations
+.Pq create, snapshot, set property ,
+.Sy CAP_SYS_ADMIN
+for destructive operations
+.Pq destroy, rename, clone .
+Both are namespaced capabilities scoped to the user namespace,
+not the init namespace.
+.El
+.Pp
+Read-only operations
+.Pq Nm zfs Cm list , Nm zfs Cm get
+require no capabilities and no
+.Nm zfs Cm allow
+grants; visibility is controlled solely by the
+.Sy zoned_uid
+delegation scoping.
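+.Pp
+For example, from within any user namespace owned by the delegated UID,
+the delegated datasets can be listed without any
+.Nm zfs Cm allow
+grants:
+.Dl $ zfs list -r tank/containers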
+.Pp
+Write operations that can be delegated include
+.Nm zfs Cm create ,
+.Nm zfs Cm destroy ,
+.Nm zfs Cm snapshot ,
+.Nm zfs Cm clone ,
+.Nm zfs Cm rename
+.Pq within the delegation subtree ,
+and
+.Nm zfs Cm set .
+.Pp
+The delegation root dataset
+.Pq where zoned_uid is locally set
+cannot be destroyed from within the user namespace, protecting the
+parent dataset from unauthorized removal.
+Renames are also constrained to remain within the delegation subtree.
+The namespace user cannot modify the
+.Sy zoned_uid
+or
+.Sy zoned
+properties, and cannot override
+.Sy filesystem_limit
+or
+.Sy snapshot_limit
+set by the administrator on the delegation root
+.Pq but can impose tighter sub-limits on child datasets .
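+.Pp
+For example, the namespace user may still impose a tighter limit on a
+child dataset of the delegation root:
+.Dl $ zfs set filesystem_limit=10 tank/containers/myproject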
+.Pp
+Set to
+.Sy 0
+.Pq or inherit
+to disable UID-based delegation.
+.Pp
+Unlike
+.Nm zfs Cm zone
+which requires an existing namespace file,
+.Sy zoned_uid
+applies to any user namespace owned by the specified UID,
+making it suitable for container runtimes that create new namespaces
+on each invocation.
+See
+.Xr zfs-zone 8
+for namespace-specific delegation.
+.Pp
+Example setup for rootless Podman:
+.Bd -literal -offset indent
+# zfs create tank/containers
+# zfs set zoned_uid=1000 tank/containers
+# zfs set mountpoint=none tank/containers
+# zfs allow -u 1000 create,destroy,mount,snapshot,rename,clone tank/containers
+.Ed
.El
.Pp
The following three properties cannot be changed after the file system is
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
index b4404a6eb58d..6221cfdfda9a 100644
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -504,6 +504,33 @@ vdev type, or when adding a new
.Sy draid
vdev to an existing pool.
.
+.feature com.seagate draid_failure_domains no draid
+This feature enables the use of failure domains in the
+.Sy draid
+vdev type.
+Failure domains allow an entire set of devices belonging to a domain
+to fail without taking the pool offline.
+Devices that are likely to fail together due to sharing a common component,
+such as an enclosure, HBA, or SAS expander, are good candidates to form a
+failure domain.
+For example, on a setup with several enclosures, the user could define each
+failure domain to be the set of devices in one enclosure; the failure groups
+then span the enclosures.
+In other words, the dRAID is arranged such that the i-th device in
+every failure group belongs to the i-th enclosure,
+which allows the failure of an entire enclosure to be tolerated.
+The size of the failure group is equal to the number of failure domains,
+and it cannot be less than the size of the redundancy group
+(parity + data + spares).
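+.Pp
+For example, a pool with six failure domains (one per enclosure) and five
+failure groups can be created with the
+.Sy draid1:4d:6c:30w:5s
+vdev specification; see
+.Xr zpool-create 8
+for complete command examples.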
+.Pp
+This feature becomes
+.Sy active
+when creating a pool which uses the
+.Sy draid
+vdev type with failure domains configured, or when adding a new
+.Sy draid
+vdev with failure domains to an existing pool.
+.
.feature com.klarasystems dynamic_gang_header no
This feature enables larger gang headers based on the sector size of the pool.
When enabled, gang headers will use the entire space allocated for them, instead
diff --git a/sys/contrib/openzfs/man/man7/zpoolconcepts.7 b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
index 1571bc6b8b76..ebd0b3466146 100644
--- a/sys/contrib/openzfs/man/man7/zpoolconcepts.7
+++ b/sys/contrib/openzfs/man/man7/zpoolconcepts.7
@@ -26,6 +26,7 @@
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+.\" Copyright (c) 2026 Seagate Technology, LLC.
.\"
.Dd August 6, 2025
.Dt ZPOOLCONCEPTS 7
@@ -142,7 +143,7 @@ A dRAID with
.No parity level, and Em S No distributed hot spares can hold approximately
.Em (N-S)*(D/(D+P))*X No bytes and can withstand Em P
devices failing without losing data.
-.It Sy draid Ns Oo Ar parity Oc Ns Oo Sy \&: Ns Ar data Ns Sy d Oc Ns Oo Sy \&: Ns Ar children Ns Sy c Oc Ns Oo Sy \&: Ns Ar spares Ns Sy s Oc
+.It Sy draid Ns Oo Ar parity Oc Ns Oo Sy \&: Ns Ar data Ns Sy d Oc Ns Oo Sy \&: Ns Ar children Ns Sy c Oc Ns Oo Sy \&: Ns Ar width Ns Sy w Oc Ns Oo Sy \&: Ns Ar spares Ns Sy s Oc
A non-default dRAID configuration can be specified by appending one or more
of the following optional arguments to the
.Sy draid
@@ -161,9 +162,30 @@ Defaults to
The expected number of children.
Useful as a cross-check when listing a large number of devices.
An error is returned when the provided number of children differs.
+.It Ar width
+Several groups of children may be configured in the same row, in which case
+.Em width No is a multiple of Em children .
+Such configurations allow the creation of failure groups in which the i-th
+device of each group comes from the i-th failure domain (for example, an
+enclosure), so that if all devices in one domain fail, the
+.Em draid No vdev will still be operational with enough redundancy to
+rebuild the data.
+With
+.Em draid2 , No two domains can fail at a time; with
+.Em draid3 , No three domains (provided there are no other failures
+in any failure group).
+Each failure group then sees at most one, two, or three failures,
+respectively.
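+.Pp
+For example,
+.Sy draid1:4d:6c:30w
+describes five failure groups of six children each (30 = 5 \(mu 6),
+with the i-th child of every group drawn from the i-th failure domain.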
.It Ar spares
The number of distributed hot spares.
+All spares are shared between failure groups.
Defaults to zero.
+.Pp
+Note: to be able to survive a domain failure, there can be no more than
+.Em parity-1 No failures in any failure group, regardless of whether the
+failed devices have been rebuilt to dRAID hot spares \(em the blocks of
+those spares can be mapped to devices from the failed domain, and no more
+than
+.Em parity No failures can be tolerated in any failure group.
.El
.It Sy spare
A pseudo-vdev which keeps track of available hot spares for a pool.
@@ -202,6 +224,10 @@ A cache device cannot be configured as a mirror or raidz group.
For more information, see the
.Sx Cache Devices
section.
+.It Sy fdomain No or Sy failure_domain
+Denotes the list of devices that form a failure domain of a dRAID vdev.
+.It Sy fgroup No or Sy failure_group
+Denotes the list of devices that form a failure group of a dRAID vdev.
.El
.Pp
Virtual devices cannot be nested arbitrarily.
@@ -364,7 +390,13 @@ pools.
The
.Sy draid
vdev type provides distributed hot spares.
-These hot spares are named after the dRAID vdev they're a part of
+These are virtual devices whose blocks are reserved and distributed among
+all real devices, which makes resilvering to them much faster because no
+single device is a bottleneck.
+Fast resilvering is crucial for data durability: it shortens the time the
+pool spends with degraded redundancy, thus decreasing the chance of losing
+more devices than can be tolerated.
+dRAID hot spares are named after the dRAID vdev they're a part of
.Po Sy draid1 Ns - Ns Ar 2 Ns - Ns Ar 3 No specifies spare Ar 3 No of vdev Ar 2 ,
.No which is a single parity dRAID Pc
and may only be used by that dRAID vdev.
diff --git a/sys/contrib/openzfs/man/man8/zfs-load-key.8 b/sys/contrib/openzfs/man/man8/zfs-load-key.8
index 912f55d753b0..b0af3553472c 100644
--- a/sys/contrib/openzfs/man/man8/zfs-load-key.8
+++ b/sys/contrib/openzfs/man/man8/zfs-load-key.8
@@ -29,8 +29,9 @@
.\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
+.\" Copyright 2026 Oxide Computer Company
.\"
-.Dd July 11, 2022
+.Dd January 30, 2026
.Dt ZFS-LOAD-KEY 8
.Os
.
@@ -53,6 +54,7 @@
.Op Fl o Ar keylocation Ns = Ns Ar value
.Op Fl o Ar keyformat Ns = Ns Ar value
.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Op Fl o Ar user:prop Ns = Ns Ar value
.Ar filesystem
.Nm zfs
.Cm change-key
@@ -157,6 +159,7 @@ Unloads the keys for all encryption roots in all imported pools.
.Op Fl o Ar keylocation Ns = Ns Ar value
.Op Fl o Ar keyformat Ns = Ns Ar value
.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Op Fl o Ar user:prop Ns = Ns Ar value
.Ar filesystem
.Xc
.It Xo
@@ -173,7 +176,7 @@ This command may also be used to change the
.Sy keyformat ,
and
.Sy pbkdf2iters
-properties as needed.
+properties as needed, as well as set user properties.
If the dataset was not previously an encryption root it will become one.
Alternatively, the
.Fl i
@@ -209,7 +212,7 @@ This is effectively equivalent to running
.It Fl o Ar property Ns = Ns Ar value
Allows the user to set encryption key properties
.Pq Sy keyformat , keylocation , No and Sy pbkdf2iters
-while changing the key.
+and user properties while changing the key.
This is the only way to alter
.Sy keyformat
and
diff --git a/sys/contrib/openzfs/man/man8/zfs-set.8 b/sys/contrib/openzfs/man/man8/zfs-set.8
index 08daf09d05f8..9c75af22b186 100644
--- a/sys/contrib/openzfs/man/man8/zfs-set.8
+++ b/sys/contrib/openzfs/man/man8/zfs-set.8
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
-.Dd October 12, 2024
+.Dd March 15, 2026
.Dt ZFS-SET 8
.Os
.
@@ -114,9 +114,17 @@ Property value
.It Sy source
Property source
.Sy local , default , inherited , temporary , received , No or Sy - Pq none .
+.It Sy received
+The received value of the property, if any.
+This column is not displayed by default.
.El
.Pp
-All columns are displayed by default, though this can be controlled by using the
+The
+.Sy name , property , value ,
+and
+.Sy source
+columns are displayed by default, though this can be
+controlled by using the
.Fl o
option.
This command takes a comma-separated list of properties as described in the
@@ -147,7 +155,13 @@ A depth of
.Sy 1
will display only the dataset and its direct children.
.It Fl o Ar field
-A comma-separated list of columns to display, defaults to
+A comma-separated list of columns to display.
+Supported fields are
+.Sy name , property , value , received , source ,
+or
+.Sy all
+to select all five columns.
+The default value is
.Sy name , Ns Sy property , Ns Sy value , Ns Sy source .
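+.Pp
+For example, to display all five columns, including the received value:
+.Dl # Nm zfs Cm get Fl o Ar all Ar compression Ar tank/home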
.It Fl p
Display numbers in parsable
diff --git a/sys/contrib/openzfs/man/man8/zfs-zone.8 b/sys/contrib/openzfs/man/man8/zfs-zone.8
index a56a304e82b2..d00b2e217a5a 100644
--- a/sys/contrib/openzfs/man/man8/zfs-zone.8
+++ b/sys/contrib/openzfs/man/man8/zfs-zone.8
@@ -114,4 +114,17 @@ dataset to a user namespace identified by user namespace file
.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
.
.Sh SEE ALSO
-.Xr zfsprops 7
+.Xr zfsprops 7 ,
+.Xr zfs-allow 8
+.Pp
+For rootless container use cases where the namespace is ephemeral,
+consider using the
+.Sy zoned_uid
+property instead, which delegates to all namespaces owned by a UID
+rather than requiring attachment to a specific namespace file.
+The
+.Sy zoned_uid
+property uses a three-layer additive authorization model
+.Pq UID match, dsl_deleg grants, capability tiers
+described in
+.Xr zfsprops 7 .
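+.Pp
+For example, to delegate to every user namespace owned by UID 1000:
+.Dl # Nm zfs Cm set Sy zoned_uid Ns = Ns Ar 1000 Ar tank/users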
diff --git a/sys/contrib/openzfs/man/man8/zinject.8 b/sys/contrib/openzfs/man/man8/zinject.8
index 704f6a7accd8..092af93211c8 100644
--- a/sys/contrib/openzfs/man/man8/zinject.8
+++ b/sys/contrib/openzfs/man/man8/zinject.8
@@ -24,7 +24,7 @@
.\"
.\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
.\"
-.Dd January 14, 2025
+.Dd April 1, 2026
.Dt ZINJECT 8
.Os
.
@@ -275,8 +275,12 @@ Automatically remount the underlying filesystem.
Quiet mode.
Only print the handler number added.
.It Fl r Ar range
-Inject an error over a particular logical range of an object, which
-will be translated to the appropriate blkid range according to the
+Inject an error over a particular logical range of an object, specified as
+.Ar start Ns Op , Ns Ar end .
+Numeric suffixes
+.Pq K, M, G, T, P, E
+are accepted.
+The range will be translated to the appropriate blkid range according to the
object's properties.
.It Fl s Ar seconds
Run for this many seconds before reporting failure.
diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8
index a36ae260a158..d5696ad85f6b 100644
--- a/sys/contrib/openzfs/man/man8/zpool-create.8
+++ b/sys/contrib/openzfs/man/man8/zpool-create.8
@@ -239,6 +239,41 @@ The following command creates a ZFS storage pool consisting of two, two-way
mirrors and mirrored log devices:
.Dl # Nm zpool Cm create Ar pool Sy mirror Pa sda sdb Sy mirror Pa sdc sdd Sy log mirror Pa sde sdf
.
+.Ss Example 7 : No Creating a ZFS Pool with dRAID vdev
+The following command creates a ZFS storage pool with a dRAID vdev
+with one parity device, four data devices, and one distributed spare,
+six devices in total:
+.Dl # Nm zpool Cm create Ar pool Sy draid1:4d:6c:1s Pa sda sdb sdc sdd sde sdf
+.
+.Ss Example 8 : No Creating a ZFS Pool with dRAID vdev with failure domains
+The following commands create a ZFS storage pool with a dRAID vdev
+with five failure groups and six failure domains (for example, enclosures).
+All three commands are equivalent:
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm create Ar pool Sy draid1:4d:6c:30w:5s No \e
+ \fIenc0d0 enc1d0 enc2d0 enc3d0 enc4d0 enc5d0\fP \e
+ \fIenc0d1 enc1d1 enc2d1 enc3d1 enc4d1 enc5d1\fP \e
+ \fIenc0d2 enc1d2 enc2d2 enc3d2 enc4d2 enc5d2\fP \e
+ \fIenc0d3 enc1d3 enc2d3 enc3d3 enc4d3 enc5d3\fP \e
+ \fIenc0d4 enc1d4 enc2d4 enc3d4 enc4d4 enc5d4\fP
+.Ed
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm create Ar pool Sy draid1:5s No \e
+ \fBfgroup\fP \fIenc0d0 enc1d0 enc2d0 enc3d0 enc4d0 enc5d0\fP \e
+ \fBfgroup\fP \fIenc0d1 enc1d1 enc2d1 enc3d1 enc4d1 enc5d1\fP \e
+ \fBfgroup\fP \fIenc0d2 enc1d2 enc2d2 enc3d2 enc4d2 enc5d2\fP \e
+ \fBfgroup\fP \fIenc0d3 enc1d3 enc2d3 enc3d3 enc4d3 enc5d3\fP \e
+ \fBfgroup\fP \fIenc0d4 enc1d4 enc2d4 enc3d4 enc4d4 enc5d4\fP
+.Ed
+.Bd -literal -compact -offset Ds
+.No # Nm zpool Cm create Ar pool Sy draid1:5s No \e
+ \fBfdomain\fP \fIenc0d0 enc0d1 enc0d2 enc0d3 enc0d4\fP \e
+ \fBfdomain\fP \fIenc1d0 enc1d1 enc1d2 enc1d3 enc1d4\fP \e
+ \fBfdomain\fP \fIenc2d0 enc2d1 enc2d2 enc2d3 enc2d4\fP \e
+ \fBfdomain\fP \fIenc3d0 enc3d1 enc3d2 enc3d3 enc3d4\fP \e
+ \fBfdomain\fP \fIenc4d0 enc4d1 enc4d2 enc4d3 enc4d4\fP \e
+ \fBfdomain\fP \fIenc5d0 enc5d1 enc5d2 enc5d3 enc5d4\fP
+.Ed
+.
.Sh SEE ALSO
.Xr zpool-destroy 8 ,
.Xr zpool-export 8 ,
diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8
index 106399941f98..396941f174a0 100644
--- a/sys/contrib/openzfs/man/man8/zpool-list.8
+++ b/sys/contrib/openzfs/man/man8/zpool-list.8
@@ -48,7 +48,9 @@
Lists the given pools along with a health status and space usage.
If no
.Ar pool Ns s
-are specified, all pools in the system are listed.
+are specified, all pools currently imported are listed.
+To list pools available for import, see
+.Xr zpool-import 8 .
When given an
.Ar interval ,
the information is printed every
diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8
index 388c7634acce..155e185236d0 100644
--- a/sys/contrib/openzfs/man/man8/zpool-offline.8
+++ b/sys/contrib/openzfs/man/man8/zpool-offline.8
@@ -56,11 +56,12 @@
.Ar pool
.Ar device Ns …
.Xc
-Takes the specified physical device offline.
+Takes the specified physical device offline or force-faults it.
While the
.Ar device
-is offline, no attempt is made to read or write to the device.
-This command is not applicable to spares.
+is offline or force-faulted, no attempt is made to read or write to the device.
+dRAID spares cannot be offlined or force-faulted.
+Traditional spares can only be offlined or force-faulted when they are active.
.Bl -tag -width Ds
.It Fl -power
Power off the device's slot in the storage enclosure.
diff --git a/sys/contrib/openzfs/man/man8/zpool-resilver.8 b/sys/contrib/openzfs/man/man8/zpool-resilver.8
index 59c4be5db209..268127f403ff 100644
--- a/sys/contrib/openzfs/man/man8/zpool-resilver.8
+++ b/sys/contrib/openzfs/man/man8/zpool-resilver.8
@@ -48,6 +48,9 @@ resilver will be added to the new one.
This requires the
.Sy resilver_defer
pool feature.
+.Pp
+Due to concurrent writes on a live system, it is possible for a
+resilver to progress beyond 100% completion.
.
.Sh SEE ALSO
.Xr zpool-iostat 8 ,
diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8
index cf7ead5788bf..966eb0adf344 100644
--- a/sys/contrib/openzfs/man/man8/zpool-scrub.8
+++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8
@@ -28,7 +28,7 @@
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2025 Hewlett Packard Enterprise Development LP.
.\"
-.Dd August 6, 2025
+.Dd March 16, 2026
.Dt ZPOOL-SCRUB 8
.Os
.
@@ -87,7 +87,7 @@ If a resilver is in progress, ZFS does not allow a scrub to be started until the
resilver completes.
.Pp
Note that, due to changes in pool data on a live system, it is possible for
-scrubs to progress slightly beyond 100% completion.
+scrubs and resilvers to progress beyond 100% completion.
During this period, no completion time estimate will be provided.
.
.Sh OPTIONS
diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8
index 108a1067b384..4d27ed4eda24 100644
--- a/sys/contrib/openzfs/man/man8/zpool-status.8
+++ b/sys/contrib/openzfs/man/man8/zpool-status.8
@@ -27,7 +27,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd May 20, 2025
+.Dd March 16, 2026
.Dt ZPOOL-STATUS 8
.Os
.
@@ -63,6 +63,8 @@ If a scrub or resilver is in progress, this command reports the percentage done
and the estimated time to completion.
Both of these are only approximate, because the amount of data in the pool and
the other workloads on the system can change.
+Due to concurrent writes on a live system, it is possible for both scrubs and
+resilvers to progress beyond 100% completion.
.Bl -tag -width Ds
.It Fl c Ar script1 Ns Oo , Ns Ar script2 Ns ,… Oc
Run a script (or scripts) on each vdev and include the output as a new column
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index 79b784288911..7f9c02678229 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -495,6 +495,12 @@ atime_changed_cb(void *arg, uint64_t newval)
}
static void
+relatime_changed_cb(void *arg, uint64_t newval)
+{
+ ((zfsvfs_t *)arg)->z_relatime = (newval != 0);
+}
+
+static void
xattr_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
@@ -753,6 +759,8 @@ zfs_register_callbacks(vfs_t *vfsp)
error = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);
+ error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index 1b3eeb4353fe..6e52d90e0940 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -6767,10 +6767,12 @@ zfs_freebsd_advise(struct vop_advise_args *ap)
dmu_prefetch(os, zp->z_id, 0, start, len,
ZIO_PRIORITY_ASYNC_READ);
break;
+ case POSIX_FADV_DONTNEED:
+ dmu_evict_range(os, zp->z_id, start, len);
+ break;
case POSIX_FADV_NORMAL:
case POSIX_FADV_RANDOM:
case POSIX_FADV_SEQUENTIAL:
- case POSIX_FADV_DONTNEED:
case POSIX_FADV_NOREUSE:
/* ignored for now */
break;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
index b9f427b39705..6f74c924eb79 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
@@ -1325,6 +1325,49 @@ zfs_znode_free(znode_t *zp)
zfs_znode_free_kmem(zp);
}
+/*
+ * Determine whether the znode's atime must be updated. The logic mostly
+ * duplicates the Linux kernel's relatime_need_update() functionality.
+ * This function is only called if the underlying filesystem actually has
+ * atime updates enabled.
+ */
+boolean_t
+zfs_relatime_need_update(const znode_t *zp)
+{
+ uint64_t mtime[2], ctime[2];
+ sa_bulk_attr_t bulk[2];
+ zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+ struct timespec now, tmp_atime, tmp_ts;
+ int count = 0;
+
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
+ if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0)
+ return (B_TRUE);
+
+ ZFS_TIME_DECODE(&tmp_atime, zp->z_atime);
+ /*
+ * In relatime mode, only update the atime if the previous atime
+ * is earlier than either the ctime or mtime or if at least a day
+ * has passed since the last update of atime.
+ */
+ ZFS_TIME_DECODE(&tmp_ts, mtime);
+ /* CSTYLED */
+ if (timespeccmp(&tmp_ts, &tmp_atime, >=))
+ return (B_TRUE);
+
+ ZFS_TIME_DECODE(&tmp_ts, ctime);
+ /* CSTYLED */
+ if (timespeccmp(&tmp_ts, &tmp_atime, >=))
+ return (B_TRUE);
+
+ vfs_timestamp(&now);
+ if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
void
zfs_tstamp_update_setup_ext(znode_t *zp, uint_t flag, uint64_t mtime[2],
uint64_t ctime[2], boolean_t have_tx)
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
index dc30f6dd939c..5be153a90ec5 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -128,7 +128,8 @@ struct zvol_state_os {
struct g_provider *zsg_provider;
} _zso_geom;
} _zso_state;
- int zso_dying;
+ boolean_t zso_opening;
+ boolean_t zso_dying;
};
static uint32_t zvol_minors;
@@ -226,12 +227,13 @@ zvol_geom_open(struct g_provider *pp, int flag, int count)
}
retry:
- zv = atomic_load_ptr(&pp->private);
+ zv = pp->private;
if (zv == NULL)
return (SET_ERROR(ENXIO));
mutex_enter(&zv->zv_state_lock);
- if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) {
+ g_topology_unlock();
+ if (zv->zv_flags & ZVOL_REMOVING || zv->zv_zso->zso_dying) {
err = SET_ERROR(ENXIO);
goto out_locked;
}
@@ -245,18 +247,16 @@ retry:
if (zv->zv_open_count == 0) {
drop_suspend = B_TRUE;
if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) {
- mutex_exit(&zv->zv_state_lock);
-
/*
- * Removal may happen while the locks are down, so
- * we can't trust zv any longer; we have to start over.
+ * Set a flag to interlock with zvol_os_remove_minor()
+ * while locks are dropped.
*/
- zv = atomic_load_ptr(&pp->private);
- if (zv == NULL)
- return (SET_ERROR(ENXIO));
-
+ zv->zv_zso->zso_opening = B_TRUE;
+ mutex_exit(&zv->zv_state_lock);
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
mutex_enter(&zv->zv_state_lock);
+ zv->zv_zso->zso_opening = B_FALSE;
+ cv_broadcast(&zv->zv_removing_cv);
if (zv->zv_zso->zso_dying ||
zv->zv_flags & ZVOL_REMOVING) {
@@ -289,6 +289,7 @@ retry:
rw_exit(&zv->zv_suspend_lock);
drop_suspend = B_FALSE;
kern_yield(PRI_USER);
+ g_topology_lock();
goto retry;
} else {
drop_namespace = B_TRUE;
@@ -337,6 +338,7 @@ out_locked:
mutex_exit(&zv->zv_state_lock);
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
+ g_topology_lock();
return (err);
}
@@ -348,11 +350,12 @@ zvol_geom_close(struct g_provider *pp, int flag, int count)
boolean_t drop_suspend = B_TRUE;
int new_open_count;
- zv = atomic_load_ptr(&pp->private);
+ zv = pp->private;
if (zv == NULL)
return (SET_ERROR(ENXIO));
mutex_enter(&zv->zv_state_lock);
+ g_topology_unlock();
if (zv->zv_flags & ZVOL_EXCL) {
ASSERT3U(zv->zv_open_count, ==, 1);
zv->zv_flags &= ~ZVOL_EXCL;
@@ -413,6 +416,7 @@ zvol_geom_close(struct g_provider *pp, int flag, int count)
if (drop_suspend)
rw_exit(&zv->zv_suspend_lock);
+ g_topology_lock();
return (0);
}
@@ -448,7 +452,7 @@ zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
pp->name, acr, acw, ace));
- if (atomic_load_ptr(&pp->private) == NULL) {
+ if (pp->private == NULL) {
if (acr <= 0 && acw <= 0 && ace <= 0)
return (0);
return (pp->error);
@@ -473,24 +477,16 @@ zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
if (acw != 0)
flags |= FWRITE;
- g_topology_unlock();
if (count > 0)
error = zvol_geom_open(pp, flags, count);
else
error = zvol_geom_close(pp, flags, -count);
- g_topology_lock();
return (error);
}
static void
zvol_geom_bio_start(struct bio *bp)
{
- zvol_state_t *zv = bp->bio_to->private;
-
- if (zv == NULL) {
- g_io_deliver(bp, ENXIO);
- return;
- }
if (bp->bio_cmd == BIO_GETATTR) {
if (zvol_geom_bio_getattr(bp))
g_io_deliver(bp, EOPNOTSUPP);
@@ -507,7 +503,10 @@ zvol_geom_bio_getattr(struct bio *bp)
zvol_state_t *zv;
zv = bp->bio_to->private;
- ASSERT3P(zv, !=, NULL);
+ if (zv == NULL) {
+ g_io_deliver(bp, ENXIO);
+ return (0);
+ }
spa_t *spa = dmu_objset_spa(zv->zv_objset);
uint64_t refd, avail, usedobjs, availobjs;
@@ -920,7 +919,7 @@ retry:
return (SET_ERROR(ENXIO));
mutex_enter(&zv->zv_state_lock);
- if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) {
+ if (zv->zv_flags & ZVOL_REMOVING || zv->zv_zso->zso_dying) {
err = SET_ERROR(ENXIO);
goto out_locked;
}
@@ -1251,24 +1250,32 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
{
int error = 0;
- ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+ ASSERT(RW_WRITE_HELD(&zvol_state_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
/* Move to a new hashtable entry. */
zv->zv_hash = zvol_name_hash(newname);
hlist_del(&zv->zv_hlink);
hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
+ strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+ dataset_kstats_rename(&zv->zv_kstat, newname);
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
- struct g_provider *pp = zsg->zsg_provider;
+ struct g_provider *pp;
struct g_geom *gp;
+ mutex_exit(&zv->zv_state_lock);
g_topology_lock();
+ pp = zsg->zsg_provider;
+ if (pp->private == NULL) {
+ g_topology_unlock();
+ mutex_enter(&zv->zv_state_lock);
+ return (SET_ERROR(ENXIO));
+ }
gp = pp->geom;
ASSERT3P(gp, !=, NULL);
- zsg->zsg_provider = NULL;
g_wither_provider(pp, ENXIO);
pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
@@ -1278,6 +1285,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
pp->private = zv;
zsg->zsg_provider = pp;
g_error_provider(pp, 0);
+ mutex_enter(&zv->zv_state_lock);
g_topology_unlock();
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
@@ -1310,8 +1318,6 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
zsd->zsd_cdev = dev;
}
}
- strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
- dataset_kstats_rename(&zv->zv_kstat, newname);
return (error);
}
@@ -1400,27 +1406,32 @@ zvol_alloc(const char *name, uint64_t volsize, uint64_t volblocksize,
void
zvol_os_remove_minor(zvol_state_t *zv)
{
+ struct zvol_state_os *zso = zv->zv_zso;
+
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
ASSERT0(zv->zv_open_count);
ASSERT0(atomic_read(&zv->zv_suspend_ref));
ASSERT(zv->zv_flags & ZVOL_REMOVING);
- struct zvol_state_os *zso = zv->zv_zso;
- zv->zv_zso = NULL;
-
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
struct zvol_state_geom *zsg = &zso->zso_geom;
- struct g_provider *pp = zsg->zsg_provider;
- atomic_store_ptr(&pp->private, NULL);
- mutex_exit(&zv->zv_state_lock);
+ struct g_provider *pp;
+ while (zso->zso_opening)
+ cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock);
+ zv->zv_zso = NULL;
+ mutex_exit(&zv->zv_state_lock);
g_topology_lock();
+ pp = zsg->zsg_provider;
+ pp->private = NULL;
g_wither_geom(pp->geom, ENXIO);
g_topology_unlock();
+ g_waitidle(curthread);
} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
struct zvol_state_dev *zsd = &zso->zso_dev;
struct cdev *dev = zsd->zsd_cdev;
+ zv->zv_zso = NULL;
if (dev != NULL)
atomic_store_ptr(&dev->si_drv2, NULL);
mutex_exit(&zv->zv_state_lock);
@@ -1545,6 +1556,7 @@ out_dmu_objset_disown:
g_error_provider(zv->zv_zso->zso_geom.zsg_provider, 0);
/* geom was locked inside zvol_alloc() function */
g_topology_unlock();
+ g_waitidle(curthread);
}
out_doi:
kmem_free(doi, sizeof (dmu_object_info_t));
@@ -1565,10 +1577,10 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
zv->zv_volsize = volsize;
if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom;
- struct g_provider *pp = zsg->zsg_provider;
+ struct g_provider *pp;
g_topology_lock();
-
+ pp = zsg->zsg_provider;
if (pp->private == NULL) {
g_topology_unlock();
return (SET_ERROR(ENXIO));
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
index b2eae5d00b10..5992957280e4 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
@@ -59,6 +59,18 @@ typedef struct zone_dataset {
char zd_dsname[]; /* name of the member dataset */
} zone_dataset_t;
+/*
+ * UID-based dataset zoning: allows delegating datasets to all user
+ * namespaces owned by a specific UID, enabling rootless container support.
+ */
+typedef struct zone_uid_datasets {
+ struct list_head zuds_list; /* zone_uid_datasets linkage */
+ kuid_t zuds_owner; /* owner UID */
+ struct list_head zuds_datasets; /* datasets for this UID */
+} zone_uid_datasets_t;
+
+static struct list_head zone_uid_datasets;
+
#ifdef CONFIG_USER_NS
/*
@@ -138,6 +150,18 @@ zone_datasets_lookup(unsigned int nsinum)
}
#ifdef CONFIG_USER_NS
+static zone_uid_datasets_t *
+zone_uid_datasets_lookup(kuid_t owner)
+{
+ zone_uid_datasets_t *zuds;
+
+ list_for_each_entry(zuds, &zone_uid_datasets, zuds_list) {
+ if (uid_eq(zuds->zuds_owner, owner))
+ return (zuds);
+ }
+ return (NULL);
+}
+
static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
{
@@ -232,6 +256,62 @@ zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
EXPORT_SYMBOL(zone_dataset_attach);
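+/*
+ * Attach a dataset to the UID-based delegation list so that user
+ * namespaces owned by owner_uid can see it (and, subject to further
+ * checks, administer it).  Only root may attach datasets to a UID;
+ * attaching the same dataset twice returns EEXIST.
+ */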
int
+zone_dataset_attach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+ zone_uid_datasets_t *zuds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+ kuid_t kowner;
+
+ /* Only root can attach datasets to UIDs */
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+
+ kowner = make_kuid(current_user_ns(), owner_uid);
+ if (!uid_valid(kowner))
+ return (EINVAL);
+
+ mutex_enter(&zone_datasets_lock);
+
+ /* Find or create UID entry */
+ zuds = zone_uid_datasets_lookup(kowner);
+ if (zuds == NULL) {
+ zuds = kmem_alloc(sizeof (zone_uid_datasets_t), KM_SLEEP);
+ INIT_LIST_HEAD(&zuds->zuds_list);
+ INIT_LIST_HEAD(&zuds->zuds_datasets);
+ zuds->zuds_owner = kowner;
+ list_add_tail(&zuds->zuds_list, &zone_uid_datasets);
+ } else {
+ /* Check if dataset already attached */
+ list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
+ if (zd->zd_dsnamelen == dsnamelen &&
+ strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
+ mutex_exit(&zone_datasets_lock);
+ return (EEXIST);
+ }
+ }
+ }
+
+ /* Add dataset to UID's list */
+ zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
+ zd->zd_dsnamelen = dsnamelen;
+ strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
+ INIT_LIST_HEAD(&zd->zd_list);
+ list_add_tail(&zd->zd_list, &zuds->zuds_datasets);
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+#else
+ return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_attach_uid);
+
+int
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
{
#ifdef CONFIG_USER_NS
@@ -280,6 +360,217 @@ zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
}
EXPORT_SYMBOL(zone_dataset_detach);
+int
+zone_dataset_detach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
+{
+#ifdef CONFIG_USER_NS
+ zone_uid_datasets_t *zuds;
+ zone_dataset_t *zd;
+ int error;
+ size_t dsnamelen;
+ kuid_t kowner;
+
+ if ((error = zone_dataset_cred_check(cred)) != 0)
+ return (error);
+ if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
+ return (error);
+
+ kowner = make_kuid(current_user_ns(), owner_uid);
+ if (!uid_valid(kowner))
+ return (EINVAL);
+
+ mutex_enter(&zone_datasets_lock);
+
+ zuds = zone_uid_datasets_lookup(kowner);
+ if (zuds == NULL) {
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+ }
+
+ /* Find and remove dataset */
+ list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
+ if (zd->zd_dsnamelen == dsnamelen &&
+ strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+
+ /* Remove UID entry if no more datasets */
+ if (list_empty(&zuds->zuds_datasets)) {
+ list_del(&zuds->zuds_list);
+ kmem_free(zuds, sizeof (*zuds));
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+ }
+ }
+
+ mutex_exit(&zone_datasets_lock);
+ return (ENOENT);
+#else
+ return (ENXIO);
+#endif /* CONFIG_USER_NS */
+}
+EXPORT_SYMBOL(zone_dataset_detach_uid);
+
+/*
+ * Callback for looking up zoned_uid property (registered by ZFS module).
+ */
+static zone_get_zoned_uid_fn_t zone_get_zoned_uid_fn = NULL;
+
+void
+zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
+{
+ zone_get_zoned_uid_fn = fn;
+}
+EXPORT_SYMBOL(zone_register_zoned_uid_callback);
+
+void
+zone_unregister_zoned_uid_callback(void)
+{
+ zone_get_zoned_uid_fn = NULL;
+}
+EXPORT_SYMBOL(zone_unregister_zoned_uid_callback);
+
+#ifdef CONFIG_USER_NS
+/*
+ * Check if a dataset is the delegation root (has zoned_uid set locally).
+ */
+static boolean_t
+zone_dataset_is_zoned_uid_root(const char *dataset, uid_t zoned_uid)
+{
+ char *root;
+ uid_t found_uid;
+ boolean_t is_root;
+
+ if (zone_get_zoned_uid_fn == NULL)
+ return (B_FALSE);
+
+ root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ found_uid = zone_get_zoned_uid_fn(dataset, root, MAXPATHLEN);
+ is_root = (found_uid == zoned_uid && strcmp(root, dataset) == 0);
+ kmem_free(root, MAXPATHLEN);
+ return (is_root);
+}
+#endif /* CONFIG_USER_NS */
+
+/*
+ * Core authorization check for zoned_uid write delegation.
+ */
+zone_admin_result_t
+zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
+ const char *aux_dataset)
+{
+#ifdef CONFIG_USER_NS
+ struct user_namespace *user_ns;
+ char *delegation_root;
+ uid_t zoned_uid, ns_owner_uid;
+ int write_unused;
+ zone_admin_result_t result = ZONE_ADMIN_NOT_APPLICABLE;
+
+ /* Step 1: If in global zone, not applicable */
+ if (INGLOBALZONE(curproc))
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+
+ /* Step 2: Need callback to be registered */
+ if (zone_get_zoned_uid_fn == NULL)
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+
+ delegation_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /* Step 3: Find delegation root */
+ zoned_uid = zone_get_zoned_uid_fn(dataset, delegation_root,
+ MAXPATHLEN);
+ if (zoned_uid == 0)
+ goto out;
+
+ /* Step 4: Verify namespace owner matches */
+ user_ns = current_user_ns();
+ ns_owner_uid = from_kuid(&init_user_ns, user_ns->owner);
+ if (ns_owner_uid != zoned_uid)
+ goto out;
+
+ /* Step 5: Tiered capability check based on operation class */
+ {
+ int required_cap;
+ switch (op) {
+ case ZONE_OP_DESTROY:
+ case ZONE_OP_RENAME:
+ case ZONE_OP_CLONE:
+ required_cap = CAP_SYS_ADMIN;
+ break;
+ case ZONE_OP_CREATE:
+ case ZONE_OP_SNAPSHOT:
+ case ZONE_OP_SETPROP:
+ required_cap = CAP_FOWNER;
+ break;
+ default:
+ required_cap = CAP_SYS_ADMIN;
+ break;
+ }
+ if (!ns_capable(user_ns, required_cap)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ }
+
+ /* Step 6: Operation-specific constraints */
+ switch (op) {
+ case ZONE_OP_DESTROY:
+ /* Cannot destroy the delegation root itself */
+ if (zone_dataset_is_zoned_uid_root(dataset, zoned_uid)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ break;
+
+ case ZONE_OP_RENAME:
+ /* Cannot rename outside delegation subtree */
+ if (aux_dataset != NULL) {
+ char *dst_root;
+ uid_t dst_uid;
+
+ dst_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ dst_uid = zone_get_zoned_uid_fn(aux_dataset,
+ dst_root, MAXPATHLEN);
+ if (dst_uid != zoned_uid ||
+ strcmp(dst_root, delegation_root) != 0) {
+ kmem_free(dst_root, MAXPATHLEN);
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ kmem_free(dst_root, MAXPATHLEN);
+ }
+ break;
+
+ case ZONE_OP_CLONE:
+ /* Clone source must be visible */
+ if (aux_dataset != NULL) {
+ if (!zone_dataset_visible(aux_dataset, &write_unused)) {
+ result = ZONE_ADMIN_DENIED;
+ goto out;
+ }
+ }
+ break;
+
+ case ZONE_OP_CREATE:
+ case ZONE_OP_SNAPSHOT:
+ case ZONE_OP_SETPROP:
+ /* No additional constraints */
+ break;
+ }
+
+ result = ZONE_ADMIN_ALLOWED;
+out:
+ kmem_free(delegation_root, MAXPATHLEN);
+ return (result);
+#else
+ (void) dataset, (void) op, (void) aux_dataset;
+ return (ZONE_ADMIN_NOT_APPLICABLE);
+#endif
+}
+EXPORT_SYMBOL(zone_dataset_admin_check);
+
/*
* A dataset is visible if:
* - It is a parent of a namespace entry.
@@ -293,34 +584,19 @@ EXPORT_SYMBOL(zone_dataset_detach);
* The parent datasets of namespace entries are visible and
* read-only to provide a path back to the root of the pool.
*/
-int
-zone_dataset_visible(const char *dataset, int *write)
+/*
+ * Helper function to check if a dataset matches against a list of
+ * delegated datasets. Returns visibility and sets write permission.
+ */
+static int
+zone_dataset_check_list(struct list_head *datasets, const char *dataset,
+ size_t dsnamelen, int *write)
{
- zone_datasets_t *zds;
zone_dataset_t *zd;
- size_t dsnamelen, zd_len;
- int visible;
+ size_t zd_len;
+ int visible = 0;
- /* Default to read-only, in case visible is returned. */
- if (write != NULL)
- *write = 0;
- if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
- return (0);
- if (INGLOBALZONE(curproc)) {
- if (write != NULL)
- *write = 1;
- return (1);
- }
-
- mutex_enter(&zone_datasets_lock);
- zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
- if (zds == NULL) {
- mutex_exit(&zone_datasets_lock);
- return (0);
- }
-
- visible = 0;
- list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
+ list_for_each_entry(zd, datasets, zd_list) {
zd_len = strlen(zd->zd_dsname);
if (zd_len > dsnamelen) {
/*
@@ -352,7 +628,8 @@ zone_dataset_visible(const char *dataset, int *write)
* the namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset,
- zd_len) == 0 && dataset[zd_len] == '/';
+ zd_len) == 0 && (dataset[zd_len] == '/' ||
+ dataset[zd_len] == '@' || dataset[zd_len] == '#');
if (visible) {
if (write != NULL)
*write = 1;
@@ -361,9 +638,70 @@ zone_dataset_visible(const char *dataset, int *write)
}
}
- mutex_exit(&zone_datasets_lock);
return (visible);
}
+
+#if defined(CONFIG_USER_NS)
+/*
+ * Check UID-based zoning visibility for the current process.
+ * Must be called with zone_datasets_lock held.
+ */
+static int
+zone_dataset_visible_uid(const char *dataset, size_t dsnamelen, int *write)
+{
+ zone_uid_datasets_t *zuds;
+
+ zuds = zone_uid_datasets_lookup(curproc->cred->user_ns->owner);
+ if (zuds != NULL)
+ return (zone_dataset_check_list(&zuds->zuds_datasets, dataset,
+ dsnamelen, write));
+ return (0);
+}
+#endif
+
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+ zone_datasets_t *zds;
+ size_t dsnamelen;
+ int visible;
+
+ /* Default to read-only, in case visible is returned. */
+ if (write != NULL)
+ *write = 0;
+ if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
+ return (0);
+ if (INGLOBALZONE(curproc)) {
+ if (write != NULL)
+ *write = 1;
+ return (1);
+ }
+
+ mutex_enter(&zone_datasets_lock);
+
+ /* First, check namespace-specific zoning (existing behavior) */
+ zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
+ if (zds != NULL) {
+ visible = zone_dataset_check_list(&zds->zds_datasets, dataset,
+ dsnamelen, write);
+ if (visible) {
+ mutex_exit(&zone_datasets_lock);
+ return (visible);
+ }
+ }
+
+ /* Second, check UID-based zoning */
+#if defined(CONFIG_USER_NS)
+ visible = zone_dataset_visible_uid(dataset, dsnamelen, write);
+ if (visible) {
+ mutex_exit(&zone_datasets_lock);
+ return (visible);
+ }
+#endif
+
+ mutex_exit(&zone_datasets_lock);
+ return (0);
+}
EXPORT_SYMBOL(zone_dataset_visible);
unsigned int
@@ -395,8 +733,9 @@ EXPORT_SYMBOL(crgetzoneid);
boolean_t
inglobalzone(proc_t *proc)
{
+ (void) proc;
#if defined(CONFIG_USER_NS)
- return (proc->cred->user_ns == &init_user_ns);
+ return (current_user_ns() == &init_user_ns);
#else
return (B_TRUE);
#endif
@@ -408,6 +747,7 @@ spl_zone_init(void)
{
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&zone_datasets);
+ INIT_LIST_HEAD(&zone_uid_datasets);
return (0);
}
@@ -415,6 +755,7 @@ void
spl_zone_fini(void)
{
zone_datasets_t *zds;
+ zone_uid_datasets_t *zuds;
zone_dataset_t *zd;
/*
@@ -423,6 +764,22 @@ spl_zone_fini(void)
* namespace is destroyed, just do it here, since spl is about to go
* out of context.
*/
+
+ /* Clean up UID-based delegations */
+ while (!list_empty(&zone_uid_datasets)) {
+ zuds = list_entry(zone_uid_datasets.next,
+ zone_uid_datasets_t, zuds_list);
+ while (!list_empty(&zuds->zuds_datasets)) {
+ zd = list_entry(zuds->zuds_datasets.next,
+ zone_dataset_t, zd_list);
+ list_del(&zd->zd_list);
+ kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
+ }
+ list_del(&zuds->zuds_list);
+ kmem_free(zuds, sizeof (*zuds));
+ }
+
+ /* Clean up namespace-based delegations */
while (!list_empty(&zone_datasets)) {
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
while (!list_empty(&zds->zds_datasets)) {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
index d6323fd56a8f..91010bdf642a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/spa_misc_os.c
@@ -39,8 +39,10 @@
#include <sys/dsl_prop.h>
#include <sys/fm/util.h>
#include <sys/dsl_scan.h>
+#include <sys/dmu.h>
#include <sys/fs/zfs.h>
#include <sys/kstat.h>
+#include <sys/zone.h>
#include "zfs_prop.h"
@@ -122,16 +124,60 @@ spa_history_zone(void)
return ("linux");
}
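+/*
+ * dmu_objset_find() callback: re-attach the zoned_uid delegation for a
+ * dataset (if the property is set) when its pool is imported.
+ */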
+static int
+spa_restore_zoned_uid_cb(const char *dsname, void *arg)
+{
+ (void) arg;
+ uint64_t zoned_uid = 0;
+
+ if (dsl_prop_get(dsname, "zoned_uid", 8, 1, &zoned_uid, NULL) != 0)
+ return (0);
+
+ if (zoned_uid != 0) {
+ int err = zone_dataset_attach_uid(kcred, dsname,
+ (uid_t)zoned_uid);
+ if (err != 0 && err != EEXIST) {
+ cmn_err(CE_WARN, "failed to restore zoned_uid for "
+ "'%s' (uid %llu): %d", dsname,
+ (unsigned long long)zoned_uid, err);
+ }
+ }
+ return (0);
+}
+
void
spa_import_os(spa_t *spa)
{
- (void) spa;
+ (void) dmu_objset_find(spa_name(spa),
+ spa_restore_zoned_uid_cb, NULL, DS_FIND_CHILDREN);
+}
+
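+/*
+ * dmu_objset_find() callback: drop the zoned_uid delegation for a dataset
+ * (if the property is set) when its pool is exported.
+ */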
+static int
+spa_cleanup_zoned_uid_cb(const char *dsname, void *arg)
+{
+ (void) arg;
+ uint64_t zoned_uid = 0;
+
+ if (dsl_prop_get(dsname, "zoned_uid", 8, 1, &zoned_uid, NULL) != 0)
+ return (0);
+
+ if (zoned_uid != 0) {
+ int err = zone_dataset_detach_uid(kcred, dsname,
+ (uid_t)zoned_uid);
+ if (err != 0 && err != ENOENT) {
+ cmn_err(CE_WARN, "failed to detach zoned_uid for "
+ "'%s' (uid %llu): %d", dsname,
+ (unsigned long long)zoned_uid, err);
+ }
+ }
+ return (0);
}
void
spa_export_os(spa_t *spa)
{
- (void) spa;
+ (void) dmu_objset_find(spa_name(spa),
+ spa_cleanup_zoned_uid_cb, NULL, DS_FIND_CHILDREN);
}
void
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
index b1dc50b2d47d..c73ef86df4dc 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@@ -1187,8 +1187,10 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
ZFS_MAX_DATASET_NAME_LEN, full_name);
- if (error)
+ if (error) {
+ zfs_exit(zfsvfs, FTAG);
goto error;
+ }
if (is_current_chrooted() == 0) {
/*
@@ -1206,6 +1208,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error = get_root_path(&mnt_path, m, MAXPATHLEN);
if (error != 0) {
kmem_free(m, MAXPATHLEN);
+ zfs_exit(zfsvfs, FTAG);
goto error;
}
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
@@ -1239,6 +1242,33 @@ zfsctl_snapshot_mount(struct path *path, int flags)
zfs_snapshot_no_setuid ? "nosuid" : "suid");
/*
+ * Release z_teardown_lock before potentially blocking operations
+ * (cv_wait for concurrent mounts, call_usermodehelper for the mount
+ * helper). Holding z_teardown_lock(R) across call_usermodehelper
+ * deadlocks with namespace_sem: the mount helper needs
+ * namespace_sem(W) via move_mount, while /proc/self/mountinfo
+ * readers hold namespace_sem(R) and need z_teardown_lock(R) via
+ * zpl_show_devname. A concurrent zfs_suspend_fs queuing
+ * z_teardown_lock(W) blocks new readers, completing the cycle.
+ * See https://github.com/openzfs/zfs/issues/18409
+ *
+ * Releasing the lock allows zfs_suspend_fs to proceed during
+ * the mount, so dmu_objset_hold in zpl_get_tree can transiently
+ * fail with ENOENT during the clone swap. The mount helper
+ * fails, this function returns EISDIR, and the VFS silently
+ * falls back to the ctldir stub (empty directory). The caller
+ * gets the stub inode instead of the real snapshot root until
+ * the next access retries the automount.
+ *
+ * Safe because everything below operates on local string copies
+ * (full_name, full_path) or uses its own synchronization
+ * (zfs_snapshot_lock, se_mtx). The parent zfsvfs pointer
+ * remains valid because we hold a path reference to the
+ * automount trigger dentry.
+ */
+ zfs_exit(zfsvfs, FTAG);
+
+ /*
* Check if snapshot is already being mounted. If found, wait for
* pending mount to complete before returning success.
*/
@@ -1352,8 +1382,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
error:
kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(full_path, MAXPATHLEN);
-
- zfs_exit(zfsvfs, FTAG);
+ kmem_free(options, 7);
return (error);
}
@@ -1365,17 +1394,31 @@ int
zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
struct inode **ipp)
{
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
int error;
struct path path;
char *mnt;
struct dentry *dentry;
+ zfs_snapentry_t *se;
mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
- MAXPATHLEN, mnt);
- if (error)
- goto out;
+ /*
+ * Try the in-memory AVL tree first for previously mounted
+ * snapshots, falling back to the on-disk scan if not found.
+ */
+ rw_enter(&zfs_snapshot_lock, RW_READER);
+ se = zfsctl_snapshot_find_by_objsetid(zfsvfs->z_os->os_spa, objsetid);
+ rw_exit(&zfs_snapshot_lock);
+ if (se != NULL) {
+ strlcpy(mnt, se->se_path, MAXPATHLEN);
+ zfsctl_snapshot_rele(se);
+ } else {
+ error = zfsctl_snapshot_path_objset(zfsvfs, objsetid,
+ MAXPATHLEN, mnt);
+ if (error)
+ goto out;
+ }
/* Trigger automount */
error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
index 5421a441b323..ce6092be1da7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c
@@ -170,6 +170,8 @@ zfs_ioc_userns_attach(zfs_cmd_t *zc)
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
+ if (error == ENXIO)
+ error = ZFS_ERR_NO_USER_NS_SUPPORT;
return (error);
}
@@ -190,6 +192,8 @@ zfs_ioc_userns_detach(zfs_cmd_t *zc)
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
+ if (error == ENXIO)
+ error = ZFS_ERR_NO_USER_NS_SUPPORT;
return (error);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index 8a7d14ab6119..9c0d92551843 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2026, TrueNAS.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -64,53 +65,15 @@
#include <linux/fs.h>
#include "zfs_comutil.h"
-enum {
- TOKEN_RO,
- TOKEN_RW,
- TOKEN_SETUID,
- TOKEN_NOSETUID,
- TOKEN_EXEC,
- TOKEN_NOEXEC,
- TOKEN_DEVICES,
- TOKEN_NODEVICES,
- TOKEN_DIRXATTR,
- TOKEN_SAXATTR,
- TOKEN_XATTR,
- TOKEN_NOXATTR,
- TOKEN_ATIME,
- TOKEN_NOATIME,
- TOKEN_RELATIME,
- TOKEN_NORELATIME,
- TOKEN_NBMAND,
- TOKEN_NONBMAND,
- TOKEN_MNTPOINT,
- TOKEN_LAST,
-};
-
-static const match_table_t zpl_tokens = {
- { TOKEN_RO, MNTOPT_RO },
- { TOKEN_RW, MNTOPT_RW },
- { TOKEN_SETUID, MNTOPT_SETUID },
- { TOKEN_NOSETUID, MNTOPT_NOSETUID },
- { TOKEN_EXEC, MNTOPT_EXEC },
- { TOKEN_NOEXEC, MNTOPT_NOEXEC },
- { TOKEN_DEVICES, MNTOPT_DEVICES },
- { TOKEN_NODEVICES, MNTOPT_NODEVICES },
- { TOKEN_DIRXATTR, MNTOPT_DIRXATTR },
- { TOKEN_SAXATTR, MNTOPT_SAXATTR },
- { TOKEN_XATTR, MNTOPT_XATTR },
- { TOKEN_NOXATTR, MNTOPT_NOXATTR },
- { TOKEN_ATIME, MNTOPT_ATIME },
- { TOKEN_NOATIME, MNTOPT_NOATIME },
- { TOKEN_RELATIME, MNTOPT_RELATIME },
- { TOKEN_NORELATIME, MNTOPT_NORELATIME },
- { TOKEN_NBMAND, MNTOPT_NBMAND },
- { TOKEN_NONBMAND, MNTOPT_NONBMAND },
- { TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" },
- { TOKEN_LAST, NULL },
-};
+vfs_t *
+zfsvfs_vfs_alloc(void)
+{
+ vfs_t *vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
+ mutex_init(&vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
+ return (vfsp);
+}
-static void
+void
zfsvfs_vfs_free(vfs_t *vfsp)
{
if (vfsp != NULL) {
@@ -121,139 +84,6 @@ zfsvfs_vfs_free(vfs_t *vfsp)
}
}
-static int
-zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
-{
- switch (token) {
- case TOKEN_RO:
- vfsp->vfs_readonly = B_TRUE;
- vfsp->vfs_do_readonly = B_TRUE;
- break;
- case TOKEN_RW:
- vfsp->vfs_readonly = B_FALSE;
- vfsp->vfs_do_readonly = B_TRUE;
- break;
- case TOKEN_SETUID:
- vfsp->vfs_setuid = B_TRUE;
- vfsp->vfs_do_setuid = B_TRUE;
- break;
- case TOKEN_NOSETUID:
- vfsp->vfs_setuid = B_FALSE;
- vfsp->vfs_do_setuid = B_TRUE;
- break;
- case TOKEN_EXEC:
- vfsp->vfs_exec = B_TRUE;
- vfsp->vfs_do_exec = B_TRUE;
- break;
- case TOKEN_NOEXEC:
- vfsp->vfs_exec = B_FALSE;
- vfsp->vfs_do_exec = B_TRUE;
- break;
- case TOKEN_DEVICES:
- vfsp->vfs_devices = B_TRUE;
- vfsp->vfs_do_devices = B_TRUE;
- break;
- case TOKEN_NODEVICES:
- vfsp->vfs_devices = B_FALSE;
- vfsp->vfs_do_devices = B_TRUE;
- break;
- case TOKEN_DIRXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_DIR;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_SAXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_SA;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_XATTR:
- vfsp->vfs_xattr = ZFS_XATTR_SA;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_NOXATTR:
- vfsp->vfs_xattr = ZFS_XATTR_OFF;
- vfsp->vfs_do_xattr = B_TRUE;
- break;
- case TOKEN_ATIME:
- vfsp->vfs_atime = B_TRUE;
- vfsp->vfs_do_atime = B_TRUE;
- break;
- case TOKEN_NOATIME:
- vfsp->vfs_atime = B_FALSE;
- vfsp->vfs_do_atime = B_TRUE;
- break;
- case TOKEN_RELATIME:
- vfsp->vfs_relatime = B_TRUE;
- vfsp->vfs_do_relatime = B_TRUE;
- break;
- case TOKEN_NORELATIME:
- vfsp->vfs_relatime = B_FALSE;
- vfsp->vfs_do_relatime = B_TRUE;
- break;
- case TOKEN_NBMAND:
- vfsp->vfs_nbmand = B_TRUE;
- vfsp->vfs_do_nbmand = B_TRUE;
- break;
- case TOKEN_NONBMAND:
- vfsp->vfs_nbmand = B_FALSE;
- vfsp->vfs_do_nbmand = B_TRUE;
- break;
- case TOKEN_MNTPOINT:
- if (vfsp->vfs_mntpoint != NULL)
- kmem_strfree(vfsp->vfs_mntpoint);
- vfsp->vfs_mntpoint = match_strdup(&args[0]);
- if (vfsp->vfs_mntpoint == NULL)
- return (SET_ERROR(ENOMEM));
- break;
- default:
- break;
- }
-
- return (0);
-}
-
-/*
- * Parse the raw mntopts and return a vfs_t describing the options.
- */
-static int
-zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
-{
- vfs_t *tmp_vfsp;
- int error;
-
- tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
- mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
-
- if (mntopts != NULL) {
- substring_t args[MAX_OPT_ARGS];
- char *tmp_mntopts, *p, *t;
- int token;
-
- tmp_mntopts = t = kmem_strdup(mntopts);
- if (tmp_mntopts == NULL)
- return (SET_ERROR(ENOMEM));
-
- while ((p = strsep(&t, ",")) != NULL) {
- if (!*p)
- continue;
-
- args[0].to = args[0].from = NULL;
- token = match_token(p, zpl_tokens, args);
- error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
- if (error) {
- kmem_strfree(tmp_mntopts);
- zfsvfs_vfs_free(tmp_vfsp);
- return (error);
- }
- }
-
- kmem_strfree(tmp_mntopts);
- }
-
- *vfsp = tmp_vfsp;
-
- return (0);
-}
-
boolean_t
zfs_is_readonly(zfsvfs_t *zfsvfs)
{
@@ -1486,20 +1316,16 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
int
-zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
+zfs_domount(struct super_block *sb, const char *osname,
+ vfs_t *vfs, int silent)
{
- const char *osname = zm->mnt_osname;
struct inode *root_inode = NULL;
uint64_t recordsize;
int error = 0;
zfsvfs_t *zfsvfs = NULL;
- vfs_t *vfs = NULL;
int canwrite;
int dataset_visible_zone;
- ASSERT(zm);
- ASSERT(osname);
-
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
/*
@@ -1511,10 +1337,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
return (SET_ERROR(EPERM));
}
- error = zfsvfs_parse_options(zm->mnt_data, &vfs);
- if (error)
- return (error);
-
/*
* If a non-writable filesystem is being mounted without the
* read-only flag, pretend it was set, as done for snapshots.
@@ -1523,16 +1345,12 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
- if (error) {
- zfsvfs_vfs_free(vfs);
+ if (error)
goto out;
- }
if ((error = dsl_prop_get_integer(osname, "recordsize",
- &recordsize, NULL))) {
- zfsvfs_vfs_free(vfs);
+ &recordsize, NULL)))
goto out;
- }
vfs->vfs_data = zfsvfs;
zfsvfs->z_vfs = vfs;
@@ -1614,6 +1432,13 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
out:
if (error) {
if (zfsvfs != NULL) {
+ /*
+ * We're returning an error, so the caller still owns
+ * the mount options vfs_t. Remove it from the zfsvfs
+ * so we don't try to free it.
+ */
+ zfsvfs->z_vfs = NULL;
+
dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs);
}
@@ -1704,24 +1529,16 @@ zfs_umount(struct super_block *sb)
}
int
-zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
+zfs_remount(struct super_block *sb, vfs_t *vfsp, int flags)
{
zfsvfs_t *zfsvfs = sb->s_fs_info;
- vfs_t *vfsp;
boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);
- int error;
if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&
- !(*flags & SB_RDONLY)) {
- *flags |= SB_RDONLY;
+ !(flags & SB_RDONLY))
return (EROFS);
- }
- error = zfsvfs_parse_options(zm->mnt_data, &vfsp);
- if (error)
- return (error);
-
- if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY))
+ if (!zfs_is_readonly(zfsvfs) && (flags & SB_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
zfs_unregister_callbacks(zfsvfs);
@@ -1732,7 +1549,7 @@ zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
if (!issnap)
(void) zfs_register_callbacks(vfsp);
- return (error);
+ return (0);
}
int
@@ -1963,15 +1780,6 @@ bail:
/* release the VFS ops */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
-
- if (err != 0) {
- /*
- * Since we couldn't setup the sa framework, try to force
- * unmount this file system.
- */
- if (zfsvfs->z_os)
- (void) zfs_umount(zfsvfs->z_sb);
- }
return (err);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
index efcb400f196e..ffe227796f0a 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@@ -779,34 +779,23 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
return (error);
- switch (advice) {
- case POSIX_FADV_SEQUENTIAL:
- case POSIX_FADV_WILLNEED:
+ if (advice == POSIX_FADV_WILLNEED) {
+ loff_t rlen = len ? len : i_size_read(ip) - offset;
+ dmu_prefetch(os, zp->z_id, 0, offset, rlen,
+ ZIO_PRIORITY_ASYNC_READ);
+ if (!zn_has_cached_data(zp, offset, offset + rlen - 1)) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
+ }
+
#ifdef HAVE_GENERIC_FADVISE
- if (zn_has_cached_data(zp, offset, offset + len - 1))
- error = generic_fadvise(filp, offset, len, advice);
+ error = generic_fadvise(filp, offset, len, advice);
#endif
- /*
- * Pass on the caller's size directly, but note that
- * dmu_prefetch_max will effectively cap it. If there
- * really is a larger sequential access pattern, perhaps
- * dmu_zfetch will detect it.
- */
- if (len == 0)
- len = i_size_read(ip) - offset;
- dmu_prefetch(os, zp->z_id, 0, offset, len,
- ZIO_PRIORITY_ASYNC_READ);
- break;
- case POSIX_FADV_NORMAL:
- case POSIX_FADV_RANDOM:
- case POSIX_FADV_DONTNEED:
- case POSIX_FADV_NOREUSE:
- /* ignored for now */
- break;
- default:
- error = -EINVAL;
- break;
+ if (error == 0 && advice == POSIX_FADV_DONTNEED) {
+ loff_t rlen = len ? len : i_size_read(ip) - offset;
+ dmu_evict_range(os, zp->z_id, offset, rlen);
}
zfs_exit(zfsvfs, FTAG);
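Illustrative only, not part of the change: a minimal userspace sketch of how the
reworked advice handling above is exercised, assuming an ordinary file on a ZFS
dataset. POSIX_FADV_WILLNEED now primes a DMU prefetch for the range, and
POSIX_FADV_DONTNEED additionally asks the DMU to evict the cached range after
generic_fadvise() has run.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return (1);
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return (1);
	}

	/* Hint that the whole file will be read soon (starts prefetch). */
	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);

	char buf[1 << 16];
	while (read(fd, buf, sizeof (buf)) > 0)
		;

	/* Done with the data; ask for the cached range to be dropped. */
	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);

	(void) close(fd);
	return (0);
}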
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index a970959531a3..2cd0f17c860f 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -37,6 +37,7 @@
#include <linux/version.h>
#include <linux/vfs_compat.h>
#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
/*
* What to do when the last reference to an inode is released. If 0, the kernel
@@ -390,16 +391,430 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
#endif
}
+/*
+ * Mount option parsing.
+ *
+ * The kernel receives a set of "stringy" mount options, typically a
+ * comma-separated list through mount(2) or fsconfig(2). These are split into a
+ * set of struct fs_parameter, and then vfs_parse_fs_param() is called for
+ * each. That function will handle (and consume) some options directly, and
+ * other subsystems (mainly security modules) are given the opportunity to
+ * consume them too. Any left over are passed to zpl_parse_param(). Our job is
+ * to use them to fill in the vfs_t we've attached previously to
+ * fc->fs_private, ready for the mount or remount call when it comes.
+ *
+ * Historically, mount options have been generated, removed, modified and
+ * otherwise complicated by multiple different actors over a long time: the
+ * kernel itself, the original mount(8) utility and later libmount,
+ * mount.zfs(8), libzfs and the ZFS tools that use it, and any program using
+ * the various mount APIs that have come and gone over the years. This is
+ * further complicated by cross-pollination between OpenSolaris/illumos, Linux
+ * and FreeBSD. Long story short: we could see all sorts of things, and we need
+ * to at least try not to break old userspace programs.
+ *
+ * At time of writing, this is my best understanding of all the options we
+ * might reasonably see, and where and how they're handled.
+ *
+ *
+ * These are common options for all filesystems that are processed by the
+ * kernel directly, without zpl_parse_param() being called. They're a bit of a
+ * mixed bag, but are ultimately all available to us via either sb->s_flags or
+ * fc->sb_flags:
+ *
+ * dirsync: set SB_DIRSYNC
+ * lazytime: set SB_LAZYTIME
+ * mand: set SB_MANDLOCK
+ * ro: set SB_RDONLY
+ * sync: set SB_SYNCHRONOUS
+ *
+ * async: clear SB_SYNCHRONOUS
+ * nolazytime: clear SB_LAZYTIME
+ * nomand: clear SB_MANDLOCK
+ * rw: clear SB_RDONLY
+ *
+ * Fortunately, almost all of these are handled directly by the kernel. 'mand'
+ * and 'nomand' are swallowed by the kernel ('mand' emits a warning in the
+ * kernel log), but both it and the corresponding dataset property have
+ * been no-ops in OpenZFS for years, so there's nothing for us to do there.
+ *
+ * The only tricky one is SB_RDONLY ('ro'/'rw'), which can be both a mount and
+ * a superblock option. While we won't receive the "stringy" options, the
+ * kernel will set it for us in fc->sb_flags, and we've always had special
+ * handling for it at mount and remount time (eg handling snapshot mounts), so
+ * it's not a problem to do nothing here because we will sort it out later.
+ *
+ *
+ * These are options that we may receive as "stringy" options but also as mount
+ * flags.
+ *
+ * exec: clear MS_NOEXEC
+ * noexec: set MS_NOEXEC
+ * suid: clear MS_NOSUID
+ * nosuid: set MS_NOSUID
+ * dev: clear MS_NODEV
+ * nodev: set MS_NODEV
+ * atime: clear MS_NOATIME
+ * noatime: set MS_NOATIME
+ * relatime: set MS_RELATIME
+ * norelatime: clear MS_RELATIME
+ *
+ * In testing, it appears that recent libmount will convert them, but our own
+ * mount code (libzfs_mount) may not. We will be called for the stringy
+ * versions, but not for the flags. The flags will later be available on
+ * vfsmount->mnt_flags, not set on the vfs_t. This tends not to matter in
+ * practice, as almost all mounts come through libzfs (via zfs-mount(8) or
+ * mount.zfs(8)) and so as strings, and when they do come through flags, they
+ * will still be reported correctly via mountinfo and by zfs-get(8), which has
+ * special handling for "temporary" properties. Also, we never use these
+ * internally for any decisions; 'exec', 'suid' and 'dev' are handled in the
+ * kernel, and the kernel provides helpers for 'atime' and 'relatime'. The
+ * only place the difference is observable is through zfs_get_temporary_prop(),
+ * which is only used by the zfs.get_prop() Lua call.
+ *
+ * This is fixable by getting at vfsmount->mnt_flags, but that is not readily
+ * available until after the mount operation has completed, and only with some
+ * effort. This is all very low impact, so it's left for future improvement.
+ *
+ *
+ * These are true OpenZFS-specific mount options. They give the equivalent
+ * of temporarily setting the dataset properties as follows:
+ *
+ * strictatime atime=on, relatime=off
+ *
+ * xattr: xattr=sa
+ * saxattr: xattr=sa
+ * dirxattr: xattr=dir
+ * noxattr: xattr=off
+ *
+ *
+ * mntpoint= provides the canonical mount point for a snapshot mount. This
+ * is an assist for the snapshot automounter call out to userspace, to
+ * understand where the snapshot is mounted even when triggered from an
+ * alternate mount namespace (eg inside a chroot).
+ *
+ * mntpoint= vfs->vfs_mntpoint=...
+ *
+ *
+ * These are used for coordination inside libzfs, and should not make it
+ * to the kernel, but libzfs does not strip them, so we accept them here
+ * and ignore them.
+ *
+ * defaults
+ * zfsutil
+ * remount
+ *
+ *
+ * These are specific to SELinux. When that security module is running, it
+ * will consume them, but if not, they will be passed through to us. libzfs
+ * adds them unconditionally, so we will always see them when SELinux is not
+ * running, and ignore them.
+ *
+ * fscontext
+ * defcontext
+ * rootcontext
+ * context
+ *
+ *
+ * When preparing a remount, libmount will read /proc/self/mountinfo and add
+ * any unrecognised flags it finds there to the options. So, we have to accept
+ * anything that __zpl_show_options() can produce.
+ *
+ * posixacl
+ * noacl
+ * casesensitive
+ * caseinsensitive
+ * casemixed
+ *
+ *
+ * mount(8) has a notion of "sloppy" options. According to the documentation,
+ * when the -s switch is provided, unrecognised mount options will be ignored.
+ * Only the Linux NFS and SMB filesystems support it, and traditionally
+ * OpenZFS has too. However, it appears massively underspecified and
+ * inconsistent. Depending on the interplay between mount(8), the mount helper
+ * (eg mount.zfs(8)) and libmount, -s may cause unknown options to be filtered
+ * in userspace, _or_ an additional option 'sloppy' to be passed to the kernel
+ * either before or after the "unknown" option, _or_ nothing at all happens
+ * and the unknown option is passed through to the kernel as-is. The
+ * kernel NFS and SMB filesystems both expect to see an explicit option
+ * 'sloppy' and use this to either ignore or reject unknown options, but as
+ * described, it's very easy for that option to not appear, or appear too late.
+ *
+ * OpenZFS has a test for this in the test suite, and it's documented in
+ * mount.zfs(8), so to support it we accept 'sloppy' and ignore it, and all
+ * other unknown options produce a notice in the kernel log, and are also
+ * ignored. This allows the "feature" to continue to work, while avoiding
+ * the additional housekeeping for the 'sloppy' option.
+ *
+ * sloppy
+ *
+ *
+ * Finally, all filesystems get automatic handling for the 'source' option,
+ * that is, the "name" of the filesystem (the first column of df(1)'s output).
+ * However, this only happens if the handler does not otherwise handle
+ * the 'source' option. Since we handle _all_ options because of 'sloppy', we
+ * deal with this explicitly by calling into the kernel's helper for this,
+ * vfs_parse_fs_param_source(), which sets up fc->source.
+ *
+ * source
+ *
+ *
+ * Thank you for reading this far. I hope you find what you are looking for,
+ * in this life or the next.
+ *
+ * -- robn, 2026-03-26
+ */
+
+enum {
+ Opt_exec, Opt_suid, Opt_dev,
+ Opt_atime, Opt_relatime, Opt_strictatime,
+ Opt_saxattr, Opt_dirxattr, Opt_noxattr,
+ Opt_mntpoint,
+
+ Opt_ignore, Opt_warn,
+};
+
+static const struct fs_parameter_spec zpl_param_spec[] = {
+ fsparam_flag_no("exec", Opt_exec),
+ fsparam_flag_no("suid", Opt_suid),
+ fsparam_flag_no("dev", Opt_dev),
+
+ fsparam_flag_no("atime", Opt_atime),
+ fsparam_flag_no("relatime", Opt_relatime),
+ fsparam_flag("strictatime", Opt_strictatime),
+
+ fsparam_flag("xattr", Opt_saxattr),
+ fsparam_flag("saxattr", Opt_saxattr),
+ fsparam_flag("dirxattr", Opt_dirxattr),
+ fsparam_flag("noxattr", Opt_noxattr),
+
+ fsparam_string("mntpoint", Opt_mntpoint),
+
+ fsparam_flag("defaults", Opt_ignore),
+ fsparam_flag("zfsutil", Opt_ignore),
+ fsparam_flag("remount", Opt_ignore),
+
+ fsparam_string("fscontext", Opt_ignore),
+ fsparam_string("defcontext", Opt_ignore),
+ fsparam_string("rootcontext", Opt_ignore),
+ fsparam_string("context", Opt_ignore),
+
+ fsparam_flag("posixacl", Opt_ignore),
+ fsparam_flag("noacl", Opt_ignore),
+ fsparam_flag("casesensitive", Opt_ignore),
+ fsparam_flag("caseinsensitive", Opt_ignore),
+ fsparam_flag("casemixed", Opt_ignore),
+
+ fsparam_flag("sloppy", Opt_ignore),
+
+ {}
+};
+
+static int
+zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ vfs_t *vfs = fc->fs_private;
+
+ /* Handle 'source' explicitly so we don't trip on it as an unknown. */
+ int opt = vfs_parse_fs_param_source(fc, param);
+ if (opt != -ENOPARAM)
+ return (opt);
+
+ struct fs_parse_result result;
+ opt = fs_parse(fc, zpl_param_spec, param, &result);
+ if (opt == -ENOPARAM) {
+ /*
+ * Convert unknowns to warnings, to work around the whole
+ * "sloppy option" mess.
+ */
+ opt = Opt_warn;
+ }
+ if (opt < 0)
+ return (opt);
+
+ switch (opt) {
+ case Opt_exec:
+ vfs->vfs_exec = !result.negated;
+ vfs->vfs_do_exec = B_TRUE;
+ break;
+ case Opt_suid:
+ vfs->vfs_setuid = !result.negated;
+ vfs->vfs_do_setuid = B_TRUE;
+ break;
+ case Opt_dev:
+ vfs->vfs_devices = !result.negated;
+ vfs->vfs_do_devices = B_TRUE;
+ break;
+
+ case Opt_atime:
+ vfs->vfs_atime = !result.negated;
+ vfs->vfs_do_atime = B_TRUE;
+ break;
+ case Opt_relatime:
+ vfs->vfs_relatime = !result.negated;
+ vfs->vfs_do_relatime = B_TRUE;
+ break;
+ case Opt_strictatime:
+ vfs->vfs_atime = B_TRUE;
+ vfs->vfs_do_atime = B_TRUE;
+ vfs->vfs_relatime = B_FALSE;
+ vfs->vfs_do_relatime = B_TRUE;
+ break;
+
+ case Opt_saxattr:
+ vfs->vfs_xattr = ZFS_XATTR_SA;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+ case Opt_dirxattr:
+ vfs->vfs_xattr = ZFS_XATTR_DIR;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+ case Opt_noxattr:
+ vfs->vfs_xattr = ZFS_XATTR_OFF;
+ vfs->vfs_do_xattr = B_TRUE;
+ break;
+
+ case Opt_mntpoint:
+ if (vfs->vfs_mntpoint != NULL)
+ kmem_strfree(vfs->vfs_mntpoint);
+ vfs->vfs_mntpoint = kmem_strdup(param->string);
+ break;
+
+ case Opt_ignore:
+ break;
+
+ case Opt_warn:
+ cmn_err(CE_NOTE,
+ "ZFS: ignoring unknown mount option: %s", param->key);
+ break;
+
+ default:
+ return (-SET_ERROR(EINVAL));
+ }
+
+ return (0);
+}
+
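Illustrative only, not part of the change: a userspace sketch of the "stringy"
option path walked through in the comment above. The dataset name and
mountpoint are hypothetical. 'ro' is consumed by the kernel as a superblock
flag and never reaches us, while the remaining options arrive at
zpl_parse_param() (via zpl_parse_monolithic() first on pre-5.8 kernels):
'noatime' and 'xattr=sa' set temporary properties and 'mntpoint=' records the
canonical mount point.

#include <stdio.h>
#include <sys/mount.h>

int
main(void)
{
	const char *opts = "ro,noatime,xattr=sa,mntpoint=/mnt/tank";

	/* Requires privilege; 'tank/fs' and '/mnt/tank' are placeholders. */
	if (mount("tank/fs", "/mnt/tank", "zfs", 0, opts) != 0) {
		perror("mount");
		return (1);
	}
	return (0);
}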
+/*
+ * Before Linux 5.8, the kernel's individual parameter parsing had a list of
+ * "forbidden" options that would always be rejected early. These were options
+ * that should be specified by MS_* flags, to be set on the superblock
+ * directly. However, it was inconsistently applied (eg it had various "*atime"
+ * options but not "atime", and also caused problems when it was not in sync
+ * with the version of libmount in use. It was deemed needlessly restrictive
+ * and was dropped in torvalds/linux@9193ae87a8af.
+ *
+ * Unfortunately, some of the options on this list are used by OpenZFS, so
+ * we need to see them. These include the aforementioned "*atime", "dev",
+ * "exec" and "suid".
+ *
+ * There is no easy compile-time check available to detect this, so we use
+ * a simple version check that should make it available everywhere needed,
+ * most notably RHEL8's 4.18+extras, which has backported fs_context support
+ * but does not include the 5.8 commit.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+#define HAVE_FORBIDDEN_SB_FLAGS 1
+#endif
+
+#ifdef HAVE_FORBIDDEN_SB_FLAGS
+/*
+ * The typical path for options parsing through mount(2) is:
+ *
+ * ksys_mount
+ * do_mount
+ * generic_parse_monolithic
+ * vfs_parse_fs_string
+ * vfs_parse_fs_param
+ * zpl_parse_param
+ *
+ * vfs_parse_fs_param() calls the internal vfs_parse_sb_flag(), which is
+ * where the "forbidden" flags are applied. If it makes it through there,
+ * it will later call fc->parse_param() ie zpl_parse_param(). We can't
+ * intercept this chain in the middle anywhere; the earliest thing we can
+ * override is generic_parse_monolithic(), substituting our own by setting
+ * fc->parse_monolithic and doing the parsing work ourselves.
+ *
+ * Fortunately, generic_parse_monolithic() is almost entirely splitting the
+ * incoming parameter string on comma and handing off to the rest of the
+ * pipeline. This is easily replaced (almost entirely by reviving a few bits
+ * of our old options parser).
+ *
+ * To keep the change as narrow as possible, we reuse zpl_param_spec and
+ * zpl_parse_param() as much as possible. Once we've parsed the option, we call
+ * fs_parse(zpl_param_spec) to find out if the option is actually one we
+ * explicitly care about. If it is, we call zpl_parse_param() directly,
+ * avoiding vfs_parse_fs_param() and so the risk of being rejected. If it is
+ * not one we explicitly care about, we call vfs_parse_fs_param() as normal,
+ * letting the kernel reject it if it wishes. If it doesn't, it will end up
+ * back in zpl_parse_param() via fc->parse_param, and we can ignore or warn
+ * about it as we normally would.
+ */
static int
zpl_parse_monolithic(struct fs_context *fc, void *data)
{
+ char *mntopts = data;
+
+ if (mntopts == NULL)
+ return (0);
+
/*
- * We do options parsing in zfs_domount(); just stash the options blob
- * in the fs_context so we can pass it down later.
+ * Because we supply a .parse_monolithic callback, the kernel does not
+ * look at the options blob at all. Because of this, we
+ * have to give LSMs a first look at it. They will remove any options
+ * of interest to them (eg the SELinux *context= options).
*/
- fc->fs_private = data;
+ int err = security_sb_eat_lsm_opts(mntopts, &fc->security);
+ if (err)
+ return (err);
+
+ char *key;
+ while ((key = strsep(&mntopts, ",")) != NULL) {
+ if (!*key)
+ continue;
+
+ struct fs_parameter param = {
+ .key = key,
+ };
+
+ char *value = strchr(key, '=');
+ if (value != NULL) {
+ /* Key starts with '='. Kernel ignores, we will too. */
+ if (value == key)
+ continue;
+ *value++ = '\0';
+
+ /* key=value is a "string" type, set up for that */
+ param.string = value;
+ param.type = fs_value_is_string;
+ param.size = strlen(value);
+ } else {
+ /* unadorned key is a "flag" type */
+ param.type = fs_value_is_flag;
+ }
+
+ /* Check if this is one of our options. */
+ struct fs_parse_result result;
+ int opt = fs_parse(fc, zpl_param_spec, &param, &result);
+ if (opt >= 0) {
+ /*
+ * We already know this is one of our options, so a
+ * failure here would be nonsensical.
+ */
+ VERIFY0(zpl_parse_param(fc, &param));
+ } else {
+ /*
+ * Not one of our options; send it through the kernel's
+ * standard parameter handling.
+ */
+ err = vfs_parse_fs_param(fc, &param);
+ if (err < 0)
+ return (err);
+ }
+ }
+
return (0);
}
+#endif /* HAVE_FORBIDDEN_SB_FLAGS */
static int
zpl_get_tree(struct fs_context *fc)
@@ -457,13 +872,17 @@ zpl_get_tree(struct fs_context *fc)
}
if (sb->s_root == NULL) {
- zfs_mnt_t zm = {
- .mnt_osname = fc->source,
- .mnt_data = fc->fs_private,
- };
+ vfs_t *vfs = fc->fs_private;
+
+ /* Apply readonly flag as mount option */
+ if (fc->sb_flags & SB_RDONLY) {
+ vfs->vfs_readonly = B_TRUE;
+ vfs->vfs_do_readonly = B_TRUE;
+ }
fstrans_cookie_t cookie = spl_fstrans_mark();
- err = zfs_domount(sb, &zm, fc->sb_flags & SB_SILENT ? 1 : 0);
+ err = zfs_domount(sb, fc->source, vfs,
+ fc->sb_flags & SB_SILENT ? 1 : 0);
spl_fstrans_unmark(cookie);
if (err) {
@@ -471,6 +890,12 @@ zpl_get_tree(struct fs_context *fc)
return (-err);
}
+ /*
+ * zfsvfs has taken ownership of the mount options, so we
+ * need to ensure we don't free them.
+ */
+ fc->fs_private = NULL;
+
sb->s_flags |= SB_ACTIVE;
} else if (!issnap && ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY)) {
/*
@@ -492,28 +917,92 @@ zpl_get_tree(struct fs_context *fc)
static int
zpl_reconfigure(struct fs_context *fc)
{
- zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = fc->fs_private };
fstrans_cookie_t cookie;
int error;
cookie = spl_fstrans_mark();
- error = -zfs_remount(fc->root->d_sb, &fc->sb_flags, &zm);
+ error = -zfs_remount(fc->root->d_sb, fc->fs_private, fc->sb_flags);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
+ if (error == 0) {
+ /*
+ * zfsvfs has taken ownership of the mount options, so we
+ * need to ensure we don't free them.
+ */
+ fc->fs_private = NULL;
+ }
+
return (error);
}
+static int
+zpl_dup_fc(struct fs_context *fc, struct fs_context *src_fc)
+{
+ vfs_t *src_vfs = src_fc->fs_private;
+ if (src_vfs == NULL)
+ return (0);
+
+ vfs_t *vfs = zfsvfs_vfs_alloc();
+ if (vfs == NULL)
+ return (-SET_ERROR(ENOMEM));
+
+ /*
+ * This is annoying, but a straight memcpy() would require us to
+ * reinitialise the lock.
+ */
+ vfs->vfs_xattr = src_vfs->vfs_xattr;
+ vfs->vfs_readonly = src_vfs->vfs_readonly;
+ vfs->vfs_do_readonly = src_vfs->vfs_do_readonly;
+ vfs->vfs_setuid = src_vfs->vfs_setuid;
+ vfs->vfs_do_setuid = src_vfs->vfs_do_setuid;
+ vfs->vfs_exec = src_vfs->vfs_exec;
+ vfs->vfs_do_exec = src_vfs->vfs_do_exec;
+ vfs->vfs_devices = src_vfs->vfs_devices;
+ vfs->vfs_do_devices = src_vfs->vfs_do_devices;
+ vfs->vfs_do_xattr = src_vfs->vfs_do_xattr;
+ vfs->vfs_atime = src_vfs->vfs_atime;
+ vfs->vfs_do_atime = src_vfs->vfs_do_atime;
+ vfs->vfs_relatime = src_vfs->vfs_relatime;
+ vfs->vfs_do_relatime = src_vfs->vfs_do_relatime;
+ vfs->vfs_nbmand = src_vfs->vfs_nbmand;
+ vfs->vfs_do_nbmand = src_vfs->vfs_do_nbmand;
+
+ mutex_enter(&src_vfs->vfs_mntpt_lock);
+ if (src_vfs->vfs_mntpoint != NULL)
+ vfs->vfs_mntpoint = kmem_strdup(src_vfs->vfs_mntpoint);
+ mutex_exit(&src_vfs->vfs_mntpt_lock);
+
+ fc->fs_private = vfs;
+ return (0);
+}
+
+static void
+zpl_free_fc(struct fs_context *fc)
+{
+ zfsvfs_vfs_free(fc->fs_private);
+}
+
const struct fs_context_operations zpl_fs_context_operations = {
+#ifdef HAVE_FORBIDDEN_SB_FLAGS
.parse_monolithic = zpl_parse_monolithic,
+#endif
+ .parse_param = zpl_parse_param,
.get_tree = zpl_get_tree,
.reconfigure = zpl_reconfigure,
+ .dup = zpl_dup_fc,
+ .free = zpl_free_fc,
};
static int
zpl_init_fs_context(struct fs_context *fc)
{
+ fc->fs_private = zfsvfs_vfs_alloc();
+ if (fc->fs_private == NULL)
+ return (-SET_ERROR(ENOMEM));
+
fc->ops = &zpl_fs_context_operations;
+
return (0);
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 89f9bc555fcf..dc47ff20fd74 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -1796,7 +1796,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
{
int readonly = get_disk_ro(zv->zv_zso->zvo_disk);
- ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+ ASSERT(RW_WRITE_HELD(&zvol_state_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock));
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
diff --git a/sys/contrib/openzfs/module/zcommon/zfeature_common.c b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
index 6ba9892eeb64..2bb19c0cf5fd 100644
--- a/sys/contrib/openzfs/module/zcommon/zfeature_common.c
+++ b/sys/contrib/openzfs/module/zcommon/zfeature_common.c
@@ -697,6 +697,19 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
{
+ static const spa_feature_t draid_fdomain_deps[] = {
+ SPA_FEATURE_DRAID,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_DRAID_FAIL_DOMAINS,
+ "com.seagate:draid_failure_domains",
+ "draid_failure_domains",
+ "Support for failure domains in dRAID",
+ ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN,
+ draid_fdomain_deps, sfeatures);
+ }
+
+ {
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_prop.c b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
index 78d4b0a05f75..0866caf8795c 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_prop.c
@@ -497,9 +497,14 @@ zfs_prop_init(void)
/* inherit index (boolean) properties */
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table, sfeatures);
- zprop_register_index(ZFS_PROP_RELATIME, "relatime", 1, PROP_INHERIT,
- ZFS_TYPE_FILESYSTEM, "on | off", "RELATIME", boolean_table,
- sfeatures);
+ zprop_register_index(ZFS_PROP_RELATIME, "relatime",
+#ifdef __FreeBSD__
+ 0, /* FreeBSD does not natively support relatime. */
+#else
+ 1,
+#endif
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "RELATIME",
+ boolean_table, sfeatures);
zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES",
boolean_table, sfeatures);
@@ -520,6 +525,10 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table, sfeatures);
#endif
+ /* UID-based zoning for rootless containers */
+ zprop_register_number(ZFS_PROP_ZONED_UID, "zoned_uid", 0,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<uid> | none", "ZONED_UID",
+ B_FALSE, sfeatures);
zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table, sfeatures);
zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index ef21f17be8ca..ee86fe0c7171 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -441,6 +441,12 @@ vdev_prop_init(void)
ZFS_TYPE_VDEV, "<ashift>", "ASHIFT", B_FALSE, sfeatures);
zprop_register_number(VDEV_PROP_PARITY, "parity", 0, PROP_READONLY,
ZFS_TYPE_VDEV, "<parity>", "PARITY", B_FALSE, sfeatures);
+ zprop_register_number(VDEV_PROP_FDOMAIN, "failure_domain", UINT64_MAX,
+ PROP_READONLY, ZFS_TYPE_VDEV, "<fdomain>", "FDOM", B_FALSE,
+ sfeatures);
+ zprop_register_number(VDEV_PROP_FGROUP, "failure_group", UINT64_MAX,
+ PROP_READONLY, ZFS_TYPE_VDEV, "<fgroup>", "FGRP", B_FALSE,
+ sfeatures);
zprop_register_number(VDEV_PROP_NUMCHILDREN, "numchildren", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "<number-of-children>", "NUMCHILD",
B_FALSE, sfeatures);
diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c
index 2d310276af1c..7ea07c418300 100644
--- a/sys/contrib/openzfs/module/zfs/abd.c
+++ b/sys/contrib/openzfs/module/zfs/abd.c
@@ -280,7 +280,8 @@ static void
abd_free_scatter(abd_t *abd)
{
abd_free_chunks(abd);
- abd_update_scatter_stats(abd, ABDSTAT_DECR);
+ if (!abd_is_from_pages(abd))
+ abd_update_scatter_stats(abd, ABDSTAT_DECR);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
index df75d3fbe0b0..a4fe3e519700 100644
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -1480,8 +1480,12 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
* Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync()
* processes the delete record and clears the bp while we are waiting
* for the dn_mtx (resulting in a "no" from block_freed).
+ *
+ * If bp != db->db_blkptr, it means the block was overridden (by a block
+ * clone or direct I/O write). We cannot rely on dnode_block_freed(), as
+ * the range can be freed in an earlier TXG but overridden in a later one.
*/
- if (!is_hole && db->db_level == 0)
+ if (!is_hole && db->db_level == 0 && bp == db->db_blkptr)
is_hole = dnode_block_freed(dn, db->db_blkid) || BP_IS_HOLE(bp);
if (is_hole) {
@@ -2077,6 +2081,65 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
kmem_free(db_search, sizeof (dmu_buf_impl_t));
}
+/*
+ * Advisory eviction of level-0 dbufs in [start_blkid, end_blkid] for
+ * the given dnode. Dirty dbufs carry a reference, so they will be
+ * evicted once their sync is completed.
+ */
+void
+dbuf_evict_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid)
+{
+ dmu_buf_impl_t *db_marker;
+ dmu_buf_impl_t *db, *db_next;
+ avl_index_t where;
+
+ db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+ db_marker->db_level = 0;
+ db_marker->db_blkid = start_blkid;
+ db_marker->db_state = DB_SEARCH;
+
+ mutex_enter(&dn->dn_dbufs_mtx);
+ db = avl_find(&dn->dn_dbufs, db_marker, &where);
+ ASSERT0P(db);
+ db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
+
+ for (; db != NULL; db = db_next) {
+ if (db->db_level != 0 || db->db_blkid > end_blkid)
+ break;
+
+ mutex_enter(&db->db_mtx);
+ if (db->db_state != DB_EVICTING &&
+ zfs_refcount_is_zero(&db->db_holds)) {
+ /*
+ * Clean and unreferenced: evict immediately.
+ * Use the marker pattern from dnode_evict_dbufs()
+ * because dbuf_destroy() may recursively remove
+ * the parent indirect dbuf from dn_dbufs, which
+ * could be the node db_next would point to.
+ */
+ db_marker->db_level = db->db_level;
+ db_marker->db_blkid = db->db_blkid;
+ db_marker->db_state = DB_MARKER;
+ db_marker->db_parent =
+ (void *)((uintptr_t)db - 1);
+ avl_insert_here(&dn->dn_dbufs, db_marker,
+ db, AVL_BEFORE);
+ dbuf_destroy(db);
+ db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
+ avl_remove(&dn->dn_dbufs, db_marker);
+ } else {
+ /* Referenced (possibly dirty): evict when released. */
+ db->db_pending_evict = TRUE;
+ db->db_partial_read = FALSE;
+ mutex_exit(&db->db_mtx);
+ db_next = AVL_NEXT(&dn->dn_dbufs, db);
+ }
+ }
+ mutex_exit(&dn->dn_dbufs_mtx);
+
+ kmem_free(db_marker, sizeof (dmu_buf_impl_t));
+}
+
void
dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
{
@@ -2201,6 +2264,17 @@ dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
mutex_enter(&dn->dn_mtx);
int txgoff = tx->tx_txg & TXG_MASK;
+
+ /*
+ * Assert that we are not modifying the range tree for the syncing
+ * TXG from a non-syncing thread. We verify that the tx's
+ * transaction group is strictly newer than the one currently
+ * syncing (meaning we are in open context). If this triggers,
+ * it indicates a race where the syncing TXG's dn_free_ranges tree is
+ * being modified while dnode_sync() may be iterating over it.
+ */
+ ASSERT(tx->tx_txg > spa_syncing_txg(dn->dn_objset->os_spa));
+
if (dn->dn_free_ranges[txgoff] != NULL) {
zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, 1);
}
@@ -2388,6 +2462,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
if (dn->dn_free_ranges[txgoff] != NULL) {
+ FREE_RANGE_VERIFY(tx, dn);
zfs_range_tree_clear(dn->dn_free_ranges[txgoff],
db->db_blkid, 1);
}
@@ -5434,6 +5509,7 @@ EXPORT_SYMBOL(dbuf_whichblock);
EXPORT_SYMBOL(dbuf_read);
EXPORT_SYMBOL(dbuf_unoverride);
EXPORT_SYMBOL(dbuf_free_range);
+EXPORT_SYMBOL(dbuf_evict_range);
EXPORT_SYMBOL(dbuf_new_size);
EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index e36c15085baa..51ce8b9a0842 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -604,8 +604,7 @@ ddt_log_load_one(ddt_t *ddt, uint_t n)
}
if (hdr.dlh_length > 0) {
- dmu_prefetch_by_dnode(dn, 0, 0, hdr.dlh_length,
- ZIO_PRIORITY_SYNC_READ);
+ dmu_prefetch_stream_by_dnode(dn, 0, hdr.dlh_length, B_FALSE);
for (uint64_t offset = 0; offset < hdr.dlh_length;
offset += dn->dn_datablksz) {
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
index 5cb02831a251..0f40164ecc95 100644
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -779,6 +779,54 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
rw_exit(&dn->dn_struct_rwlock);
}
+/*
+ * Prime a prefetch for sequential accesses from offset for at least len bytes.
+ */
+void
+dmu_prefetch_stream(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t len, boolean_t start_now)
+{
+ dnode_t *dn;
+
+ if (dnode_hold(os, object, FTAG, &dn) != 0)
+ return;
+ dmu_prefetch_stream_by_dnode(dn, offset, len, start_now);
+ dnode_rele(dn, FTAG);
+}
+
+void
+dmu_prefetch_stream_by_dnode(dnode_t *dn, uint64_t offset, uint64_t len,
+ boolean_t start_now)
+{
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ if (dn->dn_datablkshift != 0) {
+ uint64_t start = dbuf_whichblock(dn, 0, offset);
+ if (len == 0) {
+ if (dmu_zfetch_prime(&dn->dn_zfetch, start, start) &&
+ start_now) {
+ dmu_zfetch(&dn->dn_zfetch, start, 0, B_TRUE,
+ B_TRUE, B_TRUE, B_FALSE);
+ }
+ } else {
+ uint64_t end = dbuf_whichblock(dn, 0, offset + len - 1);
+ if (start == end) {
+ if (start_now) {
+ dbuf_prefetch(dn, 0, start,
+ ZIO_PRIORITY_ASYNC_READ, 0);
+ }
+ } else if (
+ dmu_zfetch_prime(&dn->dn_zfetch, start, end + 1) &&
+ start_now) {
+ dmu_zfetch(&dn->dn_zfetch, start, 0, B_TRUE,
+ B_TRUE, B_TRUE, B_FALSE);
+ }
+ }
+ } else if (offset < dn->dn_datablksz && start_now) {
+ dbuf_prefetch(dn, 0, 0, ZIO_PRIORITY_ASYNC_READ, 0);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
+}
+
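A sketch of the intended calling pattern, not taken from this change: prime the
stream once before a sequential pass over an object, then issue ordinary demand
reads, so the first read starts prefetching at the primed distance instead of
ramping up. The helper name is hypothetical; dmu_read_by_dnode() and
DMU_READ_PREFETCH are existing DMU interfaces.

#include <sys/dmu.h>
#include <sys/dnode.h>

static int
read_object_sequentially(dnode_t *dn, uint64_t length, char *buf)
{
	/* Prime only; the first demand read kicks off the prefetch. */
	dmu_prefetch_stream_by_dnode(dn, 0, length, B_FALSE);

	for (uint64_t offset = 0; offset < length;
	    offset += dn->dn_datablksz) {
		uint64_t size = MIN(dn->dn_datablksz, length - offset);
		int error = dmu_read_by_dnode(dn, offset, size,
		    buf + offset, DMU_READ_PREFETCH);
		if (error != 0)
			return (error);
	}
	return (0);
}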
typedef struct {
kmutex_t dpa_lock;
kcondvar_t dpa_cv;
@@ -899,6 +947,35 @@ dmu_prefetch_dnode(objset_t *os, uint64_t object, zio_priority_t pri)
}
/*
+ * Advisory cache eviction for a byte range of an object.
+ */
+void
+dmu_evict_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
+{
+ dnode_t *dn;
+
+ if (len == 0)
+ return;
+ if (dnode_hold(os, object, FTAG, &dn) != 0)
+ return;
+
+ /*
+ * Exclude the last block if the range end is not block-aligned:
+ * a sequential access may continue into that block. The first
+ * block is included even when partially covered since backwards
+ * access patterns are rare.
+ */
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ uint64_t start = dbuf_whichblock(dn, 0, offset);
+ uint64_t end = dbuf_whichblock(dn, 0, offset + len);
+ if (end > start)
+ dbuf_evict_range(dn, start, end - 1);
+ rw_exit(&dn->dn_struct_rwlock);
+
+ dnode_rele(dn, FTAG);
+}
+
+/*
* Get the next "chunk" of file data to free. We traverse the file from
* the end so that the file gets shorter over time (if we crash in the
* middle, this will leave us in a better state). We find allocated file
@@ -2943,6 +3020,8 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
EXPORT_SYMBOL(dmu_prefetch);
EXPORT_SYMBOL(dmu_prefetch_by_dnode);
EXPORT_SYMBOL(dmu_prefetch_dnode);
+EXPORT_SYMBOL(dmu_prefetch_stream);
+EXPORT_SYMBOL(dmu_prefetch_stream_by_dnode);
EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range);
EXPORT_SYMBOL(dmu_free_long_object);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 51165d0bf723..f2101579c00f 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -455,6 +455,61 @@ dmu_zfetch_future(zstream_t *zs, uint64_t blkid, uint64_t nblks)
}
/*
+ * Prime a zfetch stream at blkid, so that the first demand access triggers
+ * enough prefetch, without ramp-up, to sequentially read up to end_blkid.
+ */
+boolean_t
+dmu_zfetch_prime(zfetch_t *zf, uint64_t blkid, uint64_t end_blkid)
+{
+ zstream_t *zs;
+ dnode_t *dn = zf->zf_dnode;
+ spa_t *spa = dn->dn_objset->os_spa;
+
+ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ if (zfs_prefetch_disable ||
+ dn->dn_objset->os_prefetch == ZFS_PREFETCH_NONE)
+ return (B_FALSE);
+
+ if (!spa_indirect_vdevs_loaded(spa))
+ return (B_FALSE);
+
+ uint64_t maxblkid = dn->dn_maxblkid;
+ unsigned int dbs = dn->dn_datablkshift;
+
+ if (blkid >= maxblkid)
+ return (B_FALSE);
+ if (end_blkid > maxblkid + 1)
+ end_blkid = maxblkid + 1;
+
+ mutex_enter(&zf->zf_lock);
+
+ /* Skip if a nearby stream already covers this range. */
+ uint_t max_near = zfetch_max_reorder >> dbs;
+ for (zs = list_head(&zf->zf_stream); zs != NULL;
+ zs = list_next(&zf->zf_stream, zs)) {
+ uint64_t diff = (blkid >= zs->zs_blkid) ?
+ (blkid - zs->zs_blkid) : (zs->zs_blkid - blkid);
+ if (diff <= max_near) {
+ mutex_exit(&zf->zf_lock);
+ return (B_FALSE);
+ }
+ }
+
+ dmu_zfetch_stream_create(zf, blkid);
+ zs = list_head(&zf->zf_stream);
+ ASSERT(zs != NULL);
+ ASSERT3U(zs->zs_blkid, ==, blkid);
+
+ /* dmu_zfetch_prepare() will double the distances, so take half. */
+ unsigned int nbytes = ((end_blkid - blkid) << dbs) / 2;
+ zs->zs_pf_dist = MIN(nbytes, zfetch_min_distance);
+ zs->zs_ipf_dist = MIN(nbytes, zfetch_max_idistance);
+
+ mutex_exit(&zf->zf_lock);
+ return (B_TRUE);
+}
+
+/*
* This is the predictive prefetch entry point. dmu_zfetch_prepare()
* associates dnode access specified with blkid and nblks arguments with
* prefetch stream, predicts further accesses based on that stats and returns
@@ -493,20 +548,21 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
/*
* As a fast path for small (single-block) files, ignore access
- * to the first block.
+ * to the first block, unless some streams exist, since a prime
+ * may be waiting.
*/
- if (!have_lock && blkid == 0)
+ if (!have_lock && blkid == 0 && zf->zf_numstreams == 0)
return (NULL);
if (!have_lock)
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
/*
- * A fast path for small files for which no prefetch will
- * happen.
+ * A fast path for small files for which no prefetch will happen,
+ * unless streams exist, since a prime may be waiting.
*/
uint64_t maxblkid = zf->zf_dnode->dn_maxblkid;
- if (maxblkid < 2) {
+ if (maxblkid < 2 && (maxblkid == 0 || zf->zf_numstreams == 0)) {
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return (NULL);
@@ -589,7 +645,7 @@ future:
zs->zs_atime = gethrestime_sec();
/* Exit if we already prefetched for this position before. */
- if (nblks == 0)
+ if (nblks == 0 && zs->zs_ipf_end > end_blkid)
goto out;
/* If the file is ending, remove the stream. */
@@ -643,6 +699,7 @@ out:
* Do the same for indirects, starting where we will stop reading
* data blocks (and the indirects that point to them).
*/
+ nbytes = MAX(nbytes, (1 << dbs));
if (unlikely(zs->zs_ipf_dist < nbytes))
zs->zs_ipf_dist = nbytes;
else
diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c
index e0cc4a7e13e0..be0e3de9bb23 100644
--- a/sys/contrib/openzfs/module/zfs/dnode.c
+++ b/sys/contrib/openzfs/module/zfs/dnode.c
@@ -2409,6 +2409,8 @@ done:
mutex_enter(&dn->dn_mtx);
{
int txgoff = tx->tx_txg & TXG_MASK;
+
+ FREE_RANGE_VERIFY(tx, dn);
if (dn->dn_free_ranges[txgoff] == NULL) {
dn->dn_free_ranges[txgoff] =
zfs_range_tree_create_flags(
diff --git a/sys/contrib/openzfs/module/zfs/dnode_sync.c b/sys/contrib/openzfs/module/zfs/dnode_sync.c
index 046ceddb3609..0e070c69dcd6 100644
--- a/sys/contrib/openzfs/module/zfs/dnode_sync.c
+++ b/sys/contrib/openzfs/module/zfs/dnode_sync.c
@@ -440,24 +440,6 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
}
}
-typedef struct dnode_sync_free_range_arg {
- dnode_t *dsfra_dnode;
- dmu_tx_t *dsfra_tx;
- boolean_t dsfra_free_indirects;
-} dnode_sync_free_range_arg_t;
-
-static void
-dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
-{
- dnode_sync_free_range_arg_t *dsfra = arg;
- dnode_t *dn = dsfra->dsfra_dnode;
-
- mutex_exit(&dn->dn_mtx);
- dnode_sync_free_range_impl(dn, blkid, nblks,
- dsfra->dsfra_free_indirects, dsfra->dsfra_tx);
- mutex_enter(&dn->dn_mtx);
-}
-
/*
* Try to kick all the dnode's dbufs out of the cache...
*/
@@ -635,6 +617,64 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
}
/*
+ * We cannot simply detach the range tree (set dn_free_ranges to NULL)
+ * before processing it because dnode_block_freed() relies on it to
+ * correctly identify blocks that have been freed in the current TXG
+ * (for dbuf_read() calls on holes). If we detached it early, a concurrent
+ * reader might see the block as valid on disk and return stale data
+ * instead of zeros.
+ *
+ * We also can't use zfs_range_tree_walk() or zfs_range_tree_vacate()
+ * with a callback that drops dn_mtx (dnode_sync_free_range()). This is
+ * unsafe because another thread (spa_sync_deferred_frees() ->
+ * dnode_free_range()) could acquire dn_mtx and modify the tree while the
+ * walk or vacate was in progress. This leads to tree corruption or panic
+ * when we resume.
+ *
+ * To fix the race while maintaining visibility, we process the tree
+ * incrementally. We pick a segment, drop the lock to sync it, and
+ * re-acquire the lock to remove it. By always restarting from the head
+ * of the tree, we ensure we are never using an invalid iterator.
+ * We use zfs_range_tree_clear() instead of ..._remove() because the range
+ * might have already been removed while the lock was dropped (specifically
+ * in the dbuf_dirty path mentioned above). ..._clear() handles this
+ * gracefully, while ..._remove() would panic on a missing segment.
+ */
+static void
+dnode_sync_free_ranges(dnode_t *dn, dmu_tx_t *tx)
+{
+ int txgoff = tx->tx_txg & TXG_MASK;
+
+ mutex_enter(&dn->dn_mtx);
+ zfs_range_tree_t *rt = dn->dn_free_ranges[txgoff];
+ if (rt != NULL) {
+ boolean_t freeing_dnode = dn->dn_free_txg > 0 &&
+ dn->dn_free_txg <= tx->tx_txg;
+ zfs_range_seg_t *rs;
+
+ if (freeing_dnode) {
+ ASSERT(zfs_range_tree_contains(rt, 0,
+ dn->dn_maxblkid + 1));
+ }
+
+ while ((rs = zfs_range_tree_first(rt)) != NULL) {
+ uint64_t start = zfs_rs_get_start(rs, rt);
+ uint64_t size = zfs_rs_get_end(rs, rt) - start;
+
+ mutex_exit(&dn->dn_mtx);
+ dnode_sync_free_range_impl(dn, start, size,
+ freeing_dnode, tx);
+ mutex_enter(&dn->dn_mtx);
+
+ zfs_range_tree_clear(rt, start, size);
+ }
+ zfs_range_tree_destroy(rt);
+ dn->dn_free_ranges[txgoff] = NULL;
+ }
+ mutex_exit(&dn->dn_mtx);
+}
+
+/*
* Write out the dnode's dirty buffers.
* Does not wait for zio completions.
*/
@@ -781,32 +821,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
/* process all the "freed" ranges in the file */
- if (dn->dn_free_ranges[txgoff] != NULL) {
- dnode_sync_free_range_arg_t dsfra;
- dsfra.dsfra_dnode = dn;
- dsfra.dsfra_tx = tx;
- dsfra.dsfra_free_indirects = freeing_dnode;
- mutex_enter(&dn->dn_mtx);
- if (freeing_dnode) {
- ASSERT(zfs_range_tree_contains(
- dn->dn_free_ranges[txgoff], 0,
- dn->dn_maxblkid + 1));
- }
- /*
- * Because dnode_sync_free_range() must drop dn_mtx during its
- * processing, using it as a callback to zfs_range_tree_vacate()
- * is not safe. No other operations (besides destroy) are
- * allowed once zfs_range_tree_vacate() has begun, and dropping
- * dn_mtx would leave a window open for another thread to
- * observe that invalid (and unsafe) state.
- */
- zfs_range_tree_walk(dn->dn_free_ranges[txgoff],
- dnode_sync_free_range, &dsfra);
- zfs_range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL);
- zfs_range_tree_destroy(dn->dn_free_ranges[txgoff]);
- dn->dn_free_ranges[txgoff] = NULL;
- mutex_exit(&dn->dn_mtx);
- }
+ dnode_sync_free_ranges(dn, tx);
if (freeing_dnode) {
dn->dn_objset->os_freed_dnodes++;
@@ -828,7 +843,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
/*
- * This must be done after dnode_sync_free_range()
+ * This must be done after dnode_sync_free_ranges()
* and dnode_increase_indirection(). See dnode_new_blkid()
* for an explanation of the high bit being set.
*/
diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
index 9cb1536642d1..9207737f908b 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c
@@ -17,6 +17,7 @@
/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
* Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright 2026 Oxide Computer Company
*/
#include <sys/dsl_crypt.h>
@@ -1241,6 +1242,7 @@ dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx)
typedef struct spa_keystore_change_key_args {
const char *skcka_dsname;
dsl_crypto_params_t *skcka_cp;
+ nvlist_t *skcka_userprops;
} spa_keystore_change_key_args_t;
static int
@@ -1253,6 +1255,8 @@ spa_keystore_change_key_check(void *arg, dmu_tx_t *tx)
dsl_crypto_params_t *dcp = skcka->skcka_cp;
uint64_t rddobj;
+ /* we assume skcka_userprops has already been verified */
+
/* check for the encryption feature */
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
ret = SET_ERROR(ENOTSUP);
@@ -1539,6 +1543,10 @@ spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds));
ASSERT(!ds->ds_is_snapshot);
+ /* set user properties */
+ dsl_props_set_sync_impl(ds, ZPROP_SRC_LOCAL, skcka->skcka_userprops,
+ tx);
+
if (dcp->cp_cmd == DCP_CMD_NEW_KEY ||
dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
/*
@@ -1617,14 +1625,19 @@ spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
}
+/*
+ * Note: assumes userprops has already been checked for validity.
+ */
int
-spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp)
+spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp,
+ nvlist_t *userprops)
{
spa_keystore_change_key_args_t skcka;
/* initialize the args struct */
skcka.skcka_dsname = dsname;
skcka.skcka_cp = dcp;
+ skcka.skcka_userprops = userprops;
/*
* Perform the actual work in syncing context. The blocks modified
diff --git a/sys/contrib/openzfs/module/zfs/dsl_deleg.c b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
index 200bee200d34..f3153d6901c2 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_deleg.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_deleg.c
@@ -591,13 +591,16 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
* the zoned property is set
*/
if (!INGLOBALZONE(curproc)) {
- uint64_t zoned;
+ uint64_t zoned = 0;
+ uint64_t zoned_uid_val = 0;
- if (dsl_prop_get_dd(dd,
+ (void) dsl_prop_get_dd(dd,
zfs_prop_to_name(ZFS_PROP_ZONED),
- 8, 1, &zoned, NULL, B_FALSE) != 0)
- break;
- if (!zoned)
+ 8, 1, &zoned, NULL, B_FALSE);
+ (void) dsl_prop_get_dd(dd,
+ zfs_prop_to_name(ZFS_PROP_ZONED_UID),
+ 8, 1, &zoned_uid_val, NULL, B_FALSE);
+ if (!zoned && zoned_uid_val == 0)
break;
}
zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj;
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index 4397c14b5c77..eafd4b176208 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -1947,6 +1947,10 @@ spa_activate(spa_t *spa, spa_mode_t mode)
static void
spa_deactivate(spa_t *spa)
{
+ if (spa->spa_create_info != NULL) {
+ nvlist_free(spa->spa_create_info);
+ spa->spa_create_info = NULL;
+ }
ASSERT(spa->spa_sync_on == B_FALSE);
ASSERT0P(spa->spa_dsl_pool);
ASSERT0P(spa->spa_root_vdev);
@@ -7060,7 +7064,7 @@ spa_create_check_encryption_params(dsl_crypto_params_t *dcp,
*/
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
- nvlist_t *zplprops, dsl_crypto_params_t *dcp)
+ nvlist_t *zplprops, dsl_crypto_params_t *dcp, nvlist_t **errinfo)
{
spa_t *spa;
const char *altroot = NULL;
@@ -7071,10 +7075,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
uint64_t txg = TXG_INITIAL;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
- uint64_t version, obj, ndraid = 0;
+ uint64_t version, obj, ndraid = 0, draid_nfgroup = 0;
boolean_t has_features;
boolean_t has_encryption;
boolean_t has_allocclass;
+ boolean_t has_draid;
+ boolean_t has_draid_fdomains;
spa_feature_t feat;
const char *feat_name;
const char *poolname;
@@ -7121,6 +7127,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
has_features = B_FALSE;
has_encryption = B_FALSE;
has_allocclass = B_FALSE;
+ has_draid = B_FALSE;
+ has_draid_fdomains = B_FALSE;
for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
if (zpool_prop_feature(nvpair_name(elem))) {
@@ -7132,6 +7140,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
has_encryption = B_TRUE;
if (feat == SPA_FEATURE_ALLOCATION_CLASSES)
has_allocclass = B_TRUE;
+ if (feat == SPA_FEATURE_DRAID)
+ has_draid = B_TRUE;
+ if (feat == SPA_FEATURE_DRAID_FAIL_DOMAINS)
+ has_draid_fdomains = B_TRUE;
}
}
@@ -7195,7 +7207,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (error == 0 &&
(error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
- (error = vdev_draid_spare_create(nvroot, rvd, &ndraid, 0)) == 0 &&
+ (error = vdev_draid_spare_create(nvroot, rvd, &ndraid,
+ &draid_nfgroup, 0)) == 0 &&
+ (ndraid == 0 || has_draid || (error = SET_ERROR(ENOTSUP))) &&
+ (draid_nfgroup == 0 || has_draid_fdomains ||
+ (error = SET_ERROR(ENOTSUP))) && error == 0 &&
(error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) {
/*
* instantiate the metaslab groups (this will dirty the vdevs)
@@ -7212,6 +7228,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_config_exit(spa, SCL_ALL, FTAG);
if (error != 0) {
+ if (errinfo != NULL) {
+ *errinfo = spa->spa_create_info;
+ spa->spa_create_info = NULL;
+ }
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
@@ -7346,6 +7366,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
for (int i = 0; i < ndraid; i++)
spa_feature_incr(spa, SPA_FEATURE_DRAID, tx);
+ for (int i = 0; i < draid_nfgroup; i++)
+ spa_feature_incr(spa, SPA_FEATURE_DRAID_FAIL_DOMAINS, tx);
+
dmu_tx_commit(tx);
spa->spa_sync_on = B_TRUE;
@@ -7943,12 +7966,25 @@ spa_draid_feature_incr(void *arg, dmu_tx_t *tx)
}
/*
+ * This is called as a synctask to increment the draid_fail_domains feature flag
+ */
+static void
+spa_draid_fdomains_feature_incr(void *arg, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ int nfgrp = (int)(uintptr_t)arg;
+
+ for (int c = 0; c < nfgrp; c++)
+ spa_feature_incr(spa, SPA_FEATURE_DRAID_FAIL_DOMAINS, tx);
+}
+
+/*
* Add a device to a storage pool.
*/
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
{
- uint64_t txg, ndraid = 0;
+ uint64_t txg, ndraid = 0, draid_nfgroup = 0;
int error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *tvd;
@@ -7987,10 +8023,15 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
* dRAID is stored in the config and used when opening the spare.
*/
if ((error = vdev_draid_spare_create(nvroot, vd, &ndraid,
- rvd->vdev_children)) == 0) {
+ &draid_nfgroup, rvd->vdev_children)) == 0) {
+
if (ndraid > 0 && nvlist_lookup_nvlist_array(nvroot,
ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0)
nspares = 0;
+
+ if (draid_nfgroup > 0 && !spa_feature_is_enabled(spa,
+ SPA_FEATURE_DRAID_FAIL_DOMAINS))
+ return (spa_vdev_exit(spa, vd, txg, ENOTSUP));
} else {
return (spa_vdev_exit(spa, vd, txg, error));
}
@@ -8077,8 +8118,15 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
dmu_tx_t *tx;
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
+
dsl_sync_task_nowait(spa->spa_dsl_pool, spa_draid_feature_incr,
(void *)(uintptr_t)ndraid, tx);
+
+ if (draid_nfgroup > 0)
+ dsl_sync_task_nowait(spa->spa_dsl_pool,
+ spa_draid_fdomains_feature_incr,
+ (void *)(uintptr_t)draid_nfgroup, tx);
+
dmu_tx_commit(tx);
}
diff --git a/sys/contrib/openzfs/module/zfs/spa_errlog.c b/sys/contrib/openzfs/module/zfs/spa_errlog.c
index 7252fd534bdf..afa9e9d0efd4 100644
--- a/sys/contrib/openzfs/module/zfs/spa_errlog.c
+++ b/sys/contrib/openzfs/module/zfs/spa_errlog.c
@@ -468,7 +468,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
kmem_free(zc, sizeof (*zc));
out:
- kmem_free(snap_obj_array, sizeof (*snap_obj_array));
+ kmem_free(snap_obj_array, snap_count * sizeof (*snap_obj_array));
return (error);
}
diff --git a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
index 32ef0aaeb858..10817c2c3df6 100644
--- a/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
+++ b/sys/contrib/openzfs/module/zfs/spa_log_spacemap.c
@@ -1160,7 +1160,7 @@ spa_ld_log_sm_data(spa_t *spa)
while (sls != NULL) {
/* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */
if (psls != NULL && pn < 16 &&
- (pn < 2 || ps < 2 * dmu_prefetch_max)) {
+ (pn < 2 || ps < dmu_prefetch_max)) {
error = space_map_open(&psls->sls_sm,
spa_meta_objset(spa), psls->sls_sm_obj, 0,
UINT64_MAX, SPA_MINBLOCKSHIFT);
@@ -1171,9 +1171,9 @@ spa_ld_log_sm_data(spa_t *spa)
(u_longlong_t)sls->sls_sm_obj, error);
goto out;
}
- dmu_prefetch(spa_meta_objset(spa), psls->sls_sm_obj,
- 0, 0, space_map_length(psls->sls_sm),
- ZIO_PRIORITY_ASYNC_READ);
+ dmu_prefetch_stream(spa_meta_objset(spa),
+ psls->sls_sm_obj, 0,
+ space_map_length(psls->sls_sm), B_TRUE);
pn++;
ps += space_map_length(psls->sls_sm);
psls = AVL_NEXT(&spa->spa_sm_logs_by_txg, psls);
diff --git a/sys/contrib/openzfs/module/zfs/space_map.c b/sys/contrib/openzfs/module/zfs/space_map.c
index f20c49ebb6de..13c606e9ff34 100644
--- a/sys/contrib/openzfs/module/zfs/space_map.c
+++ b/sys/contrib/openzfs/module/zfs/space_map.c
@@ -92,8 +92,7 @@ space_map_iterate(space_map_t *sm, uint64_t end, sm_cb_t callback, void *arg)
ASSERT3U(end, <=, space_map_length(sm));
ASSERT0(P2PHASE(end, sizeof (uint64_t)));
- dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, end,
- ZIO_PRIORITY_SYNC_READ);
+ dmu_prefetch_stream(sm->sm_os, space_map_object(sm), 0, end, B_FALSE);
int error = 0;
uint64_t txg = 0, sync_pass = 0;
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 9def59b06727..30639d7f4c7f 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -3429,23 +3429,51 @@ vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
/* leaf vdevs only */
continue;
}
+ int children = vd->vdev_children;
+ int width = children;
if (t == DTL_PARTIAL) {
/* i.e. non-zero */
minref = 1;
} else if (vdev_get_nparity(vd) != 0) {
/* RAIDZ, DRAID */
minref = vdev_get_nparity(vd) + 1;
+ if (vd->vdev_ops == &vdev_draid_ops) {
+ vdev_draid_config_t *vdc = vd->vdev_tsd;
+ minref = vdc->vdc_nparity + 1;
+ children = vdc->vdc_children;
+ }
} else {
/* any kind of mirror */
minref = vd->vdev_children;
}
+ /*
+ * For dRAID with failure domains, count failures
+ * only once for any i-th child failure in each failure
+ * group, but only if the failure threshold is not
+ * reached in any of the groups.
+ */
+ boolean_t safe2skip = B_FALSE;
+ if (width > children &&
+ vdev_draid_fail_domain_allowed(vd))
+ safe2skip = B_TRUE;
+
space_reftree_create(&reftree);
- for (int c = 0; c < vd->vdev_children; c++) {
- vdev_t *cvd = vd->vdev_child[c];
- mutex_enter(&cvd->vdev_dtl_lock);
- space_reftree_add_map(&reftree,
- cvd->vdev_dtl[s], 1);
- mutex_exit(&cvd->vdev_dtl_lock);
+ for (int c = 0; c < children; c++) {
+ for (int i = c; i < width; i += children) {
+ vdev_t *cvd = vd->vdev_child[i];
+
+ mutex_enter(&cvd->vdev_dtl_lock);
+ space_reftree_add_map(&reftree,
+ cvd->vdev_dtl[s], 1);
+ boolean_t empty =
+ zfs_range_tree_is_empty(
+ cvd->vdev_dtl[s]);
+ mutex_exit(&cvd->vdev_dtl_lock);
+
+ if (s == DTL_OUTAGE && !empty &&
+ safe2skip)
+ break;
+ }
}
space_reftree_generate_map(&reftree,
vd->vdev_dtl[t], minref);
@@ -6307,6 +6335,15 @@ end:
innvl, 6, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
+static int
+vdev_get_child_idx(vdev_t *vd, uint64_t c_guid)
+{
+ for (int c = 0; c < vd->vdev_children; c++)
+ if (vd->vdev_child[c]->vdev_guid == c_guid)
+ return (c);
+ return (0);
+}
+
int
vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
{
@@ -6413,6 +6450,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, NULL,
vdev_get_nparity(vd), ZPROP_SRC_NONE);
continue;
+ case VDEV_PROP_FDOMAIN:
+ case VDEV_PROP_FGROUP:
+ if (vd->vdev_ops->vdev_op_leaf &&
+ vd->vdev_top != NULL &&
+ vd->vdev_top->vdev_ops ==
+ &vdev_draid_ops) {
+ vdev_draid_config_t *vdc =
+ vd->vdev_top->vdev_tsd;
+ if (vdc->vdc_width == vdc->vdc_children)
+ continue;
+ int c_idx = vdev_get_child_idx(
+ vd->vdev_top, vd->vdev_guid);
+ vdev_prop_add_list(outnvl, propname,
+ NULL, prop == VDEV_PROP_FDOMAIN ?
+ (c_idx % vdc->vdc_children) :
+ (c_idx / vdc->vdc_children),
+ ZPROP_SRC_NONE);
+ }
+ continue;
case VDEV_PROP_PATH:
if (vd->vdev_path == NULL)
continue;
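
The new VDEV_PROP_FDOMAIN/VDEV_PROP_FGROUP cases above derive both values from the leaf's index under its dRAID top-level vdev: the failure domain is the child index modulo vdc_children, and the failure group is the integer quotient. An illustrative standalone sketch of that mapping (plain C; the 7-wide/14-child layout is a hypothetical example, not from the patch):

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * Illustrative only: mirror the fdomain/fgroup arithmetic used in
	 * vdev_prop_get() above for a dRAID with 'children' disks per slice.
	 */
	static void
	print_fdomain_fgroup(uint64_t c_idx, uint64_t children)
	{
		uint64_t fdomain = c_idx % children;	/* position within a slice */
		uint64_t fgroup = c_idx / children;	/* which slice (failure group) */

		printf("child %llu -> fdomain %llu, fgroup %llu\n",
		    (unsigned long long)c_idx,
		    (unsigned long long)fdomain,
		    (unsigned long long)fgroup);
	}

	int
	main(void)
	{
		/* Hypothetical layout: 7 children per slice, 14-wide big row. */
		for (uint64_t c = 0; c < 14; c++)
			print_fdomain_fgroup(c, 7);
		return (0);
	}
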
diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c
index 9e02d868213b..c76557e80c9b 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@@ -23,6 +23,8 @@
* Copyright (c) 2018 Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2025, Klara, Inc.
+ * Copyright (c) 2026, Seagate Technology, LLC.
+ * Copyright (c) 2026, Wasabi Technologies, Inc.
*/
#include <sys/zfs_context.h>
@@ -140,6 +142,57 @@
* the same for all groups (although some of the logic around computing
* permutation numbers and drive offsets is more complicated).
*
+ * === dRAID failure domains ===
+ *
+ * If we put several slices side by side in a row and configure each disk
+ * in a slice to be from a different failure domain (for example an
+ * enclosure), we can then tolerate the failure of an entire domain -- only
+ * one device will fail in every slice in this case. We call the column of
+ * such slices a failure group, and the row of slices laid side by side a
+ * "big width row", the width being a multiple of children (W = C*n).
+ *
+ * Here's an example of configuration with 7 failure domains and two
+ * failure groups:
+ *
+ * 7 C disks in each slice, 2 slices in big 14 W rows
+ * +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ * | 1 | 7 | 3 | 9 | 11| 5 | 13| 6 | 10| 4 | 8 | 0 | 12| 2 | device map 0
+ * s +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ * l | group 0 | gr1..| S | group 3 | gr4.. | S | row 0
+ * c +-------+-------+-------+---+-------+-------+-------+---+
+ * 0,1 | ..gr1 | group 2 | S | ..gr4 | group 5 | S | row 1
+ * +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ * | 2 | 10| 12| 7 | 8 | 13| 11| 1 | 5 | 4 | 6 | 3 | 9 | 0 | device map 1
+ * s +===+===+===+===+===+===+===+===+===+===+===+===+===+===+
+ * l | group 6 | gr7..| S | group 9 |gr10.. | S | row 2
+ * c +-------+-------+-------+---+---------------+-------+---+
+ * 2,3 | ..gr7 | group 8 | S |..gr10 | group 11 | S | row 3
+ * +-------+---------------+---+-------+---------------+---+
+ * failure group 0 failure group 1
+ *
+ * In practice, there may be many more failure groups. And in theory, the
+ * width of the big rows could be much larger than the current limit of 255
+ * imposed on the number of children, but we keep the same limit for now
+ * for the sake of implementation simplicity.
+ *
+ * In order to preserve fast sequential resilvering in case of a disk failure,
+ * all failure groups must share all disks between themselves, and this is
+ * achieved by shuffling the disks between the groups. However, only the
+ * i-th disks in each group are shuffled among themselves, i.e. disks from
+ * the same failure domain (enclosure). After that, they are shuffled within
+ * each group. Thus, no more than one disk from any failure domain can appear
+ * in any failure group as a result of this shuffling. In the above example,
+ * you won't find any tuple of (0, 7) or (1, 8) or (2, 9) or ... (6, 13)
+ * mapped to the same slice. This is done in vdev_draid_shuffle_perms().
+ *
+ * Spare disks are evenly distributed among failure groups, and they are
+ * shared by all groups. However, to support domain failure, we cannot have
+ * more than (nparity - 1) failed disks in any group, regardless of whether
+ * they have been rebuilt onto draid spares (the blocks of those spares can
+ * be mapped to disks from the failed domain (enclosure), and we cannot
+ * tolerate more than nparity failures in any failure group).
+ *
+ *
* N.B. The following array describes all valid dRAID permutation maps.
* Each row is used to generate a permutation map for a different number
* of children from a unique seed. The seeds were generated and carefully
@@ -537,6 +590,73 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
return (0);
}
+static void
+vdev_draid_swap_perms(uint8_t *perms, uint64_t i, uint64_t j)
+{
+ uint8_t val = perms[i];
+
+ perms[i] = perms[j];
+ perms[j] = val;
+}
+
+/*
+ * Shuffle every i-th disk in the slices that lie side by side in the big
+ * width row, increasing disk indices in each subsequent slice accordingly. The
+ * input to this function is the array of ready permutations from
+ * vdev_draid_generate_perms(), so in order to correctly shuffle i-th disks,
+ * we need to locate their position first and build a map of their locations.
+ *
+ * Note: the same Fisher-Yates shuffle algorithm is used as in
+ * vdev_draid_generate_perms().
+ */
+static void
+vdev_draid_shuffle_perms(const draid_map_t *map, uint8_t *perms, uint64_t width)
+{
+ uint64_t cn = map->dm_children;
+ uint64_t n = width / cn;
+ uint64_t nperms = map->dm_nperms / n * n;
+
+ if (width <= cn)
+ return;
+
+ VERIFY3U(width, >=, VDEV_DRAID_MIN_CHILDREN);
+ VERIFY3U(width, <=, VDEV_DRAID_MAX_CHILDREN);
+ ASSERT0(width % cn);
+
+ uint64_t draid_seed[2] = { VDEV_DRAID_SEED, map->dm_seed };
+
+ uint8_t *cmap = kmem_alloc(n, KM_SLEEP);
+
+ for (int i = 0; i < nperms; i += n) {
+ for (int j = 0; j < cn; j++) {
+
+ /* locate position of the same child in other slices */
+ for (int k = n - 1; k > 0; k--)
+ for (int l = 0; l < cn; l++)
+ if (perms[(i+k) * cn + l] ==
+ perms[(i+0) * cn + j])
+ cmap[k] = l;
+ cmap[0] = j;
+
+ /* increase index values for slices on the right */
+ for (int k = n - 1; k > 0; k--)
+ perms[(i+k) * cn + cmap[k]] += k * cn;
+
+ /* shuffle */
+ for (int k = n - 1; k > 0; k--) {
+ int l = vdev_draid_rand(draid_seed) % (k + 1);
+ if (k == l)
+ continue;
+ vdev_draid_swap_perms(perms,
+ (i+k) * cn + cmap[k],
+ (i+l) * cn + cmap[l]);
+ }
+ }
+ }
+
+ kmem_free(cmap, n);
+}
+
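
The comment above notes that the shuffle reuses the Fisher-Yates algorithm from vdev_draid_generate_perms(). A tiny standalone sketch of that basic algorithm over one slice; the xorshift generator is a stand-in for vdev_draid_rand(), and the 7-entry permutation is a hypothetical example:

	#include <stdio.h>
	#include <stdint.h>

	/* Illustrative only: toy PRNG standing in for vdev_draid_rand(). */
	static uint64_t
	toy_rand(uint64_t *state)
	{
		uint64_t x = *state;

		x ^= x << 13;
		x ^= x >> 7;
		x ^= x << 17;
		return (*state = x);
	}

	int
	main(void)
	{
		uint8_t perm[7] = { 0, 1, 2, 3, 4, 5, 6 };
		uint64_t seed = 0x9e3779b97f4a7c15ULL;

		/* Fisher-Yates: swap each slot with a random earlier slot. */
		for (int k = 6; k > 0; k--) {
			int l = toy_rand(&seed) % (k + 1);
			uint8_t tmp = perm[k];

			perm[k] = perm[l];
			perm[l] = tmp;
		}
		for (int i = 0; i < 7; i++)
			printf("%u ", perm[i]);
		printf("\n");
		return (0);
	}
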
/*
* Lookup the fixed draid_map_t for the requested number of children.
*/
@@ -560,17 +680,26 @@ static void
vdev_draid_get_perm(vdev_draid_config_t *vdc, uint64_t pindex,
uint8_t **base, uint64_t *iter)
{
+ uint64_t n = vdc->vdc_width / vdc->vdc_children;
uint64_t ncols = vdc->vdc_children;
- uint64_t poff = pindex % (vdc->vdc_nperms * ncols);
+ uint64_t nperms = (vdc->vdc_nperms / n) * n;
+ uint64_t poff = pindex % (nperms * ncols);
- *base = vdc->vdc_perms + (poff / ncols) * ncols;
- *iter = poff % ncols;
+ ASSERT3P(nperms, >=, ncols * n);
+
+ *base = vdc->vdc_perms + (poff / (ncols * n)) * (ncols * n);
+ *iter = (poff % ncols) + (pindex % n) * ncols;
}
static inline uint64_t
vdev_draid_permute_id(vdev_draid_config_t *vdc,
uint8_t *base, uint64_t iter, uint64_t index)
{
+ if (vdc->vdc_width > vdc->vdc_children) {
+ uint64_t off = (iter / vdc->vdc_children) * vdc->vdc_children;
+ return (base[(index + iter) % vdc->vdc_children + off]);
+ }
+
return ((base[index] + iter) % vdc->vdc_children);
}
@@ -899,7 +1028,7 @@ vdev_draid_map_verify_empty(zio_t *zio, raidz_row_t *rr)
*/
static uint64_t
vdev_draid_logical_to_physical(vdev_t *vd, uint64_t logical_offset,
- uint64_t *perm, uint64_t *start)
+ uint64_t *perm, uint64_t *start, uint64_t *ndisks)
{
vdev_draid_config_t *vdc = vd->vdev_tsd;
@@ -925,16 +1054,31 @@ vdev_draid_logical_to_physical(vdev_t *vd, uint64_t logical_offset,
*/
uint64_t groupwidth = vdc->vdc_groupwidth;
uint64_t ngroups = vdc->vdc_ngroups;
- uint64_t ndisks = vdc->vdc_ndisks;
+
+ uint64_t group = logical_offset / vdc->vdc_groupsz;
+ uint64_t fgrps = vdc->vdc_width / vdc->vdc_children;
+
+ *perm = (group / ngroups) * fgrps;
/*
- * groupstart is where the group this IO will land in "starts" in
- * the permutation array.
+ * Failure groups starting from (vdc_nspares % fgrps) have one spare
+ * fewer, and therefore one more disk counted in ndisks.
*/
- uint64_t group = logical_offset / vdc->vdc_groupsz;
- uint64_t groupstart = (group * groupwidth) % ndisks;
- ASSERT3U(groupstart + groupwidth, <=, ndisks + groupstart);
- *start = groupstart;
+ uint64_t biggies = vdc->vdc_nspares % fgrps;
+
+ uint64_t poff = 0;
+ group %= ngroups;
+ uint64_t ngroups1 = ngroups / fgrps;
+ if (!biggies || group < biggies * ngroups1)
+ poff = group / ngroups1;
+ else
+ poff = biggies +
+ (group - (biggies * ngroups1)) / (ngroups1 + 1);
+ ASSERT3U(poff, <, fgrps);
+ *perm += poff;
+
+ *ndisks = (vdc->vdc_ndisks / fgrps) +
+ (biggies ? ((poff >= biggies) ? 1 : 0) : 0);
/* b_offset is the sector offset within a group chunk */
b_offset = b_offset % (rowheight_sectors * groupwidth);
@@ -948,9 +1092,33 @@ vdev_draid_logical_to_physical(vdev_t *vd, uint64_t logical_offset,
* - so each permutation covers rows * slice portion of the disk
* - so we need to find the row where this IO group target begins
*/
- *perm = group / ngroups;
- uint64_t row = (*perm * ((groupwidth * ngroups) / ndisks)) +
- (((group % ngroups) * groupwidth) / ndisks);
+ uint64_t perm_rows = (groupwidth * ngroups) / vdc->vdc_ndisks;
+
+ /* Adjust group for our failure group. */
+ if (!biggies || poff <= biggies)
+ group -= poff * ngroups1;
+ else
+ group -= (biggies * ngroups1) +
+ (poff - biggies) * (ngroups1 + 1);
+
+ IMPLY(poff < biggies, group < ngroups1);
+ ASSERT3U(group, <=, ngroups1);
+
+ /*
+ * groupstart is where the group this IO will land in "starts" in
+ * the permutation array.
+ */
+ uint64_t groupstart = (group * groupwidth) % *ndisks;
+ ASSERT3U(groupstart + groupwidth, <=, *ndisks + groupstart);
+ *start = groupstart;
+
+ /* Adjust ngroups for our failure group. */
+ ngroups = ngroups1 + ((biggies && poff >= biggies) ? 1 : 0);
+
+ ASSERT3U(group, <, ngroups);
+
+ uint64_t row = ((*perm / fgrps) * perm_rows) +
+ (((group % ngroups) * groupwidth) / *ndisks);
return (((rowheight_sectors * row) +
(b_offset / groupwidth)) << ashift);
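
The rework above spreads the logical groups of one big row across the failure groups: the first biggies = nspares % fgrps failure groups carry ngroups1 = ngroups / fgrps groups each, and the remaining ones carry ngroups1 + 1. A small standalone sketch that reuses the same arithmetic to show which failure group (poff) each logical group maps to; the parameters (2 failure groups, 7 groups, 3 spares) are hypothetical and chosen so the groups divide across the failure groups:

	#include <stdio.h>
	#include <stdint.h>

	/*
	 * Illustrative only: mirror the poff computation from
	 * vdev_draid_logical_to_physical() above.
	 */
	int
	main(void)
	{
		uint64_t fgrps = 2, ngroups = 7, nspares = 3;
		uint64_t biggies = nspares % fgrps;
		uint64_t ngroups1 = ngroups / fgrps;

		for (uint64_t group = 0; group < ngroups; group++) {
			uint64_t poff;

			if (!biggies || group < biggies * ngroups1)
				poff = group / ngroups1;
			else
				poff = biggies +
				    (group - (biggies * ngroups1)) /
				    (ngroups1 + 1);

			printf("group %llu -> failure group %llu\n",
			    (unsigned long long)group,
			    (unsigned long long)poff);
		}
		return (0);
	}
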
@@ -989,16 +1157,15 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
vdev_draid_group_to_offset(vd, group) == io_offset);
/* Lookup starting byte offset on each child vdev */
- uint64_t groupstart, perm;
+ uint64_t groupstart, perm, ndisks;
uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
- io_offset, &perm, &groupstart);
+ io_offset, &perm, &groupstart, &ndisks);
/*
* If there is less than groupwidth drives available after the group
* start, the group is going to wrap onto the next row. 'wrap' is the
* group disk number that starts on the next row.
*/
- uint64_t ndisks = vdc->vdc_ndisks;
uint64_t groupwidth = vdc->vdc_groupwidth;
uint64_t wrap = groupwidth;
@@ -1161,7 +1328,7 @@ vdev_draid_get_astart(vdev_t *vd, const uint64_t start)
/*
* Allocatable space for dRAID is (children - nspares) * sizeof(smallest child)
* rounded down to the last full slice. So each child must provide at least
- * 1 / (children - nspares) of its asize.
+ * 1 / (children - nspares) of its asize rounded up to VDEV_DRAID_ROWHEIGHT.
*/
static uint64_t
vdev_draid_min_asize(vdev_t *vd)
@@ -1171,7 +1338,8 @@ vdev_draid_min_asize(vdev_t *vd)
ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
return (VDEV_DRAID_REFLOW_RESERVE +
- (vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks));
+ DIV_ROUND_UP(DIV_ROUND_UP(vd->vdev_min_asize, vdc->vdc_ndisks),
+ VDEV_DRAID_ROWHEIGHT) * VDEV_DRAID_ROWHEIGHT);
}
/*
@@ -1189,7 +1357,7 @@ vdev_draid_min_alloc(vdev_t *vd)
}
/*
- * Returns true if the txg range does not exist on any leaf vdev.
+ * Returns false if the txg range exists on any leaf vdev, true otherwise.
*
* A dRAID spare does not fit into the DTL model. While it has child vdevs
* there is no redundancy among them, and the effective child vdev is
@@ -1247,8 +1415,7 @@ vdev_draid_missing(vdev_t *vd, uint64_t physical_offset, uint64_t txg,
if (vd == NULL)
return (B_TRUE);
- return (vdev_draid_missing(vd, physical_offset,
- txg, size));
+ return (vdev_draid_missing(vd, physical_offset, txg, size));
}
return (vdev_dtl_contains(vd, DTL_MISSING, txg, size));
@@ -1396,16 +1563,16 @@ vdev_draid_group_degraded(vdev_t *vd, uint64_t offset)
ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
ASSERT3U(vdev_draid_get_astart(vd, offset), ==, offset);
- uint64_t groupstart, perm;
+ uint64_t groupstart, perm, ndisks;
uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
- offset, &perm, &groupstart);
+ offset, &perm, &groupstart, &ndisks);
uint8_t *base;
uint64_t iter;
vdev_draid_get_perm(vdc, perm, &base, &iter);
for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
- uint64_t c = (groupstart + i) % vdc->vdc_ndisks;
+ uint64_t c = (groupstart + i) % ndisks;
uint64_t cid = vdev_draid_permute_id(vdc, base, iter, c);
vdev_t *cvd = vd->vdev_child[cid];
@@ -1436,16 +1603,16 @@ vdev_draid_group_missing(vdev_t *vd, uint64_t offset, uint64_t txg,
ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
ASSERT3U(vdev_draid_get_astart(vd, offset), ==, offset);
- uint64_t groupstart, perm;
+ uint64_t groupstart, perm, ndisks;
uint64_t physical_offset = vdev_draid_logical_to_physical(vd,
- offset, &perm, &groupstart);
+ offset, &perm, &groupstart, &ndisks);
uint8_t *base;
uint64_t iter;
vdev_draid_get_perm(vdc, perm, &base, &iter);
for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
- uint64_t c = (groupstart + i) % vdc->vdc_ndisks;
+ uint64_t c = (groupstart + i) % ndisks;
uint64_t cid = vdev_draid_permute_id(vdc, base, iter, c);
vdev_t *cvd = vd->vdev_child[cid];
@@ -1528,7 +1695,7 @@ vdev_draid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
int open_errors = 0;
if (nparity > VDEV_DRAID_MAXPARITY ||
- vd->vdev_children < nparity + 1) {
+ vdc->vdc_children < nparity + 1) {
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
return (SET_ERROR(EINVAL));
}
@@ -1541,12 +1708,26 @@ vdev_draid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
vdev_open_children_subset(vd, vdev_draid_open_children);
vdev_open_children_subset(vd, vdev_draid_open_spares);
- /* Verify enough of the children are available to continue. */
- for (int c = 0; c < vd->vdev_children; c++) {
- if (vd->vdev_child[c]->vdev_open_error != 0) {
- if ((++open_errors) > nparity) {
- vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
- return (SET_ERROR(ENXIO));
+ /*
+ * Verify enough of the children are available to continue.
+ * If several disks have failed at the i-th position of each slice in
+ * the big width row (failure groups), they are counted as one failure,
+ * but only if the failure threshold is not reached in any group.
+ */
+ boolean_t safe2skip = B_FALSE;
+ if (vdc->vdc_width > vdc->vdc_children &&
+ vdev_draid_fail_domain_allowed(vd))
+ safe2skip = B_TRUE;
+ for (int c = 0; c < vdc->vdc_children; c++) {
+ for (int i = c; i < vdc->vdc_width; i += vdc->vdc_children) {
+ if (vd->vdev_child[i]->vdev_open_error != 0) {
+ if ((++open_errors) > nparity) {
+ vd->vdev_stat.vs_aux =
+ VDEV_AUX_NO_REPLICAS;
+ return (SET_ERROR(ENXIO));
+ }
+ if (safe2skip)
+ break;
}
}
}
@@ -1581,6 +1762,16 @@ vdev_draid_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*max_asize = (((child_max_asize * vdc->vdc_ndisks) / vdc->vdc_groupsz) *
vdc->vdc_groupsz);
+ /*
+ * For failure groups with multiple slices in the big width row,
+ * round down to the big slice size.
+ */
+ if (vdc->vdc_width > vdc->vdc_children) {
+ uint64_t slicesz = vdc->vdc_devslicesz * vdc->vdc_ndisks;
+ *asize = (*asize / slicesz) * slicesz;
+ *max_asize = (*max_asize / slicesz) * slicesz;
+ }
+
return (0);
}
@@ -1667,10 +1858,11 @@ vdev_draid_metaslab_init(vdev_t *vd, uint64_t *ms_start, uint64_t *ms_size)
*/
int
vdev_draid_spare_create(nvlist_t *nvroot, vdev_t *vd, uint64_t *ndraidp,
- uint64_t next_vdev_id)
+ uint64_t *nfgroupp, uint64_t next_vdev_id)
{
uint64_t draid_nspares = 0;
uint64_t ndraid = 0;
+ uint64_t nfgroup = 0;
int error;
for (uint64_t i = 0; i < vd->vdev_children; i++) {
@@ -1680,11 +1872,14 @@ vdev_draid_spare_create(nvlist_t *nvroot, vdev_t *vd, uint64_t *ndraidp,
vdev_draid_config_t *vdc = cvd->vdev_tsd;
draid_nspares += vdc->vdc_nspares;
ndraid++;
+ if (vdc->vdc_width > vdc->vdc_children)
+ nfgroup++;
}
}
if (draid_nspares == 0) {
*ndraidp = ndraid;
+ *nfgroupp = nfgroup;
return (0);
}
@@ -1752,6 +1947,7 @@ vdev_draid_spare_create(nvlist_t *nvroot, vdev_t *vd, uint64_t *ndraidp,
kmem_free(new_spares, sizeof (*new_spares) * n);
*ndraidp = ndraid;
+ *nfgroupp = nfgroup;
return (0);
}
@@ -1907,12 +2103,34 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
}
if (vdev_draid_missing(cvd, rc->rc_offset, zio->io_txg, 1)) {
+ vdev_t *svd;
+
if (c >= rr->rr_firstdatacol)
rr->rr_missingdata++;
else
rr->rr_missingparity++;
rc->rc_error = SET_ERROR(ESTALE);
rc->rc_skipped = 1;
+
+ /*
+ * If this child has a draid spare attached, and that
+ * spare at rc_offset maps to another spare, the repair
+ * will go to that spare, and we want all mirrored
+ * children on it to be updated with the repaired data,
+ * even when we cannot vouch for it during rebuilds
+ * (which lack checksums). Otherwise, we would see many
+ * checksum errors on those spares during scrub. The
+ * worst that can happen in this case is that we update
+ * the reserved spare column on some device with
+ * unverified data, which is harmless.
+ */
+ if ((svd = vdev_draid_find_spare(cvd)) != NULL) {
+ svd = vdev_draid_spare_get_child(svd,
+ rc->rc_offset);
+ if (svd && (svd->vdev_ops == &vdev_spare_ops ||
+ svd->vdev_ops == &vdev_replacing_ops))
+ rc->rc_tgt_is_dspare = 1;
+ }
continue;
}
@@ -1930,34 +2148,15 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
vdev_t *svd;
/*
- * Sequential rebuilds need to always consider the data
- * on the child being rebuilt to be stale. This is
- * important when all columns are available to aid
- * known reconstruction in identifing which columns
- * contain incorrect data.
- *
- * Furthermore, all repairs need to be constrained to
- * the devices being rebuilt because without a checksum
- * we cannot verify the data is actually correct and
- * performing an incorrect repair could result in
- * locking in damage and making the data unrecoverable.
+ * Repairs need to be constrained to the devices being
+ * rebuilt since without a checksum we cannot verify the
+ * data is actually correct and performing an incorrect
+ * repair could result in locking in the damage and
+ * making the data unrecoverable.
*/
- if (zio->io_priority == ZIO_PRIORITY_REBUILD) {
- if (vdev_draid_rebuilding(cvd)) {
- if (c >= rr->rr_firstdatacol)
- rr->rr_missingdata++;
- else
- rr->rr_missingparity++;
- rc->rc_error = SET_ERROR(ESTALE);
- rc->rc_skipped = 1;
- rc->rc_allow_repair = 1;
- continue;
- } else {
- rc->rc_allow_repair = 0;
- }
- } else {
- rc->rc_allow_repair = 1;
- }
+ if (zio->io_priority == ZIO_PRIORITY_REBUILD &&
+ !vdev_draid_rebuilding(cvd))
+ rc->rc_allow_repair = 0;
/*
* If this child is a distributed spare then the
@@ -2093,7 +2292,7 @@ vdev_draid_state_change(vdev_t *vd, int faulted, int degraded)
vdev_draid_config_t *vdc = vd->vdev_tsd;
ASSERT(vd->vdev_ops == &vdev_draid_ops);
- if (faulted > vdc->vdc_nparity)
+ if (faulted > vdc->vdc_nparity * (vdc->vdc_width / vdc->vdc_children))
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_NO_REPLICAS);
else if (degraded + faulted != 0)
@@ -2147,9 +2346,9 @@ vdev_draid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs,
logical_end = nextstart;
/* Find the starting offset for each vdev in the group */
- uint64_t perm, groupstart;
+ uint64_t perm, groupstart, ndisks;
uint64_t start = vdev_draid_logical_to_physical(raidvd,
- logical_start, &perm, &groupstart);
+ logical_start, &perm, &groupstart, &ndisks);
uint64_t end = start;
uint8_t *base;
@@ -2163,7 +2362,7 @@ vdev_draid_xlate(vdev_t *cvd, const zfs_range_seg64_t *logical_rs,
* (zero-length) physical range being returned.
*/
for (uint64_t i = 0; i < vdc->vdc_groupwidth; i++) {
- uint64_t c = (groupstart + i) % vdc->vdc_ndisks;
+ uint64_t c = (groupstart + i) % ndisks;
if (c == 0 && i != 0) {
/* the group wrapped, increment the start */
@@ -2210,6 +2409,10 @@ vdev_draid_config_generate(vdev_t *vd, nvlist_t *nv)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, vdc->vdc_ndata);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, vdc->vdc_nspares);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, vdc->vdc_ngroups);
+
+ if (spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_DRAID_FAIL_DOMAINS))
+ fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NCHILDREN,
+ vdc->vdc_children);
}
/*
@@ -2230,30 +2433,44 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
return (SET_ERROR(EINVAL));
}
- uint_t children;
+ uint_t width;
+ uint64_t children;
nvlist_t **child;
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0 || children == 0 ||
- children > VDEV_DRAID_MAX_CHILDREN) {
+ &child, &width) != 0 || width == 0) {
return (SET_ERROR(EINVAL));
}
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, &nspares) ||
- nspares > 100 || nspares > (children - (ndata + nparity))) {
- return (SET_ERROR(EINVAL));
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NCHILDREN, &children)) {
+ children = width;
+ if (children > VDEV_DRAID_MAX_CHILDREN)
+ return (SET_ERROR(EINVAL));
}
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, &ngroups) ||
- ngroups == 0 || ngroups > VDEV_DRAID_MAX_CHILDREN) {
+ if (children == 0 || width % children != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, &nspares) ||
+ nspares > 100) {
return (SET_ERROR(EINVAL));
}
+ int fgrps = width / children;
+ int nspare = nspares / fgrps;
+ if (nspares % fgrps)
+ nspare++;
+
/*
* Validate the minimum number of children exist per group for the
* specified parity level (draid1 >= 2, draid2 >= 3, draid3 >= 4).
*/
- if (children < (ndata + nparity + nspares))
+ if (children < (ndata + nparity + nspare))
+ return (SET_ERROR(EINVAL));
+
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, &ngroups) ||
+ ngroups == 0 || ngroups > VDEV_DRAID_MAX_CHILDREN) {
return (SET_ERROR(EINVAL));
+ }
/*
* Create the dRAID configuration using the pool nvlist configuration
@@ -2272,6 +2489,7 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
vdc->vdc_nspares = nspares;
vdc->vdc_children = children;
vdc->vdc_ngroups = ngroups;
+ vdc->vdc_width = width;
vdc->vdc_nperms = map->dm_nperms;
error = vdev_draid_generate_perms(map, &vdc->vdc_perms);
@@ -2280,11 +2498,14 @@ vdev_draid_init(spa_t *spa, nvlist_t *nv, void **tsd)
return (SET_ERROR(EINVAL));
}
+ if (width > children)
+ vdev_draid_shuffle_perms(map, vdc->vdc_perms, width);
+
/*
* Derived constants.
*/
vdc->vdc_groupwidth = vdc->vdc_ndata + vdc->vdc_nparity;
- vdc->vdc_ndisks = vdc->vdc_children - vdc->vdc_nspares;
+ vdc->vdc_ndisks = vdc->vdc_width - vdc->vdc_nspares;
vdc->vdc_groupsz = vdc->vdc_groupwidth * VDEV_DRAID_ROWHEIGHT;
vdc->vdc_devslicesz = (vdc->vdc_groupsz * vdc->vdc_ngroups) /
vdc->vdc_ndisks;
@@ -2317,7 +2538,7 @@ vdev_draid_nparity(vdev_t *vd)
{
vdev_draid_config_t *vdc = vd->vdev_tsd;
- return (vdc->vdc_nparity);
+ return (vdc->vdc_nparity * (vdc->vdc_width / vdc->vdc_children));
}
static uint64_t
@@ -2429,17 +2650,25 @@ vdev_draid_spare_get_child(vdev_t *vd, uint64_t physical_offset)
vdev_t *tvd = vds->vds_draid_vdev;
vdev_draid_config_t *vdc = tvd->vdev_tsd;
+ uint64_t fgrps = vdc->vdc_width / vdc->vdc_children;
+
ASSERT3P(tvd->vdev_ops, ==, &vdev_draid_ops);
ASSERT3U(vds->vds_spare_id, <, vdc->vdc_nspares);
uint8_t *base;
uint64_t iter;
- uint64_t perm = physical_offset / vdc->vdc_devslicesz;
+ uint64_t perm = (physical_offset / vdc->vdc_devslicesz) * fgrps;
+
+ /*
+ * Adjust permutation so that it points to the correct slice in the
+ * big width row.
+ */
+ perm += vds->vds_spare_id % fgrps;
vdev_draid_get_perm(vdc, perm, &base, &iter);
uint64_t cid = vdev_draid_permute_id(vdc, base, iter,
- (tvd->vdev_children - 1) - vds->vds_spare_id);
+ (vdc->vdc_children - 1) - (vds->vds_spare_id / fgrps));
vdev_t *cvd = tvd->vdev_child[cid];
if (cvd->vdev_ops == &vdev_draid_spare_ops)
@@ -2448,6 +2677,40 @@ vdev_draid_spare_get_child(vdev_t *vd, uint64_t physical_offset)
return (cvd);
}
+/*
+ * Returns true if no failure group has reached the failure threshold beyond
+ * which an enclosure failure can no longer be tolerated. Used spares are
+ * counted as failures because, in case of an enclosure failure, their blocks
+ * can belong to disks from that enclosure and can be lost.
+ */
+boolean_t
+vdev_draid_fail_domain_allowed(vdev_t *vd)
+{
+ vdev_draid_config_t *vdc = vd->vdev_tsd;
+
+ ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
+ ASSERT3P(vdc->vdc_width, >, vdc->vdc_children);
+
+ int counter = 0;
+
+ for (int c = 0; c < vdc->vdc_width; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if ((c % vdc->vdc_children) == 0)
+ counter = 0;
+
+ if (cvd->vdev_ops == &vdev_spare_ops ||
+ cvd->vdev_ops == &vdev_draid_spare_ops ||
+ !vdev_readable(cvd))
+ counter++;
+
+ if (counter > vdc->vdc_nparity)
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
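
The check above resets its counter at the start of each slice, so it effectively asks whether any failure group exceeds nparity failed or spare-consumed positions. An illustrative standalone sketch of the same counting over a flat array of flags; the 14-wide draid2 layout and the specific failed positions are hypothetical:

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * Illustrative only: per-failure-group counting equivalent to
	 * vdev_draid_fail_domain_allowed() above.
	 */
	static bool
	fail_domain_allowed(const bool *failed, uint64_t width,
	    uint64_t children, uint64_t nparity)
	{
		uint64_t counter = 0;

		for (uint64_t c = 0; c < width; c++) {
			if ((c % children) == 0)
				counter = 0;	/* new failure group (slice) */
			if (failed[c])
				counter++;
			if (counter > nparity)
				return (false);
		}
		return (true);
	}

	int
	main(void)
	{
		/* Hypothetical 14-wide row: 2 slices of 7 children, draid2. */
		bool failed[14] = { false };

		failed[3] = true;		/* one failure in slice 0 */
		failed[7] = failed[8] = true;	/* two failures in slice 1 */
		printf("allowed: %d\n",
		    fail_domain_allowed(failed, 14, 7, 2));

		failed[9] = true;		/* third failure in slice 1 */
		printf("allowed: %d\n",
		    fail_domain_allowed(failed, 14, 7, 2));
		return (0);
	}
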
static void
vdev_draid_spare_close(vdev_t *vd)
{
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index 16ba09c6f295..b1371b0349c6 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -1109,8 +1109,29 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
* Determine if the vdev is in use.
*/
if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT &&
- vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
+ vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) {
+ if (spa->spa_create_info == NULL) {
+ nvlist_t *nv = fnvlist_alloc();
+ nvlist_t *cfg;
+
+ if (vd->vdev_path != NULL)
+ fnvlist_add_string(nv,
+ ZPOOL_CREATE_INFO_VDEV, vd->vdev_path);
+
+ cfg = vdev_label_read_config(vd, -1ULL);
+ if (cfg != NULL) {
+ const char *pname;
+ if (nvlist_lookup_string(cfg,
+ ZPOOL_CONFIG_POOL_NAME, &pname) == 0)
+ fnvlist_add_string(nv,
+ ZPOOL_CREATE_INFO_POOL, pname);
+ nvlist_free(cfg);
+ }
+
+ spa->spa_create_info = nv;
+ }
return (SET_ERROR(EBUSY));
+ }
/*
* If this is a request to add or replace a spare or l2cache device
diff --git a/sys/contrib/openzfs/module/zfs/vdev_mirror.c b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
index 18efdaac006f..35a4a5bebecf 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@@ -669,18 +669,19 @@ vdev_mirror_io_start(zio_t *zio)
}
while (children--) {
- mc = &mm->mm_child[c];
- c++;
+ mc = &mm->mm_child[c++];
/*
- * When sequentially resilvering only issue write repair
- * IOs to the vdev which is being rebuilt since performance
- * is limited by the slowest child. This is an issue for
- * faster replacement devices such as distributed spares.
+ * When sequentially resilvering and the integrity of the data
+ * is speculative (ZIO_FLAG_SPECULATIVE), issue write repair IOs
+ * only to the vdev which is being rebuilt. Existing data on
+ * other children must never be overwritten with unconfirmed
+ * data to avoid unrecoverable damage to the pool.
*/
if ((zio->io_priority == ZIO_PRIORITY_REBUILD) &&
(zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
!(zio->io_flags & ZIO_FLAG_SCRUB) &&
+ (zio->io_flags & ZIO_FLAG_SPECULATIVE) &&
mm->mm_rebuilding && !mc->mc_rebuilding) {
continue;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
index 520ddd692bda..aa44acbf39cb 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@@ -25,6 +25,7 @@
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
+ * Copyright (c) 2026, Wasabi Technologies, Inc.
*/
#include <sys/zfs_context.h>
@@ -3104,6 +3105,7 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
int parity_errors = 0;
int parity_untried = 0;
int data_errors = 0;
+ zio_flag_t add_flags = 0;
ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
@@ -3134,10 +3136,30 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
* Note that we also regenerate parity when resilvering so we
* can write it out to failed devices later.
*/
- if (parity_errors + parity_untried <
- rr->rr_firstdatacol - data_errors ||
- (zio->io_flags & ZIO_FLAG_RESILVER)) {
+ boolean_t parity_verify = (parity_errors + parity_untried) <
+ (rr->rr_firstdatacol - data_errors);
+ if (parity_verify || (zio->io_flags & ZIO_FLAG_RESILVER)) {
int n = raidz_parity_verify(zio, rr);
+ /*
+ * In Reed-Solomon encoding, if we have ndata+1 columns and
+ * the parity doesn't match, it means the data integrity is
+ * compromised. We shouldn't try to repair anything in this
+ * case.
+ */
+ if (parity_verify && n > 0 &&
+ zio->io_priority == ZIO_PRIORITY_REBUILD)
+ return;
+ /*
+ * If we have only ndata columns, data integrity will
+ * normally be checked by the checksums, but not during
+ * a rebuild, when we don't have checksums. In that case,
+ * we add ZIO_FLAG_SPECULATIVE and try not to spread
+ * unverified data. For example, when the target vdev
+ * happens to be a mirroring spare vdev, we repair only
+ * the child in it that is being rebuilt.
+ */
+ if (!parity_verify && zio->io_priority == ZIO_PRIORITY_REBUILD)
+ add_flags |= ZIO_FLAG_SPECULATIVE;
unexpected_errors += n;
}
@@ -3163,13 +3185,27 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
*/
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);
+ /*
+ * When the target vdev is a draid spare, we should clear
+ * ZIO_FLAG_SPECULATIVE. First, if that draid spare maps
+ * to another spare with an online/degraded disk, that
+ * disk must be repaired as well; otherwise, a later
+ * scrub will detect many cksum errors. Second, since it
+ * is a draid spare, there is no harm in updating its
+ * content on any vdev it maps to, because the space is
+ * reserved as a spare anyway.
+ */
+ zio_flag_t aflags = add_flags;
+ if (rc->rc_tgt_is_dspare)
+ aflags &= ~ZIO_FLAG_SPECULATIVE;
+
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
rc->rc_offset, rc->rc_abd, rc->rc_size,
ZIO_TYPE_WRITE,
zio->io_priority == ZIO_PRIORITY_REBUILD ?
ZIO_PRIORITY_REBUILD : ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
- ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
+ ZIO_FLAG_SELF_HEAL : 0) | aflags, NULL, NULL));
}
}
@@ -3271,11 +3307,18 @@ raidz_simulate_failure(int physical_width, int original_width, int ashift,
static int
raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
{
+ vdev_t *vd = zio->io_vd;
raidz_map_t *rm = zio->io_vsd;
- int physical_width = zio->io_vd->vdev_children;
+ int physical_width = vd->vdev_children;
+ int dbgmsg = zfs_flags & ZFS_DEBUG_RAIDZ_RECONSTRUCT;
+
+ if (vd->vdev_ops == &vdev_draid_ops) {
+ vdev_draid_config_t *vdc = vd->vdev_tsd;
+ physical_width = vdc->vdc_children;
+ }
+
int original_width = (rm->rm_original_width != 0) ?
rm->rm_original_width : physical_width;
- int dbgmsg = zfs_flags & ZFS_DEBUG_RAIDZ_RECONSTRUCT;
if (dbgmsg) {
zfs_dbgmsg("raidz_reconstruct_expanded(zio=%px ltgts=%u,%u,%u "
@@ -3465,9 +3508,17 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
static int
vdev_raidz_combrec(zio_t *zio)
{
- int nparity = vdev_get_nparity(zio->io_vd);
+ vdev_t *vd = zio->io_vd;
+ int nparity = vdev_get_nparity(vd);
raidz_map_t *rm = zio->io_vsd;
int physical_width = zio->io_vd->vdev_children;
+
+ if (vd->vdev_ops == &vdev_draid_ops) {
+ vdev_draid_config_t *vdc = vd->vdev_tsd;
+ nparity = vdc->vdc_nparity;
+ physical_width = vdc->vdc_children;
+ }
+
int original_width = (rm->rm_original_width != 0) ?
rm->rm_original_width : physical_width;
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 3bbc9107ae2e..fe98e7db073e 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -41,7 +41,7 @@
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
* Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
* Copyright (c) 2019, Allan Jude
- * Copyright 2024 Oxide Computer Company
+ * Copyright 2026 Oxide Computer Company
*/
/*
@@ -286,6 +286,59 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
+/*
+ * Callback for SPL to look up zoned_uid property.
+ * Walks ancestors to find the delegation root with zoned_uid set.
+ * Returns the zoned_uid value if found, or 0 if not set.
+ */
+static uid_t
+zfs_get_zoned_uid(const char *dataset, char *root_out, size_t root_size)
+{
+ char path[ZFS_MAX_DATASET_NAME_LEN];
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
+ char *slash, *at;
+ uint64_t zoned_uid_val = 0;
+ int error;
+
+ (void) strlcpy(path, dataset, sizeof (path));
+
+ /*
+ * Strip snapshot suffix if present — snapshots inherit properties
+ * from their parent filesystem.
+ */
+ at = strchr(path, '@');
+ if (at != NULL)
+ *at = '\0';
+
+ /*
+ * Walk up the hierarchy until we find a dataset with zoned_uid set.
+ * This handles the case where the dataset doesn't exist yet (e.g.,
+ * rename destination) — dsl_prop_get fails on non-existent datasets,
+ * so we walk up to find an existing ancestor.
+ *
+ * When the property is found (possibly via inheritance), setpoint
+ * tells us the actual delegation root where zoned_uid is locally
+ * set, rather than the dataset where we happened to query it.
+ */
+ while (path[0] != '\0') {
+ error = dsl_prop_get(path, "zoned_uid", 8, 1,
+ &zoned_uid_val, setpoint);
+
+ if (error == 0 && zoned_uid_val != 0) {
+ if (root_out != NULL)
+ (void) strlcpy(root_out, setpoint, root_size);
+ return ((uid_t)zoned_uid_val);
+ }
+
+ slash = strrchr(path, '/');
+ if (slash == NULL)
+ break;
+ *slash = '\0';
+ }
+
+ return (0);
+}
+
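
The new zfs_get_zoned_uid() above first strips any snapshot suffix and then climbs the dataset hierarchy one component at a time until dsl_prop_get() returns a non-zero zoned_uid. A userspace sketch of just that ancestor walk; lookup_zoned_uid() and the "tank/zones" example are hypothetical stand-ins for the kernel property lookup:

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	/* Illustrative only: stand-in for the dsl_prop_get() lookup. */
	static uint64_t
	lookup_zoned_uid(const char *name)
	{
		/* Pretend only "tank/zones" has zoned_uid set locally. */
		return (strcmp(name, "tank/zones") == 0 ? 1000 : 0);
	}

	static uint64_t
	walk_up_for_zoned_uid(const char *dataset)
	{
		char path[256];
		char *at, *slash;

		(void) strncpy(path, dataset, sizeof (path) - 1);
		path[sizeof (path) - 1] = '\0';

		/* Snapshots inherit from their parent filesystem. */
		if ((at = strchr(path, '@')) != NULL)
			*at = '\0';

		/* Walk up the hierarchy until a zoned_uid value is found. */
		while (path[0] != '\0') {
			uint64_t uid = lookup_zoned_uid(path);

			if (uid != 0)
				return (uid);
			if ((slash = strrchr(path, '/')) == NULL)
				break;
			*slash = '\0';
		}
		return (0);
	}

	int
	main(void)
	{
		printf("%llu\n", (unsigned long long)
		    walk_up_for_zoned_uid("tank/zones/web/data@snap1"));
		return (0);
	}
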
static void
history_str_free(char *buf)
{
@@ -502,6 +555,42 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
}
/*
+ * Check dsl_deleg permission for zoned_uid datasets.
+ *
+ * This bypasses zfs_dozonecheck_ds() (which requires the 'zoned' property)
+ * because zoned_uid datasets use a different authentication model. The zone
+ * check was already performed by zone_dataset_admin_check().
+ *
+ * Returns 0 if permission is granted, error otherwise.
+ * ECANCELED from dsl_deleg_access_impl() means delegation is disabled on the
+ * pool — in that case we deny access (POLP: no delegation = no access).
+ */
+static int
+zfs_secpolicy_zoned_uid_deleg(const char *name, const char *perm, cred_t *cr)
+{
+ dsl_pool_t *dp;
+ dsl_dataset_t *ds;
+ int error;
+
+ error = dsl_pool_hold(name, FTAG, &dp);
+ if (error != 0)
+ return (error);
+ error = dsl_dataset_hold(dp, name, FTAG, &ds);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+ error = dsl_deleg_access_impl(ds, perm, cr);
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ /* ECANCELED = delegation disabled on pool; deny access (POLP) */
+ if (error == ECANCELED)
+ return (SET_ERROR(EPERM));
+ return (error);
+}
+
+/*
* Policy for setting the security label property.
*
* Returns 0 for success, non-zero for access and other errors.
@@ -607,6 +696,31 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
cred_t *cr)
{
const char *strval;
+ zone_admin_result_t zone_result;
+
+ /*
+ * Check zoned_uid delegation first. However, even delegated
+ * namespace users must not be allowed to modify zoned_uid itself.
+ */
+ zone_result = zone_dataset_admin_check(dsname, ZONE_OP_SETPROP, NULL);
+ if (zone_result == ZONE_ADMIN_ALLOWED) {
+ if (prop == ZFS_PROP_ZONED_UID)
+ return (SET_ERROR(EPERM));
+ if (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
+ prop == ZFS_PROP_SNAPSHOT_LIMIT) {
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
+ uint64_t zoned_uid_val = 0;
+ if (dsl_prop_get(dsname, "zoned_uid", 8, 1,
+ &zoned_uid_val, setpoint) == 0 &&
+ zoned_uid_val != 0 &&
+ strcmp(dsname, setpoint) == 0)
+ return (SET_ERROR(EPERM));
+ }
+ return (zfs_secpolicy_zoned_uid_deleg(dsname,
+ zfs_prop_to_name(prop), cr));
+ }
+ if (zone_result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
/*
* Check permissions for special properties.
@@ -621,6 +735,15 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
if (!INGLOBALZONE(curproc))
return (SET_ERROR(EPERM));
break;
+ case ZFS_PROP_ZONED_UID:
+ /*
+ * Disallow setting of 'zoned_uid' from within a
+ * delegated namespace -- only global zone can manage
+ * delegation assignments.
+ */
+ if (!INGLOBALZONE(curproc))
+ return (SET_ERROR(EPERM));
+ break;
case ZFS_PROP_QUOTA:
case ZFS_PROP_FILESYSTEM_LIMIT:
@@ -774,7 +897,21 @@ int
zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
{
int error;
+ zone_admin_result_t result;
+
+ /* Check zoned_uid delegation first */
+ result = zone_dataset_admin_check(name, ZONE_OP_DESTROY, NULL);
+ if (result == ZONE_ADMIN_ALLOWED) {
+ if ((error = zfs_secpolicy_zoned_uid_deleg(name,
+ ZFS_DELEG_PERM_DESTROY, cr)) != 0)
+ return (error);
+ return (zfs_secpolicy_zoned_uid_deleg(name,
+ ZFS_DELEG_PERM_MOUNT, cr));
+ }
+ if (result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
+ /* NOT_APPLICABLE: continue with existing checks */
if ((error = zfs_secpolicy_write_perms(name,
ZFS_DELEG_PERM_MOUNT, cr)) != 0)
return (error);
@@ -831,7 +968,21 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
+ zone_admin_result_t result;
+ /* Check zoned_uid delegation first */
+ result = zone_dataset_admin_check(from, ZONE_OP_RENAME, to);
+ if (result == ZONE_ADMIN_ALLOWED) {
+ if ((error = zfs_secpolicy_zoned_uid_deleg(from,
+ ZFS_DELEG_PERM_RENAME, cr)) != 0)
+ return (error);
+ return (zfs_secpolicy_zoned_uid_deleg(from,
+ ZFS_DELEG_PERM_MOUNT, cr));
+ }
+ if (result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
+
+ /* NOT_APPLICABLE: continue with existing checks */
if ((error = zfs_secpolicy_write_perms(from,
ZFS_DELEG_PERM_RENAME, cr)) != 0)
return (error);
@@ -940,6 +1091,17 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
+ zone_admin_result_t result;
+
+ /* Check zoned_uid delegation first */
+ result = zone_dataset_admin_check(name, ZONE_OP_SNAPSHOT, NULL);
+ if (result == ZONE_ADMIN_ALLOWED)
+ return (zfs_secpolicy_zoned_uid_deleg(name,
+ ZFS_DELEG_PERM_SNAPSHOT, cr));
+ if (result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
+
+ /* NOT_APPLICABLE: continue with existing checks */
return (zfs_secpolicy_write_perms(name,
ZFS_DELEG_PERM_SNAPSHOT, cr));
}
@@ -1062,13 +1224,35 @@ zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
- const char *origin;
+ const char *origin = NULL;
+ zone_admin_result_t result;
if ((error = zfs_get_parent(zc->zc_name, parentname,
sizeof (parentname))) != 0)
return (error);
- if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
+ (void) nvlist_lookup_string(innvl, "origin", &origin);
+
+ /* Check zoned_uid delegation first */
+ result = zone_dataset_admin_check(parentname,
+ origin != NULL ? ZONE_OP_CLONE : ZONE_OP_CREATE, origin);
+ if (result == ZONE_ADMIN_ALLOWED) {
+ if (origin != NULL) {
+ if ((error = zfs_secpolicy_zoned_uid_deleg(origin,
+ ZFS_DELEG_PERM_CLONE, cr)) != 0)
+ return (error);
+ }
+ if ((error = zfs_secpolicy_zoned_uid_deleg(parentname,
+ ZFS_DELEG_PERM_CREATE, cr)) != 0)
+ return (error);
+ return (zfs_secpolicy_zoned_uid_deleg(parentname,
+ ZFS_DELEG_PERM_MOUNT, cr));
+ }
+ if (result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
+
+ /* NOT_APPLICABLE: continue with existing checks */
+ if (origin != NULL &&
(error = zfs_secpolicy_write_perms(origin,
ZFS_DELEG_PERM_CLONE, cr)) != 0)
return (error);
@@ -1131,6 +1315,14 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
if (prop == ZPROP_USERPROP) {
if (!zfs_prop_user(zc->zc_value))
return (SET_ERROR(EINVAL));
+ zone_admin_result_t zone_result;
+ zone_result = zone_dataset_admin_check(zc->zc_name,
+ ZONE_OP_SETPROP, NULL);
+ if (zone_result == ZONE_ADMIN_ALLOWED)
+ return (zfs_secpolicy_zoned_uid_deleg(zc->zc_name,
+ ZFS_DELEG_PERM_USERPROP, cr));
+ if (zone_result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
return (zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_USERPROP, cr));
} else {
@@ -1439,6 +1631,7 @@ zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
* objset from the zfsvfs.
*/
ZFS_TEARDOWN_EXIT(*zfvp, tag);
+ zfs_vfs_rele(*zfvp);
return (SET_ERROR(EBUSY));
}
}
@@ -1468,6 +1661,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
dsl_crypto_params_t *dcp = NULL;
const char *spa_name = zc->zc_name;
boolean_t unload_wkey = B_TRUE;
+ nvlist_t *errinfo = NULL;
if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
zc->zc_iflags, &config)))
@@ -1519,7 +1713,16 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
spa_name = tname;
}
- error = spa_create(zc->zc_name, config, props, zplprops, dcp);
+ error = spa_create(zc->zc_name, config, props, zplprops, dcp,
+ &errinfo);
+ if (errinfo != NULL) {
+ nvlist_t *outnv = fnvlist_alloc();
+ fnvlist_add_nvlist(outnv,
+ ZPOOL_CONFIG_CREATE_INFO, errinfo);
+ (void) put_nvlist(zc, outnv);
+ nvlist_free(outnv);
+ nvlist_free(errinfo);
+ }
/*
* Set the remaining root properties
@@ -2707,6 +2910,28 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
zfsvfs_rele(zfsvfs, FTAG);
break;
}
+ case ZFS_PROP_ZONED_UID:
+ {
+ uint64_t old_uid = 0;
+ (void) dsl_prop_get(dsname, "zoned_uid", 8, 1, &old_uid, NULL);
+ if (old_uid != 0)
+ (void) zone_dataset_detach_uid(CRED(), dsname,
+ (uid_t)old_uid);
+ if (intval != 0) {
+ err = zone_dataset_attach_uid(CRED(), dsname,
+ (uid_t)intval);
+ if (err == ENXIO)
+ err = ZFS_ERR_NO_USER_NS_SUPPORT;
+ if (err != 0)
+ break;
+ }
+ /*
+ * Set err to -1 to force the zfs_set_prop_nvlist code down the
+ * default path to set the value in the nvlist.
+ */
+ err = -1;
+ break;
+ }
default:
err = -1;
}
@@ -3850,8 +4075,20 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
*/
if (!nvlist_empty(props)) {
*cp = '\0';
- error = zfs_secpolicy_write_perms(name,
- ZFS_DELEG_PERM_USERPROP, CRED());
+ zone_admin_result_t zone_result;
+ zone_result = zone_dataset_admin_check(name,
+ ZONE_OP_SETPROP, NULL);
+ if (zone_result == ZONE_ADMIN_DENIED) {
+ *cp = '@';
+ return (SET_ERROR(EPERM));
+ }
+ if (zone_result == ZONE_ADMIN_ALLOWED) {
+ error = zfs_secpolicy_zoned_uid_deleg(name,
+ ZFS_DELEG_PERM_USERPROP, CRED());
+ } else {
+ error = zfs_secpolicy_write_perms(name,
+ ZFS_DELEG_PERM_USERPROP, CRED());
+ }
*cp = '@';
if (error != 0)
return (error);
@@ -4333,6 +4570,14 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
if (strchr(zc->zc_name, '@')) {
err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
} else {
+ /*
+ * Save zoned_uid before destroying so we can clean up
+ * kernel-side zone tracking after a successful destroy.
+ */
+ uint64_t zoned_uid = 0;
+ (void) dsl_prop_get(zc->zc_name, "zoned_uid",
+ 8, 1, &zoned_uid, NULL);
+
err = dsl_destroy_head(zc->zc_name);
if (err == EEXIST) {
/*
@@ -4362,6 +4607,11 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
else if (err == ENOENT)
err = SET_ERROR(EEXIST);
}
+
+ if (err == 0 && zoned_uid != 0) {
+ (void) zone_dataset_detach_uid(kcred,
+ zc->zc_name, (uid_t)zoned_uid);
+ }
}
return (err);
@@ -4859,7 +5109,24 @@ zfs_ioc_rename(zfs_cmd_t *zc)
return (error);
} else {
- return (dsl_dir_rename(zc->zc_name, zc->zc_value));
+ /*
+ * For dataset renames, update kernel-side zone tracking
+ * if the dataset has a zoned_uid delegation. Read the
+ * property before rename, then detach old / attach new.
+ */
+ uint64_t zoned_uid = 0;
+ (void) dsl_prop_get(zc->zc_name, "zoned_uid",
+ 8, 1, &zoned_uid, NULL);
+
+ err = dsl_dir_rename(zc->zc_name, zc->zc_value);
+
+ if (err == 0 && zoned_uid != 0) {
+ (void) zone_dataset_detach_uid(kcred,
+ zc->zc_name, (uid_t)zoned_uid);
+ (void) zone_dataset_attach_uid(kcred,
+ zc->zc_value, (uid_t)zoned_uid);
+ }
+ return (err);
}
}
@@ -4874,6 +5141,14 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
if (prop == ZPROP_USERPROP) {
if (zfs_prop_user(propname)) {
+ zone_admin_result_t zone_result;
+ zone_result = zone_dataset_admin_check(dsname,
+ ZONE_OP_SETPROP, NULL);
+ if (zone_result == ZONE_ADMIN_ALLOWED)
+ return (zfs_secpolicy_zoned_uid_deleg(dsname,
+ ZFS_DELEG_PERM_USERPROP, cr));
+ if (zone_result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
if ((err = zfs_secpolicy_write_perms(dsname,
ZFS_DELEG_PERM_USERPROP, cr)))
return (err);
@@ -4918,6 +5193,14 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
return (SET_ERROR(EINVAL));
}
+ zone_admin_result_t zone_result;
+ zone_result = zone_dataset_admin_check(dsname,
+ ZONE_OP_SETPROP, NULL);
+ if (zone_result == ZONE_ADMIN_ALLOWED)
+ return (zfs_secpolicy_zoned_uid_deleg(dsname,
+ perm, cr));
+ if (zone_result == ZONE_ADMIN_DENIED)
+ return (SET_ERROR(EPERM));
if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
return (err);
return (0);
@@ -7318,7 +7601,7 @@ zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
int ret;
uint64_t cmd = DCP_CMD_NONE;
dsl_crypto_params_t *dcp = NULL;
- nvlist_t *args = NULL, *hidden_args = NULL;
+ nvlist_t *props = NULL, *hidden_args = NULL;
if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
ret = (SET_ERROR(EINVAL));
@@ -7326,14 +7609,20 @@ zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
}
(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
- (void) nvlist_lookup_nvlist(innvl, "props", &args);
+ (void) nvlist_lookup_nvlist(innvl, "props", &props);
(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
- ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
+ ret = dsl_crypto_params_create_nvlist(cmd, props, hidden_args, &dcp);
if (ret != 0)
goto error;
- ret = spa_keystore_change_key(dsname, dcp);
+ /* The keylocation property is set from dcp->cp_keylocation. */
+ (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
+
+ if ((ret = zfs_check_userprops(props)) != 0)
+ goto error;
+
+ ret = spa_keystore_change_key(dsname, dcp, props);
if (ret != 0)
goto error;
@@ -8267,6 +8556,9 @@ zfs_kmod_init(void)
zfs_ioctl_init();
+ /* Register zoned_uid property lookup callback with SPL */
+ zone_register_zoned_uid_callback(zfs_get_zoned_uid);
+
mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
zfsdev_state_listhead.zs_minor = -1;
@@ -8305,6 +8597,10 @@ zfs_kmod_fini(void)
}
zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */
+
+ /* Unregister zoned_uid callback before ZFS layer is torn down */
+ zone_unregister_zoned_uid_callback();
+
zfs_fini();
spa_fini();
zvol_fini();
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 08cea9156688..5c2c984c34b6 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -1662,9 +1662,11 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
/*
* If we've decided to do a repair, the write is not speculative --
- * even if the original read was.
+ * even if the original read was. Rebuild is an exception since we
+ * cannot always ensure its data integrity.
*/
- if (flags & ZIO_FLAG_IO_REPAIR)
+ if ((flags & ZIO_FLAG_IO_REPAIR) &&
+ pio->io_priority != ZIO_PRIORITY_REBUILD)
flags &= ~ZIO_FLAG_SPECULATIVE;
/*
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
index 285b194a6969..21f41c38c980 100644
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -1825,9 +1825,10 @@ zvol_rename_minors_impl(zvol_task_t *task)
if (zvol_inhibit_dev)
return;
+ last_error = 0;
oldnamelen = strlen(oldname);
- rw_enter(&zvol_state_lock, RW_READER);
+ rw_enter(&zvol_state_lock, RW_WRITER);
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
@@ -1844,6 +1845,8 @@ zvol_rename_minors_impl(zvol_task_t *task)
zv->zv_name + oldnamelen + 1);
error = zvol_os_rename_minor(zv, name);
kmem_strfree(name);
+ } else {
+ error = 0;
}
if (error) {
last_error = error;
@@ -1999,6 +2002,10 @@ typedef struct zvol_set_prop_int_arg {
uint64_t zsda_value;
zprop_source_t zsda_source;
zfs_prop_t zsda_prop;
+ taskqid_t zsda_taskqid;
+ boolean_t zsda_dispatched;
+ kmutex_t zsda_lock;
+ kcondvar_t zsda_cv;
} zvol_set_prop_int_arg_t;
/*
@@ -2029,6 +2036,7 @@ zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
char dsname[ZFS_MAX_DATASET_NAME_LEN];
zvol_task_t *task;
uint64_t prop;
+ taskqid_t id;
const char *prop_name = zfs_prop_to_name(zsda->zsda_prop);
dsl_dataset_name(ds, dsname);
@@ -2047,8 +2055,12 @@ zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
}
task->zt_value = prop;
strlcpy(task->zt_name1, dsname, sizeof (task->zt_name1));
- (void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
- task, TQ_SLEEP);
+ id = taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb, task,
+ TQ_SLEEP);
+ mutex_enter(&zsda->zsda_lock);
+ if (id != TASKQID_INVALID && id > zsda->zsda_taskqid)
+ zsda->zsda_taskqid = id;
+ mutex_exit(&zsda->zsda_lock);
return (0);
}
@@ -2081,6 +2093,11 @@ zvol_set_common_sync(void *arg, dmu_tx_t *tx)
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb,
zsda, DS_FIND_CHILDREN);
+ mutex_enter(&zsda->zsda_lock);
+ zsda->zsda_dispatched = TRUE;
+ cv_broadcast(&zsda->zsda_cv);
+ mutex_exit(&zsda->zsda_lock);
+
dsl_dir_rele(dd, FTAG);
}
@@ -2089,14 +2106,38 @@ zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source,
uint64_t val)
{
zvol_set_prop_int_arg_t zsda;
+ spa_t *spa;
+ int error;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
zsda.zsda_value = val;
zsda.zsda_prop = prop;
+ zsda.zsda_taskqid = TASKQID_INVALID;
+ zsda.zsda_dispatched = FALSE;
+ mutex_init(&zsda.zsda_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&zsda.zsda_cv, NULL, CV_DEFAULT, NULL);
+
+ error = spa_open(ddname, &spa, FTAG);
+ if (error != 0)
+ goto out;
+ error = dsl_sync_task(ddname, zvol_set_common_check,
+ zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE);
+ if (error == 0) {
+ mutex_enter(&zsda.zsda_lock);
+ while (!zsda.zsda_dispatched)
+ cv_wait(&zsda.zsda_cv, &zsda.zsda_lock);
+ mutex_exit(&zsda.zsda_lock);
- return (dsl_sync_task(ddname, zvol_set_common_check,
- zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
+ if (zsda.zsda_taskqid != TASKQID_INVALID)
+ taskq_wait_outstanding(spa->spa_zvol_taskq,
+ zsda.zsda_taskqid);
+ }
+ spa_close(spa, FTAG);
+out:
+ cv_destroy(&zsda.zsda_cv);
+ mutex_destroy(&zsda.zsda_lock);
+ return (error);
}
void
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index fa89f9d2aef6..48ed7bf2eb79 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -1,4 +1,9 @@
+# Since Fedora 42, Fedora has unified /bin and /sbin. For this to
+# work, we must not override _sbindir
+%if 0%{?fedora} >= 42
+%else
%global _sbindir /sbin
+%endif
%global _libdir /%{_lib}
# Set the default udev directory based on distribution.
@@ -393,6 +398,7 @@ support for unlocking datasets on user login.
--with-pammoduledir=%{_libdir}/security \
--with-python=%{__python} \
--with-pkgconfigdir=%{_pkgconfigdir} \
+ --with-mounthelperdir=%{_sbindir} \
--disable-static \
%{debug} \
%{debuginfo} \
diff --git a/sys/contrib/openzfs/scripts/spdxcheck.pl b/sys/contrib/openzfs/scripts/spdxcheck.pl
index 8e40cee835cb..b71caaad11ad 100755
--- a/sys/contrib/openzfs/scripts/spdxcheck.pl
+++ b/sys/contrib/openzfs/scripts/spdxcheck.pl
@@ -147,7 +147,6 @@ my $untagged_patterns = q(
contrib/bpftrace/zfs-trace.sh
contrib/pyzfs/docs/source/conf.py
contrib/pyzfs/libzfs_core/test/__init__.py
- contrib/pyzfs/setup.py.in
contrib/zcp/autosnap.lua
scripts/commitcheck.sh
scripts/man-dates.sh
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index f22f3c759e9b..243d28e8bc49 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -50,7 +50,8 @@ tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos',
tags = ['functional', 'arc']
[tests/functional/atime]
-tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on']
+tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos', 'root_atime_off',
+ 'root_atime_on', 'root_relatime_on']
tags = ['functional', 'atime']
[tests/functional/bclone]
@@ -83,7 +84,8 @@ tests = ['block_cloning_clone_mmap_cached',
'block_cloning_replay', 'block_cloning_replay_encrypted',
'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write',
'block_cloning_rlimit_fsize', 'block_cloning_large_offset',
- 'block_cloning_after_device_removal']
+ 'block_cloning_after_device_removal',
+ 'block_cloning_after_trunc']
tags = ['functional', 'block_cloning']
[tests/functional/bootfs]
@@ -189,7 +191,8 @@ tags = ['functional', 'cli_root', 'zfs_bookmark']
[tests/functional/cli_root/zfs_change-key]
tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
- 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones']
+ 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones',
+ 'zfs_change-key_userprop']
tags = ['functional', 'cli_root', 'zfs_change-key']
[tests/functional/cli_root/zfs_clone]
@@ -424,12 +427,13 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
'zpool_create_encrypted', 'zpool_create_edom_neg', 'zpool_create_crypt_combos',
'zpool_create_draid_001_pos', 'zpool_create_draid_002_pos',
'zpool_create_draid_003_pos', 'zpool_create_draid_004_pos',
+ 'zpool_create_draid_005_pos',
'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
'zpool_create_features_005_pos', 'zpool_create_features_006_pos',
'zpool_create_features_007_pos', 'zpool_create_features_008_pos',
'zpool_create_features_009_pos', 'create-o_ashift',
- 'zpool_create_tempname', 'zpool_create_dryrun_output']
+ 'zpool_create_tempname', 'zpool_create_errinfo_001_neg', 'zpool_create_dryrun_output']
tags = ['functional', 'cli_root', 'zpool_create']
[tests/functional/cli_root/zpool_destroy]
@@ -525,7 +529,7 @@ tags = ['functional', 'cli_root', 'zpool_initialize']
[tests/functional/cli_root/zpool_offline]
tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
- 'zpool_offline_003_pos']
+ 'zpool_offline_003_pos', 'zpool_offline_spare']
tags = ['functional', 'cli_root', 'zpool_offline']
[tests/functional/cli_root/zpool_online]
@@ -738,7 +742,7 @@ tests = ['exec_001_pos', 'exec_002_neg']
tags = ['functional', 'exec']
[tests/functional/fadvise]
-tests = ['fadvise_willneed']
+tests = ['fadvise_dontneed', 'fadvise_willneed']
tags = ['functional', 'fadvise']
[tests/functional/failmode]
@@ -914,9 +918,11 @@ timeout = 1200
[tests/functional/redundancy]
tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
- 'redundancy_draid3', 'redundancy_draid_damaged1',
- 'redundancy_draid_damaged2', 'redundancy_draid_spare1',
- 'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror',
+ 'redundancy_draid3', 'redundancy_draid_width', 'redundancy_draid_damaged1',
+ 'redundancy_draid_damaged2', 'redundancy_draid_degraded1',
+ 'redundancy_draid_degraded2',
+ 'redundancy_draid_spare1', 'redundancy_draid_spare2',
+ 'redundancy_draid_spare3', 'redundancy_draid_spare4', 'redundancy_mirror',
'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2',
'redundancy_raidz3', 'redundancy_stripe']
tags = ['functional', 'redundancy']
@@ -1102,6 +1108,22 @@ tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos',
'xattr_compat']
tags = ['functional', 'xattr']
+[tests/functional/zoned_uid:Linux]
+tests = ['zoned_uid_001_pos', 'zoned_uid_002_pos', 'zoned_uid_003_pos',
+ 'zoned_uid_004_pos', 'zoned_uid_005_neg', 'zoned_uid_006_pos',
+ 'zoned_uid_007_pos', 'zoned_uid_008_pos', 'zoned_uid_009_pos',
+ 'zoned_uid_010_pos', 'zoned_uid_011_neg', 'zoned_uid_012_pos',
+ 'zoned_uid_013_pos', 'zoned_uid_014_pos',
+ 'zoned_uid_015_pos', 'zoned_uid_016_pos', 'zoned_uid_017_neg',
+ 'zoned_uid_018_pos', 'zoned_uid_019_neg', 'zoned_uid_020_neg',
+ 'zoned_uid_021_neg', 'zoned_uid_022_neg',
+    'zoned_uid_023_pos', 'zoned_uid_024_neg',
+    'zoned_uid_025_pos', 'zoned_uid_026_pos',
+    'zoned_uid_027_pos', 'zoned_uid_028_neg',
+    'zoned_uid_029_neg', 'zoned_uid_030_pos',
+    'zoned_uid_031_pos']
+tags = ['functional', 'zoned_uid']
+
[tests/functional/zvol/zvol_ENOSPC]
tests = ['zvol_ENOSPC_001_pos']
tags = ['functional', 'zvol', 'zvol_ENOSPC']
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 2717bf53d0b1..11bda60a9caf 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -30,10 +30,6 @@ tags = ['functional', 'acl', 'posix']
tests = ['posix_001_pos', 'posix_002_pos', 'posix_003_pos', 'posix_004_pos']
tags = ['functional', 'acl', 'posix-sa']
-[tests/functional/atime:Linux]
-tests = ['atime_003_pos', 'root_relatime_on']
-tags = ['functional', 'atime']
-
[tests/functional/block_cloning:Linux]
tests = ['block_cloning_ficlone', 'block_cloning_ficlonerange',
'block_cloning_ficlonerange_partial', 'block_cloning_disabled_ficlone',
@@ -124,7 +120,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple',
'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
'decompress_fault', 'fault_limits', 'scrub_after_resilver',
- 'suspend_on_probe_errors', 'suspend_resume_single', 'zpool_status_-s']
+ 'suspend_on_probe_errors', 'suspend_resume_single', 'suspend_draid_fgroups',
+ 'zpool_status_-s']
tags = ['functional', 'fault']
[tests/functional/features/large_dnode:Linux]
diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run
index 9b49f63c5a90..ca16bee67dda 100644
--- a/sys/contrib/openzfs/tests/runfiles/sanity.run
+++ b/sys/contrib/openzfs/tests/runfiles/sanity.run
@@ -104,7 +104,8 @@ tags = ['functional', 'cli_root', 'zfs_bookmark']
[tests/functional/cli_root/zfs_change-key]
tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
- 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones']
+ 'zfs_change-key_pbkdf2iters', 'zfs_change-key_clones',
+ 'zfs_change-key_userprop']
tags = ['functional', 'cli_root', 'zfs_change-key']
[tests/functional/cli_root/zfs_clone]
@@ -323,7 +324,8 @@ pre =
tags = ['functional', 'cli_root', 'zpool_initialize']
[tests/functional/cli_root/zpool_offline]
-tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg']
+tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
+ 'zpool_offline_spare']
tags = ['functional', 'cli_root', 'zpool_offline']
[tests/functional/cli_root/zpool_online]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index a8251c511ac4..874a23a87574 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -172,6 +172,7 @@ if sys.platform.startswith('freebsd'):
['FAIL', known_reason],
'cli_root/zpool_resilver/zpool_resilver_concurrent':
['SKIP', na_reason],
+ 'zoned_uid/setup': ['SKIP', na_reason],
'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
@@ -179,7 +180,6 @@ if sys.platform.startswith('freebsd'):
'cp_files/cp_files_002_pos': ['SKIP', na_reason],
'link_count/link_count_001': ['SKIP', na_reason],
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
- 'rsend/send_raw_ashift': ['SKIP', 14961],
})
elif sys.platform.startswith('linux'):
known.update({
@@ -253,8 +253,6 @@ maybe = {
'projectquota/setup': ['SKIP', exec_reason],
'raidz/raidz_002_pos': ['FAIL', known_reason],
'raidz/raidz_expand_001_pos': ['FAIL', 16421],
- 'redundancy/redundancy_draid_spare1': ['FAIL', 18307],
- 'redundancy/redundancy_draid_spare3': ['FAIL', 18319],
'removal/removal_condense_export': ['FAIL', known_reason],
'renameat2/setup': ['SKIP', renameat2_reason],
'reservation/reservation_008_pos': ['FAIL', 7741],
@@ -322,6 +320,7 @@ elif sys.platform.startswith('linux'):
'bclone/bclone_samefs_data': ['SKIP', cfr_reason],
'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason],
'bclone/bclone_samefs_hole': ['SKIP', cfr_reason],
+ 'block_cloning/block_cloning_after_trunc': ['SKIP', cfr_reason],
'block_cloning/block_cloning_clone_mmap_cached': ['SKIP', cfr_reason],
'block_cloning/block_cloning_clone_mmap_write':
['SKIP', cfr_reason],
@@ -368,6 +367,7 @@ elif sys.platform.startswith('linux'):
'limits/filesystem_limit': ['SKIP', known_reason],
'limits/snapshot_limit': ['SKIP', known_reason],
'stat/statx_dioalign': ['SKIP', 'statx_reason'],
+ 'zoned_uid/setup': ['SKIP', user_ns_reason],
})
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
index 335e4ceba282..4bdca0acf52b 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore
@@ -2,6 +2,7 @@
/btree_test
/chg_usr_exec
/clonefile
+/clone_after_trunc
/clone_mmap_cached
/clone_mmap_write
/crypto_test
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
index 9683834f8e92..c4155ca3cacd 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
@@ -34,6 +34,8 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/crypto_test
%C%_crypto_test_SOURCES = %D%/crypto_test.c
%C%_crypto_test_LDADD = libzpool.la
+scripts_zfs_tests_bin_PROGRAMS += %D%/clone_after_trunc
+%C%_clone_after_trunc_LDADD = -lpthread
if WANT_DEVNAME2DEVID
scripts_zfs_tests_bin_PROGRAMS += %D%/devname2devid
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_after_trunc.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_after_trunc.c
new file mode 100644
index 000000000000..631432928984
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_after_trunc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: CDDL-1.0
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#if defined(_GNU_SOURCE) && defined(__linux__)
+_Static_assert(sizeof (loff_t) == sizeof (off_t),
+ "loff_t and off_t must be the same size");
+#endif
+
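+/*
+ * copy_file_range(2) is declared as a weak symbol so this test still links
+ * on systems whose libc does not provide it.
+ */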
+ssize_t
+copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)
+ __attribute__((weak));
+
+#define FILE_SIZE (1024 * 1024)
+#define RECORD_SIZE (128 * 1024)
+#define NUM_THREADS 64
+
+const char *dir;
+volatile int failed;
+
+static void *
+run_test(void *arg)
+{
+ int thread_id = (int)(long)arg;
+
+ char src_path[PATH_MAX], dst_path[PATH_MAX];
+ snprintf(src_path, PATH_MAX, "%s/src-%d", dir, thread_id);
+ snprintf(dst_path, PATH_MAX, "%s/dst-%d", dir, thread_id);
+
+ unsigned char *write_buf = malloc(FILE_SIZE);
+ unsigned char *read_buf = malloc(FILE_SIZE);
+
+ // Write out expected data.
+ memset(write_buf, 0xAA, FILE_SIZE);
+ int src = open(src_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (write(src, write_buf, FILE_SIZE) != FILE_SIZE)
+ perror("write");
+ close(src);
+
+ // Create destination file so we exercise O_TRUNC.
+ int dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (write(dst, write_buf, FILE_SIZE) != FILE_SIZE)
+ perror("write");
+ fsync(dst);
+ close(dst);
+
+ // Open file with O_TRUNC and perform copy.
+ src = open(src_path, O_RDONLY);
+ dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+
+ off_t off_in = 0, off_out = 0;
+ ssize_t ret =
+ copy_file_range(src, &off_in, dst, &off_out, FILE_SIZE, 0);
+ if (ret != FILE_SIZE)
+ perror("copy_file_range");
+ close(src);
+ close(dst);
+
+ // Read back
+ dst = open(dst_path, O_RDONLY);
+ if (read(dst, read_buf, FILE_SIZE) != FILE_SIZE)
+ perror("read");
+ close(dst);
+
+ // Bug check
+ if (memcmp(write_buf, read_buf, FILE_SIZE) != 0) {
+ failed = 1;
+ fprintf(stderr, "[%d]: FAIL\n", thread_id);
+
+ int all_zeros = 1;
+ for (int i = 0; i < RECORD_SIZE; i++) {
+ if (read_buf[i] != 0) {
+ all_zeros = 0;
+ break;
+ }
+ }
+
+ if (all_zeros) {
+ fprintf(stderr, "[%d]: ALL ZERO\n", thread_id);
+ }
+ }
+
+ unlink(src_path);
+ unlink(dst_path);
+ free(write_buf);
+ free(read_buf);
+ return (NULL);
+}
+
+int
+main(int argc, const char **argv)
+{
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <dir>\n", argv[0]);
+ return (1);
+ }
+ dir = argv[1];
+
+ pthread_t threads[NUM_THREADS];
+
+ for (int i = 0; i < NUM_THREADS; i++) {
+ pthread_create(&threads[i], NULL, run_test, (void *)(long)i);
+ }
+ for (int i = 0; i < NUM_THREADS; i++) {
+ pthread_join(threads[i], NULL);
+ }
+
+ return (failed);
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
index 4ba9aa7c8b67..a52cacec224a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg
@@ -129,6 +129,7 @@ export SYSTEM_FILES_LINUX='attr
blkid
blkdiscard
blockdev
+ capsh
chattr
cryptsetup
exportfs
@@ -186,6 +187,7 @@ export ZFSTEST_FILES_COMMON='badsend
btree_test
chg_usr_exec
clonefile
+ clone_after_trunc
clone_mmap_cached
clone_mmap_write
crypto_test
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index fdf211877e62..cf04950a9612 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -393,6 +393,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/vdev_zaps/vdev_zaps.kshlib \
functional/xattr/xattr.cfg \
functional/xattr/xattr_common.kshlib \
+ functional/zoned_uid/zoned_uid.cfg \
+ functional/zoned_uid/zoned_uid_common.kshlib \
functional/zvol/zvol.cfg \
functional/zvol/zvol_cli/zvol_cli.cfg \
functional/zvol/zvol_common.shlib \
@@ -493,6 +495,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_rlimit_fsize.ksh \
functional/block_cloning/block_cloning_large_offset.ksh \
functional/block_cloning/block_cloning_after_device_removal.ksh \
+ functional/block_cloning/block_cloning_after_trunc.ksh \
functional/bootfs/bootfs_001_pos.ksh \
functional/bootfs/bootfs_002_neg.ksh \
functional/bootfs/bootfs_003_pos.ksh \
@@ -664,6 +667,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zfs_change-key/zfs_change-key_load.ksh \
functional/cli_root/zfs_change-key/zfs_change-key_location.ksh \
functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh \
+ functional/cli_root/zfs_change-key/zfs_change-key_userprop.ksh \
functional/cli_root/zfs/cleanup.ksh \
functional/cli_root/zfs_clone/cleanup.ksh \
functional/cli_root/zfs_clone/setup.ksh \
@@ -1083,6 +1087,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_create/zpool_create_draid_002_pos.ksh \
functional/cli_root/zpool_create/zpool_create_draid_003_pos.ksh \
functional/cli_root/zpool_create/zpool_create_draid_004_pos.ksh \
+ functional/cli_root/zpool_create/zpool_create_draid_005_pos.ksh \
functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh \
functional/cli_root/zpool_create/zpool_create_encrypted.ksh \
functional/cli_root/zpool_create/zpool_create_edom_neg.ksh \
@@ -1096,6 +1101,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_create/zpool_create_features_008_pos.ksh \
functional/cli_root/zpool_create/zpool_create_features_009_pos.ksh \
functional/cli_root/zpool_create/zpool_create_tempname.ksh \
+ functional/cli_root/zpool_create/zpool_create_errinfo_001_neg.ksh \
functional/cli_root/zpool_destroy/zpool_destroy_001_pos.ksh \
functional/cli_root/zpool_destroy/zpool_destroy_002_pos.ksh \
functional/cli_root/zpool_destroy/zpool_destroy_003_neg.ksh \
@@ -1220,6 +1226,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh \
functional/cli_root/zpool_offline/zpool_offline_002_neg.ksh \
functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh \
+ functional/cli_root/zpool_offline/zpool_offline_spare.ksh \
functional/cli_root/zpool_online/cleanup.ksh \
functional/cli_root/zpool_online/setup.ksh \
functional/cli_root/zpool_online/zpool_online_001_pos.ksh \
@@ -1570,6 +1577,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/exec/exec_002_neg.ksh \
functional/exec/setup.ksh \
functional/fadvise/cleanup.ksh \
+ functional/fadvise/fadvise_dontneed.ksh \
functional/fadvise/fadvise_willneed.ksh \
functional/fadvise/setup.ksh \
functional/failmode/cleanup.ksh \
@@ -1607,6 +1615,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/fault/scrub_after_resilver.ksh \
functional/fault/suspend_on_probe_errors.ksh \
functional/fault/suspend_resume_single.ksh \
+ functional/fault/suspend_draid_fgroups.ksh \
functional/fault/setup.ksh \
functional/fault/zpool_status_-s.ksh \
functional/features/async_destroy/async_destroy_001_pos.ksh \
@@ -1900,12 +1909,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/redundancy/redundancy_draid1.ksh \
functional/redundancy/redundancy_draid2.ksh \
functional/redundancy/redundancy_draid3.ksh \
+ functional/redundancy/redundancy_draid_width.ksh \
functional/redundancy/redundancy_draid_damaged1.ksh \
functional/redundancy/redundancy_draid_damaged2.ksh \
+ functional/redundancy/redundancy_draid_degraded1.ksh \
+ functional/redundancy/redundancy_draid_degraded2.ksh \
functional/redundancy/redundancy_draid.ksh \
functional/redundancy/redundancy_draid_spare1.ksh \
functional/redundancy/redundancy_draid_spare2.ksh \
functional/redundancy/redundancy_draid_spare3.ksh \
+ functional/redundancy/redundancy_draid_spare4.ksh \
functional/redundancy/redundancy_mirror.ksh \
functional/redundancy/redundancy_raidz1.ksh \
functional/redundancy/redundancy_raidz2.ksh \
@@ -2266,6 +2279,39 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/xattr/xattr_013_pos.ksh \
functional/xattr/xattr_014_pos.ksh \
functional/xattr/xattr_compat.ksh \
+ functional/zoned_uid/cleanup.ksh \
+ functional/zoned_uid/setup.ksh \
+ functional/zoned_uid/zoned_uid_001_pos.ksh \
+ functional/zoned_uid/zoned_uid_002_pos.ksh \
+ functional/zoned_uid/zoned_uid_003_pos.ksh \
+ functional/zoned_uid/zoned_uid_004_pos.ksh \
+ functional/zoned_uid/zoned_uid_005_neg.ksh \
+ functional/zoned_uid/zoned_uid_006_pos.ksh \
+ functional/zoned_uid/zoned_uid_007_pos.ksh \
+ functional/zoned_uid/zoned_uid_008_pos.ksh \
+ functional/zoned_uid/zoned_uid_009_pos.ksh \
+ functional/zoned_uid/zoned_uid_010_pos.ksh \
+ functional/zoned_uid/zoned_uid_011_neg.ksh \
+ functional/zoned_uid/zoned_uid_012_pos.ksh \
+ functional/zoned_uid/zoned_uid_013_pos.ksh \
+ functional/zoned_uid/zoned_uid_014_pos.ksh \
+ functional/zoned_uid/zoned_uid_015_pos.ksh \
+ functional/zoned_uid/zoned_uid_016_pos.ksh \
+ functional/zoned_uid/zoned_uid_017_neg.ksh \
+ functional/zoned_uid/zoned_uid_018_pos.ksh \
+ functional/zoned_uid/zoned_uid_019_neg.ksh \
+ functional/zoned_uid/zoned_uid_020_neg.ksh \
+ functional/zoned_uid/zoned_uid_021_neg.ksh \
+ functional/zoned_uid/zoned_uid_022_neg.ksh \
+ functional/zoned_uid/zoned_uid_023_pos.ksh \
+ functional/zoned_uid/zoned_uid_024_neg.ksh \
+ functional/zoned_uid/zoned_uid_025_pos.ksh \
+ functional/zoned_uid/zoned_uid_026_pos.ksh \
+ functional/zoned_uid/zoned_uid_027_pos.ksh \
+ functional/zoned_uid/zoned_uid_028_neg.ksh \
+ functional/zoned_uid/zoned_uid_029_neg.ksh \
+ functional/zoned_uid/zoned_uid_030_pos.ksh \
+ functional/zoned_uid/zoned_uid_031_pos.ksh \
functional/zap_shrink/cleanup.ksh \
functional/zap_shrink/zap_shrink_001_pos.ksh \
functional/zap_shrink/setup.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_trunc.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_trunc.ksh
new file mode 100755
index 000000000000..977ec16042b7
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_trunc.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
+
+#
+# DESCRIPTION:
+# When a block is truncated and then cloned to, a read data corruption can occur.
+# This is a regression test for #18412.
+#
+
+verify_runnable "global"
+
+claim="No read data corruption when cloning blocks after a truncate"
+
+function cleanup
+{
+ datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_onexit cleanup
+
+log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
+
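+# clone_after_trunc spawns 64 threads; each writes a source file, re-creates
+# a destination file with O_TRUNC, clones into it with copy_file_range(2),
+# and verifies the data it reads back.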
+# Run several times to increase the likelihood of triggering the bug.
+for i in {0..50}; do
+ log_must clone_after_trunc /$TESTPOOL/
+done
+
+log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_userprop.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_userprop.ksh
new file mode 100755
index 000000000000..0f6709693bb5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_userprop.ksh
@@ -0,0 +1,72 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Oxide Computer Company
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -o user:prop=val' should set a user property while changing
+# or inheriting the key.
+#
+# STRATEGY:
+# 1. Create a parent encrypted dataset
+# 2. Create a child dataset as an encryption root
+# 3. Change parent key while setting a user property
+# 4. Verify the user property is set on the parent
+# 5. Make the child inherit the parent's key while setting a user property
+# 6. Verify the user property is set on the child
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -o user:prop=value' should set a user property"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt" \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must eval "echo $PASSPHRASE2 | zfs change-key -o user:prop=parentvalue" \
+ "$TESTPOOL/$TESTFS1"
+log_must eval "zfs get -H -o value user:prop $TESTPOOL/$TESTFS1 | \
+ grep -q parentvalue"
+
+log_must zfs change-key -i -o user:prop=abcd -o user:prop2=efgh \
+ $TESTPOOL/$TESTFS1/child
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child "$TESTPOOL/$TESTFS1"
+log_must eval "zfs get -H -o value user:prop $TESTPOOL/$TESTFS1/child | \
+ grep -q abcd"
+log_must eval "zfs get -H -o value user:prop2 $TESTPOOL/$TESTFS1/child | \
+ grep -q efgh"
+
+log_pass "'zfs change-key -o user:prop=value' sets a user property"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_005_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_005_pos.ksh
new file mode 100755
index 000000000000..b6115e5c5e36
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_draid_005_pos.ksh
@@ -0,0 +1,149 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020 Lawrence Livermore National Security, LLC.
+# Copyright (c) 2026 Seagate Technology, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify creation of several failure groups in one big width row.
+#
+# STRATEGY:
+# 1) Test valid stripe/spare/children/width combinations.
+# 2) Test invalid stripe/spare/children/width combinations outside the
+# allowed limits.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+
+ rm -f $draid_vdevs
+ rmdir $TESTDIR
+}
+
+log_assert "'zpool create <pool> draid:#d:#c:#w:#s <vdevs>'"
+
+log_onexit cleanup
+
+mkdir $TESTDIR
+
+# Generate 10 random valid configurations to test.
+for (( i = 0; i < 10; i++ )); do
+ parity=$(random_int_between 1 3)
+ spares=$(random_int_between 0 3)
+ data=$(random_int_between 1 10)
+ n=$(random_int_between 2 4)
+
+ (( min_children = (data + parity + spares) ))
+ (( max_children = 64 / n ))
+ children=$(random_int_between $min_children $max_children)
+ (( width = (children * n) ))
+ (( spares *= n ))
+
+ draid="draid${parity}:${data}d:${children}c:${width}w:${spares}s"
+
+ draid_vdevs=$(echo $TESTDIR/file.{1..$width})
+ log_must truncate -s $MINVDEVSIZE $draid_vdevs
+
+ log_must zpool create $TESTPOOL $draid $draid_vdevs
+ log_must poolexists $TESTPOOL
+ destroy_pool $TESTPOOL
+
+ # create the same pool with fgroup keywords
+ draid_fgrp_vdevs=""
+ for (( g = 0; g < n; g++ )); do
+ draid_fgrp_vdevs+="fgroup "
+ for (( c = 0; c < children; c++ )); do
+ draid_fgrp_vdevs+="$TESTDIR/file.$((c + (g * children) + 1)) "
+ done
+ done
+
+ log_must zpool create $TESTPOOL $draid $draid_fgrp_vdevs
+ log_must poolexists $TESTPOOL
+ destroy_pool $TESTPOOL
+
+ # create the same pool with fdomain keywords
+ draid_fdom_vdevs=""
+ for (( c = 0; c < children; c++ )); do
+ draid_fdom_vdevs+="fdomain "
+ for (( g = 0; g < n; g++ )); do
+ draid_fdom_vdevs+="$TESTDIR/file.$((c + (g * children) + 1)) "
+ done
+ done
+
+	log_must zpool create $TESTPOOL $draid $draid_fdom_vdevs
+ log_must poolexists $TESTPOOL
+ destroy_pool $TESTPOOL
+
+ rm -f $draid_vdevs
+done
+
+children=32
+draid_vdevs=$(echo $TESTDIR/file.{1..$children})
+draid_vdevs0=$(echo $TESTDIR/file.{1..$((children / 2))})
+draid_vdevs1=$(echo $TESTDIR/file.{$((children / 2 + 1))..$children})
+draid_vdevs0_less=$(echo $TESTDIR/file.{1..$((children / 2 - 1))})
+draid_vdevs1_more=$(echo $TESTDIR/file.{$((children / 2))..$children})
+log_must truncate -s $MINVDEVSIZE $draid_vdevs
+
+# Exceeds maximum data disks (limited by total children)
+log_must zpool create $TESTPOOL draid2:14d:32w $draid_vdevs
+log_must destroy_pool $TESTPOOL
+log_mustnot zpool create $TESTPOOL draid2:14d:33w $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid2:14d:31w $draid_vdevs
+
+# One fdomain or fgroup keyword is not enough
+log_mustnot zpool create $TESTPOOL draid2:14d:32w fdomain $draid_vdevs
+log_mustnot zpool create $TESTPOOL draid2:14d:32w fgroup $draid_vdevs
+
+# The number of devices should be equal after each fdomain or fgroup
+log_mustnot zpool create $TESTPOOL draid2:14d:32w fdomain $draid_vdevs0_less fdomain $draid_vdevs1_more
+log_mustnot zpool create $TESTPOOL draid2:14d:32w fgroup $draid_vdevs0_less fgroup $draid_vdevs1_more
+
+# Keywords cannot be mixed
+log_mustnot zpool create $TESTPOOL draid2:14d:32w fdomain $draid_vdevs0 fgroup $draid_vdevs1
+
+# Failure groups and domains can be inferred from keywords
+log_must zpool create $TESTPOOL draid2:14d fgroup $draid_vdevs0 fgroup $draid_vdevs1
+log_must poolexists $TESTPOOL
+log_must test "$(get_vdev_prop failure_group $TESTPOOL draid2:14d:16c:32w-0)" == "-"
+log_must destroy_pool $TESTPOOL
+log_must zpool create $TESTPOOL draid1 fdomain $draid_vdevs0 fdomain $draid_vdevs1
+log_must poolexists $TESTPOOL
+log_must test "$(get_vdev_prop failure_domain $TESTPOOL draid1:1d:2c:32w-0)" == "-"
+log_must destroy_pool $TESTPOOL
+
+# Width matches the number of vdevs, but it must be a multiple of children
+log_mustnot zpool create $TESTPOOL draid2:13d:15c:32w $draid_vdevs
+
+log_pass "'zpool create <pool> draid:#d:#c:#w:#s <vdevs>'"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_errinfo_001_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_errinfo_001_neg.ksh
new file mode 100755
index 000000000000..1c11aec6399e
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_errinfo_001_neg.ksh
@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026, Christos Longros. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# 'zpool create' should report which device is in use when it fails
+# because a vdev belongs to an active pool.
+#
+# STRATEGY:
+# 1. Create a backing file for two block devices.
+# 2. Attach two block devices to the same file.
+# 3. Attempt to create a mirror pool using both devices.
+# 4. Verify the error message identifies the specific device.
+# 5. Verify the error message names the active pool.
+#
+
+verify_runnable "global"
+
+TESTFILE="$TEST_BASE_DIR/vdev_errinfo"
+TESTPOOL2="testpool_errinfo"
+BLKDEV1=""
+BLKDEV2=""
+
+function cleanup
+{
+ destroy_pool $TESTPOOL2
+ destroy_pool $TESTPOOL
+
+ if is_linux; then
+ [[ -n "$BLKDEV1" ]] && losetup -d "$BLKDEV1" 2>/dev/null
+ [[ -n "$BLKDEV2" ]] && losetup -d "$BLKDEV2" 2>/dev/null
+ elif is_freebsd; then
+ [[ -n "$BLKDEV1" ]] && mdconfig -d -u "$BLKDEV1" 2>/dev/null
+ [[ -n "$BLKDEV2" ]] && mdconfig -d -u "$BLKDEV2" 2>/dev/null
+ fi
+
+ rm -f "$TESTFILE"
+}
+
+log_assert "'zpool create' reports device-specific errors for in-use vdevs."
+log_onexit cleanup
+
+# Create a file to back the block devices
+log_must truncate -s $MINVDEVSIZE "$TESTFILE"
+
+# Attach two block devices to the same file (platform-specific)
+if is_linux; then
+ BLKDEV1=$(losetup -f --show "$TESTFILE")
+ BLKDEV2=$(losetup -f --show "$TESTFILE")
+elif is_freebsd; then
+ BLKDEV1=/dev/$(mdconfig -a -t vnode -f "$TESTFILE")
+ BLKDEV2=/dev/$(mdconfig -a -t vnode -f "$TESTFILE")
+else
+ log_unsupported "Platform not supported for this test"
+fi
+
+log_note "Using devices: $BLKDEV1 $BLKDEV2"
+
+# Attempt to create a mirror pool; this should fail because both
+# devices refer to the same underlying file.
+log_mustnot zpool create $TESTPOOL2 mirror $BLKDEV1 $BLKDEV2
+
+# Re-run to capture the error message for content verification
+errmsg=$(zpool create $TESTPOOL2 mirror $BLKDEV1 $BLKDEV2 2>&1)
+log_note "zpool create output: $errmsg"
+
+# Error message should name one of the devices
+log_must eval "echo '$errmsg' | grep -qE '$BLKDEV1|$BLKDEV2'"
+
+# Error message should name the active pool
+if echo "$errmsg" | grep -q "active pool"; then
+ log_note "Error message correctly identifies the active pool"
+else
+ log_fail "Error message does not mention the active pool: $errmsg"
+fi
+
+log_pass "'zpool create' reports device-specific errors for in-use vdevs."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
index f59104e19805..79992227169e 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
@@ -72,6 +72,8 @@ typeset -a properties=(
io_n
io_t
slow_io_events
+ failure_domain
+ failure_group
slow_io_n
slow_io_t
trim_support
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index dcb2b92cc0e1..a68a2a4995cc 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -149,6 +149,7 @@ typeset -a properties=(
"feature@log_spacemap"
"feature@device_rebuild"
"feature@draid"
+ "feature@draid_failure_domains"
"feature@redaction_list_spill"
"feature@dynamic_gang_header"
"feature@physical_rewrite"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh
new file mode 100755
index 000000000000..cd7776f00aba
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_offline/zpool_offline_spare.ksh
@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright 2026 by Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that traditional spares that are active can be offlined or
+# force-faulted. Verify that in all other cases, spares cannot be
+# offlined or faulted.
+#
+# STRATEGY:
+# 1. Create pool with traditional spare
+# 2. Verify we can't offline and fault an inactive traditional spare
+# 3. Verify we can offline and fault an active traditional spare
+# 4. Create draid pool with draid spare
+# 5. Verify we can't offline/fault draid spare
+
+TESTPOOL2=testpool2
+function cleanup
+{
+ destroy_pool $TESTPOOL2
+ log_must rm -f $TESTDIR/file-vdev-{1..3}
+}
+
+log_onexit cleanup
+verify_runnable "global"
+
+log_assert "Verify zpool offline has the correct behavior on spares"
+
+# Verify any old file vdevs are gone
+log_mustnot ls $TESTDIR/file-vdev-* &> /dev/null
+
+log_must truncate -s 100M $TESTDIR/file-vdev-{1..3}
+
+log_must zpool create $TESTPOOL2 mirror $TESTDIR/file-vdev-1 \
+ $TESTDIR/file-vdev-2 spare $TESTDIR/file-vdev-3
+
+# Test that we can't offline an inactive spare
+log_mustnot zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3
+log_mustnot zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3
+
+# Test that we can offline an active spare
+log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 $TESTDIR/file-vdev-3
+log_must zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3
+log_must zpool online $TESTPOOL2 $TESTDIR/file-vdev-3
+log_must zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3
+
+destroy_pool $TESTPOOL2
+
+log_must zpool create -f $TESTPOOL2 draid1:1d:1s:3c $TESTDIR/file-vdev-{1..3}
+
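+# Distributed spares are named draid<parity>-<top-level vdev index>-<spare
+# index>, so draid1-0-0 is the first spare of the first draid vdev.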
+# Test that we can't offline an inactive draid spare
+log_mustnot zpool offline $TESTPOOL2 draid1-0-0
+log_mustnot zpool offline -f $TESTPOOL2 draid1-0-0
+
+# Test that we can't offline an active draid spare
+log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 draid1-0-0
+log_mustnot zpool offline $TESTPOOL2 draid1-0-0
+log_mustnot zpool offline -f $TESTPOOL2 draid1-0-0
+
+log_pass "zpool offline has the correct behavior on spares"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh
new file mode 100755
index 000000000000..b19f576adcf4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh
@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test that POSIX_FADV_DONTNEED evicts data from the ZFS dbuf cache.
+#
+# STRATEGY:
+# 1. Write blocks to a file and sync, so they land in the dbuf LRU cache
+# 2. Record cache_count from dbufstats
+# 3. Call file_fadvise with POSIX_FADV_DONTNEED on the file
+# 4. Verify that cache_count decreased
+#
+
+verify_runnable "global"
+
+FILE=$TESTDIR/$TESTFILE0
+BLKSZ=$(get_prop recordsize $TESTPOOL)
+
+function cleanup
+{
+ [[ -e $TESTDIR ]] && log_must rm -Rf $TESTDIR/*
+}
+
+log_assert "Ensure POSIX_FADV_DONTNEED evicts data from the dbuf cache"
+
+log_onexit cleanup
+
+log_must file_write -o create -f $FILE -b $BLKSZ -c 100
+sync_pool $TESTPOOL
+
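+# After the sync the file's dbufs are clean but still held in the dbuf LRU
+# cache, which is what dbufstats.cache_count reports.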
+evicts1=$(kstat dbufstats.cache_count)
+
+log_must file_fadvise -f $FILE -a POSIX_FADV_DONTNEED
+
+evicts2=$(kstat dbufstats.cache_count)
+log_note "cache_count before=$evicts1 after=$evicts2"
+
+log_must [ $evicts1 -gt $evicts2 ]
+
+log_pass "POSIX_FADV_DONTNEED evicts data from the dbuf cache"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
index 43bfe2f5bfb1..83b3b77aa193 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
@@ -166,9 +166,8 @@ do
mntpnt=$(get_prop mountpoint /$TESTPOOL)
- # 2. Fault the spare device making it unavailable
- log_must zpool offline -f $TESTPOOL $sparedev
- log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
+ # 2. Remove the spare device making it unavailable
+ log_must zpool remove $TESTPOOL $sparedev
# 3. Simulate physical removal of one device
remove_disk $removedev
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_draid_fgroups.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_draid_fgroups.ksh
new file mode 100755
index 000000000000..c22d8ef6397a
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_draid_fgroups.ksh
@@ -0,0 +1,163 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024, Klara Inc.
+# Copyright (c) 2026, Seagate Technology, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/blkdev.shlib
+
+#
+# DESCRIPTION: Verify that removing 4 disks from a single failure group of
+# a draid3 pool suspends the pool.
+#
+# STRATEGY:
+# 1. Disable ZED -- this test is focused on vdev_probe errors.
+# 2. Create a draid3 pool with random number of failure groups, from 2 to 6,
+# where 4 disks can be removed (i.e., using scsi_debug).
+# 3. Add some data to it for a resilver workload.
+# 4. Replace one of the child vdevs to start a replacing vdev.
+# 5. During the resilver, remove 4 disks, including one from the replacing vdev,
+# from a failure group.
+# 6. Verify that the pool is suspended.
+#
+
+DEV_SIZE_MB=1024
+
+DRAID_FGRP_CNT=$(random_int_between 2 6)
+FILE_VDEV_CNT=$((8 * $DRAID_FGRP_CNT))
+DRAID="draid3:8c:${FILE_VDEV_CNT}w"
+FILE_VDEV_SIZ=256M
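+# 8 children per failure group, FILE_VDEV_CNT children in total; the four
+# scsi_debug partitions are listed last so they all land in the same
+# failure group and can be "unplugged" at once.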
+
+function cleanup
+{
+ destroy_pool $TESTPOOL
+ if [[ "$(cat /sys/block/$sd/device/state)" == "offline" ]]; then
+ log_must eval "echo running > /sys/block/$sd/device/state"
+ fi
+ unload_scsi_debug
+ rm -f $DATA_FILE
+ for i in {0..$((FILE_VDEV_CNT - 1))}; do
+ log_must rm -f "$TEST_BASE_DIR/dev-$i"
+ done
+ log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+ zed_start
+}
+
+log_onexit cleanup
+
+log_assert "dRAID vdev with failure groups probe errors for more disks than" \
+ "parity in a group should suspend a pool"
+
+log_note "Stoping ZED process"
+zed_stop
+zpool events -c
+
+# Make a debug device that we can "unplug" and lose 4 drives at once
+unload_scsi_debug
+load_scsi_debug $DEV_SIZE_MB 1 1 1 '512b'
+sd=$(get_debug_device)
+
+# Create 4 partitions that match the FILE_VDEV_SIZ
+parted "/dev/${sd}" --script mklabel gpt
+parted "/dev/${sd}" --script mkpart primary 0% 25%
+parted "/dev/${sd}" --script mkpart primary 25% 50%
+parted "/dev/${sd}" --script mkpart primary 50% 75%
+parted "/dev/${sd}" --script mkpart primary 75% 100%
+block_device_wait "/dev/${sd}"
+blkdevs="/dev/${sd}1 /dev/${sd}2 /dev/${sd}3 /dev/${sd}4"
+
+# Create file vdevs
+typeset -a filedevs
+for i in {0..$((FILE_VDEV_CNT - 1))}; do
+ device=$TEST_BASE_DIR/dev-$i
+ log_must truncate -s $FILE_VDEV_SIZ $device
+ # Use all but the last one for pool create
+ if [[ $i -lt $((FILE_VDEV_CNT - 4)) ]]; then
+ filedevs[${#filedevs[*]}+1]=$device
+ fi
+done
+
+# Create a draid3 pool that we can pull 4 disks from
+log_must zpool create -f $TESTPOOL $DRAID ${filedevs[@]} $blkdevs
+sync_pool $TESTPOOL
+
+# Add some data to the pool
+log_must zfs create $TESTPOOL/fs
+MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
+SECONDS=0
+log_must fill_fs $MNTPOINT 1 200 4096 10 R
+log_note "fill_fs took $SECONDS seconds"
+sync_pool $TESTPOOL
+
+# Start a replacing vdev, but suspend the resilver
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+log_must zpool replace -f $TESTPOOL /dev/${sd}4 $TEST_BASE_DIR/dev-$((FILE_VDEV_CNT - 1))
+
+# Remove 4 disks all at once
+log_must eval "echo offline > /sys/block/${sd}/device/state"
+
+log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+
+# Add some writes to drive the vdev probe errors
+log_must dd if=/dev/urandom of=$MNTPOINT/writes bs=1M count=1
+
+# Wait until sync starts, and the pool suspends
+log_note "waiting for pool to suspend"
+typeset -i tries=30
+until [[ $(kstat_pool $TESTPOOL state) == "SUSPENDED" ]] ; do
+ if ((tries-- == 0)); then
+ zpool status -s
+ log_fail "UNEXPECTED -- pool did not suspend"
+ fi
+ sleep 1
+done
+log_note $(kstat_pool $TESTPOOL state)
+
+# Put the missing disks back into service
+log_must eval "echo running > /sys/block/$sd/device/state"
+
+# Clear the vdev error states, which will reopen the vdevs and resume the pool
+log_must zpool clear $TESTPOOL
+
+# Wait until the pool resumes
+log_note "waiting for pool to resume"
+tries=30
+until [[ $(kstat_pool $TESTPOOL state) != "SUSPENDED" ]] ; do
+ if ((tries-- == 0)); then
+ log_fail "pool did not resume"
+ fi
+ sleep 1
+done
+log_must zpool wait -t resilver $TESTPOOL
+sync_pool $TESTPOOL
+
+# Make sure a pool scrub comes back clean
+log_must zpool scrub -w $TESTPOOL
+log_must zpool status -v $TESTPOOL
+log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+log_pass "dRAID vdev with failure groups probe errors for more disks than" \
+ "parity in a group should suspend a pool"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
index 65435554bdbe..53e2efffac2d 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
@@ -123,7 +123,7 @@ function setup_test_env
log_note "Filling up the filesystem ..."
typeset -i i=0
typeset file=$TESTDIR/file
- typeset -i limit
+ typeset -li limit
(( limit = $(get_prop available $pool) / 2 ))
while true ; do
@@ -206,15 +206,17 @@ function is_data_valid
#
# $1 pool name
# $2 devices count
+# $3 starting device index (optional, counts from 0)
#
-function get_vdevs #pool cnt
+function get_vdevs #pool cnt off
{
typeset pool=$1
typeset -i cnt=$2
+ typeset -i off=$3
typeset all_devs=$(zpool iostat -v $pool | awk '{print $1}' | \
grep -vEe "^pool$|^capacity$|^mirror\-[0-9]$|^raidz[1-3]\-[0-9]$|^draid[1-3].*\-[0-9]$|---" \
- -e "/old$|^$pool$")
+ -e "/old$|^$pool$" | tail -n +"$((off + 1))")
typeset -i i=0
typeset vdevs
while ((i < cnt)); do
@@ -283,6 +285,43 @@ function damage_devs
}
#
+# Damage the pool's virtual device files starting from i-th one.
+#
+# $1 pool name
+# $2 failing devices count
+# $3 starting from which device (counts from 0)
+# $4 damage method; if not null, the vdev labels are kept
+#
+function damage_devs_off
+{
+ typeset pool=$1
+ typeset -i cnt=$2
+ typeset -i off=$3
+ typeset label="$4"
+ typeset vdevs
+ typeset -i bs_count=$(((MINVDEVSIZE / 1024) - 4096))
+
+ vdevs=$(get_vdevs $pool $cnt $off)
+ typeset dev
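+	# With a label argument, skip the first 512 KiB and stop short of the
+	# end of the device so the front and back vdev labels survive.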
+ if [[ -n $label ]]; then
+ for dev in $vdevs; do
+ log_note "damage $dev (keeping label)"
+ log_must dd if=/dev/zero of=$dev seek=512 bs=1024 \
+ count=$bs_count conv=notrunc >/dev/null 2>&1
+ done
+ else
+ for dev in $vdevs; do
+ log_note "damage $dev"
+ log_must dd if=/dev/zero of=$dev bs=1024 \
+ count=$bs_count conv=notrunc >/dev/null 2>&1
+ done
+ fi
+
+ sync_pool $pool
+}
+
+#
# Clear errors in the pool caused by data corruptions
#
# $1 pool name
@@ -324,6 +363,26 @@ function remove_devs
}
#
+# Remove the specified pool's virtual device files starting from i-th one
+#
+# $1 Pool name
+# $2 Missing devices count
+#
+function remove_devs_off
+{
+ typeset pool=$1
+ typeset -i cnt=$2
+ typeset -i off=$3
+ typeset vdevs
+
+ vdevs=$(get_vdevs $pool $cnt $off)
+ log_note "remove $vdevs"
+ log_must rm -f $vdevs
+
+ sync_pool $pool
+}
+
+#
# Recover the bad or missing device files in the pool
#
# $1 Pool name
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh
new file mode 100755
index 000000000000..ae65d3a21290
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh
@@ -0,0 +1,141 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# When sequentially resilvering a dRAID pool with multiple vdevs
+# and N faulted vdevs, where N=parity, ensure that when another leaf
+# is marked degraded the pool can still be sequentially resilvered
+# without introducing new checksum errors. Note we've exhausted
+# the available redundancy so no silent correction can be tolerated.
+#
+# STRATEGY:
+# 1. Create block device files for the test draid pool
+# 2. For each parity value [1..3]
+# - create draid pool
+# - fill it with some directories/files
+# - fault N=parity vdevs eliminating any redundancy
+# - force fault an additional vdev causing it to be degraded
+# - replace the degraded (but online) vdev using a sequential
+# resilver. The minimum pool redundancy requirements are met so
+# reconstruction is possible when reading from all online vdevs.
+# - verify that the draid spare was correctly reconstructed and
+# no checksum errors were introduced.
+# - destroy the draid pool
+#
+
+typeset -r devs=7
+typeset -r dev_size_mb=512
+
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
+
+function cleanup
+{
+ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+ for i in {0..$devs}; do
+ rm -f "$TEST_BASE_DIR/dev-$i"
+ done
+
+ set_tunable32 PREFETCH_DISABLE $prefetch_disable
+ set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
+}
+
+function test_sequential_resilver # <pool> <parity> <dir>
+{
+ typeset pool=$1
+ typeset nparity=$2
+ typeset dir=$3
+
+ # Fault N=parity devices
+ for (( i=0; i<$nparity; i=i+1 )); do
+ log_must zpool offline -f $pool $dir/dev-$i
+ done
+
+ # Parity is exhausted, faulting another device marks it degraded
+ log_must zpool offline -f $pool $dir/dev-$nparity
+
+ # Replace the degraded vdev with a distributed spare
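+	# (-s requests a sequential rebuild and -w waits for it to complete)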
+ spare=draid${nparity}-0-0
+ log_must zpool replace -fsw $pool $dir/dev-$nparity $spare
+
+ log_must zpool scrub -w $pool
+ log_must zpool status $pool
+
+ log_must check_pool_status $pool "scan" "repaired 0B"
+ log_must check_pool_status $pool "errors" "No known data errors"
+ log_must check_pool_status $pool "scan" "with 0 errors"
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+ device=$TEST_BASE_DIR/dev-$i
+ log_must truncate -s ${dev_size_mb}M $device
+ disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 1 2 3; do
+ raid=draid${nparity}:${nparity}s
+ dir=$TEST_BASE_DIR
+
+ log_must zpool create -O compression=off -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+ log_must zfs set primarycache=metadata $TESTPOOL
+
+ log_must zfs create $TESTPOOL/fs
+ log_must fill_fs /$TESTPOOL/fs 1 512 102400 1 R
+
+ log_must zfs create -o compress=on $TESTPOOL/fs2
+ log_must fill_fs /$TESTPOOL/fs2 1 512 102400 1 R
+
+ log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+ log_must fill_fs /$TESTPOOL/fs3 1 512 102400 1 R
+
+ log_must zpool export $TESTPOOL
+ log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+ log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+ test_sequential_resilver $TESTPOOL $nparity $dir
+
+ log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid degraded device(s) test succeeded."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh
new file mode 100755
index 000000000000..8d102627fdb0
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh
@@ -0,0 +1,157 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2026 by Wasabi Technologies, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# When sequentially resilvering a dRAID pool with multiple vdevs
+# and N faulted vdevs, where N=parity, ensure that when another leaf
+# is marked degraded the pool can still be sequentially resilvered
+# without introducing new checksum errors. Note we've exhausted
+# the available redundancy so no silent correction can be tolerated.
+#
+# This test is very similar to redundancy_draid_degraded1 and is
+# based on it. The difference is that 1) we always have some faulted
+# vdev which is already resilvered, and 2) we resilver the most
+# recently faulted, but marked degraded due to redundancy exhaustion,
+# vdev also.
+#
+# STRATEGY:
+# 1. Create block device files for the test draid pool
+# 2. For each parity value [1..3]
+# - create draid pool
+# - fill it with some directories/files
+# - fault one vdev and resilver it
+# - fault N=parity vdevs eliminating any redundancy
+# - force fault an additional vdev causing it to be degraded
+# - replace faulted vdevs using a sequential resilver.
+# The minimum pool redundancy requirements are met so
+# reconstruction is possible when reading from all online vdevs.
+# - verify that the draid spare was correctly reconstructed and
+# no checksum errors were introduced.
+# - destroy the draid pool
+#
+
+typeset -r devs=13
+typeset -r dev_size_mb=512
+
+typeset -a disks
+
+prefetch_disable=$(get_tunable PREFETCH_DISABLE)
+rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
+scan_suspend_progress=$(get_tunable SCAN_SUSPEND_PROGRESS)
+
+function cleanup
+{
+ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
+
+ for i in {0..$devs}; do
+ rm -f "$TEST_BASE_DIR/dev-$i"
+ done
+
+ set_tunable32 PREFETCH_DISABLE $prefetch_disable
+ set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
+ set_tunable32 SCAN_SUSPEND_PROGRESS $scan_suspend_progress
+}
+
+function test_sequential_resilver # <pool> <parity> <dir>
+{
+ typeset pool=$1
+ typeset nparity=$2
+ typeset dir=$3
+
+ # Fault N=parity devices
+ for (( i=0; i<$nparity; i++ )); do
+ log_must zpool offline -f $pool $dir/dev-$i
+ done
+
+ # Parity is exhausted, faulting another device marks it degraded
+ log_must zpool offline -f $pool $dir/dev-$nparity
+
+ # Replace all faulted vdevs with distributed spares
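+	# SCAN_SUSPEND_PROGRESS holds the resilver until every replacement
+	# has been issued, so the spares resilver in one pass.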
+ log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1
+ for (( i=0; i<$((nparity+1)); i++ )); do
+ spare=draid${nparity}-0-$i
+ log_must zpool replace -fs $pool $dir/dev-$i $spare
+ done
+ log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
+
+ log_must zpool wait -t resilver $pool
+
+ log_must zpool scrub -w $pool
+ log_must zpool status $pool
+
+ log_must check_pool_status $pool "scan" "repaired 0B"
+ log_must check_pool_status $pool "errors" "No known data errors"
+ log_must check_pool_status $pool "scan" "with 0 errors"
+}
+
+log_onexit cleanup
+
+log_must set_tunable32 PREFETCH_DISABLE 1
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+# Disk files which will be used by pool
+for i in {0..$(($devs - 1))}; do
+ device=$TEST_BASE_DIR/dev-$i
+ log_must truncate -s ${dev_size_mb}M $device
+ disks[${#disks[*]}+1]=$device
+done
+
+# Disk file which will be attached
+log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
+
+for nparity in 3; do
+ raid=draid${nparity}:$((nparity+2))s
+ dir=$TEST_BASE_DIR
+
+ log_must zpool create -O compression=off -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
+ log_must zfs set primarycache=metadata $TESTPOOL
+
+ log_must zfs create $TESTPOOL/fs
+ log_must fill_fs /$TESTPOOL/fs 1 512 102400 1 R
+
+ log_must zfs create -o compress=on $TESTPOOL/fs2
+ log_must fill_fs /$TESTPOOL/fs2 1 512 102400 1 R
+
+ log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
+ log_must fill_fs /$TESTPOOL/fs3 1 512 102400 1 R
+
+ log_must zpool export $TESTPOOL
+ log_must zpool import -o cachefile=none -d $dir $TESTPOOL
+
+ log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+ test_sequential_resilver $TESTPOOL $nparity $dir
+
+ log_must zpool destroy "$TESTPOOL"
+done
+
+log_pass "draid degraded device(s) test succeeded."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh
new file mode 100755
index 000000000000..ffbf664046ec
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh
@@ -0,0 +1,152 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2026 by Seagate Technology, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# Verify resilver to dRAID distributed spares with failure groups.
+#
+# STRATEGY:
+# 1. For resilvers:
+# a. Create a semi-random dRAID pool configuration which can:
+#       - sustain N (1-3) simultaneous failures in each of its n failure
+#         groups, and
+#       - provide N * n distributed spares to replace all faulted vdevs
+#       - n is the number of failure groups in the dRAID
+# b. Fill the pool with data
+# c. Systematically fault a vdev, then replace it with a spare
+# d. Scrub the pool to verify no data was lost
+# e. Verify the contents of files in the pool
+#
+
+log_assert "Verify resilver to dRAID distributed spares with failure groups"
+
+function cleanup_tunable
+{
+ log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
+ cleanup
+}
+
+log_onexit cleanup_tunable
+
+log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
+
+for replace_mode in "healing" "sequential"; do
+
+ if [[ "$replace_mode" = "sequential" ]]; then
+ flags="-s"
+ else
+ flags=""
+ fi
+
+ parity=$(random_int_between 1 3)
+ spares=$(random_int_between 1 $parity)
+ data=$(random_int_between 1 8)
+
+ (( min_children = (data + parity + spares) ))
+ children=$(random_int_between $min_children 16)
+ n=$(random_int_between 2 4)
+ (( width = children * n ))
+ off=$(random_int_between 0 $((children - parity - 1)))
+
+ (( spares *= $(random_int_between 1 $n) ))
+
+ draid="draid${parity}:${data}d:${children}c:${width}w:${spares}s"
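+	# Example of one generated layout (hypothetical values): parity=2,
+	# data=4, children=8, n=3 and spares=6 give width=24 and the vdev
+	# spec draid2:4d:8c:24w:6s.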
+
+ setup_test_env $TESTPOOL $draid $width
+
+ for (( i=0; i < $spares; i+=$n )); do
+
+ for (( j=$i; j < $((i+n)); j++ )); do
+ fault_vdev="$BASEDIR/vdev$((i / n + (j % n) * children + off))"
+ log_must zpool offline -f $TESTPOOL $fault_vdev
+ log_must check_vdev_state $TESTPOOL $fault_vdev "FAULTED"
+ done
+
+ for (( j=$i; j < $((i+n)) && j < $spares; j++ )); do
+ fault_vdev="$BASEDIR/vdev$((i / n + (j % n) * children + off))"
+ spare_vdev="draid${parity}-0-${j}"
+ log_must zpool replace -w $flags $TESTPOOL \
+ $fault_vdev $spare_vdev
+ done
+
+ for (( j=$i; j < $((i+n)) && j < $spares; j++ )); do
+ fault_vdev="$BASEDIR/vdev$((i / n + (j % n) * children + off))"
+ spare_vdev="draid${parity}-0-${j}"
+		log_must check_vdev_state $TESTPOOL spare-$j "DEGRADED"
+		log_must check_vdev_state $TESTPOOL $spare_vdev "ONLINE"
+ log_must check_hotspare_state $TESTPOOL $spare_vdev "INUSE"
+ log_must zpool detach $TESTPOOL $fault_vdev
+ done
+
+ log_must verify_pool $TESTPOOL
+ log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+ log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+ done
+
+ # Fail remaining drives as long as parity permits.
+ failed=$(((spares + n - 1) / n))
+ faults_left=$parity
+ (( (spares % n) )) && (( faults_left -= $failed ))
+ off=0
+ for (( ; failed < $parity; failed++ )); do
+ # we can still fail disks
+ (( ++off ))
+ for (( i=0; i < $n; i++ )); do
+ fault_vdev="$BASEDIR/vdev$((i * children + children - 1 - off))"
+ log_must zpool offline -f $TESTPOOL $fault_vdev
+ log_must check_vdev_state $TESTPOOL $fault_vdev "FAULTED"
+
+ log_must verify_pool $TESTPOOL
+ log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+ log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+ (( faults_left > 0 && faults_left-- ))
+ done
+ done
+
+ # Make sure that faults_left failures are still allowed, but no more.
+ for (( i=0; i < $n; i++ )); do
+ fault_vdev="$BASEDIR/vdev$((i * children + children - 1))"
+ log_must zpool offline -f $TESTPOOL $fault_vdev
+ if (( $i < $faults_left)); then
+ log_must check_vdev_state $TESTPOOL $fault_vdev "FAULTED"
+ else
+ log_must check_vdev_state $TESTPOOL $fault_vdev "DEGRADED"
+ break
+ fi
+
+ log_must verify_pool $TESTPOOL
+ log_must check_pool_status $TESTPOOL "scan" "repaired 0B"
+ log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+ done
+
+ log_must is_data_valid $TESTPOOL
+ log_must check_pool_status $TESTPOOL "errors" "No known data errors"
+
+ cleanup
+done
+
+log_pass "Verify resilver to dRAID distributed spares with failure groups"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_width.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_width.ksh
new file mode 100755
index 000000000000..40cb7bbb8efb
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_width.ksh
@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2026 by Seagate Technology, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
+
+#
+# DESCRIPTION:
+# A draid vdev with n failure groups can withstand n devices failing
+# or missing, provided the failed device is the i-th one in each group.
+#
+# STRATEGY:
+# 1. Create N (3-6) * n (2-4) virtual disk files.
+# 2. Create draid pool based on the virtual disk files.
+# 3. Fill the filesystem with directories and files.
+# 4. Record all the files and directories checksum information.
+# 5. Damage the i-th virtual disk file in each of the n groups.
+# 6. Verify the data is correct.
+#
+
+verify_runnable "global"
+
+log_assert "Verify a draid pool with n failure groups can withstand the" \
+	"i-th device failing in each group."
+log_onexit cleanup
+
+typeset -i children=$(random_int_between 3 6)
+typeset -i fgroups=$(random_int_between 2 4)
+typeset -i ith=$(random_int_between 0 $((children - 1)))
+typeset -i width=$((children * fgroups))
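+# Example (hypothetical values): children=4 and fgroups=3 give width=12 and
+# the layout string draid:4c:12w; ith selects the same slot in every group.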
+setup_test_env $TESTPOOL draid:${children}c:${width}w $width
+
+#
+# Inject data corruption errors for draid pool
+#
+for (( i=0; i<$fgroups; i=i+1 )); do
+ damage_devs_off $TESTPOOL 1 "$((ith + children*i))" "label"
+done
+log_must is_data_valid $TESTPOOL
+log_must clear_errors $TESTPOOL
+
+#
+# Inject bad device errors for draid pool
+#
+for (( i=0; i<$fgroups; i=i+1 )); do
+ damage_devs_off $TESTPOOL 1 "$((ith + children*i))"
+done
+log_must is_data_valid $TESTPOOL
+log_must recover_bad_missing_devs $TESTPOOL 1
+
+#
+# Inject missing device errors for draid pool
+#
+for (( i=0; i<$fgroups; i=i+1 )); do
+ remove_devs_off $TESTPOOL 1 "$((ith + children*i))"
+done
+log_must is_data_valid $TESTPOOL
+
+log_pass "draid:${children}c:${width}w pool withstood the i-th device" \
+	"failing in each of its $fgroups groups."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
index 16e18c8def1f..ef2cc4a7bcd4 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
@@ -38,9 +38,6 @@ verify_runnable "both"
log_assert "Verify raw sending to pools with greater ashift succeeds"
-if is_freebsd; then
- log_unsupported "Runs too long on FreeBSD 14 (Issue #14961)"
-fi
function cleanup
{
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/cleanup.ksh
new file mode 100755
index 000000000000..c611e5a4d03f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/cleanup.ksh
@@ -0,0 +1,46 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid.cfg
+
+# Restore AppArmor user namespace restriction if we relaxed it
+APPARMOR_USERNS=/proc/sys/kernel/apparmor_restrict_unprivileged_userns
+APPARMOR_RESTORE=/tmp/zoned_uid_apparmor_restore
+if [ -f "$APPARMOR_RESTORE" ]; then
+ cat "$APPARMOR_RESTORE" > "$APPARMOR_USERNS"
+ rm -f "$APPARMOR_RESTORE"
+fi
+
+# Remove test users created during setup
+for uid in "$ZONED_TEST_UID" "$ZONED_OTHER_UID"; do
+ if id "zfs_test_$uid" >/dev/null 2>&1; then
+ userdel "zfs_test_$uid" 2>/dev/null
+ fi
+done
+
+default_cleanup
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/setup.ksh
new file mode 100755
index 000000000000..3345a5981a38
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/setup.ksh
@@ -0,0 +1,99 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+# Only run on Linux - zoned_uid is Linux-specific
+if ! is_linux; then
+ log_unsupported "zoned_uid is only supported on Linux"
+fi
+
+# Check kernel supports user namespaces
+if ! [ -f /proc/self/uid_map ]; then
+ log_unsupported "The kernel doesn't support user namespaces."
+fi
+
+verify_runnable "global"
+
+DISK=${DISKS%% *}
+default_setup_noexit $DISK
+
+# Check if zoned_uid property is supported (requires pool to exist)
+if ! zoned_uid_supported; then
+ default_cleanup_noexit
+ log_unsupported "zoned_uid property not supported by this kernel"
+fi
+
+#
+# Provision test users if they don't exist.
+# Tests use "sudo -u #<uid>" which requires the UID to have a passwd entry.
+# CI environments (e.g. GitHub Actions QEMU VMs) typically don't have these.
+#
+for uid in "$ZONED_TEST_UID" "$ZONED_OTHER_UID"; do
+ if ! id "$uid" >/dev/null 2>&1; then
+ log_note "Creating test user for UID $uid"
+ log_must useradd -u "$uid" -M -N -s /usr/sbin/nologin \
+ "zfs_test_$uid"
+ fi
+done
+
+# Some environments (e.g., Ubuntu with AppArmor) restrict unprivileged
+# user namespace creation. Try to relax the restriction for testing.
+APPARMOR_USERNS=/proc/sys/kernel/apparmor_restrict_unprivileged_userns
+APPARMOR_RESTORE=/tmp/zoned_uid_apparmor_restore
+if [ -f "$APPARMOR_USERNS" ]; then
+ orig=$(cat "$APPARMOR_USERNS")
+ if [ "$orig" != "0" ]; then
+ echo "$orig" > "$APPARMOR_RESTORE"
+ echo 0 > "$APPARMOR_USERNS"
+ log_note "Relaxed AppArmor user namespace restriction for testing"
+ fi
+fi
+
+# Verify user namespace creation works with the test UIDs.
+if ! sudo -u \#${ZONED_TEST_UID} unshare --user --map-root-user \
+ true 2>/dev/null; then
+ default_cleanup_noexit
+ log_unsupported "Cannot create user namespaces as UID $ZONED_TEST_UID"
+fi
+
+# Verify capsh is available and works for capability control tests.
+# Tests 023+ use run_in_userns_caps which requires capsh.
+typeset _capsh_found
+_capsh_found="$(which capsh)"
+if [[ -z "$_capsh_found" ]]; then
+ log_note "WARNING: capsh not found; capability-tier tests will be skipped"
+else
+ if ! verify_capsh_works; then
+ log_note "WARNING: capsh cap control broken; capability-tier tests may fail"
+ else
+ log_note "capsh capability control verified"
+ fi
+fi
+
+log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid.cfg
new file mode 100644
index 000000000000..e3a98d38e962
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid.cfg
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+# Test UID for zoned_uid - the UID that will own the user namespaces; it must
+# match the user who creates them.  The default (956) assumes a dedicated
+# container user owning a subuid range; override ZONED_TEST_UID as needed.
+export ZONED_TEST_UID=${ZONED_TEST_UID:-956}
+
+# A different UID used to exercise the non-matching (unauthorized) case.
+export ZONED_OTHER_UID=${ZONED_OTHER_UID:-1000}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_001_pos.ksh
new file mode 100755
index 000000000000..775baf188bc3
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_001_pos.ksh
@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that the zoned_uid property can be set and retrieved.
+#
+# STRATEGY:
+# 1. Verify default zoned_uid is 0 (none)
+# 2. Set zoned_uid to a test UID
+# 3. Verify the property value is correct
+# 4. Clear zoned_uid (set to 0)
+# 5. Verify it returns to 0
+#
+
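+# get_zoned_uid, set_zoned_uid and clear_zoned_uid come from
+# zoned_uid_common.kshlib; they are assumed here to be thin wrappers around
+# "zfs get -H -o value zoned_uid <ds>" and "zfs set zoned_uid=<uid> <ds>",
+# with clearing simply setting the value back to 0.
+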
+verify_runnable "global"
+
+function cleanup
+{
+ log_must zfs destroy -rf "$TESTPOOL/$TESTFS/zoned_test"
+}
+
+log_assert "zoned_uid property can be set and retrieved"
+log_onexit cleanup
+
+# Create test dataset
+log_must zfs create "$TESTPOOL/$TESTFS/zoned_test"
+
+# Verify default is 0
+typeset default_val
+default_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/zoned_test")
+if [[ "$default_val" != "0" ]]; then
+ log_fail "Default zoned_uid should be 0, got: $default_val"
+fi
+log_note "Default zoned_uid is 0 as expected"
+
+# Set zoned_uid
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/zoned_test" "$ZONED_TEST_UID"
+
+# Verify the value
+typeset set_val
+set_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/zoned_test")
+if [[ "$set_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "zoned_uid should be $ZONED_TEST_UID, got: $set_val"
+fi
+log_note "zoned_uid set to $ZONED_TEST_UID successfully"
+
+# Clear zoned_uid
+log_must clear_zoned_uid "$TESTPOOL/$TESTFS/zoned_test"
+
+# Verify it's back to 0
+typeset cleared_val
+cleared_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/zoned_test")
+if [[ "$cleared_val" != "0" ]]; then
+ log_fail "Cleared zoned_uid should be 0, got: $cleared_val"
+fi
+log_note "zoned_uid cleared to 0 successfully"
+
+log_pass "zoned_uid property can be set and retrieved"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_002_pos.ksh
new file mode 100755
index 000000000000..51cd5be36380
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_002_pos.ksh
@@ -0,0 +1,83 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that zoned_uid property persists through pool export/import.
+#
+# STRATEGY:
+# 1. Create a test dataset
+# 2. Set zoned_uid property
+# 3. Export the pool
+# 4. Import the pool
+# 5. Verify zoned_uid property is preserved
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -f "$TESTPOOL/$TESTFS/persist_test" 2>/dev/null
+}
+
+log_assert "zoned_uid property persists through pool export/import"
+log_onexit cleanup
+
+# Create test dataset
+log_must zfs create "$TESTPOOL/$TESTFS/persist_test"
+
+# Set zoned_uid
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/persist_test" "$ZONED_TEST_UID"
+
+# Verify before export
+typeset before_val
+before_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/persist_test")
+if [[ "$before_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Before export: zoned_uid should be $ZONED_TEST_UID, got: $before_val"
+fi
+log_note "zoned_uid is $ZONED_TEST_UID before export"
+
+# Export the pool
+log_must zpool export "$TESTPOOL"
+
+# Import the pool
+log_must zpool import "$TESTPOOL"
+
+# Verify after import
+typeset after_val
+after_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/persist_test")
+if [[ "$after_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "After import: zoned_uid should be $ZONED_TEST_UID, got: $after_val"
+fi
+log_note "zoned_uid is $ZONED_TEST_UID after import"
+
+# Cleanup
+log_must zfs destroy "$TESTPOOL/$TESTFS/persist_test"
+
+log_pass "zoned_uid property persists through pool export/import"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_003_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_003_pos.ksh
new file mode 100755
index 000000000000..8be7d5cc0922
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_003_pos.ksh
@@ -0,0 +1,100 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that setting zoned_uid property does not break normal
+# dataset operations from the global zone.
+#
+# STRATEGY:
+# 1. Create a test dataset with zoned_uid set
+# 2. Verify dataset is still visible and accessible from global zone
+# 3. Create a child dataset
+# 4. Verify child dataset operations work
+# 5. Verify the property is shown in zfs list output
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ log_must zfs destroy -rf "$TESTPOOL/$TESTFS/zoned_test"
+}
+
+log_assert "zoned_uid property does not break global zone operations"
+log_onexit cleanup
+
+# Create test dataset with zoned_uid
+log_must zfs create "$TESTPOOL/$TESTFS/zoned_test"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/zoned_test" "$ZONED_TEST_UID"
+
+# Verify dataset is visible
+log_must zfs list "$TESTPOOL/$TESTFS/zoned_test"
+log_note "Dataset is visible from global zone"
+
+# Verify we can get properties
+log_must zfs get all "$TESTPOOL/$TESTFS/zoned_test"
+log_note "Can retrieve properties from global zone"
+
+# Verify zoned_uid appears in output
+typeset list_output
+list_output=$(zfs get -H -o property,value all "$TESTPOOL/$TESTFS/zoned_test" | grep zoned_uid)
+if [[ -z "$list_output" ]]; then
+ log_fail "zoned_uid not shown in property listing"
+fi
+log_note "zoned_uid appears in property listing: $list_output"
+
+# Create child dataset
+log_must zfs create "$TESTPOOL/$TESTFS/zoned_test/child"
+log_note "Can create child dataset"
+
+# Verify child is visible
+log_must zfs list "$TESTPOOL/$TESTFS/zoned_test/child"
+log_note "Child dataset is visible"
+
+# Write data to the dataset
+typeset mntpt
+mntpt=$(get_prop mountpoint "$TESTPOOL/$TESTFS/zoned_test")
+log_must touch "$mntpt/testfile"
+log_must eval "echo 'test data' > $mntpt/testfile"
+log_note "Can write data to dataset"
+
+# Read data back
+log_must cat "$mntpt/testfile"
+log_note "Can read data from dataset"
+
+# Take a snapshot
+log_must zfs snapshot "$TESTPOOL/$TESTFS/zoned_test@snap1"
+log_note "Can create snapshot"
+
+# List snapshots
+log_must zfs list -t snapshot "$TESTPOOL/$TESTFS/zoned_test@snap1"
+log_note "Snapshot is visible"
+
+log_pass "zoned_uid property does not break global zone operations"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_004_pos.ksh
new file mode 100755
index 000000000000..3692f9df5d01
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_004_pos.ksh
@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that zoned_uid property is inherited by child datasets
+# and can be overridden with a different value.
+#
+# STRATEGY:
+# 1. Create parent dataset with zoned_uid
+# 2. Create child dataset
+# 3. Verify child inherits parent's zoned_uid value
+# 4. Override zoned_uid on child with a different value
+# 5. Verify each dataset has its own value
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ log_must zfs destroy -rf "$TESTPOOL/$TESTFS/parent"
+}
+
+log_assert "zoned_uid property is inherited by child datasets"
+log_onexit cleanup
+
+# Create parent dataset with zoned_uid
+log_must zfs create "$TESTPOOL/$TESTFS/parent"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/parent" "$ZONED_TEST_UID"
+
+# Create child dataset
+log_must zfs create "$TESTPOOL/$TESTFS/parent/child"
+
+# Verify child inherits parent's value
+typeset child_val
+child_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/parent/child")
+if [[ "$child_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Child zoned_uid should inherit $ZONED_TEST_UID, got: $child_val"
+fi
+log_note "Child dataset inherits zoned_uid=$ZONED_TEST_UID from parent"
+
+# Verify parent still has its value
+typeset parent_val
+parent_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/parent")
+if [[ "$parent_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Parent zoned_uid should be $ZONED_TEST_UID, got: $parent_val"
+fi
+log_note "Parent dataset retains zoned_uid=$ZONED_TEST_UID"
+
+# Override with different value on child
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/parent/child" "$ZONED_OTHER_UID"
+
+# Verify each has independent value
+parent_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/parent")
+child_val=$(get_zoned_uid "$TESTPOOL/$TESTFS/parent/child")
+
+if [[ "$parent_val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Parent zoned_uid changed unexpectedly to: $parent_val"
+fi
+if [[ "$child_val" != "$ZONED_OTHER_UID" ]]; then
+ log_fail "Child zoned_uid should be $ZONED_OTHER_UID, got: $child_val"
+fi
+log_note "Parent and child have independent zoned_uid values after override"
+
+log_pass "zoned_uid property is inherited by child datasets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_005_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_005_neg.ksh
new file mode 100755
index 000000000000..5cc0577ba675
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_005_neg.ksh
@@ -0,0 +1,72 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that invalid zoned_uid values are rejected.
+#
+# STRATEGY:
+# 1. Try to set zoned_uid with invalid string value
+# 2. Verify it fails
+# 3. Try to set zoned_uid with negative value
+# 4. Verify it fails
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ if datasetexists "$TESTPOOL/$TESTFS/neg_test"; then
+ log_must zfs destroy -rf "$TESTPOOL/$TESTFS/neg_test"
+ fi
+}
+
+log_assert "Invalid zoned_uid values are rejected"
+log_onexit cleanup
+
+# Create test dataset
+log_must zfs create "$TESTPOOL/$TESTFS/neg_test"
+
+# Try invalid string value
+log_mustnot zfs set zoned_uid=invalid "$TESTPOOL/$TESTFS/neg_test"
+log_note "Invalid string value rejected"
+
+# Try negative value (if shell allows it)
+log_mustnot zfs set zoned_uid=-1 "$TESTPOOL/$TESTFS/neg_test"
+log_note "Negative value rejected"
+
+# Verify dataset still has default value
+typeset val
+val=$(get_zoned_uid "$TESTPOOL/$TESTFS/neg_test")
+if [[ "$val" != "0" ]]; then
+ log_fail "zoned_uid should still be 0 after failed sets, got: $val"
+fi
+log_note "zoned_uid unchanged after invalid set attempts"
+
+log_pass "Invalid zoned_uid values are rejected"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_006_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_006_pos.ksh
new file mode 100755
index 000000000000..3322515edd43
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_006_pos.ksh
@@ -0,0 +1,109 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can create child datasets
+# under a delegation root with matching zoned_uid.
+#
+# STRATEGY:
+# 1. Create a test dataset and set zoned_uid to test UID
+# 2. Enter a user namespace owned by that UID
+# 3. Verify CAP_SYS_ADMIN is present in the namespace
+# 4. Attempt to create a child dataset
+# 5. Verify the child dataset was created successfully
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ # Clean up from global zone
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can create child datasets"
+log_onexit cleanup
+
+# Create delegation root and set zoned_uid
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount
+
+# Verify zoned_uid is set
+typeset actual_uid
+actual_uid=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root")
+if [[ "$actual_uid" != "$ZONED_TEST_UID" ]]; then
+ log_fail "zoned_uid not set correctly: expected $ZONED_TEST_UID, got $actual_uid"
+fi
+log_note "Delegation root created with zoned_uid=$ZONED_TEST_UID"
+
+#
+# Enter user namespace and attempt to create child dataset.
+# unshare --user creates a new user namespace where the caller
+# has CAP_SYS_ADMIN (and all other capabilities) within that namespace.
+#
+# The --map-root-user option maps the invoking user to root inside the
+# namespace, which is the standard rootless container setup.
+#
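+# A minimal sketch of what run_in_userns (from zoned_uid_common.kshlib) is
+# assumed to do, based on the check performed in setup.ksh:
+#
+#     sudo -u \#<uid> unshare --user --map-root-user zfs <subcommand> <args>
+#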
+log_note "Attempting to create child dataset from user namespace..."
+
+# Use sudo -u to run as the zoned_uid owner, then unshare into a user
+# namespace whose owner will be ZONED_TEST_UID.
+typeset create_result
+create_result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+create_status=$?
+
+if [[ $create_status -ne 0 ]]; then
+ log_note "Create output: $create_result"
+ log_fail "Failed to create child dataset from user namespace (status=$create_status)"
+fi
+
+log_note "Child dataset created successfully from user namespace"
+
+# Verify the child exists (from global zone)
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+log_note "Child dataset verified from global zone"
+
+# Verify the child is visible from the user namespace
+typeset list_result
+list_result=$(run_in_userns "$ZONED_TEST_UID" \
+ list "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+list_status=$?
+
+if [[ $list_status -ne 0 ]]; then
+ log_note "List output: $list_result"
+ log_fail "Child dataset not visible from user namespace"
+fi
+
+log_note "Child dataset visible from user namespace"
+
+log_pass "Authorized user namespace can create child datasets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_007_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_007_pos.ksh
new file mode 100755
index 000000000000..64de7663a5b2
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_007_pos.ksh
@@ -0,0 +1,110 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can create snapshots
+# of datasets under the delegation root.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create a child dataset (from global zone for setup)
+# 3. Enter user namespace owned by the zoned_uid
+# 4. Create a snapshot from within the user namespace
+# 5. Verify the snapshot was created successfully
+# 6. Verify the snapshot is visible from both namespaces
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can create snapshots"
+log_onexit cleanup
+
+# Create delegation root and child dataset
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ snapshot
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Delegation root created with zoned_uid=$ZONED_TEST_UID"
+
+# Enter user namespace and create snapshot
+log_note "Attempting to create snapshot from user namespace..."
+
+typeset snap_result
+snap_result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+typeset snap_status=$?
+
+if [[ $snap_status -ne 0 ]]; then
+ log_note "Snapshot output: $snap_result"
+ log_fail "Failed to create snapshot from user namespace (status=$snap_status)"
+fi
+
+log_note "Snapshot created successfully from user namespace"
+
+# Verify snapshot exists from global zone
+log_must zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1"
+log_note "Snapshot verified from global zone"
+
+# Verify snapshot is visible from user namespace
+typeset list_result
+list_result=$(run_in_userns "$ZONED_TEST_UID" \
+ list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+typeset list_status=$?
+
+if [[ $list_status -ne 0 ]]; then
+ log_note "List output: $list_result"
+ log_fail "Snapshot not visible from user namespace"
+fi
+
+log_note "Snapshot visible from user namespace"
+
+# Also test snapshot of the delegation root itself
+log_note "Testing snapshot of delegation root..."
+typeset root_snap_result
+root_snap_result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root@rootsnap" 2>&1)
+typeset root_snap_status=$?
+
+if [[ $root_snap_status -ne 0 ]]; then
+ log_note "Root snapshot output: $root_snap_result"
+ log_fail "Failed to snapshot delegation root from user namespace"
+fi
+
+log_must zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root@rootsnap"
+log_note "Delegation root snapshot created successfully"
+
+log_pass "Authorized user namespace can create snapshots"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_008_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_008_pos.ksh
new file mode 100755
index 000000000000..fa5251665590
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_008_pos.ksh
@@ -0,0 +1,128 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can destroy child datasets
+# and snapshots, but cannot destroy the delegation root itself.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create child datasets and snapshots
+# 3. Enter user namespace and destroy a snapshot (should succeed)
+# 4. Destroy a child dataset (should succeed)
+# 5. Attempt to destroy the delegation root (should fail - protected)
+# 6. Verify the delegation root still exists
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can destroy children but not delegation root"
+log_onexit cleanup
+
+# Create delegation root with children
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ destroy,mount
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child1"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child2"
+log_must zfs snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1"
+
+log_note "Created delegation root with children and snapshot"
+
+# Unmount child datasets from global zone before entering user namespace.
+# Mounts inherited from the parent mount namespace are MNT_LOCKED by the
+# kernel and cannot be unmounted from a child mount namespace.
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root/child1"
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root/child2"
+
+# Test 1: Destroy snapshot from user namespace (should succeed)
+log_note "Test 1: Destroying snapshot from user namespace..."
+typeset snap_result
+snap_result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>&1)
+typeset snap_status=$?
+
+if [[ $snap_status -ne 0 ]]; then
+ log_note "Destroy snapshot output: $snap_result"
+ log_fail "Failed to destroy snapshot from user namespace"
+fi
+
+# Verify snapshot is gone
+if zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>/dev/null; then
+ log_fail "Snapshot should have been destroyed"
+fi
+log_note "Snapshot destroyed successfully"
+
+# Test 2: Destroy child dataset from user namespace (should succeed)
+log_note "Test 2: Destroying child dataset from user namespace..."
+typeset child_result
+child_result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/child1" 2>&1)
+typeset child_status=$?
+
+if [[ $child_status -ne 0 ]]; then
+ log_note "Destroy child output: $child_result"
+ log_fail "Failed to destroy child dataset from user namespace"
+fi
+
+# Verify child is gone
+if zfs list "$TESTPOOL/$TESTFS/deleg_root/child1" 2>/dev/null; then
+ log_fail "Child dataset should have been destroyed"
+fi
+log_note "Child dataset destroyed successfully"
+
+# Test 3: Attempt to destroy delegation root (should FAIL - protected)
+log_note "Test 3: Attempting to destroy delegation root (should fail)..."
+typeset root_result
+root_result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+typeset root_status=$?
+
+if [[ $root_status -eq 0 ]]; then
+ log_fail "Destroying delegation root should have been denied"
+fi
+
+log_note "Delegation root destruction correctly denied: $root_result"
+
+# Verify delegation root still exists
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root"
+log_note "Delegation root still exists (protected)"
+
+# Verify remaining child still exists
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child2"
+log_note "Remaining child dataset unaffected"
+
+log_pass "Authorized user namespace can destroy children but not delegation root"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_009_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_009_pos.ksh
new file mode 100755
index 000000000000..4fd66d5bbce0
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_009_pos.ksh
@@ -0,0 +1,149 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can rename datasets within
+# the delegation subtree, but cannot rename datasets outside of it.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create child datasets
+# 3. Enter user namespace and rename within the subtree (should succeed)
+# 4. Attempt to rename outside the subtree (should fail)
+# 5. Verify the rename operations behaved correctly
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+ zfs destroy -rf "$TESTPOOL/$TESTFS/outside" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can rename within delegation subtree only"
+log_onexit cleanup
+
+# Create delegation root with children
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ rename,mount,create
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child1"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/subdir"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/subdir/nested"
+
+# Create a dataset outside the delegation root (for escape test)
+log_must zfs create "$TESTPOOL/$TESTFS/outside"
+
+log_note "Created delegation root with children and outside dataset"
+
+# Unmount datasets from global zone before entering user namespace.
+# Mounts inherited from the parent mount namespace are MNT_LOCKED by the
+# kernel and cannot be unmounted from a child mount namespace.
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root/subdir/nested"
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root/subdir"
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root/child1"
+log_must zfs unmount "$TESTPOOL/$TESTFS/outside"
+
+# Test 1: Rename within subtree (should succeed)
+log_note "Test 1: Renaming within delegation subtree..."
+typeset rename_result
+rename_result=$(run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/child1" \
+ "$TESTPOOL/$TESTFS/deleg_root/child1_renamed" 2>&1)
+typeset rename_status=$?
+
+if [[ $rename_status -ne 0 ]]; then
+ log_note "Rename output: $rename_result"
+ log_fail "Failed to rename within delegation subtree"
+fi
+
+# Verify old name is gone and new name exists
+if zfs list "$TESTPOOL/$TESTFS/deleg_root/child1" 2>/dev/null; then
+ log_fail "Old dataset name should not exist after rename"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child1_renamed"
+log_note "Rename within subtree succeeded"
+
+# Test 2: Rename to a different location within subtree (should succeed)
+log_note "Test 2: Moving dataset within subtree..."
+typeset move_result
+move_result=$(run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/subdir/nested" \
+ "$TESTPOOL/$TESTFS/deleg_root/nested_moved" 2>&1)
+typeset move_status=$?
+
+if [[ $move_status -ne 0 ]]; then
+ log_note "Move output: $move_result"
+ log_fail "Failed to move dataset within delegation subtree"
+fi
+
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/nested_moved"
+log_note "Move within subtree succeeded"
+
+# Test 3: Attempt to rename outside the subtree (should FAIL)
+log_note "Test 3: Attempting to rename outside subtree (should fail)..."
+typeset escape_result
+escape_result=$(run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/child1_renamed" \
+ "$TESTPOOL/$TESTFS/outside/escaped" 2>&1)
+typeset escape_status=$?
+
+if [[ $escape_status -eq 0 ]]; then
+ log_fail "Renaming outside delegation subtree should have been denied"
+fi
+
+log_note "Rename outside subtree correctly denied: $escape_result"
+
+# Verify the dataset is still in its original location
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child1_renamed"
+log_note "Dataset remains in delegation subtree"
+
+# Test 4: Attempt to rename from outside into subtree (should FAIL)
+# This tests that we can't "steal" datasets from outside
+log_note "Test 4: Attempting to rename from outside into subtree (should fail)..."
+typeset steal_result
+steal_result=$(run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/outside" \
+ "$TESTPOOL/$TESTFS/deleg_root/stolen" 2>&1)
+typeset steal_status=$?
+
+if [[ $steal_status -eq 0 ]]; then
+ log_fail "Renaming from outside into subtree should have been denied"
+fi
+
+log_note "Rename from outside correctly denied: $steal_result"
+
+# Verify outside dataset still exists in original location
+log_must zfs list "$TESTPOOL/$TESTFS/outside"
+log_note "Outside dataset remains in place"
+
+log_pass "Authorized user namespace can rename within delegation subtree only"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_010_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_010_pos.ksh
new file mode 100755
index 000000000000..c5f10048be44
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_010_pos.ksh
@@ -0,0 +1,157 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can set properties on
+# datasets within the delegation subtree.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create a child dataset
+# 3. Enter user namespace and set various properties
+# 4. Verify properties were set correctly
+# 5. Test setting properties on delegation root itself
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can set properties on delegated datasets"
+log_onexit cleanup
+
+# Create delegation root with child
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ quota,compression,atime,userprop
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root with child dataset"
+
+# Test 1: Set quota on child dataset
+log_note "Test 1: Setting quota from user namespace..."
+typeset quota_result
+quota_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set quota=100M "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+quota_status=$?
+
+if [[ $quota_status -ne 0 ]]; then
+ log_note "Set quota output: $quota_result"
+ log_fail "Failed to set quota from user namespace"
+fi
+
+# Verify quota was set
+typeset actual_quota
+actual_quota=$(zfs get -H -o value quota "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_quota" != "100M" ]]; then
+ log_fail "Quota not set correctly: expected 100M, got $actual_quota"
+fi
+log_note "Quota set successfully to 100M"
+
+# Test 2: Set compression on child dataset
+log_note "Test 2: Setting compression from user namespace..."
+typeset comp_result
+comp_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set compression=lz4 "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+comp_status=$?
+
+if [[ $comp_status -ne 0 ]]; then
+ log_note "Set compression output: $comp_result"
+ log_fail "Failed to set compression from user namespace"
+fi
+
+typeset actual_comp
+actual_comp=$(zfs get -H -o value compression "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_comp" != "lz4" ]]; then
+ log_fail "Compression not set correctly: expected lz4, got $actual_comp"
+fi
+log_note "Compression set successfully to lz4"
+
+# Test 3: Set atime on delegation root
+# Unmount delegation root first — setting atime triggers a remount, and
+# inherited mounts are MNT_LOCKED (cannot be remounted from a child mount
+# namespace).
+log_must zfs unmount "$TESTPOOL/$TESTFS/deleg_root"
+log_note "Test 3: Setting atime on delegation root..."
+typeset atime_result
+atime_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set atime=off "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+atime_status=$?
+
+if [[ $atime_status -ne 0 ]]; then
+ log_note "Set atime output: $atime_result"
+ log_fail "Failed to set atime on delegation root"
+fi
+
+typeset actual_atime
+actual_atime=$(zfs get -H -o value atime "$TESTPOOL/$TESTFS/deleg_root")
+if [[ "$actual_atime" != "off" ]]; then
+ log_fail "Atime not set correctly: expected off, got $actual_atime"
+fi
+log_note "Atime set successfully on delegation root"
+
+# Test 4: Set a user property
+log_note "Test 4: Setting user property from user namespace..."
+typeset userprop_result
+userprop_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set com.example:testprop=testvalue "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+userprop_status=$?
+
+if [[ $userprop_status -ne 0 ]]; then
+ log_note "Set user property output: $userprop_result"
+ log_fail "Failed to set user property from user namespace"
+fi
+
+typeset actual_userprop
+actual_userprop=$(zfs get -H -o value com.example:testprop "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_userprop" != "testvalue" ]]; then
+ log_fail "User property not set correctly: expected testvalue, got $actual_userprop"
+fi
+log_note "User property set successfully"
+
+# Test 5: Verify properties are visible from user namespace
+log_note "Test 5: Verifying properties visible from user namespace..."
+typeset get_result
+get_result=$(run_in_userns "$ZONED_TEST_UID" \
+ get quota,compression "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+get_status=$?
+
+if [[ $get_status -ne 0 ]]; then
+ log_note "Get properties output: $get_result"
+ log_fail "Failed to get properties from user namespace"
+fi
+
+log_note "Properties visible from user namespace"
+
+log_pass "Authorized user namespace can set properties on delegated datasets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_011_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_011_neg.ksh
new file mode 100755
index 000000000000..bc2bbe4a8dd3
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_011_neg.ksh
@@ -0,0 +1,153 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that a user namespace with a non-matching UID cannot perform
+# write operations on datasets delegated to a different UID.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set to ZONED_TEST_UID
+# 2. Enter a user namespace owned by ZONED_OTHER_UID (different)
+# 3. Verify dataset is visible (read-only path visibility)
+# 4. Attempt to create child dataset (should fail)
+# 5. Attempt to create snapshot (should fail)
+# 6. Attempt to set property (should fail)
+# 7. Attempt to destroy (should fail)
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Unauthorized user namespace cannot perform write operations"
+log_onexit cleanup
+
+# Create delegation root owned by ZONED_TEST_UID
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root with zoned_uid=$ZONED_TEST_UID"
+log_note "Will test access from user namespace owned by $ZONED_OTHER_UID"
+
+# Test 1: Verify dataset visibility (should be visible via parent path)
+# Note: whether the dataset is visible here depends on the implementation;
+# the key requirement is that the write operations below fail.
+log_note "Test 1: Checking visibility from wrong user namespace..."
+typeset list_result
+list_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ list "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+list_status=$?
+log_note "List result (status=$list_status): $list_result"
+
+# Test 2: Attempt to create child dataset (should FAIL)
+log_note "Test 2: Attempting to create child from wrong namespace (should fail)..."
+typeset create_result
+create_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/unauthorized_child" 2>&1)
+create_status=$?
+
+if [[ $create_status -eq 0 ]]; then
+ log_fail "Creating child from unauthorized namespace should have been denied"
+fi
+log_note "Create correctly denied: $create_result"
+
+# Verify the unauthorized child was not created
+if zfs list "$TESTPOOL/$TESTFS/deleg_root/unauthorized_child" 2>/dev/null; then
+ log_fail "Unauthorized child dataset should not exist"
+fi
+
+# Test 3: Attempt to create snapshot (should FAIL)
+log_note "Test 3: Attempting to create snapshot from wrong namespace (should fail)..."
+typeset snap_result
+snap_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child@unauthorized" 2>&1)
+snap_status=$?
+
+if [[ $snap_status -eq 0 ]]; then
+ log_fail "Creating snapshot from unauthorized namespace should have been denied"
+fi
+log_note "Snapshot correctly denied: $snap_result"
+
+# Test 4: Attempt to set property (should FAIL)
+log_note "Test 4: Attempting to set property from wrong namespace (should fail)..."
+typeset prop_result
+prop_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ set quota=1G "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+prop_status=$?
+
+if [[ $prop_status -eq 0 ]]; then
+ log_fail "Setting property from unauthorized namespace should have been denied"
+fi
+log_note "Set property correctly denied: $prop_result"
+
+# Verify quota was not changed
+typeset actual_quota
+actual_quota=$(zfs get -H -o value quota "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_quota" == "1G" ]]; then
+ log_fail "Quota should not have been changed by unauthorized namespace"
+fi
+
+# Test 5: Attempt to destroy (should FAIL)
+log_note "Test 5: Attempting to destroy from wrong namespace (should fail)..."
+typeset destroy_result
+destroy_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+destroy_status=$?
+
+if [[ $destroy_status -eq 0 ]]; then
+ log_fail "Destroying from unauthorized namespace should have been denied"
+fi
+log_note "Destroy correctly denied: $destroy_result"
+
+# Verify child still exists
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+log_note "Child dataset still exists (protected from unauthorized access)"
+
+# Test 6: Attempt to rename (should FAIL)
+log_note "Test 6: Attempting to rename from wrong namespace (should fail)..."
+typeset rename_result
+rename_result=$(run_in_userns "$ZONED_OTHER_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/child" \
+ "$TESTPOOL/$TESTFS/deleg_root/child_renamed" 2>&1)
+rename_status=$?
+
+if [[ $rename_status -eq 0 ]]; then
+ log_fail "Renaming from unauthorized namespace should have been denied"
+fi
+log_note "Rename correctly denied: $rename_result"
+
+# Verify child still has original name
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_pass "Unauthorized user namespace cannot perform write operations"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_012_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_012_pos.ksh
new file mode 100755
index 000000000000..db90ff1bade9
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_012_pos.ksh
@@ -0,0 +1,120 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can inherit properties
+# on datasets within the delegation subtree.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create a child dataset
+# 3. Set properties on the child, then inherit them from user namespace
+# 4. Verify properties were inherited correctly
+# 5. Test inheriting both native and user properties
+#
+
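+# For reference, the inherit semantics exercised below are (paths illustrative):
+#   zfs set compression=lz4 pool/fs/child    # property source becomes 'local'
+#   zfs inherit compression pool/fs/child    # source reverts to inherited/default
+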
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can inherit properties on delegated datasets"
+log_onexit cleanup
+
+# Create delegation root with child
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ userprop,compression
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root with child dataset"
+
+# Set a native property on child that we will then inherit
+log_must zfs set compression=lz4 "$TESTPOOL/$TESTFS/deleg_root/child"
+
+typeset actual_comp
+actual_comp=$(zfs get -H -o value compression "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_comp" != "lz4" ]]; then
+ log_fail "Failed to set compression: expected lz4, got $actual_comp"
+fi
+
+# Set a user property on child that we will then inherit
+log_must zfs set com.example:testprop=localvalue "$TESTPOOL/$TESTFS/deleg_root/child"
+
+typeset actual_userprop
+actual_userprop=$(zfs get -H -o value com.example:testprop "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_userprop" != "localvalue" ]]; then
+ log_fail "Failed to set user property: expected localvalue, got $actual_userprop"
+fi
+
+# Test 1: Inherit native property from user namespace
+log_note "Test 1: Inheriting native property from user namespace..."
+typeset inherit_result
+inherit_result=$(run_in_userns "$ZONED_TEST_UID" \
+ inherit compression "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+inherit_status=$?
+
+if [[ $inherit_status -ne 0 ]]; then
+ log_note "Inherit compression output: $inherit_result"
+ log_fail "Failed to inherit compression from user namespace"
+fi
+
+# Verify compression was inherited (should match parent's value)
+actual_comp=$(zfs get -H -o value compression "$TESTPOOL/$TESTFS/deleg_root/child")
+typeset comp_source
+comp_source=$(zfs get -H -o source compression "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$comp_source" == "local" ]]; then
+ log_fail "Compression still local after inherit: $actual_comp (source=$comp_source)"
+fi
+log_note "Compression inherited successfully (value=$actual_comp, source=$comp_source)"
+
+# Test 2: Inherit user property from user namespace
+log_note "Test 2: Inheriting user property from user namespace..."
+typeset inherit_userprop_result
+inherit_userprop_result=$(run_in_userns "$ZONED_TEST_UID" \
+ inherit com.example:testprop "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+inherit_userprop_status=$?
+
+if [[ $inherit_userprop_status -ne 0 ]]; then
+ log_note "Inherit user property output: $inherit_userprop_result"
+ log_fail "Failed to inherit user property from user namespace"
+fi
+
+# Verify user property was removed (inherited means no local value)
+actual_userprop=$(zfs get -H -o value com.example:testprop "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$actual_userprop" == "localvalue" ]]; then
+ log_fail "User property still has local value after inherit"
+fi
+log_note "User property inherited successfully (value=$actual_userprop)"
+
+log_pass "Authorized user namespace can inherit properties on delegated datasets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_013_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_013_pos.ksh
new file mode 100755
index 000000000000..c5a8bfe598b2
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_013_pos.ksh
@@ -0,0 +1,122 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+. $STF_SUITE/include/math.shlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can set userquota
+# and groupquota properties on delegated datasets.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create a child dataset
+# 3. Enter user namespace and set userquota on child
+# 4. Set groupquota on child
+# 5. Verify quotas were applied correctly
+#
+
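+# userquota@<uid> and groupquota@<gid> take a numeric id (0 = root here). The
+# checks below read the raw byte value back with 'zfs get -Hp', e.g.
+# (illustrative): 'zfs get -Hp -o value userquota@0 <dataset>' prints 52428800
+# for a 50M quota.
+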
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can set userquota/groupquota on delegated datasets"
+log_onexit cleanup
+
+# Create delegation root with child
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ userquota,groupquota
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root with child dataset"
+
+# Test 1: Set userquota from user namespace
+log_note "Test 1: Setting userquota from user namespace..."
+typeset uq_result
+uq_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set userquota@0=50M "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+typeset uq_status=$?
+
+if [[ $uq_status -ne 0 ]]; then
+ log_note "Set userquota output: $uq_result"
+ log_fail "Failed to set userquota from user namespace"
+fi
+
+# Verify userquota was set (use -p for parseable/raw bytes)
+typeset actual_uq
+actual_uq=$(zfs get -Hp -o value userquota@0 "$TESTPOOL/$TESTFS/deleg_root/child")
+if ! within_percent "$actual_uq" $((50 * 1048576)) 99; then
+ log_fail "Userquota not set correctly: expected ~50M, got $actual_uq"
+fi
+log_note "Userquota set successfully ($actual_uq bytes)"
+
+# Test 2: Set groupquota from user namespace
+log_note "Test 2: Setting groupquota from user namespace..."
+typeset gq_result
+gq_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set groupquota@0=100M "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+typeset gq_status=$?
+
+if [[ $gq_status -ne 0 ]]; then
+ log_note "Set groupquota output: $gq_result"
+ log_fail "Failed to set groupquota from user namespace"
+fi
+
+# Verify groupquota was set (use -p for parseable/raw bytes)
+typeset actual_gq
+actual_gq=$(zfs get -Hp -o value groupquota@0 "$TESTPOOL/$TESTFS/deleg_root/child")
+if ! within_percent "$actual_gq" $((100 * 1048576)) 99; then
+ log_fail "Groupquota not set correctly: expected ~100M, got $actual_gq"
+fi
+log_note "Groupquota set successfully ($actual_gq bytes)"
+
+# Test 3: Set userquota on delegation root itself
+log_note "Test 3: Setting userquota on delegation root..."
+typeset root_uq_result
+root_uq_result=$(run_in_userns "$ZONED_TEST_UID" \
+ set userquota@0=200M "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+typeset root_uq_status=$?
+
+if [[ $root_uq_status -ne 0 ]]; then
+ log_note "Set userquota on root output: $root_uq_result"
+ log_fail "Failed to set userquota on delegation root"
+fi
+
+typeset actual_root_uq
+actual_root_uq=$(zfs get -Hp -o value userquota@0 "$TESTPOOL/$TESTFS/deleg_root")
+if ! within_percent "$actual_root_uq" $((200 * 1048576)) 99; then
+ log_fail "Root userquota not set correctly: expected ~200M, got $actual_root_uq"
+fi
+log_note "Delegation root userquota set successfully ($actual_root_uq bytes)"
+
+log_pass "Authorized user namespace can set userquota/groupquota on delegated datasets"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_014_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_014_pos.ksh
new file mode 100755
index 000000000000..131addf6aa96
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_014_pos.ksh
@@ -0,0 +1,116 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that an authorized user namespace can create sub-datasets
+# (grandchildren) under a delegation root. The zoned_uid property
+# must be inherited so that children of children are also authorized.
+#
+# STRATEGY:
+# 1. Create a delegation root and set zoned_uid
+# 2. From user namespace, create a child dataset
+# 3. From user namespace, create a grandchild under the child
+# 4. Verify the grandchild exists and is visible from the namespace
+# 5. Verify zoned_uid inherited to both child and grandchild
+#
+
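+# The inheritance checked in step 5 can also be inspected from the global zone
+# with, e.g.: zfs get -r -o name,value,source zoned_uid <deleg_root>
+# (assuming zoned_uid behaves like a normal inheritable dataset property).
+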
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Authorized user namespace can create sub-datasets (grandchildren)"
+log_onexit cleanup
+
+# Create delegation root and set zoned_uid
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount
+log_note "Delegation root created with zoned_uid=$ZONED_TEST_UID"
+
+# Step 1: Create child from user namespace
+typeset create_result
+create_result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+create_status=$?
+
+if [[ $create_status -ne 0 ]]; then
+ log_note "Create child output: $create_result"
+ log_fail "Failed to create child dataset (status=$create_status)"
+fi
+log_note "Child dataset created successfully"
+
+# Step 2: Create grandchild from user namespace
+typeset grandchild_result
+grandchild_result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child/grandchild" 2>&1)
+grandchild_status=$?
+
+if [[ $grandchild_status -ne 0 ]]; then
+ log_note "Create grandchild output: $grandchild_result"
+ log_fail "Failed to create grandchild dataset (status=$grandchild_status)"
+fi
+log_note "Grandchild dataset created successfully"
+
+# Step 3: Verify both exist from global zone
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child/grandchild"
+log_note "Both datasets verified from global zone"
+
+# Step 4: Verify grandchild is visible from user namespace
+typeset list_result
+list_result=$(run_in_userns "$ZONED_TEST_UID" \
+ list "$TESTPOOL/$TESTFS/deleg_root/child/grandchild" 2>&1)
+list_status=$?
+
+if [[ $list_status -ne 0 ]]; then
+ log_note "List output: $list_result"
+ log_fail "Grandchild not visible from user namespace"
+fi
+log_note "Grandchild visible from user namespace"
+
+# Step 5: Verify zoned_uid inherited to child and grandchild
+typeset child_uid
+child_uid=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root/child")
+typeset grandchild_uid
+grandchild_uid=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root/child/grandchild")
+
+if [[ "$child_uid" != "$ZONED_TEST_UID" ]]; then
+ log_fail "zoned_uid not inherited to child: expected $ZONED_TEST_UID, got $child_uid"
+fi
+if [[ "$grandchild_uid" != "$ZONED_TEST_UID" ]]; then
+ log_fail "zoned_uid not inherited to grandchild: expected $ZONED_TEST_UID, got $grandchild_uid"
+fi
+log_note "zoned_uid correctly inherited to child ($child_uid) and grandchild ($grandchild_uid)"
+
+log_pass "Authorized user namespace can create sub-datasets (grandchildren)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_015_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_015_pos.ksh
new file mode 100755
index 000000000000..9c5ad675e766
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_015_pos.ksh
@@ -0,0 +1,114 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that destroying and recreating a pool with zoned_uid works
+# without stale kernel state. Exercises the spa_export_os() cleanup
+# path that must detach zone_uid_datasets entries on pool destroy.
+#
+# STRATEGY:
+# 1. Create a delegation root with zoned_uid set
+# 2. Create child datasets with inherited zoned_uid
+# 3. Verify delegation works (create from namespace)
+# 4. Destroy the pool
+# 5. Recreate the pool with same zoned_uid
+# 6. Verify delegation works again on the new pool
+#
+
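+# Note: this test destroys and recreates $TESTPOOL itself. DISKS and
+# default_setup_noexit are assumed to come from the shared test framework and
+# are used in cleanup to restore the pool for subsequent tests.
+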
+verify_runnable "global"
+
+function cleanup
+{
+ if poolexists "$TESTPOOL"; then
+ # Ensure pool is in a clean state
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+ else
+ # Pool was destroyed by test; recreate it for the framework
+ DISK=${DISKS%% *}
+ default_setup_noexit "$DISK"
+ fi
+}
+
+log_assert "Pool destroy/recreate with zoned_uid works without stale state"
+log_onexit cleanup
+
+# Step 1-2: Create delegation root with children
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child1"
+
+log_note "Created delegation root with child, zoned_uid=$ZONED_TEST_UID"
+
+# Step 3: Verify delegation works
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/ns_child" 2>&1)
+typeset status=$?
+
+if [[ $status -ne 0 ]]; then
+ log_note "Create output: $result"
+ log_fail "Initial delegation failed (status=$status)"
+fi
+log_note "Initial delegation works: created ns_child from namespace"
+
+# Step 4: Destroy the pool
+log_must zpool destroy "$TESTPOOL"
+
+log_note "Pool destroyed"
+
+# Step 5: Recreate the pool with same zoned_uid
+DISK=${DISKS%% *}
+log_must zpool create -f "$TESTPOOL" "$DISK"
+log_must zfs create "$TESTPOOL/$TESTFS"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount
+
+log_note "Pool recreated with zoned_uid=$ZONED_TEST_UID"
+
+# Step 6: Verify delegation works again on the new pool
+typeset result2
+result2=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/ns_child2" 2>&1)
+typeset status2=$?
+
+if [[ $status2 -ne 0 ]]; then
+ log_note "Create output after recreate: $result2"
+ log_fail "Delegation failed after pool destroy/recreate (status=$status2)"
+fi
+
+# Verify the dataset exists
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/ns_child2"
+log_note "Delegation works after pool destroy/recreate"
+
+log_pass "Pool destroy/recreate with zoned_uid works without stale state"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_016_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_016_pos.ksh
new file mode 100755
index 000000000000..aeb97e20d58f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_016_pos.ksh
@@ -0,0 +1,132 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that snapshots can be individually destroyed from within a
+# delegated user namespace. Covers the zone_dataset_check_list()
+# visibility fix for the '@' separator.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. From namespace: create child, create snapshot on child
+# 3. From namespace: verify snapshot is visible via zfs list -t snapshot
+# 4. From namespace: destroy snapshot individually (zfs destroy ds@snap)
+# 5. Verify snapshot is gone
+# 6. From namespace: create snapshot on delegation root itself
+# 7. From namespace: destroy that snapshot individually
+# 8. Verify snapshot is gone
+#
+
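+# For reference, the '@' form destroys a single snapshot rather than a dataset
+# (paths illustrative):
+#   zfs destroy pool/fs/child@snap1    # this snapshot only
+#   zfs destroy -r pool/fs/child       # dataset plus all of its snapshots
+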
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Individual snapshot destroy works from delegated user namespace"
+log_onexit cleanup
+
+# Step 1: Create delegation root
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount,snapshot,destroy
+
+# Step 2: Create child and snapshot from namespace
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create child output: $result"
+ log_fail "Failed to create child from namespace"
+fi
+
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create snapshot output: $result"
+ log_fail "Failed to create snapshot from namespace"
+fi
+
+log_note "Created child1@snap1 from namespace"
+
+# Step 3: Verify snapshot is visible from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "List snapshot output: $result"
+ log_fail "Snapshot not visible from namespace"
+fi
+log_note "Snapshot visible from namespace"
+
+# Step 4: Destroy snapshot individually from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>&1)
+typeset status=$?
+
+if [[ $status -ne 0 ]]; then
+ log_note "Destroy snapshot output: $result"
+ log_fail "Failed to destroy individual snapshot from namespace (status=$status)"
+fi
+
+# Step 5: Verify snapshot is gone
+if zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>/dev/null; then
+ log_fail "Snapshot child1@snap1 should have been destroyed"
+fi
+log_note "child1@snap1 destroyed successfully from namespace"
+
+# Step 6: Create snapshot on delegation root itself, then destroy it
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root@rootsnap" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create root snapshot output: $result"
+ log_fail "Failed to create snapshot on delegation root"
+fi
+
+log_note "Created deleg_root@rootsnap from namespace"
+
+# Step 7: Destroy the root snapshot individually from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root@rootsnap" 2>&1)
+status=$?
+
+if [[ $status -ne 0 ]]; then
+ log_note "Destroy root snapshot output: $result"
+ log_fail "Failed to destroy root snapshot from namespace (status=$status)"
+fi
+
+# Step 8: Verify root snapshot is gone
+if zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root@rootsnap" 2>/dev/null; then
+ log_fail "Snapshot deleg_root@rootsnap should have been destroyed"
+fi
+log_note "deleg_root@rootsnap destroyed successfully from namespace"
+
+log_pass "Individual snapshot destroy works from delegated user namespace"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_017_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_017_neg.ksh
new file mode 100755
index 000000000000..40c314dcd986
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_017_neg.ksh
@@ -0,0 +1,125 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that a namespace user cannot modify the zoned_uid property,
+# even on datasets they have delegation over. Only the global zone
+# admin should be able to manage delegation assignments.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid=$ZONED_TEST_UID
+# 2. Create child dataset (inherits zoned_uid)
+# 3. From namespace: attempt zfs set zoned_uid=none on child (should FAIL)
+# 4. Verify zoned_uid still inherited on child
+# 5. From namespace: attempt zfs set zoned_uid=$ZONED_OTHER_UID (should FAIL)
+# 6. From namespace: attempt zfs set zoned_uid=$ZONED_TEST_UID (should FAIL)
+# 7. From namespace: attempt zfs set zoned_uid=none on root (should FAIL)
+# 8. Verify delegation root still has original zoned_uid
+#
+
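+# set_zoned_uid and get_zoned_uid come from zoned_uid_common.kshlib; they are
+# assumed to be thin wrappers, roughly:
+#   zfs set zoned_uid=<uid> <dataset>
+#   zfs get -H -o value zoned_uid <dataset>
+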
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Namespace user cannot modify zoned_uid property"
+log_onexit cleanup
+
+# Step 1-2: Create delegation root and child
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root and child with zoned_uid=$ZONED_TEST_UID"
+
+# Step 3: Attempt to clear zoned_uid on child from namespace (should FAIL)
+log_note "Test 1: Attempting zfs set zoned_uid=none on child from namespace..."
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set zoned_uid=none "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+typeset status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "Setting zoned_uid=none on child should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 4: Verify zoned_uid still inherited on child
+typeset child_uid
+child_uid=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root/child")
+if [[ "$child_uid" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Child zoned_uid changed to '$child_uid', expected '$ZONED_TEST_UID'"
+fi
+log_note "Child zoned_uid still $ZONED_TEST_UID (inherited)"
+
+# Step 5: Attempt to change zoned_uid to different UID (should FAIL)
+log_note "Test 2: Attempting zfs set zoned_uid=$ZONED_OTHER_UID on child..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set "zoned_uid=$ZONED_OTHER_UID" "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "Setting zoned_uid to different UID should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 6: Attempt to set zoned_uid to same UID (should still FAIL)
+log_note "Test 3: Attempting zfs set zoned_uid=$ZONED_TEST_UID on child..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set "zoned_uid=$ZONED_TEST_UID" "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "Setting zoned_uid (even to same value) should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 7: Attempt to clear zoned_uid on delegation root (should FAIL)
+log_note "Test 4: Attempting zfs set zoned_uid=none on delegation root..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set zoned_uid=none "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "Setting zoned_uid=none on delegation root should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 8: Verify delegation root still has original zoned_uid
+typeset root_uid
+root_uid=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root")
+if [[ "$root_uid" != "$ZONED_TEST_UID" ]]; then
+ log_fail "Root zoned_uid changed to '$root_uid', expected '$ZONED_TEST_UID'"
+fi
+log_note "Delegation root zoned_uid still $ZONED_TEST_UID"
+
+log_pass "Namespace user cannot modify zoned_uid property"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_018_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_018_pos.ksh
new file mode 100755
index 000000000000..770b6bbcabcf
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_018_pos.ksh
@@ -0,0 +1,129 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that clone operations work from within a delegated user
+# namespace, and that cloning outside the subtree is denied.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. From namespace: create child dataset
+# 3. From namespace: create snapshot on child
+# 4. From namespace: clone the snapshot to a new dataset within subtree
+# 5. Verify clone exists and is writable
+# 6. From namespace: attempt to clone outside the subtree (should FAIL)
+# 7. Verify the failed clone doesn't exist
+#
+
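+# For reference, the clone flow exercised below is (paths illustrative):
+#   zfs snapshot pool/fs/deleg_root/child@snap1
+#   zfs clone pool/fs/deleg_root/child@snap1 pool/fs/deleg_root/myclone
+#   zfs get -H -o value origin pool/fs/deleg_root/myclone   # -> child@snap1
+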
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+ zfs destroy -rf "$TESTPOOL/$TESTFS/outside_clone" 2>/dev/null
+}
+
+log_assert "Clone operations work from delegated user namespace"
+log_onexit cleanup
+
+# Step 1: Create delegation root
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount,snapshot,clone
+
+# Step 2: Create child from namespace
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create child output: $result"
+ log_fail "Failed to create child from namespace"
+fi
+
+# Step 3: Create snapshot from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create snapshot output: $result"
+ log_fail "Failed to create snapshot from namespace"
+fi
+
+log_note "Created child@snap1 from namespace"
+
+# Step 4: Clone snapshot to new dataset within subtree
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ clone "$TESTPOOL/$TESTFS/deleg_root/child@snap1" \
+ "$TESTPOOL/$TESTFS/deleg_root/myclone" 2>&1)
+typeset status=$?
+
+if [[ $status -ne 0 ]]; then
+ log_note "Clone output: $result"
+ log_fail "Failed to clone within subtree from namespace (status=$status)"
+fi
+
+# Step 5: Verify clone exists and is writable
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/myclone"
+
+typeset origin
+origin=$(zfs get -H -o value origin "$TESTPOOL/$TESTFS/deleg_root/myclone")
+if [[ "$origin" != "$TESTPOOL/$TESTFS/deleg_root/child@snap1" ]]; then
+ log_fail "Clone origin should be child@snap1, got: $origin"
+fi
+log_note "Clone exists with correct origin"
+
+# Verify writable: create a child under the clone from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/myclone/subchild" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create under clone output: $result"
+ log_fail "Clone is not writable from namespace"
+fi
+log_note "Clone is writable from namespace"
+
+# Step 6: Attempt to clone outside the subtree (should FAIL)
+log_note "Attempting clone to outside subtree..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ clone "$TESTPOOL/$TESTFS/deleg_root/child@snap1" \
+ "$TESTPOOL/$TESTFS/outside_clone" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "Clone to outside subtree should have been denied"
+fi
+log_note "Correctly denied clone to outside subtree: $result"
+
+# Step 7: Verify the failed clone doesn't exist
+if datasetexists "$TESTPOOL/$TESTFS/outside_clone"; then
+ log_fail "Outside clone should not exist"
+fi
+log_note "Outside clone correctly does not exist"
+
+log_pass "Clone operations work from delegated user namespace"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_019_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_019_neg.ksh
new file mode 100755
index 000000000000..60393758357d
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_019_neg.ksh
@@ -0,0 +1,141 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that two different UIDs with sibling delegations cannot
+# access each other's subtrees (multi-UID isolation).
+#
+# STRATEGY:
+# 1. Create two sibling delegation roots with different zoned_uids
+# 2. Create a child under each from global zone
+# 3. From UID A's namespace: verify can create under deleg_root_a
+# 4. From UID A's namespace: attempt create under deleg_root_b (FAIL)
+# 5. From UID A's namespace: attempt destroy child under deleg_root_b (FAIL)
+# 6. From UID A's namespace: attempt set property on deleg_root_b/child (FAIL)
+# 7. From UID B's namespace: verify can create under deleg_root_b
+# 8. From UID B's namespace: attempt create under deleg_root_a (FAIL)
+# 9. Verify both subtrees remain intact
+#
+
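+# Layout under test (two sibling delegation roots):
+#   $TESTPOOL/$TESTFS/deleg_root_a  zoned_uid=$ZONED_TEST_UID
+#   $TESTPOOL/$TESTFS/deleg_root_b  zoned_uid=$ZONED_OTHER_UID
+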
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root_a" 2>/dev/null
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root_b" 2>/dev/null
+}
+
+log_assert "Multi-UID isolation: sibling delegations cannot cross boundaries"
+log_onexit cleanup
+
+# Step 1: Create two sibling delegation roots
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root_a"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root_a" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root_a" "$ZONED_TEST_UID" \
+ create,mount
+
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root_b"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root_b" "$ZONED_OTHER_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root_b" "$ZONED_OTHER_UID" \
+ create,mount
+
+# Step 2: Create a child under each from global zone
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root_a/child_a"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root_b/child_b"
+
+log_note "Created two delegation roots: A(uid=$ZONED_TEST_UID) B(uid=$ZONED_OTHER_UID)"
+
+# Step 3: UID A can create under its own subtree
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root_a/ns_child_a" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create output: $result"
+ log_fail "UID A should be able to create under deleg_root_a"
+fi
+log_note "UID A can create under its own subtree"
+
+# Step 4: UID A cannot create under UID B's subtree
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root_b/intruder_a" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "UID A should NOT be able to create under deleg_root_b"
+fi
+log_note "UID A correctly denied create under deleg_root_b"
+
+# Step 5: UID A cannot destroy child under UID B's subtree
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root_b/child_b" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "UID A should NOT be able to destroy under deleg_root_b"
+fi
+log_note "UID A correctly denied destroy under deleg_root_b"
+
+# Step 6: UID A cannot set property on UID B's subtree
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set mountpoint=none "$TESTPOOL/$TESTFS/deleg_root_b/child_b" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "UID A should NOT be able to set properties on deleg_root_b"
+fi
+log_note "UID A correctly denied setprop on deleg_root_b"
+
+# Step 7: UID B can create under its own subtree
+result=$(run_in_userns "$ZONED_OTHER_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root_b/ns_child_b" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create output: $result"
+ log_fail "UID B should be able to create under deleg_root_b"
+fi
+log_note "UID B can create under its own subtree"
+
+# Step 8: UID B cannot create under UID A's subtree
+result=$(run_in_userns "$ZONED_OTHER_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root_a/intruder_b" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "UID B should NOT be able to create under deleg_root_a"
+fi
+log_note "UID B correctly denied create under deleg_root_a"
+
+# Step 9: Verify both subtrees remain intact
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root_a/child_a"
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root_a/ns_child_a"
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root_b/child_b"
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root_b/ns_child_b"
+
+# Verify intruder datasets don't exist
+if datasetexists "$TESTPOOL/$TESTFS/deleg_root_b/intruder_a"; then
+ log_fail "Intruder dataset from UID A should not exist"
+fi
+if datasetexists "$TESTPOOL/$TESTFS/deleg_root_a/intruder_b"; then
+ log_fail "Intruder dataset from UID B should not exist"
+fi
+log_note "Both subtrees intact, no cross-contamination"
+
+log_pass "Multi-UID isolation: sibling delegations cannot cross boundaries"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_020_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_020_neg.ksh
new file mode 100755
index 000000000000..4de33b30e545
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_020_neg.ksh
@@ -0,0 +1,171 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that operations without zone_dataset_admin_check() integration
+# are denied from a delegated namespace. These operations go through
+# zfs_dozonecheck_impl() which requires zoned=on (not set in the
+# zoned_uid-only flow), so they should all fail with EPERM.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid, create child, create snapshot
+# 2. From namespace: attempt zfs send (should FAIL)
+# 3. From namespace: attempt zfs rollback (should FAIL)
+# 4. From namespace: attempt zfs hold (should FAIL)
+# 5. From namespace: attempt zfs bookmark (should FAIL)
+# 6. From namespace: attempt zfs allow (should FAIL)
+# 7. From namespace: attempt zfs promote on a clone (should FAIL)
+# 8. Verify dataset state unchanged
+#
+
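+# Note: the checks below only assert a nonzero exit status; the exact error
+# text (e.g. 'permission denied') is implementation dependent and not matched.
+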
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Operations without admin_check integration are denied from namespace"
+log_onexit cleanup
+
+# Step 1: Setup — create delegation root, child, snapshot, and clone
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ create,mount,snapshot
+
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create child output: $result"
+ log_fail "Failed to create child from namespace"
+fi
+
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create snapshot output: $result"
+ log_fail "Failed to create snapshot from namespace"
+fi
+
+# Create a clone from global zone for promote test
+log_must zfs clone "$TESTPOOL/$TESTFS/deleg_root/child@snap1" \
+ "$TESTPOOL/$TESTFS/deleg_root/myclone"
+
+log_note "Setup complete: child, child@snap1, myclone"
+
+# Step 2: Attempt zfs send (should FAIL)
+log_note "Test 1: zfs send from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ send "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+typeset status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs send should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs send (status=$status)"
+
+# Step 3: Attempt zfs rollback (should FAIL)
+log_note "Test 2: zfs rollback from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ rollback "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs rollback should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs rollback (status=$status)"
+
+# Step 4: Attempt zfs hold (should FAIL)
+log_note "Test 3: zfs hold from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ hold mytag "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs hold should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs hold (status=$status)"
+
+# Step 5: Attempt zfs bookmark (should FAIL)
+log_note "Test 4: zfs bookmark from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ bookmark "$TESTPOOL/$TESTFS/deleg_root/child@snap1" \
+ "$TESTPOOL/$TESTFS/deleg_root/child#bmark1" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs bookmark should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs bookmark (status=$status)"
+
+# Step 6: Attempt zfs allow (should FAIL)
+log_note "Test 5: zfs allow from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ allow -e create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs allow should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs allow (status=$status)"
+
+# Step 7: Attempt zfs promote (should FAIL)
+log_note "Test 6: zfs promote from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ promote "$TESTPOOL/$TESTFS/deleg_root/myclone" 2>&1)
+status=$?
+
+if [[ $status -eq 0 ]]; then
+ log_fail "zfs promote should have been denied from namespace"
+fi
+log_note "Correctly denied: zfs promote (status=$status)"
+
+# Step 8: Verify dataset state unchanged
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+log_must zfs list -t snapshot "$TESTPOOL/$TESTFS/deleg_root/child@snap1"
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/myclone"
+
+# Verify no holds were placed
+typeset holds
+holds=$(zfs holds "$TESTPOOL/$TESTFS/deleg_root/child@snap1" 2>&1 | wc -l)
+if [[ $holds -gt 1 ]]; then
+ log_fail "Unexpected holds found on snapshot"
+fi
+
+# Verify no bookmarks were created
+if zfs list -t bookmark "$TESTPOOL/$TESTFS/deleg_root/child#bmark1" 2>/dev/null; then
+ log_fail "Bookmark should not exist"
+fi
+
+log_note "All datasets unchanged after denied operations"
+
+log_pass "Operations without admin_check integration are denied from namespace"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_021_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_021_neg.ksh
new file mode 100755
index 000000000000..6a3a7c3030c5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_021_neg.ksh
@@ -0,0 +1,109 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that the 'zoned' property cannot be modified from within
+# a delegated namespace. The ZFS_PROP_ZONED case blocks this via
+# !INGLOBALZONE(curproc), but that path had not previously been exercised
+# in the zoned_uid delegation context.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. Create child dataset
+# 3. From namespace: attempt zfs set zoned=on on child (should FAIL)
+# 4. From namespace: attempt zfs set zoned=off on child (should FAIL)
+# 5. From namespace: attempt zfs set zoned=on on delegation root (FAIL)
+# 6. Verify zoned property unchanged on all datasets
+#
+
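+# Baseline 'zoned' values are captured before the attempts so the test holds
+# regardless of the property's default; any change at all is treated as failure.
+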
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Cannot set 'zoned' property from delegated namespace"
+log_onexit cleanup
+
+# Step 1-2: Create delegation root and child
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+log_note "Created delegation root and child"
+
+# Record original zoned values
+typeset orig_root_zoned orig_child_zoned
+orig_root_zoned=$(zfs get -H -o value zoned "$TESTPOOL/$TESTFS/deleg_root")
+orig_child_zoned=$(zfs get -H -o value zoned "$TESTPOOL/$TESTFS/deleg_root/child")
+
+# Step 3: Attempt zfs set zoned=on on child (should FAIL)
+log_note "Test 1: zfs set zoned=on on child from namespace..."
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set zoned=on "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Setting zoned=on on child should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 4: Attempt zfs set zoned=off on child (should FAIL)
+log_note "Test 2: zfs set zoned=off on child from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set zoned=off "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Setting zoned=off on child should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 5: Attempt zfs set zoned=on on delegation root (should FAIL)
+log_note "Test 3: zfs set zoned=on on delegation root from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set zoned=on "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Setting zoned=on on delegation root should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 6: Verify zoned property unchanged on all datasets
+typeset cur_root_zoned cur_child_zoned
+cur_root_zoned=$(zfs get -H -o value zoned "$TESTPOOL/$TESTFS/deleg_root")
+cur_child_zoned=$(zfs get -H -o value zoned "$TESTPOOL/$TESTFS/deleg_root/child")
+
+if [[ "$cur_root_zoned" != "$orig_root_zoned" ]]; then
+ log_fail "Root zoned changed from '$orig_root_zoned' to '$cur_root_zoned'"
+fi
+if [[ "$cur_child_zoned" != "$orig_child_zoned" ]]; then
+ log_fail "Child zoned changed from '$orig_child_zoned' to '$cur_child_zoned'"
+fi
+log_note "zoned property unchanged on all datasets"
+
+log_pass "Cannot set 'zoned' property from delegated namespace"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_022_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_022_neg.ksh
new file mode 100755
index 000000000000..cf1775e5dbb9
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_022_neg.ksh
@@ -0,0 +1,154 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that delegated users cannot override filesystem_limit and
+# snapshot_limit set by the global admin on the delegation root.
+# Delegated users CAN set tighter sub-limits on child datasets.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. Global admin: set filesystem_limit=10, snapshot_limit=5
+# 3. From namespace: attempt filesystem_limit=none (and raising it) on root (FAIL)
+# 4. From namespace: attempt snapshot_limit=none on root (FAIL)
+# 5. Verify limits unchanged on delegation root
+# 6. From namespace: create child dataset
+# 7. From namespace: set filesystem_limit=3 on child (SUCCEED)
+# 8. From namespace: set snapshot_limit=2 on child (SUCCEED)
+# 9. Verify child has the sub-limits set
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Delegated user cannot override admin limits on delegation root"
+log_onexit cleanup
+
+# Step 1: Create delegation root
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+	"create,mount,filesystem_limit,snapshot_limit"
+
+# Step 2: Global admin sets limits
+log_must zfs set filesystem_limit=10 "$TESTPOOL/$TESTFS/deleg_root"
+log_must zfs set snapshot_limit=5 "$TESTPOOL/$TESTFS/deleg_root"
+
+log_note "Admin set filesystem_limit=10, snapshot_limit=5 on delegation root"
+
+# Step 3: Attempt to remove filesystem_limit from namespace (should FAIL)
+log_note "Test 1: filesystem_limit=none on root from namespace..."
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set filesystem_limit=none "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Removing filesystem_limit on root should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Also try raising the limit
+log_note "Test 2: filesystem_limit=100 on root from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set filesystem_limit=100 "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Raising filesystem_limit on root should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 4: Attempt to remove snapshot_limit from namespace (should FAIL)
+log_note "Test 3: snapshot_limit=none on root from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set snapshot_limit=none "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Removing snapshot_limit on root should have been denied"
+fi
+log_note "Correctly denied: $result"
+
+# Step 5: Verify limits unchanged
+typeset fs_limit snap_limit
+fs_limit=$(get_prop filesystem_limit "$TESTPOOL/$TESTFS/deleg_root")
+snap_limit=$(get_prop snapshot_limit "$TESTPOOL/$TESTFS/deleg_root")
+
+if [[ "$fs_limit" != "10" ]]; then
+ log_fail "filesystem_limit changed to '$fs_limit', expected '10'"
+fi
+if [[ "$snap_limit" != "5" ]]; then
+ log_fail "snapshot_limit changed to '$snap_limit', expected '5'"
+fi
+log_note "Admin limits unchanged on delegation root"
+
+# Step 6: Create child from namespace
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Create child output: $result"
+ log_fail "Failed to create child from namespace"
+fi
+
+# Step 7: Set filesystem_limit on child (should SUCCEED - tighter sub-limit)
+log_note "Test 4: filesystem_limit=3 on child from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set filesystem_limit=3 "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+typeset status=$?
+if [[ $status -ne 0 ]]; then
+ log_note "Set filesystem_limit on child output: $result"
+ log_fail "Setting filesystem_limit on child should succeed (status=$status)"
+fi
+
+# Step 8: Set snapshot_limit on child (should SUCCEED)
+log_note "Test 5: snapshot_limit=2 on child from namespace..."
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ set snapshot_limit=2 "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+status=$?
+if [[ $status -ne 0 ]]; then
+ log_note "Set snapshot_limit on child output: $result"
+ log_fail "Setting snapshot_limit on child should succeed (status=$status)"
+fi
+
+# Step 9: Verify child has the sub-limits
+typeset child_fs_limit child_snap_limit
+child_fs_limit=$(get_prop filesystem_limit \
+ "$TESTPOOL/$TESTFS/deleg_root/child")
+child_snap_limit=$(get_prop snapshot_limit \
+ "$TESTPOOL/$TESTFS/deleg_root/child")
+
+if [[ "$child_fs_limit" != "3" ]]; then
+ log_fail "Child filesystem_limit should be 3, got: $child_fs_limit"
+fi
+if [[ "$child_snap_limit" != "2" ]]; then
+ log_fail "Child snapshot_limit should be 2, got: $child_snap_limit"
+fi
+log_note "Child has correct sub-limits: filesystem_limit=3, snapshot_limit=2"
+
+log_pass "Delegated user cannot override admin limits on delegation root"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_023_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_023_pos.ksh
new file mode 100755
index 000000000000..9cdc73aa72db
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_023_pos.ksh
@@ -0,0 +1,131 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Additive least privilege: non-destructive operations (create, snapshot,
+# setprop) succeed only when BOTH dsl_deleg grants the permission AND
+# the namespace has at least CAP_FOWNER.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. Grant create,snapshot,mount via zfs allow
+# 3. With CAP_FOWNER: create succeeds (L1 yes + L2 yes)
+# 4. With no caps: create fails (L1 yes, L2 no)
+# 5. Without zfs allow grant: create fails even with CAP_FOWNER (L1 no)
+# 6. With CAP_FOWNER + snapshot grant: snapshot succeeds
+# 7. With CAP_FOWNER + create grant only: snapshot fails (wrong perm)
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Additive L1+L2: non-destructive ops need dsl_deleg AND CAP_FOWNER"
+log_onexit cleanup
+
+# Step 1: Create delegation root.
+# Use mountpoint=none so create/snapshot from the namespace don't
+# trigger mount operations that would fail without CAP_SYS_ADMIN.
+log_must zfs create -o mountpoint=none "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+
+# Step 2: Grant create,snapshot,mount
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,snapshot,mount"
+
+# ADD-1: L1 grants create + L2 has CAP_FOWNER → allowed
+log_note "Test ADD-1: create with dsl_deleg + CAP_FOWNER"
+typeset result
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ create "$TESTPOOL/$TESTFS/deleg_root/add1_child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-1: create should succeed with dsl_deleg + CAP_FOWNER"
+fi
+log_note "ADD-1 passed: create allowed"
+
+# ADD-2: L1 grants create + L2 has no caps → denied
+log_note "Test ADD-2: create with dsl_deleg + no caps"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ create "$TESTPOOL/$TESTFS/deleg_root/add2_child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-2: create should fail without capabilities"
+fi
+log_note "ADD-2 passed: create denied without caps"
+
+# Verify the dataset was NOT created
+if datasetexists "$TESTPOOL/$TESTFS/deleg_root/add2_child"; then
+ log_fail "ADD-2: dataset should not exist"
+fi
+
+# ADD-3: No dsl_deleg grant + CAP_FOWNER → denied
+log_note "Test ADD-3: create without dsl_deleg grant"
+log_must revoke_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ create "$TESTPOOL/$TESTFS/deleg_root/add3_child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-3: create should fail without dsl_deleg grant"
+fi
+log_note "ADD-3 passed: create denied without dsl_deleg"
+
+if datasetexists "$TESTPOOL/$TESTFS/deleg_root/add3_child"; then
+ log_fail "ADD-3: dataset should not exist"
+fi
+
+# ADD-5: Restore grants, test snapshot with CAP_FOWNER
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,snapshot,mount"
+
+log_note "Test ADD-5: snapshot with dsl_deleg + CAP_FOWNER"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/add1_child@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-5: snapshot should succeed with dsl_deleg + CAP_FOWNER"
+fi
+log_note "ADD-5 passed: snapshot allowed"
+
+# ADD-6: create grant only, snapshot should fail (wrong perm)
+log_must revoke_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,mount"
+
+log_note "Test ADD-6: snapshot with create-only dsl_deleg"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/add1_child@snap_bad" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-6: snapshot should fail with create-only dsl_deleg"
+fi
+log_note "ADD-6 passed: snapshot denied (wrong perm in dsl_deleg)"
+
+log_pass "Additive L1+L2: non-destructive ops need dsl_deleg AND CAP_FOWNER"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_024_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_024_neg.ksh
new file mode 100755
index 000000000000..487b3baa99cf
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_024_neg.ksh
@@ -0,0 +1,144 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Additive least privilege: destructive operations (destroy, rename,
+# clone) require BOTH dsl_deleg grant AND CAP_SYS_ADMIN.
+# CAP_FOWNER alone is insufficient for destructive operations.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid and child datasets
+# 2. Grant destroy,rename,clone,mount,create via zfs allow
+# 3. With CAP_SYS_ADMIN + destroy grant: destroy succeeds
+# 4. With CAP_FOWNER + destroy grant: destroy fails (wrong cap tier)
+# 5. With CAP_SYS_ADMIN but no destroy grant: destroy fails (L1 no)
+# 6. With no caps + destroy grant: destroy fails (L2 no)
+# 7. Test rename and clone similarly: each needs SYS_ADMIN + the matching grant
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Additive L1+L2: destructive ops need dsl_deleg AND CAP_SYS_ADMIN"
+log_onexit cleanup
+
+# Setup: delegation root with children.
+# Use mountpoint=none so datasets aren't mounted in the host namespace;
+# otherwise destroy from a user namespace fails because mount-locked
+# mounts (created by the host) cannot be unmounted from a child namespace.
+log_must zfs create -o mountpoint=none "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/victim1"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/victim2"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/victim3"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/victim4"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/rename_src"
+
+# Grant destructive permissions
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,destroy,rename,clone,mount,snapshot"
+
+# ADD-8: destroy with dsl_deleg + CAP_SYS_ADMIN → allowed
+log_note "Test ADD-8: destroy with dsl_deleg + CAP_SYS_ADMIN"
+typeset result
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/victim1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-8: destroy should succeed with dsl_deleg + CAP_SYS_ADMIN"
+fi
+if datasetexists "$TESTPOOL/$TESTFS/deleg_root/victim1"; then
+ log_fail "ADD-8: victim1 should not exist after destroy"
+fi
+log_note "ADD-8 passed: destroy allowed with SYS_ADMIN"
+
+# ADD-9: destroy with dsl_deleg + CAP_FOWNER → denied (wrong tier)
+log_note "Test ADD-9: destroy with dsl_deleg + CAP_FOWNER only"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/victim2" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-9: destroy should fail with CAP_FOWNER (needs SYS_ADMIN)"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/victim2"
+log_note "ADD-9 passed: destroy denied with CAP_FOWNER only"
+
+# ADD-10: destroy with dsl_deleg + no caps → denied
+log_note "Test ADD-10: destroy with dsl_deleg + no caps"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/victim3" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-10: destroy should fail without any capabilities"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/victim3"
+log_note "ADD-10 passed: destroy denied without caps"
+
+# ADD-11: destroy with CAP_SYS_ADMIN but NO dsl_deleg grant → denied
+log_note "Test ADD-11: destroy without dsl_deleg grant"
+log_must revoke_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/victim4" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-11: destroy should fail without dsl_deleg grant"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/victim4"
+log_note "ADD-11 passed: destroy denied without dsl_deleg"
+
+# ADD-12: rename with dsl_deleg + CAP_SYS_ADMIN → allowed
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,destroy,rename,clone,mount,snapshot"
+
+log_note "Test ADD-12: rename with dsl_deleg + CAP_SYS_ADMIN"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/rename_src" \
+ "$TESTPOOL/$TESTFS/deleg_root/rename_dst" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-12: rename should succeed with dsl_deleg + CAP_SYS_ADMIN"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/rename_dst"
+log_note "ADD-12 passed: rename allowed with SYS_ADMIN"
+
+# ADD-13: clone with dsl_deleg + CAP_FOWNER → denied (destructive tier)
+log_note "Test ADD-13: clone with dsl_deleg + CAP_FOWNER"
+# Create a snapshot to clone from
+log_must zfs snapshot "$TESTPOOL/$TESTFS/deleg_root/victim2@snap"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ clone "$TESTPOOL/$TESTFS/deleg_root/victim2@snap" \
+ "$TESTPOOL/$TESTFS/deleg_root/clone_dst" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "ADD-13: clone should fail with CAP_FOWNER (needs SYS_ADMIN)"
+fi
+log_note "ADD-13 passed: clone denied with CAP_FOWNER only"
+
+log_pass "Additive L1+L2: destructive ops need dsl_deleg AND CAP_SYS_ADMIN"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_025_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_025_pos.ksh
new file mode 100755
index 000000000000..77ecd6316315
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_025_pos.ksh
@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Read-only operations (list, get properties) require no capabilities
+# and no dsl_deleg grants. Visibility is controlled solely by the
+# zoned_uid delegation scoping.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid and a child
+# 2. No dsl_deleg grants, no capabilities
+# 3. From namespace: zfs list succeeds for delegated dataset
+# 4. From namespace: zfs get succeeds for delegated dataset
+# 5. From namespace: zfs list fails for non-delegated dataset
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+	zfs destroy -rf "$TESTPOOL/$TESTFS/other_ds" 2>/dev/null
+}
+
+log_assert "Read-only operations need no caps and no dsl_deleg"
+log_onexit cleanup
+
+# Setup: delegation root with a child, NO zfs allow grants.
+# Use mountpoint=none to avoid mount-lock issues in user namespaces.
+log_must zfs create -o mountpoint=none "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+# ADD-14: list with no caps, no dsl_deleg → allowed (read-only)
+log_note "Test ADD-14: list delegated dataset with no caps"
+typeset result
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ list "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-14: list should succeed with no caps (read-only)"
+fi
+log_note "ADD-14 passed: list allowed"
+
+# Get properties with no caps → should work
+log_note "Test: get properties with no caps"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ get zoned_uid "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "get properties should succeed with no caps (read-only)"
+fi
+log_note "Get properties passed"
+
+# List child dataset with no caps → should work (child of delegation)
+log_note "Test: list child dataset with no caps"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ list "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "list child should succeed with no caps"
+fi
+log_note "List child passed"
+
+# Non-delegated dataset should NOT be visible
+log_note "Test: list non-delegated dataset from namespace"
+log_must zfs create "$TESTPOOL/$TESTFS/other_ds"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ list "$TESTPOOL/$TESTFS/other_ds" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "Non-delegated dataset should not be visible"
+fi
+log_note "Non-delegated dataset correctly not visible"
+log_must zfs destroy "$TESTPOOL/$TESTFS/other_ds"
+
+log_pass "Read-only operations need no caps and no dsl_deleg"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_026_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_026_pos.ksh
new file mode 100755
index 000000000000..10913fbdb4ab
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_026_pos.ksh
@@ -0,0 +1,112 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# When pool delegation is disabled (zpool set delegation=off),
+# ALL zoned_uid write operations are denied regardless of
+# capabilities. Delegation OFF means the pool admin has opted
+# out of delegating access entirely (POLP, principle of least privilege).
+# Read-only operations (list, get) still succeed.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. Disable delegation on the pool
+# 3. DOFF-1: create with CAP_FOWNER → denied (delegation off)
+# 4. DOFF-2: destroy with CAP_SYS_ADMIN → denied (delegation off)
+# 5. DOFF-3: create with all caps → denied (delegation off)
+# 6. DOFF-4: list with no caps → allowed (read-only)
+# 7. Re-enable delegation
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ # Always re-enable delegation
+ log_must zpool set delegation=on "$TESTPOOL"
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Delegation OFF: all zoned_uid writes denied"
+log_onexit cleanup
+
+# Setup.
+# Use mountpoint=none to avoid mount-lock issues in user namespaces.
+log_must zfs create -o mountpoint=none "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/victim"
+
+# Disable delegation on pool
+log_must zpool set delegation=off "$TESTPOOL"
+log_note "Pool delegation disabled"
+
+# DOFF-1: create with CAP_FOWNER → denied (delegation off overrides caps)
+log_note "Test DOFF-1: create with CAP_FOWNER (delegation off)"
+typeset result
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ create "$TESTPOOL/$TESTFS/deleg_root/doff1_child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "DOFF-1: create should fail when delegation is off"
+fi
+log_note "DOFF-1 passed: create denied (delegation off)"
+
+# DOFF-2: destroy with CAP_SYS_ADMIN → denied (delegation off)
+log_note "Test DOFF-2: destroy with CAP_SYS_ADMIN (delegation off)"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/victim" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "DOFF-2: destroy should fail when delegation is off"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/victim"
+log_note "DOFF-2 passed: destroy denied (delegation off)"
+
+# DOFF-3: create with all caps → denied (delegation off)
+log_note "Test DOFF-3: create with all caps (delegation off)"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ create "$TESTPOOL/$TESTFS/deleg_root/doff3_child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "DOFF-3: create should fail when delegation is off"
+fi
+log_note "DOFF-3 passed: create denied (delegation off)"
+
+# DOFF-4: list with no caps → allowed (read-only)
+log_note "Test DOFF-4: list with no caps (delegation off)"
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ list "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "DOFF-4: list should succeed with no caps (read-only)"
+fi
+log_note "DOFF-4 passed"
+
+# Re-enable delegation
+log_must zpool set delegation=on "$TESTPOOL"
+
+log_pass "Delegation OFF: all zoned_uid writes denied"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_027_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_027_pos.ksh
new file mode 100755
index 000000000000..c753145f1544
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_027_pos.ksh
@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# CAP_SYS_ADMIN satisfies the L2 requirement for ALL operation tiers
+# (both non-destructive and destructive). This verifies that
+# SYS_ADMIN is a superset of FOWNER for L2 purposes.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid
+# 2. Grant all permissions via zfs allow
+# 3. With CAP_SYS_ADMIN: create succeeds (SYS_ADMIN covers FOWNER tier)
+# 4. With CAP_SYS_ADMIN: snapshot succeeds
+# 5. With CAP_SYS_ADMIN: clone and destroy succeed
+# 6. Verify CAP_SYS_ADMIN is a complete L2 pass for all ops
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "CAP_SYS_ADMIN satisfies L2 for all operation tiers"
+log_onexit cleanup
+
+# Setup
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,destroy,snapshot,rename,clone,mount"
+
+# ADD-7: create with dsl_deleg + CAP_SYS_ADMIN → allowed
+log_note "Test ADD-7: create with CAP_SYS_ADMIN"
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ create "$TESTPOOL/$TESTFS/deleg_root/child1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "ADD-7: create should succeed with SYS_ADMIN"
+fi
+log_note "ADD-7 passed: create allowed with SYS_ADMIN"
+
+# Snapshot with CAP_SYS_ADMIN
+log_note "Test: snapshot with CAP_SYS_ADMIN"
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ snapshot "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "snapshot should succeed with SYS_ADMIN"
+fi
+log_note "Snapshot passed with SYS_ADMIN"
+
+# Clone with CAP_SYS_ADMIN (destructive tier)
+log_note "Test: clone with CAP_SYS_ADMIN"
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ clone "$TESTPOOL/$TESTFS/deleg_root/child1@snap1" \
+ "$TESTPOOL/$TESTFS/deleg_root/clone1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "clone should succeed with SYS_ADMIN"
+fi
+log_note "Clone passed with SYS_ADMIN"
+
+# Destroy with CAP_SYS_ADMIN (destructive tier)
+log_note "Test: destroy with CAP_SYS_ADMIN"
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/clone1" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "destroy should succeed with SYS_ADMIN"
+fi
+log_note "Destroy passed with SYS_ADMIN"
+
+log_pass "CAP_SYS_ADMIN satisfies L2 for all operation tiers"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_028_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_028_neg.ksh
new file mode 100755
index 000000000000..f1dbed22d35c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_028_neg.ksh
@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that the additive model does not affect non-zoned datasets.
+# Standard ZFS permission checks (secpolicy_zfs → dsl_deleg) continue
+# to work unchanged when zone_dataset_admin_check returns NOT_APPLICABLE.
+#
+# STRATEGY:
+# 1. Create a dataset WITHOUT zoned_uid
+# 2. From global zone as root: all operations succeed (existing behavior)
+# 3. Grant permissions to a non-root user via zfs allow
+# 4. As non-root user (not in namespace): operations succeed via dsl_deleg
+# 5. Without zfs allow grant: operations fail
+# 6. Verify zoned_uid model doesn't interfere
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/normal_ds" 2>/dev/null
+}
+
+log_assert "Non-zoned datasets use standard permission model unchanged"
+log_onexit cleanup
+
+# Create a normal dataset (no zoned_uid)
+log_must zfs create "$TESTPOOL/$TESTFS/normal_ds"
+
+# Verify zoned_uid is 0 (unset)
+typeset val
+val=$(get_zoned_uid "$TESTPOOL/$TESTFS/normal_ds")
+if [[ "$val" != "0" ]]; then
+ log_fail "Default zoned_uid should be 0, got: $val"
+fi
+
+# EXIST-1: Root in global zone can do everything (existing behavior)
+log_note "Test EXIST-1: root in global zone"
+log_must zfs create "$TESTPOOL/$TESTFS/normal_ds/child"
+log_must zfs snapshot "$TESTPOOL/$TESTFS/normal_ds/child@snap"
+log_must zfs destroy "$TESTPOOL/$TESTFS/normal_ds/child@snap"
+log_must zfs destroy "$TESTPOOL/$TESTFS/normal_ds/child"
+log_note "EXIST-1 passed: root can do everything"
+
+# EXIST-2: Non-root with zfs allow can perform delegated operations
+log_note "Test EXIST-2: non-root with zfs allow"
+log_must grant_deleg "$TESTPOOL/$TESTFS/normal_ds" "$ZONED_TEST_UID" \
+ "create,snapshot,mount,destroy"
+
+# Run as the test user (NOT in a namespace, just sudo -u)
+typeset zfs_cmd result
+zfs_cmd="$(which zfs)"
+result=$(sudo -u \#"$ZONED_TEST_UID" "$zfs_cmd" \
+ create "$TESTPOOL/$TESTFS/normal_ds/deleg_child" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "EXIST-2: non-root with zfs allow should be able to create"
+fi
+log_note "EXIST-2 passed: dsl_deleg works for non-root"
+
+# EXIST-3: Non-root WITHOUT zfs allow is denied
+log_note "Test EXIST-3: non-root without zfs allow"
+log_must revoke_deleg "$TESTPOOL/$TESTFS/normal_ds" "$ZONED_TEST_UID"
+
+result=$(sudo -u \#"$ZONED_TEST_UID" "$zfs_cmd" \
+ create "$TESTPOOL/$TESTFS/normal_ds/denied_child" 2>&1)
+if [[ $? -eq 0 ]]; then
+ log_fail "EXIST-3: non-root without zfs allow should be denied"
+fi
+log_note "EXIST-3 passed: denied without dsl_deleg"
+
+# Cleanup the child we created
+log_must zfs destroy "$TESTPOOL/$TESTFS/normal_ds/deleg_child"
+
+log_pass "Non-zoned datasets use standard permission model unchanged"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_029_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_029_neg.ksh
new file mode 100755
index 000000000000..fa6ec3ce431d
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_029_neg.ksh
@@ -0,0 +1,120 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Cross-cutting constraints still apply under the additive model.
+# Even with full dsl_deleg grants AND CAP_SYS_ADMIN, certain
+# operations are always denied to protect the delegation boundary.
+#
+# STRATEGY:
+# 1. Create delegation root with full grants + CAP_SYS_ADMIN
+# 2. CROSS-1: Cannot destroy delegation root itself
+# 3. CROSS-2: Cannot rename dataset outside delegation subtree
+# 4. CROSS-3: Cannot modify zoned_uid property from namespace
+# 5. CROSS-4: Cannot override admin-set limits on delegation root
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+ zfs destroy -rf "$TESTPOOL/$TESTFS/outside" 2>/dev/null
+}
+
+log_assert "Cross-cutting constraints enforced under additive model"
+log_onexit cleanup
+
+# Setup: full permissions
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+log_must zfs create "$TESTPOOL/$TESTFS/outside"
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,destroy,snapshot,rename,clone,mount"
+log_must zfs set filesystem_limit=10 "$TESTPOOL/$TESTFS/deleg_root"
+log_must zfs set snapshot_limit=5 "$TESTPOOL/$TESTFS/deleg_root"
+
+# CROSS-1: Cannot destroy the delegation root itself
+log_note "Test CROSS-1: destroy delegation root"
+run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root" >/dev/null 2>&1
+if [[ $? -eq 0 ]]; then
+ log_fail "CROSS-1: should not be able to destroy delegation root"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root"
+log_note "CROSS-1 passed: delegation root protected"
+
+# CROSS-2: Cannot rename outside delegation subtree
+log_note "Test CROSS-2: rename outside subtree"
+run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/child" \
+ "$TESTPOOL/$TESTFS/outside/escaped" >/dev/null 2>&1
+if [[ $? -eq 0 ]]; then
+ log_fail "CROSS-2: should not be able to rename outside subtree"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child"
+log_note "CROSS-2 passed: cannot escape delegation"
+
+# CROSS-3: Cannot modify zoned_uid from namespace
+log_note "Test CROSS-3: set zoned_uid from namespace"
+run_in_userns "$ZONED_TEST_UID" \
+ set zoned_uid=0 "$TESTPOOL/$TESTFS/deleg_root" >/dev/null 2>&1
+if [[ $? -eq 0 ]]; then
+ log_fail "CROSS-3: should not be able to modify zoned_uid"
+fi
+typeset val
+val=$(get_zoned_uid "$TESTPOOL/$TESTFS/deleg_root")
+if [[ "$val" != "$ZONED_TEST_UID" ]]; then
+ log_fail "CROSS-3: zoned_uid changed from $ZONED_TEST_UID to $val"
+fi
+log_note "CROSS-3 passed: zoned_uid protected"
+
+# CROSS-4: Cannot override admin limits on delegation root
+log_note "Test CROSS-4: override filesystem_limit on root"
+run_in_userns "$ZONED_TEST_UID" \
+ set filesystem_limit=none "$TESTPOOL/$TESTFS/deleg_root" >/dev/null 2>&1
+if [[ $? -eq 0 ]]; then
+ log_fail "CROSS-4: should not be able to remove admin limits"
+fi
+typeset fs_limit
+fs_limit=$(get_prop filesystem_limit "$TESTPOOL/$TESTFS/deleg_root")
+if [[ "$fs_limit" != "10" ]]; then
+ log_fail "CROSS-4: filesystem_limit changed to $fs_limit"
+fi
+
+run_in_userns "$ZONED_TEST_UID" \
+ set snapshot_limit=none "$TESTPOOL/$TESTFS/deleg_root" >/dev/null 2>&1
+if [[ $? -eq 0 ]]; then
+ log_fail "CROSS-4: should not be able to remove snapshot_limit"
+fi
+log_note "CROSS-4 passed: admin limits protected"
+
+log_pass "Cross-cutting constraints enforced under additive model"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_030_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_030_pos.ksh
new file mode 100755
index 000000000000..8536b36e2941
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_030_pos.ksh
@@ -0,0 +1,183 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Validate that the capability control mechanism (capsh --drop within
+# unshare --user --map-root-user) works correctly. This is a
+# prerequisite for all L2 capability-tier tests (023-027).
+#
+# The kernel's ns_capable() checks the effective capability set
+# within the user namespace. capsh --drop removes capabilities
+# from the bounding set, and the exec'd child process inherits
+# the restricted set.
+#
+# STRATEGY:
+# 1. Verify capsh is available
+# 2. Full caps (all): CAP_SYS_ADMIN present, CAP_FOWNER present
+# 3. Drop SYS_ADMIN only: CAP_SYS_ADMIN absent, CAP_FOWNER present
+# 4. Drop all: CAP_SYS_ADMIN absent, CAP_FOWNER absent
+# 5. Verify /proc/self/status CapEff reflects the drops
+# 6. Verify drops work under sudo -u (as test UID)
+#
+
+verify_runnable "global"
+
+log_assert "Capability control via capsh works in user namespaces"
+
+typeset capsh_cmd
+capsh_cmd="$(which capsh)"
+if [[ -z "$capsh_cmd" ]]; then
+ log_unsupported "capsh not found (install libcap)"
+fi
+
+# Helper: check a capability in a namespace
+function check_cap_in_ns
+{
+ typeset drop_arg=$1
+ typeset cap_to_check=$2
+ typeset expect=$3 # "yes" or "no"
+
+ typeset result cmd_args
+ if [[ "$drop_arg" == "none" ]]; then
+ cmd_args=""
+ else
+ cmd_args="$drop_arg"
+ fi
+
+ if [[ -z "$cmd_args" ]]; then
+ result=$(unshare --user --map-root-user \
+ "$capsh_cmd" --has-p="$cap_to_check" 2>&1 \
+ && echo "YES" || echo "NO")
+ else
+ # shellcheck disable=SC2086
+ result=$(unshare --user --map-root-user \
+ "$capsh_cmd" $cmd_args -- \
+ -c "$capsh_cmd --has-p=$cap_to_check 2>&1 && echo YES || echo NO")
+ fi
+
+ if [[ "$expect" == "yes" && "$result" != *"YES"* ]]; then
+ log_fail "Expected $cap_to_check to be present ($drop_arg), got: $result"
+ fi
+ if [[ "$expect" == "no" && "$result" != *"NO"* ]]; then
+ log_fail "Expected $cap_to_check to be absent ($drop_arg), got: $result"
+ fi
+}
+
+# Test 1: Full caps — both present
+log_note "Test 1: full caps in namespace"
+check_cap_in_ns "none" "cap_sys_admin" "yes"
+check_cap_in_ns "none" "cap_fowner" "yes"
+log_note "Test 1 passed"
+
+# Test 2: Drop SYS_ADMIN — SYS_ADMIN absent, FOWNER present
+log_note "Test 2: drop cap_sys_admin"
+check_cap_in_ns "--drop=cap_sys_admin" "cap_sys_admin" "no"
+check_cap_in_ns "--drop=cap_sys_admin" "cap_fowner" "yes"
+log_note "Test 2 passed"
+
+# Test 3: Drop all — both absent
+log_note "Test 3: drop all caps"
+check_cap_in_ns "--drop=all" "cap_sys_admin" "no"
+check_cap_in_ns "--drop=all" "cap_fowner" "no"
+log_note "Test 3 passed"
+
+# Test 4: Verify via /proc/self/status CapEff bitmask
+log_note "Test 4: verify CapEff bitmask"
+typeset full_eff drop_eff
+full_eff=$(unshare --user --map-root-user \
+ grep CapEff /proc/self/status 2>&1 | awk '{print $2}')
+drop_eff=$(unshare --user --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- \
+ -c 'grep CapEff /proc/self/status' 2>&1 | awk '{print $2}')
+
+if [[ "$full_eff" == "$drop_eff" ]]; then
+ log_fail "CapEff should differ after dropping cap_sys_admin"
+fi
+log_note "CapEff full=$full_eff drop_sys_admin=$drop_eff"
+
+# CAP_SYS_ADMIN is bit 21 = 0x200000
+# The difference should be exactly this bit
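+# Illustrative values (they depend on how many capabilities the running
+# kernel defines; with caps 0-40 present one would expect):
+#   full_eff = 000001ffffffffff   (bits 0-40 set)
+#   drop_eff = 000001ffffdfffff   (bit 21, CAP_SYS_ADMIN, cleared)
+#   diff     = 0x200000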
+typeset diff
+diff=$(printf "0x%x" $(( 16#${full_eff} - 16#${drop_eff} )))
+if [[ "$diff" != "0x200000" ]]; then
+ log_note "Expected diff 0x200000 (CAP_SYS_ADMIN), got $diff"
+ log_note "This may indicate kernel cap numbering differs; non-fatal"
+fi
+log_note "Test 4 passed"
+
+# Test 5: Works under sudo -u (as test UID)
+log_note "Test 5: capability drops work under sudo -u"
+typeset result
+result=$(sudo -u \#"$ZONED_TEST_UID" unshare --user --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- \
+ -c "$capsh_cmd --has-p=cap_sys_admin 2>&1 && echo YES || echo NO" 2>&1)
+if [[ "$result" != *"NO"* ]]; then
+ log_fail "cap_sys_admin should be absent under sudo -u, got: $result"
+fi
+
+result=$(sudo -u \#"$ZONED_TEST_UID" unshare --user --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- \
+ -c "$capsh_cmd --has-p=cap_fowner 2>&1 && echo YES || echo NO" 2>&1)
+if [[ "$result" != *"YES"* ]]; then
+ log_fail "cap_fowner should be present under sudo -u, got: $result"
+fi
+log_note "Test 5 passed"
+
+# Test 6: Verify run_in_userns_caps helper modes work
+log_note "Test 6: run_in_userns_caps helper verification"
+
+# "all" mode — should have SYS_ADMIN
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "all" \
+ version 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "run_in_userns_caps 'all' should work"
+fi
+log_note "'all' mode works"
+
+# "drop_sys_admin" mode — zfs version should still work (read-only)
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+ version 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "run_in_userns_caps 'drop_sys_admin' should work for read-only"
+fi
+log_note "'drop_sys_admin' mode works"
+
+# "none" mode — zfs version should still work (read-only)
+result=$(run_in_userns_caps "$ZONED_TEST_UID" "none" \
+ version 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "run_in_userns_caps 'none' should work for read-only"
+fi
+log_note "'none' mode works"
+
+log_pass "Capability control via capsh works in user namespaces"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_031_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_031_pos.ksh
new file mode 100755
index 000000000000..fe0fb2ab0240
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_031_pos.ksh
@@ -0,0 +1,110 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid_common.kshlib
+
+#
+# DESCRIPTION:
+# Verify that namespace-initiated rename+destroy properly cleans up
+# kernel-side zone tracking entries. When a namespace user renames
+# a dataset, a tracking entry is created for the new name. When
+# the renamed dataset is subsequently destroyed, that tracking entry
+# must be removed. If it persists (stale), the delegation root
+# remains visible as a parent dataset even after the admin removes
+# the zoned_uid delegation — an information leak.
+#
+# STRATEGY:
+# 1. Create delegation root with zoned_uid, grant permissions
+# 2. From namespace: rename child → child2 (creates tracking entry)
+# 3. From namespace: destroy child2 (should clean up tracking entry)
+# 4. Admin removes zoned_uid delegation (zfs set zoned_uid=0)
+# 5. Verify delegation root is NOT visible from the old namespace
+# (if stale tracking persists, it would still be visible)
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ zfs destroy -rf "$TESTPOOL/$TESTFS/deleg_root" 2>/dev/null
+}
+
+log_assert "Zone tracking cleanup after namespace rename+destroy"
+log_onexit cleanup
+
+# Setup: delegation root with child, mountpoint=none to avoid mount-lock issues
+log_must zfs create -o mountpoint=none "$TESTPOOL/$TESTFS/deleg_root"
+log_must set_zoned_uid "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID"
+log_must zfs create "$TESTPOOL/$TESTFS/deleg_root/child"
+
+# Grant all needed permissions
+log_must grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+ "create,destroy,rename,mount"
+
+# Step 1: From namespace, rename child → child2
+# This internally calls zone_dataset_attach_uid for the new name
+log_note "Test: rename child from namespace"
+typeset result
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ rename "$TESTPOOL/$TESTFS/deleg_root/child" \
+ "$TESTPOOL/$TESTFS/deleg_root/child2" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "rename should succeed from namespace"
+fi
+log_must zfs list "$TESTPOOL/$TESTFS/deleg_root/child2"
+log_note "Rename succeeded"
+
+# Step 2: From namespace, destroy child2
+# This should clean up the tracking entry created by rename
+log_note "Test: destroy renamed child from namespace"
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ destroy "$TESTPOOL/$TESTFS/deleg_root/child2" 2>&1)
+if [[ $? -ne 0 ]]; then
+ log_note "Output: $result"
+ log_fail "destroy should succeed from namespace"
+fi
+log_note "Destroy succeeded"
+
+# Step 3: Admin removes the zoned_uid delegation
+log_note "Test: admin removes zoned_uid delegation"
+log_must zfs set zoned_uid=0 "$TESTPOOL/$TESTFS/deleg_root"
+
+# Step 4: Verify the delegation root is NOT visible from the old namespace.
+# If the tracking entry from the rename was not cleaned up (stale),
+# the delegation root would still be visible as a parent of the stale
+# entry, leaking its existence after delegation was revoked.
+log_note "Test: verify no stale visibility after delegation removal"
+result=$(run_in_userns "$ZONED_TEST_UID" \
+ list "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+if [[ $? -eq 0 ]]; then
+	log_fail "Delegation root should NOT be visible after" \
+	    "zoned_uid=0 (stale tracking entry detected)"
+fi
+log_note "No stale visibility: delegation root correctly hidden"
+
+log_pass "Zone tracking cleanup after namespace rename+destroy"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_common.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_common.kshlib
new file mode 100644
index 000000000000..a44a3f0cc7a1
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zoned_uid/zoned_uid_common.kshlib
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026 Colin K. Williams / LINK ORG LLC / LI-NK.SOCIAL. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/zoned_uid/zoned_uid.cfg
+
+#
+# Check if the kernel supports zoned_uid property
+#
+function zoned_uid_supported
+{
+ zfs get zoned_uid "$TESTPOOL" >/dev/null 2>&1
+ return $?
+}
+
+#
+# Get the zoned_uid property value for a dataset
+# Use -p for parseable (raw numeric) output
+#
+function get_zoned_uid
+{
+ typeset dataset=$1
+ get_prop zoned_uid "$dataset"
+}
+
+#
+# Set the zoned_uid property on a dataset
+#
+function set_zoned_uid
+{
+ typeset dataset=$1
+ typeset uid=$2
+ zfs set zoned_uid="$uid" "$dataset"
+}
+
+#
+# Clear the zoned_uid property (set to 0/none)
+#
+function clear_zoned_uid
+{
+ typeset dataset=$1
+ zfs set zoned_uid=0 "$dataset"
+}
+
+#
+# Run a ZFS command inside a user namespace owned by the given UID.
+# Uses absolute path to zfs so the binary is found regardless of the
+# target user's PATH (e.g. when running from a source build).
+#
+# The namespace gets CAP_SYS_ADMIN via --map-root-user (default behavior,
+# equivalent to a container launched with --cap-add SYS_ADMIN).
+#
+# Usage: run_in_userns <uid> <zfs_args...>
+# Output is captured to stdout/stderr; return code is preserved.
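+#
+# Example (illustrative; dataset names are the ones used by these tests):
+#   out=$(run_in_userns "$ZONED_TEST_UID" \
+#       list "$TESTPOOL/$TESTFS/deleg_root" 2>&1)
+#   (( $? == 0 )) || log_note "list denied: $out"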
+#
+function run_in_userns
+{
+ typeset uid=$1
+ shift
+ typeset zfs_cmd
+ zfs_cmd="$(which zfs)"
+
+ sudo -u \#"${uid}" unshare --user --mount --map-root-user \
+ "$zfs_cmd" "$@"
+}
+
+#
+# Run a ZFS command inside a user namespace with specific capabilities.
+# Uses capsh --drop to remove capabilities from the bounding set after
+# creating the namespace via unshare --map-root-user. The exec'd shell
+# (via capsh -- -c) inherits the restricted bounding set, so the ZFS
+# binary sees only the kept capabilities in effective/permitted.
+#
+# Usage: run_in_userns_caps <uid> <cap_spec> <zfs_args...>
+# cap_spec:
+# "all" — keep all caps (same as run_in_userns)
+# "none" — drop all capabilities
+# "drop_sys_admin" — drop only CAP_SYS_ADMIN (keep FOWNER etc.)
+# "cap_fowner" — keep only CAP_FOWNER (drop everything else)
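+#
+# Example (illustrative; mirrors how tests 023-027 call this helper):
+#   out=$(run_in_userns_caps "$ZONED_TEST_UID" "drop_sys_admin" \
+#       create "$TESTPOOL/$TESTFS/deleg_root/child" 2>&1)
+#   (( $? != 0 )) && log_note "create denied: $out"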
+#
+function run_in_userns_caps
+{
+ typeset uid=$1
+ typeset cap_spec=$2
+ shift 2
+ typeset zfs_cmd capsh_cmd
+ zfs_cmd="$(which zfs)"
+ capsh_cmd="$(which capsh)"
+
+ if [[ "$cap_spec" == "all" ]]; then
+ sudo -u \#"${uid}" unshare --user --mount --map-root-user \
+ "$zfs_cmd" "$@"
+ return $?
+ fi
+
+ if [[ "$cap_spec" == "none" ]]; then
+ # Drop every capability from the bounding set
+ sudo -u \#"${uid}" unshare --user --mount --map-root-user \
+ "$capsh_cmd" --drop=all -- -c "$zfs_cmd $*"
+ return $?
+ fi
+
+ if [[ "$cap_spec" == "drop_sys_admin" ]]; then
+ # Drop only CAP_SYS_ADMIN; all other caps (including
+ # CAP_FOWNER) remain. This simulates a default Podman
+ # container (default caps, no --cap-add SYS_ADMIN).
+ sudo -u \#"${uid}" unshare --user --mount --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- -c "$zfs_cmd $*"
+ return $?
+ fi
+
+ # Generic: drop all caps except the ones listed.
+ # Build the drop list by enumerating all caps and excluding those
+ # the caller wants to keep.
+ typeset all_caps drop_list=""
+ all_caps=$("$capsh_cmd" --print 2>/dev/null | \
+ grep "^Bounding set" | sed 's/.*=//;s/,/ /g')
+ if [[ -z "$all_caps" ]]; then
+ log_fail "capsh --print failed to enumerate capabilities"
+ fi
+ for cap in $all_caps; do
+ typeset keep=false
+ typeset IFS=","
+ for want in $cap_spec; do
+ if [[ "$cap" == "$want" ]]; then
+ keep=true
+ break
+ fi
+ done
+ unset IFS
+ if [[ "$keep" == "false" ]]; then
+ drop_list="$drop_list --drop=$cap"
+ fi
+ done
+
+ # shellcheck disable=SC2086
+ sudo -u \#"${uid}" unshare --user --mount --map-root-user \
+ "$capsh_cmd" $drop_list -- -c "$zfs_cmd $*"
+}
+
+#
+# Verify that capability control via capsh works in user namespaces.
+# Returns 0 if the mechanism is functional, non-zero otherwise.
+# This should be called in setup.ksh to skip tests if capsh is broken.
+#
+function verify_capsh_works
+{
+ typeset capsh_cmd
+ capsh_cmd="$(which capsh)"
+ if [[ -z "$capsh_cmd" ]]; then
+ return 1
+ fi
+
+ # Test 1: after --drop=cap_sys_admin, cap should be absent
+ typeset result
+ result=$(unshare --user --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- \
+ -c "$capsh_cmd --has-p=cap_sys_admin 2>&1 && echo YES || echo NO")
+ if [[ "$result" != *"NO"* ]]; then
+ return 1
+ fi
+
+ # Test 2: cap_fowner should still be present
+ result=$(unshare --user --map-root-user \
+ "$capsh_cmd" --drop=cap_sys_admin -- \
+ -c "$capsh_cmd --has-p=cap_fowner 2>&1 && echo YES || echo NO")
+ if [[ "$result" != *"YES"* ]]; then
+ return 1
+ fi
+
+ # Test 3: --drop=all should remove everything
+ result=$(unshare --user --map-root-user \
+ "$capsh_cmd" --drop=all -- \
+ -c "$capsh_cmd --has-p=cap_fowner 2>&1 && echo YES || echo NO")
+ if [[ "$result" != *"NO"* ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# Grant delegated permissions to a user on a dataset.
+# Wrapper around zfs allow.
+#
+# Usage: grant_deleg <dataset> <uid> <perms>
+# perms: comma-separated list, e.g. "create,snapshot,mount"
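+#
+# Example (as used throughout these tests):
+#   grant_deleg "$TESTPOOL/$TESTFS/deleg_root" "$ZONED_TEST_UID" \
+#       "create,snapshot,mount"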
+#
+function grant_deleg
+{
+ typeset dataset=$1
+ typeset uid=$2
+ typeset perms=$3
+ zfs allow -u "$uid" "$perms" "$dataset"
+}
+
+#
+# Revoke delegated permissions from a user on a dataset.
+# Wrapper around zfs unallow.
+#
+# Usage: revoke_deleg <dataset> <uid> [perms]
+# perms: optional comma-separated list; if omitted, revokes all
+#
+function revoke_deleg
+{
+ typeset dataset=$1
+ typeset uid=$2
+ typeset perms=${3:-}
+ if [[ -n "$perms" ]]; then
+ zfs unallow -u "$uid" "$perms" "$dataset"
+ else
+ zfs unallow -u "$uid" "$dataset"
+ fi
+}
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index d61e2643fa5c..1cf55ead9587 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -911,7 +911,7 @@
/* #undef ZFS_DEVICE_MINOR */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.4.99-468-FreeBSD_g3ee08abd2"
+#define ZFS_META_ALIAS "zfs-2.4.99-522-FreeBSD_g1644e2ffd"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -941,7 +941,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "468-FreeBSD_g3ee08abd2"
+#define ZFS_META_RELEASE "522-FreeBSD_g1644e2ffd"
/* Define the project version. */
#define ZFS_META_VERSION "2.4.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 156d9a25fdcb..9c5cec1628b7 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.4.99-468-g3ee08abd2"
+#define ZFS_META_GITREV "zfs-2.4.99-522-g1644e2ffd"