aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob N <rob.norris@klarasystems.com>2024-04-08 18:50:24 +0000
committerGitHub <noreply@github.com>2024-04-08 18:50:24 +0000
commitba9f587a77e6893390c752491dfacb6ee5d52023 (patch)
tree7ff6773affc252c7f126636b53b1fefec5258531
parent03987f71e39c47d2f16c3006d690c50a9fec2ca5 (diff)
downloadsrc-ba9f587a77e6893390c752491dfacb6ee5d52023.tar.gz
src-ba9f587a77e6893390c752491dfacb6ee5d52023.zip
vdev_disk: ensure trim errors are returned immediately
After 06e25f9c4, the discard issuing code was organised such that if requesting an async discard or secure erase failed before the IO was issued (that is, calling __blkdev_issue_discard() returned an error), the failed zio would never be executed, resulting in txg_sync hanging forever waiting for IO to finish. This commit fixes that by immediately executing a failed zio on error. To handle the successful synchronous op case, we fake an async op by, when not using an asynchronous submission method, queuing the successful result zio as part of the discard handler. Since it was hard to understand the differences between discard and secure erase, and sync and async, across different kernel versions, I've commented and reorganised the code a bit to try and make everything more contained and linear. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Ameer Hamza <ahamza@ixsystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #16070
-rw-r--r--config/kernel-blkdev.m4118
-rw-r--r--module/os/linux/zfs/vdev_disk.c131
2 files changed, 171 insertions, 78 deletions
diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4
index 7b0e830e600f..b6ce1e1cf083 100644
--- a/config/kernel-blkdev.m4
+++ b/config/kernel-blkdev.m4
@@ -561,12 +561,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [
])
dnl #
-dnl # 5.19 API: blkdev_issue_secure_erase()
-dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
-dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
- ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
+dnl # TRIM support: discard and secure erase. We make use of asynchronous
+dnl # functions when available.
+dnl #
+dnl # 3.10:
+dnl # sync discard: blkdev_issue_discard(..., 0)
+dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
+dnl # async discard: [not available]
+dnl # async erase: [not available]
+dnl #
+dnl # 4.7:
+dnl # sync discard: blkdev_issue_discard(..., 0)
+dnl # sync erase: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
+dnl # async discard: __blkdev_issue_discard(..., 0)
+dnl # async erase: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
+dnl #
+dnl # 5.19:
+dnl # sync discard: blkdev_issue_discard(...)
+dnl # sync erase: blkdev_issue_secure_erase(...)
+dnl # async discard: __blkdev_issue_discard(...)
+dnl # async erase: [not available]
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD], [
+ ZFS_LINUX_TEST_SRC([blkdev_issue_discard_noflags], [
#include <linux/blkdev.h>
],[
struct block_device *bdev = NULL;
@@ -574,10 +591,33 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
sector_t nr_sects = 0;
int error __attribute__ ((unused));
- error = blkdev_issue_secure_erase(bdev,
+ error = blkdev_issue_discard(bdev,
sector, nr_sects, GFP_KERNEL);
])
+ ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
+ #include <linux/blkdev.h>
+ ],[
+ struct block_device *bdev = NULL;
+ sector_t sector = 0;
+ sector_t nr_sects = 0;
+ unsigned long flags = 0;
+ int error __attribute__ ((unused));
+
+ error = blkdev_issue_discard(bdev,
+ sector, nr_sects, GFP_KERNEL, flags);
+ ])
+ ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_noflags], [
+ #include <linux/blkdev.h>
+ ],[
+ struct block_device *bdev = NULL;
+ sector_t sector = 0;
+ sector_t nr_sects = 0;
+ struct bio *biop = NULL;
+ int error __attribute__ ((unused));
+ error = __blkdev_issue_discard(bdev,
+ sector, nr_sects, GFP_KERNEL, &biop);
+ ])
ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [
#include <linux/blkdev.h>
],[
@@ -591,22 +631,52 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
error = __blkdev_issue_discard(bdev,
sector, nr_sects, GFP_KERNEL, flags, &biop);
])
-
- ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
+ ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
#include <linux/blkdev.h>
],[
struct block_device *bdev = NULL;
sector_t sector = 0;
sector_t nr_sects = 0;
- unsigned long flags = 0;
int error __attribute__ ((unused));
- error = blkdev_issue_discard(bdev,
- sector, nr_sects, GFP_KERNEL, flags);
+ error = blkdev_issue_secure_erase(bdev,
+ sector, nr_sects, GFP_KERNEL);
])
])
-AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD], [
+ AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_noflags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS, 1,
+ [blkdev_issue_discard() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ AC_MSG_CHECKING([whether blkdev_issue_discard(flags) is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS, 1,
+ [blkdev_issue_discard(flags) is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_noflags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS, 1,
+ [__blkdev_issue_discard() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ AC_MSG_CHECKING([whether __blkdev_issue_discard(flags) is available])
+ ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS, 1,
+ [__blkdev_issue_discard(flags) is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
AC_MSG_RESULT(yes)
@@ -614,24 +684,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
[blkdev_issue_secure_erase() is available])
],[
AC_MSG_RESULT(no)
-
- AC_MSG_CHECKING([whether __blkdev_issue_discard() is available])
- ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1,
- [__blkdev_issue_discard() is available])
- ],[
- AC_MSG_RESULT(no)
-
- AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
- ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
- [blkdev_issue_discard() is available])
- ],[
- ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
- ])
- ])
])
])
@@ -696,7 +748,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
- ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
+ ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_DISCARD
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
@@ -717,7 +769,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
ZFS_AC_KERNEL_BLKDEV_BDEVNAME
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
- ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
+ ZFS_AC_KERNEL_BLKDEV_ISSUE_DISCARD
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index a710bb91004e..a560bca918a8 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -1252,8 +1252,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
return (0);
}
-#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
- defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
{
zio_t *zio = bio->bi_private;
@@ -1268,54 +1266,99 @@ BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
zio_interrupt(zio);
}
+/*
+ * Wrappers for the different secure erase and discard APIs. We use async
+ * when available; in this case, *biop is set to the last bio in the chain.
+ */
static int
-vdev_issue_discard_trim(zio_t *zio, unsigned long flags)
+vdev_bdev_issue_secure_erase(zfs_bdev_handle_t *bdh, sector_t sector,
+ sector_t nsect, struct bio **biop)
{
- int ret;
- struct bio *bio = NULL;
+ *biop = NULL;
+ int error;
-#if defined(BLKDEV_DISCARD_SECURE)
- ret = - __blkdev_issue_discard(
- BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio);
+#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
+ error = blkdev_issue_secure_erase(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
+ error = __blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
+ error = blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS, BLKDEV_DISCARD_SECURE);
#else
- (void) flags;
- ret = - __blkdev_issue_discard(
- BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio);
+#error "unsupported kernel"
#endif
- if (!ret && bio) {
- bio->bi_private = zio;
- bio->bi_end_io = vdev_disk_discard_end_io;
- vdev_submit_bio(bio);
- }
- return (ret);
+
+ return (error);
}
+
+static int
+vdev_bdev_issue_discard(zfs_bdev_handle_t *bdh, sector_t sector,
+ sector_t nsect, struct bio **biop)
+{
+ *biop = NULL;
+ int error;
+
+#if defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS)
+ error = __blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS, 0, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS)
+ error = __blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS, biop);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_FLAGS)
+ error = blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS, 0);
+#elif defined(HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS)
+ error = blkdev_issue_discard(BDH_BDEV(bdh),
+ sector, nsect, GFP_NOFS);
+#else
+#error "unsupported kernel"
#endif
+ return (error);
+}
+
+/*
+ * Entry point for TRIM ops. This calls the right wrapper for secure erase or
+ * discard, and then does the appropriate finishing work for error vs success
+ * and async vs sync.
+ */
static int
vdev_disk_io_trim(zio_t *zio)
{
- unsigned long trim_flags = 0;
- if (zio->io_trim_flags & ZIO_TRIM_SECURE) {
-#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
- return (-blkdev_issue_secure_erase(
- BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS));
-#elif defined(BLKDEV_DISCARD_SECURE)
- trim_flags |= BLKDEV_DISCARD_SECURE;
-#endif
+ int error;
+ struct bio *bio;
+
+ zfs_bdev_handle_t *bdh = ((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh;
+ sector_t sector = zio->io_offset >> 9;
+ sector_t nsects = zio->io_size >> 9;
+
+ if (zio->io_trim_flags & ZIO_TRIM_SECURE)
+ error = vdev_bdev_issue_secure_erase(bdh, sector, nsects, &bio);
+ else
+ error = vdev_bdev_issue_discard(bdh, sector, nsects, &bio);
+
+ if (error != 0)
+ return (SET_ERROR(-error));
+
+ if (bio == NULL) {
+ /*
+ * This was a synchronous op that completed successfully, so
+ * return it to ZFS immediately.
+ */
+ zio_interrupt(zio);
+ } else {
+ /*
+ * This was an asynchronous op; set up completion callback and
+ * issue it.
+ */
+ bio->bi_private = zio;
+ bio->bi_end_io = vdev_disk_discard_end_io;
+ vdev_submit_bio(bio);
}
-#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \
- defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC)
- return (vdev_issue_discard_trim(zio, trim_flags));
-#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
- return (-blkdev_issue_discard(
- BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh),
- zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags));
-#else
-#error "Unsupported kernel"
-#endif
+
+ return (0);
}
int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
@@ -1390,14 +1433,12 @@ vdev_disk_io_start(zio_t *zio)
return;
case ZIO_TYPE_TRIM:
- zio->io_error = vdev_disk_io_trim(zio);
+ error = vdev_disk_io_trim(zio);
rw_exit(&vd->vd_lock);
-#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE)
- if (zio->io_trim_flags & ZIO_TRIM_SECURE)
- zio_interrupt(zio);
-#elif defined(HAVE_BLKDEV_ISSUE_DISCARD)
- zio_interrupt(zio);
-#endif
+ if (error) {
+ zio->io_error = error;
+ zio_execute(zio);
+ }
return;
case ZIO_TYPE_READ: