aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob N <robn@despairlabs.com>2024-05-02 22:18:35 +0000
committerGitHub <noreply@github.com>2024-05-02 22:18:35 +0000
commit8f1b7a6fa6762ea4c89198ceb11c521f80b92ddc (patch)
tree1dadb509ae98de1438f372a753402f1dbd87a83f
parent645b83307918085ab2f0e12618809e348635b34f (diff)
downloadsrc-vendor/openzfs/master.tar.gz
src-vendor/openzfs/master.zip
vdev_disk: disable flushes if device does not support itvendor/openzfs/master
If the underlying device doesn't have a write-back cache, the kernel will just return a successful response. This doesn't hurt anything, but it's extra work on the IO taskqs that is unnecessary. So, detect this when we open the device for the first time. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #16148
-rw-r--r--include/os/linux/kernel/linux/blkdev_compat.h27
-rw-r--r--module/os/linux/zfs/vdev_disk.c7
2 files changed, 32 insertions, 2 deletions
diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h
index b0f398354e4f..658f546213de 100644
--- a/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/include/os/linux/kernel/linux/blkdev_compat.h
@@ -94,6 +94,33 @@ blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
#endif
}
+/*
+ * Detect if a device has a write cache. Used to set the initial value for the
+ * vdev nowritecache flag.
+ *
+ * 4.10: QUEUE_FLAG_WC added. Initialised by the driver, but can be changed
+ * later by the operator. If not set, kernel will return flush requests
+ * immediately without doing anything.
+ * 6.6: QUEUE_FLAG_HW_WC added. Initialised by the driver, can't be changed.
+ * Only controls if the operator is allowed to change _WC. Initial version
+ * buggy; aliased to QUEUE_FLAG_FUA, so unusable.
+ * 6.6.10, 6.7: QUEUE_FLAG_HW_WC fixed.
+ *
+ * Older than 4.10 we just assume write cache, and let the normal flush fail
+ * detection apply.
+ */
+static inline boolean_t
+zfs_bdev_has_write_cache(struct block_device *bdev)
+{
+#if defined(QUEUE_FLAG_HW_WC) && QUEUE_FLAG_HW_WC != QUEUE_FLAG_FUA
+ return (test_bit(QUEUE_FLAG_HW_WC, &bdev_get_queue(bdev)->queue_flags));
+#elif defined(QUEUE_FLAG_WC)
+ return (test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags));
+#else
+ return (B_TRUE);
+#endif
+}
+
static inline void
blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
{
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 2cea61a6294c..463c5f705102 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -429,8 +429,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
/* Determine the logical block size */
int logical_block_size = bdev_logical_block_size(bdev);
- /* Clear the nowritecache bit, causes vdev_reopen() to try again. */
- v->vdev_nowritecache = B_FALSE;
+ /*
+ * If the device has a write cache, clear the nowritecache flag,
+ * so that we start issuing flush requests again.
+ */
+ v->vdev_nowritecache = !zfs_bdev_has_write_cache(bdev);
/* Set when device reports it supports TRIM. */
v->vdev_has_trim = bdev_discard_supported(bdev);