about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rob Norris <rob.norris@klarasystems.com> 2024-04-04 11:34:54 +0000
committer Brian Behlendorf <behlendorf1@llnl.gov> 2024-04-12 00:17:11 +0000
commit c9c838aa1fca9aef84d74db1d99872c5efa9a25d (patch)
tree 3d838e71611b593e11f61727aeb298a6a2c2f009
parent cac416f1062fdbd2ff84ff2b40835d4853cbf190 (diff)
download src-c9c838aa1fca9aef84d74db1d99872c5efa9a25d.tar.gz
src-c9c838aa1fca9aef84d74db1d99872c5efa9a25d.zip
zio: remove io_cmd and DKIOCFLUSHWRITECACHE
There are no other options, so we can just always assume it's a flush. Includes some light refactoring where a switch statement was doing control flow that no longer works. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Closes #16064
-rw-r--r-- include/os/linux/zfs/sys/trace_common.h 6
-rw-r--r-- include/sys/zio.h 1
-rw-r--r-- module/os/freebsd/zfs/vdev_file.c 9
-rw-r--r-- module/os/freebsd/zfs/vdev_geom.c 43
-rw-r--r-- module/os/linux/zfs/vdev_disk.c 39
-rw-r--r-- module/os/linux/zfs/vdev_file.c 44
-rw-r--r-- module/zfs/vdev_draid.c 14
-rw-r--r-- module/zfs/zfs_fm.c 5
-rw-r--r-- module/zfs/zil.c 93
-rw-r--r-- module/zfs/zio.c 9
10 files changed, 106 insertions, 157 deletions
diff --git a/include/os/linux/zfs/sys/trace_common.h b/include/os/linux/zfs/sys/trace_common.h
index 3d4b1920d598..6ffa57c86418 100644
--- a/include/os/linux/zfs/sys/trace_common.h
+++ b/include/os/linux/zfs/sys/trace_common.h
@@ -31,7 +31,6 @@
/* ZIO macros */
#define ZIO_TP_STRUCT_ENTRY \
__field(zio_type_t, zio_type) \
- __field(int, zio_cmd) \
__field(zio_priority_t, zio_priority) \
__field(uint64_t, zio_size) \
__field(uint64_t, zio_orig_size) \
@@ -61,7 +60,6 @@
#define ZIO_TP_FAST_ASSIGN \
__entry->zio_type = zio->io_type; \
- __entry->zio_cmd = zio->io_cmd; \
__entry->zio_priority = zio->io_priority; \
__entry->zio_size = zio->io_size; \
__entry->zio_orig_size = zio->io_orig_size; \
@@ -90,7 +88,7 @@
__entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;
#define ZIO_TP_PRINTK_FMT \
- "zio { type %u cmd %i prio %u size %llu orig_size %llu " \
+ "zio { type %u prio %u size %llu orig_size %llu " \
"offset %llu timestamp %llu delta %llu delay %llu " \
"flags 0x%llx stage 0x%x pipeline 0x%x orig_flags 0x%llx " \
"orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
@@ -98,7 +96,7 @@
"type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
#define ZIO_TP_PRINTK_ARGS \
- __entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
+ __entry->zio_type, __entry->zio_priority, \
__entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
__entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
__entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 5dcd7fe073a0..545b9cf0c3c5 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -451,7 +451,6 @@ struct zio {
zio_type_t io_type;
enum zio_child io_child_type;
enum trim_flag io_trim_flags;
- int io_cmd;
zio_priority_t io_priority;
uint8_t io_reexecute;
uint8_t io_state[ZIO_WAIT_TYPES];
diff --git a/module/os/freebsd/zfs/vdev_file.c b/module/os/freebsd/zfs/vdev_file.c
index a65dfec86caf..888c8e7f8863 100644
--- a/module/os/freebsd/zfs/vdev_file.c
+++ b/module/os/freebsd/zfs/vdev_file.c
@@ -255,14 +255,7 @@ vdev_file_io_start(zio_t *zio)
return;
}
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
- zio->io_error = zfs_file_fsync(vf->vf_file,
- O_SYNC|O_DSYNC);
- break;
- default:
- zio->io_error = SET_ERROR(ENOTSUP);
- }
+ zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC|O_DSYNC);
zio_execute(zio);
return;
diff --git a/module/os/freebsd/zfs/vdev_geom.c b/module/os/freebsd/zfs/vdev_geom.c
index 196d67b4b595..264dfa5c9237 100644
--- a/module/os/freebsd/zfs/vdev_geom.c
+++ b/module/os/freebsd/zfs/vdev_geom.c
@@ -1153,42 +1153,31 @@ vdev_geom_io_start(zio_t *zio)
vd = zio->io_vd;
- switch (zio->io_type) {
- case ZIO_TYPE_IOCTL:
+ if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
if (!vdev_readable(vd)) {
zio->io_error = SET_ERROR(ENXIO);
zio_interrupt(zio);
return;
- } else {
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
- if (zfs_nocacheflush ||
- vdev_geom_bio_flush_disable)
- break;
- if (vd->vdev_nowritecache) {
- zio->io_error = SET_ERROR(ENOTSUP);
- break;
- }
- goto sendreq;
- default:
- zio->io_error = SET_ERROR(ENOTSUP);
- }
}
- zio_execute(zio);
- return;
- case ZIO_TYPE_TRIM:
- if (!vdev_geom_bio_delete_disable) {
- goto sendreq;
+ if (zfs_nocacheflush || vdev_geom_bio_flush_disable) {
+ zio_execute(zio);
+ return;
+ }
+
+ if (vd->vdev_nowritecache) {
+ zio->io_error = SET_ERROR(ENOTSUP);
+ zio_execute(zio);
+ return;
+ }
+ } else if (zio->io_type == ZIO_TYPE_TRIM) {
+ if (vdev_geom_bio_delete_disable) {
+ zio_execute(zio);
+ return;
}
- zio_execute(zio);
- return;
- default:
- ;
- /* PASSTHROUGH --- placate compiler */
}
-sendreq:
+
ASSERT(zio->io_type == ZIO_TYPE_READ ||
zio->io_type == ZIO_TYPE_WRITE ||
zio->io_type == ZIO_TYPE_TRIM ||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index f3f0c0875210..554ed22b9df8 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -1403,38 +1403,29 @@ vdev_disk_io_start(zio_t *zio)
case ZIO_TYPE_IOCTL:
if (!vdev_readable(v)) {
- rw_exit(&vd->vd_lock);
- zio->io_error = SET_ERROR(ENXIO);
- zio_interrupt(zio);
- return;
- }
-
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
-
- if (zfs_nocacheflush)
- break;
-
- if (v->vdev_nowritecache) {
- zio->io_error = SET_ERROR(ENOTSUP);
- break;
- }
-
+ /* Drive not there, can't flush */
+ error = SET_ERROR(ENXIO);
+ } else if (zfs_nocacheflush) {
+ /* Flushing disabled by operator, declare success */
+ error = 0;
+ } else if (v->vdev_nowritecache) {
+ /* This vdev not capable of flushing */
+ error = SET_ERROR(ENOTSUP);
+ } else {
+ /*
+ * Issue the flush. If successful, the response will
+ * be handled in the completion callback, so we're done.
+ */
error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
if (error == 0) {
rw_exit(&vd->vd_lock);
return;
}
-
- zio->io_error = error;
-
- break;
-
- default:
- zio->io_error = SET_ERROR(ENOTSUP);
}
+ /* Couldn't issue the flush, so set the error and return it */
rw_exit(&vd->vd_lock);
+ zio->io_error = error;
zio_execute(zio);
return;
diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c
index 5abc0426d1a7..2b483c9a9fa4 100644
--- a/module/os/linux/zfs/vdev_file.c
+++ b/module/os/linux/zfs/vdev_file.c
@@ -250,33 +250,27 @@ vdev_file_io_start(zio_t *zio)
return;
}
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
-
- if (zfs_nocacheflush)
- break;
-
- /*
- * We cannot safely call vfs_fsync() when PF_FSTRANS
- * is set in the current context. Filesystems like
- * XFS include sanity checks to verify it is not
- * already set, see xfs_vm_writepage(). Therefore
- * the sync must be dispatched to a different context.
- */
- if (__spl_pf_fstrans_check()) {
- VERIFY3U(taskq_dispatch(vdev_file_taskq,
- vdev_file_io_fsync, zio, TQ_SLEEP), !=,
- TASKQID_INVALID);
- return;
- }
-
- zio->io_error = zfs_file_fsync(vf->vf_file,
- O_SYNC | O_DSYNC);
- break;
- default:
- zio->io_error = SET_ERROR(ENOTSUP);
+ if (zfs_nocacheflush) {
+ zio_execute(zio);
+ return;
}
+ /*
+ * We cannot safely call vfs_fsync() when PF_FSTRANS
+ * is set in the current context. Filesystems like
+ * XFS include sanity checks to verify it is not
+ * already set, see xfs_vm_writepage(). Therefore
+ * the sync must be dispatched to a different context.
+ */
+ if (__spl_pf_fstrans_check()) {
+ VERIFY3U(taskq_dispatch(vdev_file_taskq,
+ vdev_file_io_fsync, zio, TQ_SLEEP), !=,
+ TASKQID_INVALID);
+ return;
+ }
+
+ zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
+
zio_execute(zio);
return;
} else if (zio->io_type == ZIO_TYPE_TRIM) {
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c
index ec961255fd64..7769ed6a377a 100644
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
@@ -2557,15 +2557,11 @@ vdev_draid_spare_ioctl(zio_t *zio)
vdev_t *vd = zio->io_vd;
int error = 0;
- if (zio->io_cmd == DKIOCFLUSHWRITECACHE) {
- for (int c = 0; c < vd->vdev_children; c++) {
- zio_nowait(zio_vdev_child_io(zio, NULL,
- vd->vdev_child[c], zio->io_offset, zio->io_abd,
- zio->io_size, zio->io_type, zio->io_priority, 0,
- vdev_draid_spare_child_done, zio));
- }
- } else {
- error = SET_ERROR(ENOTSUP);
+ for (int c = 0; c < vd->vdev_children; c++) {
+ zio_nowait(zio_vdev_child_io(zio, NULL,
+ vd->vdev_child[c], zio->io_offset, zio->io_abd,
+ zio->io_size, zio->io_type, zio->io_priority, 0,
+ vdev_draid_spare_child_done, zio));
}
return (error);
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 481af2ba826b..2f43c4aa41b8 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -1096,10 +1096,7 @@ zfs_ereport_is_valid(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio)
return (B_FALSE);
if (zio != NULL) {
- /*
- * If this is not a read or write zio, ignore the error. This
- * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
- */
+ /* If this is not a read or write zio, ignore the error */
if (zio->io_type != ZIO_TYPE_READ &&
zio->io_type != ZIO_TYPE_WRITE)
return (B_FALSE);
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 1af357c58006..34be54b337fd 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -125,10 +125,9 @@ static kstat_t *zil_kstats_global;
int zil_replay_disable = 0;
/*
- * Disable the DKIOCFLUSHWRITECACHE commands that are normally sent to
- * the disk(s) by the ZIL after an LWB write has completed. Setting this
- * will cause ZIL corruption on power loss if a volatile out-of-order
- * write cache is enabled.
+ * Disable the flush commands that are normally sent to the disk(s) by the ZIL
+ * after an LWB write has completed. Setting this will cause ZIL corruption on
+ * power loss if a volatile out-of-order write cache is enabled.
*/
static int zil_nocacheflush = 0;
@@ -1406,19 +1405,17 @@ zil_lwb_add_txg(lwb_t *lwb, uint64_t txg)
}
/*
- * This function is a called after all vdevs associated with a given lwb
- * write have completed their DKIOCFLUSHWRITECACHE command; or as soon
- * as the lwb write completes, if "zil_nocacheflush" is set. Further,
- * all "previous" lwb's will have completed before this function is
- * called; i.e. this function is called for all previous lwbs before
- * it's called for "this" lwb (enforced via zio the dependencies
- * configured in zil_lwb_set_zio_dependency()).
+ * This function is called after all vdevs associated with a given lwb write
+ * have completed their flush command; or as soon as the lwb write completes,
+ * if "zil_nocacheflush" is set. Further, all "previous" lwb's will have
+ * completed before this function is called; i.e. this function is called for
+ * all previous lwbs before it's called for "this" lwb (enforced via the zio
+ * dependencies configured in zil_lwb_set_zio_dependency()).
*
- * The intention is for this function to be called as soon as the
- * contents of an lwb are considered "stable" on disk, and will survive
- * any sudden loss of power. At this point, any threads waiting for the
- * lwb to reach this state are signalled, and the "waiter" structures
- * are marked "done".
+ * The intention is for this function to be called as soon as the contents of
+ * an lwb are considered "stable" on disk, and will survive any sudden loss of
+ * power. At this point, any threads waiting for the lwb to reach this state
+ * are signalled, and the "waiter" structures are marked "done".
*/
static void
zil_lwb_flush_vdevs_done(zio_t *zio)
@@ -1532,17 +1529,16 @@ zil_lwb_flush_wait_all(zilog_t *zilog, uint64_t txg)
}
/*
- * This is called when an lwb's write zio completes. The callback's
- * purpose is to issue the DKIOCFLUSHWRITECACHE commands for the vdevs
- * in the lwb's lwb_vdev_tree. The tree will contain the vdevs involved
- * in writing out this specific lwb's data, and in the case that cache
- * flushes have been deferred, vdevs involved in writing the data for
- * previous lwbs. The writes corresponding to all the vdevs in the
- * lwb_vdev_tree will have completed by the time this is called, due to
- * the zio dependencies configured in zil_lwb_set_zio_dependency(),
- * which takes deferred flushes into account. The lwb will be "done"
- * once zil_lwb_flush_vdevs_done() is called, which occurs in the zio
- * completion callback for the lwb's root zio.
+ * This is called when an lwb's write zio completes. The callback's purpose is
+ * to issue the flush commands for the vdevs in the lwb's lwb_vdev_tree. The
+ * tree will contain the vdevs involved in writing out this specific lwb's
+ * data, and in the case that cache flushes have been deferred, vdevs involved
+ * in writing the data for previous lwbs. The writes corresponding to all the
+ * vdevs in the lwb_vdev_tree will have completed by the time this is called,
+ * due to the zio dependencies configured in zil_lwb_set_zio_dependency(),
+ * which takes deferred flushes into account. The lwb will be "done" once
+ * zil_lwb_flush_vdevs_done() is called, which occurs in the zio completion
+ * callback for the lwb's root zio.
*/
static void
zil_lwb_write_done(zio_t *zio)
@@ -1601,19 +1597,18 @@ zil_lwb_write_done(zio_t *zio)
}
/*
- * If this lwb does not have any threads waiting for it to
- * complete, we want to defer issuing the DKIOCFLUSHWRITECACHE
- * command to the vdevs written to by "this" lwb, and instead
- * rely on the "next" lwb to handle the DKIOCFLUSHWRITECACHE
- * command for those vdevs. Thus, we merge the vdev tree of
- * "this" lwb with the vdev tree of the "next" lwb in the list,
- * and assume the "next" lwb will handle flushing the vdevs (or
- * deferring the flush(s) again).
+ * If this lwb does not have any threads waiting for it to complete, we
+ * want to defer issuing the flush command to the vdevs written to by
+ * "this" lwb, and instead rely on the "next" lwb to handle the flush
+ * command for those vdevs. Thus, we merge the vdev tree of "this" lwb
+ * with the vdev tree of the "next" lwb in the list, and assume the
+ * "next" lwb will handle flushing the vdevs (or deferring the flush(s)
+ * again).
*
- * This is a useful performance optimization, especially for
- * workloads with lots of async write activity and few sync
- * write and/or fsync activity, as it has the potential to
- * coalesce multiple flush commands to a vdev into one.
+ * This is a useful performance optimization, especially for workloads
+ * with lots of async write activity and few sync write and/or fsync
+ * activity, as it has the potential to coalesce multiple flush
+ * commands to a vdev into one.
*/
if (list_is_empty(&lwb->lwb_waiters) && nlwb != NULL) {
zil_lwb_flush_defer(lwb, nlwb);
@@ -1663,16 +1658,16 @@ zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb)
* If the previous lwb's write hasn't already completed, we also want
* to order the completion of the lwb write zios (above, we only order
* the completion of the lwb root zios). This is required because of
- * how we can defer the DKIOCFLUSHWRITECACHE commands for each lwb.
+ * how we can defer the flush commands for each lwb.
*
- * When the DKIOCFLUSHWRITECACHE commands are deferred, the previous
- * lwb will rely on this lwb to flush the vdevs written to by that
- * previous lwb. Thus, we need to ensure this lwb doesn't issue the
- * flush until after the previous lwb's write completes. We ensure
- * this ordering by setting the zio parent/child relationship here.
+ * When the flush commands are deferred, the previous lwb will rely on
+ * this lwb to flush the vdevs written to by that previous lwb. Thus,
+ * we need to ensure this lwb doesn't issue the flush until after the
+ * previous lwb's write completes. We ensure this ordering by setting
+ * the zio parent/child relationship here.
*
- * Without this relationship on the lwb's write zio, it's possible
- * for this lwb's write to complete prior to the previous lwb's write
+ * Without this relationship on the lwb's write zio, it's possible for
+ * this lwb's write to complete prior to the previous lwb's write
* completing; and thus, the vdevs for the previous lwb would be
* flushed prior to that lwb's data being written to those vdevs (the
* vdevs are flushed in the lwb write zio's completion handler,
@@ -3499,8 +3494,8 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
* callback of the lwb's zio[*].
*
* * Actually, the waiters are signaled in the zio completion
- * callback of the root zio for the DKIOCFLUSHWRITECACHE commands
- * that are sent to the vdevs upon completion of the lwb zio.
+ * callback of the root zio for the flush commands that are sent to
+ * the vdevs upon completion of the lwb zio.
*
* 2. When the itxs are inserted into the ZIL's queue of uncommitted
* itxs, the order in which they are inserted is preserved[*]; as
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 4aa08f3b30f5..031fc3d5135d 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1631,11 +1631,9 @@ zio_flush(zio_t *pio, vdev_t *vd)
return;
if (vd->vdev_children == 0) {
- zio_t *zio = zio_create(pio, vd->vdev_spa, 0, NULL, NULL, 0, 0,
+ zio_nowait(zio_create(pio, vd->vdev_spa, 0, NULL, NULL, 0, 0,
NULL, NULL, ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, 0,
- NULL, ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
- zio->io_cmd = DKIOCFLUSHWRITECACHE;
- zio_nowait(zio);
+ NULL, ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE));
} else {
for (uint64_t c = 0; c < vd->vdev_children; c++)
zio_flush(pio, vd->vdev_child[c]);
@@ -4241,8 +4239,7 @@ zio_vdev_io_assess(zio_t *zio)
* boolean flag so that we don't bother with it in the future.
*/
if ((zio->io_error == ENOTSUP || zio->io_error == ENOTTY) &&
- zio->io_type == ZIO_TYPE_IOCTL &&
- zio->io_cmd == DKIOCFLUSHWRITECACHE && vd != NULL)
+ zio->io_type == ZIO_TYPE_IOCTL && vd != NULL)
vd->vdev_nowritecache = B_TRUE;
if (zio->io_error)