summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andriy Gapon <avg@FreeBSD.org> 2017-09-20 06:34:48 +0000
committer Andriy Gapon <avg@FreeBSD.org> 2017-09-20 06:34:48 +0000
commit c014f2f95b331d1af4672afc8739eb071ae6551c (patch)
tree af4c8558d0048f404ca068f816a88794db0ebb90
parent f3dbcb8c81432ef26a56091b9ec040a1ab017443 (diff)
Notes
-rw-r--r-- uts/common/fs/zfs/vdev_mirror.c 53
1 files changed, 47 insertions, 6 deletions
diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c
index a57bd6c73b35..d4cf540ee583 100644
--- a/uts/common/fs/zfs/vdev_mirror.c
+++ b/uts/common/fs/zfs/vdev_mirror.c
@@ -29,6 +29,9 @@
#include <sys/zfs_context.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_scan.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/abd.h>
@@ -49,7 +52,7 @@ typedef struct mirror_child {
typedef struct mirror_map {
int mm_children;
- int mm_replacing;
+ int mm_resilvering;
int mm_preferred;
int mm_root;
mirror_child_t mm_child[1];
@@ -86,7 +89,7 @@ vdev_mirror_map_alloc(zio_t *zio)
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
mm->mm_children = c;
- mm->mm_replacing = B_FALSE;
+ mm->mm_resilvering = B_FALSE;
mm->mm_preferred = spa_get_random(c);
mm->mm_root = B_TRUE;
@@ -109,13 +112,51 @@ vdev_mirror_map_alloc(zio_t *zio)
mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
}
} else {
+ int replacing;
+
c = vd->vdev_children;
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
mm->mm_children = c;
- mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
+ /*
+ * If we are resilvering, then we should handle scrub reads
+ * differently; we shouldn't issue them to the resilvering
+ * device because it might not have those blocks.
+ *
+ * We are resilvering iff:
+ * 1) We are a replacing vdev (i.e. our name is "replacing-1" or
+ * "spare-1" or something like that), and
+ * 2) The pool is currently being resilvered.
+ *
+ * We cannot simply check vd->vdev_resilver_txg, because it's
+ * not set in this path.
+ *
+ * Nor can we just check our vdev_ops; there are cases (such as
+ * when a user types "zpool replace pool odev spare_dev" and
+ * spare_dev is in the spare list, or when a spare device is
+ * automatically used to replace a DEGRADED device) when
+ * resilvering is complete but both the original vdev and the
+ * spare vdev remain in the pool. That behavior is intentional.
+ * It helps implement the policy that a spare should be
+ * automatically removed from the pool after the user replaces
+ * the device that originally failed.
+ */
+ replacing = (vd->vdev_ops == &vdev_replacing_ops ||
vd->vdev_ops == &vdev_spare_ops);
- mm->mm_preferred = mm->mm_replacing ? 0 :
+ /*
+ * If a spa load is in progress, then spa_dsl_pool may be
+ * uninitialized. But we shouldn't be resilvering during a spa
+ * load anyway.
+ */
+ if (replacing &&
+ (spa_load_state(vd->vdev_spa) == SPA_LOAD_NONE) &&
+ dsl_scan_resilvering(vd->vdev_spa->spa_dsl_pool)) {
+ mm->mm_resilvering = B_TRUE;
+ } else {
+ mm->mm_resilvering = B_FALSE;
+ }
+
+ mm->mm_preferred = mm->mm_resilvering ? 0 :
(zio->io_offset >> vdev_mirror_shift) % c;
mm->mm_root = B_FALSE;
@@ -271,7 +312,7 @@ vdev_mirror_io_start(zio_t *zio)
mm = vdev_mirror_map_alloc(zio);
if (zio->io_type == ZIO_TYPE_READ) {
- if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
+ if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering) {
/*
* For scrubbing reads we need to allocate a read
* buffer for each child and issue reads to all
@@ -408,7 +449,7 @@ vdev_mirror_io_done(zio_t *zio)
if (good_copies && spa_writeable(zio->io_spa) &&
(unexpected_errors ||
(zio->io_flags & ZIO_FLAG_RESILVER) ||
- ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) {
+ ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_resilvering))) {
/*
* Use the good data we have in hand to repair damaged children.
*/