author    Brian Behlendorf <behlendorf1@llnl.gov>  2020-07-03 18:05:50 +0000
committer GitHub <noreply@github.com>              2020-07-03 18:05:50 +0000
commit    9a49d3f3d3bfa26df4e5e54d574cb490f0ee284b
tree      715c2fa00e55762764cadef8460da09f919910ad
parent    7ddb753d17f2c12f152647c0e34eb9c42ee5e4af
Diffstat (limited to 'module/zfs/spa.c')
 -rw-r--r--  module/zfs/spa.c  109
 1 file changed, 87 insertions(+), 22 deletions(-)
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 943330886eec..6b60227d244f 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -57,6 +57,7 @@
#include <sys/vdev_indirect_mapping.h>
#include <sys/vdev_indirect_births.h>
#include <sys/vdev_initialize.h>
+#include <sys/vdev_rebuild.h>
#include <sys/vdev_trim.h>
#include <sys/vdev_disk.h>
#include <sys/metaslab.h>
@@ -1562,6 +1563,7 @@ spa_unload(spa_t *spa)
vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE);
vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
vdev_autotrim_stop_all(spa);
+ vdev_rebuild_stop_all(spa);
}
/*
@@ -4240,7 +4242,7 @@ spa_ld_load_vdev_metadata(spa_t *spa)
* Propagate the leaf DTLs we just loaded all the way up the vdev tree.
*/
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
+ vdev_dtl_reassess(rvd, 0, 0, B_FALSE, B_FALSE);
spa_config_exit(spa, SCL_ALL, FTAG);
return (0);
@@ -4829,11 +4831,16 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
update_config_cache);
/*
- * Check all DTLs to see if anything needs resilvering.
+ * Check if a rebuild was in progress and if so resume it.
+ * Then check all DTLs to see if anything needs resilvering.
+ * The resilver will be deferred if a rebuild was started.
*/
- if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
- vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
+ if (vdev_rebuild_active(spa->spa_root_vdev)) {
+ vdev_rebuild_restart(spa);
+ } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
spa_async_request(spa, SPA_ASYNC_RESILVER);
+ }
/*
* Log the fact that we booted up (so that we can detect if
@@ -6313,6 +6320,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE);
vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
vdev_autotrim_stop_all(spa);
+ vdev_rebuild_stop_all(spa);
}
/*
@@ -6536,12 +6544,17 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
* extra rules: you can't attach to it after it's been created, and upon
* completion of resilvering, the first disk (the one being replaced)
* is automatically detached.
+ *
+ * If 'rebuild' is specified, then sequential reconstruction (a.k.a. rebuild)
+ * should be performed instead of traditional healing reconstruction. From
+ * an administrator's perspective these are both resilver operations.
*/
int
-spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
+spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
+ int rebuild)
{
uint64_t txg, dtl_max_txg;
- vdev_t *rvd __maybe_unused = spa->spa_root_vdev;
+ vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
vdev_ops_t *pvops;
char *oldvdpath, *newvdpath;
@@ -6561,6 +6574,19 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
return (spa_vdev_exit(spa, NULL, txg, error));
}
+ if (rebuild) {
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
+ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
+ if (dsl_scan_resilvering(spa_get_dsl(spa)))
+ return (spa_vdev_exit(spa, NULL, txg,
+ ZFS_ERR_RESILVER_IN_PROGRESS));
+ } else {
+ if (vdev_rebuild_active(rvd))
+ return (spa_vdev_exit(spa, NULL, txg,
+ ZFS_ERR_REBUILD_IN_PROGRESS));
+ }
+
if (spa->spa_vdev_removal != NULL)
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
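
Taken together with the signature change in the previous hunk, these checks mean the caller picks the reconstruction style up front and the two styles exclude each other. A minimal caller-side sketch (illustrative only, not part of the commit; the error handling reflects the checks added above):

/*
 * Attach 'nvroot' to the vdev identified by 'guid', requesting a
 * sequential rebuild instead of a healing resilver.  The last two
 * arguments are the 'replacing' and 'rebuild' flags of the updated
 * spa_vdev_attach() signature.
 */
error = spa_vdev_attach(spa, guid, nvroot, B_FALSE, B_TRUE);
if (error == ENOTSUP) {
	/* Feature not enabled, or this vdev layout cannot be rebuilt. */
} else if (error == ZFS_ERR_RESILVER_IN_PROGRESS) {
	/* A healing resilver is running; retry without the rebuild flag. */
}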
@@ -6593,6 +6619,18 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+ if (rebuild) {
+ /*
+ * For rebuilds, the parent vdev must support reconstruction
+ * using only space maps. This means the only allowable
+ * parents are the root vdev or a mirror vdev.
+ */
+ if (pvd->vdev_ops != &vdev_mirror_ops &&
+ pvd->vdev_ops != &vdev_root_ops) {
+ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+ }
+ }
+
if (!replacing) {
/*
* For attach, the only allowable parent is a mirror or the root
@@ -6646,7 +6684,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* than the top-level vdev.
*/
if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
- return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
+ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
/*
* If this is an in-place replacement, update oldvd's path and devid
@@ -6664,9 +6702,6 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
}
}
- /* mark the device being resilvered */
- newvd->vdev_resilver_txg = txg;
-
/*
* If the parent is not a mirror, or if we're replacing, insert the new
* mirror/replacing/spare vdev above oldvd.
@@ -6704,8 +6739,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
dtl_max_txg = txg + TXG_CONCURRENT_STATES;
- vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
- dtl_max_txg - TXG_INITIAL);
+ vdev_dtl_dirty(newvd, DTL_MISSING,
+ TXG_INITIAL, dtl_max_txg - TXG_INITIAL);
if (newvd->vdev_isspare) {
spa_spare_activate(newvd);
@@ -6722,16 +6757,25 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
vdev_dirty(tvd, VDD_DTL, newvd, txg);
/*
- * Schedule the resilver to restart in the future. We do this to
- * ensure that dmu_sync-ed blocks have been stitched into the
- * respective datasets. We do not do this if resilvers have been
- * deferred.
+ * Schedule the resilver or rebuild to restart in the future. We do
+ * this to ensure that dmu_sync-ed blocks have been stitched into the
+ * respective datasets.
*/
- if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
- spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_defer_resilver(newvd);
- else
- dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
+ if (rebuild) {
+ newvd->vdev_rebuild_txg = txg;
+
+ vdev_rebuild(tvd);
+ } else {
+ newvd->vdev_resilver_txg = txg;
+
+ if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ vdev_defer_resilver(newvd);
+ } else {
+ dsl_scan_restart_resilver(spa->spa_dsl_pool,
+ dtl_max_txg);
+ }
+ }
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
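
In condensed form, the scheduling added above makes the split between the two engines explicit: a sequential rebuild is driven per top-level vdev from its metaslab space maps, while a healing resilver remains a pool-wide DSL scan (or is folded into one already in progress). A sketch of the branch with the rationale annotated (mirrors the hunk above; comments added for illustration):

if (rebuild) {
	/* Sequential rebuild: stamp the new child and let the top-level
	 * vdev copy allocated ranges from its space maps. */
	newvd->vdev_rebuild_txg = txg;
	vdev_rebuild(tvd);
} else {
	/* Healing resilver: defer onto the scan already running when
	 * resilver_defer is enabled, otherwise restart the pool-wide scan. */
	newvd->vdev_resilver_txg = txg;
	if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
	    spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
		vdev_defer_resilver(newvd);
	else
		dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
}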
@@ -6774,7 +6818,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
ASSERT(spa_writeable(spa));
- txg = spa_vdev_enter(spa);
+ txg = spa_vdev_detach_enter(spa, guid);
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -7728,6 +7772,12 @@ spa_vdev_resilver_done(spa_t *spa)
}
spa_config_exit(spa, SCL_ALL, FTAG);
+
+ /*
+ * If a detach was not performed above, replace waiters will not have
+ * been notified. In which case we must do so now.
+ */
+ spa_notify_waiters(spa);
}
/*
@@ -7971,9 +8021,21 @@ spa_async_thread(void *arg)
spa_vdev_resilver_done(spa);
/*
+ * If any devices are done replacing, detach them. Then, if no
+ * top-level vdevs are rebuilding, attempt to kick off a scrub.
+ */
+ if (tasks & SPA_ASYNC_REBUILD_DONE) {
+ spa_vdev_resilver_done(spa);
+
+ if (!vdev_rebuild_active(spa->spa_root_vdev))
+ (void) dsl_scan(spa->spa_dsl_pool, POOL_SCAN_SCRUB);
+ }
+
+ /*
* Kick off a resilver.
*/
if (tasks & SPA_ASYNC_RESILVER &&
+ !vdev_rebuild_active(spa->spa_root_vdev) &&
(!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
dsl_scan_restart_resilver(dp, 0);
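
The scrub queued when SPA_ASYNC_REBUILD_DONE fires is what makes a sequential rebuild safe to rely on: the rebuild restores redundancy quickly by copying allocated ranges, but it does not verify checksums, so a scrub follows to validate the copied data. A condensed sketch of that completion path (illustrative, matching the hunk above):

if (tasks & SPA_ASYNC_REBUILD_DONE) {
	/* Detach any 'replacing' vdevs whose reconstruction has finished. */
	spa_vdev_resilver_done(spa);

	/*
	 * Rebuilds skip checksum verification, so once no top-level vdev
	 * is still rebuilding, verify the copied data with a scrub.
	 */
	if (!vdev_rebuild_active(spa->spa_root_vdev))
		(void) dsl_scan(spa->spa_dsl_pool, POOL_SCAN_SCRUB);
}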
@@ -9470,6 +9532,9 @@ spa_activity_in_progress(spa_t *spa, zpool_wait_activity_t activity,
DSS_SCANNING);
break;
case ZPOOL_WAIT_RESILVER:
+ if ((*in_progress = vdev_rebuild_active(spa->spa_root_vdev)))
+ break;
+ /* fall through */
case ZPOOL_WAIT_SCRUB:
{
boolean_t scanning, paused, is_scrub;