diff options
Diffstat (limited to 'usr.sbin/makefs/zfs/dsl.c')
| -rw-r--r-- | usr.sbin/makefs/zfs/dsl.c | 661 |
1 files changed, 661 insertions, 0 deletions
diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c new file mode 100644 index 000000000000..1977521d7f92 --- /dev/null +++ b/usr.sbin/makefs/zfs/dsl.c @@ -0,0 +1,661 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include <util.h> + +#include "makefs.h" +#include "zfs.h" + +typedef struct zfs_dsl_dataset { + zfs_objset_t *os; /* referenced objset, may be null */ + dsl_dataset_phys_t *phys; /* on-disk representation */ + uint64_t dsid; /* DSL dataset dnode */ + + struct zfs_dsl_dir *dir; /* containing parent */ +} zfs_dsl_dataset_t; + +typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; + +typedef struct zfs_dsl_dir { + char *fullname; /* full dataset name */ + char *name; /* basename(fullname) */ + dsl_dir_phys_t *phys; /* on-disk representation */ + nvlist_t *propsnv; /* properties saved in propszap */ + + zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ + + uint64_t dirid; /* DSL directory dnode */ + zfs_zap_t *propszap; /* dataset properties */ + zfs_zap_t *childzap; /* child directories */ + + /* DSL directory tree linkage. */ + struct zfs_dsl_dir *parent; + zfs_dsl_dir_list_t children; + STAILQ_ENTRY(zfs_dsl_dir) next; +} zfs_dsl_dir_t; + +static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); +static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); + +static int +nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) +{ + char *str; + int error, len; + + error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); + if (error == 0) { + *retp = ecalloc(1, len + 1); + memcpy(*retp, str, len); + } + return (error); +} + +static int +nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) +{ + return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); +} + +/* + * Return an allocated string containing the head dataset's mountpoint, + * including the root path prefix. + * + * If the dataset has a mountpoint property, it is returned. Otherwise we have + * to follow ZFS' inheritance rules. + */ +char * +dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) +{ + zfs_dsl_dir_t *pdir; + char *mountpoint; + + if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) { + if (strcmp(mountpoint, "none") == 0) + return (NULL); + } else { + /* + * If we don't have a mountpoint, it's inherited from one of our + * ancestors. Walk up the hierarchy until we find it, building + * up our mountpoint along the way. The mountpoint property is + * always set for the root dataset. + */ + for (pdir = dir->parent, mountpoint = estrdup(dir->name);; + pdir = pdir->parent) { + char *origmountpoint, *tmp; + + origmountpoint = mountpoint; + + if (nvlist_find_string(pdir->propsnv, "mountpoint", + &tmp) == 0) { + (void)easprintf(&mountpoint, "%s%s%s", tmp, + tmp[strlen(tmp) - 1] == '/' ? "" : "/", + origmountpoint); + free(tmp); + free(origmountpoint); + break; + } + + (void)easprintf(&mountpoint, "%s/%s", pdir->name, + origmountpoint); + free(origmountpoint); + } + } + assert(mountpoint[0] == '/'); + assert(strstr(mountpoint, zfs->rootpath) == mountpoint); + + return (mountpoint); +} + +int +dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp) +{ + return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp)); +} + +/* + * Handle dataset properties that we know about; stash them into an nvlist to be + * written later to the properties ZAP object. + * + * If the set of properties we handle grows too much, we should probably explore + * using libzfs to manage them. + */ +static void +dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, + const char *val) +{ + nvlist_t *nvl; + + nvl = dir->propsnv; + if (val == NULL || val[0] == '\0') + errx(1, "missing value for property `%s'", key); + if (nvpair_find(nvl, key) != NULL) + errx(1, "property `%s' already set", key); + + if (strcmp(key, "mountpoint") == 0) { + if (strcmp(val, "none") != 0) { + if (val[0] != '/') + errx(1, "mountpoint `%s' is not absolute", val); + if (strcmp(val, zfs->rootpath) != 0 && + strcmp(zfs->rootpath, "/") != 0 && + (strstr(val, zfs->rootpath) != val || + val[strlen(zfs->rootpath)] != '/')) { + errx(1, "mountpoint `%s' is not prefixed by " + "the root path `%s'", val, zfs->rootpath); + } + } + (void)nvlist_add_string(nvl, key, val); + } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || + strcmp(key, "setuid") == 0) { + if (strcmp(val, "on") == 0) + (void)nvlist_add_uint64(nvl, key, 1); + else if (strcmp(val, "off") == 0) + (void)nvlist_add_uint64(nvl, key, 0); + else + errx(1, "invalid value `%s' for %s", val, key); + } else if (strcmp(key, "canmount") == 0) { + if (strcmp(val, "noauto") == 0) + (void)nvlist_add_uint64(nvl, key, 2); + else if (strcmp(val, "on") == 0) + (void)nvlist_add_uint64(nvl, key, 1); + else if (strcmp(val, "off") == 0) + (void)nvlist_add_uint64(nvl, key, 0); + else + errx(1, "invalid value `%s' for %s", val, key); + } else if (strcmp(key, "compression") == 0) { + size_t i; + + const struct zfs_compression_algorithm { + const char *name; + enum zio_compress alg; + } compression_algorithms[] = { + { "off", ZIO_COMPRESS_OFF }, + { "on", ZIO_COMPRESS_ON }, + { "lzjb", ZIO_COMPRESS_LZJB }, + { "gzip", ZIO_COMPRESS_GZIP_6 }, + { "gzip-1", ZIO_COMPRESS_GZIP_1 }, + { "gzip-2", ZIO_COMPRESS_GZIP_2 }, + { "gzip-3", ZIO_COMPRESS_GZIP_3 }, + { "gzip-4", ZIO_COMPRESS_GZIP_4 }, + { "gzip-5", ZIO_COMPRESS_GZIP_5 }, + { "gzip-6", ZIO_COMPRESS_GZIP_6 }, + { "gzip-7", ZIO_COMPRESS_GZIP_7 }, + { "gzip-8", ZIO_COMPRESS_GZIP_8 }, + { "gzip-9", ZIO_COMPRESS_GZIP_9 }, + { "zle", ZIO_COMPRESS_ZLE }, + { "lz4", ZIO_COMPRESS_LZ4 }, + { "zstd", ZIO_COMPRESS_ZSTD }, + }; + for (i = 0; i < nitems(compression_algorithms); i++) { + if (strcmp(val, compression_algorithms[i].name) == 0) { + nvlist_add_uint64(nvl, key, + compression_algorithms[i].alg); + break; + } + } + if (i == nitems(compression_algorithms)) + errx(1, "invalid compression algorithm `%s'", val); + } else { + errx(1, "unknown property `%s'", key); + } +} + +static zfs_dsl_dir_t * +dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) +{ + zfs_dsl_dir_t *dir; + char *path; + + (void)easprintf(&path, "%s/%s", zfs->poolname, name); + dir = dsl_dir_alloc(zfs, path); + free(path); + return (dir); +} + +static void +dsl_origindir_init(zfs_opt_t *zfs) +{ + dnode_phys_t *clones; + uint64_t clonesid; + + zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); + zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); + zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); + + clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); + zfs->cloneszap = zap_alloc(zfs->mos, clones); + zfs->origindsldir->phys->dd_clones = clonesid; +} + +void +dsl_init(zfs_opt_t *zfs) +{ + zfs_dsl_dir_t *dir; + struct dataset_desc *d; + const char *dspropdelim; + + dspropdelim = ";"; + + zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); + + zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); + zfs->rootdsldir->headds = zfs->rootds; + + zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); + zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); + dsl_origindir_init(zfs); + + /* + * Go through the list of user-specified datasets and create DSL objects + * for them. + */ + STAILQ_FOREACH(d, &zfs->datasetdescs, next) { + char *dsname, *next, *params, *param, *nextparam; + + params = d->params; + dsname = strsep(¶ms, dspropdelim); + + if (strcmp(dsname, zfs->poolname) == 0) { + /* + * This is the root dataset; it's already created, so + * we're just setting options. + */ + dir = zfs->rootdsldir; + } else { + /* + * This dataset must be a child of the root dataset. + */ + if (strstr(dsname, zfs->poolname) != dsname || + (next = strchr(dsname, '/')) == NULL || + (size_t)(next - dsname) != strlen(zfs->poolname)) { + errx(1, "dataset `%s' must be a child of `%s'", + dsname, zfs->poolname); + } + dir = dsl_dir_alloc(zfs, dsname); + dir->headds = dsl_dataset_alloc(zfs, dir); + } + + for (nextparam = param = params; nextparam != NULL;) { + char *key, *val; + + param = strsep(&nextparam, dspropdelim); + + key = val = param; + key = strsep(&val, "="); + dsl_dir_set_prop(zfs, dir, key, val); + } + } + + /* + * Set the root dataset's mount point and compression strategy if the + * user didn't override the defaults. + */ + if (nvpair_find(zfs->rootdsldir->propsnv, "compression") == NULL) { + (void)nvlist_add_uint64(zfs->rootdsldir->propsnv, + "compression", ZIO_COMPRESS_OFF); + } + if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { + (void)nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", + zfs->rootpath); + } +} + +uint64_t +dsl_dir_id(zfs_dsl_dir_t *dir) +{ + return (dir->dirid); +} + +uint64_t +dsl_dir_dataset_id(zfs_dsl_dir_t *dir) +{ + return (dir->headds->dsid); +} + +static void +dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, + void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) +{ + zfs_dsl_dir_t *cdsldir; + + STAILQ_FOREACH(cdsldir, &dsldir->children, next) { + dsl_dir_foreach_post(zfs, cdsldir, cb, arg); + } + cb(zfs, dsldir, arg); +} + +/* + * Used when the caller doesn't care about the order one way or another. + */ +void +dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, + void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) +{ + dsl_dir_foreach_post(zfs, dsldir, cb, arg); +} + +const char * +dsl_dir_fullname(const zfs_dsl_dir_t *dir) +{ + return (dir->fullname); +} + +/* + * Create a DSL directory, which is effectively an entry in the ZFS namespace. + * We always create a root DSL directory, whose name is the pool's name, and + * several metadata directories. + * + * Each directory has two ZAP objects, one pointing to child directories, and + * one for properties (which are inherited by children unless overridden). + * Directories typically reference a DSL dataset, the "head dataset", which + * points to an object set. + */ +static zfs_dsl_dir_t * +dsl_dir_alloc(zfs_opt_t *zfs, const char *name) +{ + zfs_dsl_dir_list_t l, *lp; + zfs_dsl_dir_t *dir, *parent; + dnode_phys_t *dnode; + char *dirname, *nextdir, *origname; + uint64_t childid, propsid; + + dir = ecalloc(1, sizeof(*dir)); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, + DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); + dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); + dir->propszap = zap_alloc(zfs->mos, dnode); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, + &childid); + dir->childzap = zap_alloc(zfs->mos, dnode); + + dir->propsnv = nvlist_create(NV_UNIQUE_NAME); + STAILQ_INIT(&dir->children); + + dir->phys->dd_child_dir_zapobj = childid; + dir->phys->dd_props_zapobj = propsid; + + if (name == NULL) { + /* + * This is the root DSL directory. + */ + dir->name = estrdup(zfs->poolname); + dir->fullname = estrdup(zfs->poolname); + dir->parent = NULL; + dir->phys->dd_parent_obj = 0; + + assert(zfs->rootdsldir == NULL); + zfs->rootdsldir = dir; + return (dir); + } + + /* + * Insert the new directory into the hierarchy. Currently this must be + * done in order, e.g., when creating pool/a/b, pool/a must already + * exist. + */ + STAILQ_INIT(&l); + STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); + origname = dirname = nextdir = estrdup(name); + parent = NULL; + for (lp = &l;; lp = &parent->children) { + dirname = strsep(&nextdir, "/"); + if (nextdir == NULL) + break; + + STAILQ_FOREACH(parent, lp, next) { + if (strcmp(parent->name, dirname) == 0) + break; + } + if (parent == NULL) { + errx(1, "no parent at `%s' for filesystem `%s'", + dirname, name); + } + } + + dir->fullname = estrdup(name); + dir->name = estrdup(dirname); + free(origname); + STAILQ_INSERT_TAIL(lp, dir, next); + zap_add_uint64(parent->childzap, dir->name, dir->dirid); + + dir->parent = parent; + dir->phys->dd_parent_obj = parent->dirid; + return (dir); +} + +static void +dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) +{ + dir->phys->dd_used_bytes += bytes; + dir->phys->dd_compressed_bytes += bytes; + dir->phys->dd_uncompressed_bytes += bytes; +} + +/* + * See dsl_dir_root_finalize(). + */ +void +dsl_dir_root_finalize(zfs_opt_t *zfs, uint64_t bytes) +{ + dsl_dir_size_add(zfs->mosdsldir, bytes); + zfs->mosdsldir->phys->dd_used_breakdown[DD_USED_HEAD] += bytes; + + dsl_dir_size_add(zfs->rootdsldir, bytes); + zfs->rootdsldir->phys->dd_used_breakdown[DD_USED_CHILD] += bytes; +} + +/* + * Convert dataset properties into entries in the DSL directory's properties + * ZAP. + */ +static void +dsl_dir_finalize_props(zfs_dsl_dir_t *dir) +{ + for (nvp_header_t *nvh = NULL; + (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { + nv_string_t *nvname; + nv_pair_data_t *nvdata; + char *name; + + nvname = (nv_string_t *)(nvh + 1); + nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + + NV_ALIGN4(nvname->nv_size)); + + name = nvstring_get(nvname); + switch (nvdata->nv_type) { + case DATA_TYPE_UINT64: { + uint64_t val; + + memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); + zap_add_uint64(dir->propszap, name, val); + break; + } + case DATA_TYPE_STRING: { + nv_string_t *nvstr; + char *val; + + nvstr = (nv_string_t *)&nvdata->nv_data[0]; + val = nvstring_get(nvstr); + zap_add_string(dir->propszap, name, val); + free(val); + break; + } + default: + assert(0); + } + free(name); + } +} + +static void +dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) +{ + zfs_dsl_dir_t *cdir; + dnode_phys_t *snapnames; + zfs_dsl_dataset_t *headds; + zfs_objset_t *os; + uint64_t bytes, childbytes, snapnamesid; + + dsl_dir_finalize_props(dir); + zap_write(zfs, dir->propszap); + zap_write(zfs, dir->childzap); + + headds = dir->headds; + if (headds == NULL) + return; + os = headds->os; + if (os == NULL) + return; + + snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, + &snapnamesid); + zap_write(zfs, zap_alloc(zfs->mos, snapnames)); + + dir->phys->dd_head_dataset_obj = headds->dsid; + dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; + headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; + headds->phys->ds_snapnames_zapobj = snapnamesid; + objset_root_blkptr_copy(os, &headds->phys->ds_bp); + + zfs->snapds->phys->ds_num_children++; + zap_add_uint64_self(zfs->cloneszap, headds->dsid); + + bytes = objset_space(os); + headds->phys->ds_used_bytes = bytes; + headds->phys->ds_uncompressed_bytes = bytes; + headds->phys->ds_compressed_bytes = bytes; + + childbytes = 0; + STAILQ_FOREACH(cdir, &dir->children, next) { + /* + * The root directory needs a special case: the amount of + * space used for the MOS isn't known until everything else is + * finalized, so it can't be accounted in the MOS directory's + * parent until then, at which point dsl_dir_root_finalize() is + * called. + */ + if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) + continue; + childbytes += cdir->phys->dd_used_bytes; + } + dsl_dir_size_add(dir, bytes + childbytes); + + dir->phys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dir->phys->dd_used_breakdown[DD_USED_HEAD] = bytes; + dir->phys->dd_used_breakdown[DD_USED_CHILD] = childbytes; +} + +void +dsl_write(zfs_opt_t *zfs) +{ + zfs_zap_t *snapnameszap; + dnode_phys_t *snapnames; + uint64_t snapmapid; + + /* + * Perform accounting, starting from the leaves of the DSL directory + * tree. Accounting for $MOS is done later, once we've finished + * allocating space. + */ + dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); + + snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, + &snapmapid); + snapnameszap = zap_alloc(zfs->mos, snapnames); + zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); + zap_write(zfs, snapnameszap); + + zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; + zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; + zfs->originds->phys->ds_snapnames_zapobj = snapmapid; + + zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; + assert(zfs->snapds->phys->ds_num_children > 0); + zfs->snapds->phys->ds_num_children++; + + zap_write(zfs, zfs->cloneszap); + + /* XXX-MJ dirs and datasets are leaked */ +} + +void +dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) +{ + dir->headds->os = os; + objset_write(zfs, os); +} + +bool +dsl_dir_has_dataset(zfs_dsl_dir_t *dir) +{ + return (dir->headds != NULL); +} + +bool +dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) +{ + return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); +} + +static zfs_dsl_dataset_t * +dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) +{ + zfs_dsl_dataset_t *ds; + dnode_phys_t *dnode; + uint64_t deadlistid; + + ds = ecalloc(1, sizeof(*ds)); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, + DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); + ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, + DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); + zap_write(zfs, zap_alloc(zfs->mos, dnode)); + + ds->phys->ds_dir_obj = dir->dirid; + ds->phys->ds_deadlist_obj = deadlistid; + ds->phys->ds_creation_txg = TXG - 1; + if (ds != zfs->snapds) + ds->phys->ds_prev_snap_txg = TXG - 1; + ds->phys->ds_guid = randomguid(); + ds->dir = dir; + + return (ds); +} |
