diff options
author | Matt Macy <mmacy@FreeBSD.org> | 2020-08-25 02:21:27 +0000 |
---|---|---|
committer | Matt Macy <mmacy@FreeBSD.org> | 2020-08-25 02:21:27 +0000 |
commit | 9e5787d2284e187abb5b654d924394a65772e004 (patch) | |
tree | 2ebf833af6b1953d4a683e2da830fe87bf3435e1 /cddl/contrib | |
parent | 22df1ffd812f0395cdb7c0b1edae1f67b991562a (diff) | |
download | src-9e5787d2284e187abb5b654d924394a65772e004.tar.gz src-9e5787d2284e187abb5b654d924394a65772e004.zip |
Notes
Diffstat (limited to 'cddl/contrib')
73 files changed, 18 insertions, 75256 deletions
diff --git a/cddl/contrib/opensolaris/cmd/lockstat/sym.c b/cddl/contrib/opensolaris/cmd/lockstat/sym.c index f2987a028e74..b5366c566857 100644 --- a/cddl/contrib/opensolaris/cmd/lockstat/sym.c +++ b/cddl/contrib/opensolaris/cmd/lockstat/sym.c @@ -54,6 +54,7 @@ #endif #include <sys/cpuvar.h> + typedef struct syment { uintptr_t addr; char *name; @@ -72,6 +73,11 @@ static char maxsymname[64]; #endif #endif +#define __sElfN(x) typedef __CONCAT(__CONCAT(__CONCAT(Elf,__ELF_WORD_SIZE),_),x) x +__sElfN(Sym); +__sElfN(Shdr); +#define elf_getshdr __elfN(getshdr) + static void add_symbol(char *name, uintptr_t addr, size_t size) { diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.8 b/cddl/contrib/opensolaris/cmd/zdb/zdb.8 deleted file mode 100644 index e60c56c7c199..000000000000 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.8 +++ /dev/null @@ -1,414 +0,0 @@ -.\" -.\" This file and its contents are supplied under the terms of the -.\" Common Development and Distribution License ("CDDL"), version 1.0. -.\" You may only use this file in accordance with the terms of version -.\" 1.0 of the CDDL. -.\" -.\" A full copy of the text of the CDDL should have accompanied this -.\" source. A copy of the CDDL is also available via the Internet at -.\" http://www.illumos.org/license/CDDL. -.\" -.\" -.\" Copyright 2012, Richard Lowe. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. -.\" Copyright 2017 Nexenta Systems, Inc. -.\" -.Dd February 25, 2020 -.Dt ZDB 8 -.Os -.Sh NAME -.Nm zdb -.Nd display zpool debugging and consistency information -.Sh SYNOPSIS -.Nm -.Op Fl AbcdDFGhikLMPsvX -.Op Fl e Oo Fl V Oc Op Fl p Ar path ... -.Op Fl I Ar inflight I/Os -.Oo Fl o Ar var Ns = Ns Ar value Oc Ns ... -.Op Fl t Ar txg -.Op Fl U Ar cache -.Op Fl x Ar dumpdir -.Op Ar poolname Op Ar object ... -.Nm -.Op Fl AdiPv -.Op Fl e Oo Fl V Oc Op Fl p Ar path ... -.Op Fl U Ar cache -.Ar dataset Op Ar object ... 
-.Nm -.Fl C -.Op Fl A -.Op Fl U Ar cache -.Nm -.Fl E -.Op Fl A -.Ar word0 Ns \&: Ns Ar word1 Ns :...: Ns Ar word15 -.Nm -.Fl l -.Op Fl Aqu -.Ar device -.Nm -.Fl m -.Op Fl AFLPX -.Op Fl e Oo Fl V Oc Op Fl p Ar path ... -.Op Fl t Ar txg -.Op Fl U Ar cache -.Ar poolname Op Ar vdev Op Ar metaslab ... -.Nm -.Fl O -.Ar dataset path -.Nm -.Fl R -.Op Fl A -.Op Fl e Oo Fl V Oc Op Fl p Ar path ... -.Op Fl U Ar cache -.Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags -.Nm -.Fl S -.Op Fl AP -.Op Fl e Oo Fl V Oc Op Fl p Ar path ... -.Op Fl U Ar cache -.Ar poolname -.Sh DESCRIPTION -The -.Nm -utility displays information about a ZFS pool useful for debugging and performs -some amount of consistency checking. -It is a not a general purpose tool and options -.Pq and facilities -may change. -This is neither a -.Xr fsck 8 -nor an -.Xr fsdb 8 -utility. -.Pp -The output of this command in general reflects the on-disk structure of a ZFS -pool, and is inherently unstable. -The precise output of most invocations is not documented, a knowledge of ZFS -internals is assumed. -.Pp -If the -.Ar dataset -argument does not contain any -.Qq Sy / -or -.Qq Sy @ -characters, it is interpreted as a pool name. -The root dataset can be specified as -.Ar pool Ns / -.Pq pool name followed by a slash . -.Pp -When operating on an imported and active pool it is possible, though unlikely, -that zdb may interpret inconsistent pool data and behave erratically. -.Sh OPTIONS -Display options: -.Bl -tag -width Ds -.It Fl b -Display statistics regarding the number, size -.Pq logical, physical and allocated -and deduplication of blocks. -.It Fl c -Verify the checksum of all metadata blocks while printing block statistics -.Po see -.Fl b -.Pc . -.Pp -If specified multiple times, verify the checksums of all blocks. -.It Fl C -Display information about the configuration. -If specified with no other options, instead display information about the cache -file -.Pq Pa /boot/zfs/zpool.cache . 
-To specify the cache file to display, see -.Fl U . -.Pp -If specified multiple times, and a pool name is also specified display both the -cached configuration and the on-disk configuration. -If specified multiple times with -.Fl e -also display the configuration that would be used were the pool to be imported. -.It Fl d -Display information about datasets. -Specified once, displays basic dataset information: ID, create transaction, -size, and object count. -.Pp -If specified multiple times provides greater and greater verbosity. -.Pp -If object IDs are specified, display information about those specific objects -only. -.It Fl D -Display deduplication statistics, including the deduplication ratio -.Pq Sy dedup , -compression ratio -.Pq Sy compress , -inflation due to the zfs copies property -.Pq Sy copies , -and an overall effective ratio -.Pq Sy dedup No * Sy compress No / Sy copies . -.It Fl DD -Display a histogram of deduplication statistics, showing the allocated -.Pq physically present on disk -and referenced -.Pq logically referenced in the pool -block counts and sizes by reference count. -.It Fl DDD -Display the statistics independently for each deduplication table. -.It Fl DDDD -Dump the contents of the deduplication tables describing duplicate blocks. -.It Fl DDDDD -Also dump the contents of the deduplication tables describing unique blocks. -.It Fl E Ar word0 Ns \&: Ns Ar word1 Ns :...: Ns Ar word15 -Decode and display block from an embedded block pointer specified by the -.Ar word -arguments. -.It Fl h -Display pool history similar to -.Nm zpool Cm history , -but include internal changes, transaction, and dataset information. -.It Fl i -Display information about intent log -.Pq ZIL -entries relating to each dataset. -If specified multiple times, display counts of each intent log transaction type. -.It Fl k -Examine the checkpointed state of the pool. -Note, the on disk format of the pool is not reverted to the checkpointed state. 
-.It Fl l Ar device -Read the vdev labels from the specified device. -.Nm Fl l -will return 0 if valid label was found, 1 if error occurred, and 2 if no valid -labels were found. -.Pp -If the -.Fl q -option is also specified, don't print the labels. -.Pp -If the -.Fl u -option is also specified, also display the uberblocks on this device. -.It Fl L -Disable leak detection and the loading of space maps. -By default, -.Nm -verifies that all non-free blocks are referenced, which can be very expensive. -.It Fl m -Display the offset, spacemap, and free space of each metaslab. -.It Fl mm -Also display information about the on-disk free space histogram associated with -each metaslab. -.It Fl mmm -Display the maximum contiguous free space, the in-core free space histogram, and -the percentage of free space in each space map. -.It Fl mmmm -Display every spacemap record. -.It Fl M -Display the offset, spacemap, and free space of each metaslab. -.It Fl MM -Also display information about the maximum contiguous free space and the -percentage of free space in each space map. -.It Fl MMM -Display every spacemap record. -.It Fl O Ar dataset path -Look up the specified -.Ar path -inside of the -.Ar dataset -and display its metadata and indirect blocks. -Specified -.Ar path -must be relative to the root of -.Ar dataset . -This option can be combined with -.Fl v -for increasing verbosity. -.It Xo -.Fl R Ar poolname vdev Ns \&: Ns Ar offset Ns \&: Ns Ar size Ns Op : Ns Ar flags -.Xc -Read and display a block from the specified device. -By default the block is displayed as a hex dump, but see the description of the -.Sy r -flag, below. -.Pp -The block is specified in terms of a colon-separated tuple -.Ar vdev -.Pq an integer vdev identifier -.Ar offset -.Pq the offset within the vdev -.Ar size -.Pq the size of the block to read -and, optionally, -.Ar flags -.Pq a set of flags, described below . 
-.Pp -.Bl -tag -compact -width "b offset" -.It Sy b Ar offset -Print block pointer -.It Sy d -Decompress the block -.It Sy e -Byte swap the block -.It Sy g -Dump gang block header -.It Sy i -Dump indirect block -.It Sy r -Dump raw uninterpreted block data -.El -.It Fl s -Report statistics on -.Nm zdb -I/O. -Display operation counts, bandwidth, and error counts of I/O to the pool from -.Nm . -.It Fl S -Simulate the effects of deduplication, constructing a DDT and then display -that DDT as with -.Fl DD . -.It Fl u -Display the current uberblock. -.El -.Pp -Other options: -.Bl -tag -width Ds -.It Fl A -Do not abort should any assertion fail. -.It Fl AA -Enable panic recovery, certain errors which would otherwise be fatal are -demoted to warnings. -.It Fl AAA -Do not abort if asserts fail and also enable panic recovery. -.It Fl e Op Fl p Ar path ... -Operate on an exported pool, not present in -.Pa /boot/zfs/zpool.cache . -The -.Fl p -flag specifies the path under which devices are to be searched. -.It Fl x Ar dumpdir -All blocks accessed will be copied to files in the specified directory. -The blocks will be placed in sparse files whose name is the same as -that of the file or device read. -.Nm -can be then run on the generated files. -Note that the -.Fl bbc -flags are sufficient to access -.Pq and thus copy -all metadata on the pool. -.It Fl F -Attempt to make an unreadable pool readable by trying progressively older -transactions. -.It Fl G -Dump the contents of the zfs_dbgmsg buffer before exiting -.Nm . -zfs_dbgmsg is a buffer used by ZFS to dump advanced debug information. -.It Fl I Ar inflight I/Os -Limit the number of outstanding checksum I/Os to the specified value. -The default value is 200. -This option affects the performance of the -.Fl c -option. -.It Fl o Ar var Ns = Ns Ar value ... -Set the given global libzpool variable to the provided value. -The value must be an unsigned 32-bit integer. 
-Currently only little-endian systems are supported to avoid accidentally setting -the high 32 bits of 64-bit variables. -.It Fl P -Print numbers in an unscaled form more amenable to parsing, eg. 1000000 rather -than 1M. -.It Fl t Ar transaction -Specify the highest transaction to use when searching for uberblocks. -See also the -.Fl u -and -.Fl l -options for a means to see the available uberblocks and their associated -transaction numbers. -.It Fl U Ar cachefile -Use a cache file other than -.Pa /boot/zfs/zpool.cache . -.It Fl v -Enable verbosity. -Specify multiple times for increased verbosity. -.It Fl V -Attempt verbatim import. -This mimics the behavior of the kernel when loading a pool from a cachefile. -Only usable with -.Fl e . -.It Fl X -Attempt -.Qq extreme -transaction rewind, that is attempt the same recovery as -.Fl F -but read transactions otherwise deemed too old. -.El -.Pp -Specifying a display option more than once enables verbosity for only that -option, with more occurrences enabling more verbosity. -.Pp -If no options are specified, all information about the named pool will be -displayed at default verbosity. -.Sh EXAMPLES -.Bl -tag -width Ds -.It Xo -.Sy Example 1 -Display the configuration of imported pool -.Pa rpool -.Xc -.Bd -literal -# zdb -C rpool - -MOS Configuration: - version: 28 - name: 'rpool' - ... -.Ed -.It Xo -.Sy Example 2 -Display basic dataset information about -.Pa rpool -.Xc -.Bd -literal -# zdb -d rpool -Dataset mos [META], ID 0, cr_txg 4, 26.9M, 1051 objects -Dataset rpool/swap [ZVOL], ID 59, cr_txg 356, 486M, 2 objects - ... 
-.Ed -.It Xo -.Sy Example 3 -Display basic information about object 0 in -.Pa rpool/export/home -.Xc -.Bd -literal -# zdb -d rpool/export/home 0 -Dataset rpool/export/home [ZPL], ID 137, cr_txg 1546, 32K, 8 objects - - Object lvl iblk dblk dsize lsize %full type - 0 7 16K 16K 15.0K 16K 25.00 DMU dnode -.Ed -.It Xo -.Sy Example 4 -Display the predicted effect of enabling deduplication on -.Pa rpool -.Xc -.Bd -literal -# zdb -S rpool -Simulated DDT histogram: - -bucket allocated referenced -______ ______________________________ ______________________________ -refcnt blocks LSIZE PSIZE DSIZE blocks LSIZE PSIZE DSIZE ------- ------ ----- ----- ----- ------ ----- ----- ----- - 1 694K 27.1G 15.0G 15.0G 694K 27.1G 15.0G 15.0G - 2 35.0K 1.33G 699M 699M 74.7K 2.79G 1.45G 1.45G - ... -dedup = 1.11, compress = 1.80, copies = 1.00, dedup * compress / copies = 2.00 -.Ed -.El -.Sh SEE ALSO -.Xr zfs 8 , -.Xr zpool 8 -.Sh HISTORY -The -.Nm -utility first appeared in -.Fx 7.0 . diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c deleted file mode 100644 index d51ddc68908c..000000000000 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ /dev/null @@ -1,5749 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2017 Nexenta Systems, Inc. - * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. - * Copyright 2017 RackTop Systems. - */ - -#include <stdio.h> -#include <unistd.h> -#include <stdio_ext.h> -#include <stdlib.h> -#include <ctype.h> -#include <sys/zfs_context.h> -#include <sys/spa.h> -#include <sys/spa_impl.h> -#include <sys/dmu.h> -#include <sys/zap.h> -#include <sys/fs/zfs.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_sa.h> -#include <sys/sa.h> -#include <sys/sa_impl.h> -#include <sys/vdev.h> -#include <sys/vdev_impl.h> -#include <sys/metaslab_impl.h> -#include <sys/dmu_objset.h> -#include <sys/dsl_dir.h> -#include <sys/dsl_dataset.h> -#include <sys/dsl_pool.h> -#include <sys/dbuf.h> -#include <sys/zil.h> -#include <sys/zil_impl.h> -#include <sys/stat.h> -#include <sys/resource.h> -#include <sys/dmu_traverse.h> -#include <sys/zio_checksum.h> -#include <sys/zio_compress.h> -#include <sys/zfs_fuid.h> -#include <sys/arc.h> -#include <sys/ddt.h> -#include <sys/zfeature.h> -#include <sys/abd.h> -#include <sys/blkptr.h> -#include <sys/dsl_scan.h> -#include <zfs_comutil.h> -#include <libcmdutils.h> -#undef verify -#include <libzfs.h> - -#include "zdb.h" - -#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ - zio_compress_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ - zio_checksum_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ - dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? 
\ - dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN") -#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ - (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \ - DMU_OT_ZAP_OTHER : \ - (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \ - DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES) - -#ifndef lint -extern int reference_tracking_enable; -extern boolean_t zfs_recover; -extern uint64_t zfs_arc_max, zfs_arc_meta_limit; -extern int zfs_vdev_async_read_max_active; -extern boolean_t spa_load_verify_dryrun; -extern int aok; -#else -int reference_tracking_enable; -boolean_t zfs_recover; -uint64_t zfs_arc_max, zfs_arc_meta_limit; -int zfs_vdev_async_read_max_active; -boolean_t spa_load_verify_dryrun; -int aok; -#endif - -static const char cmdname[] = "zdb"; -uint8_t dump_opt[256]; - -typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); - -static uint64_t *zopt_object = NULL; -static unsigned zopt_objects = 0; -static libzfs_handle_t *g_zfs; -static uint64_t max_inflight = 1000; -static int leaked_objects = 0; - -static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *); -static void mos_obj_refd(uint64_t); - -/* - * These libumem hooks provide a reasonable set of defaults for the allocator's - * debugging facilities. - */ -const char * -_umem_debug_init() -{ - return ("default,verbose"); /* $UMEM_DEBUG setting */ -} - -const char * -_umem_logging_init(void) -{ - return ("fail,contents"); /* $UMEM_LOGGING setting */ -} - -static void -usage(void) -{ - (void) fprintf(stderr, - "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] " - "[-I <inflight I/Os>]\n" - "\t\t[-o <var>=<value>]... 
[-t <txg>] [-U <cache>] [-x <dumpdir>]\n" - "\t\t[<poolname> [<object> ...]]\n" - "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> " - "[<object> ...]\n" - "\t%s -C [-A] [-U <cache>]\n" - "\t%s -l [-Aqu] <device>\n" - "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] " - "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n" - "\t%s -O <dataset> <path>\n" - "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n" - "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n" - "\t%s -E [-A] word0:word1:...:word15\n" - "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] " - "<poolname>\n\n", - cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, - cmdname, cmdname); - - (void) fprintf(stderr, " Dataset name must include at least one " - "separator character '/' or '@'\n"); - (void) fprintf(stderr, " If dataset name is specified, only that " - "dataset is dumped\n"); - (void) fprintf(stderr, " If object numbers are specified, only " - "those objects are dumped\n\n"); - (void) fprintf(stderr, " Options to control amount of output:\n"); - (void) fprintf(stderr, " -b block statistics\n"); - (void) fprintf(stderr, " -c checksum all metadata (twice for " - "all data) blocks\n"); - (void) fprintf(stderr, " -C config (or cachefile if alone)\n"); - (void) fprintf(stderr, " -d dataset(s)\n"); - (void) fprintf(stderr, " -D dedup statistics\n"); - (void) fprintf(stderr, " -E decode and display block from an " - "embedded block pointer\n"); - (void) fprintf(stderr, " -h pool history\n"); - (void) fprintf(stderr, " -i intent logs\n"); - (void) fprintf(stderr, " -l read label contents\n"); - (void) fprintf(stderr, " -k examine the checkpointed state " - "of the pool\n"); - (void) fprintf(stderr, " -L disable leak tracking (do not " - "load spacemaps)\n"); - (void) fprintf(stderr, " -m metaslabs\n"); - (void) fprintf(stderr, " -M metaslab groups\n"); - (void) fprintf(stderr, " -O perform object lookups by path\n"); - (void) fprintf(stderr, " -R read and 
display block from a " - "device\n"); - (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); - (void) fprintf(stderr, " -S simulate dedup to measure effect\n"); - (void) fprintf(stderr, " -v verbose (applies to all " - "others)\n\n"); - (void) fprintf(stderr, " Below options are intended for use " - "with other options:\n"); - (void) fprintf(stderr, " -A ignore assertions (-A), enable " - "panic recovery (-AA) or both (-AAA)\n"); - (void) fprintf(stderr, " -e pool is exported/destroyed/" - "has altroot/not in a cachefile\n"); - (void) fprintf(stderr, " -F attempt automatic rewind within " - "safe range of transaction groups\n"); - (void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before " - "exiting\n"); - (void) fprintf(stderr, " -I <number of inflight I/Os> -- " - "specify the maximum number of " - "checksumming I/Os [default is 200]\n"); - (void) fprintf(stderr, " -o <variable>=<value> set global " - "variable to an unsigned 32-bit integer value\n"); - (void) fprintf(stderr, " -p <path> -- use one or more with " - "-e to specify path to vdev dir\n"); - (void) fprintf(stderr, " -P print numbers in parseable form\n"); - (void) fprintf(stderr, " -q don't print label contents\n"); - (void) fprintf(stderr, " -t <txg> -- highest txg to use when " - "searching for uberblocks\n"); - (void) fprintf(stderr, " -u uberblock\n"); - (void) fprintf(stderr, " -U <cachefile_path> -- use alternate " - "cachefile\n"); - (void) fprintf(stderr, " -V do verbatim import\n"); - (void) fprintf(stderr, " -x <dumpdir> -- " - "dump all read blocks into specified directory\n"); - (void) fprintf(stderr, " -X attempt extreme rewind (does not " - "work with dataset)\n\n"); - (void) fprintf(stderr, "Specify an option more than once (e.g. 
-bb) " - "to make only that option verbose\n"); - (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); - exit(1); -} - -static void -dump_debug_buffer() -{ - if (dump_opt['G']) { - (void) printf("\n"); - zfs_dbgmsg_print("zdb"); - } -} - -/* - * Called for usage errors that are discovered after a call to spa_open(), - * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. - */ - -static void -fatal(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - (void) fprintf(stderr, "%s: ", cmdname); - (void) vfprintf(stderr, fmt, ap); - va_end(ap); - (void) fprintf(stderr, "\n"); - - dump_debug_buffer(); - - exit(1); -} - -/* ARGSUSED */ -static void -dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) -{ - nvlist_t *nv; - size_t nvsize = *(uint64_t *)data; - char *packed = umem_alloc(nvsize, UMEM_NOFAIL); - - VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); - - VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); - - umem_free(packed, nvsize); - - dump_nvlist(nv, 8); - - nvlist_free(nv); -} - -/* ARGSUSED */ -static void -dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) -{ - spa_history_phys_t *shp = data; - - if (shp == NULL) - return; - - (void) printf("\t\tpool_create_len = %llu\n", - (u_longlong_t)shp->sh_pool_create_len); - (void) printf("\t\tphys_max_off = %llu\n", - (u_longlong_t)shp->sh_phys_max_off); - (void) printf("\t\tbof = %llu\n", - (u_longlong_t)shp->sh_bof); - (void) printf("\t\teof = %llu\n", - (u_longlong_t)shp->sh_eof); - (void) printf("\t\trecords_lost = %llu\n", - (u_longlong_t)shp->sh_records_lost); -} - -static void -zdb_nicenum(uint64_t num, char *buf, size_t buflen) -{ - if (dump_opt['P']) - (void) snprintf(buf, buflen, "%llu", (longlong_t)num); - else - nicenum(num, buf, sizeof (buf)); -} - -static const char histo_stars[] = "****************************************"; -static const uint64_t histo_width = sizeof (histo_stars) - 
1; - -static void -dump_histogram(const uint64_t *histo, int size, int offset) -{ - int i; - int minidx = size - 1; - int maxidx = 0; - uint64_t max = 0; - - for (i = 0; i < size; i++) { - if (histo[i] > max) - max = histo[i]; - if (histo[i] > 0 && i > maxidx) - maxidx = i; - if (histo[i] > 0 && i < minidx) - minidx = i; - } - - if (max < histo_width) - max = histo_width; - - for (i = minidx; i <= maxidx; i++) { - (void) printf("\t\t\t%3u: %6llu %s\n", - i + offset, (u_longlong_t)histo[i], - &histo_stars[(max - histo[i]) * histo_width / max]); - } -} - -static void -dump_zap_stats(objset_t *os, uint64_t object) -{ - int error; - zap_stats_t zs; - - error = zap_get_stats(os, object, &zs); - if (error) - return; - - if (zs.zs_ptrtbl_len == 0) { - ASSERT(zs.zs_num_blocks == 1); - (void) printf("\tmicrozap: %llu bytes, %llu entries\n", - (u_longlong_t)zs.zs_blocksize, - (u_longlong_t)zs.zs_num_entries); - return; - } - - (void) printf("\tFat ZAP stats:\n"); - - (void) printf("\t\tPointer table:\n"); - (void) printf("\t\t\t%llu elements\n", - (u_longlong_t)zs.zs_ptrtbl_len); - (void) printf("\t\t\tzt_blk: %llu\n", - (u_longlong_t)zs.zs_ptrtbl_zt_blk); - (void) printf("\t\t\tzt_numblks: %llu\n", - (u_longlong_t)zs.zs_ptrtbl_zt_numblks); - (void) printf("\t\t\tzt_shift: %llu\n", - (u_longlong_t)zs.zs_ptrtbl_zt_shift); - (void) printf("\t\t\tzt_blks_copied: %llu\n", - (u_longlong_t)zs.zs_ptrtbl_blks_copied); - (void) printf("\t\t\tzt_nextblk: %llu\n", - (u_longlong_t)zs.zs_ptrtbl_nextblk); - - (void) printf("\t\tZAP entries: %llu\n", - (u_longlong_t)zs.zs_num_entries); - (void) printf("\t\tLeaf blocks: %llu\n", - (u_longlong_t)zs.zs_num_leafs); - (void) printf("\t\tTotal blocks: %llu\n", - (u_longlong_t)zs.zs_num_blocks); - (void) printf("\t\tzap_block_type: 0x%llx\n", - (u_longlong_t)zs.zs_block_type); - (void) printf("\t\tzap_magic: 0x%llx\n", - (u_longlong_t)zs.zs_magic); - (void) printf("\t\tzap_salt: 0x%llx\n", - (u_longlong_t)zs.zs_salt); - - (void) printf("\t\tLeafs 
with 2^n pointers:\n"); - dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0); - - (void) printf("\t\tBlocks with n*5 entries:\n"); - dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0); - - (void) printf("\t\tBlocks n/10 full:\n"); - dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0); - - (void) printf("\t\tEntries with n chunks:\n"); - dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0); - - (void) printf("\t\tBuckets with n entries:\n"); - dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0); -} - -/*ARGSUSED*/ -static void -dump_none(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -/*ARGSUSED*/ -static void -dump_unknown(objset_t *os, uint64_t object, void *data, size_t size) -{ - (void) printf("\tUNKNOWN OBJECT TYPE\n"); -} - -/*ARGSUSED*/ -static void -dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -/*ARGSUSED*/ -static void -dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -/*ARGSUSED*/ -static void -dump_zap(objset_t *os, uint64_t object, void *data, size_t size) -{ - zap_cursor_t zc; - zap_attribute_t attr; - void *prop; - unsigned i; - - dump_zap_stats(os, object); - (void) printf("\n"); - - for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; - zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = ", attr.za_name); - if (attr.za_num_integers == 0) { - (void) printf("\n"); - continue; - } - prop = umem_zalloc(attr.za_num_integers * - attr.za_integer_length, UMEM_NOFAIL); - (void) zap_lookup(os, object, attr.za_name, - attr.za_integer_length, attr.za_num_integers, prop); - if (attr.za_integer_length == 1) { - (void) printf("%s", (char *)prop); - } else { - for (i = 0; i < attr.za_num_integers; i++) { - switch (attr.za_integer_length) { - case 2: - (void) printf("%u ", - ((uint16_t *)prop)[i]); - break; - case 4: - (void) printf("%u ", - ((uint32_t *)prop)[i]); - break; - case 8: - 
(void) printf("%lld ", - (u_longlong_t)((int64_t *)prop)[i]); - break; - } - } - } - (void) printf("\n"); - umem_free(prop, attr.za_num_integers * attr.za_integer_length); - } - zap_cursor_fini(&zc); -} - -static void -dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) -{ - bpobj_phys_t *bpop = data; - char bytes[32], comp[32], uncomp[32]; - - /* make sure the output won't get truncated */ - CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); - - if (bpop == NULL) - return; - - zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes)); - zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp)); - zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp)); - - (void) printf("\t\tnum_blkptrs = %llu\n", - (u_longlong_t)bpop->bpo_num_blkptrs); - (void) printf("\t\tbytes = %s\n", bytes); - if (size >= BPOBJ_SIZE_V1) { - (void) printf("\t\tcomp = %s\n", comp); - (void) printf("\t\tuncomp = %s\n", uncomp); - } - if (size >= sizeof (*bpop)) { - (void) printf("\t\tsubobjs = %llu\n", - (u_longlong_t)bpop->bpo_subobjs); - (void) printf("\t\tnum_subobjs = %llu\n", - (u_longlong_t)bpop->bpo_num_subobjs); - } - - if (dump_opt['d'] < 5) - return; - - for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) { - char blkbuf[BP_SPRINTF_LEN]; - blkptr_t bp; - - int err = dmu_read(os, object, - i * sizeof (bp), sizeof (bp), &bp, 0); - if (err != 0) { - (void) printf("got error %u from dmu_read\n", err); - break; - } - snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp); - (void) printf("\t%s\n", blkbuf); - } -} - -/* ARGSUSED */ -static void -dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size) -{ - dmu_object_info_t doi; - - VERIFY0(dmu_object_info(os, object, &doi)); - uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP); - - int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0); - if (err != 0) { - (void) printf("got error %u from dmu_read\n", err); - 
kmem_free(subobjs, doi.doi_max_offset); - return; - } - - int64_t last_nonzero = -1; - for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) { - if (subobjs[i] != 0) - last_nonzero = i; - } - - for (int64_t i = 0; i <= last_nonzero; i++) { - (void) printf("\t%llu\n", (longlong_t)subobjs[i]); - } - kmem_free(subobjs, doi.doi_max_offset); -} - -/*ARGSUSED*/ -static void -dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) -{ - dump_zap_stats(os, object); - /* contents are printed elsewhere, properly decoded */ -} - -/*ARGSUSED*/ -static void -dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) -{ - zap_cursor_t zc; - zap_attribute_t attr; - - dump_zap_stats(os, object); - (void) printf("\n"); - - for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; - zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = ", attr.za_name); - if (attr.za_num_integers == 0) { - (void) printf("\n"); - continue; - } - (void) printf(" %llx : [%d:%d:%d]\n", - (u_longlong_t)attr.za_first_integer, - (int)ATTR_LENGTH(attr.za_first_integer), - (int)ATTR_BSWAP(attr.za_first_integer), - (int)ATTR_NUM(attr.za_first_integer)); - } - zap_cursor_fini(&zc); -} - -/*ARGSUSED*/ -static void -dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) -{ - zap_cursor_t zc; - zap_attribute_t attr; - uint16_t *layout_attrs; - unsigned i; - - dump_zap_stats(os, object); - (void) printf("\n"); - - for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; - zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = [", attr.za_name); - if (attr.za_num_integers == 0) { - (void) printf("\n"); - continue; - } - - VERIFY(attr.za_integer_length == 2); - layout_attrs = umem_zalloc(attr.za_num_integers * - attr.za_integer_length, UMEM_NOFAIL); - - VERIFY(zap_lookup(os, object, attr.za_name, - attr.za_integer_length, - attr.za_num_integers, layout_attrs) == 0); - - for (i = 0; i != attr.za_num_integers; i++) - (void) printf(" %d ", 
(int)layout_attrs[i]); - (void) printf("]\n"); - umem_free(layout_attrs, - attr.za_num_integers * attr.za_integer_length); - } - zap_cursor_fini(&zc); -} - -/*ARGSUSED*/ -static void -dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) -{ - zap_cursor_t zc; - zap_attribute_t attr; - const char *typenames[] = { - /* 0 */ "not specified", - /* 1 */ "FIFO", - /* 2 */ "Character Device", - /* 3 */ "3 (invalid)", - /* 4 */ "Directory", - /* 5 */ "5 (invalid)", - /* 6 */ "Block Device", - /* 7 */ "7 (invalid)", - /* 8 */ "Regular File", - /* 9 */ "9 (invalid)", - /* 10 */ "Symbolic Link", - /* 11 */ "11 (invalid)", - /* 12 */ "Socket", - /* 13 */ "Door", - /* 14 */ "Event Port", - /* 15 */ "15 (invalid)", - }; - - dump_zap_stats(os, object); - (void) printf("\n"); - - for (zap_cursor_init(&zc, os, object); - zap_cursor_retrieve(&zc, &attr) == 0; - zap_cursor_advance(&zc)) { - (void) printf("\t\t%s = %lld (type: %s)\n", - attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer), - typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]); - } - zap_cursor_fini(&zc); -} - -static int -get_dtl_refcount(vdev_t *vd) -{ - int refcount = 0; - - if (vd->vdev_ops->vdev_op_leaf) { - space_map_t *sm = vd->vdev_dtl_sm; - - if (sm != NULL && - sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) - return (1); - return (0); - } - - for (unsigned c = 0; c < vd->vdev_children; c++) - refcount += get_dtl_refcount(vd->vdev_child[c]); - return (refcount); -} - -static int -get_metaslab_refcount(vdev_t *vd) -{ - int refcount = 0; - - if (vd->vdev_top == vd) { - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { - space_map_t *sm = vd->vdev_ms[m]->ms_sm; - - if (sm != NULL && - sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) - refcount++; - } - } - for (unsigned c = 0; c < vd->vdev_children; c++) - refcount += get_metaslab_refcount(vd->vdev_child[c]); - - return (refcount); -} - -static int -get_obsolete_refcount(vdev_t *vd) -{ - int refcount = 0; - - uint64_t obsolete_sm_obj = 
vdev_obsolete_sm_object(vd); - if (vd->vdev_top == vd && obsolete_sm_obj != 0) { - dmu_object_info_t doi; - VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset, - obsolete_sm_obj, &doi)); - if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { - refcount++; - } - } else { - ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); - ASSERT3U(obsolete_sm_obj, ==, 0); - } - for (unsigned c = 0; c < vd->vdev_children; c++) { - refcount += get_obsolete_refcount(vd->vdev_child[c]); - } - - return (refcount); -} - -static int -get_prev_obsolete_spacemap_refcount(spa_t *spa) -{ - uint64_t prev_obj = - spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object; - if (prev_obj != 0) { - dmu_object_info_t doi; - VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi)); - if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { - return (1); - } - } - return (0); -} - -static int -get_checkpoint_refcount(vdev_t *vd) -{ - int refcount = 0; - - if (vd->vdev_top == vd && vd->vdev_top_zap != 0 && - zap_contains(spa_meta_objset(vd->vdev_spa), - vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0) - refcount++; - - for (uint64_t c = 0; c < vd->vdev_children; c++) - refcount += get_checkpoint_refcount(vd->vdev_child[c]); - - return (refcount); -} - -static int -verify_spacemap_refcounts(spa_t *spa) -{ - uint64_t expected_refcount = 0; - uint64_t actual_refcount; - - (void) feature_get_refcount(spa, - &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM], - &expected_refcount); - actual_refcount = get_dtl_refcount(spa->spa_root_vdev); - actual_refcount += get_metaslab_refcount(spa->spa_root_vdev); - actual_refcount += get_obsolete_refcount(spa->spa_root_vdev); - actual_refcount += get_prev_obsolete_spacemap_refcount(spa); - actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev); - - if (expected_refcount != actual_refcount) { - (void) printf("space map refcount mismatch: expected %lld != " - "actual %lld\n", - (longlong_t)expected_refcount, - (longlong_t)actual_refcount); - 
return (2); - } - return (0); -} - -static void -dump_spacemap(objset_t *os, space_map_t *sm) -{ - char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", - "INVALID", "INVALID", "INVALID", "INVALID" }; - - if (sm == NULL) - return; - - (void) printf("space map object %llu:\n", - (longlong_t)sm->sm_object); - (void) printf(" smp_length = 0x%llx\n", - (longlong_t)sm->sm_phys->smp_length); - (void) printf(" smp_alloc = 0x%llx\n", - (longlong_t)sm->sm_phys->smp_alloc); - - if (dump_opt['d'] < 6 && dump_opt['m'] < 4) - return; - - /* - * Print out the freelist entries in both encoded and decoded form. - */ - uint8_t mapshift = sm->sm_shift; - int64_t alloc = 0; - uint64_t word, entry_id = 0; - for (uint64_t offset = 0; offset < space_map_length(sm); - offset += sizeof (word)) { - - VERIFY0(dmu_read(os, space_map_object(sm), offset, - sizeof (word), &word, DMU_READ_PREFETCH)); - - if (sm_entry_is_debug(word)) { - (void) printf("\t [%6llu] %s: txg %llu pass %llu\n", - (u_longlong_t)entry_id, - ddata[SM_DEBUG_ACTION_DECODE(word)], - (u_longlong_t)SM_DEBUG_TXG_DECODE(word), - (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); - entry_id++; - continue; - } - - uint8_t words; - char entry_type; - uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; - - if (sm_entry_is_single_word(word)) { - entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? - 'A' : 'F'; - entry_off = (SM_OFFSET_DECODE(word) << mapshift) + - sm->sm_start; - entry_run = SM_RUN_DECODE(word) << mapshift; - words = 1; - } else { - /* it is a two-word entry so we read another word */ - ASSERT(sm_entry_is_double_word(word)); - - uint64_t extra_word; - offset += sizeof (extra_word); - VERIFY0(dmu_read(os, space_map_object(sm), offset, - sizeof (extra_word), &extra_word, - DMU_READ_PREFETCH)); - - ASSERT3U(offset, <=, space_map_length(sm)); - - entry_run = SM2_RUN_DECODE(word) << mapshift; - entry_vdev = SM2_VDEV_DECODE(word); - entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 
- 'A' : 'F'; - entry_off = (SM2_OFFSET_DECODE(extra_word) << - mapshift) + sm->sm_start; - words = 2; - } - - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", - (u_longlong_t)entry_id, - entry_type, (u_longlong_t)entry_off, - (u_longlong_t)(entry_off + entry_run), - (u_longlong_t)entry_run, - (u_longlong_t)entry_vdev, words); - - if (entry_type == 'A') - alloc += entry_run; - else - alloc -= entry_run; - entry_id++; - } - if (alloc != space_map_allocated(sm)) { - (void) printf("space_map_object alloc (%lld) INCONSISTENT " - "with space map summary (%lld)\n", - (longlong_t)space_map_allocated(sm), (longlong_t)alloc); - } -} - -static void -dump_metaslab_stats(metaslab_t *msp) -{ - char maxbuf[32]; - range_tree_t *rt = msp->ms_allocatable; - avl_tree_t *t = &msp->ms_allocatable_by_size; - int free_pct = range_tree_space(rt) * 100 / msp->ms_size; - - /* max sure nicenum has enough space */ - CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ); - - zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf)); - - (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n", - "segments", avl_numnodes(t), "maxsize", maxbuf, - "freepct", free_pct); - (void) printf("\tIn-memory histogram:\n"); - dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); -} - -static void -dump_metaslab(metaslab_t *msp) -{ - vdev_t *vd = msp->ms_group->mg_vd; - spa_t *spa = vd->vdev_spa; - space_map_t *sm = msp->ms_sm; - char freebuf[32]; - - zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf, - sizeof (freebuf)); - - (void) printf( - "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n", - (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, - (u_longlong_t)space_map_object(sm), freebuf); - - if (dump_opt['m'] > 2 && !dump_opt['L']) { - mutex_enter(&msp->ms_lock); - VERIFY0(metaslab_load(msp)); - range_tree_stat_verify(msp->ms_allocatable); - dump_metaslab_stats(msp); - metaslab_unload(msp); - mutex_exit(&msp->ms_lock); - } - - if 
(dump_opt['m'] > 1 && sm != NULL && - spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { - /* - * The space map histogram represents free space in chunks - * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). - */ - (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", - (u_longlong_t)msp->ms_fragmentation); - dump_histogram(sm->sm_phys->smp_histogram, - SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); - } - - ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift)); - dump_spacemap(spa->spa_meta_objset, msp->ms_sm); -} - -static void -print_vdev_metaslab_header(vdev_t *vd) -{ - vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias; - const char *bias_str; - - bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ? - VDEV_ALLOC_BIAS_LOG : - (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL : - (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP : - vd->vdev_islog ? "log" : ""; - - (void) printf("\tvdev %10llu %s\n" - "\t%-10s%5llu %-19s %-15s %-12s\n", - (u_longlong_t)vd->vdev_id, bias_str, - "metaslabs", (u_longlong_t)vd->vdev_ms_count, - "offset", "spacemap", "free"); - (void) printf("\t%15s %19s %15s %12s\n", - "---------------", "-------------------", - "---------------", "------------"); -} - -static void -dump_metaslab_groups(spa_t *spa) -{ - vdev_t *rvd = spa->spa_root_vdev; - metaslab_class_t *mc = spa_normal_class(spa); - uint64_t fragmentation; - - metaslab_class_histogram_verify(mc); - - for (unsigned c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - metaslab_group_t *mg = tvd->vdev_mg; - - if (mg == NULL || mg->mg_class != mc) - continue; - - metaslab_group_histogram_verify(mg); - mg->mg_fragmentation = metaslab_group_fragmentation(mg); - - (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" - "fragmentation", - (u_longlong_t)tvd->vdev_id, - (u_longlong_t)tvd->vdev_ms_count); - if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { - (void) printf("%3s\n", "-"); - } else { - (void) printf("%3llu%%\n", - 
(u_longlong_t)mg->mg_fragmentation); - } - dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); - } - - (void) printf("\tpool %s\tfragmentation", spa_name(spa)); - fragmentation = metaslab_class_fragmentation(mc); - if (fragmentation == ZFS_FRAG_INVALID) - (void) printf("\t%3s\n", "-"); - else - (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); - dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); -} - -static void -print_vdev_indirect(vdev_t *vd) -{ - vdev_indirect_config_t *vic = &vd->vdev_indirect_config; - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - vdev_indirect_births_t *vib = vd->vdev_indirect_births; - - if (vim == NULL) { - ASSERT3P(vib, ==, NULL); - return; - } - - ASSERT3U(vdev_indirect_mapping_object(vim), ==, - vic->vic_mapping_object); - ASSERT3U(vdev_indirect_births_object(vib), ==, - vic->vic_births_object); - - (void) printf("indirect births obj %llu:\n", - (longlong_t)vic->vic_births_object); - (void) printf(" vib_count = %llu\n", - (longlong_t)vdev_indirect_births_count(vib)); - for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) { - vdev_indirect_birth_entry_phys_t *cur_vibe = - &vib->vib_entries[i]; - (void) printf("\toffset %llx -> txg %llu\n", - (longlong_t)cur_vibe->vibe_offset, - (longlong_t)cur_vibe->vibe_phys_birth_txg); - } - (void) printf("\n"); - - (void) printf("indirect mapping obj %llu:\n", - (longlong_t)vic->vic_mapping_object); - (void) printf(" vim_max_offset = 0x%llx\n", - (longlong_t)vdev_indirect_mapping_max_offset(vim)); - (void) printf(" vim_bytes_mapped = 0x%llx\n", - (longlong_t)vdev_indirect_mapping_bytes_mapped(vim)); - (void) printf(" vim_count = %llu\n", - (longlong_t)vdev_indirect_mapping_num_entries(vim)); - - if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3) - return; - - uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim); - - for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { - vdev_indirect_mapping_entry_phys_t *vimep = - 
&vim->vim_entries[i]; - (void) printf("\t<%llx:%llx:%llx> -> " - "<%llx:%llx:%llx> (%x obsolete)\n", - (longlong_t)vd->vdev_id, - (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), - (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), - (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst), - (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst), - (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), - counts[i]); - } - (void) printf("\n"); - - uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd); - if (obsolete_sm_object != 0) { - objset_t *mos = vd->vdev_spa->spa_meta_objset; - (void) printf("obsolete space map object %llu:\n", - (u_longlong_t)obsolete_sm_object); - ASSERT(vd->vdev_obsolete_sm != NULL); - ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==, - obsolete_sm_object); - dump_spacemap(mos, vd->vdev_obsolete_sm); - (void) printf("\n"); - } -} - -static void -dump_metaslabs(spa_t *spa) -{ - vdev_t *vd, *rvd = spa->spa_root_vdev; - uint64_t m, c = 0, children = rvd->vdev_children; - - (void) printf("\nMetaslabs:\n"); - - if (!dump_opt['d'] && zopt_objects > 0) { - c = zopt_object[0]; - - if (c >= children) - (void) fatal("bad vdev id: %llu", (u_longlong_t)c); - - if (zopt_objects > 1) { - vd = rvd->vdev_child[c]; - print_vdev_metaslab_header(vd); - - for (m = 1; m < zopt_objects; m++) { - if (zopt_object[m] < vd->vdev_ms_count) - dump_metaslab( - vd->vdev_ms[zopt_object[m]]); - else - (void) fprintf(stderr, "bad metaslab " - "number %llu\n", - (u_longlong_t)zopt_object[m]); - } - (void) printf("\n"); - return; - } - children = c + 1; - } - for (; c < children; c++) { - vd = rvd->vdev_child[c]; - print_vdev_metaslab_header(vd); - - print_vdev_indirect(vd); - - for (m = 0; m < vd->vdev_ms_count; m++) - dump_metaslab(vd->vdev_ms[m]); - (void) printf("\n"); - } -} - -static void -dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) -{ - const ddt_phys_t *ddp = dde->dde_phys; - const ddt_key_t *ddk = &dde->dde_key; - const char *types[4] = { "ditto", "single", "double", 
"triple" }; - char blkbuf[BP_SPRINTF_LEN]; - blkptr_t blk; - - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { - if (ddp->ddp_phys_birth == 0) - continue; - ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); - snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); - (void) printf("index %llx refcnt %llu %s %s\n", - (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, - types[p], blkbuf); - } -} - -static void -dump_dedup_ratio(const ddt_stat_t *dds) -{ - double rL, rP, rD, D, dedup, compress, copies; - - if (dds->dds_blocks == 0) - return; - - rL = (double)dds->dds_ref_lsize; - rP = (double)dds->dds_ref_psize; - rD = (double)dds->dds_ref_dsize; - D = (double)dds->dds_dsize; - - dedup = rD / D; - compress = rL / rP; - copies = rD / rP; - - (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " - "dedup * compress / copies = %.2f\n\n", - dedup, compress, copies, dedup * compress / copies); -} - -static void -dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) -{ - char name[DDT_NAMELEN]; - ddt_entry_t dde; - uint64_t walk = 0; - dmu_object_info_t doi; - uint64_t count, dspace, mspace; - int error; - - error = ddt_object_info(ddt, type, class, &doi); - - if (error == ENOENT) - return; - ASSERT(error == 0); - - error = ddt_object_count(ddt, type, class, &count); - ASSERT(error == 0); - if (count == 0) - return; - - dspace = doi.doi_physical_blocks_512 << 9; - mspace = doi.doi_fill_count * doi.doi_data_block_size; - - ddt_object_name(ddt, type, class, name); - - (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", - name, - (u_longlong_t)count, - (u_longlong_t)(dspace / count), - (u_longlong_t)(mspace / count)); - - if (dump_opt['D'] < 3) - return; - - zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); - - if (dump_opt['D'] < 4) - return; - - if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) - return; - - (void) printf("%s contents:\n\n", name); - - while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) - dump_dde(ddt, 
&dde, walk); - - ASSERT3U(error, ==, ENOENT); - - (void) printf("\n"); -} - -static void -dump_all_ddts(spa_t *spa) -{ - ddt_histogram_t ddh_total; - ddt_stat_t dds_total; - - bzero(&ddh_total, sizeof (ddh_total)); - bzero(&dds_total, sizeof (dds_total)); - - for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { - ddt_t *ddt = spa->spa_ddt[c]; - for (enum ddt_type type = 0; type < DDT_TYPES; type++) { - for (enum ddt_class class = 0; class < DDT_CLASSES; - class++) { - dump_ddt(ddt, type, class); - } - } - } - - ddt_get_dedup_stats(spa, &dds_total); - - if (dds_total.dds_blocks == 0) { - (void) printf("All DDTs are empty\n"); - return; - } - - (void) printf("\n"); - - if (dump_opt['D'] > 1) { - (void) printf("DDT histogram (aggregated over all DDTs):\n"); - ddt_get_dedup_histogram(spa, &ddh_total); - zpool_dump_ddt(&dds_total, &ddh_total); - } - - dump_dedup_ratio(&dds_total); -} - -static void -dump_dtl_seg(void *arg, uint64_t start, uint64_t size) -{ - char *prefix = arg; - - (void) printf("%s [%llu,%llu) length %llu\n", - prefix, - (u_longlong_t)start, - (u_longlong_t)(start + size), - (u_longlong_t)(size)); -} - -static void -dump_dtl(vdev_t *vd, int indent) -{ - spa_t *spa = vd->vdev_spa; - boolean_t required; - const char *name[DTL_TYPES] = { "missing", "partial", "scrub", - "outage" }; - char prefix[256]; - - spa_vdev_state_enter(spa, SCL_NONE); - required = vdev_dtl_required(vd); - (void) spa_vdev_state_exit(spa, NULL, 0); - - if (indent == 0) - (void) printf("\nDirty time logs:\n\n"); - - (void) printf("\t%*s%s [%s]\n", indent, "", - vd->vdev_path ? vd->vdev_path : - vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), - required ? 
"DTL-required" : "DTL-expendable"); - - for (int t = 0; t < DTL_TYPES; t++) { - range_tree_t *rt = vd->vdev_dtl[t]; - if (range_tree_space(rt) == 0) - continue; - (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", - indent + 2, "", name[t]); - range_tree_walk(rt, dump_dtl_seg, prefix); - if (dump_opt['d'] > 5 && vd->vdev_children == 0) - dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm); - } - - for (unsigned c = 0; c < vd->vdev_children; c++) - dump_dtl(vd->vdev_child[c], indent + 4); -} - -/* from spa_history.c: spa_history_create_obj() */ -#define HIS_BUF_LEN_DEF (128 << 10) -#define HIS_BUF_LEN_MAX (1 << 30) - -static void -dump_history(spa_t *spa) -{ - nvlist_t **events = NULL; - char *buf = NULL; - uint64_t bufsize = HIS_BUF_LEN_DEF; - uint64_t resid, len, off = 0; - uint_t num = 0; - int error; - time_t tsec; - struct tm t; - char tbuf[30]; - char internalstr[MAXPATHLEN]; - - if ((buf = malloc(bufsize)) == NULL) - (void) fprintf(stderr, "Unable to read history: " - "out of memory\n"); - do { - len = bufsize; - - if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { - (void) fprintf(stderr, "Unable to read history: " - "error %d\n", error); - return; - } - - if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) - break; - off -= resid; - - /* - * If the history block is too big, double the buffer - * size and try again. 
- */ - if (resid == len) { - free(buf); - buf = NULL; - - bufsize <<= 1; - if ((bufsize >= HIS_BUF_LEN_MAX) || - ((buf = malloc(bufsize)) == NULL)) { - (void) fprintf(stderr, "Unable to read history: " - "out of memory\n"); - return; - } - } - } while (len != 0); - free(buf); - - (void) printf("\nHistory:\n"); - for (unsigned i = 0; i < num; i++) { - uint64_t time, txg, ievent; - char *cmd, *intstr; - boolean_t printed = B_FALSE; - - if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, - &time) != 0) - goto next; - if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, - &cmd) != 0) { - if (nvlist_lookup_uint64(events[i], - ZPOOL_HIST_INT_EVENT, &ievent) != 0) - goto next; - verify(nvlist_lookup_uint64(events[i], - ZPOOL_HIST_TXG, &txg) == 0); - verify(nvlist_lookup_string(events[i], - ZPOOL_HIST_INT_STR, &intstr) == 0); - if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) - goto next; - - (void) snprintf(internalstr, - sizeof (internalstr), - "[internal %s txg:%ju] %s", - zfs_history_event_names[ievent], (uintmax_t)txg, - intstr); - cmd = internalstr; - } - tsec = time; - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - (void) printf("%s %s\n", tbuf, cmd); - printed = B_TRUE; - -next: - if (dump_opt['h'] > 1) { - if (!printed) - (void) printf("unrecognized record:\n"); - dump_nvlist(events[i], 2); - } - } -} - -/*ARGSUSED*/ -static void -dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -static uint64_t -blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, - const zbookmark_phys_t *zb) -{ - if (dnp == NULL) { - ASSERT(zb->zb_level < 0); - if (zb->zb_object == 0) - return (zb->zb_blkid); - return (zb->zb_blkid * BP_GET_LSIZE(bp)); - } - - ASSERT(zb->zb_level >= 0); - - return ((zb->zb_blkid << - (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * - dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); -} - -static void -snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) -{ - const dva_t 
*dva = bp->blk_dva; - int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; - - if (dump_opt['b'] >= 6) { - snprintf_blkptr(blkbuf, buflen, bp); - return; - } - - if (BP_IS_EMBEDDED(bp)) { - (void) sprintf(blkbuf, - "EMBEDDED et=%u %llxL/%llxP B=%llu", - (int)BPE_GET_ETYPE(bp), - (u_longlong_t)BPE_GET_LSIZE(bp), - (u_longlong_t)BPE_GET_PSIZE(bp), - (u_longlong_t)bp->blk_birth); - return; - } - - blkbuf[0] = '\0'; - for (int i = 0; i < ndvas; i++) - (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), "%llu:%llx:%llx ", - (u_longlong_t)DVA_GET_VDEV(&dva[i]), - (u_longlong_t)DVA_GET_OFFSET(&dva[i]), - (u_longlong_t)DVA_GET_ASIZE(&dva[i])); - - if (BP_IS_HOLE(bp)) { - (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), - "%llxL B=%llu", - (u_longlong_t)BP_GET_LSIZE(bp), - (u_longlong_t)bp->blk_birth); - } else { - (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), - "%llxL/%llxP F=%llu B=%llu/%llu", - (u_longlong_t)BP_GET_LSIZE(bp), - (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)BP_GET_FILL(bp), - (u_longlong_t)bp->blk_birth, - (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); - } -} - -static void -print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb, - const dnode_phys_t *dnp) -{ - char blkbuf[BP_SPRINTF_LEN]; - int l; - - if (!BP_IS_EMBEDDED(bp)) { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } - - (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); - - ASSERT(zb->zb_level >= 0); - - for (l = dnp->dn_nlevels - 1; l >= -1; l--) { - if (l == zb->zb_level) { - (void) printf("L%llx", (u_longlong_t)zb->zb_level); - } else { - (void) printf(" "); - } - } - - snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); - (void) printf("%s\n", blkbuf); -} - -static int -visit_indirect(spa_t *spa, const dnode_phys_t *dnp, - blkptr_t *bp, const zbookmark_phys_t *zb) -{ - int err = 0; - - if (bp->blk_birth == 0) - return (0); - - print_indirect(bp, zb, dnp); - - if 
(BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { - arc_flags_t flags = ARC_FLAG_WAIT; - int i; - blkptr_t *cbp; - int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; - arc_buf_t *buf; - uint64_t fill = 0; - - err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) - return (err); - ASSERT(buf->b_data); - - /* recursively visit blocks below this */ - cbp = buf->b_data; - for (i = 0; i < epb; i++, cbp++) { - zbookmark_phys_t czb; - - SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); - err = visit_indirect(spa, dnp, cbp, &czb); - if (err) - break; - fill += BP_GET_FILL(cbp); - } - if (!err) - ASSERT3U(fill, ==, BP_GET_FILL(bp)); - arc_buf_destroy(buf, &buf); - } - - return (err); -} - -/*ARGSUSED*/ -static void -dump_indirect(dnode_t *dn) -{ - dnode_phys_t *dnp = dn->dn_phys; - int j; - zbookmark_phys_t czb; - - (void) printf("Indirect blocks:\n"); - - SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), - dn->dn_object, dnp->dn_nlevels - 1, 0); - for (j = 0; j < dnp->dn_nblkptr; j++) { - czb.zb_blkid = j; - (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, - &dnp->dn_blkptr[j], &czb); - } - - (void) printf("\n"); -} - -/*ARGSUSED*/ -static void -dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) -{ - dsl_dir_phys_t *dd = data; - time_t crtime; - char nice[32]; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ); - - if (dd == NULL) - return; - - ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); - - crtime = dd->dd_creation_time; - (void) printf("\t\tcreation_time = %s", ctime(&crtime)); - (void) printf("\t\thead_dataset_obj = %llu\n", - (u_longlong_t)dd->dd_head_dataset_obj); - (void) printf("\t\tparent_dir_obj = %llu\n", - (u_longlong_t)dd->dd_parent_obj); - (void) printf("\t\torigin_obj = %llu\n", - (u_longlong_t)dd->dd_origin_obj); - (void) printf("\t\tchild_dir_zapobj = %llu\n", - 
(u_longlong_t)dd->dd_child_dir_zapobj); - zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice)); - (void) printf("\t\tused_bytes = %s\n", nice); - zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice)); - (void) printf("\t\tcompressed_bytes = %s\n", nice); - zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice)); - (void) printf("\t\tuncompressed_bytes = %s\n", nice); - zdb_nicenum(dd->dd_quota, nice, sizeof (nice)); - (void) printf("\t\tquota = %s\n", nice); - zdb_nicenum(dd->dd_reserved, nice, sizeof (nice)); - (void) printf("\t\treserved = %s\n", nice); - (void) printf("\t\tprops_zapobj = %llu\n", - (u_longlong_t)dd->dd_props_zapobj); - (void) printf("\t\tdeleg_zapobj = %llu\n", - (u_longlong_t)dd->dd_deleg_zapobj); - (void) printf("\t\tflags = %llx\n", - (u_longlong_t)dd->dd_flags); - -#define DO(which) \ - zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \ - sizeof (nice)); \ - (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) - DO(HEAD); - DO(SNAP); - DO(CHILD); - DO(CHILD_RSRV); - DO(REFRSRV); -#undef DO - (void) printf("\t\tclones = %llu\n", - (u_longlong_t)dd->dd_clones); -} - -/*ARGSUSED*/ -static void -dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) -{ - dsl_dataset_phys_t *ds = data; - time_t crtime; - char used[32], compressed[32], uncompressed[32], unique[32]; - char blkbuf[BP_SPRINTF_LEN]; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (used) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ); - - if (ds == NULL) - return; - - ASSERT(size == sizeof (*ds)); - crtime = ds->ds_creation_time; - zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used)); - zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed)); - zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed, - sizeof (uncompressed)); - zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique)); - 
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); - - (void) printf("\t\tdir_obj = %llu\n", - (u_longlong_t)ds->ds_dir_obj); - (void) printf("\t\tprev_snap_obj = %llu\n", - (u_longlong_t)ds->ds_prev_snap_obj); - (void) printf("\t\tprev_snap_txg = %llu\n", - (u_longlong_t)ds->ds_prev_snap_txg); - (void) printf("\t\tnext_snap_obj = %llu\n", - (u_longlong_t)ds->ds_next_snap_obj); - (void) printf("\t\tsnapnames_zapobj = %llu\n", - (u_longlong_t)ds->ds_snapnames_zapobj); - (void) printf("\t\tnum_children = %llu\n", - (u_longlong_t)ds->ds_num_children); - (void) printf("\t\tuserrefs_obj = %llu\n", - (u_longlong_t)ds->ds_userrefs_obj); - (void) printf("\t\tcreation_time = %s", ctime(&crtime)); - (void) printf("\t\tcreation_txg = %llu\n", - (u_longlong_t)ds->ds_creation_txg); - (void) printf("\t\tdeadlist_obj = %llu\n", - (u_longlong_t)ds->ds_deadlist_obj); - (void) printf("\t\tused_bytes = %s\n", used); - (void) printf("\t\tcompressed_bytes = %s\n", compressed); - (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); - (void) printf("\t\tunique = %s\n", unique); - (void) printf("\t\tfsid_guid = %llu\n", - (u_longlong_t)ds->ds_fsid_guid); - (void) printf("\t\tguid = %llu\n", - (u_longlong_t)ds->ds_guid); - (void) printf("\t\tflags = %llx\n", - (u_longlong_t)ds->ds_flags); - (void) printf("\t\tnext_clones_obj = %llu\n", - (u_longlong_t)ds->ds_next_clones_obj); - (void) printf("\t\tprops_obj = %llu\n", - (u_longlong_t)ds->ds_props_obj); - (void) printf("\t\tbp = %s\n", blkbuf); -} - -/* ARGSUSED */ -static int -dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - char blkbuf[BP_SPRINTF_LEN]; - - if (bp->blk_birth != 0) { - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - (void) printf("\t%s\n", blkbuf); - } - return (0); -} - -static void -dump_bptree(objset_t *os, uint64_t obj, const char *name) -{ - char bytes[32]; - bptree_phys_t *bt; - dmu_buf_t *db; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); - - if 
(dump_opt['d'] < 3) - return; - - VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); - bt = db->db_data; - zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes)); - (void) printf("\n %s: %llu datasets, %s\n", - name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); - dmu_buf_rele(db, FTAG); - - if (dump_opt['d'] < 5) - return; - - (void) printf("\n"); - - (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); -} - -/* ARGSUSED */ -static int -dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - char blkbuf[BP_SPRINTF_LEN]; - - ASSERT(bp->blk_birth != 0); - snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); - (void) printf("\t%s\n", blkbuf); - return (0); -} - -static void -dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) -{ - char bytes[32]; - char comp[32]; - char uncomp[32]; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); - - if (dump_opt['d'] < 3) - return; - - zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes)); - if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { - zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp)); - zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp)); - (void) printf(" %*s: object %llu, %llu local blkptrs, " - "%llu subobjs in object %llu, %s (%s/%s comp)\n", - indent * 8, name, - (u_longlong_t)bpo->bpo_object, - (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, - (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, - (u_longlong_t)bpo->bpo_phys->bpo_subobjs, - bytes, comp, uncomp); - - for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { - uint64_t subobj; - bpobj_t subbpo; - int error; - VERIFY0(dmu_read(bpo->bpo_os, - bpo->bpo_phys->bpo_subobjs, - i * sizeof (subobj), sizeof (subobj), &subobj, 0)); - error = bpobj_open(&subbpo, bpo->bpo_os, subobj); - if (error != 0) { - (void) printf("ERROR %u while trying to open " - "subobj id 
%llu\n", - error, (u_longlong_t)subobj); - continue; - } - dump_full_bpobj(&subbpo, "subobj", indent + 1); - bpobj_close(&subbpo); - } - } else { - (void) printf(" %*s: object %llu, %llu blkptrs, %s\n", - indent * 8, name, - (u_longlong_t)bpo->bpo_object, - (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, - bytes); - } - - if (dump_opt['d'] < 5) - return; - - - if (indent == 0) { - (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); - (void) printf("\n"); - } -} - -static void -bpobj_count_refd(bpobj_t *bpo) -{ - mos_obj_refd(bpo->bpo_object); - - if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { - mos_obj_refd(bpo->bpo_phys->bpo_subobjs); - for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { - uint64_t subobj; - bpobj_t subbpo; - int error; - VERIFY0(dmu_read(bpo->bpo_os, - bpo->bpo_phys->bpo_subobjs, - i * sizeof (subobj), sizeof (subobj), &subobj, 0)); - error = bpobj_open(&subbpo, bpo->bpo_os, subobj); - if (error != 0) { - (void) printf("ERROR %u while trying to open " - "subobj id %llu\n", - error, (u_longlong_t)subobj); - continue; - } - bpobj_count_refd(&subbpo); - bpobj_close(&subbpo); - } - } -} - -static void -dump_deadlist(dsl_deadlist_t *dl) -{ - dsl_deadlist_entry_t *dle; - uint64_t unused; - char bytes[32]; - char comp[32]; - char uncomp[32]; - uint64_t empty_bpobj = - dmu_objset_spa(dl->dl_os)->spa_dsl_pool->dp_empty_bpobj; - - /* force the tree to be loaded */ - dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused); - - if (dl->dl_oldfmt) { - if (dl->dl_bpobj.bpo_object != empty_bpobj) - bpobj_count_refd(&dl->dl_bpobj); - } else { - mos_obj_refd(dl->dl_object); - for (dle = avl_first(&dl->dl_tree); dle; - dle = AVL_NEXT(&dl->dl_tree, dle)) { - if (dle->dle_bpobj.bpo_object != empty_bpobj) - bpobj_count_refd(&dle->dle_bpobj); - } - } - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (uncomp) >= 
NN_NUMBUF_SZ); - - if (dump_opt['d'] < 3) - return; - - if (dl->dl_oldfmt) { - dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); - return; - } - - zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); - zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); - zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); - (void) printf("\n Deadlist: %s (%s/%s comp)\n", - bytes, comp, uncomp); - - if (dump_opt['d'] < 4) - return; - - (void) printf("\n"); - - for (dle = avl_first(&dl->dl_tree); dle; - dle = AVL_NEXT(&dl->dl_tree, dle)) { - if (dump_opt['d'] >= 5) { - char buf[128]; - (void) snprintf(buf, sizeof (buf), - "mintxg %llu -> obj %llu", - (longlong_t)dle->dle_mintxg, - (longlong_t)dle->dle_bpobj.bpo_object); - dump_full_bpobj(&dle->dle_bpobj, buf, 0); - } else { - (void) printf("mintxg %llu -> obj %llu\n", - (longlong_t)dle->dle_mintxg, - (longlong_t)dle->dle_bpobj.bpo_object); - } - } -} - -static avl_tree_t idx_tree; -static avl_tree_t domain_tree; -static boolean_t fuid_table_loaded; -static objset_t *sa_os = NULL; -static sa_attr_type_t *sa_attr_table = NULL; - -static int -open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) -{ - int err; - uint64_t sa_attrs = 0; - uint64_t version = 0; - - VERIFY3P(sa_os, ==, NULL); - err = dmu_objset_own(path, type, B_TRUE, tag, osp); - if (err != 0) { - (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, - strerror(err)); - return (err); - } - - if (dmu_objset_type(*osp) == DMU_OST_ZFS) { - (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, - 8, 1, &version); - if (version >= ZPL_VERSION_SA) { - (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, - 8, 1, &sa_attrs); - } - err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END, - &sa_attr_table); - if (err != 0) { - (void) fprintf(stderr, "sa_setup failed: %s\n", - strerror(err)); - dmu_objset_disown(*osp, tag); - *osp = NULL; - } - } - sa_os = *osp; - - return (0); -} - -static void 
-close_objset(objset_t *os, void *tag) -{ - VERIFY3P(os, ==, sa_os); - if (os->os_sa != NULL) - sa_tear_down(os); - dmu_objset_disown(os, tag); - sa_attr_table = NULL; - sa_os = NULL; -} - -static void -fuid_table_destroy() -{ - if (fuid_table_loaded) { - zfs_fuid_table_destroy(&idx_tree, &domain_tree); - fuid_table_loaded = B_FALSE; - } -} - -/* - * print uid or gid information. - * For normal POSIX id just the id is printed in decimal format. - * For CIFS files with FUID the fuid is printed in hex followed by - * the domain-rid string. - */ -static void -print_idstr(uint64_t id, const char *id_type) -{ - if (FUID_INDEX(id)) { - char *domain; - - domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id)); - (void) printf("\t%s %llx [%s-%d]\n", id_type, - (u_longlong_t)id, domain, (int)FUID_RID(id)); - } else { - (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id); - } - -} - -static void -dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) -{ - uint32_t uid_idx, gid_idx; - - uid_idx = FUID_INDEX(uid); - gid_idx = FUID_INDEX(gid); - - /* Load domain table, if not already loaded */ - if (!fuid_table_loaded && (uid_idx || gid_idx)) { - uint64_t fuid_obj; - - /* first find the fuid object. 
It lives in the master node */ - VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, - 8, 1, &fuid_obj) == 0); - zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); - (void) zfs_fuid_table_load(os, fuid_obj, - &idx_tree, &domain_tree); - fuid_table_loaded = B_TRUE; - } - - print_idstr(uid, "uid"); - print_idstr(gid, "gid"); -} - -/*ARGSUSED*/ -static void -dump_znode(objset_t *os, uint64_t object, void *data, size_t size) -{ - char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ - sa_handle_t *hdl; - uint64_t xattr, rdev, gen; - uint64_t uid, gid, mode, fsize, parent, links; - uint64_t pflags; - uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; - time_t z_crtime, z_atime, z_mtime, z_ctime; - sa_bulk_attr_t bulk[12]; - int idx = 0; - int error; - - VERIFY3P(os, ==, sa_os); - if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { - (void) printf("Failed to get handle for SA znode\n"); - return; - } - - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, - &links, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, - &mode, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], - NULL, &parent, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, - &fsize, 8); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, - acctm, 16); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, - modtm, 16); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, - crtm, 16); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, - chgtm, 16); - SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, - &pflags, 8); - - if (sa_bulk_lookup(hdl, bulk, idx)) { - (void) sa_handle_destroy(hdl); - return; - } - - z_crtime = (time_t)crtm[0]; - z_atime = (time_t)acctm[0]; - z_mtime = 
(time_t)modtm[0]; - z_ctime = (time_t)chgtm[0]; - - if (dump_opt['d'] > 4) { - error = zfs_obj_to_path(os, object, path, sizeof (path)); - if (error == ESTALE) { - (void) snprintf(path, sizeof (path), "on delete queue"); - } else if (error != 0) { - leaked_objects++; - (void) snprintf(path, sizeof (path), - "path not found, possibly leaked"); - } - (void) printf("\tpath %s\n", path); - } - dump_uidgid(os, uid, gid); - (void) printf("\tatime %s", ctime(&z_atime)); - (void) printf("\tmtime %s", ctime(&z_mtime)); - (void) printf("\tctime %s", ctime(&z_ctime)); - (void) printf("\tcrtime %s", ctime(&z_crtime)); - (void) printf("\tgen %llu\n", (u_longlong_t)gen); - (void) printf("\tmode %llo\n", (u_longlong_t)mode); - (void) printf("\tsize %llu\n", (u_longlong_t)fsize); - (void) printf("\tparent %llu\n", (u_longlong_t)parent); - (void) printf("\tlinks %llu\n", (u_longlong_t)links); - (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); - if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, - sizeof (uint64_t)) == 0) - (void) printf("\txattr %llu\n", (u_longlong_t)xattr); - if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, - sizeof (uint64_t)) == 0) - (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); - sa_handle_destroy(hdl); -} - -/*ARGSUSED*/ -static void -dump_acl(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -/*ARGSUSED*/ -static void -dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) -{ -} - -static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { - dump_none, /* unallocated */ - dump_zap, /* object directory */ - dump_uint64, /* object array */ - dump_none, /* packed nvlist */ - dump_packed_nvlist, /* packed nvlist size */ - dump_none, /* bpobj */ - dump_bpobj, /* bpobj header */ - dump_none, /* SPA space map header */ - dump_none, /* SPA space map */ - dump_none, /* ZIL intent log */ - dump_dnode, /* DMU dnode */ - dump_dmu_objset, /* DMU objset */ - dump_dsl_dir, /* DSL directory */ - dump_zap, /* DSL 
directory child map */ - dump_zap, /* DSL dataset snap map */ - dump_zap, /* DSL props */ - dump_dsl_dataset, /* DSL dataset */ - dump_znode, /* ZFS znode */ - dump_acl, /* ZFS V0 ACL */ - dump_uint8, /* ZFS plain file */ - dump_zpldir, /* ZFS directory */ - dump_zap, /* ZFS master node */ - dump_zap, /* ZFS delete queue */ - dump_uint8, /* zvol object */ - dump_zap, /* zvol prop */ - dump_uint8, /* other uint8[] */ - dump_uint64, /* other uint64[] */ - dump_zap, /* other ZAP */ - dump_zap, /* persistent error log */ - dump_uint8, /* SPA history */ - dump_history_offsets, /* SPA history offsets */ - dump_zap, /* Pool properties */ - dump_zap, /* DSL permissions */ - dump_acl, /* ZFS ACL */ - dump_uint8, /* ZFS SYSACL */ - dump_none, /* FUID nvlist */ - dump_packed_nvlist, /* FUID nvlist size */ - dump_zap, /* DSL dataset next clones */ - dump_zap, /* DSL scrub queue */ - dump_zap, /* ZFS user/group used */ - dump_zap, /* ZFS user/group quota */ - dump_zap, /* snapshot refcount tags */ - dump_ddt_zap, /* DDT ZAP object */ - dump_zap, /* DDT statistics */ - dump_znode, /* SA object */ - dump_zap, /* SA Master Node */ - dump_sa_attrs, /* SA attribute registration */ - dump_sa_layouts, /* SA attribute layouts */ - dump_zap, /* DSL scrub translations */ - dump_none, /* fake dedup BP */ - dump_zap, /* deadlist */ - dump_none, /* deadlist hdr */ - dump_zap, /* dsl clones */ - dump_bpobj_subobjs, /* bpobj subobjs */ - dump_unknown, /* Unknown type, must be last */ -}; - -static void -dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, - uint64_t *dnode_slots_used) -{ - dmu_buf_t *db = NULL; - dmu_object_info_t doi; - dnode_t *dn; - void *bonus = NULL; - size_t bsize = 0; - char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; - char bonus_size[32]; - char aux[50]; - int error; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (lsize) >= 
NN_NUMBUF_SZ); - CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ); - - if (*print_header) { - (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n", - "Object", "lvl", "iblk", "dblk", "dsize", "dnsize", - "lsize", "%full", "type"); - *print_header = 0; - } - - if (object == 0) { - dn = DMU_META_DNODE(os); - } else { - error = dmu_bonus_hold(os, object, FTAG, &db); - if (error) - fatal("dmu_bonus_hold(%llu) failed, errno %u", - object, error); - bonus = db->db_data; - bsize = db->db_size; - dn = DB_DNODE((dmu_buf_impl_t *)db); - } - dmu_object_info_from_dnode(dn, &doi); - - if (dnode_slots_used != NULL) - *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; - - zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); - zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); - zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); - zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); - zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); - zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); - (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * - doi.doi_data_block_size / (object == 0 ? 
DNODES_PER_BLOCK : 1) / - doi.doi_max_offset); - - aux[0] = '\0'; - - if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { - (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", - ZDB_CHECKSUM_NAME(doi.doi_checksum)); - } - - if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { - (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", - ZDB_COMPRESS_NAME(doi.doi_compress)); - } - - (void) printf("%10" PRIu64 - " %3u %5s %5s %5s %5s %5s %6s %s%s\n", - object, doi.doi_indirection, iblk, dblk, - asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux); - - if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { - (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n", - "", "", "", "", "", "", bonus_size, "bonus", - ZDB_OT_NAME(doi.doi_bonus_type)); - } - - if (verbosity >= 4) { - (void) printf("\tdnode flags: %s%s%s\n", - (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? - "USED_BYTES " : "", - (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? - "USERUSED_ACCOUNTED " : "", - (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? - "SPILL_BLKPTR" : ""); - (void) printf("\tdnode maxblkid: %llu\n", - (longlong_t)dn->dn_phys->dn_maxblkid); - - object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, - bonus, bsize); - object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); - *print_header = 1; - } - - if (verbosity >= 5) - dump_indirect(dn); - - if (verbosity >= 5) { - /* - * Report the list of segments that comprise the object. 
- */ - uint64_t start = 0; - uint64_t end; - uint64_t blkfill = 1; - int minlvl = 1; - - if (dn->dn_type == DMU_OT_DNODE) { - minlvl = 0; - blkfill = DNODES_PER_BLOCK; - } - - for (;;) { - char segsize[32]; - /* make sure nicenum has enough space */ - CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ); - error = dnode_next_offset(dn, - 0, &start, minlvl, blkfill, 0); - if (error) - break; - end = start; - error = dnode_next_offset(dn, - DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); - zdb_nicenum(end - start, segsize, sizeof (segsize)); - (void) printf("\t\tsegment [%016llx, %016llx)" - " size %5s\n", (u_longlong_t)start, - (u_longlong_t)end, segsize); - if (error) - break; - start = end; - } - } - - if (db != NULL) - dmu_buf_rele(db, FTAG); -} - -static void -count_dir_mos_objects(dsl_dir_t *dd) -{ - mos_obj_refd(dd->dd_object); - mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj); - mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj); - mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj); - mos_obj_refd(dsl_dir_phys(dd)->dd_clones); -} - -static void -count_ds_mos_objects(dsl_dataset_t *ds) -{ - mos_obj_refd(ds->ds_object); - mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj); - mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj); - mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj); - mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj); - - if (!dsl_dataset_is_snapshot(ds)) { - count_dir_mos_objects(ds->ds_dir); - } -} - -static const char *objset_types[DMU_OST_NUMTYPES] = { - "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; - -static void -dump_dir(objset_t *os) -{ - dmu_objset_stats_t dds; - uint64_t object, object_count; - uint64_t refdbytes, usedobjs, scratch; - char numbuf[32]; - char blkbuf[BP_SPRINTF_LEN + 20]; - char osname[ZFS_MAX_DATASET_NAME_LEN]; - const char *type = "UNKNOWN"; - int verbosity = dump_opt['d']; - int print_header = 1; - unsigned i; - int error; - uint64_t total_slots_used = 0; - uint64_t max_slot_used = 0; - uint64_t dnode_slots; - - /* make sure 
nicenum has enough space */ - CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ); - - dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - dmu_objset_fast_stat(os, &dds); - dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - - if (dds.dds_type < DMU_OST_NUMTYPES) - type = objset_types[dds.dds_type]; - - if (dds.dds_type == DMU_OST_META) { - dds.dds_creation_txg = TXG_INITIAL; - usedobjs = BP_GET_FILL(os->os_rootbp); - refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> - dd_used_bytes; - } else { - dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); - } - - ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); - - zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); - - if (verbosity >= 4) { - (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); - (void) snprintf_blkptr(blkbuf + strlen(blkbuf), - sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); - } else { - blkbuf[0] = '\0'; - } - - dmu_objset_name(os, osname); - - (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " - "%s, %llu objects%s%s\n", - osname, type, (u_longlong_t)dmu_objset_id(os), - (u_longlong_t)dds.dds_creation_txg, - numbuf, (u_longlong_t)usedobjs, blkbuf, - (dds.dds_inconsistent) ? 
" (inconsistent)" : ""); - - if (zopt_objects != 0) { - for (i = 0; i < zopt_objects; i++) - dump_object(os, zopt_object[i], verbosity, - &print_header, NULL); - (void) printf("\n"); - return; - } - - if (dump_opt['i'] != 0 || verbosity >= 2) - dump_intent_log(dmu_objset_zil(os)); - - if (dmu_objset_ds(os) != NULL) { - dsl_dataset_t *ds = dmu_objset_ds(os); - dump_deadlist(&ds->ds_deadlist); - - if (dsl_dataset_remap_deadlist_exists(ds)) { - (void) printf("ds_remap_deadlist:\n"); - dump_deadlist(&ds->ds_remap_deadlist); - } - count_ds_mos_objects(ds); - } - - if (verbosity < 2) - return; - - if (BP_IS_HOLE(os->os_rootbp)) - return; - - dump_object(os, 0, verbosity, &print_header, NULL); - object_count = 0; - if (DMU_USERUSED_DNODE(os) != NULL && - DMU_USERUSED_DNODE(os)->dn_type != 0) { - dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, - NULL); - dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, - NULL); - } - - object = 0; - while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { - dump_object(os, object, verbosity, &print_header, &dnode_slots); - object_count++; - total_slots_used += dnode_slots; - max_slot_used = object + dnode_slots - 1; - } - - (void) printf("\n"); - - (void) printf(" Dnode slots:\n"); - (void) printf("\tTotal used: %10llu\n", - (u_longlong_t)total_slots_used); - (void) printf("\tMax used: %10llu\n", - (u_longlong_t)max_slot_used); - (void) printf("\tPercent empty: %10lf\n", - (double)(max_slot_used - total_slots_used)*100 / - (double)max_slot_used); - - (void) printf("\n"); - - if (error != ESRCH) { - (void) fprintf(stderr, "dmu_object_next() = %d\n", error); - abort(); - } - - ASSERT3U(object_count, ==, usedobjs); - - if (leaked_objects != 0) { - (void) printf("%d potentially leaked objects detected\n", - leaked_objects); - leaked_objects = 0; - } -} - -static void -dump_uberblock(uberblock_t *ub, const char *header, const char *footer) -{ - time_t timestamp = ub->ub_timestamp; - - (void) printf("%s", 
header ? header : ""); - (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); - (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); - (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); - (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); - (void) printf("\ttimestamp = %llu UTC = %s", - (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); - - (void) printf("\tmmp_magic = %016llx\n", - (u_longlong_t)ub->ub_mmp_magic); - if (MMP_VALID(ub)) { - (void) printf("\tmmp_delay = %0llu\n", - (u_longlong_t)ub->ub_mmp_delay); - if (MMP_SEQ_VALID(ub)) - (void) printf("\tmmp_seq = %u\n", - (unsigned int) MMP_SEQ(ub)); - if (MMP_FAIL_INT_VALID(ub)) - (void) printf("\tmmp_fail = %u\n", - (unsigned int) MMP_FAIL_INT(ub)); - if (MMP_INTERVAL_VALID(ub)) - (void) printf("\tmmp_write = %u\n", - (unsigned int) MMP_INTERVAL(ub)); - /* After MMP_* to make summarize_uberblock_mmp cleaner */ - (void) printf("\tmmp_valid = %x\n", - (unsigned int) ub->ub_mmp_config & 0xFF); - } - - if (dump_opt['u'] >= 3) { - char blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); - (void) printf("\trootbp = %s\n", blkbuf); - } - (void) printf("\tcheckpoint_txg = %llu\n", - (u_longlong_t)ub->ub_checkpoint_txg); - (void) printf("%s", footer ?
footer : ""); -} - -static void -dump_config(spa_t *spa) -{ - dmu_buf_t *db; - size_t nvsize = 0; - int error = 0; - - - error = dmu_bonus_hold(spa->spa_meta_objset, - spa->spa_config_object, FTAG, &db); - - if (error == 0) { - nvsize = *(uint64_t *)db->db_data; - dmu_buf_rele(db, FTAG); - - (void) printf("\nMOS Configuration:\n"); - dump_packed_nvlist(spa->spa_meta_objset, - spa->spa_config_object, (void *)&nvsize, 1); - } else { - (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", - (u_longlong_t)spa->spa_config_object, error); - } -} - -static void -dump_cachefile(const char *cachefile) -{ - int fd; - struct stat64 statbuf; - char *buf; - nvlist_t *config; - - if ((fd = open64(cachefile, O_RDONLY)) < 0) { - (void) fprintf(stderr, "cannot open '%s': %s\n", cachefile, - strerror(errno)); - exit(1); - } - - if (fstat64(fd, &statbuf) != 0) { - (void) fprintf(stderr, "failed to stat '%s': %s\n", cachefile, - strerror(errno)); - exit(1); - } - - if ((buf = malloc(statbuf.st_size)) == NULL) { - (void) fprintf(stderr, "failed to allocate %llu bytes\n", - (u_longlong_t)statbuf.st_size); - exit(1); - } - - if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { - (void) fprintf(stderr, "failed to read %llu bytes\n", - (u_longlong_t)statbuf.st_size); - exit(1); - } - - (void) close(fd); - - if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { - (void) fprintf(stderr, "failed to unpack nvlist\n"); - exit(1); - } - - free(buf); - - dump_nvlist(config, 0); - - nvlist_free(config); -} - -#define ZDB_MAX_UB_HEADER_SIZE 32 - -static void -dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) -{ - vdev_t vd; - vdev_t *vdp = &vd; - char header[ZDB_MAX_UB_HEADER_SIZE]; - - vd.vdev_ashift = ashift; - vdp->vdev_top = vdp; - - for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { - uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i); - uberblock_t *ub = (void *)((char *)lbl + uoff); - - if (uberblock_verify(ub)) - continue; - - if ((dump_opt['u'] < 4) && - 
(ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && - (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) - continue; - - (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, - "Uberblock[%d]\n", i); - dump_uberblock(ub, header, ""); - } -} - -static char curpath[PATH_MAX]; - -/* - * Iterate through the path components, recursively passing - * current one's obj and remaining path until we find the obj - * for the last one. - */ -static int -dump_path_impl(objset_t *os, uint64_t obj, char *name) -{ - int err; - int header = 1; - uint64_t child_obj; - char *s; - dmu_buf_t *db; - dmu_object_info_t doi; - - if ((s = strchr(name, '/')) != NULL) - *s = '\0'; - err = zap_lookup(os, obj, name, 8, 1, &child_obj); - - (void) strlcat(curpath, name, sizeof (curpath)); - - if (err != 0) { - (void) fprintf(stderr, "failed to lookup %s: %s\n", - curpath, strerror(err)); - return (err); - } - - child_obj = ZFS_DIRENT_OBJ(child_obj); - err = sa_buf_hold(os, child_obj, FTAG, &db); - if (err != 0) { - (void) fprintf(stderr, - "failed to get SA dbuf for obj %llu: %s\n", - (u_longlong_t)child_obj, strerror(err)); - return (EINVAL); - } - dmu_object_info_from_db(db, &doi); - sa_buf_rele(db, FTAG); - - if (doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) { - (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", - doi.doi_bonus_type, (u_longlong_t)child_obj); - return (EINVAL); - } - - if (dump_opt['v'] > 6) { - (void) printf("obj=%llu %s type=%d bonustype=%d\n", - (u_longlong_t)child_obj, curpath, doi.doi_type, - doi.doi_bonus_type); - } - - (void) strlcat(curpath, "/", sizeof (curpath)); - - switch (doi.doi_type) { - case DMU_OT_DIRECTORY_CONTENTS: - if (s != NULL && *(s + 1) != '\0') - return (dump_path_impl(os, child_obj, s + 1)); - /*FALLTHROUGH*/ - case DMU_OT_PLAIN_FILE_CONTENTS: - dump_object(os, child_obj, dump_opt['v'], &header, NULL); - return (0); - default: - (void) fprintf(stderr, "object %llu has non-file/directory " - "type %d\n", 
(u_longlong_t)obj, doi.doi_type); - break; - } - - return (EINVAL); -} - -/* - * Dump the blocks for the object specified by path inside the dataset. - */ -static int -dump_path(char *ds, char *path) -{ - int err; - objset_t *os; - uint64_t root_obj; - - err = open_objset(ds, DMU_OST_ZFS, FTAG, &os); - if (err != 0) - return (err); - - err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj); - if (err != 0) { - (void) fprintf(stderr, "can't lookup root znode: %s\n", - strerror(err)); - dmu_objset_disown(os, FTAG); - return (EINVAL); - } - - (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds); - - err = dump_path_impl(os, root_obj, path); - - close_objset(os, FTAG); - return (err); -} - -static int -dump_label(const char *dev) -{ - int fd; - vdev_label_t label; - char path[MAXPATHLEN]; - char *buf = label.vl_vdev_phys.vp_nvlist; - size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); - struct stat64 statbuf; - uint64_t psize, ashift; - boolean_t label_found = B_FALSE; - - (void) strlcpy(path, dev, sizeof (path)); - if (dev[0] == '/') { - if (strncmp(dev, ZFS_DISK_ROOTD, - strlen(ZFS_DISK_ROOTD)) == 0) { - (void) snprintf(path, sizeof (path), "%s%s", - ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD)); - } - } else if (stat64(path, &statbuf) != 0) { - char *s; - - (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD, - dev); - if (((s = strrchr(dev, 's')) == NULL && - (s = strchr(dev, 'p')) == NULL) || - !isdigit(*(s + 1))) - (void) strlcat(path, "s0", sizeof (path)); - } - - if ((fd = open64(path, O_RDONLY)) < 0) { - (void) fprintf(stderr, "cannot open '%s': %s\n", path, - strerror(errno)); - exit(1); - } - - if (fstat64(fd, &statbuf) != 0) { - (void) fprintf(stderr, "failed to stat '%s': %s\n", path, - strerror(errno)); - (void) close(fd); - exit(1); - } - - if (S_ISBLK(statbuf.st_mode)) { - (void) fprintf(stderr, - "cannot use '%s': character device required\n", path); - (void) close(fd); - exit(1); - } - - psize = 
statbuf.st_size; - psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); - - for (int l = 0; l < VDEV_LABELS; l++) { - nvlist_t *config = NULL; - - if (!dump_opt['q']) { - (void) printf("------------------------------------\n"); - (void) printf("LABEL %d\n", l); - (void) printf("------------------------------------\n"); - } - - if (pread64(fd, &label, sizeof (label), - vdev_label_offset(psize, l, 0)) != sizeof (label)) { - if (!dump_opt['q']) - (void) printf("failed to read label %d\n", l); - continue; - } - - if (nvlist_unpack(buf, buflen, &config, 0) != 0) { - if (!dump_opt['q']) - (void) printf("failed to unpack label %d\n", l); - ashift = SPA_MINBLOCKSHIFT; - } else { - nvlist_t *vdev_tree = NULL; - - if (!dump_opt['q']) - dump_nvlist(config, 4); - if ((nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || - (nvlist_lookup_uint64(vdev_tree, - ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) - ashift = SPA_MINBLOCKSHIFT; - nvlist_free(config); - label_found = B_TRUE; - } - if (dump_opt['u']) - dump_label_uberblocks(&label, ashift); - } - - (void) close(fd); - - return (label_found ? 0 : 2); -} - -static uint64_t dataset_feature_count[SPA_FEATURES]; -static uint64_t remap_deadlist_count = 0; - -/*ARGSUSED*/ -static int -dump_one_dir(const char *dsname, void *arg) -{ - int error; - objset_t *os; - - error = open_objset(dsname, DMU_OST_ANY, FTAG, &os); - if (error != 0) - return (0); - - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - if (!dmu_objset_ds(os)->ds_feature_inuse[f]) - continue; - ASSERT(spa_feature_table[f].fi_flags & - ZFEATURE_FLAG_PER_DATASET); - dataset_feature_count[f]++; - } - - if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { - remap_deadlist_count++; - } - - dump_dir(os); - close_objset(os, FTAG); - fuid_table_destroy(); - return (0); -} - -/* - * Block statistics. 
- */ -#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) -typedef struct zdb_blkstats { - uint64_t zb_asize; - uint64_t zb_lsize; - uint64_t zb_psize; - uint64_t zb_count; - uint64_t zb_gangs; - uint64_t zb_ditto_samevdev; - uint64_t zb_ditto_same_ms; - uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; -} zdb_blkstats_t; - -/* - * Extended object types to report deferred frees and dedup auto-ditto blocks. - */ -#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) -#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) -#define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) -#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) - -static const char *zdb_ot_extname[] = { - "deferred free", - "dedup ditto", - "other", - "Total", -}; - -#define ZB_TOTAL DN_MAX_LEVELS - -typedef struct zdb_cb { - zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; - uint64_t zcb_removing_size; - uint64_t zcb_checkpoint_size; - uint64_t zcb_dedup_asize; - uint64_t zcb_dedup_blocks; - uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; - uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] - [BPE_PAYLOAD_SIZE]; - uint64_t zcb_start; - hrtime_t zcb_lastprint; - uint64_t zcb_totalasize; - uint64_t zcb_errors[256]; - int zcb_readfails; - int zcb_haderrors; - spa_t *zcb_spa; - uint32_t **zcb_vd_obsolete_counts; -} zdb_cb_t; - -/* test if two DVA offsets from same vdev are within the same metaslab */ -static boolean_t -same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2) -{ - vdev_t *vd = vdev_lookup_top(spa, vdev); - uint64_t ms_shift = vd->vdev_ms_shift; - - return ((off1 >> ms_shift) == (off2 >> ms_shift)); -} - -static void -zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, - dmu_object_type_t type) -{ - uint64_t refcnt = 0; - - ASSERT(type < ZDB_OT_TOTAL); - - if (zilog && zil_bp_tree_add(zilog, bp) != 0) - return; - - spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); - - for (int i = 0; i < 4; i++) { - int l = (i < 2) ? 
BP_GET_LEVEL(bp) : ZB_TOTAL; - int t = (i & 1) ? type : ZDB_OT_TOTAL; - int equal; - zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; - - zb->zb_asize += BP_GET_ASIZE(bp); - zb->zb_lsize += BP_GET_LSIZE(bp); - zb->zb_psize += BP_GET_PSIZE(bp); - zb->zb_count++; - - /* - * The histogram is only big enough to record blocks up to - * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, - * "other", bucket. - */ - unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; - idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); - zb->zb_psize_histogram[idx]++; - - zb->zb_gangs += BP_COUNT_GANG(bp); - - switch (BP_GET_NDVAS(bp)) { - case 2: - if (DVA_GET_VDEV(&bp->blk_dva[0]) == - DVA_GET_VDEV(&bp->blk_dva[1])) { - zb->zb_ditto_samevdev++; - - if (same_metaslab(zcb->zcb_spa, - DVA_GET_VDEV(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[1]))) - zb->zb_ditto_same_ms++; - } - break; - case 3: - equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == - DVA_GET_VDEV(&bp->blk_dva[1])) + - (DVA_GET_VDEV(&bp->blk_dva[0]) == - DVA_GET_VDEV(&bp->blk_dva[2])) + - (DVA_GET_VDEV(&bp->blk_dva[1]) == - DVA_GET_VDEV(&bp->blk_dva[2])); - if (equal != 0) { - zb->zb_ditto_samevdev++; - - if (DVA_GET_VDEV(&bp->blk_dva[0]) == - DVA_GET_VDEV(&bp->blk_dva[1]) && - same_metaslab(zcb->zcb_spa, - DVA_GET_VDEV(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[1]))) - zb->zb_ditto_same_ms++; - else if (DVA_GET_VDEV(&bp->blk_dva[0]) == - DVA_GET_VDEV(&bp->blk_dva[2]) && - same_metaslab(zcb->zcb_spa, - DVA_GET_VDEV(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[0]), - DVA_GET_OFFSET(&bp->blk_dva[2]))) - zb->zb_ditto_same_ms++; - else if (DVA_GET_VDEV(&bp->blk_dva[1]) == - DVA_GET_VDEV(&bp->blk_dva[2]) && - same_metaslab(zcb->zcb_spa, - DVA_GET_VDEV(&bp->blk_dva[1]), - DVA_GET_OFFSET(&bp->blk_dva[1]), - DVA_GET_OFFSET(&bp->blk_dva[2]))) - zb->zb_ditto_same_ms++; - } - break; - } - } - - spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); - - if 
(BP_IS_EMBEDDED(bp)) { - zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; - zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] - [BPE_GET_PSIZE(bp)]++; - return; - } - - if (dump_opt['L']) - return; - - if (BP_GET_DEDUP(bp)) { - ddt_t *ddt; - ddt_entry_t *dde; - - ddt = ddt_select(zcb->zcb_spa, bp); - ddt_enter(ddt); - dde = ddt_lookup(ddt, bp, B_FALSE); - - if (dde == NULL) { - refcnt = 0; - } else { - ddt_phys_t *ddp = ddt_phys_select(dde, bp); - ddt_phys_decref(ddp); - refcnt = ddp->ddp_refcnt; - if (ddt_phys_total_refcnt(dde) == 0) - ddt_remove(ddt, dde); - } - ddt_exit(ddt); - } - - VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, - refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa), - bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); -} - -/* ARGSUSED */ -static void -zdb_blkptr_done(zio_t *zio) -{ - spa_t *spa = zio->io_spa; - blkptr_t *bp = zio->io_bp; - int ioerr = zio->io_error; - zdb_cb_t *zcb = zio->io_private; - zbookmark_phys_t *zb = &zio->io_bookmark; - - abd_free(zio->io_abd); - - mutex_enter(&spa->spa_scrub_lock); - spa->spa_scrub_inflight--; - spa->spa_load_verify_ios--; - cv_broadcast(&spa->spa_scrub_io_cv); - - if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - char blkbuf[BP_SPRINTF_LEN]; - - zcb->zcb_haderrors = 1; - zcb->zcb_errors[ioerr]++; - - if (dump_opt['b'] >= 2) - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - else - blkbuf[0] = '\0'; - - (void) printf("zdb_blkptr_cb: " - "Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- skipping\n", - ioerr, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf); - } - mutex_exit(&spa->spa_scrub_lock); -} - -/* ARGSUSED */ -static int -zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) -{ - zdb_cb_t *zcb = arg; - dmu_object_type_t type; - boolean_t is_metadata; - - if (bp == NULL) - return (0); - - if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { - char 
blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - (void) printf("objset %llu object %llu " - "level %lld offset 0x%llx %s\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (u_longlong_t)blkid2offset(dnp, bp, zb), - blkbuf); - } - - if (BP_IS_HOLE(bp)) - return (0); - - type = BP_GET_TYPE(bp); - - zdb_count_block(zcb, zilog, bp, - (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); - - is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); - - if (!BP_IS_EMBEDDED(bp) && - (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { - size_t size = BP_GET_PSIZE(bp); - abd_t *abd = abd_alloc(size, B_FALSE); - int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; - - /* If it's an intent log block, failure is expected. */ - if (zb->zb_level == ZB_ZIL_LEVEL) - flags |= ZIO_FLAG_SPECULATIVE; - - mutex_enter(&spa->spa_scrub_lock); - while (spa->spa_load_verify_ios > max_inflight) - cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); - spa->spa_scrub_inflight++; - spa->spa_load_verify_ios++; - mutex_exit(&spa->spa_scrub_lock); - - zio_nowait(zio_read(NULL, spa, bp, abd, size, - zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - } - - zcb->zcb_readfails = 0; - - /* only call gethrtime() every 100 blocks */ - static int iters; - if (++iters > 100) - iters = 0; - else - return (0); - - if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { - uint64_t now = gethrtime(); - char buf[10]; - uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; - int kb_per_sec = - 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); - int sec_remaining = - (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ); - - zfs_nicenum(bytes, buf, sizeof (buf)); - (void) fprintf(stderr, - "\r%5s completed (%4dMB/s) " - "estimated time remaining: %uhr %02umin %02usec ", - buf, kb_per_sec / 1024, - 
sec_remaining / 60 / 60, - sec_remaining / 60 % 60, - sec_remaining % 60); - - zcb->zcb_lastprint = now; - } - - return (0); -} - -static void -zdb_leak(void *arg, uint64_t start, uint64_t size) -{ - vdev_t *vd = arg; - - (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", - (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); -} - -static metaslab_ops_t zdb_metaslab_ops = { - NULL /* alloc */ -}; - -static void -zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) -{ - ddt_bookmark_t ddb; - ddt_entry_t dde; - int error; - - ASSERT(!dump_opt['L']); - - bzero(&ddb, sizeof (ddb)); - while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { - blkptr_t blk; - ddt_phys_t *ddp = dde.dde_phys; - - if (ddb.ddb_class == DDT_CLASS_UNIQUE) - return; - - ASSERT(ddt_phys_total_refcnt(&dde) > 1); - - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { - if (ddp->ddp_phys_birth == 0) - continue; - ddt_bp_create(ddb.ddb_checksum, - &dde.dde_key, ddp, &blk); - if (p == DDT_PHYS_DITTO) { - zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); - } else { - zcb->zcb_dedup_asize += - BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); - zcb->zcb_dedup_blocks++; - } - } - ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; - ddt_enter(ddt); - VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); - ddt_exit(ddt); - } - - ASSERT(error == ENOENT); -} - -/* ARGSUSED */ -static void -claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, - uint64_t size, void *arg) -{ - /* - * This callback was called through a remap from - * a device being removed. Therefore, the vdev that - * this callback is applied to is a concrete - * vdev. 
- */ - ASSERT(vdev_is_concrete(vd)); - - VERIFY0(metaslab_claim_impl(vd, offset, size, - spa_min_claim_txg(vd->vdev_spa))); -} - -static void -claim_segment_cb(void *arg, uint64_t offset, uint64_t size) -{ - vdev_t *vd = arg; - - vdev_indirect_ops.vdev_op_remap(vd, offset, size, - claim_segment_impl_cb, NULL); -} - -/* - * After accounting for all allocated blocks that are directly referenced, - * we might have missed a reference to a block from a partially complete - * (and thus unused) indirect mapping object. We perform a secondary pass - * through the metaslabs we have already mapped and claim the destination - * blocks. - */ -static void -zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) -{ - if (dump_opt['L']) - return; - - if (spa->spa_vdev_removal == NULL) - return; - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - - spa_vdev_removal_t *svr = spa->spa_vdev_removal; - vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - - for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { - metaslab_t *msp = vd->vdev_ms[msi]; - - if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim)) - break; - - ASSERT0(range_tree_space(svr->svr_allocd_segs)); - - if (msp->ms_sm != NULL) { - VERIFY0(space_map_load(msp->ms_sm, - svr->svr_allocd_segs, SM_ALLOC)); - - /* - * Clear everything past what has been synced unless - * it's past the spacemap, because we have not allocated - * mappings for it yet. 
- */ - uint64_t vim_max_offset = - vdev_indirect_mapping_max_offset(vim); - uint64_t sm_end = msp->ms_sm->sm_start + - msp->ms_sm->sm_size; - if (sm_end > vim_max_offset) - range_tree_clear(svr->svr_allocd_segs, - vim_max_offset, sm_end - vim_max_offset); - } - - zcb->zcb_removing_size += - range_tree_space(svr->svr_allocd_segs); - range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); - } - - spa_config_exit(spa, SCL_CONFIG, FTAG); -} - -/* ARGSUSED */ -static int -increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - zdb_cb_t *zcb = arg; - spa_t *spa = zcb->zcb_spa; - vdev_t *vd; - const dva_t *dva = &bp->blk_dva[0]; - - ASSERT(!dump_opt['L']); - ASSERT3U(BP_GET_NDVAS(bp), ==, 1); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva)); - ASSERT3P(vd, !=, NULL); - spa_config_exit(spa, SCL_VDEV, FTAG); - - ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0); - ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL); - - vdev_indirect_mapping_increment_obsolete_count( - vd->vdev_indirect_mapping, - DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), - zcb->zcb_vd_obsolete_counts[vd->vdev_id]); - - return (0); -} - -static uint32_t * -zdb_load_obsolete_counts(vdev_t *vd) -{ - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - spa_t *spa = vd->vdev_spa; - spa_condensing_indirect_phys_t *scip = - &spa->spa_condensing_indirect_phys; - uint32_t *counts; - - EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL); - counts = vdev_indirect_mapping_load_obsolete_counts(vim); - if (vd->vdev_obsolete_sm != NULL) { - vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, - vd->vdev_obsolete_sm); - } - if (scip->scip_vdev == vd->vdev_id && - scip->scip_prev_obsolete_sm_object != 0) { - space_map_t *prev_obsolete_sm = NULL; - VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, - scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); - 
vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, - prev_obsolete_sm); - space_map_close(prev_obsolete_sm); - } - return (counts); -} - -typedef struct checkpoint_sm_exclude_entry_arg { - vdev_t *cseea_vd; - uint64_t cseea_checkpoint_size; -} checkpoint_sm_exclude_entry_arg_t; - -static int -checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) -{ - checkpoint_sm_exclude_entry_arg_t *cseea = arg; - vdev_t *vd = cseea->cseea_vd; - metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; - uint64_t end = sme->sme_offset + sme->sme_run; - - ASSERT(sme->sme_type == SM_FREE); - - /* - * Since the vdev_checkpoint_sm exists in the vdev level - * and the ms_sm space maps exist in the metaslab level, - * an entry in the checkpoint space map could theoretically - * cross the boundaries of the metaslab that it belongs. - * - * In reality, because of the way that we populate and - * manipulate the checkpoint's space maps currently, - * there shouldn't be any entries that cross metaslabs. - * Hence the assertion below. - * - * That said, there is no fundamental requirement that - * the checkpoint's space map entries should not cross - * metaslab boundaries. So if needed we could add code - * that handles metaslab-crossing segments in the future. - */ - VERIFY3U(sme->sme_offset, >=, ms->ms_start); - VERIFY3U(end, <=, ms->ms_start + ms->ms_size); - - /* - * By removing the entry from the allocated segments we - * also verify that the entry is there to begin with. - */ - mutex_enter(&ms->ms_lock); - range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); - mutex_exit(&ms->ms_lock); - - cseea->cseea_checkpoint_size += sme->sme_run; - return (0); -} - -static void -zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb) -{ - spa_t *spa = vd->vdev_spa; - space_map_t *checkpoint_sm = NULL; - uint64_t checkpoint_sm_obj; - - /* - * If there is no vdev_top_zap, we are in a pool whose - * version predates the pool checkpoint feature. 
- */ - if (vd->vdev_top_zap == 0) - return; - - /* - * If there is no reference of the vdev_checkpoint_sm in - * the vdev_top_zap, then one of the following scenarios - * is true: - * - * 1] There is no checkpoint - * 2] There is a checkpoint, but no checkpointed blocks - * have been freed yet - * 3] The current vdev is indirect - * - * In these cases we return immediately. - */ - if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, - VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) - return; - - VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, - VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, - &checkpoint_sm_obj)); - - checkpoint_sm_exclude_entry_arg_t cseea; - cseea.cseea_vd = vd; - cseea.cseea_checkpoint_size = 0; - - VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), - checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); - - VERIFY0(space_map_iterate(checkpoint_sm, - space_map_length(checkpoint_sm), - checkpoint_sm_exclude_entry_cb, &cseea)); - space_map_close(checkpoint_sm); - - zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size; -} - -static void -zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb) -{ - ASSERT(!dump_opt['L']); - - vdev_t *rvd = spa->spa_root_vdev; - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id); - zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb); - } -} - -static void -load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) -{ - vdev_t *rvd = spa->spa_root_vdev; - for (uint64_t i = 0; i < rvd->vdev_children; i++) { - vdev_t *vd = rvd->vdev_child[i]; - - ASSERT3U(i, ==, vd->vdev_id); - - if (vd->vdev_ops == &vdev_indirect_ops) - continue; - - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { - metaslab_t *msp = vd->vdev_ms[m]; - - (void) fprintf(stderr, - "\rloading concrete vdev %llu, " - "metaslab %llu of %llu ...", - (longlong_t)vd->vdev_id, - (longlong_t)msp->ms_id, - (longlong_t)vd->vdev_ms_count); - - 
mutex_enter(&msp->ms_lock); - metaslab_unload(msp); - - /* - * We don't want to spend the CPU manipulating the - * size-ordered tree, so clear the range_tree ops. - */ - msp->ms_allocatable->rt_ops = NULL; - - if (msp->ms_sm != NULL) { - VERIFY0(space_map_load(msp->ms_sm, - msp->ms_allocatable, maptype)); - } - if (!msp->ms_loaded) - msp->ms_loaded = B_TRUE; - mutex_exit(&msp->ms_lock); - } - } -} - -/* - * vm_idxp is an in-out parameter which (for indirect vdevs) is the - * index in vim_entries that has the first entry in this metaslab. - * On return, it will be set to the first entry after this metaslab. - */ -static void -load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, - uint64_t *vim_idxp) -{ - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - - mutex_enter(&msp->ms_lock); - metaslab_unload(msp); - - /* - * We don't want to spend the CPU manipulating the - * size-ordered tree, so clear the range_tree ops. - */ - msp->ms_allocatable->rt_ops = NULL; - - for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); - (*vim_idxp)++) { - vdev_indirect_mapping_entry_phys_t *vimep = - &vim->vim_entries[*vim_idxp]; - uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); - uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); - ASSERT3U(ent_offset, >=, msp->ms_start); - if (ent_offset >= msp->ms_start + msp->ms_size) - break; - - /* - * Mappings do not cross metaslab boundaries, - * because we create them by walking the metaslabs. 
- */ - ASSERT3U(ent_offset + ent_len, <=, - msp->ms_start + msp->ms_size); - range_tree_add(msp->ms_allocatable, ent_offset, ent_len); - } - - if (!msp->ms_loaded) - msp->ms_loaded = B_TRUE; - mutex_exit(&msp->ms_lock); -} - -static void -zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb) -{ - ASSERT(!dump_opt['L']); - - vdev_t *rvd = spa->spa_root_vdev; - for (uint64_t c = 0; c < rvd->vdev_children; c++) { - vdev_t *vd = rvd->vdev_child[c]; - - ASSERT3U(c, ==, vd->vdev_id); - - if (vd->vdev_ops != &vdev_indirect_ops) - continue; - - /* - * Note: we don't check for mapping leaks on - * removing vdevs because their ms_allocatable's - * are used to look for leaks in allocated space. - */ - zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd); - - /* - * Normally, indirect vdevs don't have any - * metaslabs. We want to set them up for - * zio_claim(). - */ - VERIFY0(vdev_metaslab_init(vd, 0)); - - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - uint64_t vim_idx = 0; - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { - - (void) fprintf(stderr, - "\rloading indirect vdev %llu, " - "metaslab %llu of %llu ...", - (longlong_t)vd->vdev_id, - (longlong_t)vd->vdev_ms[m]->ms_id, - (longlong_t)vd->vdev_ms_count); - - load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m], - &vim_idx); - } - ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim)); - } -} - -static void -zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) -{ - zcb->zcb_spa = spa; - - if (dump_opt['L']) - return; - - dsl_pool_t *dp = spa->spa_dsl_pool; - vdev_t *rvd = spa->spa_root_vdev; - - /* - * We are going to be changing the meaning of the metaslab's - * ms_allocatable. Ensure that the allocator doesn't try to - * use the tree. 
- */ - spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; - spa->spa_log_class->mc_ops = &zdb_metaslab_ops; - - zcb->zcb_vd_obsolete_counts = - umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), - UMEM_NOFAIL); - - /* - * For leak detection, we overload the ms_allocatable trees - * to contain allocated segments instead of free segments. - * As a result, we can't use the normal metaslab_load/unload - * interfaces. - */ - zdb_leak_init_prepare_indirect_vdevs(spa, zcb); - load_concrete_ms_allocatable_trees(spa, SM_ALLOC); - - /* - * On load_concrete_ms_allocatable_trees() we loaded all the - * allocated entries from the ms_sm to the ms_allocatable for - * each metaslab. If the pool has a checkpoint or is in the - * middle of discarding a checkpoint, some of these blocks - * may have been freed but their ms_sm may not have been - * updated because they are referenced by the checkpoint. In - * order to avoid false-positives during leak-detection, we - * go through the vdev's checkpoint space map and exclude all - * its entries from their relevant ms_allocatable. - * - * We also aggregate the space held by the checkpoint and add - * it to zcb_checkpoint_size. - * - * Note that at this point we are also verifying that all the - * entries on the checkpoint_sm are marked as allocated in - * the ms_sm of their relevant metaslab. 
- * [see comment in checkpoint_sm_exclude_entry_cb()] - */ - zdb_leak_init_exclude_checkpoint(spa, zcb); - ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa)); - - /* for cleaner progress output */ - (void) fprintf(stderr, "\n"); - - if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { - ASSERT(spa_feature_is_enabled(spa, - SPA_FEATURE_DEVICE_REMOVAL)); - (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj, - increment_indirect_mapping_cb, zcb, NULL); - } - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - zdb_ddt_leak_init(spa, zcb); - spa_config_exit(spa, SCL_CONFIG, FTAG); -} - -static boolean_t -zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) -{ - boolean_t leaks = B_FALSE; - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - uint64_t total_leaked = 0; - - ASSERT(vim != NULL); - - for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { - vdev_indirect_mapping_entry_phys_t *vimep = - &vim->vim_entries[i]; - uint64_t obsolete_bytes = 0; - uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); - metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; - - /* - * This is not very efficient but it's easy to - * verify correctness. 
- */ - for (uint64_t inner_offset = 0; - inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); - inner_offset += 1 << vd->vdev_ashift) { - if (range_tree_contains(msp->ms_allocatable, - offset + inner_offset, 1 << vd->vdev_ashift)) { - obsolete_bytes += 1 << vd->vdev_ashift; - } - } - - int64_t bytes_leaked = obsolete_bytes - - zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]; - ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=, - zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]); - if (bytes_leaked != 0 && - (vdev_obsolete_counts_are_precise(vd) || - dump_opt['d'] >= 5)) { - (void) printf("obsolete indirect mapping count " - "mismatch on %llu:%llx:%llx : %llx bytes leaked\n", - (u_longlong_t)vd->vdev_id, - (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), - (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), - (u_longlong_t)bytes_leaked); - } - total_leaked += ABS(bytes_leaked); - } - - if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) { - int pct_leaked = total_leaked * 100 / - vdev_indirect_mapping_bytes_mapped(vim); - (void) printf("cannot verify obsolete indirect mapping " - "counts of vdev %llu because precise feature was not " - "enabled when it was removed: %d%% (%llx bytes) of mapping" - "unreferenced\n", - (u_longlong_t)vd->vdev_id, pct_leaked, - (u_longlong_t)total_leaked); - } else if (total_leaked > 0) { - (void) printf("obsolete indirect mapping count mismatch " - "for vdev %llu -- %llx total bytes mismatched\n", - (u_longlong_t)vd->vdev_id, - (u_longlong_t)total_leaked); - leaks |= B_TRUE; - } - - vdev_indirect_mapping_free_obsolete_counts(vim, - zcb->zcb_vd_obsolete_counts[vd->vdev_id]); - zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL; - - return (leaks); -} - -static boolean_t -zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) -{ - if (dump_opt['L']) - return (B_FALSE); - - boolean_t leaks = B_FALSE; - - vdev_t *rvd = spa->spa_root_vdev; - for (unsigned c = 0; c < rvd->vdev_children; c++) { - vdev_t *vd = rvd->vdev_child[c]; -#if DEBUG - metaslab_group_t *mg = 
vd->vdev_mg; -#endif - - if (zcb->zcb_vd_obsolete_counts[c] != NULL) { - leaks |= zdb_check_for_obsolete_leaks(vd, zcb); - } - - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { - metaslab_t *msp = vd->vdev_ms[m]; - ASSERT3P(mg, ==, msp->ms_group); - - /* - * ms_allocatable has been overloaded - * to contain allocated segments. Now that - * we finished traversing all blocks, any - * block that remains in the ms_allocatable - * represents an allocated block that we - * did not claim during the traversal. - * Claimed blocks would have been removed - * from the ms_allocatable. For indirect - * vdevs, space remaining in the tree - * represents parts of the mapping that are - * not referenced, which is not a bug. - */ - if (vd->vdev_ops == &vdev_indirect_ops) { - range_tree_vacate(msp->ms_allocatable, - NULL, NULL); - } else { - range_tree_vacate(msp->ms_allocatable, - zdb_leak, vd); - } - - if (msp->ms_loaded) { - msp->ms_loaded = B_FALSE; - } - } - - } - - umem_free(zcb->zcb_vd_obsolete_counts, - rvd->vdev_children * sizeof (uint32_t *)); - zcb->zcb_vd_obsolete_counts = NULL; - - return (leaks); -} - -/* ARGSUSED */ -static int -count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - zdb_cb_t *zcb = arg; - - if (dump_opt['b'] >= 5) { - char blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - (void) printf("[%s] %s\n", - "deferred free", blkbuf); - } - zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); - return (0); -} - -static int -dump_block_stats(spa_t *spa) -{ - zdb_cb_t zcb; - zdb_blkstats_t *zb, *tzb; - uint64_t norm_alloc, norm_space, total_alloc, total_found; - int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; - boolean_t leaks = B_FALSE; - int err; - - bzero(&zcb, sizeof (zcb)); - (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", - (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", - (dump_opt['c'] == 1) ? "metadata " : "", - dump_opt['c'] ? 
"checksums " : "", - (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", - !dump_opt['L'] ? "nothing leaked " : ""); - - /* - * When leak detection is enabled we load all space maps as SM_ALLOC - * maps, then traverse the pool claiming each block we discover. If - * the pool is perfectly consistent, the segment trees will be empty - * when we're done. Anything left over is a leak; any block we can't - * claim (because it's not part of any space map) is a double - * allocation, reference to a freed block, or an unclaimed log block. - * - * When leak detection is disabled (-L option) we still traverse the - * pool claiming each block we discover, but we skip opening any space - * maps. - */ - bzero(&zcb, sizeof (zdb_cb_t)); - zdb_leak_init(spa, &zcb); - - /* - * If there's a deferred-free bplist, process that first. - */ - (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, - count_block_cb, &zcb, NULL); - - if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { - (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, - count_block_cb, &zcb, NULL); - } - - zdb_claim_removing(spa, &zcb); - - if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { - VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, - spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, - &zcb, NULL)); - } - - if (dump_opt['c'] > 1) - flags |= TRAVERSE_PREFETCH_DATA; - - zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); - zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa)); - zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); - zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); - err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); - - /* - * If we've traversed the data blocks then we need to wait for those - * I/Os to complete. We leverage "The Godfather" zio to wait on - * all async I/Os to complete. 
- */ - if (dump_opt['c']) { - for (int i = 0; i < max_ncpus; i++) { - (void) zio_wait(spa->spa_async_zio_root[i]); - spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); - } - } - - /* - * Done after zio_wait() since zcb_haderrors is modified in - * zdb_blkptr_done() - */ - zcb.zcb_haderrors |= err; - - if (zcb.zcb_haderrors) { - (void) printf("\nError counts:\n\n"); - (void) printf("\t%5s %s\n", "errno", "count"); - for (int e = 0; e < 256; e++) { - if (zcb.zcb_errors[e] != 0) { - (void) printf("\t%5d %llu\n", - e, (u_longlong_t)zcb.zcb_errors[e]); - } - } - } - - /* - * Report any leaked segments. - */ - leaks |= zdb_leak_fini(spa, &zcb); - - tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; - - norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); - norm_space = metaslab_class_get_space(spa_normal_class(spa)); - - total_alloc = norm_alloc + - metaslab_class_get_alloc(spa_log_class(spa)) + - metaslab_class_get_alloc(spa_special_class(spa)) + - metaslab_class_get_alloc(spa_dedup_class(spa)); - total_found = tzb->zb_asize - zcb.zcb_dedup_asize + - zcb.zcb_removing_size + zcb.zcb_checkpoint_size; - - if (total_found == total_alloc && !dump_opt['L']) { - (void) printf("\n\tNo leaks (block sum matches space" - " maps exactly)\n"); - } else if (!dump_opt['L']) { - (void) printf("block traversal size %llu != alloc %llu " - "(%s %lld)\n", - (u_longlong_t)total_found, - (u_longlong_t)total_alloc, - (dump_opt['L']) ? 
"unreachable" : "leaked", - (longlong_t)(total_alloc - total_found)); - leaks = B_TRUE; - } - - if (tzb->zb_count == 0) - return (2); - - (void) printf("\n"); - (void) printf("\t%-16s %14llu\n", "bp count:", - (u_longlong_t)tzb->zb_count); - (void) printf("\t%-16s %14llu\n", "ganged count:", - (longlong_t)tzb->zb_gangs); - (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:", - (u_longlong_t)tzb->zb_lsize, - (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); - (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", - "bp physical:", (u_longlong_t)tzb->zb_psize, - (u_longlong_t)(tzb->zb_psize / tzb->zb_count), - (double)tzb->zb_lsize / tzb->zb_psize); - (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", - "bp allocated:", (u_longlong_t)tzb->zb_asize, - (u_longlong_t)(tzb->zb_asize / tzb->zb_count), - (double)tzb->zb_lsize / tzb->zb_asize); - (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n", - "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize, - (u_longlong_t)zcb.zcb_dedup_blocks, - (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0); - (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", - (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); - - if (spa_special_class(spa)->mc_rotor != NULL) { - uint64_t alloc = metaslab_class_get_alloc( - spa_special_class(spa)); - uint64_t space = metaslab_class_get_space( - spa_special_class(spa)); - - (void) printf("\t%-16s %14llu used: %5.2f%%\n", - "Special class", (u_longlong_t)alloc, - 100.0 * alloc / space); - } - - if (spa_dedup_class(spa)->mc_rotor != NULL) { - uint64_t alloc = metaslab_class_get_alloc( - spa_dedup_class(spa)); - uint64_t space = metaslab_class_get_space( - spa_dedup_class(spa)); - - (void) printf("\t%-16s %14llu used: %5.2f%%\n", - "Dedup class", (u_longlong_t)alloc, - 100.0 * alloc / space); - } - - for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { - if (zcb.zcb_embedded_blocks[i] == 0) - continue; - (void) printf("\n"); - (void) 
printf("\tadditional, non-pointer bps of type %u: " - "%10llu\n", - i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); - - if (dump_opt['b'] >= 3) { - (void) printf("\t number of (compressed) bytes: " - "number of bps\n"); - dump_histogram(zcb.zcb_embedded_histogram[i], - sizeof (zcb.zcb_embedded_histogram[i]) / - sizeof (zcb.zcb_embedded_histogram[i][0]), 0); - } - } - - if (tzb->zb_ditto_samevdev != 0) { - (void) printf("\tDittoed blocks on same vdev: %llu\n", - (longlong_t)tzb->zb_ditto_samevdev); - } - if (tzb->zb_ditto_same_ms != 0) { - (void) printf("\tDittoed blocks in same metaslab: %llu\n", - (longlong_t)tzb->zb_ditto_same_ms); - } - - for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { - vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; - vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; - - if (vim == NULL) { - continue; - } - - char mem[32]; - zdb_nicenum(vdev_indirect_mapping_num_entries(vim), - mem, vdev_indirect_mapping_size(vim)); - - (void) printf("\tindirect vdev id %llu has %llu segments " - "(%s in memory)\n", - (longlong_t)vd->vdev_id, - (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); - } - - if (dump_opt['b'] >= 2) { - int l, t, level; - (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" - "\t avg\t comp\t%%Total\tType\n"); - - for (t = 0; t <= ZDB_OT_TOTAL; t++) { - char csize[32], lsize[32], psize[32], asize[32]; - char avg[32], gang[32]; - const char *typename; - - /* make sure nicenum has enough space */ - CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ); - CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ); - - if (t < DMU_OT_NUMTYPES) - typename = dmu_ot[t].ot_name; - else - typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; - - if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { - (void) printf("%6s\t%5s\t%5s\t%5s" - "\t%5s\t%5s\t%6s\t%s\n", - "-", - "-", - "-", - "-", - 
"-", - "-", - "-", - typename); - continue; - } - - for (l = ZB_TOTAL - 1; l >= -1; l--) { - level = (l == -1 ? ZB_TOTAL : l); - zb = &zcb.zcb_type[level][t]; - - if (zb->zb_asize == 0) - continue; - - if (dump_opt['b'] < 3 && level != ZB_TOTAL) - continue; - - if (level == 0 && zb->zb_asize == - zcb.zcb_type[ZB_TOTAL][t].zb_asize) - continue; - - zdb_nicenum(zb->zb_count, csize, - sizeof (csize)); - zdb_nicenum(zb->zb_lsize, lsize, - sizeof (lsize)); - zdb_nicenum(zb->zb_psize, psize, - sizeof (psize)); - zdb_nicenum(zb->zb_asize, asize, - sizeof (asize)); - zdb_nicenum(zb->zb_asize / zb->zb_count, avg, - sizeof (avg)); - zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); - - (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" - "\t%5.2f\t%6.2f\t", - csize, lsize, psize, asize, avg, - (double)zb->zb_lsize / zb->zb_psize, - 100.0 * zb->zb_asize / tzb->zb_asize); - - if (level == ZB_TOTAL) - (void) printf("%s\n", typename); - else - (void) printf(" L%d %s\n", - level, typename); - - if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { - (void) printf("\t number of ganged " - "blocks: %s\n", gang); - } - - if (dump_opt['b'] >= 4) { - (void) printf("psize " - "(in 512-byte sectors): " - "number of blocks\n"); - dump_histogram(zb->zb_psize_histogram, - PSIZE_HISTO_SIZE, 0); - } - } - } - } - - (void) printf("\n"); - - if (leaks) - return (2); - - if (zcb.zcb_haderrors) - return (3); - - return (0); -} - -typedef struct zdb_ddt_entry { - ddt_key_t zdde_key; - uint64_t zdde_ref_blocks; - uint64_t zdde_ref_lsize; - uint64_t zdde_ref_psize; - uint64_t zdde_ref_dsize; - avl_node_t zdde_node; -} zdb_ddt_entry_t; - -/* ARGSUSED */ -static int -zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) -{ - avl_tree_t *t = arg; - avl_index_t where; - zdb_ddt_entry_t *zdde, zdde_search; - - if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) - return (0); - - if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { - (void) 
printf("traversing objset %llu, %llu objects, " - "%lu blocks so far\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)BP_GET_FILL(bp), - avl_numnodes(t)); - } - - if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || - BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) - return (0); - - ddt_key_fill(&zdde_search.zdde_key, bp); - - zdde = avl_find(t, &zdde_search, &where); - - if (zdde == NULL) { - zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); - zdde->zdde_key = zdde_search.zdde_key; - avl_insert(t, zdde, where); - } - - zdde->zdde_ref_blocks += 1; - zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); - zdde->zdde_ref_psize += BP_GET_PSIZE(bp); - zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); - - return (0); -} - -static void -dump_simulated_ddt(spa_t *spa) -{ - avl_tree_t t; - void *cookie = NULL; - zdb_ddt_entry_t *zdde; - ddt_histogram_t ddh_total; - ddt_stat_t dds_total; - - bzero(&ddh_total, sizeof (ddh_total)); - bzero(&dds_total, sizeof (dds_total)); - avl_create(&t, ddt_entry_compare, - sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - - (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - zdb_ddt_add_cb, &t); - - spa_config_exit(spa, SCL_CONFIG, FTAG); - - while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { - ddt_stat_t dds; - uint64_t refcnt = zdde->zdde_ref_blocks; - ASSERT(refcnt != 0); - - dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; - dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; - dds.dds_psize = zdde->zdde_ref_psize / refcnt; - dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; - - dds.dds_ref_blocks = zdde->zdde_ref_blocks; - dds.dds_ref_lsize = zdde->zdde_ref_lsize; - dds.dds_ref_psize = zdde->zdde_ref_psize; - dds.dds_ref_dsize = zdde->zdde_ref_dsize; - - ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], - &dds, 0); - - umem_free(zdde, sizeof (*zdde)); - } - - avl_destroy(&t); - - ddt_histogram_stat(&dds_total, 
&ddh_total); - - (void) printf("Simulated DDT histogram:\n"); - - zpool_dump_ddt(&dds_total, &ddh_total); - - dump_dedup_ratio(&dds_total); -} - -static int -verify_device_removal_feature_counts(spa_t *spa) -{ - uint64_t dr_feature_refcount = 0; - uint64_t oc_feature_refcount = 0; - uint64_t indirect_vdev_count = 0; - uint64_t precise_vdev_count = 0; - uint64_t obsolete_counts_object_count = 0; - uint64_t obsolete_sm_count = 0; - uint64_t obsolete_counts_count = 0; - uint64_t scip_count = 0; - uint64_t obsolete_bpobj_count = 0; - int ret = 0; - - spa_condensing_indirect_phys_t *scip = - &spa->spa_condensing_indirect_phys; - if (scip->scip_next_mapping_object != 0) { - vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; - ASSERT(scip->scip_prev_obsolete_sm_object != 0); - ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); - - (void) printf("Condensing indirect vdev %llu: new mapping " - "object %llu, prev obsolete sm %llu\n", - (u_longlong_t)scip->scip_vdev, - (u_longlong_t)scip->scip_next_mapping_object, - (u_longlong_t)scip->scip_prev_obsolete_sm_object); - if (scip->scip_prev_obsolete_sm_object != 0) { - space_map_t *prev_obsolete_sm = NULL; - VERIFY0(space_map_open(&prev_obsolete_sm, - spa->spa_meta_objset, - scip->scip_prev_obsolete_sm_object, - 0, vd->vdev_asize, 0)); - dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); - (void) printf("\n"); - space_map_close(prev_obsolete_sm); - } - - scip_count += 2; - } - - for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { - vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; - vdev_indirect_config_t *vic = &vd->vdev_indirect_config; - - if (vic->vic_mapping_object != 0) { - ASSERT(vd->vdev_ops == &vdev_indirect_ops || - vd->vdev_removing); - indirect_vdev_count++; - - if (vd->vdev_indirect_mapping->vim_havecounts) { - obsolete_counts_count++; - } - } - if (vdev_obsolete_counts_are_precise(vd)) { - ASSERT(vic->vic_mapping_object != 0); - precise_vdev_count++; - } - if (vdev_obsolete_sm_object(vd) != 
0) { - ASSERT(vic->vic_mapping_object != 0); - obsolete_sm_count++; - } - } - - (void) feature_get_refcount(spa, - &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], - &dr_feature_refcount); - (void) feature_get_refcount(spa, - &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], - &oc_feature_refcount); - - if (dr_feature_refcount != indirect_vdev_count) { - ret = 1; - (void) printf("Number of indirect vdevs (%llu) " \ - "does not match feature count (%llu)\n", - (u_longlong_t)indirect_vdev_count, - (u_longlong_t)dr_feature_refcount); - } else { - (void) printf("Verified device_removal feature refcount " \ - "of %llu is correct\n", - (u_longlong_t)dr_feature_refcount); - } - - if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_OBSOLETE_BPOBJ) == 0) { - obsolete_bpobj_count++; - } - - - obsolete_counts_object_count = precise_vdev_count; - obsolete_counts_object_count += obsolete_sm_count; - obsolete_counts_object_count += obsolete_counts_count; - obsolete_counts_object_count += scip_count; - obsolete_counts_object_count += obsolete_bpobj_count; - obsolete_counts_object_count += remap_deadlist_count; - - if (oc_feature_refcount != obsolete_counts_object_count) { - ret = 1; - (void) printf("Number of obsolete counts objects (%llu) " \ - "does not match feature count (%llu)\n", - (u_longlong_t)obsolete_counts_object_count, - (u_longlong_t)oc_feature_refcount); - (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " - "ob:%llu rd:%llu\n", - (u_longlong_t)precise_vdev_count, - (u_longlong_t)obsolete_sm_count, - (u_longlong_t)obsolete_counts_count, - (u_longlong_t)scip_count, - (u_longlong_t)obsolete_bpobj_count, - (u_longlong_t)remap_deadlist_count); - } else { - (void) printf("Verified indirect_refcount feature refcount " \ - "of %llu is correct\n", - (u_longlong_t)oc_feature_refcount); - } - return (ret); -} - -static void -zdb_set_skip_mmp(char *target) -{ - spa_t *spa; - - /* - * Disable the activity check to allow examination of - * active pools. 
- */ - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(target)) != NULL) { - spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; - } - mutex_exit(&spa_namespace_lock); -} - -#define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" -/* - * Import the checkpointed state of the pool specified by the target - * parameter as readonly. The function also accepts a pool config - * as an optional parameter, else it attempts to infer the config by - * the name of the target pool. - * - * Note that the checkpointed state's pool name will be the name of - * the original pool with the above suffix appened to it. In addition, - * if the target is not a pool name (e.g. a path to a dataset) then - * the new_path parameter is populated with the updated path to - * reflect the fact that we are looking into the checkpointed state. - * - * The function returns a newly-allocated copy of the name of the - * pool containing the checkpointed state. When this copy is no - * longer needed it should be freed with free(3C). Same thing - * applies to the new_path parameter if allocated. 
- */ -static char * -import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) -{ - int error = 0; - char *poolname, *bogus_name; - - /* If the target is not a pool, the extract the pool name */ - char *path_start = strchr(target, '/'); - if (path_start != NULL) { - size_t poolname_len = path_start - target; - poolname = strndup(target, poolname_len); - } else { - poolname = target; - } - - if (cfg == NULL) { - zdb_set_skip_mmp(poolname); - error = spa_get_stats(poolname, &cfg, NULL, 0); - if (error != 0) { - fatal("Tried to read config of pool \"%s\" but " - "spa_get_stats() failed with error %d\n", - poolname, error); - } - } - - (void) asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX); - fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); - - error = spa_import(bogus_name, cfg, NULL, - ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | - ZFS_IMPORT_SKIP_MMP); - if (error != 0) { - fatal("Tried to import pool \"%s\" but spa_import() failed " - "with error %d\n", bogus_name, error); - } - - if (new_path != NULL && path_start != NULL) - (void) asprintf(new_path, "%s%s", bogus_name, path_start); - - if (target != poolname) - free(poolname); - - return (bogus_name); -} - -typedef struct verify_checkpoint_sm_entry_cb_arg { - vdev_t *vcsec_vd; - - /* the following fields are only used for printing progress */ - uint64_t vcsec_entryid; - uint64_t vcsec_num_entries; -} verify_checkpoint_sm_entry_cb_arg_t; - -#define ENTRIES_PER_PROGRESS_UPDATE 10000 - -static int -verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) -{ - verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; - vdev_t *vd = vcsec->vcsec_vd; - metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; - uint64_t end = sme->sme_offset + sme->sme_run; - - ASSERT(sme->sme_type == SM_FREE); - - if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { - (void) fprintf(stderr, - "\rverifying vdev %llu, space map entry %llu of %llu ...", - (longlong_t)vd->vdev_id, 
- (longlong_t)vcsec->vcsec_entryid, - (longlong_t)vcsec->vcsec_num_entries); - } - vcsec->vcsec_entryid++; - - /* - * See comment in checkpoint_sm_exclude_entry_cb() - */ - VERIFY3U(sme->sme_offset, >=, ms->ms_start); - VERIFY3U(end, <=, ms->ms_start + ms->ms_size); - - /* - * The entries in the vdev_checkpoint_sm should be marked as - * allocated in the checkpointed state of the pool, therefore - * their respective ms_allocateable trees should not contain them. - */ - mutex_enter(&ms->ms_lock); - range_tree_verify_not_present(ms->ms_allocatable, - sme->sme_offset, sme->sme_run); - mutex_exit(&ms->ms_lock); - - return (0); -} - -/* - * Verify that all segments in the vdev_checkpoint_sm are allocated - * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's - * ms_allocatable). - * - * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of - * each vdev in the current state of the pool to the metaslab space maps - * (ms_sm) of the checkpointed state of the pool. - * - * Note that the function changes the state of the ms_allocatable - * trees of the current spa_t. The entries of these ms_allocatable - * trees are cleared out and then repopulated from with the free - * entries of their respective ms_sm space maps. - */ -static void -verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) -{ - vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; - vdev_t *current_rvd = current->spa_root_vdev; - - load_concrete_ms_allocatable_trees(checkpoint, SM_FREE); - - for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) { - vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c]; - vdev_t *current_vd = current_rvd->vdev_child[c]; - - space_map_t *checkpoint_sm = NULL; - uint64_t checkpoint_sm_obj; - - if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { - /* - * Since we don't allow device removal in a pool - * that has a checkpoint, we expect that all removed - * vdevs were removed from the pool before the - * checkpoint. 
- */ - ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); - continue; - } - - /* - * If the checkpoint space map doesn't exist, then nothing - * here is checkpointed so there's nothing to verify. - */ - if (current_vd->vdev_top_zap == 0 || - zap_contains(spa_meta_objset(current), - current_vd->vdev_top_zap, - VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) - continue; - - VERIFY0(zap_lookup(spa_meta_objset(current), - current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, - sizeof (uint64_t), 1, &checkpoint_sm_obj)); - - VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current), - checkpoint_sm_obj, 0, current_vd->vdev_asize, - current_vd->vdev_ashift)); - - verify_checkpoint_sm_entry_cb_arg_t vcsec; - vcsec.vcsec_vd = ckpoint_vd; - vcsec.vcsec_entryid = 0; - vcsec.vcsec_num_entries = - space_map_length(checkpoint_sm) / sizeof (uint64_t); - VERIFY0(space_map_iterate(checkpoint_sm, - space_map_length(checkpoint_sm), - verify_checkpoint_sm_entry_cb, &vcsec)); - dump_spacemap(current->spa_meta_objset, checkpoint_sm); - space_map_close(checkpoint_sm); - } - - /* - * If we've added vdevs since we took the checkpoint, ensure - * that their checkpoint space maps are empty. - */ - if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) { - for (uint64_t c = ckpoint_rvd->vdev_children; - c < current_rvd->vdev_children; c++) { - vdev_t *current_vd = current_rvd->vdev_child[c]; - ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL); - } - } - - /* for cleaner progress output */ - (void) fprintf(stderr, "\n"); -} - -/* - * Verifies that all space that's allocated in the checkpoint is - * still allocated in the current version, by checking that everything - * in checkpoint's ms_allocatable (which is actually allocated, not - * allocatable/free) is not present in current's ms_allocatable. - * - * Note that the function changes the state of the ms_allocatable - * trees of both spas when called. 
The entries of all ms_allocatable - * trees are cleared out and then repopulated from their respective - * ms_sm space maps. In the checkpointed state we load the allocated - * entries, and in the current state we load the free entries. - */ -static void -verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) -{ - vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; - vdev_t *current_rvd = current->spa_root_vdev; - - load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC); - load_concrete_ms_allocatable_trees(current, SM_FREE); - - for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) { - vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i]; - vdev_t *current_vd = current_rvd->vdev_child[i]; - - if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { - /* - * See comment in verify_checkpoint_vdev_spacemaps() - */ - ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); - continue; - } - - for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) { - metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m]; - metaslab_t *current_msp = current_vd->vdev_ms[m]; - - (void) fprintf(stderr, - "\rverifying vdev %llu of %llu, " - "metaslab %llu of %llu ...", - (longlong_t)current_vd->vdev_id, - (longlong_t)current_rvd->vdev_children, - (longlong_t)current_vd->vdev_ms[m]->ms_id, - (longlong_t)current_vd->vdev_ms_count); - - /* - * We walk through the ms_allocatable trees that - * are loaded with the allocated blocks from the - * ms_sm spacemaps of the checkpoint. For each - * one of these ranges we ensure that none of them - * exists in the ms_allocatable trees of the - * current state which are loaded with the ranges - * that are currently free. - * - * This way we ensure that none of the blocks that - * are part of the checkpoint were freed by mistake. 
- */ - range_tree_walk(ckpoint_msp->ms_allocatable, - (range_tree_func_t *)range_tree_verify_not_present, - current_msp->ms_allocatable); - } - } - - /* for cleaner progress output */ - (void) fprintf(stderr, "\n"); -} - -static void -verify_checkpoint_blocks(spa_t *spa) -{ - ASSERT(!dump_opt['L']); - - spa_t *checkpoint_spa; - char *checkpoint_pool; - nvlist_t *config = NULL; - int error = 0; - - /* - * We import the checkpointed state of the pool (under a different - * name) so we can do verification on it against the current state - * of the pool. - */ - checkpoint_pool = import_checkpointed_state(spa->spa_name, config, - NULL); - ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); - - error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG); - if (error != 0) { - fatal("Tried to open pool \"%s\" but spa_open() failed with " - "error %d\n", checkpoint_pool, error); - } - - /* - * Ensure that ranges in the checkpoint space maps of each vdev - * are allocated according to the checkpointed state's metaslab - * space maps. - */ - verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa); - - /* - * Ensure that allocated ranges in the checkpoint's metaslab - * space maps remain allocated in the metaslab space maps of - * the current state. - */ - verify_checkpoint_ms_spacemaps(checkpoint_spa, spa); - - /* - * Once we are done, we get rid of the checkpointed state. 
- */ - spa_close(checkpoint_spa, FTAG); - free(checkpoint_pool); -} - -static void -dump_leftover_checkpoint_blocks(spa_t *spa) -{ - vdev_t *rvd = spa->spa_root_vdev; - - for (uint64_t i = 0; i < rvd->vdev_children; i++) { - vdev_t *vd = rvd->vdev_child[i]; - - space_map_t *checkpoint_sm = NULL; - uint64_t checkpoint_sm_obj; - - if (vd->vdev_top_zap == 0) - continue; - - if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, - VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) - continue; - - VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, - VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, - sizeof (uint64_t), 1, &checkpoint_sm_obj)); - - VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), - checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); - dump_spacemap(spa->spa_meta_objset, checkpoint_sm); - space_map_close(checkpoint_sm); - } -} - -static int -verify_checkpoint(spa_t *spa) -{ - uberblock_t checkpoint; - int error; - - if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) - return (0); - - error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), - sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - - if (error == ENOENT && !dump_opt['L']) { - /* - * If the feature is active but the uberblock is missing - * then we must be in the middle of discarding the - * checkpoint. 
- */ - (void) printf("\nPartially discarded checkpoint " - "state found:\n"); - dump_leftover_checkpoint_blocks(spa); - return (0); - } else if (error != 0) { - (void) printf("lookup error %d when looking for " - "checkpointed uberblock in MOS\n", error); - return (error); - } - dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n"); - - if (checkpoint.ub_checkpoint_txg == 0) { - (void) printf("\nub_checkpoint_txg not set in checkpointed " - "uberblock\n"); - error = 3; - } - - if (error == 0 && !dump_opt['L']) - verify_checkpoint_blocks(spa); - - return (error); -} - -/* ARGSUSED */ -static void -mos_leaks_cb(void *arg, uint64_t start, uint64_t size) -{ - for (uint64_t i = start; i < size; i++) { - (void) printf("MOS object %llu referenced but not allocated\n", - (u_longlong_t)i); - } -} - -static range_tree_t *mos_refd_objs; - -static void -mos_obj_refd(uint64_t obj) -{ - if (obj != 0 && mos_refd_objs != NULL) - range_tree_add(mos_refd_objs, obj, 1); -} - -static void -mos_leak_vdev(vdev_t *vd) -{ - mos_obj_refd(vd->vdev_dtl_object); - mos_obj_refd(vd->vdev_ms_array); - mos_obj_refd(vd->vdev_top_zap); - mos_obj_refd(vd->vdev_indirect_config.vic_births_object); - mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object); - mos_obj_refd(vd->vdev_leaf_zap); - if (vd->vdev_checkpoint_sm != NULL) - mos_obj_refd(vd->vdev_checkpoint_sm->sm_object); - if (vd->vdev_indirect_mapping != NULL) { - mos_obj_refd(vd->vdev_indirect_mapping-> - vim_phys->vimp_counts_object); - } - if (vd->vdev_obsolete_sm != NULL) - mos_obj_refd(vd->vdev_obsolete_sm->sm_object); - - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { - metaslab_t *ms = vd->vdev_ms[m]; - mos_obj_refd(space_map_object(ms->ms_sm)); - } - - for (uint64_t c = 0; c < vd->vdev_children; c++) { - mos_leak_vdev(vd->vdev_child[c]); - } -} - -static int -dump_mos_leaks(spa_t *spa) -{ - int rv = 0; - objset_t *mos = spa->spa_meta_objset; - dsl_pool_t *dp = spa->spa_dsl_pool; - - /* Visit and mark all 
referenced objects in the MOS */ - - mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT); - mos_obj_refd(spa->spa_pool_props_object); - mos_obj_refd(spa->spa_config_object); - mos_obj_refd(spa->spa_ddt_stat_object); - mos_obj_refd(spa->spa_feat_desc_obj); - mos_obj_refd(spa->spa_feat_enabled_txg_obj); - mos_obj_refd(spa->spa_feat_for_read_obj); - mos_obj_refd(spa->spa_feat_for_write_obj); - mos_obj_refd(spa->spa_history); - mos_obj_refd(spa->spa_errlog_last); - mos_obj_refd(spa->spa_errlog_scrub); - mos_obj_refd(spa->spa_all_vdev_zaps); - mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); - mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); - mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj); - bpobj_count_refd(&spa->spa_deferred_bpobj); - mos_obj_refd(dp->dp_empty_bpobj); - bpobj_count_refd(&dp->dp_obsolete_bpobj); - bpobj_count_refd(&dp->dp_free_bpobj); - mos_obj_refd(spa->spa_l2cache.sav_object); - mos_obj_refd(spa->spa_spares.sav_object); - - mos_obj_refd(spa->spa_condensing_indirect_phys. - scip_next_mapping_object); - mos_obj_refd(spa->spa_condensing_indirect_phys. 
- scip_prev_obsolete_sm_object); - if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { - vdev_indirect_mapping_t *vim = - vdev_indirect_mapping_open(mos, - spa->spa_condensing_indirect_phys.scip_next_mapping_object); - mos_obj_refd(vim->vim_phys->vimp_counts_object); - vdev_indirect_mapping_close(vim); - } - - if (dp->dp_origin_snap != NULL) { - dsl_dataset_t *ds; - - dsl_pool_config_enter(dp, FTAG); - VERIFY0(dsl_dataset_hold_obj(dp, - dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj, - FTAG, &ds)); - count_ds_mos_objects(ds); - dump_deadlist(&ds->ds_deadlist); - dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); - - count_ds_mos_objects(dp->dp_origin_snap); - dump_deadlist(&dp->dp_origin_snap->ds_deadlist); - } - count_dir_mos_objects(dp->dp_mos_dir); - if (dp->dp_free_dir != NULL) - count_dir_mos_objects(dp->dp_free_dir); - if (dp->dp_leak_dir != NULL) - count_dir_mos_objects(dp->dp_leak_dir); - - mos_leak_vdev(spa->spa_root_vdev); - - for (uint64_t class = 0; class < DDT_CLASSES; class++) { - for (uint64_t type = 0; type < DDT_TYPES; type++) { - for (uint64_t cksum = 0; - cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) { - ddt_t *ddt = spa->spa_ddt[cksum]; - mos_obj_refd(ddt->ddt_object[type][class]); - } - } - } - - /* - * Visit all allocated objects and make sure they are referenced. 
- */ - uint64_t object = 0; - while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { - if (range_tree_contains(mos_refd_objs, object, 1)) { - range_tree_remove(mos_refd_objs, object, 1); - } else { - dmu_object_info_t doi; - const char *name; - dmu_object_info(mos, object, &doi); - if (doi.doi_type & DMU_OT_NEWTYPE) { - dmu_object_byteswap_t bswap = - DMU_OT_BYTESWAP(doi.doi_type); - name = dmu_ot_byteswap[bswap].ob_name; - } else { - name = dmu_ot[doi.doi_type].ot_name; - } - - (void) printf("MOS object %llu (%s) leaked\n", - (u_longlong_t)object, name); - rv = 2; - } - } - (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); - if (!range_tree_is_empty(mos_refd_objs)) - rv = 2; - range_tree_vacate(mos_refd_objs, NULL, NULL); - range_tree_destroy(mos_refd_objs); - return (rv); -} - -static void -dump_zpool(spa_t *spa) -{ - dsl_pool_t *dp = spa_get_dsl(spa); - int rc = 0; - - if (dump_opt['S']) { - dump_simulated_ddt(spa); - return; - } - - if (!dump_opt['e'] && dump_opt['C'] > 1) { - (void) printf("\nCached configuration:\n"); - dump_nvlist(spa->spa_config, 8); - } - - if (dump_opt['C']) - dump_config(spa); - - if (dump_opt['u']) - dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); - - if (dump_opt['D']) - dump_all_ddts(spa); - - if (dump_opt['d'] > 2 || dump_opt['m']) - dump_metaslabs(spa); - if (dump_opt['M']) - dump_metaslab_groups(spa); - - if (dump_opt['d'] || dump_opt['i']) { - mos_refd_objs = range_tree_create(NULL, NULL); - dump_dir(dp->dp_meta_objset); - - if (dump_opt['d'] >= 3) { - dsl_pool_t *dp = spa->spa_dsl_pool; - dump_full_bpobj(&spa->spa_deferred_bpobj, - "Deferred frees", 0); - if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { - dump_full_bpobj(&dp->dp_free_bpobj, - "Pool snapshot frees", 0); - } - if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { - ASSERT(spa_feature_is_enabled(spa, - SPA_FEATURE_DEVICE_REMOVAL)); - dump_full_bpobj(&dp->dp_obsolete_bpobj, - "Pool obsolete blocks", 0); - } - - if (spa_feature_is_active(spa, - 
SPA_FEATURE_ASYNC_DESTROY)) { - dump_bptree(spa->spa_meta_objset, - dp->dp_bptree_obj, - "Pool dataset frees"); - } - dump_dtl(spa->spa_root_vdev, 0); - } - (void) dmu_objset_find(spa_name(spa), dump_one_dir, - NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); - - if (rc == 0 && !dump_opt['L']) - rc = dump_mos_leaks(spa); - - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - uint64_t refcount; - - if (!(spa_feature_table[f].fi_flags & - ZFEATURE_FLAG_PER_DATASET)) { - ASSERT0(dataset_feature_count[f]); - continue; - } - (void) feature_get_refcount(spa, - &spa_feature_table[f], &refcount); - if (dataset_feature_count[f] != refcount) { - (void) printf("%s feature refcount mismatch: " - "%lld datasets != %lld refcount\n", - spa_feature_table[f].fi_uname, - (longlong_t)dataset_feature_count[f], - (longlong_t)refcount); - rc = 2; - } else { - (void) printf("Verified %s feature refcount " - "of %llu is correct\n", - spa_feature_table[f].fi_uname, - (longlong_t)refcount); - } - } - - if (rc == 0) { - rc = verify_device_removal_feature_counts(spa); - } - } - - if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) - rc = dump_block_stats(spa); - - if (rc == 0) - rc = verify_spacemap_refcounts(spa); - - if (dump_opt['s']) - show_pool_stats(spa); - - if (dump_opt['h']) - dump_history(spa); - - if (rc == 0) - rc = verify_checkpoint(spa); - - if (rc != 0) { - dump_debug_buffer(); - exit(rc); - } -} - -#define ZDB_FLAG_CHECKSUM 0x0001 -#define ZDB_FLAG_DECOMPRESS 0x0002 -#define ZDB_FLAG_BSWAP 0x0004 -#define ZDB_FLAG_GBH 0x0008 -#define ZDB_FLAG_INDIRECT 0x0010 -#define ZDB_FLAG_PHYS 0x0020 -#define ZDB_FLAG_RAW 0x0040 -#define ZDB_FLAG_PRINT_BLKPTR 0x0080 - -static int flagbits[256]; - -static void -zdb_print_blkptr(blkptr_t *bp, int flags) -{ - char blkbuf[BP_SPRINTF_LEN]; - - if (flags & ZDB_FLAG_BSWAP) - byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); - - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - (void) printf("%s\n", blkbuf); -} - -static void 
-zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) -{ - int i; - - for (i = 0; i < nbps; i++) - zdb_print_blkptr(&bp[i], flags); -} - -static void -zdb_dump_gbh(void *buf, int flags) -{ - zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); -} - -static void -zdb_dump_block_raw(void *buf, uint64_t size, int flags) -{ - if (flags & ZDB_FLAG_BSWAP) - byteswap_uint64_array(buf, size); - (void) write(1, buf, size); -} - -static void -zdb_dump_block(char *label, void *buf, uint64_t size, int flags) -{ - uint64_t *d = (uint64_t *)buf; - unsigned nwords = size / sizeof (uint64_t); - int do_bswap = !!(flags & ZDB_FLAG_BSWAP); - unsigned i, j; - const char *hdr; - char *c; - - - if (do_bswap) - hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; - else - hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; - - (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); - - for (i = 0; i < nwords; i += 2) { - (void) printf("%06llx: %016llx %016llx ", - (u_longlong_t)(i * sizeof (uint64_t)), - (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), - (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); - - c = (char *)&d[i]; - for (j = 0; j < 2 * sizeof (uint64_t); j++) - (void) printf("%c", isprint(c[j]) ? c[j] : '.'); - (void) printf("\n"); - } -} - -/* - * There are two acceptable formats: - * leaf_name - For example: c1t0d0 or /tmp/ztest.0a - * child[.child]* - For example: 0.1.1 - * - * The second form can be used to specify arbitrary vdevs anywhere - * in the heirarchy. For example, in a pool with a mirror of - * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . - */ -static vdev_t * -zdb_vdev_lookup(vdev_t *vdev, const char *path) -{ - char *s, *p, *q; - unsigned i; - - if (vdev == NULL) - return (NULL); - - /* First, assume the x.x.x.x format */ - i = strtoul(path, &s, 10); - if (s == path || (s && *s != '.' 
&& *s != '\0')) - goto name; - if (i >= vdev->vdev_children) - return (NULL); - - vdev = vdev->vdev_child[i]; - if (*s == '\0') - return (vdev); - return (zdb_vdev_lookup(vdev, s+1)); - -name: - for (i = 0; i < vdev->vdev_children; i++) { - vdev_t *vc = vdev->vdev_child[i]; - - if (vc->vdev_path == NULL) { - vc = zdb_vdev_lookup(vc, path); - if (vc == NULL) - continue; - else - return (vc); - } - - p = strrchr(vc->vdev_path, '/'); - p = p ? p + 1 : vc->vdev_path; - q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; - - if (strcmp(vc->vdev_path, path) == 0) - return (vc); - if (strcmp(p, path) == 0) - return (vc); - if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) - return (vc); - } - - return (NULL); -} - -/* ARGSUSED */ -static int -random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused) -{ - return (random_get_pseudo_bytes(buf, len)); -} - -/* - * Read a block from a pool and print it out. The syntax of the - * block descriptor is: - * - * pool:vdev_specifier:offset:size[:flags] - * - * pool - The name of the pool you wish to read from - * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) - * offset - offset, in hex, in bytes - * size - Amount of data to read, in hex, in bytes - * flags - A string of characters specifying options - * b: Decode a blkptr at given offset within block - * *c: Calculate and display checksums - * d: Decompress data before dumping - * e: Byteswap data before dumping - * g: Display data as a gang block header - * i: Display as an indirect block - * p: Do I/O to physical offset - * r: Dump raw data to stdout - * - * * = not yet implemented - */ -static void -zdb_read_block(char *thing, spa_t *spa) -{ - blkptr_t blk, *bp = &blk; - dva_t *dva = bp->blk_dva; - int flags = 0; - uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; - zio_t *zio; - vdev_t *vd; - abd_t *pabd; - void *lbuf, *buf; - const char *s, *vdev; - char *p, *dup, *flagstr; - int i, error; - - dup = strdup(thing); - s = strtok(dup, 
":"); - vdev = s ? s : ""; - s = strtok(NULL, ":"); - offset = strtoull(s ? s : "", NULL, 16); - s = strtok(NULL, ":"); - size = strtoull(s ? s : "", NULL, 16); - s = strtok(NULL, ":"); - if (s) - flagstr = strdup(s); - else - flagstr = strdup(""); - - s = NULL; - if (size == 0) - s = "size must not be zero"; - if (!IS_P2ALIGNED(size, DEV_BSIZE)) - s = "size must be a multiple of sector size"; - if (!IS_P2ALIGNED(offset, DEV_BSIZE)) - s = "offset must be a multiple of sector size"; - if (s) { - (void) printf("Invalid block specifier: %s - %s\n", thing, s); - free(flagstr); - free(dup); - return; - } - - for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) { - for (i = 0; flagstr[i]; i++) { - int bit = flagbits[(uchar_t)flagstr[i]]; - - if (bit == 0) { - (void) printf("***Invalid flag: %c\n", - flagstr[i]); - continue; - } - flags |= bit; - - /* If it's not something with an argument, keep going */ - if ((bit & (ZDB_FLAG_CHECKSUM | - ZDB_FLAG_PRINT_BLKPTR)) == 0) - continue; - - p = &flagstr[i + 1]; - if (bit == ZDB_FLAG_PRINT_BLKPTR) - blkptr_offset = strtoull(p, &p, 16); - if (*p != ':' && *p != '\0') { - (void) printf("***Invalid flag arg: '%s'\n", s); - free(flagstr); - free(dup); - return; - } - i += p - &flagstr[i + 1]; /* skip over the number */ - } - } - free(flagstr); - - vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); - if (vd == NULL) { - (void) printf("***Invalid vdev: %s\n", vdev); - free(dup); - return; - } else { - if (vd->vdev_path) - (void) fprintf(stderr, "Found vdev: %s\n", - vd->vdev_path); - else - (void) fprintf(stderr, "Found vdev type: %s\n", - vd->vdev_ops->vdev_op_type); - } - - psize = size; - lsize = size; - - pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); - lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); - - BP_ZERO(bp); - - DVA_SET_VDEV(&dva[0], vd->vdev_id); - DVA_SET_OFFSET(&dva[0], offset); - DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); - DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); - - BP_SET_BIRTH(bp, 
TXG_INITIAL, TXG_INITIAL); - - BP_SET_LSIZE(bp, lsize); - BP_SET_PSIZE(bp, psize); - BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); - BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); - BP_SET_TYPE(bp, DMU_OT_NONE); - BP_SET_LEVEL(bp, 0); - BP_SET_DEDUP(bp, 0); - BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); - - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - zio = zio_root(spa, NULL, NULL, 0); - - if (vd == vd->vdev_top) { - /* - * Treat this as a normal block read. - */ - zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, - ZIO_PRIORITY_SYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); - } else { - /* - * Treat this as a vdev child I/O. - */ - zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, - psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, - ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | - ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | - ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL, - NULL, NULL)); - } - - error = zio_wait(zio); - spa_config_exit(spa, SCL_STATE, FTAG); - - if (error) { - (void) printf("Read of %s failed, error: %d\n", thing, error); - goto out; - } - - if (flags & ZDB_FLAG_DECOMPRESS) { - /* - * We don't know how the data was compressed, so just try - * every decompress function at every inflated blocksize. 
- */ - enum zio_compress c; - void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); - void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); - - abd_copy_to_buf(pbuf2, pabd, psize); - - VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize, - random_get_pseudo_bytes_cb, NULL)); - - VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, - SPA_MAXBLOCKSIZE - psize)); - - for (lsize = SPA_MAXBLOCKSIZE; lsize > psize; - lsize -= SPA_MINBLOCKSIZE) { - for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { - if (zio_decompress_data(c, pabd, - lbuf, psize, lsize) == 0 && - zio_decompress_data_buf(c, pbuf2, - lbuf2, psize, lsize) == 0 && - bcmp(lbuf, lbuf2, lsize) == 0) - break; - } - if (c != ZIO_COMPRESS_FUNCTIONS) - break; - lsize -= SPA_MINBLOCKSIZE; - } - - umem_free(pbuf2, SPA_MAXBLOCKSIZE); - umem_free(lbuf2, SPA_MAXBLOCKSIZE); - - if (lsize <= psize) { - (void) printf("Decompress of %s failed\n", thing); - goto out; - } - buf = lbuf; - size = lsize; - } else { - buf = abd_to_buf(pabd); - size = psize; - } - - if (flags & ZDB_FLAG_PRINT_BLKPTR) - zdb_print_blkptr((blkptr_t *)(void *) - ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); - else if (flags & ZDB_FLAG_RAW) - zdb_dump_block_raw(buf, size, flags); - else if (flags & ZDB_FLAG_INDIRECT) - zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), - flags); - else if (flags & ZDB_FLAG_GBH) - zdb_dump_gbh(buf, flags); - else - zdb_dump_block(thing, buf, size, flags); - -out: - abd_free(pabd); - umem_free(lbuf, SPA_MAXBLOCKSIZE); - free(dup); -} - -static void -zdb_embedded_block(char *thing) -{ - blkptr_t bp; - unsigned long long *words = (void *)&bp; - char *buf; - int err; - - bzero(&bp, sizeof (bp)); - err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:" - "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx", - words + 0, words + 1, words + 2, words + 3, - words + 4, words + 5, words + 6, words + 7, - words + 8, words + 9, words + 10, words + 11, - words + 12, words + 13, words + 14, words + 
15); - if (err != 16) { - (void) fprintf(stderr, "invalid input format\n"); - exit(1); - } - ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); - buf = malloc(SPA_MAXBLOCKSIZE); - if (buf == NULL) { - (void) fprintf(stderr, "out of memory\n"); - exit(1); - } - err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); - if (err != 0) { - (void) fprintf(stderr, "decode failed: %u\n", err); - free(buf); - exit(1); - } - zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); - free(buf); -} - -int -main(int argc, char **argv) -{ - int c; - struct rlimit rl = { 1024, 1024 }; - spa_t *spa = NULL; - objset_t *os = NULL; - int dump_all = 1; - int verbose = 0; - int error = 0; - char **searchdirs = NULL; - int nsearch = 0; - char *target, *target_pool; - nvlist_t *policy = NULL; - uint64_t max_txg = UINT64_MAX; - int flags = ZFS_IMPORT_MISSING_LOG; - int rewind = ZPOOL_NEVER_REWIND; - char *spa_config_path_env; - boolean_t target_is_spa = B_TRUE; - nvlist_t *cfg = NULL; - - (void) setrlimit(RLIMIT_NOFILE, &rl); - (void) enable_extended_FILE_stdio(-1, -1); - - dprintf_setup(&argc, argv); - - /* - * If there is an environment variable SPA_CONFIG_PATH it overrides - * default spa_config_path setting. If -U flag is specified it will - * override this environment variable settings once again. - */ - spa_config_path_env = getenv("SPA_CONFIG_PATH"); - if (spa_config_path_env != NULL) - spa_config_path = spa_config_path_env; - - while ((c = getopt(argc, argv, - "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) { - switch (c) { - case 'b': - case 'c': - case 'C': - case 'd': - case 'D': - case 'E': - case 'G': - case 'h': - case 'i': - case 'l': - case 'm': - case 'M': - case 'O': - case 'R': - case 's': - case 'S': - case 'u': - dump_opt[c]++; - dump_all = 0; - break; - case 'A': - case 'e': - case 'F': - case 'k': - case 'L': - case 'P': - case 'q': - case 'X': - dump_opt[c]++; - break; - /* NB: Sort single match options below. 
*/ - case 'I': - max_inflight = strtoull(optarg, NULL, 0); - if (max_inflight == 0) { - (void) fprintf(stderr, "maximum number " - "of inflight I/Os must be greater " - "than 0\n"); - usage(); - } - break; - case 'o': - error = set_global_var(optarg); - if (error != 0) - usage(); - break; - case 'p': - if (searchdirs == NULL) { - searchdirs = umem_alloc(sizeof (char *), - UMEM_NOFAIL); - } else { - char **tmp = umem_alloc((nsearch + 1) * - sizeof (char *), UMEM_NOFAIL); - bcopy(searchdirs, tmp, nsearch * - sizeof (char *)); - umem_free(searchdirs, - nsearch * sizeof (char *)); - searchdirs = tmp; - } - searchdirs[nsearch++] = optarg; - break; - case 't': - max_txg = strtoull(optarg, NULL, 0); - if (max_txg < TXG_INITIAL) { - (void) fprintf(stderr, "incorrect txg " - "specified: %s\n", optarg); - usage(); - } - break; - case 'U': - spa_config_path = optarg; - if (spa_config_path[0] != '/') { - (void) fprintf(stderr, - "cachefile must be an absolute path " - "(i.e. start with a slash)\n"); - usage(); - } - break; - case 'v': - verbose++; - break; - case 'V': - flags = ZFS_IMPORT_VERBATIM; - break; - case 'x': - vn_dumpdir = optarg; - break; - default: - usage(); - break; - } - } - - if (!dump_opt['e'] && searchdirs != NULL) { - (void) fprintf(stderr, "-p option requires use of -e\n"); - usage(); - } - - /* - * ZDB does not typically re-read blocks; therefore limit the ARC - * to 256 MB, which can be used entirely for metadata. - */ - zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024; - - /* - * "zdb -c" uses checksum-verifying scrub i/os which are async reads. - * "zdb -b" uses traversal prefetch which uses async reads. - * For good performance, let several of them be active at once. - */ - zfs_vdev_async_read_max_active = 10; - - /* - * Disable reference tracking for better performance. - */ - reference_tracking_enable = B_FALSE; - - /* - * Do not fail spa_load when spa_load_verify fails. This is needed - * to load non-idle pools. 
- */ - spa_load_verify_dryrun = B_TRUE; - - kernel_init(FREAD); - g_zfs = libzfs_init(); - if (g_zfs == NULL) - fatal("Fail to initialize zfs"); - - if (dump_all) - verbose = MAX(verbose, 1); - - for (c = 0; c < 256; c++) { - if (dump_all && strchr("AeEFklLOPRSX", c) == NULL) - dump_opt[c] = 1; - if (dump_opt[c]) - dump_opt[c] += verbose; - } - - aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2); - zfs_recover = (dump_opt['A'] > 1); - - argc -= optind; - argv += optind; - - if (argc < 2 && dump_opt['R']) - usage(); - - if (dump_opt['E']) { - if (argc != 1) - usage(); - zdb_embedded_block(argv[0]); - return (0); - } - - if (argc < 1) { - if (!dump_opt['e'] && dump_opt['C']) { - dump_cachefile(spa_config_path); - return (0); - } - usage(); - } - - if (dump_opt['l']) - return (dump_label(argv[0])); - - if (dump_opt['O']) { - if (argc != 2) - usage(); - dump_opt['v'] = verbose + 3; - return (dump_path(argv[0], argv[1])); - } - - if (dump_opt['X'] || dump_opt['F']) - rewind = ZPOOL_DO_REWIND | - (dump_opt['X'] ? 
ZPOOL_EXTREME_REWIND : 0); - - if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 || - nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 || - nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0) - fatal("internal error: %s", strerror(ENOMEM)); - - error = 0; - target = argv[0]; - - if (strpbrk(target, "/@") != NULL) { - size_t targetlen; - - target_pool = strdup(target); - *strpbrk(target_pool, "/@") = '\0'; - - target_is_spa = B_FALSE; - targetlen = strlen(target); - if (targetlen && target[targetlen - 1] == '/') - target[targetlen - 1] = '\0'; - } else { - target_pool = target; - } - - if (dump_opt['e']) { - importargs_t args = { 0 }; - - args.paths = nsearch; - args.path = searchdirs; - args.can_be_active = B_TRUE; - - error = zpool_tryimport(g_zfs, target_pool, &cfg, &args); - - if (error == 0) { - - if (nvlist_add_nvlist(cfg, - ZPOOL_LOAD_POLICY, policy) != 0) { - fatal("can't open '%s': %s", - target, strerror(ENOMEM)); - } - - if (dump_opt['C'] > 1) { - (void) printf("\nConfiguration for import:\n"); - dump_nvlist(cfg, 8); - } - - /* - * Disable the activity check to allow examination of - * active pools. 
- */ - error = spa_import(target_pool, cfg, NULL, - flags | ZFS_IMPORT_SKIP_MMP); - } - } - - char *checkpoint_pool = NULL; - char *checkpoint_target = NULL; - if (dump_opt['k']) { - checkpoint_pool = import_checkpointed_state(target, cfg, - &checkpoint_target); - - if (checkpoint_target != NULL) - target = checkpoint_target; - - } - - if (error == 0) { - if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { - ASSERT(checkpoint_pool != NULL); - ASSERT(checkpoint_target == NULL); - - error = spa_open(checkpoint_pool, &spa, FTAG); - if (error != 0) { - fatal("Tried to open pool \"%s\" but " - "spa_open() failed with error %d\n", - checkpoint_pool, error); - } - - } else if (target_is_spa || dump_opt['R']) { - zdb_set_skip_mmp(target); - error = spa_open_rewind(target, &spa, FTAG, policy, - NULL); - if (error) { - /* - * If we're missing the log device then - * try opening the pool after clearing the - * log state. - */ - mutex_enter(&spa_namespace_lock); - if ((spa = spa_lookup(target)) != NULL && - spa->spa_log_state == SPA_LOG_MISSING) { - spa->spa_log_state = SPA_LOG_CLEAR; - error = 0; - } - mutex_exit(&spa_namespace_lock); - - if (!error) { - error = spa_open_rewind(target, &spa, - FTAG, policy, NULL); - } - } - } else { - zdb_set_skip_mmp(target); - error = open_objset(target, DMU_OST_ANY, FTAG, &os); - } - } - nvlist_free(policy); - - if (error) - fatal("can't open '%s': %s", target, strerror(error)); - - argv++; - argc--; - if (!dump_opt['R']) { - if (argc > 0) { - zopt_objects = argc; - zopt_object = calloc(zopt_objects, sizeof (uint64_t)); - for (unsigned i = 0; i < zopt_objects; i++) { - errno = 0; - zopt_object[i] = strtoull(argv[i], NULL, 0); - if (zopt_object[i] == 0 && errno != 0) - fatal("bad number %s: %s", - argv[i], strerror(errno)); - } - } - if (os != NULL) { - dump_dir(os); - } else if (zopt_objects > 0 && !dump_opt['m']) { - dump_dir(spa->spa_meta_objset); - } else { - dump_zpool(spa); - } - } else { - flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; 
- flagbits['c'] = ZDB_FLAG_CHECKSUM; - flagbits['d'] = ZDB_FLAG_DECOMPRESS; - flagbits['e'] = ZDB_FLAG_BSWAP; - flagbits['g'] = ZDB_FLAG_GBH; - flagbits['i'] = ZDB_FLAG_INDIRECT; - flagbits['p'] = ZDB_FLAG_PHYS; - flagbits['r'] = ZDB_FLAG_RAW; - - for (int i = 0; i < argc; i++) - zdb_read_block(argv[i], spa); - } - - if (dump_opt['k']) { - free(checkpoint_pool); - if (!target_is_spa) - free(checkpoint_target); - } - - if (os != NULL) - close_objset(os, FTAG); - else - spa_close(spa, FTAG); - - fuid_table_destroy(); - - dump_debug_buffer(); - - libzfs_fini(g_zfs); - kernel_fini(); - - return (error); -} diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.h b/cddl/contrib/opensolaris/cmd/zdb/zdb.h deleted file mode 100644 index 49579811efbb..000000000000 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2017 Spectra Logic Corp Inc. All rights reserved. - * Use is subject to license terms. 
- */ - - -#ifndef _ZDB_H -#define _ZDB_H - -void dump_intent_log(zilog_t *); -extern uint8_t dump_opt[256]; - -#endif /* _ZDB_H */ diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c deleted file mode 100644 index 9f3f23f82da1..000000000000 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c +++ /dev/null @@ -1,424 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2013, 2017 by Delphix. All rights reserved. - */ - -/* - * Print intent log header and statistics. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <sys/zfs_context.h> -#include <sys/spa.h> -#include <sys/dmu.h> -#include <sys/stat.h> -#include <sys/resource.h> -#include <sys/zil.h> -#include <sys/zil_impl.h> -#include <sys/spa_impl.h> -#include <sys/abd.h> - -#include "zdb.h" - -extern uint8_t dump_opt[256]; - -static char tab_prefix[4] = "\t\t\t"; - -static void -print_log_bp(const blkptr_t *bp, const char *prefix) -{ - char blkbuf[BP_SPRINTF_LEN]; - - snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); - (void) printf("%s%s\n", prefix, blkbuf); -} - -/* ARGSUSED */ -static void -zil_prt_rec_create(zilog_t *zilog, int txtype, void *arg) -{ - lr_create_t *lr = arg; - time_t crtime = lr->lr_crtime[0]; - char *name, *link; - lr_attr_t *lrattr; - - name = (char *)(lr + 1); - - if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR || - lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) { - lrattr = (lr_attr_t *)(lr + 1); - name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); - } - - if (txtype == TX_SYMLINK) { - link = name + strlen(name) + 1; - (void) printf("%s%s -> %s\n", tab_prefix, name, link); - } else if (txtype != TX_MKXATTR) { - (void) printf("%s%s\n", tab_prefix, name); - } - - (void) printf("%s%s", tab_prefix, ctime(&crtime)); - (void) printf("%sdoid %" PRIu64 ", foid %" PRIu64 ", slots %" PRIu64 - ", mode %" PRIo64 "\n", - tab_prefix, lr->lr_doid, - (uint64_t)LR_FOID_GET_OBJ(lr->lr_foid), - (uint64_t)LR_FOID_GET_SLOTS(lr->lr_foid), - lr->lr_mode); - (void) printf("%suid %" PRIu64 ", gid %" PRIu64 ", gen %" PRIu64 - ", rdev %#" PRIx64 "\n", - tab_prefix, lr->lr_uid, lr->lr_gid, lr->lr_gen, lr->lr_rdev); -} - -/* ARGSUSED */ -static void -zil_prt_rec_remove(zilog_t *zilog, int txtype, void *arg) -{ - lr_remove_t *lr = arg; - - (void) printf("%sdoid %llu, name %s\n", tab_prefix, - (u_longlong_t)lr->lr_doid, (char *)(lr + 1)); -} - -/* ARGSUSED */ -static void -zil_prt_rec_link(zilog_t *zilog, int txtype, void *arg) -{ - lr_link_t *lr = arg; - - (void) 
printf("%sdoid %llu, link_obj %llu, name %s\n", tab_prefix, - (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj, - (char *)(lr + 1)); -} - -/* ARGSUSED */ -static void -zil_prt_rec_rename(zilog_t *zilog, int txtype, void *arg) -{ - lr_rename_t *lr = arg; - char *snm = (char *)(lr + 1); - char *tnm = snm + strlen(snm) + 1; - - (void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix, - (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid); - (void) printf("%ssrc %s tgt %s\n", tab_prefix, snm, tnm); -} - -/* ARGSUSED */ -static int -zil_prt_rec_write_cb(void *data, size_t len, void *unused) -{ - char *cdata = data; - for (size_t i = 0; i < len; i++) { - if (isprint(*cdata)) - (void) printf("%c ", *cdata); - else - (void) printf("%2X", *cdata); - cdata++; - } - return (0); -} - -/* ARGSUSED */ -static void -zil_prt_rec_write(zilog_t *zilog, int txtype, void *arg) -{ - lr_write_t *lr = arg; - abd_t *data; - blkptr_t *bp = &lr->lr_blkptr; - zbookmark_phys_t zb; - int verbose = MAX(dump_opt['d'], dump_opt['i']); - int error; - - (void) printf("%sfoid %llu, offset %llx, length %llx\n", tab_prefix, - (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, - (u_longlong_t)lr->lr_length); - - if (txtype == TX_WRITE2 || verbose < 5) - return; - - if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { - (void) printf("%shas blkptr, %s\n", tab_prefix, - !BP_IS_HOLE(bp) && - bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ? 
- "will claim" : "won't claim"); - print_log_bp(bp, tab_prefix); - - if (BP_IS_HOLE(bp)) { - (void) printf("\t\t\tLSIZE 0x%llx\n", - (u_longlong_t)BP_GET_LSIZE(bp)); - (void) printf("%s<hole>\n", tab_prefix); - return; - } - if (bp->blk_birth < zilog->zl_header->zh_claim_txg) { - (void) printf("%s<block already committed>\n", - tab_prefix); - return; - } - - SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), - lr->lr_foid, ZB_ZIL_LEVEL, - lr->lr_offset / BP_GET_LSIZE(bp)); - - data = abd_alloc(BP_GET_LSIZE(bp), B_FALSE); - error = zio_wait(zio_read(NULL, zilog->zl_spa, - bp, data, BP_GET_LSIZE(bp), NULL, NULL, - ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb)); - if (error) - goto out; - } else { - /* data is stored after the end of the lr_write record */ - data = abd_alloc(lr->lr_length, B_FALSE); - abd_copy_from_buf(data, lr + 1, lr->lr_length); - } - - (void) printf("%s", tab_prefix); - (void) abd_iterate_func(data, - 0, MIN(lr->lr_length, (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)), - zil_prt_rec_write_cb, NULL); - (void) printf("\n"); - -out: - abd_free(data); -} - -/* ARGSUSED */ -static void -zil_prt_rec_truncate(zilog_t *zilog, int txtype, void *arg) -{ - lr_truncate_t *lr = arg; - - (void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", tab_prefix, - (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset, - (u_longlong_t)lr->lr_length); -} - -/* ARGSUSED */ -static void -zil_prt_rec_setattr(zilog_t *zilog, int txtype, void *arg) -{ - lr_setattr_t *lr = arg; - time_t atime = (time_t)lr->lr_atime[0]; - time_t mtime = (time_t)lr->lr_mtime[0]; - - (void) printf("%sfoid %llu, mask 0x%llx\n", tab_prefix, - (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask); - - if (lr->lr_mask & AT_MODE) { - (void) printf("%sAT_MODE %llo\n", tab_prefix, - (longlong_t)lr->lr_mode); - } - - if (lr->lr_mask & AT_UID) { - (void) printf("%sAT_UID %llu\n", tab_prefix, - (u_longlong_t)lr->lr_uid); - } - - if (lr->lr_mask & AT_GID) { - (void) printf("%sAT_GID %llu\n", tab_prefix, - 
(u_longlong_t)lr->lr_gid); - } - - if (lr->lr_mask & AT_SIZE) { - (void) printf("%sAT_SIZE %llu\n", tab_prefix, - (u_longlong_t)lr->lr_size); - } - - if (lr->lr_mask & AT_ATIME) { - (void) printf("%sAT_ATIME %llu.%09llu %s", tab_prefix, - (u_longlong_t)lr->lr_atime[0], - (u_longlong_t)lr->lr_atime[1], - ctime(&atime)); - } - - if (lr->lr_mask & AT_MTIME) { - (void) printf("%sAT_MTIME %llu.%09llu %s", tab_prefix, - (u_longlong_t)lr->lr_mtime[0], - (u_longlong_t)lr->lr_mtime[1], - ctime(&mtime)); - } -} - -/* ARGSUSED */ -static void -zil_prt_rec_acl(zilog_t *zilog, int txtype, void *arg) -{ - lr_acl_t *lr = arg; - - (void) printf("%sfoid %llu, aclcnt %llu\n", tab_prefix, - (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt); -} - -typedef void (*zil_prt_rec_func_t)(zilog_t *, int, void *); -typedef struct zil_rec_info { - zil_prt_rec_func_t zri_print; - const char *zri_name; - uint64_t zri_count; -} zil_rec_info_t; - -static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { - {.zri_print = NULL, .zri_name = "Total "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_CREATE "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKXATTR "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_SYMLINK "}, - {.zri_print = zil_prt_rec_remove, .zri_name = "TX_REMOVE "}, - {.zri_print = zil_prt_rec_remove, .zri_name = "TX_RMDIR "}, - {.zri_print = zil_prt_rec_link, .zri_name = "TX_LINK "}, - {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME "}, - {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE "}, - {.zri_print = zil_prt_rec_truncate, .zri_name = "TX_TRUNCATE "}, - {.zri_print = zil_prt_rec_setattr, .zri_name = "TX_SETATTR "}, - {.zri_print = zil_prt_rec_acl, .zri_name = "TX_ACL_V0 "}, - {.zri_print = zil_prt_rec_acl, .zri_name = "TX_ACL_ACL "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_CREATE_ACL "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_CREATE_ATTR "}, - {.zri_print = 
zil_prt_rec_create, .zri_name = "TX_CREATE_ACL_ATTR "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "}, - {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "}, - {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "}, -}; - -/* ARGSUSED */ -static int -print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg) -{ - int txtype; - int verbose = MAX(dump_opt['d'], dump_opt['i']); - - /* reduce size of txtype to strip off TX_CI bit */ - txtype = lr->lrc_txtype; - - ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE); - ASSERT(lr->lrc_txg); - - (void) printf("\t\t%s%s len %6llu, txg %llu, seq %llu\n", - (lr->lrc_txtype & TX_CI) ? "CI-" : "", - zil_rec_info[txtype].zri_name, - (u_longlong_t)lr->lrc_reclen, - (u_longlong_t)lr->lrc_txg, - (u_longlong_t)lr->lrc_seq); - - if (txtype && verbose >= 3) - zil_rec_info[txtype].zri_print(zilog, txtype, lr); - - zil_rec_info[txtype].zri_count++; - zil_rec_info[0].zri_count++; - - return (0); -} - -/* ARGSUSED */ -static int -print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) -{ - char blkbuf[BP_SPRINTF_LEN + 10]; - int verbose = MAX(dump_opt['d'], dump_opt['i']); - const char *claim; - - if (verbose <= 3) - return (0); - - if (verbose >= 5) { - (void) strcpy(blkbuf, ", "); - snprintf_blkptr(blkbuf + strlen(blkbuf), - sizeof (blkbuf) - strlen(blkbuf), bp); - } else { - blkbuf[0] = '\0'; - } - - if (claim_txg != 0) - claim = "already claimed"; - else if (bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa)) - claim = "will claim"; - else - claim = "won't claim"; - - (void) printf("\tBlock seqno %llu, %s%s\n", - (u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf); - - return (0); -} - -static void -print_log_stats(int verbose) -{ - unsigned i, w, p10; - - if (verbose > 3) - (void) printf("\n"); - - if (zil_rec_info[0].zri_count == 0) - return; - - for (w = 1, p10 = 10; 
zil_rec_info[0].zri_count >= p10; p10 *= 10) - w++; - - for (i = 0; i < TX_MAX_TYPE; i++) - if (zil_rec_info[i].zri_count || verbose >= 3) - (void) printf("\t\t%s %*llu\n", - zil_rec_info[i].zri_name, w, - (u_longlong_t)zil_rec_info[i].zri_count); - (void) printf("\n"); -} - -/* ARGSUSED */ -void -dump_intent_log(zilog_t *zilog) -{ - const zil_header_t *zh = zilog->zl_header; - int verbose = MAX(dump_opt['d'], dump_opt['i']); - int i; - - if (BP_IS_HOLE(&zh->zh_log) || verbose < 1) - return; - - (void) printf("\n ZIL header: claim_txg %llu, " - "claim_blk_seq %llu, claim_lr_seq %llu", - (u_longlong_t)zh->zh_claim_txg, - (u_longlong_t)zh->zh_claim_blk_seq, - (u_longlong_t)zh->zh_claim_lr_seq); - (void) printf(" replay_seq %llu, flags 0x%llx\n", - (u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags); - - for (i = 0; i < TX_MAX_TYPE; i++) - zil_rec_info[i].zri_count = 0; - - /* see comment in zil_claim() or zil_check_log_chain() */ - if (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 && - zh->zh_claim_txg == 0) - return; - - if (verbose >= 2) { - (void) printf("\n"); - (void) zil_parse(zilog, print_log_block, print_log_record, NULL, - zh->zh_claim_txg); - print_log_stats(verbose); - } -} diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs-program.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs-program.8 deleted file mode 100644 index 76bb97c2d96d..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs-program.8 +++ /dev/null @@ -1,551 +0,0 @@ -.\" This file and its contents are supplied under the terms of the -.\" Common Development and Distribution License ("CDDL"), version 1.0. -.\" You may only use this file in accordance with the terms of version -.\" 1.0 of the CDDL. -.\" -.\" A full copy of the text of the CDDL should have accompanied this -.\" source. A copy of the CDDL is also available via the Internet at -.\" http://www.illumos.org/license/CDDL. -.\" -.\" -.\" Copyright (c) 2016, 2017 by Delphix. All rights reserved. -.\" Copyright (c) 2018 Datto Inc. 
-.\" -.Dd April 18, 2020 -.Dt ZFS-PROGRAM 8 -.Os -.Sh NAME -.Nm zfs program -.Nd executes ZFS channel programs -.Sh SYNOPSIS -.Cm zfs program -.Op Fl jn -.Op Fl t Ar instruction-limit -.Op Fl m Ar memory-limit -.Ar pool -.Ar script -.\".Op Ar optional arguments to channel program -.Sh DESCRIPTION -The ZFS channel program interface allows ZFS administrative operations to be -run programmatically as a Lua script. -The entire script is executed atomically, with no other administrative -operations taking effect concurrently. -A library of ZFS calls is made available to channel program scripts. -Channel programs may only be run with root privileges. -.Pp -A modified version of the Lua 5.2 interpreter is used to run channel program -scripts. -The Lua 5.2 manual can be found at: -.Bd -centered -offset indent -.Lk http://www.lua.org/manual/5.2/ -.Ed -.Pp -The channel program given by -.Ar script -will be run on -.Ar pool , -and any attempts to access or modify other pools will cause an error. -.Sh OPTIONS -.Bl -tag -width "-t" -.It Fl j -Display channel program output in JSON format. -When this flag is specified and standard output is empty - -channel program encountered an error. -The details of such an error will be printed to standard error in plain text. -.It Fl n -Executes a read-only channel program, which runs faster. -The program cannot change on-disk state by calling functions from the -zfs.sync submodule. -The program can be used to gather information such as properties and -determining if changes would succeed (zfs.check.*). -Without this flag, all pending changes must be synced to disk before a -channel program can complete. -.It Fl t Ar instruction-limit -Execution time limit, in number of Lua instructions to execute. -If a channel program executes more than the specified number of instructions, -it will be stopped and an error will be returned. -The default limit is 10 million instructions, and it can be set to a maximum of -100 million instructions. 
-.It Fl m Ar memory-limit -Memory limit, in bytes. -If a channel program attempts to allocate more memory than the given limit, it -will be stopped and an error returned. -The default memory limit is 10 MB, and can be set to a maximum of 100 MB. -.El -.Pp -All remaining argument strings will be passed directly to the Lua script as -described in the -.Sx LUA INTERFACE -section below. -.Sh LUA INTERFACE -A channel program can be invoked either from the command line, or via a library -call to -.Fn lzc_channel_program . -.Ss Arguments -Arguments passed to the channel program are converted to a Lua table. -If invoked from the command line, extra arguments to the Lua script will be -accessible as an array stored in the argument table with the key 'argv': -.Bd -literal -offset indent -args = ... -argv = args["argv"] --- argv == {1="arg1", 2="arg2", ...} -.Ed -.Pp -If invoked from the libZFS interface, an arbitrary argument list can be -passed to the channel program, which is accessible via the same -"..." syntax in Lua: -.Bd -literal -offset indent -args = ... --- args == {"foo"="bar", "baz"={...}, ...} -.Ed -.Pp -Note that because Lua arrays are 1-indexed, arrays passed to Lua from the -libZFS interface will have their indices incremented by 1. -That is, the element -in -.Va arr[0] -in a C array passed to a channel program will be stored in -.Va arr[1] -when accessed from Lua. -.Ss Return Values -Lua return statements take the form: -.Bd -literal -offset indent -return ret0, ret1, ret2, ... -.Ed -.Pp -Return statements returning multiple values are permitted internally in a -channel program script, but attempting to return more than one value from the -top level of the channel program is not permitted and will throw an error. -However, tables containing multiple values can still be returned. 
-If invoked from the command line, a return statement: -.Bd -literal -offset indent -a = {foo="bar", baz=2} -return a -.Ed -.Pp -Will be output formatted as: -.Bd -literal -offset indent -Channel program fully executed with return value: - return: - baz: 2 - foo: 'bar' -.Ed -.Ss Fatal Errors -If the channel program encounters a fatal error while running, a non-zero exit -status will be returned. -If more information about the error is available, a singleton list will be -returned detailing the error: -.Bd -literal -offset indent -error: "error string, including Lua stack trace" -.Ed -.Pp -If a fatal error is returned, the channel program may have not executed at all, -may have partially executed, or may have fully executed but failed to pass a -return value back to userland. -.Pp -If the channel program exhausts an instruction or memory limit, a fatal error -will be generated and the program will be stopped, leaving the program partially -executed. -No attempt is made to reverse or undo any operations already performed. -Note that because both the instruction count and amount of memory used by a -channel program are deterministic when run against the same inputs and -filesystem state, as long as a channel program has run successfully once, you -can guarantee that it will finish successfully against a similar size system. -.Pp -If a channel program attempts to return too large a value, the program will -fully execute but exit with a nonzero status code and no return value. -.Pp -.Em Note: -ZFS API functions do not generate Fatal Errors when correctly invoked, they -return an error code and the channel program continues executing. -See the -.Sx ZFS API -section below for function-specific details on error return codes. -.Ss Lua to C Value Conversion -When invoking a channel program via the libZFS interface, it is necessary to -translate arguments and return values from Lua values to their C equivalents, -and vice-versa. 
-.Pp -There is a correspondence between nvlist values in C and Lua tables. -A Lua table which is returned from the channel program will be recursively -converted to an nvlist, with table values converted to their natural -equivalents: -.Bd -literal -offset indent -string -> string -number -> int64 -boolean -> boolean_value -nil -> boolean (no value) -table -> nvlist -.Ed -.Pp -Likewise, table keys are replaced by string equivalents as follows: -.Bd -literal -offset indent -string -> no change -number -> signed decimal string ("%lld") -boolean -> "true" | "false" -.Ed -.Pp -Any collision of table key strings (for example, the string "true" and a -true boolean value) will cause a fatal error. -.Pp -Lua numbers are represented internally as signed 64-bit integers. -.Sh LUA STANDARD LIBRARY -The following Lua built-in base library functions are available: -.Bd -literal -offset indent -assert rawlen -collectgarbage rawget -error rawset -getmetatable select -ipairs setmetatable -next tonumber -pairs tostring -rawequal type -.Ed -.Pp -All functions in the -.Em coroutine , -.Em string , -and -.Em table -built-in submodules are also available. -A complete list and documentation of these modules is available in the Lua -manual. -.Pp -The following functions base library functions have been disabled and are -not available for use in channel programs: -.Bd -literal -offset indent -dofile -loadfile -load -pcall -print -xpcall -.Ed -.Sh ZFS API -.Ss Function Arguments -Each API function takes a fixed set of required positional arguments and -optional keyword arguments. -For example, the destroy function takes a single positional string argument -(the name of the dataset to destroy) and an optional "defer" keyword boolean -argument. 
-When using parentheses to specify the arguments to a Lua function, only -positional arguments can be used: -.Bd -literal -offset indent -zfs.sync.destroy("rpool@snap") -.Ed -.Pp -To use keyword arguments, functions must be called with a single argument that -is a Lua table containing entries mapping integers to positional arguments and -strings to keyword arguments: -.Bd -literal -offset indent -zfs.sync.destroy({1="rpool@snap", defer=true}) -.Ed -.Pp -The Lua language allows curly braces to be used in place of parenthesis as -syntactic sugar for this calling convention: -.Bd -literal -offset indent -zfs.sync.snapshot{"rpool@snap", defer=true} -.Ed -.Ss Function Return Values -If an API function succeeds, it returns 0. -If it fails, it returns an error code and the channel program continues -executing. -API functions do not generate Fatal Errors except in the case of an -unrecoverable internal file system error. -.Pp -In addition to returning an error code, some functions also return extra -details describing what caused the error. -This extra description is given as a second return value, and will always be a -Lua table, or Nil if no error details were returned. -Different keys will exist in the error details table depending on the function -and error case. 
-Any such function may be called expecting a single return value: -.Bd -literal -offset indent -errno = zfs.sync.promote(dataset) -.Ed -.Pp -Or, the error details can be retrieved: -.Bd -literal -offset indent -errno, details = zfs.sync.promote(dataset) -if (errno == EEXIST) then - assert(details ~= Nil) - list_of_conflicting_snapshots = details -end -.Ed -.Pp -The following global aliases for API function error return codes are defined -for use in channel programs: -.Bd -literal -offset indent -EPERM ECHILD ENODEV ENOSPC -ENOENT EAGAIN ENOTDIR ESPIPE -ESRCH ENOMEM EISDIR EROFS -EINTR EACCES EINVAL EMLINK -EIO EFAULT ENFILE EPIPE -ENXIO ENOTBLK EMFILE EDOM -E2BIG EBUSY ENOTTY ERANGE -ENOEXEC EEXIST ETXTBSY EDQUOT -EBADF EXDEV EFBIG -.Ed -.Ss API Functions -For detailed descriptions of the exact behavior of any zfs administrative -operations, see the main -.Xr zfs 8 -manual page. -.Bl -tag -width "xx" -.It Em zfs.debug(msg) -Record a debug message in the zfs_dbgmsg log. -A log of these messages can be printed via mdb's "::zfs_dbgmsg" command, or -can be monitored live by running: -.Bd -literal -offset indent - dtrace -n 'zfs-dbgmsg{trace(stringof(arg0))}' -.Ed -.Pp -msg (string) -.Bd -ragged -compact -offset "xxxx" -Debug message to be printed. -.Ed -.It Em zfs.exists(dataset) -Returns true if the given dataset exists, or false if it doesn't. -A fatal error will be thrown if the dataset is not in the target pool. -That is, in a channel program running on rpool, -zfs.exists("rpool/nonexistent_fs") returns false, but -zfs.exists("somepool/fs_that_may_exist") will error. -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Dataset to check for existence. -Must be in the target pool. -.Ed -.It Em zfs.get_prop(dataset, property) -Returns two values. -First, a string, number or table containing the property value for the given -dataset. -Second, a string containing the source of the property (i.e. 
the name of the -dataset in which it was set or nil if it is readonly). -Throws a Lua error if the dataset is invalid or the property doesn't exist. -Note that Lua only supports int64 number types whereas ZFS number properties -are uint64. -This means very large values (like guid) may wrap around and appear negative. -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Filesystem or snapshot path to retrieve properties from. -.Ed -.Pp -property (string) -.Bd -ragged -compact -offset "xxxx" -Name of property to retrieve. -All filesystem, snapshot and volume properties are supported except -for 'mounted' and 'iscsioptions.' -Also supports the 'written@snap' and 'written#bookmark' properties and -the '<user|group><quota|used>@id' properties, though the id must be in numeric -form. -.Ed -.El -.Bl -tag -width "xx" -.It Sy zfs.sync submodule -The sync submodule contains functions that modify the on-disk state. -They are executed in "syncing context". -.Pp -The available sync submodule functions are as follows: -.Bl -tag -width "xx" -.It Em zfs.sync.destroy(dataset, [defer=true|false]) -Destroy the given dataset. -Returns 0 on successful destroy, or a nonzero error code if the dataset could -not be destroyed (for example, if the dataset has any active children or -clones). -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Filesystem or snapshot to be destroyed. -.Ed -.Pp -[optional] defer (boolean) -.Bd -ragged -compact -offset "xxxx" -Valid only for destroying snapshots. -If set to true, and the snapshot has holds or clones, allows the snapshot to be -marked for deferred deletion rather than failing. -.Ed -.It Em zfs.sync.promote(dataset) -Promote the given clone to a filesystem. -Returns 0 on successful promotion, or a nonzero error code otherwise. -If EEXIST is returned, the second return value will be an array of the clone's -snapshots whose names collide with snapshots of the parent filesystem. 
-.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Clone to be promoted. -.Ed -.It Em zfs.sync.rollback(filesystem) -Rollback to the previous snapshot for a dataset. -Returns 0 on successful rollback, or a nonzero error code otherwise. -Rollbacks can be performed on filesystems or zvols, but not on snapshots -or mounted datasets. -EBUSY is returned in the case where the filesystem is mounted. -.Pp -filesystem (string) -.Bd -ragged -compact -offset "xxxx" -Filesystem to rollback. -.Ed -.It Em zfs.sync.snapshot(dataset) -Create a snapshot of a filesystem. -Returns 0 if the snapshot was successfully created, -and a nonzero error code otherwise. -.Pp -Note: Taking a snapshot will fail on any pool older than legacy version 27. -To enable taking snapshots from ZCP scripts, the pool must be upgraded. -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Name of snapshot to create. -.Ed -.El -.It Sy zfs.check submodule -For each function in the zfs.sync submodule, there is a corresponding zfs.check -function which performs a "dry run" of the same operation. -Each takes the same arguments as its zfs.sync counterpart and returns 0 if the -operation would succeed, or a non-zero error code if it would fail, along with -any other error details. -That is, each has the same behavior as the corresponding sync function except -for actually executing the requested change. -For example, -.Em zfs.check.destroy("fs") -returns 0 if -.Em zfs.sync.destroy("fs") -would successfully destroy the dataset. -.Pp -The available zfs.check functions are: -.Bl -tag -width "xx" -.It Em zfs.check.destroy(dataset, [defer=true|false]) -.It Em zfs.check.promote(dataset) -.It Em zfs.check.rollback(filesystem) -.It Em zfs.check.snapshot(dataset) -.El -.It Sy zfs.list submodule -The zfs.list submodule provides functions for iterating over datasets and -properties. 
-Rather than returning tables, these functions act as Lua iterators, and are -generally used as follows: -.Bd -literal -offset indent -for child in zfs.list.children("rpool") do - ... -end -.Ed -.Pp -The available zfs.list functions are: -.Bl -tag -width "xx" -.It Em zfs.list.clones(snapshot) -Iterate through all clones of the given snapshot. -.Pp -snapshot (string) -.Bd -ragged -compact -offset "xxxx" -Must be a valid snapshot path in the current pool. -.Ed -.It Em zfs.list.snapshots(dataset) -Iterate through all snapshots of the given dataset. -Each snapshot is returned as a string containing the full dataset name, e.g. -"pool/fs@snap". -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Must be a valid filesystem or volume. -.Ed -.It Em zfs.list.children(dataset) -Iterate through all direct children of the given dataset. -Each child is returned as a string containing the full dataset name, e.g. -"pool/fs/child". -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Must be a valid filesystem or volume. -.Ed -.It Em zfs.list.properties(dataset) -Iterate through all user properties for the given dataset. -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Must be a valid filesystem, snapshot, or volume. -.Ed -.It Em zfs.list.system_properties(dataset) -Returns an array of strings, the names of the valid system (non-user defined) -properties for the given dataset. -Throws a Lua error if the dataset is invalid. -.Pp -dataset (string) -.Bd -ragged -compact -offset "xxxx" -Must be a valid filesystem, snapshot or volume. -.Ed -.El -.El -.Sh EXAMPLES -.Ss Example 1 -The following channel program recursively destroys a filesystem and all its -snapshots and children in a naive manner. -Note that this does not involve any error handling or reporting. 
-.Bd -literal -offset indent -function destroy_recursive(root) - for child in zfs.list.children(root) do - destroy_recursive(child) - end - for snap in zfs.list.snapshots(root) do - zfs.sync.destroy(snap) - end - zfs.sync.destroy(root) -end -destroy_recursive("pool/somefs") -.Ed -.Ss Example 2 -A more verbose and robust version of the same channel program, which -properly detects and reports errors, and also takes the dataset to destroy -as a command line argument, would be as follows: -.Bd -literal -offset indent -succeeded = {} -failed = {} - -function destroy_recursive(root) - for child in zfs.list.children(root) do - destroy_recursive(child) - end - for snap in zfs.list.snapshots(root) do - err = zfs.sync.destroy(snap) - if (err ~= 0) then - failed[snap] = err - else - succeeded[snap] = err - end - end - err = zfs.sync.destroy(root) - if (err ~= 0) then - failed[root] = err - else - succeeded[root] = err - end -end - -args = ... -argv = args["argv"] - -destroy_recursive(argv[1]) - -results = {} -results["succeeded"] = succeeded -results["failed"] = failed -return results -.Ed -.Ss Example 3 -The following function performs a forced promote operation by attempting to -promote the given clone and destroying any conflicting snapshots. -.Bd -literal -offset indent -function force_promote(ds) - errno, details = zfs.check.promote(ds) - if (errno == EEXIST) then - assert(details ~= Nil) - for i, snap in ipairs(details) do - zfs.sync.destroy(ds .. "@" .. snap) - end - elseif (errno ~= 0) then - return errno - end - return zfs.sync.promote(ds) -end -.Ed diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 deleted file mode 100644 index 33e0ca4b3040..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ /dev/null @@ -1,3973 +0,0 @@ -'\" te -.\" Copyright (c) 2013, Martin Matuska <mm@FreeBSD.org>. -.\" All Rights Reserved. 
-.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. -.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org> -.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org> -.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org> -.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org> -.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. -.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org> -.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. -.\" Copyright 2019 Joyent, Inc. -.\" Copyright (c) 2018 Datto Inc. -.\" -.\" $FreeBSD$ -.\" -.Dd February 16, 2020 -.Dt ZFS 8 -.Os -.Sh NAME -.Nm zfs -.Nd configures ZFS file systems -.Sh SYNOPSIS -.Nm -.Op Fl \&? -.Nm -.Cm create -.Op Fl pu -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... Ar filesystem -.Nm -.Cm create -.Op Fl ps -.Op Fl b Ar blocksize -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... 
-.Fl V -.Ar size volume -.Nm -.Cm destroy -.Op Fl fnpRrv -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm destroy -.Op Fl dnpRrv -.Sm off -.Ar filesystem Ns | Ns volume -.Ns @snap -.Op % Ns Ar snap -.Op , Ns Ar snap Op % Ns Ar snap -.Op , Ns ... -.Sm on -.Nm -.Cm destroy -.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark -.Nm -.Cm snapshot Ns | Ns Cm snap -.Op Fl r -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Ar filesystem@snapname Ns | Ns Ar volume@snapname -.Ar filesystem@snapname Ns | Ns Ar volume@snapname Ns ... -.Nm -.Cm rollback -.Op Fl rRf -.Ar snapshot -.Nm -.Cm clone -.Op Fl p -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Ar snapshot filesystem Ns | Ns Ar volume -.Nm -.Cm promote -.Ar clone-filesystem -.Nm -.Cm rename -.Op Fl f -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Nm -.Cm rename -.Op Fl f -.Fl p -.Ar filesystem Ns | Ns Ar volume -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm rename -.Fl r -.Ar snapshot snapshot -.Nm -.Cm rename -.Ar bookmark bookmark -.Nm -.Cm rename -.Fl u -.Op Fl p -.Ar filesystem filesystem -.Nm -.Cm list -.Op Fl r Ns | Ns Fl d Ar depth -.Op Fl Hp -.Op Fl o Ar property Ns Oo , Ns property Ns Oc Ns ... -.Op Fl t Ar type Ns Oo , Ns type Ns Oc Ns ... -.Oo Fl s Ar property Oc Ns ... -.Oo Fl S Ar property Oc Ns ... -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot | Ns Ar bookmark Ns ... -.Nm -.Cm remap -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm set -.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ... -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... -.Nm -.Cm get -.Op Fl r Ns | Ns Fl d Ar depth -.Op Fl Hp -.Op Fl o Ar all | field Ns Oo , Ns Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo Ns , Ar type Oc Ns ... -.Op Fl s Ar source Ns Oo Ns , Ns Ar source Oc Ns ... -.Ar all | property Ns Oo Ns , Ns Ar property Oc Ns ... -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... 
-.Nm -.Cm inherit -.Op Fl rS -.Ar property -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... -.Nm -.Cm upgrade -.Op Fl v -.Nm -.Cm upgrade -.Op Fl r -.Op Fl V Ar version -.Fl a | Ar filesystem -.Nm -.Cm userspace -.Op Fl Hinp -.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... -.Oo Fl s Ar field Oc Ns ... -.Oo Fl S Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo Ns , Ns Ar type Oc Ns ... -.Ar filesystem Ns | Ns Ar snapshot -.Nm -.Cm groupspace -.Op Fl Hinp -.Op Fl o Ar field Ns Oo , Ns field Oc Ns ... -.Oo Fl s Ar field Oc Ns ... -.Oo Fl S Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo Ns , Ns Ar type Oc Ns ... -.Ar filesystem Ns | Ns Ar snapshot -.Nm -.Cm mount -.Nm -.Cm mount -.Op Fl vO -.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... -.Fl a | Ar filesystem -.Nm -.Cm unmount Ns | Ns Cm umount -.Op Fl f -.Fl a | Ar filesystem Ns | Ns Ar mountpoint -.Nm -.Cm share -.Fl a | Ar filesystem -.Nm -.Cm unshare -.Fl a | Ar filesystem Ns | Ns Ar mountpoint -.Nm -.Cm bookmark -.Ar snapshot -.Ar bookmark -.Nm -.Cm send -.Op Fl DLPRVcenpv -.Op Fl i Ar snapshot | Fl I Ar snapshot -.Ar snapshot -.Nm -.Cm send -.Op Fl LPcenv -.Op Fl i Ar snapshot Ns | Ns Ar bookmark -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Nm -.Cm send -.Op Fl PVenv -.Fl t Ar receive_resume_token -.Nm -.Cm receive Ns | Ns Cm recv -.Op Fl vnsFMu -.Op Fl o Sy origin Ns = Ns Ar snapshot -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Nm -.Cm receive Ns | Ns Cm recv -.Op Fl vnsFMu -.Op Fl d | e -.Op Fl o Sy origin Ns = Ns Ar snapshot -.Ar filesystem -.Nm -.Cm receive Ns | Ns Cm recv -.Fl A -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm allow -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm allow -.Op Fl ldug -.Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... -.Ar perm Ns | Ns Ar @setname Ns -.Oo Ns , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... 
-.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm allow -.Op Fl ld -.Fl e Ns | Ns Cm everyone -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm allow -.Fl c -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm allow -.Fl s -.Ar @setname -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm unallow -.Op Fl rldug -.Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm unallow -.Op Fl rld -.Fl e Ns | Ns Cm everyone -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm unallow -.Op Fl r -.Fl c -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm unallow -.Op Fl r -.Fl s -.Ar @setname -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Nm -.Cm hold -.Op Fl r -.Ar tag snapshot Ns ... -.Nm -.Cm holds -.Op Fl Hp -.Op Fl r Ns | Ns Fl d Ar depth -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns -.Ns ... -.Nm -.Cm release -.Op Fl r -.Ar tag snapshot Ns ... -.Nm -.Cm diff -.Op Fl FHt -.Ar snapshot -.Op Ar snapshot Ns | Ns Ar filesystem -.Nm -.Cm program -.Op Fl jn -.Op Fl t Ar timeout -.Op Fl m Ar memory_limit -.Ar pool script -.Op Ar arg1 No ... -.Nm -.Cm jail -.Ar jailid Ns | Ns Ar jailname filesystem -.Nm -.Cm unjail -.Ar jailid Ns | Ns Ar jailname filesystem -.Sh DESCRIPTION -The -.Nm -command configures -.Tn ZFS -datasets within a -.Tn ZFS -storage pool, as described in -.Xr zpool 8 . -A dataset is identified by a unique path within the -.Tn ZFS -namespace. 
For example: -.Bd -ragged -offset 4n -.No pool/ Ns Brq filesystem,volume,snapshot -.Ed -.Pp -where the maximum length of a dataset name is -.Dv MAXNAMELEN -(256 bytes) -and the maximum amount of nesting allowed in a path is 50 levels deep. -.Pp -A dataset can be one of the following: -.Bl -hang -width 12n -.It Sy file system -A -.Tn ZFS -dataset of type -.Em filesystem -can be mounted within the standard system namespace and behaves like other file -systems. While -.Tn ZFS -file systems are designed to be -.Tn POSIX -compliant, known issues exist that prevent compliance in some cases. -Applications that depend on standards conformance might fail due to nonstandard -behavior when checking file system free space. -.It Sy volume -A logical volume exported as a raw or block device. This type of dataset should -only be used under special circumstances. File systems are typically used in -most environments. -.It Sy snapshot -A read-only version of a file system or volume at a given point in time. It is -specified as -.Em filesystem@name -or -.Em volume@name . -.El -.Ss ZFS File System Hierarchy -A -.Tn ZFS -storage pool is a logical collection of devices that provide space for -datasets. A storage pool is also the root of the -.Tn ZFS -file system hierarchy. -.Pp -The root of the pool can be accessed as a file system, such as mounting and -unmounting, taking snapshots, and setting properties. The physical storage -characteristics, however, are managed by the -.Xr zpool 8 -command. -.Pp -See -.Xr zpool 8 -for more information on creating and administering pools. -.Ss Snapshots -A snapshot is a read-only copy of a file system or volume. Snapshots can be -created extremely quickly, and initially consume no additional space within the -pool. As data within the active dataset changes, the snapshot consumes more -data than would otherwise be shared with the active dataset. -.Pp -Snapshots can have arbitrary names. 
Snapshots of volumes can be cloned or -rolled back, but cannot be accessed independently. -.Pp -File system snapshots can be accessed under the -.Pa \&.zfs/snapshot -directory in the root of the file system. Snapshots are automatically mounted -on demand and may be unmounted at regular intervals. The visibility of the -.Pa \&.zfs -directory can be controlled by the -.Sy snapdir -property. -.Ss Clones -A clone is a writable volume or file system whose initial contents are the same -as another dataset. As with snapshots, creating a clone is nearly -instantaneous, and initially consumes no additional space. -.Pp -Clones can only be created from a snapshot. When a snapshot is cloned, it -creates an implicit dependency between the parent and child. Even though the -clone is created somewhere else in the dataset hierarchy, the original snapshot -cannot be destroyed as long as a clone exists. The -.Sy origin -property exposes this dependency, and the -.Cm destroy -command lists any such dependencies, if they exist. -.Pp -The clone parent-child dependency relationship can be reversed by using the -.Cm promote -subcommand. This causes the "origin" file system to become a clone of the -specified file system, which makes it possible to destroy the file system that -the clone was created from. -.Ss Mount Points -Creating a -.Tn ZFS -file system is a simple operation, so the number of file systems per system is -likely to be numerous. To cope with this, -.Tn ZFS -automatically manages mounting and unmounting file systems without the need to -edit the -.Pa /etc/fstab -file. All automatically managed file systems are mounted by -.Tn ZFS -at boot time. -.Pp -By default, file systems are mounted under -.Pa /path , -where -.Ar path -is the name of the file system in the -.Tn ZFS -namespace. Directories are created and destroyed as needed. -.Pp -A file system can also have a mount point set in the -.Sy mountpoint -property. 
This directory is created as needed, and -.Tn ZFS -automatically mounts the file system when the -.Qq Nm Cm mount Fl a -command is invoked (without editing -.Pa /etc/fstab ) . -The -.Sy mountpoint -property can be inherited, so if -.Em pool/home -has a mount point of -.Pa /home , -then -.Em pool/home/user -automatically inherits a mount point of -.Pa /home/user . -.Pp -A file system -.Sy mountpoint -property of -.Cm none -prevents the file system from being mounted. -.Pp -If needed, -.Tn ZFS -file systems can also be managed with traditional tools -.Pq Xr mount 8 , Xr umount 8 , Xr fstab 5 . -If a file system's mount point is set to -.Cm legacy , -.Tn ZFS -makes no attempt to manage the file system, and the administrator is -responsible for mounting and unmounting the file system. -.Ss Jails -.No A Tn ZFS -dataset can be attached to a jail by using the -.Qq Nm Cm jail -subcommand. You cannot attach a dataset to one jail and the children of the -same dataset to another jail. You can also not attach the root file system -of the jail or any dataset which needs to be mounted before the zfs rc script -is run inside the jail, as it would be attached unmounted until it is -mounted from the rc script inside the jail. To allow management of the -dataset from within a jail, the -.Sy jailed -property has to be set and the jail needs access to the -.Pa /dev/zfs -device. The -.Sy quota -property cannot be changed from within a jail. See -.Xr jail 8 -for information on how to allow mounting -.Tn ZFS -datasets from within a jail. -.Pp -.No A Tn ZFS -dataset can be detached from a jail using the -.Qq Nm Cm unjail -subcommand. -.Pp -After a dataset is attached to a jail and the jailed property is set, a jailed -file system cannot be mounted outside the jail, since the jail administrator -might have set the mount point to an unacceptable value. -.Ss Deduplication -Deduplication is the process for removing redundant data at the block-level, -reducing the total amount of data stored. 
If a file system has the -.Cm dedup -property enabled, duplicate data blocks are removed synchronously. The result -is that only unique data is stored and common components are shared among -files. -.Ss Native Properties -Properties are divided into two types, native properties and user-defined (or -"user") properties. Native properties either export internal statistics or -control -.Tn ZFS -behavior. In addition, native properties are either editable or read-only. User -properties have no effect on -.Tn ZFS -behavior, but you can use them to annotate datasets in a way that is meaningful -in your environment. For more information about user properties, see the -.Qq Sx User Properties -section, below. -.Pp -Every dataset has a set of properties that export statistics about the dataset -as well as control various behaviors. Properties are inherited from the parent -unless overridden by the child. Some properties apply only to certain types of -datasets (file systems, volumes, or snapshots). -.Pp -The values of numeric properties can be specified using human-readable suffixes -(for example, -.Sy k , KB , M , Gb , -and so forth, up to -.Sy Z -for zettabyte). The following are all valid (and equal) specifications: -.Bd -ragged -offset 4n -1536M, 1.5g, 1.50GB -.Ed -.Pp -The values of non-numeric properties are case sensitive and must be lowercase, -except for -.Sy mountpoint , sharenfs , No and Sy sharesmb . -.Pp -The following native properties consist of read-only statistics about the -dataset. These properties can be neither set, nor inherited. Native properties -apply to all dataset types unless otherwise noted. -.Bl -tag -width 2n -.It Sy available -The amount of space available to the dataset and all its children, assuming -that there is no other activity in the pool. Because space is shared within a -pool, availability can be limited by any number of factors, including physical -pool size, quotas, reservations, or other datasets within the pool. 
-.Pp -This property can also be referred to by its shortened column name, -.Sy avail . -.It Sy compressratio -For non-snapshots, the compression ratio achieved for the -.Sy used -space of this dataset, expressed as a multiplier. The -.Sy used -property includes descendant datasets, and, for clones, does not include -the space shared with the origin snapshot. For snapshots, the -.Sy compressratio -is the same as the -.Sy refcompressratio -property. Compression can be turned on by running: -.Qq Nm Cm set compression=on Ar dataset -The default value is -.Cm off . -.It Sy createtxg -The transaction group (txg) in which the dataset was created. -Bookmarks have the same -.Sy createtxg -as the snapshot they are initially tied to. -This property is suitable for ordering a list of snapshots, -e.g. for incremental send and receive. -.It Sy creation -The time this dataset was created. -.It Sy clones -For snapshots, this property is a comma-separated list of filesystems or -volumes which are clones of this snapshot. The clones' -.Sy origin -property is this snapshot. If the -.Sy clones -property is not empty, then this snapshot can not be destroyed (even with the -.Fl r -or -.Fl f -options). -.It Sy defer_destroy -This property is -.Cm on -if the snapshot has been marked for deferred destroy by using the -.Qq Nm Cm destroy -d -command. Otherwise, the property is -.Cm off . -.It Sy filesystem_count -The total number of filesystems and volumes that exist under this location in the -dataset tree. -This value is only available when a -.Sy filesystem_limit -has -been set somewhere in the tree under which the dataset resides. -.It Sy guid -The 64 bit GUID of this dataset or bookmark which does not change over its -entire lifetime. -When a snapshot is sent to another pool, the received snapshot has the same -GUID. -Thus, the -.Sy guid -is suitable to identify a snapshot across pools. -.It Sy logicalreferenced -The amount of space that is -.Qq logically -accessible by this dataset. 
-See the -.Sy referenced -property. -The logical space ignores the effect of the -.Sy compression -and -.Sy copies -properties, giving a quantity closer to the amount of data that applications -see. -However, it does include space consumed by metadata. -.Pp -This property can also be referred to by its shortened column name, -.Sy lrefer . -.It Sy logicalused -The amount of space that is -.Qq logically -consumed by this dataset and all its descendents. -See the -.Sy used -property. -The logical space ignores the effect of the -.Sy compression -and -.Sy copies -properties, giving a quantity closer to the amount of data that applications -see. -.Pp -This property can also be referred to by its shortened column name, -.Sy lused . -.It Sy mounted -For file systems, indicates whether the file system is currently mounted. This -property can be either -.Cm yes -or -.Cm no . -.It Sy origin -For cloned file systems or volumes, the snapshot from which the clone was -created. See also the -.Sy clones -property. -.It Sy receive_resume_token -For filesystems or volumes which have saved partially-completed state from -.Sy zfs receive -s , -this opaque token can be provided to -.Sy zfs send -t -to resume and complete the -.Sy zfs receive . -.It Sy referenced -The amount of data that is accessible by this dataset, which may or may not be -shared with other datasets in the pool. When a snapshot or clone is created, it -initially references the same amount of space as the file system or snapshot it -was created from, since its contents are identical. -.Pp -This property can also be referred to by its shortened column name, -.Sy refer . -.It Sy refcompressratio -The compression ratio achieved for the -.Sy referenced -space of this dataset, expressed as a multiplier. See also the -.Sy compressratio -property. -.It Sy snapshot_count -The total number of snapshots that exist under this location in the dataset tree. 
-This value is only available when a -.Sy snapshot_limit -has been set somewhere -in the tree under which the dataset resides. -.It Sy type -The type of dataset: -.Sy filesystem , volume , No or Sy snapshot . -.It Sy used -The amount of space consumed by this dataset and all its descendents. This is -the value that is checked against this dataset's quota and reservation. The -space used does not include this dataset's reservation, but does take into -account the reservations of any descendent datasets. The amount of space that a -dataset consumes from its parent, as well as the amount of space that is freed -if this dataset is recursively destroyed, is the greater of its space used and -its reservation. -.Pp -When snapshots (see the -.Qq Sx Snapshots -section) are created, their space is -initially shared between the snapshot and the file system, and possibly with -previous snapshots. As the file system changes, space that was previously -shared becomes unique to the snapshot, and counted in the snapshot's space -used. Additionally, deleting snapshots can increase the amount of space unique -to (and used by) other snapshots. -.Pp -The amount of space used, available, or referenced does not take into account -pending changes. Pending changes are generally accounted for within a few -seconds. Committing a change to a disk using -.Xr fsync 2 -or -.Sy O_SYNC -does not necessarily guarantee that the space usage information is updated -immediately. -.It Sy usedby* -The -.Sy usedby* -properties decompose the -.Sy used -property into the various reasons that space is used. Specifically, -.Sy used No = -.Sy usedbysnapshots + usedbydataset + usedbychildren + usedbyrefreservation . -These properties are only available for datasets created -with -.Tn ZFS -pool version 13 pools and higher. -.It Sy usedbysnapshots -The amount of space consumed by snapshots of this dataset. 
In particular, it is -the amount of space that would be freed if all of this dataset's snapshots were -destroyed. Note that this is not simply the sum of the snapshots' -.Sy used -properties because space can be shared by multiple snapshots. -.It Sy usedbydataset -The amount of space used by this dataset itself, which would be freed if the -dataset were destroyed (after first removing any -.Sy refreservation -and destroying any necessary snapshots or descendents). -.It Sy usedbychildren -The amount of space used by children of this dataset, which would be freed if -all the dataset's children were destroyed. -.It Sy usedbyrefreservation -The amount of space used by a -.Sy refreservation -set on this dataset, which would be freed if the -.Sy refreservation -was removed. -.It Sy userused@ Ns Ar user -The amount of space consumed by the specified user in this dataset. Space is -charged to the owner of each file, as displayed by -.Qq Nm ls Fl l . -The amount of space charged is displayed by -.Qq Nm du -and -.Qq Nm ls Fl s . -See the -.Qq Nm Cm userspace -subcommand for more information. -.Pp -Unprivileged users can access only their own space usage. The root user, or a -user who has been granted the -.Sy userused -privilege with -.Qq Nm Cm allow , -can access everyone's usage. -.Pp -The -.Sy userused@ Ns ... -properties are not displayed by -.Qq Nm Cm get all . -The user's name must be appended after the -.Sy @ -symbol, using one of the following forms: -.Bl -bullet -offset 2n -.It -POSIX name (for example, -.Em joe ) -.It -POSIX numeric ID (for example, -.Em 1001 ) -.El -.It Sy userrefs -This property is set to the number of user holds on this snapshot. User holds -are set by using the -.Qq Nm Cm hold -command. -.It Sy groupused@ Ns Ar group -The amount of space consumed by the specified group in this dataset. Space is -charged to the group of each file, as displayed by -.Nm ls Fl l . -See the -.Sy userused@ Ns Ar user -property for more information. 
-.Pp -Unprivileged users can only access their own groups' space usage. The root -user, or a user who has been granted the -.Sy groupused -privilege with -.Qq Nm Cm allow , -can access all groups' usage. -.It Sy volblocksize Ns = Ns Ar blocksize -For volumes, specifies the block size of the volume. The -.Ar blocksize -cannot be changed once the volume has been written, so it should be set at -volume creation time. The default -.Ar blocksize -for volumes is 8 Kbytes. Any -power of 2 from 512 bytes to 128 Kbytes is valid. -.Pp -This property can also be referred to by its shortened column name, -.Sy volblock . -.It Sy written -The amount of -.Sy referenced -space written to this dataset since the previous snapshot. -.It Sy written@ Ns Ar snapshot -The amount of -.Sy referenced -space written to this dataset since the specified snapshot. This is the space -that is referenced by this dataset but was not referenced by the specified -snapshot. -.Pp -The -.Ar snapshot -may be specified as a short snapshot name (just the part after the -.Sy @ ) , -in which case it will be interpreted as a snapshot in the same filesystem as -this dataset. The -.Ar snapshot -may be a full snapshot name -.Pq Em filesystem@snapshot , -which for clones may be a snapshot in the origin's filesystem (or the origin of -the origin's filesystem, etc). -.El -.Pp -The following native properties can be used to change the behavior of a -.Tn ZFS -dataset. -.Bl -tag -width 2n -.It Xo -.Sy aclinherit Ns = Ns Cm discard | -.Cm noallow | -.Cm restricted | -.Cm passthrough | -.Cm passthrough-x -.Xc -Controls how -.Tn ACL -entries are inherited when files and directories are created. A file system -with an -.Sy aclinherit -property of -.Cm discard -does not inherit any -.Tn ACL -entries. A file system with an -.Sy aclinherit -property value of -.Cm noallow -only inherits inheritable -.Tn ACL -entries that specify "deny" permissions. 
The property value -.Cm restricted -(the default) removes the -.Em write_acl -and -.Em write_owner -permissions when the -.Tn ACL -entry is inherited. A file system with an -.Sy aclinherit -property value of -.Cm passthrough -inherits all inheritable -.Tn ACL -entries without any modifications made to the -.Tn ACL -entries when they are inherited. A file system with an -.Sy aclinherit -property value of -.Cm passthrough-x -has the same meaning as -.Cm passthrough , -except that the -.Em owner@ , group@ , No and Em everyone@ Tn ACE Ns s -inherit the execute permission only if the file creation mode also requests the -execute bit. -.Pp -When the property value is set to -.Cm passthrough , -files are created with a mode determined by the inheritable -.Tn ACE Ns s. -If no inheritable -.Tn ACE Ns s -exist that affect the mode, then the mode is set in accordance to the requested -mode from the application. -.It Sy aclmode Ns = Ns Cm discard | groupmask | passthrough | restricted -Controls how an -.Tn ACL -is modified during -.Xr chmod 2 . -A file system with an -.Sy aclmode -property of -.Cm discard -(the default) deletes all -.Tn ACL -entries that do not represent the mode of the file. An -.Sy aclmode -property of -.Cm groupmask -reduces permissions granted in all -.Em ALLOW -entries found in the -.Tn ACL -such that they are no greater than the group permissions specified by -.Xr chmod 2 . -A file system with an -.Sy aclmode -property of -.Cm passthrough -indicates that no changes are made to the -.Tn ACL -other than creating or updating the necessary -.Tn ACL -entries to represent the new mode of the file or directory. -An -.Sy aclmode -property of -.Cm restricted -will cause the -.Xr chmod 2 -operation to return an error when used on any file or directory which has -a non-trivial -.Tn ACL -whose entries can not be represented by a mode. 
-.Xr chmod 2 -is required to change the set user ID, set group ID, or sticky bits on a file -or directory, as they do not have equivalent -.Tn ACL -entries. -In order to use -.Xr chmod 2 -on a file or directory with a non-trivial -.Tn ACL -when -.Sy aclmode -is set to -.Cm restricted , -you must first remove all -.Tn ACL -entries which do not represent the current mode. -.It Sy atime Ns = Ns Cm on | off -Controls whether the access time for files is updated when they are read. -Turning this property off avoids producing write traffic when reading files and -can result in significant performance gains, though it might confuse mailers -and other similar utilities. The default value is -.Cm on . -.It Sy canmount Ns = Ns Cm on | off | noauto -If this property is set to -.Cm off , -the file system cannot be mounted, and is ignored by -.Qq Nm Cm mount Fl a . -Setting this property to -.Cm off -is similar to setting the -.Sy mountpoint -property to -.Cm none , -except that the dataset still has a normal -.Sy mountpoint -property, which can be inherited. Setting this property to -.Cm off -allows datasets to be used solely as a mechanism to inherit properties. One -example of setting -.Sy canmount Ns = Ns Cm off -is to have two datasets with the same -.Sy mountpoint , -so that the children of both datasets appear in the same directory, but might -have different inherited characteristics. -.Pp -When the -.Cm noauto -value is set, a dataset can only be mounted and unmounted explicitly. The -dataset is not mounted automatically when the dataset is created or imported, -nor is it mounted by the -.Qq Nm Cm mount Fl a -command or unmounted by the -.Qq Nm Cm umount Fl a -command. -.Pp -This property is not inherited. -.It Sy checksum Ns = Ns Cm on | off | fletcher2 | fletcher4 | sha256 | noparity | sha512 | skein -Controls the checksum used to verify data integrity. 
The default value is -.Cm on , -which automatically selects an appropriate algorithm (currently, -.Cm fletcher4 , -but this may change in future releases). The value -.Cm off -disables integrity checking on user data. -The value -.Cm noparity -not only -disables integrity but also disables maintaining parity for user data. This -setting is used internally by a dump device residing on a RAID-Z pool and should -not be used by any other dataset. -Disabling checksums is -.Em NOT -a recommended practice. -The -.Sy sha512 , -and -.Sy skein -checksum algorithms require enabling the appropriate features on the pool. -Please see -.Xr zpool-features 7 -for more information on these algorithms. -.Pp -Changing this property affects only newly-written data. -.Pp -The salted checksum algorithm -.Pq Cm edonr -is currently not supported on FreeBSD. -.It Sy compression Ns = Ns Cm on | off | lzjb | gzip | gzip- Ns Ar N | Cm zle | Cm lz4 -Controls the compression algorithm used for this dataset. -Setting compression to -.Cm on -indicates that the current default compression algorithm should be used. -The default balances compression and decompression speed, with compression -ratio and is expected to work well on a wide variety of workloads. -Unlike all other settings for this property, on does not select a fixed -compression type. -As new compression algorithms are added to ZFS and enabled on a pool, the -default compression algorithm may change. -The current default compression algorithm is either -.Cm lzjb -or, if the -.Sy lz4_compress -feature is enabled, -.Cm lz4 . -The -.Cm lzjb -compression algorithm is optimized for performance while providing decent data -compression. Setting compression to -.Cm on -uses the -.Cm lzjb -compression algorithm. The -.Cm gzip -compression algorithm uses the same compression as the -.Xr gzip 1 -command.
You can specify the -.Cm gzip -level by using the value -.Cm gzip- Ns Ar N -where -.Ar N -is an integer from 1 (fastest) to 9 (best compression ratio). Currently, -.Cm gzip -is equivalent to -.Cm gzip-6 -(which is also the default for -.Xr gzip 1 ) . -The -.Cm zle -compression algorithm compresses runs of zeros. -.Pp -The -.Sy lz4 -compression algorithm is a high-performance replacement -for the -.Sy lzjb -algorithm. It features significantly faster -compression and decompression, as well as a moderately higher -compression ratio than -.Sy lzjb , -but can only be used on pools with -the -.Sy lz4_compress -feature set to -.Sy enabled . -See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy lz4_compress -feature. -.Pp -This property can also be referred to by its shortened column name -.Cm compress . -Changing this property affects only newly-written data. -.It Sy copies Ns = Ns Cm 1 | 2 | 3 -Controls the number of copies of data stored for this dataset. These copies are -in addition to any redundancy provided by the pool, for example, mirroring or -RAID-Z. The copies are stored on different disks, if possible. The space used -by multiple copies is charged to the associated file and dataset, changing the -.Sy used -property and counting against quotas and reservations. -.Pp -Changing this property only affects newly-written data. Therefore, set this -property at file system creation time by using the -.Fl o Cm copies= Ns Ar N -option. -.It Sy dedup Ns = Ns Cm on | off | verify | sha256 Ns Oo Cm ,verify Oc | Sy sha512 Ns Oo Cm ,verify Oc | Sy skein Ns Oo Cm ,verify Oc -Configures deduplication for a dataset. The default value is -.Cm off . -The default deduplication checksum is -.Cm sha256 -(this may change in the future). -When -.Sy dedup -is enabled, the checksum defined here overrides the -.Sy checksum -property. Setting the value to -.Cm verify -has the same effect as the setting -.Cm sha256,verify . 
-.Pp -If set to -.Cm verify , -.Tn ZFS -will do a byte-to-byte comparison in case of two blocks having the same -signature to make sure the block contents are identical. -.It Sy devices Ns = Ns Cm on | off -The -.Sy devices -property is currently not supported on -.Fx . -.It Sy exec Ns = Ns Cm on | off -Controls whether processes can be executed from within this file system. The -default value is -.Cm on . -.It Sy mlslabel Ns = Ns Ar label | Cm none -The -.Sy mlslabel -property is currently not supported on -.Fx . -.It Sy filesystem_limit Ns = Ns Ar count | Cm none -Limits the number of filesystems and volumes that can exist under this point in -the dataset tree. -The limit is not enforced if the user is allowed to change -the limit. -Setting a -.Sy filesystem_limit -on a descendent of a filesystem that -already has a -.Sy filesystem_limit -does not override the ancestor's -.Sy filesystem_limit , -but rather imposes an additional limit. -This feature must be enabled to be used -.Po see -.Xr zpool-features 7 -.Pc . -.It Sy special_small_blocks Ns = Ns Ar size -This value represents the threshold block size for including small file -blocks into the special allocation class. -Blocks smaller than or equal to this value will be assigned to the special -allocation class while greater blocks will be assigned to the regular class. -Valid values are zero or a power of two from 512B up to 128K. -The default size is 0 which means no small file blocks will be allocated in -the special class. -.Pp -Before setting this property, a special class vdev must be added to the -pool. -See -.Xr zpool 8 -for more details on the special allocation class. -.It Sy mountpoint Ns = Ns Ar path | Cm none | legacy -Controls the mount point used for this file system. -See the -.Qq Sx Mount Points -section for more information on how this property is used.
-.Pp -When the -.Sy mountpoint -property is changed for a file system, the file system and any children that -inherit the mount point are unmounted. If the new value is -.Cm legacy , -then they remain unmounted. Otherwise, they are automatically remounted in the -new location if the property was previously -.Cm legacy -or -.Cm none , -or if they were mounted before the property was changed. In addition, any -shared file systems are unshared and shared in the new location. -.It Sy nbmand Ns = Ns Cm on | off -The -.Sy nbmand -property is currently not supported on -.Fx . -.It Sy primarycache Ns = Ns Cm all | none | metadata -Controls what is cached in the primary cache (ARC). If this property is set to -.Cm all , -then both user data and metadata is cached. If this property is set to -.Cm none , -then neither user data nor metadata is cached. If this property is set to -.Cm metadata , -then only metadata is cached. The default value is -.Cm all . -.It Sy quota Ns = Ns Ar size | Cm none -Limits the amount of space a dataset and its descendents can consume. This -property enforces a hard limit on the amount of space used. This includes all -space consumed by descendents, including file systems and snapshots. Setting a -quota on a descendent of a dataset that already has a quota does not override -the ancestor's quota, but rather imposes an additional limit. -.Pp -Quotas cannot be set on volumes, as the -.Sy volsize -property acts as an implicit quota. -.It Sy snapshot_limit Ns = Ns Ar count | Cm none -Limits the number of snapshots that can be created on a dataset and its -descendents. -Setting a -.Sy snapshot_limit -on a descendent of a dataset that already -has a -.Sy snapshot_limit -does not override the ancestor's -.Sy snapshot_limit , -but -rather imposes an additional limit. -The limit is not enforced if the user is -allowed to change the limit. 
-For example, this means that recursive snapshots -taken from the global zone are counted against each delegated dataset within -a jail. -This feature must be enabled to be used -.Po see -.Xr zpool-features 7 -.Pc . -.It Sy userquota@ Ns Ar user Ns = Ns Ar size | Cm none -Limits the amount of space consumed by the specified user. -Similar to the -.Sy refquota -property, the -.Sy userquota -space calculation does not include space that is used by descendent datasets, -such as snapshots and clones. User space consumption is identified by the -.Sy userspace@ Ns Ar user -property. -.Pp -Enforcement of user quotas may be delayed by several seconds. This delay means -that a user might exceed their quota before the system notices that they are -over quota and begins to refuse additional writes with the -.Em EDQUOT -error message. See the -.Cm userspace -subcommand for more information. -.Pp -Unprivileged users can only access their own groups' space usage. The root -user, or a user who has been granted the -.Sy userquota -privilege with -.Qq Nm Cm allow , -can get and set everyone's quota. -.Pp -This property is not available on volumes, on file systems before version 4, or -on pools before version 15. The -.Sy userquota@ Ns ... -properties are not displayed by -.Qq Nm Cm get all . -The user's name must be appended after the -.Sy @ -symbol, using one of the following forms: -.Bl -bullet -offset 2n -.It -POSIX name (for example, -.Em joe ) -.It -POSIX numeric ID (for example, -.Em 1001 ) -.El -.It Sy groupquota@ Ns Ar group Ns = Ns Ar size | Cm none -Limits the amount of space consumed by the specified group. Group space -consumption is identified by the -.Sy groupused@ Ns Ar group -property. -.Pp -Unprivileged users can access only their own groups' space usage. The root -user, or a user who has been granted the -.Sy groupquota -privilege with -.Qq Nm Cm allow , -can get and set all groups' quotas.
-.It Sy readonly Ns = Ns Cm on | off -Controls whether this dataset can be modified. The default value is -.Cm off . -.It Sy recordsize Ns = Ns Ar size -Specifies a suggested block size for files in the file system. This property is -designed solely for use with database workloads that access files in fixed-size -records. -.Tn ZFS -automatically tunes block sizes according to internal algorithms optimized for -typical access patterns. -.Pp -For databases that create very large files but access them in small random -chunks, these algorithms may be suboptimal. Specifying a -.Sy recordsize -greater than or equal to the record size of the database can result in -significant performance gains. Use of this property for general purpose file -systems is strongly discouraged, and may adversely affect performance. -.Pp -The size specified must be a power of two greater than or equal to 512 and less -than or equal to 128 Kbytes. -If the -.Sy large_blocks -feature is enabled on the pool, the size may be up to 1 Mbyte. -See -.Xr zpool-features 7 -for details on ZFS feature flags. -.Pp -Changing the file system's -.Sy recordsize -affects only files created afterward; existing files are unaffected. -.Pp -This property can also be referred to by its shortened column name, -.Sy recsize . -.It Sy redundant_metadata Ns = Ns Cm all | most -Controls what types of metadata are stored redundantly. -ZFS stores an extra copy of metadata, so that if a single block is corrupted, -the amount of user data lost is limited. -This extra copy is in addition to any redundancy provided at the pool level -.Pq e.g. by mirroring or RAID-Z , -and is in addition to an extra copy specified by the -.Sy copies -property -.Pq up to a total of 3 copies . -For example if the pool is mirrored, -.Cm copies Ns = Ns Ar 2 , -and -.Cm redundant_metadata Ns = Ns Ar most , -then ZFS -stores 6 copies of most metadata, and 4 copies of data and some -metadata. 
-.Pp -When set to -.Cm all , -ZFS stores an extra copy of all metadata. -If a -single on-disk block is corrupt, at worst a single block of user data -.Po which is -.Cm recordsize -bytes long -.Pc -can be lost. -.Pp -When set to -.Cm most , -ZFS stores an extra copy of most types of -metadata. -This can improve performance of random writes, because less -metadata must be written. -In practice, at worst about 100 blocks -.Po of -.Cm recordsize -bytes each -.Pc -of user data can be lost if a single -on-disk block is corrupt. -The exact behavior of which metadata blocks -are stored redundantly may change in future releases. -.Pp -The default value is -.Cm all . -.It Sy refquota Ns = Ns Ar size | Cm none -Limits the amount of space a dataset can consume. This property enforces a hard -limit on the amount of space used. This hard limit does not include space used -by descendents, including file systems and snapshots. -.It Sy refreservation Ns = Ns Ar size | Cm none | Cm auto -The minimum amount of space guaranteed to a dataset, not including its -descendents. When the amount of space used is below this value, the dataset is -treated as if it were taking up the amount of space specified by -.Sy refreservation . -The -.Sy refreservation -reservation is accounted for in the parent datasets' space used, and counts -against the parent datasets' quotas and reservations. -.Pp -If -.Sy refreservation -is set, a snapshot is only allowed if there is enough free pool space outside -of this reservation to accommodate the current number of "referenced" bytes in -the dataset. -.Pp -If -.Sy refreservation -is set to -.Sy auto , -a volume is thick provisioned or not sparse. -.Sy refreservation Ns = Ns Cm auto -is only supported on volumes. -See -.Sy volsize -in the Native Properties -section for more information about sparse volumes. -.Pp -This property can also be referred to by its shortened column name, -.Sy refreserv .
-.It Sy reservation Ns = Ns Ar size | Cm none -The minimum amount of space guaranteed to a dataset and its descendents. When -the amount of space used is below this value, the dataset is treated as if it -were taking up the amount of space specified by its reservation. Reservations -are accounted for in the parent datasets' space used, and count against the -parent datasets' quotas and reservations. -.Pp -This property can also be referred to by its shortened column name, -.Sy reserv . -.It Sy secondarycache Ns = Ns Cm all | none | metadata -Controls what is cached in the secondary cache (L2ARC). If this property is set -to -.Cm all , -then both user data and metadata is cached. If this property is set to -.Cm none , -then neither user data nor metadata is cached. If this property is set to -.Cm metadata , -then only metadata is cached. The default value is -.Cm all . -.It Sy setuid Ns = Ns Cm on | off -Controls whether the -.No set- Ns Tn UID -bit is respected for the file system. The default value is -.Cm on . -.It Sy sharesmb Ns = Ns Cm on | off | Ar opts -The -.Sy sharesmb -property currently has no effect on -.Fx . -.It Sy sharenfs Ns = Ns Cm on | off | Ar opts -Controls whether the file system is shared via -.Tn NFS , -and what options are used. A file system with a -.Sy sharenfs -property of -.Cm off -is managed the traditional way via -.Xr exports 5 . -Otherwise, the file system is automatically shared and unshared with the -.Qq Nm Cm share -and -.Qq Nm Cm unshare -commands. If the property is set to -.Cm on -no -.Tn NFS -export options are used. Otherwise, -.Tn NFS -export options are equivalent to the contents of this property. The export -options may be comma-separated. See -.Xr exports 5 -for a list of valid options. -.Pp -When the -.Sy sharenfs -property is changed for a dataset, the -.Xr mountd 8 -daemon is reloaded. -.It Sy logbias Ns = Ns Cm latency | throughput -Provide a hint to -.Tn ZFS -about handling of synchronous requests in this dataset. 
-If -.Sy logbias -is set to -.Cm latency -(the default), -.Tn ZFS -will use pool log devices (if configured) to handle the requests at low -latency. If -.Sy logbias -is set to -.Cm throughput , -.Tn ZFS -will not use configured pool log devices. -.Tn ZFS -will instead optimize synchronous operations for global pool throughput and -efficient use of resources. -.It Sy snapdir Ns = Ns Cm hidden | visible -Controls whether the -.Pa \&.zfs -directory is hidden or visible in the root of the file system as discussed in -the -.Qq Sx Snapshots -section. The default value is -.Cm hidden . -.It Sy sync Ns = Ns Cm standard | always | disabled -Controls the behavior of synchronous requests (e.g. -.Xr fsync 2 , -O_DSYNC). This property accepts the following values: -.Bl -tag -offset 4n -width 8n -.It Sy standard -This is the POSIX specified behavior of ensuring all synchronous requests are -written to stable storage and all devices are flushed to ensure data is not -cached by device controllers (this is the default). -.It Sy always -All file system transactions are written and flushed before their system calls -return. This has a large performance penalty. -.It Sy disabled -Disables synchronous requests. File system transactions are only committed to -stable storage periodically. This option will give the highest performance. -However, it is very dangerous as -.Tn ZFS -would be ignoring the synchronous transaction demands of applications such as -databases or -.Tn NFS . -Administrators should only use this option when the risks are understood. -.El -.It Sy volsize Ns = Ns Ar size -For volumes, specifies the logical size of the volume. By default, creating a -volume establishes a reservation of equal size. For storage pools with a -version number of 9 or higher, a -.Sy refreservation -is set instead. Any changes to -.Sy volsize -are reflected in an equivalent change to the reservation (or -.Sy refreservation ) . 
-The -.Sy volsize -can only be set to a multiple of -.Cm volblocksize , -and cannot be zero. -.Pp -The reservation is kept equal to the volume's logical size to prevent -unexpected behavior for consumers. Without the reservation, the volume could -run out of space, resulting in undefined behavior or data corruption, depending -on how the volume is used. These effects can also occur when the volume size is -changed while it is in use (particularly when shrinking the size). Extreme care -should be used when adjusting the volume size. -.Pp -Though not recommended, a "sparse volume" (also known as "thin provisioned") -can be created by specifying the -.Fl s -option to the -.Qq Nm Cm create Fl V -command, or by changing the value of the -.Sy refreservation -property, or -.Sy reservation -property on pool -.Po -version 8 or earlier -.Pc -after the volume has been created. -A "sparse volume" is a volume where the value of -.Sy refreservation -is less than the size of the volume plus the space required to store its -metadata. -Consequently, writes to a sparse volume can fail with -.Sy ENOSPC -when the pool is low on space. For a sparse volume, changes to -.Sy volsize -are not reflected in the -.Sy refreservation . -A volume that is not sparse is said to be "thick provisioned". -A sparse volume can become thick provisioned by setting -.Sy refreservation -to -.Sy auto . -.It Sy volmode Ns = Ns Cm default | geom | dev | none -This property specifies how volumes should be exposed to the OS. -Setting it to -.Sy geom -exposes volumes as -.Xr geom 4 -providers, providing maximal functionality. -Setting it to -.Sy dev -exposes volumes only as cdev device in devfs. -Such volumes can be accessed only as raw disk device files, i.e. they -can not be partitioned, mounted, participate in RAIDs, etc, but they -are faster, and in some use scenarios with untrusted consumer, such as -NAS or VM storage, can be more safe.
-Volumes with property set to -.Sy none -are not exposed outside ZFS, but can be snapshotted, cloned, replicated, etc, -that can be suitable for backup purposes. -Value -.Sy default -means that volume exposure is controlled by system-wide sysctl/tunable -.Va vfs.zfs.vol.mode , -where -.Sy geom , -.Sy dev -and -.Sy none -are encoded as 1, 2 and 3 respectively. -The default value is -.Sy geom . -This property can be changed any time, but so far it is processed only -during volume creation and pool import. -.It Sy vscan Ns = Ns Cm off | on -The -.Sy vscan -property is currently not supported on -.Fx . -.It Sy xattr Ns = Ns Cm off | on -The -.Sy xattr -property is currently not supported on -.Fx . -.It Sy jailed Ns = Ns Cm off | on -Controls whether the dataset is managed from a jail. See the -.Qq Sx Jails -section for more information. The default value is -.Cm off . -.El -.Pp -The following three properties cannot be changed after the file system is -created, and therefore, should be set when the file system is created. If the -properties are not set with the -.Qq Nm Cm create -or -.Nm zpool Cm create -commands, these properties are inherited from the parent dataset. If the parent -dataset lacks these properties due to having been created prior to these -features being supported, the new file system will have the default values for -these properties. -.Bl -tag -width 4n -.It Sy casesensitivity Ns = Ns Cm sensitive | insensitive | mixed -Indicates whether the file name matching algorithm used by the file system -should be case-sensitive, case-insensitive, or allow a combination of both -styles of matching. The default value for the -.Sy casesensitivity -property is -.Cm sensitive . -Traditionally, UNIX and POSIX file systems have case-sensitive file names. -.Pp -The -.Cm mixed -value for the -.Sy casesensitivity -property indicates that the -file system can support requests for both case-sensitive and case-insensitive -matching behavior.
-.It Sy normalization Ns = Ns Cm none | formC | formD | formKC | formKD -Indicates whether the file system should perform a -.Sy unicode -normalization of file names whenever two file names are compared, and which -normalization algorithm should be used. File names are always stored -unmodified, names are normalized as part of any comparison process. If this -property is set to a legal value other than -.Cm none , -and the -.Sy utf8only -property was left unspecified, the -.Sy utf8only -property is automatically set to -.Cm on . -The default value of the -.Sy normalization -property is -.Cm none . -This property cannot be changed after the file system is created. -.It Sy utf8only Ns = Ns Cm on | off -Indicates whether the file system should reject file names that include -characters that are not present in the -.Sy UTF-8 -character code set. If this property is explicitly set to -.Cm off , -the normalization property must either not be explicitly set or be set to -.Cm none . -The default value for the -.Sy utf8only -property is -.Cm off . -This property cannot be changed after the file system is created. -.El -.Pp -The -.Sy casesensitivity , normalization , No and Sy utf8only -properties are also new permissions that can be assigned to non-privileged -users by using the -.Tn ZFS -delegated administration feature. -.Ss Temporary Mount Point Properties -When a file system is mounted, either through -.Xr mount 8 -for legacy mounts or the -.Qq Nm Cm mount -command for normal file systems, its mount options are set according to its -properties. The correlation between properties and mount options is as follows: -.Bl -column -offset 4n "PROPERTY" "MOUNT OPTION" -.It "PROPERTY MOUNT OPTION" -.It "atime atime/noatime" -.It "exec exec/noexec" -.It "readonly ro/rw" -.It "setuid suid/nosuid" -.El -.Pp -In addition, these options can be set on a per-mount basis using the -.Fl o -option, without affecting the property that is stored on disk. 
The values -specified on the command line override the values stored in the dataset. These -properties are reported as "temporary" by the -.Qq Nm Cm get -command. If the properties are changed while the dataset is mounted, the new -setting overrides any temporary settings. -.Ss User Properties -In addition to the standard native properties, -.Tn ZFS -supports arbitrary user properties. User properties have no effect on -.Tn ZFS -behavior, but applications or administrators can use them to annotate datasets -(file systems, volumes, and snapshots). -.Pp -User property names must contain a colon -.Pq Sy \&: -character to distinguish them from native properties. They may contain -lowercase letters, numbers, and the following punctuation characters: colon -.Pq Sy \&: , -dash -.Pq Sy \&- , -period -.Pq Sy \&. -and underscore -.Pq Sy \&_ . -The expected convention is that the property name is divided into two portions -such as -.Em module Ns Sy \&: Ns Em property , -but this namespace is not enforced by -.Tn ZFS . -User property names can be at most 256 characters, and cannot begin with a dash -.Pq Sy \&- . -.Pp -When making programmatic use of user properties, it is strongly suggested to -use a reversed -.Tn DNS -domain name for the -.Ar module -component of property names to reduce the chance that two -independently-developed packages use the same property name for different -purposes. Property names beginning with -.Em com.sun -are reserved for use by Sun Microsystems. -.Pp -The values of user properties are arbitrary strings, are always inherited, and -are never validated. All of the commands that operate on properties -.Po -.Qq Nm Cm list , -.Qq Nm Cm get , -.Qq Nm Cm set -and so forth -.Pc -can be used to manipulate both native properties and user properties. Use the -.Qq Nm Cm inherit -command to clear a user property. If the property is not defined in any parent -dataset, it is removed entirely. Property values are limited to 1024 -characters. 
-.Sh SUBCOMMANDS -All subcommands that modify state are logged persistently to the pool in their -original form. -.Bl -tag -width 2n -.It Xo -.Nm -.Op Fl \&? -.Xc -.Pp -Displays a help message. -.It Xo -.Nm -.Cm create -.Op Fl pu -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Ar filesystem -.Xc -.Pp -Creates a new -.Tn ZFS -file system. The file system is automatically mounted according to the -.Sy mountpoint -property inherited from the parent. -.Bl -tag -width indent -.It Fl p -Creates all the non-existing parent datasets. Datasets created in this manner -are automatically mounted according to the -.Sy mountpoint -property inherited from their parent. Any property specified on the command -line using the -.Fl o -option is ignored. If the target filesystem already exists, the operation -completes successfully. -.It Fl u -Newly created file system is not mounted. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property as if the command -.Qq Nm Cm set Ar property Ns = Ns Ar value -was invoked at the same time the dataset was created. Any editable -.Tn ZFS -property can also be set at creation time. Multiple -.Fl o -options can be specified. An error results if the same property is specified in -multiple -.Fl o -options. -.El -.It Xo -.Nm -.Cm create -.Op Fl ps -.Op Fl b Ar blocksize -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Fl V -.Ar size volume -.Xc -.Pp -Creates a volume of the given size. The volume is exported as a block device in -.Pa /dev/zvol/path , -where -.Ar path -is the name of the volume in the -.Tn ZFS -namespace. The size represents the logical size as exported by the device. By -default, a reservation of equal size is created. -.Pp -.Ar size -is automatically rounded up to the nearest 128 Kbytes to ensure that -the volume has an integral number of blocks regardless of -.Ar blocksize . -.Bl -tag -width indent -.It Fl p -Creates all the non-existing parent datasets. 
Datasets created in this manner -are automatically mounted according to the -.Sy mountpoint -property inherited from their parent. Any property specified on the command -line using the -.Fl o -option is ignored. If the target filesystem already exists, the operation -completes successfully. -.It Fl s -Creates a sparse volume with no reservation. See -.Sy volsize -in the -.Qq Sx Native Properties -section for more information about sparse volumes. -.It Fl b Ar blocksize -Equivalent to -.Fl o Cm volblocksize Ns = Ns Ar blocksize . -If this option is specified in conjunction with -.Fl o Cm volblocksize , -the resulting behavior is undefined. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property as if the -.Qq Nm Cm set Ar property Ns = Ns Ar value -command was invoked at the same time the dataset was created. Any editable -.Tn ZFS -property can also be set at creation time. Multiple -.Fl o -options can be specified. An error results if the same property is specified in -multiple -.Fl o -options. -.El -.It Xo -.Nm -.Cm destroy -.Op Fl fnpRrv -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Destroys the given dataset. By default, the command unshares any file systems -that are currently shared, unmounts any file systems that are currently -mounted, and refuses to destroy a dataset that has active dependents (children -or clones). -.Bl -tag -width indent -.It Fl r -Recursively destroy all children. -.It Fl R -Recursively destroy all dependents, including cloned file systems outside the -target hierarchy. -.It Fl f -Force an unmount of any file systems using the -.Qq Nm Cm unmount Fl f -command. This option has no effect on non-file systems or unmounted file -systems. -.It Fl n -Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in -conjunction with the -.Fl v -or -.Fl p -flags to determine what data would be deleted. -.It Fl p -Print machine-parsable verbose information about the deleted data. 
-.It Fl v -Print verbose information about the deleted data. -.El -.Pp -Extreme care should be taken when applying either the -.Fl r -or the -.Fl R -options, as they can destroy large portions of a pool and cause unexpected -behavior for mounted file systems in use. -.It Xo -.Nm -.Cm destroy -.Op Fl dnpRrv -.Sm off -.Ar snapshot -.Op % Ns Ar snapname -.Op , Ns ... -.Sm on -.Xc -.Pp -The given snapshots are destroyed immediately if and only if the -.Qq Nm Cm destroy -command without the -.Fl d -option would have destroyed it. Such immediate destruction would occur, for -example, if the snapshot had no clones and the user-initiated reference count -were zero. -.Pp -If a snapshot does not qualify for immediate destruction, it is marked for -deferred deletion. In this state, it exists as a usable, visible snapshot until -both of the preconditions listed above are met, at which point it is destroyed. -.Pp -An inclusive range of snapshots may be specified by separating the -first and last snapshots with a percent sign -.Pq Sy % . -The first and/or last snapshots may be left blank, in which case the -filesystem's oldest or newest snapshot will be implied. -.Pp -Multiple snapshots -(or ranges of snapshots) of the same filesystem or volume may be specified -in a comma-separated list of snapshots. -Only the snapshot's short name (the -part after the -.Sy @ ) -should be specified when using a range or comma-separated list to identify -multiple snapshots. -.Bl -tag -width indent -.It Fl r -Destroy (or mark for deferred deletion) all snapshots with this name in -descendent file systems. -.It Fl R -Recursively destroy all clones of these snapshots, including the clones, -snapshots, and children. -If this flag is specified, the -.Fl d -flag will have no effect. -.It Fl n -Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in -conjunction with the -.Fl v -or -.Fl p -flags to determine what data would be deleted. 
-.It Fl p -Print machine-parsable verbose information about the deleted data. -.It Fl v -Print verbose information about the deleted data. -.It Fl d -Defer snapshot deletion. -.El -.Pp -Extreme care should be taken when applying either the -.Fl r -or the -.Fl R -options, as they can destroy large portions of a pool and cause unexpected -behavior for mounted file systems in use. -.It Xo -.Nm -.Cm destroy -.Ar filesystem Ns | Ns Ar volume Ns # Ns Ar bookmark -.Xc -.Pp -The given bookmark is destroyed. -.It Xo -.Nm -.Cm snapshot Ns | Ns Cm snap -.Op Fl r -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Ar filesystem@snapname Ns | Ns volume@snapname -.Ar filesystem@snapname Ns | Ns volume@snapname Ns ... -.Xc -.Pp -Creates snapshots with the given names. All previous modifications by -successful system calls to the file system are part of the snapshots. -Snapshots are taken atomically, so that all snapshots correspond to the same -moment in time. See the -.Qq Sx Snapshots -section for details. -.Bl -tag -width indent -.It Fl r -Recursively create snapshots of all descendent datasets. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property; see -.Qq Nm Cm create -for details. -.El -.It Xo -.Nm -.Cm rollback -.Op Fl rRf -.Ar snapshot -.Xc -.Pp -Roll back the given dataset to a previous snapshot. When a dataset is rolled -back, all data that has changed since the snapshot is discarded, and the -dataset reverts to the state at the time of the snapshot. By default, the -command refuses to roll back to a snapshot other than the most recent one. In -order to do so, all intermediate snapshots and bookmarks must be destroyed -by specifying the -.Fl r -option. -.Pp -The -.Fl rR -options do not recursively destroy the child snapshots of a -recursive snapshot. -Only direct snapshots of the specified filesystem -are destroyed by either of these options. -To completely roll back a -recursive snapshot, you must roll back the individual child snapshots.
-.Bl -tag -width indent -.It Fl r -Destroy any snapshots and bookmarks more recent than the one specified. -.It Fl R -Destroy any more recent snapshots and bookmarks, as well as any clones of those -snapshots. -.It Fl f -Used with the -.Fl R -option to force an unmount of any clone file systems that are to be destroyed. -.El -.It Xo -.Nm -.Cm clone -.Op Fl p -.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... -.Ar snapshot filesystem Ns | Ns Ar volume -.Xc -.Pp -Creates a clone of the given snapshot. See the -.Qq Sx Clones -section for details. The target dataset can be located anywhere in the -.Tn ZFS -hierarchy, and is created as the same type as the original. -.Bl -tag -width indent -.It Fl p -Creates all the non-existing parent datasets. Datasets created in this manner -are automatically mounted according to the -.Sy mountpoint -property inherited from their parent. If the target filesystem or volume -already exists, the operation completes successfully. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property; see -.Qq Nm Cm create -for details. -.El -.It Xo -.Nm -.Cm promote -.Ar clone-filesystem -.Xc -.Pp -Promotes a clone file system to no longer be dependent on its "origin" -snapshot. This makes it possible to destroy the file system that the clone was -created from. The clone parent-child dependency relationship is reversed, so -that the origin file system becomes a clone of the specified file system. -.Pp -The snapshot that was cloned, and any snapshots previous to this snapshot, are -now owned by the promoted clone. The space they use moves from the origin file -system to the promoted clone, so enough space must be available to accommodate -these snapshots. No new space is consumed by this operation, but the space -accounting is adjusted. The promoted clone must not have any conflicting -snapshot names of its own. The -.Cm rename -subcommand can be used to rename any conflicting snapshots. 
-.It Xo -.Nm -.Cm rename -.Op Fl f -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Xc -.It Xo -.Nm -.Cm rename -.Op Fl f -.Fl p -.Ar filesystem Ns | Ns Ar volume -.Ar filesystem Ns | Ns Ar volume -.Xc -.It Xo -.Nm -.Cm rename -.Fl u -.Op Fl p -.Ar filesystem filesystem -.Xc -.Pp -Renames the given dataset. The new target can be located anywhere in the -.Tn ZFS -hierarchy, with the exception of snapshots. Snapshots can only be renamed -within the parent file system or volume. When renaming a snapshot, the parent -file system of the snapshot does not need to be specified as part of the second -argument. Renamed file systems can inherit new mount points, in which case they -are unmounted and remounted at the new mount point. -.Bl -tag -width indent -.It Fl p -Creates all the nonexistent parent datasets. Datasets created in this manner -are automatically mounted according to the -.Sy mountpoint -property inherited from their parent. -.It Fl u -Do not remount file systems during rename. If a file system's -.Sy mountpoint -property is set to -.Cm legacy -or -.Cm none , -the file system is not unmounted even if this option is not given. -.It Fl f -Force unmount any filesystems that need to be unmounted in the process. -This flag has no effect if used together with the -.Fl u -flag. -.El -.It Xo -.Nm -.Cm rename -.Fl r -.Ar snapshot snapshot -.Xc -.Pp -Recursively rename the snapshots of all descendent datasets. Snapshots are the -only dataset that can be renamed recursively. -.It Xo -.Nm -.Cm rename -.Ar bookmark bookmark -.Xc -.Pp -Renames the given bookmark. -Bookmarks can only be renamed within the parent file system or volume. -When renaming a bookmark, the parent file system or volume of the bookmark -does not need to be specified as part of the second argument. -.It Xo -.Nm -.Cm list -.Op Fl r Ns | Ns Fl d Ar depth -.Op Fl Hp -.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ...
-.Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -.Oo Fl s Ar property Oc Ns ... -.Oo Fl S Ar property Oc Ns ... -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... -.Xc -.Pp -Lists the property information for the given datasets in tabular form. If -specified, you can list property information by the absolute pathname or the -relative pathname. By default, all file systems and volumes are displayed. -Snapshots are displayed if the -.Sy listsnaps -property is -.Cm on -(the default is -.Cm off ) . -The following fields are displayed, -.Sy name , used , available , referenced , mountpoint . -.Bl -tag -width indent -.It Fl r -Recursively display any children of the dataset on the command line. -.It Fl d Ar depth -Recursively display any children of the dataset, limiting the recursion to -.Ar depth . -A depth of -.Sy 1 -will display only the dataset and its direct children. -.It Fl H -Used for scripting mode. Do not print headers and separate fields by a single -tab instead of arbitrary white space. -.It Fl p -Display numbers in parsable (exact) values. -.It Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... -A comma-separated list of properties to display. The property must be: -.Bl -bullet -offset 2n -.It -One of the properties described in the -.Qq Sx Native Properties -section -.It -A user property -.It -The value -.Cm name -to display the dataset name -.It -The value -.Cm space -to display space usage properties on file systems and volumes. This is a -shortcut for specifying -.Fl o -.Sy name,avail,used,usedsnap,usedds,usedrefreserv,usedchild -.Fl t -.Sy filesystem,volume -syntax. -.El -.It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -A comma-separated list of types to display, where -.Ar type -is one of -.Sy filesystem , snapshot , snap , volume , bookmark , No or Sy all . -For example, specifying -.Fl t Cm snapshot -displays only snapshots. 
-.It Fl s Ar property -A property for sorting the output by column in ascending order based on the -value of the property. The property must be one of the properties described in -the -.Qq Sx Properties -section, or the special value -.Cm name -to sort by the dataset name. Multiple properties can be specified at one time -using multiple -.Fl s -property options. Multiple -.Fl s -options are evaluated from left to right in decreasing order of importance. -.Pp -The following is a list of sorting criteria: -.Bl -bullet -offset 2n -.It -Numeric types sort in numeric order. -.It -String types sort in alphabetical order. -.It -Types inappropriate for a row sort that row to the literal bottom, regardless -of the specified ordering. -.It -If no sorting options are specified the existing behavior of -.Qq Nm Cm list -is preserved. -.El -.It Fl S Ar property -Same as the -.Fl s -option, but sorts by property in descending order. -.El -.It Xo -.Nm -.Cm set -.Ar property Ns = Ns Ar value Oo Ar property Ns = Ns Ar value Oc Ns ... -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Xc -.Pp -Sets the property or list of properties to the given value(s) for each dataset. -Only some properties can be edited. See the "Properties" section for more -information on what properties can be set and acceptable values. Numeric values -can be specified as exact values, or in a human-readable form with a suffix of -.Sy B , K , M , G , T , P , E , Z -(for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or -zettabytes, respectively). User properties can be set on snapshots. For more -information, see the -.Qq Sx User Properties -section. -.It Xo -.Nm -.Cm get -.Op Fl r Ns | Ns Fl d Ar depth -.Op Fl Hp -.Op Fl o Ar all | field Ns Oo , Ns Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -.Op Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... -.Ar all | property Ns Oo , Ns Ar property Oc Ns ... 
-.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns | Ns Ar bookmark Ns ... -.Xc -.Pp -Displays properties for the given datasets. If no datasets are specified, then -the command displays properties for all datasets on the system. For each -property, the following columns are displayed: -.Pp -.Bl -hang -width "property" -offset indent -compact -.It name -Dataset name -.It property -Property name -.It value -Property value -.It source -Property source. Can either be local, default, temporary, inherited, received, -or none -(\&-). -.El -.Pp -All columns except the -.Sy RECEIVED -column are displayed by default. The columns to display can be specified -by using the -.Fl o -option. This command takes a comma-separated list of properties as described in -the -.Qq Sx Native Properties -and -.Qq Sx User Properties -sections. -.Pp -The special value -.Cm all -can be used to display all properties that apply to the given dataset's type -(filesystem, volume, snapshot, or bookmark). -.Bl -tag -width indent -.It Fl r -Recursively display properties for any children. -.It Fl d Ar depth -Recursively display any children of the dataset, limiting the recursion to -.Ar depth . -A depth of -.Sy 1 -will display only the dataset and its direct children. -.It Fl H -Display output in a form more easily parsed by scripts. Any headers are -omitted, and fields are explicitly separated by a single tab instead of an -arbitrary amount of space. -.It Fl p -Display numbers in parsable (exact) values. -.It Fl o Cm all | Ar field Ns Oo , Ns Ar field Oc Ns ... -A comma-separated list of columns to display. Supported values are -.Sy name,property,value,received,source . -Default values are -.Sy name,property,value,source . -The keyword -.Cm all -specifies all columns. -.It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -A comma-separated list of types to display, where -.Ar type -is one of -.Sy filesystem , snapshot , volume , No or Sy all . 
-For example, specifying -.Fl t Cm snapshot -displays only snapshots. -.It Fl s Ar source Ns Oo , Ns Ar source Oc Ns ... -A comma-separated list of sources to display. Those properties coming from a -source other than those in this list are ignored. Each source must be one of -the following: -.Sy local,default,inherited,temporary,received,none . -The default value is all sources. -.El -.It Xo -.Nm -.Cm inherit -.Op Fl rS -.Ar property -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ... -.Xc -.Pp -Clears the specified property, causing it to be inherited from an ancestor, -restored to default if no ancestor has the property set, or with the -.Fl S -option reverted to the received value if one exists. -See the -.Qq Sx Properties -section for a listing of default values, and details on which properties can be -inherited. -.Bl -tag -width indent -.It Fl r -Recursively inherit the given property for all children. -.It Fl S -Revert the property to the received value if one exists; otherwise operate as -if the -.Fl S -option was not specified. -.El -.It Xo -.Nm -.Cm remap -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Remap the indirect blocks in the given filesystem or volume so that they no -longer reference blocks on previously removed vdevs and we can eventually -shrink the size of the indirect mapping objects for the previously removed -vdevs. Note that remapping all blocks might not be possible and that -references from snapshots will still exist and cannot be remapped. -.It Xo -.Nm -.Cm upgrade -.Op Fl v -.Xc -.Pp -Displays a list of file systems that are not the most recent version. -.Bl -tag -width indent -.It Fl v -Displays -.Tn ZFS -filesystem versions supported by the current software. The current -.Tn ZFS -filesystem version and all previous supported versions are displayed, along -with an explanation of the features provided with each version. 
-.El -.It Xo -.Nm -.Cm upgrade -.Op Fl r -.Op Fl V Ar version -.Fl a | Ar filesystem -.Xc -.Pp -Upgrades file systems to a new on-disk version. Once this is done, the file -systems will no longer be accessible on systems running older versions of the -software. -.Qq Nm Cm send -streams generated from new snapshots of these file systems cannot be accessed -on systems running older versions of the software. -.Pp -In general, the file system version is independent of the pool version. See -.Xr zpool 8 -for information on the -.Nm zpool Cm upgrade -command. -.Pp -In some cases, the file system version and the pool version are interrelated -and the pool version must be upgraded before the file system version can be -upgraded. -.Bl -tag -width indent -.It Fl r -Upgrade the specified file system and all descendent file systems. -.It Fl V Ar version -Upgrade to the specified -.Ar version . -If the -.Fl V -flag is not specified, this command upgrades to the most recent version. This -option can only be used to increase the version number, and only up to the most -recent version supported by this software. -.It Fl a -Upgrade all file systems on all imported pools. -.It Ar filesystem -Upgrade the specified file system. -.El -.It Xo -.Nm -.Cm userspace -.Op Fl Hinp -.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... -.Oo Fl s Ar field Oc Ns ... -.Oo Fl S Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -.Ar filesystem Ns | Ns Ar snapshot -.Xc -.Pp -Displays space consumed by, and quotas on, each user in the specified -filesystem or snapshot. This corresponds to the -.Sy userused@ Ns Ar user -and -.Sy userquota@ Ns Ar user -properties. -.Bl -tag -width indent -.It Fl n -Print numeric ID instead of user/group name. -.It Fl H -Do not print headers, use tab-delimited output. -.It Fl p -Use exact (parsable) numeric output. -.It Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... -Display only the specified fields from the following set: -.Sy type,name,used,quota . 
-The default is to display all fields. -.It Fl s Ar field -Sort output by this field. The -.Fl s -and -.Fl S -flags may be specified multiple times to sort first by one field, then by -another. The default is -.Fl s Cm type Fl s Cm name . -.It Fl S Ar field -Sort by this field in reverse order. See -.Fl s . -.It Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -Print only the specified types from the following set: -.Sy all,posixuser,smbuser,posixgroup,smbgroup . -.Pp -The default is -.Fl t Cm posixuser,smbuser . -.Pp -The default can be changed to include group types. -.It Fl i -Translate SID to POSIX ID. This flag currently has no effect on -.Fx . -.El -.It Xo -.Nm -.Cm groupspace -.Op Fl Hinp -.Op Fl o Ar field Ns Oo , Ns Ar field Oc Ns ... -.Oo Fl s Ar field Oc Ns ... -.Oo Fl S Ar field Oc Ns ... -.Op Fl t Ar type Ns Oo , Ns Ar type Oc Ns ... -.Ar filesystem Ns | Ns Ar snapshot -.Xc -.Pp -Displays space consumed by, and quotas on, each group in the specified -filesystem or snapshot. This subcommand is identical to -.Qq Nm Cm userspace , -except that the default types to display are -.Fl t Sy posixgroup,smbgroup . -.It Xo -.Nm -.Cm mount -.Xc -.Pp -Displays all -.Tn ZFS -file systems currently mounted. -.Bl -tag -width indent -.It Fl f -.El -.It Xo -.Nm -.Cm mount -.Op Fl vO -.Op Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... -.Fl a | Ar filesystem -.Xc -.Pp -Mounts -.Tn ZFS -file systems. -.Bl -tag -width indent -.It Fl v -Report mount progress. -.It Fl O -Perform an overlay mount. Overlay mounts are not supported on -.Fx . -.It Fl o Ar property Ns Oo , Ns Ar property Oc Ns ... -An optional, comma-separated list of mount options to use temporarily for the -duration of the mount. See the -.Qq Sx Temporary Mount Point Properties -section for details. -.It Fl a -Mount all available -.Tn ZFS -file systems. -This command may be executed on -.Fx -system startup by -.Pa /etc/rc.d/zfs . -For more information, see variable -.Va zfs_enable -in -.Xr rc.conf 5 . 
-.It Ar filesystem -Mount the specified filesystem. -.El -.It Xo -.Nm -.Cm unmount Ns | Ns Cm umount -.Op Fl f -.Fl a | Ar filesystem Ns | Ns Ar mountpoint -.Xc -.Pp -Unmounts currently mounted -.Tn ZFS -file systems. -.Bl -tag -width indent -.It Fl f -Forcefully unmount the file system, even if it is currently in use. -.It Fl a -Unmount all available -.Tn ZFS -file systems. -.It Ar filesystem | mountpoint -Unmount the specified filesystem. The command can also be given a path to a -.Tn ZFS -file system mount point on the system. -.El -.It Xo -.Nm -.Cm share -.Fl a | Ar filesystem -.Xc -.Pp -Shares -.Tn ZFS -file systems that have the -.Sy sharenfs -property set. -.Bl -tag -width indent -.It Fl a -Share all -.Tn ZFS -file systems that have the -.Sy sharenfs -property set. -This command may be executed on -.Fx -system startup by -.Pa /etc/rc.d/zfs . -For more information, see variable -.Va zfs_enable -in -.Xr rc.conf 5 . -.It Ar filesystem -Share the specified filesystem according to the -.Tn sharenfs -property. File systems are shared when the -.Tn sharenfs -property is set. -.El -.It Xo -.Nm -.Cm unshare -.Fl a | Ar filesystem Ns | Ns Ar mountpoint -.Xc -.Pp -Unshares -.Tn ZFS -file systems that have the -.Tn sharenfs -property set. -.Bl -tag -width indent -.It Fl a -Unshares -.Tn ZFS -file systems that have the -.Sy sharenfs -property set. -This command may be executed on -.Fx -system shutdown by -.Pa /etc/rc.d/zfs . -For more information, see variable -.Va zfs_enable -in -.Xr rc.conf 5 . -.It Ar filesystem | mountpoint -Unshare the specified filesystem. The command can also be given a path to a -.Tn ZFS -file system shared on the system. -.El -.It Xo -.Nm -.Cm bookmark -.Ar snapshot -.Ar bookmark -.Xc -.Pp -Creates a bookmark of the given snapshot. -Bookmarks mark the point in time -when the snapshot was created, and can be used as the incremental source for -a -.Qq Nm Cm send -command. -.Pp -This feature must be enabled to be used. 
-See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy bookmark -feature. -.It Xo -.Nm -.Cm send -.Op Fl DLPRVcenpv -.Op Fl i Ar snapshot | Fl I Ar snapshot -.Ar snapshot -.Xc -.Pp -Creates a stream representation of the last -.Ar snapshot -argument (not part of -.Fl i -or -.Fl I ) -which is written to standard output. The output can be redirected to -a file or to a different system (for example, using -.Xr ssh 1 ) . -By default, a full stream is generated. -.Bl -tag -width indent -.It Fl i Ar snapshot -Generate an incremental stream from the first -.Ar snapshot Pq the incremental source -to the second -.Ar snapshot Pq the incremental target . -The incremental source can be specified as the last component of the -snapshot name -.Pq the Em @ No character and following -and -it is assumed to be from the same file system as the incremental target. -.Pp -If the destination is a clone, the source may be the origin snapshot, which -must be fully specified (for example, -.Cm pool/fs@origin , -not just -.Cm @origin ) . -.It Fl I Ar snapshot -Generate a stream package that sends all intermediary snapshots from the first -.Ar snapshot -to the second -.Ar snapshot . -For example, -.Ic -I @a fs@d -is similar to -.Ic -i @a fs@b; -i @b fs@c; -i @c fs@d . -The incremental -source may be specified as with the -.Fl i -option. -.It Fl R, -replicate -Generate a replication stream package, which will replicate the specified -filesystem, and all descendent file systems, up to the named snapshot. When -received, all properties, snapshots, descendent file systems, and clones are -preserved. -.Pp -If the -.Fl i -or -.Fl I -flags are used in conjunction with the -.Fl R -flag, an incremental replication stream is generated. The current values of -properties, and current snapshot and file system names are set when the stream -is received. 
If the -.Fl F -flag is specified when this stream is received, snapshots and file systems that -do not exist on the sending side are destroyed. -.It Fl D, -dedup -Generate a deduplicated stream. Blocks which would have been sent multiple -times in the send stream will only be sent once. The receiving system must -also support this feature to receive a deduplicated stream. This flag can -be used regardless of the dataset's -.Sy dedup -property, but performance will be much better if the filesystem uses a -dedup-capable checksum (e.g., -.Sy sha256 ) . -.It Fl L, -large-block -Generate a stream which may contain blocks larger than 128KB. -This flag -has no effect if the -.Sy large_blocks -pool feature is disabled, or if the -.Sy recordsize -property of this filesystem has never been set above 128KB. -The receiving system must have the -.Sy large_blocks -pool feature enabled as well. -See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy large_blocks -feature. -.It Fl e, -embed -Generate a more compact stream by using WRITE_EMBEDDED records for blocks -which are stored more compactly on disk by the -.Sy embedded_data -pool -feature. -This flag has no effect if the -.Sy embedded_data -feature is -disabled. -The receiving system must have the -.Sy embedded_data -feature -enabled. -If the -.Sy lz4_compress -feature is active on the sending system, -then the receiving system must have that feature enabled as well. -See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy embedded_data -feature. -.It Fl c, -compressed -Generate a more compact stream by using compressed WRITE records for blocks -which are compressed on disk and in memory (see the -.Sy compression -property for details). -If the -.Sy lz4_compress -feature is active on the sending system, then the receiving system must have that -feature enabled as well.
If the -.Sy large_blocks -feature is enabled on the sending system but the -.Fl L -option is not supplied in conjunction with -.Fl c -then the data will be decompressed before sending so it can be split -into smaller block sizes. -.It Fl p, -props -Include the dataset's properties in the stream. This flag is implicit when -.Fl R -is specified. The receiving system must also support this feature. -.It Fl n, -dryrun -Do a dry-run ("No-op") send. Do not generate any actual send data. This is -useful in conjunction with the -.Fl v -or -.Fl P -flags to determine what data will be sent. -In this case, the verbose output will be written to -standard output (contrast with a non-dry-run, where the stream is written -to standard output and the verbose output goes to standard error). -.It Fl P, -parsable -Print machine-parsable verbose information about the stream package generated. -.It Fl v, -verbose -Print verbose information about the stream package generated. -This information includes a per-second report of how much data has been sent. -.It Fl V -Set the process title to a per-second report of how much data has been sent. -.El -.Pp -The format of the stream is committed. You will be able to receive your streams -on future versions of -.Tn ZFS . -.It Xo -.Nm -.Cm send -.Op Fl LPcenv -.Op Fl i Ar snapshot Ns | Ns Ar bookmark -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Xc -.Pp -Generate a send stream, which may be of a filesystem, and may be -incremental from a bookmark. -If the destination is a filesystem or volume, -the pool must be read-only, or the filesystem must not be mounted. -When the -stream generated from a filesystem or volume is received, the default snapshot -name will be -.Pq --head-- . -.Bl -tag -width indent -.It Fl i Ar snapshot Ns | Ns Ar bookmark -Generate an incremental send stream. -The incremental source must be an earlier -snapshot in the destination's history. 
-It will commonly be an earlier -snapshot in the destination's filesystem, in which case it can be -specified as the last component of the name -.Pq the Em # No or Em @ No character and following . -.Pp -If the incremental target is a clone, the incremental source can -be the origin snapshot, or an earlier snapshot in the origin's filesystem, -or the origin's origin, etc. -.It Fl n, -dryrun -Do a dry-run -.Pq Qq No-op -send. -Do not generate any actual send data. -This is useful in conjunction with the -.Fl v -or -.Fl P -flags to determine what data will be sent. -In this case, the verbose output will be written to standard output -.Po contrast with a non-dry-run, where the stream is written to standard output -and the verbose output goes to standard error -.Pc . -.It Fl v, -verbose -Print verbose information about the stream package generated. -This information includes a per-second report of how much data has been sent. -.It Fl L, -large-block -Generate a stream which may contain blocks larger than 128KB. -This flag -has no effect if the -.Sy large_blocks -pool feature is disabled, or if the -.Sy recordsize -property of this filesystem has never been set above 128KB. -The receiving system must have the -.Sy large_blocks -pool feature enabled as well. -See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy large_blocks -feature. -.It Fl P, -parsable -Print machine-parsable verbose information about the stream package generated. -.It Fl c, -compressed -Generate a more compact stream by using compressed WRITE records for blocks -which are compressed on disk and in memory (see the -.Sy compression -property for details). If the -.Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. 
If the -.Sy large_blocks -feature is enabled on the sending system but the -.Fl L -option is not supplied in conjunction with -.Fl c -then the data will be decompressed before sending so it can be split -into smaller block sizes. -.It Fl e, -embed -Generate a more compact stream by using WRITE_EMBEDDED records for blocks -which are stored more compactly on disk by the -.Sy embedded_data -pool -feature. -This flag has no effect if the -.Sy embedded_data -feature is -disabled. -The receiving system must have the -.Sy embedded_data -feature -enabled. -If the -.Sy lz4_compress -feature is active on the sending system, -then the receiving system must have that feature enabled as well. -See -.Xr zpool-features 7 -for details on ZFS feature flags and the -.Sy embedded_data -feature. -.El -.It Xo -.Nm -.Cm send -.Op Fl Penv -.Fl t -.Ar receive_resume_token -.Xc -Creates a send stream which resumes an interrupted receive. The -.Ar receive_resume_token -is the value of this property on the filesystem -or volume that was being received into. See the documentation for -.Sy zfs receive -s -for more details. -.It Xo -.Nm -.Cm receive Ns | Ns Cm recv -.Op Fl vnsFMu -.Op Fl o Sy origin Ns = Ns Ar snapshot -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot -.Xc -.It Xo -.Nm -.Cm receive Ns | Ns Cm recv -.Op Fl vnsFMu -.Op Fl d | e -.Op Fl o Sy origin Ns = Ns Ar snapshot -.Ar filesystem -.Xc -.Pp -Creates a snapshot whose contents are as specified in the stream provided on -standard input. If a full stream is received, then a new file system is created -as well. Streams are created using the -.Qq Nm Cm send -subcommand, which by default creates a full stream. -.Qq Nm Cm recv -can be used as an alias for -.Qq Nm Cm receive . -.Pp -If an incremental stream is received, then the destination file system must -already exist, and its most recent snapshot must match the incremental stream's -source. 
For -.Sy zvol Ns s, -the destination device link is destroyed and recreated, which means the -.Sy zvol -cannot be accessed during the -.Sy receive -operation. -.Pp -When a snapshot replication package stream that is generated by using the -.Qq Nm Cm send Fl R -command is received, any snapshots that do not exist on the sending location -are destroyed by using the -.Qq Nm Cm destroy Fl d -command. -.Pp -The name of the snapshot (and file system, if a full stream is received) that -this subcommand creates depends on the argument type and the -.Fl d -or -.Fl e -option. -.Pp -If the argument is a snapshot name, the specified -.Ar snapshot -is created. If the argument is a file system or volume name, a snapshot with -the same name as the sent snapshot is created within the specified -.Ar filesystem -or -.Ar volume . -If the -.Fl d -or -.Fl e -option is specified, the snapshot name is determined by appending the sent -snapshot's name to the specified -.Ar filesystem . -If the -.Fl d -option is specified, all but the pool name of the sent snapshot path is -appended (for example, -.Sy b/c@1 -appended from sent snapshot -.Sy a/b/c@1 ) , -and if the -.Fl e -option is specified, only the tail of the sent snapshot path is appended (for -example, -.Sy c@1 -appended from sent snapshot -.Sy a/b/c@1 ) . -In the case of -.Fl d , -any file systems needed to replicate the path of the sent snapshot are created -within the specified file system. -.Bl -tag -width indent -.It Fl d -Use the full sent snapshot path without the first element (without pool name) -to determine the name of the new snapshot as described in the paragraph above. -.It Fl e -Use only the last element of the sent snapshot path to determine the name of -the new snapshot as described in the paragraph above. -.It Fl u -File system that is associated with the received stream is not mounted. -.It Fl v -Print verbose information about the stream and the time required to perform the -receive operation. 
-.It Fl n -Do not actually receive the stream. This can be useful in conjunction with the -.Fl v -option to verify the name the receive operation would use. -.It Fl o Sy origin Ns = Ns Ar snapshot -Forces the stream to be received as a clone of the given snapshot. -If the stream is a full send stream, this will create the filesystem -described by the stream as a clone of the specified snapshot. Which -snapshot was specified will not affect the success or failure of the -receive, as long as the snapshot does exist. If the stream is an -incremental send stream, all the normal verification will be performed. -.It Fl F -Force a rollback of the file system to the most recent snapshot before -performing the receive operation. If receiving an incremental replication -stream (for example, one generated by -.Qq Nm Cm send Fl R Bro Fl i | Fl I Brc ) , -destroy snapshots and file systems that do not exist on the sending side. -.It Fl M -Force an unmount of the file system while receiving a snapshot. -This option is not supported on Linux. -.It Fl s -If the receive is interrupted, save the partially received state, rather -than deleting it. Interruption may be due to premature termination of -the stream -.Po e.g. due to network failure or failure of the remote system -if the stream is being read over a network connection -.Pc , -a checksum error in the stream, termination of the -.Nm zfs Cm receive -process, or unclean shutdown of the system. -.Pp -The receive can be resumed with a stream generated by -.Nm zfs Cm send Fl t Ar token , -where the -.Ar token -is the value of the -.Sy receive_resume_token -property of the filesystem or volume which is received into. -.Pp -To use this flag, the storage pool must have the -.Sy extensible_dataset -feature enabled. See -.Xr zpool-features 7 -for details on ZFS feature flags. 
-.El -.It Xo -.Nm -.Cm receive Ns | Ns Cm recv -.Fl A -.Ar filesystem Ns | Ns Ar volume -.Xc -Abort an interrupted -.Nm zfs Cm receive Fl s , -deleting its saved partially received state. -.It Xo -.Nm -.Cm allow -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Displays permissions that have been delegated on the specified filesystem or -volume. See the other forms of -.Qq Nm Cm allow -for more information. -.It Xo -.Nm -.Cm allow -.Op Fl ldug -.Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... -.Ar perm Ns | Ns Ar @setname Ns -.Oo Ns , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... -.Ar filesystem Ns | Ns Ar volume -.Xc -.It Xo -.Nm -.Cm allow -.Op Fl ld -.Fl e Ns | Ns Cm everyone -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Delegates -.Tn ZFS -administration permission for the file systems to non-privileged users. -.Bl -tag -width indent -.It Xo -.Op Fl ug -.Ar user Ns | Ns Ar group Ns Oo , Ar user Ns | Ns Ar group Oc Ns ... -.Xc -Specifies to whom the permissions are delegated. Multiple entities can be -specified as a comma-separated list. If neither of the -.Fl ug -options are specified, then the argument is interpreted preferentially as the -keyword -.Cm everyone , -then as a user name, and lastly as a group name. To specify -a user or group named -.Qq everyone , -use the -.Fl u -or -.Fl g -options. To specify a group with the same name as a user, use the -.Fl g -option. -.It Op Fl e Ns | Ns Cm everyone -Specifies that the permissions be delegated to -.Qq everyone . -.It Xo -.Ar perm Ns | Ns Ar @setname Ns Oo , Ns Ar perm Ns | Ns Ar @setname Oc Ns ... -.Xc -The permissions to delegate. Multiple permissions -may be specified as a comma-separated list. Permission names are the same as -.Tn ZFS -subcommand and property names. See the property list below. Property set names, -which begin with an at sign -.Pq Sy @ , -may be specified. 
See the -.Fl s -form below for details. -.It Xo -.Op Fl ld -.Ar filesystem Ns | Ns Ar volume -.Xc -Specifies where the permissions are delegated. If neither of the -.Fl ld -options are specified, or both are, then the permissions are allowed for the -file system or volume, and all of its descendents. If only the -.Fl l -option is used, then is allowed "locally" only for the specified file system. -If only the -.Fl d -option is used, then is allowed only for the descendent file systems. -.El -.Pp -Permissions are generally the ability to use a -.Tn ZFS -subcommand or change a -.Tn ZFS -property. The following permissions are available: -.Bl -column -offset 4n "secondarycache" "subcommand" -.It NAME Ta TYPE Ta NOTES -.It allow Ta subcommand Ta Must Xo -also have the permission that is being allowed -.Xc -.It clone Ta subcommand Ta Must Xo -also have the 'create' ability and 'mount' ability in the origin file system -.Xc -.It create Ta subcommand Ta Must also have the 'mount' ability -.It destroy Ta subcommand Ta Must also have the 'mount' ability -.It diff Ta subcommand Ta Allows lookup of paths within a dataset given an -object number, and the ability to create snapshots necessary to 'zfs diff' -.It hold Ta subcommand Ta Allows adding a user hold to a snapshot -.It mount Ta subcommand Ta Allows mount/umount of Tn ZFS No datasets -.It promote Ta subcommand Ta Must Xo -also have the 'mount' and 'promote' ability in the origin file system -.Xc -.It receive Ta subcommand Ta Must also have the 'mount' and 'create' ability -.It release Ta subcommand Ta Allows Xo -releasing a user hold which might destroy the snapshot -.Xc -.It rename Ta subcommand Ta Must Xo -also have the 'mount' and 'create' ability in the new parent -.Xc -.It rollback Ta subcommand Ta Must also have the 'mount' ability -.It send Ta subcommand -.It share Ta subcommand Ta Allows Xo -sharing file systems over the -.Tn NFS -protocol -.Xc -.It snapshot Ta subcommand Ta Must also have the 'mount' ability 
-.It groupquota Ta other Ta Allows accessing any groupquota@... property -.It groupused Ta other Ta Allows reading any groupused@... property -.It userprop Ta other Ta Allows changing any user property -.It userquota Ta other Ta Allows accessing any userquota@... property -.It userused Ta other Ta Allows reading any userused@... property -.It aclinherit Ta property -.It aclmode Ta property -.It atime Ta property -.It canmount Ta property -.It casesensitivity Ta property -.It checksum Ta property -.It compression Ta property -.It copies Ta property -.It dedup Ta property -.It devices Ta property -.It exec Ta property -.It filesystem_limit Ta property -.It logbias Ta property -.It jailed Ta property -.It mlslabel Ta property -.It mountpoint Ta property -.It nbmand Ta property -.It normalization Ta property -.It primarycache Ta property -.It quota Ta property -.It readonly Ta property -.It recordsize Ta property -.It refquota Ta property -.It refreservation Ta property -.It reservation Ta property -.It secondarycache Ta property -.It setuid Ta property -.It sharenfs Ta property -.It sharesmb Ta property -.It snapdir Ta property -.It snapshot_limit Ta property -.It sync Ta property -.It utf8only Ta property -.It version Ta property -.It volblocksize Ta property -.It volsize Ta property -.It vscan Ta property -.It xattr Ta property -.El -.It Xo -.Nm -.Cm allow -.Fl c -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Sets "create time" permissions. These permissions are granted (locally) to the -creator of any newly-created descendent file system. -.It Xo -.Nm -.Cm allow -.Fl s -.Ar @setname -.Ar perm Ns | Ns Ar @setname Ns Op Ns , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Defines or adds permissions to a permission set. The set can be used by other -.Qq Nm Cm allow -commands for the specified file system and its descendents. 
Sets are evaluated -dynamically, so changes to a set are immediately reflected. Permission sets -follow the same naming restrictions as ZFS file systems, but the name must -begin with an "at sign" -.Pq Sy @ , -and can be no more than 64 characters long. -.It Xo -.Nm -.Cm unallow -.Op Fl rldug -.Ar user Ns | Ns Ar group Ns Oo Ns , Ns Ar user Ns | Ns Ar group Oc Ns ... -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Xc -.It Xo -.Nm -.Cm unallow -.Op Fl rld -.Fl e Ns | Ns Cm everyone -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Xc -.It Xo -.Nm -.Cm unallow -.Op Fl r -.Fl c -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Removes permissions that were granted with the -.Qq Nm Cm allow -command. No permissions are explicitly denied, so other permissions granted are -still in effect. For example, if the permission is granted by an ancestor. If -no permissions are specified, then all permissions for the specified -.Ar user , group , No or everyone -are removed. Specifying -.Cm everyone -.Po or using the Fl e -option -.Pc only removes the permissions that were granted to everyone , -not all permissions for every user and group. See the -.Qq Nm Cm allow -command for a description of the -.Fl ldugec -options. -.Bl -tag -width indent -.It Fl r -Recursively remove the permissions from this file system and all descendents. -.El -.It Xo -.Nm -.Cm unallow -.Op Fl r -.Fl s -.Ar @setname -.Oo Ar perm Ns | Ns Ar @setname Ns Op , Ns Ar perm Ns | Ns Ar @setname Ns -.Ns ... Oc -.Ar filesystem Ns | Ns Ar volume -.Xc -.Pp -Removes permissions from a permission set. If no permissions are specified, -then all permissions are removed, thus removing the set entirely. -.It Xo -.Nm -.Cm hold -.Op Fl r -.Ar tag snapshot Ns ... 
-.Xc -.Pp -Adds a single reference, named with the -.Ar tag -argument, to the specified snapshot or snapshots. Each snapshot has its own tag -namespace, and tags must be unique within that space. -.Pp -If a hold exists on a snapshot, attempts to destroy that snapshot by using the -.Qq Nm Cm destroy -command returns -.Em EBUSY . -.Bl -tag -width indent -.It Fl r -Specifies that a hold with the given tag is applied recursively to the -snapshots of all descendent file systems. -.El -.It Xo -.Nm -.Cm holds -.Op Fl Hp -.Op Fl r Ns | Ns Fl d Ar depth -.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns -.Ns ... -.Xc -.Pp -Lists all existing user references for the given dataset or datasets. -.Bl -tag -width indent -.It Fl H -Used for scripting mode. Do not print headers and separate fields by a single -tab instead of arbitrary white space. -.It Fl p -Display numbers in parsable (exact) values. -.It Fl r -Lists the holds that are set on the descendent snapshots of the named datasets -or snapshots, in addition to listing the holds on the named snapshots, if any. -.It Fl d Ar depth -Recursively display any holds on the named snapshots, or descendent snapshots of -the named datasets or snapshots, limiting the recursion to -.Ar depth . -.El -.It Xo -.Nm -.Cm release -.Op Fl r -.Ar tag snapshot Ns ... -.Xc -.Pp -Removes a single reference, named with the -.Ar tag -argument, from the specified snapshot or snapshots. The tag must already exist -for each snapshot. -.Bl -tag -width indent -.It Fl r -Recursively releases a hold with the given tag on the snapshots of all -descendent file systems. -.El -.It Xo -.Nm -.Cm diff -.Op Fl FHt -.Ar snapshot -.Op Ar snapshot Ns | Ns Ar filesystem -.Xc -.Pp -Display the difference between a snapshot of a given filesystem and another -snapshot of that filesystem from a later time or the current contents of the -filesystem. 
The first column is a character indicating the type of change, -the other columns indicate pathname, new pathname -.Pq in case of rename , -change in link count, and optionally file type and/or change time. -.Pp -The types of change are: -.Bl -column -offset 2n indent -.It \&- Ta path was removed -.It \&+ Ta path was added -.It \&M Ta path was modified -.It \&R Ta path was renamed -.El -.Bl -tag -width indent -.It Fl F -Display an indication of the type of file, in a manner similar to the -.Fl F -option of -.Xr ls 1 . -.Bl -column -offset 2n indent -.It \&B Ta block device -.It \&C Ta character device -.It \&F Ta regular file -.It \&/ Ta directory -.It \&@ Ta symbolic link -.It \&= Ta socket -.It \&> Ta door (not supported on Fx ) -.It \&| Ta named pipe (not supported on Fx ) -.It \&P Ta event port (not supported on Fx ) -.El -.It Fl H -Give more parsable tab-separated output, without header lines and without -arrows. -.It Fl t -Display the path's inode change time as the first column of output. -.El -.It Xo -.Nm -.Cm program -.Op Fl jn -.Op Fl t Ar timeout -.Op Fl m Ar memory_limit -.Ar pool script -.Op Ar arg1 No ... -.Xc -.Pp -Executes -.Ar script -as a ZFS channel program on -.Ar pool . -The ZFS channel -program interface allows ZFS administrative operations to be run -programmatically via a Lua script. -The entire script is executed atomically, with no other administrative -operations taking effect concurrently. -A library of ZFS calls is made available to channel program scripts. -Channel programs may only be run with root privileges. -.Pp -For full documentation of the ZFS channel program interface, see the manual -page for -.Xr zfs-program 8 . -.Bl -tag -width indent -.It Fl j -Display channel program output in JSON format. -When this flag is specified and standard output is empty - -channel program encountered an error. -The details of such an error will be printed to standard error in plain text. 
-.It Fl n -Executes a read-only channel program, which runs faster. -The program cannot change on-disk state by calling functions from -the zfs.sync submodule. -The program can be used to gather information such as properties and -determining if changes would succeed (zfs.check.*). -Without this flag, all pending changes must be synced to disk before -a channel program can complete. -.It Fl t Ar timeout -Execution time limit, in milliseconds. -If a channel program executes for longer than the provided timeout, it will -be stopped and an error will be returned. -The default timeout is 1000 ms, and can be set to a maximum of 10000 ms. -.It Fl m Ar memory-limit -Memory limit, in bytes. -If a channel program attempts to allocate more memory than the given limit, -it will be stopped and an error returned. -The default memory limit is 10 MB, and can be set to a maximum of 100 MB. -.Pp -All remaining argument strings are passed directly to the channel program as -arguments. -See -.Xr zfs-program 8 -for more information. -.El -.It Xo -.Nm -.Cm jail -.Ar jailid filesystem -.Xc -.Pp -Attaches the specified -.Ar filesystem -to the jail identified by JID -.Ar jailid . -From now on this file system tree can be managed from within a jail if the -.Sy jailed -property has been set. To use this functionality, the jail needs the -.Va allow.mount -and -.Va allow.mount.zfs -parameters set to 1 and the -.Va enforce_statfs -parameter set to a value lower than 2. -.Pp -See -.Xr jail 8 -for more information on managing jails and configuring the parameters above. -.It Xo -.Nm -.Cm unjail -.Ar jailid filesystem -.Xc -.Pp -Detaches the specified -.Ar filesystem -from the jail identified by JID -.Ar jailid . -.El -.Sh EXIT STATUS -The following exit values are returned: -.Bl -tag -offset 2n -width 2n -.It 0 -Successful completion. -.It 1 -An error occurred. -.It 2 -Invalid command line options were specified. 
-.El -.Sh EXAMPLES -.Bl -tag -width 0n -.It Sy Example 1 No Creating a Tn ZFS No File System Hierarchy -.Pp -The following commands create a file system named -.Em pool/home -and a file system named -.Em pool/home/bob . -The mount point -.Pa /home -is set for the parent file system, and is automatically inherited by the child -file system. -.Bd -literal -offset 2n -.Li # Ic zfs create pool/home -.Li # Ic zfs set mountpoint=/home pool/home -.Li # Ic zfs create pool/home/bob -.Ed -.It Sy Example 2 No Creating a Tn ZFS No Snapshot -.Pp -The following command creates a snapshot named -.Sy yesterday . -This snapshot is mounted on demand in the -.Pa \&.zfs/snapshot -directory at the root of the -.Em pool/home/bob -file system. -.Bd -literal -offset 2n -.Li # Ic zfs snapshot pool/home/bob@yesterday -.Ed -.It Sy Example 3 No Creating and Destroying Multiple Snapshots -.Pp -The following command creates snapshots named -.Em yesterday -of -.Em pool/home -and all of its descendent file systems. Each snapshot is mounted on demand in -the -.Pa \&.zfs/snapshot -directory at the root of its file system. The second command destroys the newly -created snapshots. -.Bd -literal -offset 2n -.Li # Ic zfs snapshot -r pool/home@yesterday -.Li # Ic zfs destroy -r pool/home@yesterday -.Ed -.It Sy Example 4 No Disabling and Enabling File System Compression -.Pp -The following command disables the -.Sy compression -property for all file systems under -.Em pool/home . -The next command explicitly enables -.Sy compression -for -.Em pool/home/anne . -.Bd -literal -offset 2n -.Li # Ic zfs set compression=off pool/home -.Li # Ic zfs set compression=on pool/home/anne -.Ed -.It Sy Example 5 No Listing Tn ZFS No Datasets -.Pp -The following command lists all active file systems and volumes in the system. -Snapshots are displayed if the -.Sy listsnaps -property is -.Cm on . -The default is -.Cm off . -See -.Xr zpool 8 -for more information on pool properties. 
-.Bd -literal -offset 2n -.Li # Ic zfs list - NAME USED AVAIL REFER MOUNTPOINT - pool 450K 457G 18K /pool - pool/home 315K 457G 21K /home - pool/home/anne 18K 457G 18K /home/anne - pool/home/bob 276K 457G 276K /home/bob -.Ed -.It Sy Example 6 No Setting a Quota on a Tn ZFS No File System -.Pp -The following command sets a quota of 50 Gbytes for -.Em pool/home/bob . -.Bd -literal -offset 2n -.Li # Ic zfs set quota=50G pool/home/bob -.Ed -.It Sy Example 7 No Listing Tn ZFS No Properties -.Pp -The following command lists all properties for -.Em pool/home/bob . -.Bd -literal -offset 2n -.Li # Ic zfs get all pool/home/bob -NAME PROPERTY VALUE SOURCE -pool/home/bob type filesystem - -pool/home/bob creation Tue Jul 21 15:53 2009 - -pool/home/bob used 21K - -pool/home/bob available 20.0G - -pool/home/bob referenced 21K - -pool/home/bob compressratio 1.00x - -pool/home/bob mounted yes - -pool/home/bob quota 20G local -pool/home/bob reservation none default -pool/home/bob recordsize 128K default -pool/home/bob mountpoint /home/bob default -pool/home/bob sharenfs off default -pool/home/bob checksum on default -pool/home/bob compression on local -pool/home/bob atime on default -pool/home/bob devices on default -pool/home/bob exec on default -pool/home/bob filesystem_limit none default -pool/home/bob setuid on default -pool/home/bob readonly off default -pool/home/bob jailed off default -pool/home/bob snapdir hidden default -pool/home/bob snapshot_limit none default -pool/home/bob aclmode discard default -pool/home/bob aclinherit restricted default -pool/home/bob canmount on default -pool/home/bob xattr on default -pool/home/bob copies 1 default -pool/home/bob version 5 - -pool/home/bob utf8only off - -pool/home/bob normalization none - -pool/home/bob casesensitivity sensitive - -pool/home/bob vscan off default -pool/home/bob nbmand off default -pool/home/bob sharesmb off default -pool/home/bob refquota none default -pool/home/bob refreservation none default -pool/home/bob 
primarycache all default -pool/home/bob secondarycache all default -pool/home/bob usedbysnapshots 0 - -pool/home/bob usedbydataset 21K - -pool/home/bob usedbychildren 0 - -pool/home/bob usedbyrefreservation 0 - -pool/home/bob logbias latency default -pool/home/bob dedup off default -pool/home/bob mlslabel - -pool/home/bob sync standard default -pool/home/bob refcompressratio 1.00x - -.Ed -.Pp -The following command gets a single property value. -.Bd -literal -offset 2n -.Li # Ic zfs get -H -o value compression pool/home/bob -on -.Ed -.Pp -The following command lists all properties with local settings for -.Em pool/home/bob . -.Bd -literal -offset 2n -.Li # Ic zfs get -s local -o name,property,value all pool/home/bob -NAME PROPERTY VALUE -pool/home/bob quota 20G -pool/home/bob compression on -.Ed -.It Sy Example 8 No Rolling Back a Tn ZFS No File System -.Pp -The following command reverts the contents of -.Em pool/home/anne -to the snapshot named -.Em yesterday , -deleting all intermediate snapshots. -.Bd -literal -offset 2n -.Li # Ic zfs rollback -r pool/home/anne@yesterday -.Ed -.It Sy Example 9 No Creating a Tn ZFS No Clone -.Pp -The following command creates a writable file system whose initial contents are -the same as -.Em pool/home/bob@yesterday . 
-.Bd -literal -offset 2n -.Li # Ic zfs clone pool/home/bob@yesterday pool/clone -.Ed -.It Sy Example 10 No Promoting a Tn ZFS No Clone -.Pp -The following commands illustrate how to test out changes to a file system, and -then replace the original file system with the changed one, using clones, clone -promotion, and renaming: -.Bd -literal -offset 2n -.Li # Ic zfs create pool/project/production -.Ed -.Pp -Populate -.Pa /pool/project/production -with data and continue with the following commands: -.Bd -literal -offset 2n -.Li # Ic zfs snapshot pool/project/production@today -.Li # Ic zfs clone pool/project/production@today pool/project/beta -.Ed -.Pp -Now make changes to -.Pa /pool/project/beta -and continue with the following commands: -.Bd -literal -offset 2n -.Li # Ic zfs promote pool/project/beta -.Li # Ic zfs rename pool/project/production pool/project/legacy -.Li # Ic zfs rename pool/project/beta pool/project/production -.Ed -.Pp -Once the legacy version is no longer needed, it can be destroyed. -.Bd -literal -offset 2n -.Li # Ic zfs destroy pool/project/legacy -.Ed -.It Sy Example 11 No Inheriting Tn ZFS No Properties -.Pp -The following command causes -.Em pool/home/bob -and -.Em pool/home/anne -to inherit the -.Sy checksum -property from their parent. -.Bd -literal -offset 2n -.Li # Ic zfs inherit checksum pool/home/bob pool/home/anne -.Ed -.It Sy Example 12 No Remotely Replicating Tn ZFS No Data -.Pp -The following commands send a full stream and then an incremental stream to a -remote machine, restoring them into -.Sy poolB/received/fs@a -and -.Sy poolB/received/fs@b , -respectively. -.Sy poolB -must contain the file system -.Sy poolB/received , -and must not initially contain -.Sy poolB/received/fs . 
-.Bd -literal -offset 2n -.Li # Ic zfs send pool/fs@a | ssh host zfs receive poolB/received/fs@a -.Li # Ic zfs send -i a pool/fs@b | ssh host zfs receive poolB/received/fs -.Ed -.It Xo -.Sy Example 13 -Using the -.Qq zfs receive -d -Option -.Xc -.Pp -The following command sends a full stream of -.Sy poolA/fsA/fsB@snap -to a remote machine, receiving it into -.Sy poolB/received/fsA/fsB@snap . -The -.Sy fsA/fsB@snap -portion of the received snapshot's name is determined from the name of the sent -snapshot. -.Sy poolB -must contain the file system -.Sy poolB/received . -If -.Sy poolB/received/fsA -does not exist, it is created as an empty file system. -.Bd -literal -offset 2n -.Li # Ic zfs send poolA/fsA/fsB@snap | ssh host zfs receive -d poolB/received -.Ed -.It Sy Example 14 No Setting User Properties -.Pp -The following example sets the user-defined -.Sy com.example:department -property for a dataset. -.Bd -literal -offset 2n -.Li # Ic zfs set com.example:department=12345 tank/accounting -.Ed -.It Sy Example 15 No Performing a Rolling Snapshot -.Pp -The following example shows how to maintain a history of snapshots with a -consistent naming scheme. 
To keep a week's worth of snapshots, the user -destroys the oldest snapshot, renames the remaining snapshots, and then creates -a new snapshot, as follows: -.Bd -literal -offset 2n -.Li # Ic zfs destroy -r pool/users@7daysago -.Li # Ic zfs rename -r pool/users@6daysago @7daysago -.Li # Ic zfs rename -r pool/users@5daysago @6daysago -.Li # Ic zfs rename -r pool/users@4daysago @5daysago -.Li # Ic zfs rename -r pool/users@3daysago @4daysago -.Li # Ic zfs rename -r pool/users@2daysago @3daysago -.Li # Ic zfs rename -r pool/users@yesterday @2daysago -.Li # Ic zfs rename -r pool/users@today @yesterday -.Li # Ic zfs snapshot -r pool/users@today -.Ed -.It Xo -.Sy Example 16 -Setting -.Qq sharenfs -Property Options on a ZFS File System -.Xc -.Pp -The following command shows how to set -.Sy sharenfs -property options to enable root access for a specific network on the -.Em tank/home -file system. The contents of the -.Sy sharenfs -property are valid -.Xr exports 5 -options. -.Bd -literal -offset 2n -.Li # Ic zfs set sharenfs="maproot=root,network 192.168.0.0/24" tank/home -.Ed -.Pp -Another way to write this command with the same result is: -.Bd -literal -offset 2n -.Li # Ic set zfs sharenfs="-maproot=root -network 192.168.0.0/24" tank/home -.Ed -.It Xo -.Sy Example 17 -Delegating -.Tn ZFS -Administration Permissions on a -.Tn ZFS -Dataset -.Xc -.Pp -The following example shows how to set permissions so that user -.Em cindys -can create, destroy, mount, and take snapshots on -.Em tank/cindys . -The permissions on -.Em tank/cindys -are also displayed. 
-.Bd -literal -offset 2n -.Li # Ic zfs allow cindys create,destroy,mount,snapshot tank/cindys -.Li # Ic zfs allow tank/cindys ----- Permissions on tank/cindys -------------------------------------- -Local+Descendent permissions: - user cindys create,destroy,mount,snapshot -.Ed -.It Sy Example 18 No Delegating Create Time Permissions on a Tn ZFS No Dataset -.Pp -The following example shows how to grant anyone in the group -.Em staff -to create file systems in -.Em tank/users . -This syntax also allows staff members to destroy their own file systems, but -not destroy anyone else's file system. The permissions on -.Em tank/users -are also displayed. -.Bd -literal -offset 2n -.Li # Ic zfs allow staff create,mount tank/users -.Li # Ic zfs allow -c destroy tank/users -.Li # Ic zfs allow tank/users ----- Permissions on tank/users --------------------------------------- -Permission sets: - destroy -Local+Descendent permissions: - group staff create,mount -.Ed -.It Xo -.Sy Example 19 -Defining and Granting a Permission Set on a -.Tn ZFS -Dataset -.Xc -.Pp -The following example shows how to define and grant a permission set on the -.Em tank/users -file system. The permissions on -.Em tank/users -are also displayed. -.Bd -literal -offset 2n -.Li # Ic zfs allow -s @pset create,destroy,snapshot,mount tank/users -.Li # Ic zfs allow staff @pset tank/users -.Li # Ic zfs allow tank/users ----- Permissions on tank/users --------------------------------------- -Permission sets: - @pset create,destroy,mount,snapshot -Local+Descendent permissions: - group staff @pset -.Ed -.It Sy Example 20 No Delegating Property Permissions on a Tn ZFS No Dataset -.Pp -The following example shows to grant the ability to set quotas and reservations -on the -.Sy users/home -file system. The permissions on -.Sy users/home -are also displayed. 
-.Bd -literal -offset 2n -.Li # Ic zfs allow cindys quota,reservation users/home -.Li # Ic zfs allow users/home ----- Permissions on users/home --------------------------------------- -Local+Descendent permissions: - user cindys quota,reservation -.Li # Ic su - cindys -.Li cindys% Ic zfs set quota=10G users/home/marks -.Li cindys% Ic zfs get quota users/home/marks -NAME PROPERTY VALUE SOURCE -users/home/marks quota 10G local -.Ed -.It Sy Example 21 No Removing ZFS Delegated Permissions on a Tn ZFS No Dataset -.Pp -The following example shows how to remove the snapshot permission from the -.Em staff -group on the -.Em tank/users -file system. The permissions on -.Em tank/users -are also displayed. -.Bd -literal -offset 2n -.Li # Ic zfs unallow staff snapshot tank/users -.Li # Ic zfs allow tank/users ----- Permissions on tank/users --------------------------------------- -Permission sets: - @pset create,destroy,mount,snapshot -Local+Descendent permissions: - group staff @pset -.Ed -.It Sy Example 22 Showing the differences between a snapshot and a ZFS Dataset -.Pp -The following example shows how to see what has changed between a prior -snapshot of a ZFS Dataset and its current state. The -.Fl F -option is used to indicate type information for the files affected. -.Bd -literal -offset 2n -.Li # Ic zfs diff tank/test@before tank/test -M / /tank/test/ -M F /tank/test/linked (+1) -R F /tank/test/oldname -> /tank/test/newname -- F /tank/test/deleted -+ F /tank/test/created -M F /tank/test/modified -.Ed -.El -.Sh SEE ALSO -.Xr chmod 2 , -.Xr fsync 2 , -.Xr exports 5 , -.Xr fstab 5 , -.Xr rc.conf 5 , -.Xr jail 8 , -.Xr mount 8 , -.Xr umount 8 , -.Xr zfs-program 8 , -.Xr zpool 8 -.Sh HISTORY -The -.Nm -utility first appeared in -.Fx 7.0 . -.Sh AUTHORS -This manual page is a -.Xr mdoc 7 -reimplementation of the -.Tn OpenSolaris -manual page -.Em zfs(1M) , -modified and customized for -.Fx -and licensed under the -Common Development and Distribution License -.Pq Tn CDDL . 
-.Pp -The -.Xr mdoc 7 -implementation of this manual page was initially written by -.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c deleted file mode 100644 index a291db083568..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c +++ /dev/null @@ -1,497 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. - */ - -#include <libintl.h> -#include <libuutil.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> - -#include <libzfs.h> - -#include "zfs_util.h" -#include "zfs_iter.h" - -/* - * This is a private interface used to gather up all the datasets specified on - * the command line so that we can iterate over them in order. - * - * First, we iterate over all filesystems, gathering them together into an - * AVL tree. 
We report errors for any explicitly specified datasets - * that we couldn't open. - * - * When finished, we have an AVL tree of ZFS handles. We go through and execute - * the provided callback for each one, passing whatever data the user supplied. - */ - -typedef struct zfs_node { - zfs_handle_t *zn_handle; - uu_avl_node_t zn_avlnode; -} zfs_node_t; - -typedef struct callback_data { - uu_avl_t *cb_avl; - int cb_flags; - zfs_type_t cb_types; - zfs_sort_column_t *cb_sortcol; - zprop_list_t **cb_proplist; - int cb_depth_limit; - int cb_depth; - uint8_t cb_props_table[ZFS_NUM_PROPS]; -} callback_data_t; - -uu_avl_pool_t *avl_pool; - -/* - * Include snaps if they were requested or if this a zfs list where types - * were not specified and the "listsnapshots" property is set on this pool. - */ -static boolean_t -zfs_include_snapshots(zfs_handle_t *zhp, callback_data_t *cb) -{ - zpool_handle_t *zph; - - if ((cb->cb_flags & ZFS_ITER_PROP_LISTSNAPS) == 0) - return (cb->cb_types & ZFS_TYPE_SNAPSHOT); - - zph = zfs_get_pool_handle(zhp); - return (zpool_get_prop_int(zph, ZPOOL_PROP_LISTSNAPS, NULL)); -} - -/* - * Called for each dataset. If the object is of an appropriate type, - * add it to the avl tree and recurse over any children as necessary. 
- */ -static int -zfs_callback(zfs_handle_t *zhp, void *data) -{ - callback_data_t *cb = data; - boolean_t should_close = B_TRUE; - boolean_t include_snaps = zfs_include_snapshots(zhp, cb); - boolean_t include_bmarks = (cb->cb_types & ZFS_TYPE_BOOKMARK); - - if ((zfs_get_type(zhp) & cb->cb_types) || - ((zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) && include_snaps)) { - uu_avl_index_t idx; - zfs_node_t *node = safe_malloc(sizeof (zfs_node_t)); - - node->zn_handle = zhp; - uu_avl_node_init(node, &node->zn_avlnode, avl_pool); - if (uu_avl_find(cb->cb_avl, node, cb->cb_sortcol, - &idx) == NULL) { - if (cb->cb_proplist) { - if ((*cb->cb_proplist) && - !(*cb->cb_proplist)->pl_all) - zfs_prune_proplist(zhp, - cb->cb_props_table); - - if (zfs_expand_proplist(zhp, cb->cb_proplist, - (cb->cb_flags & ZFS_ITER_RECVD_PROPS), - (cb->cb_flags & ZFS_ITER_LITERAL_PROPS)) - != 0) { - free(node); - return (-1); - } - } - uu_avl_insert(cb->cb_avl, node, idx); - should_close = B_FALSE; - } else { - free(node); - } - } - - /* - * Recurse if necessary. 
- */ - if (cb->cb_flags & ZFS_ITER_RECURSE && - ((cb->cb_flags & ZFS_ITER_DEPTH_LIMIT) == 0 || - cb->cb_depth < cb->cb_depth_limit)) { - cb->cb_depth++; - if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) - (void) zfs_iter_filesystems(zhp, zfs_callback, data); - if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | - ZFS_TYPE_BOOKMARK)) == 0) && include_snaps) - (void) zfs_iter_snapshots(zhp, - (cb->cb_flags & ZFS_ITER_SIMPLE) != 0, zfs_callback, - data, 0, 0); - if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | - ZFS_TYPE_BOOKMARK)) == 0) && include_bmarks) - (void) zfs_iter_bookmarks(zhp, zfs_callback, data); - cb->cb_depth--; - } - - if (should_close) - zfs_close(zhp); - - return (0); -} - -int -zfs_add_sort_column(zfs_sort_column_t **sc, const char *name, - boolean_t reverse) -{ - zfs_sort_column_t *col; - zfs_prop_t prop; - - if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL && - !zfs_prop_user(name)) - return (-1); - - col = safe_malloc(sizeof (zfs_sort_column_t)); - - col->sc_prop = prop; - col->sc_reverse = reverse; - if (prop == ZPROP_INVAL) { - col->sc_user_prop = safe_malloc(strlen(name) + 1); - (void) strcpy(col->sc_user_prop, name); - } - - if (*sc == NULL) { - col->sc_last = col; - *sc = col; - } else { - (*sc)->sc_last->sc_next = col; - (*sc)->sc_last = col; - } - - return (0); -} - -void -zfs_free_sort_columns(zfs_sort_column_t *sc) -{ - zfs_sort_column_t *col; - - while (sc != NULL) { - col = sc->sc_next; - free(sc->sc_user_prop); - free(sc); - sc = col; - } -} - -boolean_t -zfs_sort_only_by_name(const zfs_sort_column_t *sc) -{ - - return (sc != NULL && sc->sc_next == NULL && - sc->sc_prop == ZFS_PROP_NAME); -} - -/* ARGSUSED */ -static int -zfs_compare(const void *larg, const void *rarg, void *unused) -{ - zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; - zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; - const char *lname = zfs_get_name(l); - const char *rname = zfs_get_name(r); - char *lat, *rat; - uint64_t lcreate, rcreate; - int ret; - - lat = (char 
*)strchr(lname, '@'); - rat = (char *)strchr(rname, '@'); - - if (lat != NULL) - *lat = '\0'; - if (rat != NULL) - *rat = '\0'; - - ret = strcmp(lname, rname); - if (ret == 0 && (lat != NULL || rat != NULL)) { - /* - * If we're comparing a dataset to one of its snapshots, we - * always make the full dataset first. - */ - if (lat == NULL) { - ret = -1; - } else if (rat == NULL) { - ret = 1; - } else { - /* - * If we have two snapshots from the same dataset, then - * we want to sort them according to creation time. We - * use the hidden CREATETXG property to get an absolute - * ordering of snapshots. - */ - lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); - rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); - - /* - * Both lcreate and rcreate being 0 means we don't have - * properties and we should compare full name. - */ - if (lcreate == 0 && rcreate == 0) - ret = strcmp(lat + 1, rat + 1); - else if (lcreate < rcreate) - ret = -1; - else if (lcreate > rcreate) - ret = 1; - } - } - - if (lat != NULL) - *lat = '@'; - if (rat != NULL) - *rat = '@'; - - return (ret); -} - -/* - * Sort datasets by specified columns. - * - * o Numeric types sort in ascending order. - * o String types sort in alphabetical order. - * o Types inappropriate for a row sort that row to the literal - * bottom, regardless of the specified ordering. - * - * If no sort columns are specified, or two datasets compare equally - * across all specified columns, they are sorted alphabetically by name - * with snapshots grouped under their parents. 
- */ -static int -zfs_sort(const void *larg, const void *rarg, void *data) -{ - zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; - zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; - zfs_sort_column_t *sc = (zfs_sort_column_t *)data; - zfs_sort_column_t *psc; - - for (psc = sc; psc != NULL; psc = psc->sc_next) { - char lbuf[ZFS_MAXPROPLEN], rbuf[ZFS_MAXPROPLEN]; - char *lstr, *rstr; - uint64_t lnum, rnum; - boolean_t lvalid, rvalid; - int ret = 0; - - /* - * We group the checks below the generic code. If 'lstr' and - * 'rstr' are non-NULL, then we do a string based comparison. - * Otherwise, we compare 'lnum' and 'rnum'. - */ - lstr = rstr = NULL; - if (psc->sc_prop == ZPROP_INVAL) { - nvlist_t *luser, *ruser; - nvlist_t *lval, *rval; - - luser = zfs_get_user_props(l); - ruser = zfs_get_user_props(r); - - lvalid = (nvlist_lookup_nvlist(luser, - psc->sc_user_prop, &lval) == 0); - rvalid = (nvlist_lookup_nvlist(ruser, - psc->sc_user_prop, &rval) == 0); - - if (lvalid) - verify(nvlist_lookup_string(lval, - ZPROP_VALUE, &lstr) == 0); - if (rvalid) - verify(nvlist_lookup_string(rval, - ZPROP_VALUE, &rstr) == 0); - } else if (psc->sc_prop == ZFS_PROP_NAME) { - lvalid = rvalid = B_TRUE; - - (void) strlcpy(lbuf, zfs_get_name(l), sizeof (lbuf)); - (void) strlcpy(rbuf, zfs_get_name(r), sizeof (rbuf)); - - lstr = lbuf; - rstr = rbuf; - } else if (zfs_prop_is_string(psc->sc_prop)) { - lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf, - sizeof (lbuf), NULL, NULL, 0, B_TRUE) == 0); - rvalid = (zfs_prop_get(r, psc->sc_prop, rbuf, - sizeof (rbuf), NULL, NULL, 0, B_TRUE) == 0); - - lstr = lbuf; - rstr = rbuf; - } else { - lvalid = zfs_prop_valid_for_type(psc->sc_prop, - zfs_get_type(l)); - rvalid = zfs_prop_valid_for_type(psc->sc_prop, - zfs_get_type(r)); - - if (lvalid) - (void) zfs_prop_get_numeric(l, psc->sc_prop, - &lnum, NULL, NULL, 0); - if (rvalid) - (void) zfs_prop_get_numeric(r, psc->sc_prop, - &rnum, NULL, NULL, 0); - } - - if (!lvalid && !rvalid) - continue; - else if 
(!lvalid) - return (1); - else if (!rvalid) - return (-1); - - if (lstr) - ret = strcmp(lstr, rstr); - else if (lnum < rnum) - ret = -1; - else if (lnum > rnum) - ret = 1; - - if (ret != 0) { - if (psc->sc_reverse == B_TRUE) - ret = (ret < 0) ? 1 : -1; - return (ret); - } - } - - return (zfs_compare(larg, rarg, NULL)); -} - -int -zfs_for_each(int argc, char **argv, int flags, zfs_type_t types, - zfs_sort_column_t *sortcol, zprop_list_t **proplist, int limit, - zfs_iter_f callback, void *data) -{ - callback_data_t cb = {0}; - int ret = 0; - zfs_node_t *node; - uu_avl_walk_t *walk; - - avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t), - offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT); - - if (avl_pool == NULL) - nomem(); - - cb.cb_sortcol = sortcol; - cb.cb_flags = flags; - cb.cb_proplist = proplist; - cb.cb_types = types; - cb.cb_depth_limit = limit; - /* - * If cb_proplist is provided then in the zfs_handles created we - * retain only those properties listed in cb_proplist and sortcol. - * The rest are pruned. So, the caller should make sure that no other - * properties other than those listed in cb_proplist/sortcol are - * accessed. - * - * If cb_proplist is NULL then we retain all the properties. We - * always retain the zoned property, which some other properties - * need (userquota & friends), and the createtxg property, which - * we need to sort snapshots. 
- */ - if (cb.cb_proplist && *cb.cb_proplist) { - zprop_list_t *p = *cb.cb_proplist; - - while (p) { - if (p->pl_prop >= ZFS_PROP_TYPE && - p->pl_prop < ZFS_NUM_PROPS) { - cb.cb_props_table[p->pl_prop] = B_TRUE; - } - p = p->pl_next; - } - - while (sortcol) { - if (sortcol->sc_prop >= ZFS_PROP_TYPE && - sortcol->sc_prop < ZFS_NUM_PROPS) { - cb.cb_props_table[sortcol->sc_prop] = B_TRUE; - } - sortcol = sortcol->sc_next; - } - - cb.cb_props_table[ZFS_PROP_ZONED] = B_TRUE; - cb.cb_props_table[ZFS_PROP_CREATETXG] = B_TRUE; - } else { - (void) memset(cb.cb_props_table, B_TRUE, - sizeof (cb.cb_props_table)); - } - - if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) - nomem(); - - if (argc == 0) { - /* - * If given no arguments, iterate over all datasets. - */ - cb.cb_flags |= ZFS_ITER_RECURSE; - ret = zfs_iter_root(g_zfs, zfs_callback, &cb); - } else { - int i; - zfs_handle_t *zhp; - zfs_type_t argtype; - - /* - * If we're recursive, then we always allow filesystems as - * arguments. If we also are interested in snapshots or - * bookmarks, then we can take volumes as well. - */ - argtype = types; - if (flags & ZFS_ITER_RECURSE) { - argtype |= ZFS_TYPE_FILESYSTEM; - if (types & (ZFS_TYPE_SNAPSHOT | ZFS_TYPE_BOOKMARK)) - argtype |= ZFS_TYPE_VOLUME; - } - - for (i = 0; i < argc; i++) { - if (flags & ZFS_ITER_ARGS_CAN_BE_PATHS) { - zhp = zfs_path_to_zhandle(g_zfs, argv[i], - argtype); - } else { - zhp = zfs_open(g_zfs, argv[i], argtype); - } - if (zhp != NULL) - ret |= zfs_callback(zhp, &cb); - else - ret = 1; - } - } - - /* - * At this point we've got our AVL tree full of zfs handles, so iterate - * over each one and execute the real user callback. - */ - for (node = uu_avl_first(cb.cb_avl); node != NULL; - node = uu_avl_next(cb.cb_avl, node)) - ret |= callback(node->zn_handle, data); - - /* - * Finally, clean up the AVL tree. 
- */ - if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) - nomem(); - - while ((node = uu_avl_walk_next(walk)) != NULL) { - uu_avl_remove(cb.cb_avl, node); - zfs_close(node->zn_handle); - free(node); - } - - uu_avl_walk_end(walk); - uu_avl_destroy(cb.cb_avl); - uu_avl_pool_destroy(avl_pool); - - return (ret); -} diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h deleted file mode 100644 index b89b466ce6fe..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
- */ - -#ifndef ZFS_ITER_H -#define ZFS_ITER_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct zfs_sort_column { - struct zfs_sort_column *sc_next; - struct zfs_sort_column *sc_last; - zfs_prop_t sc_prop; - char *sc_user_prop; - boolean_t sc_reverse; -} zfs_sort_column_t; - -#define ZFS_ITER_RECURSE (1 << 0) -#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1) -#define ZFS_ITER_PROP_LISTSNAPS (1 << 2) -#define ZFS_ITER_DEPTH_LIMIT (1 << 3) -#define ZFS_ITER_RECVD_PROPS (1 << 4) -#define ZFS_ITER_SIMPLE (1 << 5) -#define ZFS_ITER_LITERAL_PROPS (1 << 6) - -int zfs_for_each(int, char **, int options, zfs_type_t, - zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *); -int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t); -void zfs_free_sort_columns(zfs_sort_column_t *); -boolean_t zfs_sort_only_by_name(const zfs_sort_column_t *); - -#ifdef __cplusplus -} -#endif - -#endif /* ZFS_ITER_H */ diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c deleted file mode 100644 index d453ba030488..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ /dev/null @@ -1,7592 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. - * Copyright 2012 Milan Jurik. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. - * Copyright 2016 Nexenta Systems, Inc. - * Copyright (c) 2019 Datto Inc. - */ - -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <getopt.h> -#include <libgen.h> -#include <libintl.h> -#include <libuutil.h> -#include <libnvpair.h> -#include <locale.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <fcntl.h> -#include <zone.h> -#include <grp.h> -#include <pwd.h> -#include <signal.h> -#include <sys/debug.h> -#include <sys/list.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/mount.h> -#include <sys/stat.h> -#include <sys/fs/zfs.h> -#include <sys/types.h> -#include <time.h> -#include <err.h> -#include <jail.h> - -#include <libzfs.h> -#include <libzfs_core.h> -#include <zfs_prop.h> -#include <zfs_deleg.h> -#include <libuutil.h> -#ifdef illumos -#include <aclutils.h> -#include <directory.h> -#include <idmap.h> -#include <libshare.h> -#endif - -#include "zfs_iter.h" -#include "zfs_util.h" -#include "zfs_comutil.h" - -libzfs_handle_t *g_zfs; - -static FILE *mnttab_file; -static char history_str[HIS_MAX_RECORD_LEN]; -static boolean_t log_history = B_TRUE; - -static 
int zfs_do_clone(int argc, char **argv); -static int zfs_do_create(int argc, char **argv); -static int zfs_do_destroy(int argc, char **argv); -static int zfs_do_get(int argc, char **argv); -static int zfs_do_inherit(int argc, char **argv); -static int zfs_do_list(int argc, char **argv); -static int zfs_do_mount(int argc, char **argv); -static int zfs_do_rename(int argc, char **argv); -static int zfs_do_rollback(int argc, char **argv); -static int zfs_do_set(int argc, char **argv); -static int zfs_do_upgrade(int argc, char **argv); -static int zfs_do_snapshot(int argc, char **argv); -static int zfs_do_unmount(int argc, char **argv); -static int zfs_do_share(int argc, char **argv); -static int zfs_do_unshare(int argc, char **argv); -static int zfs_do_send(int argc, char **argv); -static int zfs_do_receive(int argc, char **argv); -static int zfs_do_promote(int argc, char **argv); -static int zfs_do_userspace(int argc, char **argv); -static int zfs_do_allow(int argc, char **argv); -static int zfs_do_unallow(int argc, char **argv); -static int zfs_do_hold(int argc, char **argv); -static int zfs_do_holds(int argc, char **argv); -static int zfs_do_release(int argc, char **argv); -static int zfs_do_diff(int argc, char **argv); -static int zfs_do_jail(int argc, char **argv); -static int zfs_do_unjail(int argc, char **argv); -static int zfs_do_bookmark(int argc, char **argv); -static int zfs_do_remap(int argc, char **argv); -static int zfs_do_channel_program(int argc, char **argv); - -/* - * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. 
- */ - -#ifdef DEBUG -const char * -_umem_debug_init(void) -{ - return ("default,verbose"); /* $UMEM_DEBUG setting */ -} - -const char * -_umem_logging_init(void) -{ - return ("fail,contents"); /* $UMEM_LOGGING setting */ -} -#endif - -typedef enum { - HELP_CLONE, - HELP_CREATE, - HELP_DESTROY, - HELP_GET, - HELP_INHERIT, - HELP_UPGRADE, - HELP_JAIL, - HELP_UNJAIL, - HELP_LIST, - HELP_MOUNT, - HELP_PROMOTE, - HELP_RECEIVE, - HELP_RENAME, - HELP_ROLLBACK, - HELP_SEND, - HELP_SET, - HELP_SHARE, - HELP_SNAPSHOT, - HELP_UNMOUNT, - HELP_UNSHARE, - HELP_ALLOW, - HELP_UNALLOW, - HELP_USERSPACE, - HELP_GROUPSPACE, - HELP_HOLD, - HELP_HOLDS, - HELP_RELEASE, - HELP_DIFF, - HELP_REMAP, - HELP_BOOKMARK, - HELP_CHANNEL_PROGRAM, -} zfs_help_t; - -typedef struct zfs_command { - const char *name; - int (*func)(int argc, char **argv); - zfs_help_t usage; -} zfs_command_t; - -/* - * Master command table. Each ZFS command has a name, associated function, and - * usage message. The usage messages need to be internationalized, so we have - * to have a function to return the usage message based on a command index. - * - * These commands are organized according to how they are displayed in the usage - * message. An empty command (one with a NULL name) indicates an empty line in - * the generic usage message. 
- */ -static zfs_command_t command_table[] = { - { "create", zfs_do_create, HELP_CREATE }, - { "destroy", zfs_do_destroy, HELP_DESTROY }, - { NULL }, - { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, - { "rollback", zfs_do_rollback, HELP_ROLLBACK }, - { "clone", zfs_do_clone, HELP_CLONE }, - { "promote", zfs_do_promote, HELP_PROMOTE }, - { "rename", zfs_do_rename, HELP_RENAME }, - { "bookmark", zfs_do_bookmark, HELP_BOOKMARK }, - { "program", zfs_do_channel_program, HELP_CHANNEL_PROGRAM }, - { NULL }, - { "list", zfs_do_list, HELP_LIST }, - { NULL }, - { "set", zfs_do_set, HELP_SET }, - { "get", zfs_do_get, HELP_GET }, - { "inherit", zfs_do_inherit, HELP_INHERIT }, - { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, - { "userspace", zfs_do_userspace, HELP_USERSPACE }, - { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, - { NULL }, - { "mount", zfs_do_mount, HELP_MOUNT }, - { "unmount", zfs_do_unmount, HELP_UNMOUNT }, - { "share", zfs_do_share, HELP_SHARE }, - { "unshare", zfs_do_unshare, HELP_UNSHARE }, - { NULL }, - { "send", zfs_do_send, HELP_SEND }, - { "receive", zfs_do_receive, HELP_RECEIVE }, - { NULL }, - { "allow", zfs_do_allow, HELP_ALLOW }, - { NULL }, - { "unallow", zfs_do_unallow, HELP_UNALLOW }, - { NULL }, - { "hold", zfs_do_hold, HELP_HOLD }, - { "holds", zfs_do_holds, HELP_HOLDS }, - { "release", zfs_do_release, HELP_RELEASE }, - { "diff", zfs_do_diff, HELP_DIFF }, - { NULL }, - { "jail", zfs_do_jail, HELP_JAIL }, - { "unjail", zfs_do_unjail, HELP_UNJAIL }, - { "remap", zfs_do_remap, HELP_REMAP }, -}; - -#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) - -zfs_command_t *current_command; - -static const char * -get_usage(zfs_help_t idx) -{ - switch (idx) { - case HELP_CLONE: - return (gettext("\tclone [-p] [-o property=value] ... " - "<snapshot> <filesystem|volume>\n")); - case HELP_CREATE: - return (gettext("\tcreate [-pu] [-o property=value] ... " - "<filesystem>\n" - "\tcreate [-ps] [-b blocksize] [-o property=value] ... 
" - "-V <size> <volume>\n")); - case HELP_DESTROY: - return (gettext("\tdestroy [-fnpRrv] <filesystem|volume>\n" - "\tdestroy [-dnpRrv] " - "<filesystem|volume>@<snap>[%<snap>][,...]\n" - "\tdestroy <filesystem|volume>#<bookmark>\n")); - case HELP_GET: - return (gettext("\tget [-rHp] [-d max] " - "[-o \"all\" | field[,...]]\n" - "\t [-t type[,...]] [-s source[,...]]\n" - "\t <\"all\" | property[,...]> " - "[filesystem|volume|snapshot|bookmark] ...\n")); - case HELP_INHERIT: - return (gettext("\tinherit [-rS] <property> " - "<filesystem|volume|snapshot> ...\n")); - case HELP_UPGRADE: - return (gettext("\tupgrade [-v]\n" - "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); - case HELP_JAIL: - return (gettext("\tjail <jailid|jailname> <filesystem>\n")); - case HELP_UNJAIL: - return (gettext("\tunjail <jailid|jailname> <filesystem>\n")); - case HELP_LIST: - return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " - "[-s property]...\n\t [-S property]... [-t type[,...]] " - "[filesystem|volume|snapshot] ...\n")); - case HELP_MOUNT: - return (gettext("\tmount\n" - "\tmount [-vO] [-o opts] <-a | filesystem>\n")); - case HELP_PROMOTE: - return (gettext("\tpromote <clone-filesystem>\n")); - case HELP_RECEIVE: - return (gettext("\treceive|recv [-vnsFMu] <filesystem|volume|" - "snapshot>\n" - "\treceive|recv [-vnsFMu] [-o origin=<snapshot>] [-d | -e] " - "<filesystem>\n" - "\treceive|recv -A <filesystem|volume>\n")); - case HELP_RENAME: - return (gettext("\trename [-f] <filesystem|volume|snapshot> " - "<filesystem|volume|snapshot>\n" - "\trename [-f] -p <filesystem|volume> <filesystem|volume>\n" - "\trename -r <snapshot> <snapshot>\n" - "\trename <bookmark> <bookmark>\n" - "\trename -u [-p] <filesystem> <filesystem>")); - case HELP_ROLLBACK: - return (gettext("\trollback [-rRf] <snapshot>\n")); - case HELP_SEND: - return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " - "<snapshot>\n" - "\tsend [-LPcenv] [-i snapshot|bookmark] " - 
"<filesystem|volume|snapshot>\n" - "\tsend [-nvPe] -t <receive_resume_token>\n")); - case HELP_SET: - return (gettext("\tset <property=value> ... " - "<filesystem|volume|snapshot> ...\n")); - case HELP_SHARE: - return (gettext("\tshare <-a | filesystem>\n")); - case HELP_SNAPSHOT: - return (gettext("\tsnapshot|snap [-r] [-o property=value] ... " - "<filesystem|volume>@<snap> ...\n")); - case HELP_UNMOUNT: - return (gettext("\tunmount|umount [-f] " - "<-a | filesystem|mountpoint>\n")); - case HELP_UNSHARE: - return (gettext("\tunshare " - "<-a | filesystem|mountpoint>\n")); - case HELP_ALLOW: - return (gettext("\tallow <filesystem|volume>\n" - "\tallow [-ldug] " - "<\"everyone\"|user|group>[,...] <perm|@setname>[,...]\n" - "\t <filesystem|volume>\n" - "\tallow [-ld] -e <perm|@setname>[,...] " - "<filesystem|volume>\n" - "\tallow -c <perm|@setname>[,...] <filesystem|volume>\n" - "\tallow -s @setname <perm|@setname>[,...] " - "<filesystem|volume>\n")); - case HELP_UNALLOW: - return (gettext("\tunallow [-rldug] " - "<\"everyone\"|user|group>[,...]\n" - "\t [<perm|@setname>[,...]] <filesystem|volume>\n" - "\tunallow [-rld] -e [<perm|@setname>[,...]] " - "<filesystem|volume>\n" - "\tunallow [-r] -c [<perm|@setname>[,...]] " - "<filesystem|volume>\n" - "\tunallow [-r] -s @setname [<perm|@setname>[,...]] " - "<filesystem|volume>\n")); - case HELP_USERSPACE: - return (gettext("\tuserspace [-Hinp] [-o field[,...]] " - "[-s field] ...\n" - "\t [-S field] ... [-t type[,...]] " - "<filesystem|snapshot>\n")); - case HELP_GROUPSPACE: - return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " - "[-s field] ...\n" - "\t [-S field] ... 
[-t type[,...]] " - "<filesystem|snapshot>\n")); - case HELP_HOLD: - return (gettext("\thold [-r] <tag> <snapshot> ...\n")); - case HELP_HOLDS: - return (gettext("\tholds [-Hp] [-r|-d depth] " - "<filesystem|volume|snapshot> ...\n")); - case HELP_RELEASE: - return (gettext("\trelease [-r] <tag> <snapshot> ...\n")); - case HELP_DIFF: - return (gettext("\tdiff [-FHt] <snapshot> " - "[snapshot|filesystem]\n")); - case HELP_REMAP: - return (gettext("\tremap <filesystem | volume>\n")); - case HELP_BOOKMARK: - return (gettext("\tbookmark <snapshot> <bookmark>\n")); - case HELP_CHANNEL_PROGRAM: - return (gettext("\tprogram [-jn] [-t <instruction limit>] " - "[-m <memory limit (b)>] <pool> <program file> " - "[lua args...]\n")); - } - - abort(); - /* NOTREACHED */ -} - -void -nomem(void) -{ - (void) fprintf(stderr, gettext("internal error: out of memory\n")); - exit(1); -} - -/* - * Utility function to guarantee malloc() success. - */ - -void * -safe_malloc(size_t size) -{ - void *data; - - if ((data = calloc(1, size)) == NULL) - nomem(); - - return (data); -} - -void * -safe_realloc(void *data, size_t size) -{ - void *newp; - if ((newp = realloc(data, size)) == NULL) { - free(data); - nomem(); - } - - return (newp); -} - -static char * -safe_strdup(char *str) -{ - char *dupstr = strdup(str); - - if (dupstr == NULL) - nomem(); - - return (dupstr); -} - -/* - * Callback routine that will print out information for each of - * the properties. - */ -static int -usage_prop_cb(int prop, void *cb) -{ - FILE *fp = cb; - - (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop)); - - if (zfs_prop_readonly(prop)) - (void) fprintf(fp, " NO "); - else - (void) fprintf(fp, "YES "); - - if (zfs_prop_inheritable(prop)) - (void) fprintf(fp, " YES "); - else - (void) fprintf(fp, " NO "); - - if (zfs_prop_values(prop) == NULL) - (void) fprintf(fp, "-\n"); - else - (void) fprintf(fp, "%s\n", zfs_prop_values(prop)); - - return (ZPROP_CONT); -} - -/* - * Display usage message. 
If we're inside a command, display only the usage for - * that command. Otherwise, iterate over the entire command table and display - * a complete usage message. - */ -static void -usage(boolean_t requested) -{ - int i; - boolean_t show_properties = B_FALSE; - FILE *fp = requested ? stdout : stderr; - - if (current_command == NULL) { - - (void) fprintf(fp, gettext("usage: zfs command args ...\n")); - (void) fprintf(fp, - gettext("where 'command' is one of the following:\n\n")); - - for (i = 0; i < NCOMMAND; i++) { - if (command_table[i].name == NULL) - (void) fprintf(fp, "\n"); - else - (void) fprintf(fp, "%s", - get_usage(command_table[i].usage)); - } - - (void) fprintf(fp, gettext("\nEach dataset is of the form: " - "pool/[dataset/]*dataset[@name]\n")); - } else { - (void) fprintf(fp, gettext("usage:\n")); - (void) fprintf(fp, "%s", get_usage(current_command->usage)); - } - - if (current_command != NULL && - (strcmp(current_command->name, "set") == 0 || - strcmp(current_command->name, "get") == 0 || - strcmp(current_command->name, "inherit") == 0 || - strcmp(current_command->name, "list") == 0)) - show_properties = B_TRUE; - - if (show_properties) { - (void) fprintf(fp, - gettext("\nThe following properties are supported:\n")); - - (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n", - "PROPERTY", "EDIT", "INHERIT", "VALUES"); - - /* Iterate over all properties */ - (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, - ZFS_TYPE_DATASET); - - (void) fprintf(fp, "\t%-15s ", "userused@..."); - (void) fprintf(fp, " NO NO <size>\n"); - (void) fprintf(fp, "\t%-15s ", "groupused@..."); - (void) fprintf(fp, " NO NO <size>\n"); - (void) fprintf(fp, "\t%-15s ", "userquota@..."); - (void) fprintf(fp, "YES NO <size> | none\n"); - (void) fprintf(fp, "\t%-15s ", "groupquota@..."); - (void) fprintf(fp, "YES NO <size> | none\n"); - (void) fprintf(fp, "\t%-15s ", "written@<snap>"); - (void) fprintf(fp, " NO NO <size>\n"); - - (void) fprintf(fp, gettext("\nSizes are specified in bytes 
" - "with standard units such as K, M, G, etc.\n")); - (void) fprintf(fp, gettext("\nUser-defined properties can " - "be specified by using a name containing a colon (:).\n")); - (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ " - "properties must be appended with\n" - "a user or group specifier of one of these forms:\n" - " POSIX name (eg: \"matt\")\n" - " POSIX id (eg: \"126829\")\n" - " SMB name@domain (eg: \"matt@sun\")\n" - " SMB SID (eg: \"S-1-234-567-89\")\n")); - } else { - (void) fprintf(fp, - gettext("\nFor the property list, run: %s\n"), - "zfs set|get"); - (void) fprintf(fp, - gettext("\nFor the delegated permission list, run: %s\n"), - "zfs allow|unallow"); - } - - /* - * See comments at end of main(). - */ - if (getenv("ZFS_ABORT") != NULL) { - (void) printf("dumping core by request\n"); - abort(); - } - - exit(requested ? 0 : 2); -} - -/* - * Take a property=value argument string and add it to the given nvlist. - * Modifies the argument inplace. - */ -static int -parseprop(nvlist_t *props, char *propname) -{ - char *propval, *strval; - - if ((propval = strchr(propname, '=')) == NULL) { - (void) fprintf(stderr, gettext("missing " - "'=' for property=value argument\n")); - return (-1); - } - *propval = '\0'; - propval++; - if (nvlist_lookup_string(props, propname, &strval) == 0) { - (void) fprintf(stderr, gettext("property '%s' " - "specified multiple times\n"), propname); - return (-1); - } - if (nvlist_add_string(props, propname, propval) != 0) - nomem(); - return (0); -} - -static int -parse_depth(char *opt, int *flags) -{ - char *tmp; - int depth; - - depth = (int)strtol(opt, &tmp, 0); - if (*tmp) { - (void) fprintf(stderr, - gettext("%s is not an integer\n"), opt); - usage(B_FALSE); - } - if (depth < 0) { - (void) fprintf(stderr, - gettext("Depth can not be negative.\n")); - usage(B_FALSE); - } - *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); - return (depth); -} - -#define PROGRESS_DELAY 2 /* seconds */ - -static char *pt_reverse 
= "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; -static time_t pt_begin; -static char *pt_header = NULL; -static boolean_t pt_shown; - -static void -start_progress_timer(void) -{ - pt_begin = time(NULL) + PROGRESS_DELAY; - pt_shown = B_FALSE; -} - -static void -set_progress_header(char *header) -{ - assert(pt_header == NULL); - pt_header = safe_strdup(header); - if (pt_shown) { - (void) printf("%s: ", header); - (void) fflush(stdout); - } -} - -static void -update_progress(char *update) -{ - if (!pt_shown && time(NULL) > pt_begin) { - int len = strlen(update); - - (void) printf("%s: %s%*.*s", pt_header, update, len, len, - pt_reverse); - (void) fflush(stdout); - pt_shown = B_TRUE; - } else if (pt_shown) { - int len = strlen(update); - - (void) printf("%s%*.*s", update, len, len, pt_reverse); - (void) fflush(stdout); - } -} - -static void -finish_progress(char *done) -{ - if (pt_shown) { - (void) printf("%s\n", done); - (void) fflush(stdout); - } - free(pt_header); - pt_header = NULL; -} - -/* - * Check if the dataset is mountable and should be automatically mounted. - */ -static boolean_t -should_auto_mount(zfs_handle_t *zhp) -{ - if (!zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, zfs_get_type(zhp))) - return (B_FALSE); - return (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON); -} - -/* - * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol> - * - * Given an existing dataset, create a writable copy whose initial contents - * are the same as the source. The newly created dataset maintains a - * dependency on the original; the original cannot be destroyed so long as - * the clone exists. - * - * The '-p' flag creates all the non-existing ancestors of the target first. 
- */ -static int -zfs_do_clone(int argc, char **argv) -{ - zfs_handle_t *zhp = NULL; - boolean_t parents = B_FALSE; - nvlist_t *props; - int ret = 0; - int c; - - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - /* check options */ - while ((c = getopt(argc, argv, "o:p")) != -1) { - switch (c) { - case 'o': - if (parseprop(props, optarg) != 0) - return (1); - break; - case 'p': - parents = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - goto usage; - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing source dataset " - "argument\n")); - goto usage; - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing target dataset " - "argument\n")); - goto usage; - } - if (argc > 2) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto usage; - } - - /* open the source dataset */ - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) - return (1); - - if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) { - /* - * Now create the ancestors of the target dataset. If the - * target already exists and '-p' option was used we should not - * complain. - */ - if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) - return (0); - if (zfs_create_ancestors(g_zfs, argv[1]) != 0) - return (1); - } - - /* pass to libzfs */ - ret = zfs_clone(zhp, argv[1], props); - - /* create the mountpoint if necessary */ - if (ret == 0) { - zfs_handle_t *clone; - - clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET); - if (clone != NULL) { - /* - * If the user doesn't want the dataset - * automatically mounted, then skip the mount/share - * step. 
- */ - if (should_auto_mount(clone)) { - if ((ret = zfs_mount(clone, NULL, 0)) != 0) { - (void) fprintf(stderr, gettext("clone " - "successfully created, " - "but not mounted\n")); - } else if ((ret = zfs_share(clone)) != 0) { - (void) fprintf(stderr, gettext("clone " - "successfully created, " - "but not shared\n")); - } - } - zfs_close(clone); - } - } - - zfs_close(zhp); - nvlist_free(props); - - return (!!ret); - -usage: - if (zhp) - zfs_close(zhp); - nvlist_free(props); - usage(B_FALSE); - return (-1); -} - -/* - * zfs create [-pu] [-o prop=value] ... fs - * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size - * - * Create a new dataset. This command can be used to create filesystems - * and volumes. Snapshot creation is handled by 'zfs snapshot'. - * For volumes, the user must specify a size to be used. - * - * The '-s' flag applies only to volumes, and indicates that we should not try - * to set the reservation for this volume. By default we set a reservation - * equal to the size for any volume. For pools with SPA_VERSION >= - * SPA_VERSION_REFRESERVATION, we set a refreservation instead. - * - * The '-p' flag creates all the non-existing ancestors of the target first. - * - * The '-u' flag prevents mounting of newly created file system. 
- */ -static int -zfs_do_create(int argc, char **argv) -{ - zfs_type_t type = ZFS_TYPE_FILESYSTEM; - zfs_handle_t *zhp = NULL; - uint64_t volsize = 0; - int c; - boolean_t noreserve = B_FALSE; - boolean_t bflag = B_FALSE; - boolean_t parents = B_FALSE; - boolean_t nomount = B_FALSE; - int ret = 1; - nvlist_t *props; - uint64_t intval; - - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - /* check options */ - while ((c = getopt(argc, argv, ":V:b:so:pu")) != -1) { - switch (c) { - case 'V': - type = ZFS_TYPE_VOLUME; - if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { - (void) fprintf(stderr, gettext("bad volume " - "size '%s': %s\n"), optarg, - libzfs_error_description(g_zfs)); - goto error; - } - - if (nvlist_add_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) - nomem(); - volsize = intval; - break; - case 'p': - parents = B_TRUE; - break; - case 'b': - bflag = B_TRUE; - if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { - (void) fprintf(stderr, gettext("bad volume " - "block size '%s': %s\n"), optarg, - libzfs_error_description(g_zfs)); - goto error; - } - - if (nvlist_add_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - intval) != 0) - nomem(); - break; - case 'o': - if (parseprop(props, optarg) != 0) - goto error; - break; - case 's': - noreserve = B_TRUE; - break; - case 'u': - nomount = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing size " - "argument\n")); - goto badusage; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - goto badusage; - } - } - - if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { - (void) fprintf(stderr, gettext("'-s' and '-b' can only be " - "used when creating a volume\n")); - goto badusage; - } - if (nomount && type != ZFS_TYPE_FILESYSTEM) { - (void) fprintf(stderr, gettext("'-u' can only be " - "used when creating a file system\n")); - goto badusage; - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - 
if (argc == 0) { - (void) fprintf(stderr, gettext("missing %s argument\n"), - zfs_type_to_name(type)); - goto badusage; - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto badusage; - } - - if (type == ZFS_TYPE_VOLUME && !noreserve) { - zpool_handle_t *zpool_handle; - nvlist_t *real_props = NULL; - uint64_t spa_version; - char *p; - zfs_prop_t resv_prop; - char *strval; - char msg[1024]; - - if ((p = strchr(argv[0], '/')) != NULL) - *p = '\0'; - zpool_handle = zpool_open(g_zfs, argv[0]); - if (p != NULL) - *p = '/'; - if (zpool_handle == NULL) - goto error; - spa_version = zpool_get_prop_int(zpool_handle, - ZPOOL_PROP_VERSION, NULL); - if (spa_version >= SPA_VERSION_REFRESERVATION) - resv_prop = ZFS_PROP_REFRESERVATION; - else - resv_prop = ZFS_PROP_RESERVATION; - - (void) snprintf(msg, sizeof (msg), - gettext("cannot create '%s'"), argv[0]); - if (props && (real_props = zfs_valid_proplist(g_zfs, type, - props, 0, NULL, zpool_handle, msg)) == NULL) { - zpool_close(zpool_handle); - goto error; - } - zpool_close(zpool_handle); - - volsize = zvol_volsize_to_reservation(volsize, real_props); - nvlist_free(real_props); - - if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), - &strval) != 0) { - if (nvlist_add_uint64(props, - zfs_prop_to_name(resv_prop), volsize) != 0) { - nvlist_free(props); - nomem(); - } - } - } - - if (parents && zfs_name_valid(argv[0], type)) { - /* - * Now create the ancestors of target dataset. If the target - * already exists and '-p' option was used we should not - * complain. - */ - if (zfs_dataset_exists(g_zfs, argv[0], type)) { - ret = 0; - goto error; - } - if (zfs_create_ancestors(g_zfs, argv[0]) != 0) - goto error; - } - - /* pass to libzfs */ - if (zfs_create(g_zfs, argv[0], type, props) != 0) - goto error; - - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) - goto error; - - ret = 0; - - /* - * Mount and/or share the new filesystem as appropriate. 
We provide a - * verbose error message to let the user know that their filesystem was - * in fact created, even if we failed to mount or share it. - * If the user doesn't want the dataset automatically mounted, - * then skip the mount/share step altogether. - */ - if (!nomount && should_auto_mount(zhp)) { - if (zfs_mount(zhp, NULL, 0) != 0) { - (void) fprintf(stderr, gettext("filesystem " - "successfully created, but not mounted\n")); - ret = 1; - } else if (zfs_share(zhp) != 0) { - (void) fprintf(stderr, gettext("filesystem " - "successfully created, but not shared\n")); - ret = 1; - } - } - -error: - if (zhp) - zfs_close(zhp); - nvlist_free(props); - return (ret); -badusage: - nvlist_free(props); - usage(B_FALSE); - return (2); -} - -/* - * zfs destroy [-rRf] <fs, vol> - * zfs destroy [-rRd] <snap> - * - * -r Recursively destroy all children - * -R Recursively destroy all dependents, including clones - * -f Force unmounting of any dependents - * -d If we can't destroy now, mark for deferred destruction - * - * Destroys the given dataset. By default, it will unmount any filesystems, - * and refuse to destroy a dataset that has any dependents. A dependent can - * either be a child, or a clone of a child. - */ -typedef struct destroy_cbdata { - boolean_t cb_first; - boolean_t cb_force; - boolean_t cb_recurse; - boolean_t cb_error; - boolean_t cb_doclones; - zfs_handle_t *cb_target; - boolean_t cb_defer_destroy; - boolean_t cb_verbose; - boolean_t cb_parsable; - boolean_t cb_dryrun; - nvlist_t *cb_nvl; - nvlist_t *cb_batchedsnaps; - - /* first snap in contiguous run */ - char *cb_firstsnap; - /* previous snap in contiguous run */ - char *cb_prevsnap; - int64_t cb_snapused; - char *cb_snapspec; - char *cb_bookmark; -} destroy_cbdata_t; - -/* - * Check for any dependents based on the '-r' or '-R' flags. 
- */ -static int -destroy_check_dependent(zfs_handle_t *zhp, void *data) -{ - destroy_cbdata_t *cbp = data; - const char *tname = zfs_get_name(cbp->cb_target); - const char *name = zfs_get_name(zhp); - - if (strncmp(tname, name, strlen(tname)) == 0 && - (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { - /* - * This is a direct descendant, not a clone somewhere else in - * the hierarchy. - */ - if (cbp->cb_recurse) - goto out; - - if (cbp->cb_first) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "%s has children\n"), - zfs_get_name(cbp->cb_target), - zfs_type_to_name(zfs_get_type(cbp->cb_target))); - (void) fprintf(stderr, gettext("use '-r' to destroy " - "the following datasets:\n")); - cbp->cb_first = B_FALSE; - cbp->cb_error = B_TRUE; - } - - (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); - } else { - /* - * This is a clone. We only want to report this if the '-r' - * wasn't specified, or the target is a snapshot. - */ - if (!cbp->cb_recurse && - zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) - goto out; - - if (cbp->cb_first) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "%s has dependent clones\n"), - zfs_get_name(cbp->cb_target), - zfs_type_to_name(zfs_get_type(cbp->cb_target))); - (void) fprintf(stderr, gettext("use '-R' to destroy " - "the following datasets:\n")); - cbp->cb_first = B_FALSE; - cbp->cb_error = B_TRUE; - cbp->cb_dryrun = B_TRUE; - } - - (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); - } - -out: - zfs_close(zhp); - return (0); -} - -static int -destroy_callback(zfs_handle_t *zhp, void *data) -{ - destroy_cbdata_t *cb = data; - const char *name = zfs_get_name(zhp); - - if (cb->cb_verbose) { - if (cb->cb_parsable) { - (void) printf("destroy\t%s\n", name); - } else if (cb->cb_dryrun) { - (void) printf(gettext("would destroy %s\n"), - name); - } else { - (void) printf(gettext("will destroy %s\n"), - name); - } - } - - /* - * Ignore pools (which we've already flagged as an error before getting - 
* here). - */ - if (strchr(zfs_get_name(zhp), '/') == NULL && - zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - zfs_close(zhp); - return (0); - } - if (cb->cb_dryrun) { - zfs_close(zhp); - return (0); - } - - /* - * We batch up all contiguous snapshots (even of different - * filesystems) and destroy them with one ioctl. We can't - * simply do all snap deletions and then all fs deletions, - * because we must delete a clone before its origin. - */ - if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { - fnvlist_add_boolean(cb->cb_batchedsnaps, name); - } else { - int error = zfs_destroy_snaps_nvl(g_zfs, - cb->cb_batchedsnaps, B_FALSE); - fnvlist_free(cb->cb_batchedsnaps); - cb->cb_batchedsnaps = fnvlist_alloc(); - - if (error != 0 || - zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || - zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { - zfs_close(zhp); - return (-1); - } - } - - zfs_close(zhp); - return (0); -} - -static int -destroy_print_cb(zfs_handle_t *zhp, void *arg) -{ - destroy_cbdata_t *cb = arg; - const char *name = zfs_get_name(zhp); - int err = 0; - - if (nvlist_exists(cb->cb_nvl, name)) { - if (cb->cb_firstsnap == NULL) - cb->cb_firstsnap = strdup(name); - if (cb->cb_prevsnap != NULL) - free(cb->cb_prevsnap); - /* this snap continues the current range */ - cb->cb_prevsnap = strdup(name); - if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) - nomem(); - if (cb->cb_verbose) { - if (cb->cb_parsable) { - (void) printf("destroy\t%s\n", name); - } else if (cb->cb_dryrun) { - (void) printf(gettext("would destroy %s\n"), - name); - } else { - (void) printf(gettext("will destroy %s\n"), - name); - } - } - } else if (cb->cb_firstsnap != NULL) { - /* end of this range */ - uint64_t used = 0; - err = lzc_snaprange_space(cb->cb_firstsnap, - cb->cb_prevsnap, &used); - cb->cb_snapused += used; - free(cb->cb_firstsnap); - cb->cb_firstsnap = NULL; - free(cb->cb_prevsnap); - cb->cb_prevsnap = NULL; - } - zfs_close(zhp); - return (err); -} - -static int 
-destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) -{ - int err = 0; - assert(cb->cb_firstsnap == NULL); - assert(cb->cb_prevsnap == NULL); - err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb, 0, 0); - if (cb->cb_firstsnap != NULL) { - uint64_t used = 0; - if (err == 0) { - err = lzc_snaprange_space(cb->cb_firstsnap, - cb->cb_prevsnap, &used); - } - cb->cb_snapused += used; - free(cb->cb_firstsnap); - cb->cb_firstsnap = NULL; - free(cb->cb_prevsnap); - cb->cb_prevsnap = NULL; - } - return (err); -} - -static int -snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) -{ - destroy_cbdata_t *cb = arg; - int err = 0; - - /* Check for clones. */ - if (!cb->cb_doclones && !cb->cb_defer_destroy) { - cb->cb_target = zhp; - cb->cb_first = B_TRUE; - err = zfs_iter_dependents(zhp, B_TRUE, - destroy_check_dependent, cb); - } - - if (err == 0) { - if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) - nomem(); - } - zfs_close(zhp); - return (err); -} - -static int -gather_snapshots(zfs_handle_t *zhp, void *arg) -{ - destroy_cbdata_t *cb = arg; - int err = 0; - - err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); - if (err == ENOENT) - err = 0; - if (err != 0) - goto out; - - if (cb->cb_verbose) { - err = destroy_print_snapshots(zhp, cb); - if (err != 0) - goto out; - } - - if (cb->cb_recurse) - err = zfs_iter_filesystems(zhp, gather_snapshots, cb); - -out: - zfs_close(zhp); - return (err); -} - -static int -destroy_clones(destroy_cbdata_t *cb) -{ - nvpair_t *pair; - for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); - pair != NULL; - pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { - zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), - ZFS_TYPE_SNAPSHOT); - if (zhp != NULL) { - boolean_t defer = cb->cb_defer_destroy; - int err = 0; - - /* - * We can't defer destroy non-snapshots, so set it to - * false while destroying the clones. 
- */ - cb->cb_defer_destroy = B_FALSE; - err = zfs_iter_dependents(zhp, B_FALSE, - destroy_callback, cb); - cb->cb_defer_destroy = defer; - zfs_close(zhp); - if (err != 0) - return (err); - } - } - return (0); -} - -static int -zfs_do_destroy(int argc, char **argv) -{ - destroy_cbdata_t cb = { 0 }; - int rv = 0; - int err = 0; - int c; - zfs_handle_t *zhp = NULL; - char *at, *pound; - zfs_type_t type = ZFS_TYPE_DATASET; - - /* check options */ - while ((c = getopt(argc, argv, "vpndfrR")) != -1) { - switch (c) { - case 'v': - cb.cb_verbose = B_TRUE; - break; - case 'p': - cb.cb_verbose = B_TRUE; - cb.cb_parsable = B_TRUE; - break; - case 'n': - cb.cb_dryrun = B_TRUE; - break; - case 'd': - cb.cb_defer_destroy = B_TRUE; - type = ZFS_TYPE_SNAPSHOT; - break; - case 'f': - cb.cb_force = B_TRUE; - break; - case 'r': - cb.cb_recurse = B_TRUE; - break; - case 'R': - cb.cb_recurse = B_TRUE; - cb.cb_doclones = B_TRUE; - break; - case '?': - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc == 0) { - (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - at = strchr(argv[0], '@'); - pound = strchr(argv[0], '#'); - if (at != NULL) { - - /* Build the list of snaps to destroy in cb_nvl. 
*/ - cb.cb_nvl = fnvlist_alloc(); - - *at = '\0'; - zhp = zfs_open(g_zfs, argv[0], - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return (1); - - cb.cb_snapspec = at + 1; - if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || - cb.cb_error) { - rv = 1; - goto out; - } - - if (nvlist_empty(cb.cb_nvl)) { - (void) fprintf(stderr, gettext("could not find any " - "snapshots to destroy; check snapshot names.\n")); - rv = 1; - goto out; - } - - if (cb.cb_verbose) { - char buf[16]; - zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); - if (cb.cb_parsable) { - (void) printf("reclaim\t%llu\n", - cb.cb_snapused); - } else if (cb.cb_dryrun) { - (void) printf(gettext("would reclaim %s\n"), - buf); - } else { - (void) printf(gettext("will reclaim %s\n"), - buf); - } - } - - if (!cb.cb_dryrun) { - if (cb.cb_doclones) { - cb.cb_batchedsnaps = fnvlist_alloc(); - err = destroy_clones(&cb); - if (err == 0) { - err = zfs_destroy_snaps_nvl(g_zfs, - cb.cb_batchedsnaps, B_FALSE); - } - if (err != 0) { - rv = 1; - goto out; - } - } - if (err == 0) { - err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, - cb.cb_defer_destroy); - } - } - - if (err != 0) - rv = 1; - } else if (pound != NULL) { - int err; - nvlist_t *nvl; - - if (cb.cb_dryrun) { - (void) fprintf(stderr, - "dryrun is not supported with bookmark\n"); - return (-1); - } - - if (cb.cb_defer_destroy) { - (void) fprintf(stderr, - "defer destroy is not supported with bookmark\n"); - return (-1); - } - - if (cb.cb_recurse) { - (void) fprintf(stderr, - "recursive is not supported with bookmark\n"); - return (-1); - } - - if (!zfs_bookmark_exists(argv[0])) { - (void) fprintf(stderr, gettext("bookmark '%s' " - "does not exist.\n"), argv[0]); - return (1); - } - - nvl = fnvlist_alloc(); - fnvlist_add_boolean(nvl, argv[0]); - - err = lzc_destroy_bookmarks(nvl, NULL); - if (err != 0) { - (void) zfs_standard_error(g_zfs, err, - "cannot destroy bookmark"); - } - - nvlist_free(cb.cb_nvl); - - return (err); - } else { - /* Open the 
given dataset */ - if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) - return (1); - - cb.cb_target = zhp; - - /* - * Perform an explicit check for pools before going any further. - */ - if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && - zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "operation does not apply to pools\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zfs destroy -r " - "%s' to destroy all datasets in the pool\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zpool destroy %s' " - "to destroy the pool itself\n"), zfs_get_name(zhp)); - rv = 1; - goto out; - } - - /* - * Check for any dependents and/or clones. - */ - cb.cb_first = B_TRUE; - if (!cb.cb_doclones && - zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, - &cb) != 0) { - rv = 1; - goto out; - } - - if (cb.cb_error) { - rv = 1; - goto out; - } - - cb.cb_batchedsnaps = fnvlist_alloc(); - if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, - &cb) != 0) { - rv = 1; - goto out; - } - - /* - * Do the real thing. The callback will close the - * handle regardless of whether it succeeds or not. - */ - err = destroy_callback(zhp, &cb); - zhp = NULL; - if (err == 0) { - err = zfs_destroy_snaps_nvl(g_zfs, - cb.cb_batchedsnaps, cb.cb_defer_destroy); - } - if (err != 0) - rv = 1; - } - -out: - fnvlist_free(cb.cb_batchedsnaps); - fnvlist_free(cb.cb_nvl); - if (zhp != NULL) - zfs_close(zhp); - return (rv); -} - -static boolean_t -is_recvd_column(zprop_get_cbdata_t *cbp) -{ - int i; - zfs_get_column_t col; - - for (i = 0; i < ZFS_GET_NCOLS && - (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) - if (col == GET_COL_RECVD) - return (B_TRUE); - return (B_FALSE); -} - -/* - * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] - * < all | property[,property]... > < fs | snap | vol > ... - * - * -r recurse over any child datasets - * -H scripted mode. 
Headers are stripped, and fields are separated - * by tabs instead of spaces. - * -o Set of fields to display. One of "name,property,value, - * received,source". Default is "name,property,value,source". - * "all" is an alias for all five. - * -s Set of sources to allow. One of - * "local,default,inherited,received,temporary,none". Default is - * all six. - * -p Display values in parsable (literal) format. - * - * Prints properties for the given datasets. The user can control which - * columns to display as well as which property types to allow. - */ - -/* - * Invoked to display the properties for a single dataset. - */ -static int -get_callback(zfs_handle_t *zhp, void *data) -{ - char buf[ZFS_MAXPROPLEN]; - char rbuf[ZFS_MAXPROPLEN]; - zprop_source_t sourcetype; - char source[ZFS_MAX_DATASET_NAME_LEN]; - zprop_get_cbdata_t *cbp = data; - nvlist_t *user_props = zfs_get_user_props(zhp); - zprop_list_t *pl = cbp->cb_proplist; - nvlist_t *propval; - char *strval; - char *sourceval; - boolean_t received = is_recvd_column(cbp); - - for (; pl != NULL; pl = pl->pl_next) { - char *recvdval = NULL; - /* - * Skip the special fake placeholder. This will also skip over - * the name property when 'all' is specified. 
- */ - if (pl->pl_prop == ZFS_PROP_NAME && - pl == cbp->cb_proplist) - continue; - - if (pl->pl_prop != ZPROP_INVAL) { - if (zfs_prop_get(zhp, pl->pl_prop, buf, - sizeof (buf), &sourcetype, source, - sizeof (source), - cbp->cb_literal) != 0) { - if (pl->pl_all) - continue; - if (!zfs_prop_valid_for_type(pl->pl_prop, - ZFS_TYPE_DATASET)) { - (void) fprintf(stderr, - gettext("No such property '%s'\n"), - zfs_prop_to_name(pl->pl_prop)); - continue; - } - sourcetype = ZPROP_SRC_NONE; - (void) strlcpy(buf, "-", sizeof (buf)); - } - - if (received && (zfs_prop_get_recvd(zhp, - zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf), - cbp->cb_literal) == 0)) - recvdval = rbuf; - - zprop_print_one_property(zfs_get_name(zhp), cbp, - zfs_prop_to_name(pl->pl_prop), - buf, sourcetype, source, recvdval); - } else if (zfs_prop_userquota(pl->pl_user_prop)) { - sourcetype = ZPROP_SRC_LOCAL; - - if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, - buf, sizeof (buf), cbp->cb_literal) != 0) { - sourcetype = ZPROP_SRC_NONE; - (void) strlcpy(buf, "-", sizeof (buf)); - } - - zprop_print_one_property(zfs_get_name(zhp), cbp, - pl->pl_user_prop, buf, sourcetype, source, NULL); - } else if (zfs_prop_written(pl->pl_user_prop)) { - sourcetype = ZPROP_SRC_LOCAL; - - if (zfs_prop_get_written(zhp, pl->pl_user_prop, - buf, sizeof (buf), cbp->cb_literal) != 0) { - sourcetype = ZPROP_SRC_NONE; - (void) strlcpy(buf, "-", sizeof (buf)); - } - - zprop_print_one_property(zfs_get_name(zhp), cbp, - pl->pl_user_prop, buf, sourcetype, source, NULL); - } else { - if (nvlist_lookup_nvlist(user_props, - pl->pl_user_prop, &propval) != 0) { - if (pl->pl_all) - continue; - sourcetype = ZPROP_SRC_NONE; - strval = "-"; - } else { - verify(nvlist_lookup_string(propval, - ZPROP_VALUE, &strval) == 0); - verify(nvlist_lookup_string(propval, - ZPROP_SOURCE, &sourceval) == 0); - - if (strcmp(sourceval, - zfs_get_name(zhp)) == 0) { - sourcetype = ZPROP_SRC_LOCAL; - } else if (strcmp(sourceval, - ZPROP_SOURCE_VAL_RECVD) == 0) 
{ - sourcetype = ZPROP_SRC_RECEIVED; - } else { - sourcetype = ZPROP_SRC_INHERITED; - (void) strlcpy(source, - sourceval, sizeof (source)); - } - } - - if (received && (zfs_prop_get_recvd(zhp, - pl->pl_user_prop, rbuf, sizeof (rbuf), - cbp->cb_literal) == 0)) - recvdval = rbuf; - - zprop_print_one_property(zfs_get_name(zhp), cbp, - pl->pl_user_prop, strval, sourcetype, - source, recvdval); - } - } - - return (0); -} - -static int -zfs_do_get(int argc, char **argv) -{ - zprop_get_cbdata_t cb = { 0 }; - int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; - int types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; - char *value, *fields; - int ret = 0; - int limit = 0; - zprop_list_t fake_name = { 0 }; - - /* - * Set up default columns and sources. - */ - cb.cb_sources = ZPROP_SRC_ALL; - cb.cb_columns[0] = GET_COL_NAME; - cb.cb_columns[1] = GET_COL_PROPERTY; - cb.cb_columns[2] = GET_COL_VALUE; - cb.cb_columns[3] = GET_COL_SOURCE; - cb.cb_type = ZFS_TYPE_DATASET; - - /* check options */ - while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { - switch (c) { - case 'p': - cb.cb_literal = B_TRUE; - break; - case 'd': - limit = parse_depth(optarg, &flags); - break; - case 'r': - flags |= ZFS_ITER_RECURSE; - break; - case 'H': - cb.cb_scripted = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case 'o': - /* - * Process the set of columns to display. We zero out - * the structure to give us a blank slate. 
- */ - bzero(&cb.cb_columns, sizeof (cb.cb_columns)); - i = 0; - while (*optarg != '\0') { - static char *col_subopts[] = - { "name", "property", "value", "received", - "source", "all", NULL }; - - if (i == ZFS_GET_NCOLS) { - (void) fprintf(stderr, gettext("too " - "many fields given to -o " - "option\n")); - usage(B_FALSE); - } - - switch (getsubopt(&optarg, col_subopts, - &value)) { - case 0: - cb.cb_columns[i++] = GET_COL_NAME; - break; - case 1: - cb.cb_columns[i++] = GET_COL_PROPERTY; - break; - case 2: - cb.cb_columns[i++] = GET_COL_VALUE; - break; - case 3: - cb.cb_columns[i++] = GET_COL_RECVD; - flags |= ZFS_ITER_RECVD_PROPS; - break; - case 4: - cb.cb_columns[i++] = GET_COL_SOURCE; - break; - case 5: - if (i > 0) { - (void) fprintf(stderr, - gettext("\"all\" conflicts " - "with specific fields " - "given to -o option\n")); - usage(B_FALSE); - } - cb.cb_columns[0] = GET_COL_NAME; - cb.cb_columns[1] = GET_COL_PROPERTY; - cb.cb_columns[2] = GET_COL_VALUE; - cb.cb_columns[3] = GET_COL_RECVD; - cb.cb_columns[4] = GET_COL_SOURCE; - flags |= ZFS_ITER_RECVD_PROPS; - i = ZFS_GET_NCOLS; - break; - default: - (void) fprintf(stderr, - gettext("invalid column name " - "'%s'\n"), suboptarg); - usage(B_FALSE); - } - } - break; - - case 's': - cb.cb_sources = 0; - while (*optarg != '\0') { - static char *source_subopts[] = { - "local", "default", "inherited", - "received", "temporary", "none", - NULL }; - - switch (getsubopt(&optarg, source_subopts, - &value)) { - case 0: - cb.cb_sources |= ZPROP_SRC_LOCAL; - break; - case 1: - cb.cb_sources |= ZPROP_SRC_DEFAULT; - break; - case 2: - cb.cb_sources |= ZPROP_SRC_INHERITED; - break; - case 3: - cb.cb_sources |= ZPROP_SRC_RECEIVED; - break; - case 4: - cb.cb_sources |= ZPROP_SRC_TEMPORARY; - break; - case 5: - cb.cb_sources |= ZPROP_SRC_NONE; - break; - default: - (void) fprintf(stderr, - gettext("invalid source " - "'%s'\n"), suboptarg); - usage(B_FALSE); - } - } - break; - - case 't': - types = 0; - flags &= 
~ZFS_ITER_PROP_LISTSNAPS; - while (*optarg != '\0') { - static char *type_subopts[] = { "filesystem", - "volume", "snapshot", "bookmark", - "all", NULL }; - - switch (getsubopt(&optarg, type_subopts, - &value)) { - case 0: - types |= ZFS_TYPE_FILESYSTEM; - break; - case 1: - types |= ZFS_TYPE_VOLUME; - break; - case 2: - types |= ZFS_TYPE_SNAPSHOT; - break; - case 3: - types |= ZFS_TYPE_BOOKMARK; - break; - case 4: - types = ZFS_TYPE_DATASET | - ZFS_TYPE_BOOKMARK; - break; - - default: - (void) fprintf(stderr, - gettext("invalid type '%s'\n"), - suboptarg); - usage(B_FALSE); - } - } - break; - - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing property " - "argument\n")); - usage(B_FALSE); - } - - fields = argv[0]; - - if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) - != 0) - usage(B_FALSE); - - argc--; - argv++; - - /* - * As part of zfs_expand_proplist(), we keep track of the maximum column - * width for each property. For the 'NAME' (and 'SOURCE') columns, we - * need to know the maximum name length. However, the user likely did - * not specify 'name' as one of the properties to fetch, so we need to - * make sure we always include at least this property for - * print_get_headers() to work properly. - */ - if (cb.cb_proplist != NULL) { - fake_name.pl_prop = ZFS_PROP_NAME; - fake_name.pl_width = strlen(gettext("NAME")); - fake_name.pl_next = cb.cb_proplist; - cb.cb_proplist = &fake_name; - } - - cb.cb_first = B_TRUE; - - /* run for each object */ - ret = zfs_for_each(argc, argv, flags, types, NULL, - &cb.cb_proplist, limit, get_callback, &cb); - - if (cb.cb_proplist == &fake_name) - zprop_free_list(fake_name.pl_next); - else - zprop_free_list(cb.cb_proplist); - - return (ret); -} - -/* - * inherit [-rS] <property> <fs|vol> ... 
- * - * -r Recurse over all children - * -S Revert to received value, if any - * - * For each dataset specified on the command line, inherit the given property - * from its parent. Inheriting a property at the pool level will cause it to - * use the default value. The '-r' flag will recurse over all children, and is - * useful for setting a property on a hierarchy-wide basis, regardless of any - * local modifications for each dataset. - */ - -typedef struct inherit_cbdata { - const char *cb_propname; - boolean_t cb_received; -} inherit_cbdata_t; - -static int -inherit_recurse_cb(zfs_handle_t *zhp, void *data) -{ - inherit_cbdata_t *cb = data; - zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname); - - /* - * If we're doing it recursively, then ignore properties that - * are not valid for this type of dataset. - */ - if (prop != ZPROP_INVAL && - !zfs_prop_valid_for_type(prop, zfs_get_type(zhp))) - return (0); - - return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); -} - -static int -inherit_cb(zfs_handle_t *zhp, void *data) -{ - inherit_cbdata_t *cb = data; - - return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); -} - -static int -zfs_do_inherit(int argc, char **argv) -{ - int c; - zfs_prop_t prop; - inherit_cbdata_t cb = { 0 }; - char *propname; - int ret = 0; - int flags = 0; - boolean_t received = B_FALSE; - - /* check options */ - while ((c = getopt(argc, argv, "rS")) != -1) { - switch (c) { - case 'r': - flags |= ZFS_ITER_RECURSE; - break; - case 'S': - received = B_TRUE; - break; - case '?': - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing property argument\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(B_FALSE); - } - - propname = argv[0]; - argc--; - argv++; - - if 
((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { - if (zfs_prop_readonly(prop)) { - (void) fprintf(stderr, gettext( - "%s property is read-only\n"), - propname); - return (1); - } - if (!zfs_prop_inheritable(prop) && !received) { - (void) fprintf(stderr, gettext("'%s' property cannot " - "be inherited\n"), propname); - if (prop == ZFS_PROP_QUOTA || - prop == ZFS_PROP_RESERVATION || - prop == ZFS_PROP_REFQUOTA || - prop == ZFS_PROP_REFRESERVATION) { - (void) fprintf(stderr, gettext("use 'zfs set " - "%s=none' to clear\n"), propname); - (void) fprintf(stderr, gettext("use 'zfs " - "inherit -S %s' to revert to received " - "value\n"), propname); - } - return (1); - } - if (received && (prop == ZFS_PROP_VOLSIZE || - prop == ZFS_PROP_VERSION)) { - (void) fprintf(stderr, gettext("'%s' property cannot " - "be reverted to a received value\n"), propname); - return (1); - } - } else if (!zfs_prop_user(propname)) { - (void) fprintf(stderr, gettext("invalid property '%s'\n"), - propname); - usage(B_FALSE); - } - - cb.cb_propname = propname; - cb.cb_received = received; - - if (flags & ZFS_ITER_RECURSE) { - ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, - NULL, NULL, 0, inherit_recurse_cb, &cb); - } else { - ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, - NULL, NULL, 0, inherit_cb, &cb); - } - - return (ret); -} - -typedef struct upgrade_cbdata { - uint64_t cb_numupgraded; - uint64_t cb_numsamegraded; - uint64_t cb_numfailed; - uint64_t cb_version; - boolean_t cb_newer; - boolean_t cb_foundone; - char cb_lastfs[ZFS_MAX_DATASET_NAME_LEN]; -} upgrade_cbdata_t; - -static int -same_pool(zfs_handle_t *zhp, const char *name) -{ - int len1 = strcspn(name, "/@"); - const char *zhname = zfs_get_name(zhp); - int len2 = strcspn(zhname, "/@"); - - if (len1 != len2) - return (B_FALSE); - return (strncmp(name, zhname, len1) == 0); -} - -static int -upgrade_list_callback(zfs_handle_t *zhp, void *data) -{ - upgrade_cbdata_t *cb = data; - int version = 
zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - - /* list if it's old/new */ - if ((!cb->cb_newer && version < ZPL_VERSION) || - (cb->cb_newer && version > ZPL_VERSION)) { - char *str; - if (cb->cb_newer) { - str = gettext("The following filesystems are " - "formatted using a newer software version and\n" - "cannot be accessed on the current system.\n\n"); - } else { - str = gettext("The following filesystems are " - "out of date, and can be upgraded. After being\n" - "upgraded, these filesystems (and any 'zfs send' " - "streams generated from\n" - "subsequent snapshots) will no longer be " - "accessible by older software versions.\n\n"); - } - - if (!cb->cb_foundone) { - (void) puts(str); - (void) printf(gettext("VER FILESYSTEM\n")); - (void) printf(gettext("--- ------------\n")); - cb->cb_foundone = B_TRUE; - } - - (void) printf("%2u %s\n", version, zfs_get_name(zhp)); - } - - return (0); -} - -static int -upgrade_set_callback(zfs_handle_t *zhp, void *data) -{ - upgrade_cbdata_t *cb = data; - int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - int needed_spa_version; - int spa_version; - - if (zfs_spa_version(zhp, &spa_version) < 0) - return (-1); - - needed_spa_version = zfs_spa_version_map(cb->cb_version); - - if (needed_spa_version < 0) - return (-1); - - if (spa_version < needed_spa_version) { - /* can't upgrade */ - (void) printf(gettext("%s: can not be " - "upgraded; the pool version needs to first " - "be upgraded\nto version %d\n\n"), - zfs_get_name(zhp), needed_spa_version); - cb->cb_numfailed++; - return (0); - } - - /* upgrade */ - if (version < cb->cb_version) { - char verstr[16]; - (void) snprintf(verstr, sizeof (verstr), - "%llu", cb->cb_version); - if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) { - /* - * If they did "zfs upgrade -a", then we could - * be doing ioctls to different pools. We need - * to log this history once to each pool, and bypass - * the normal history logging that happens in main(). 
- */ - (void) zpool_log_history(g_zfs, history_str); - log_history = B_FALSE; - } - if (zfs_prop_set(zhp, "version", verstr) == 0) - cb->cb_numupgraded++; - else - cb->cb_numfailed++; - (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp)); - } else if (version > cb->cb_version) { - /* can't downgrade */ - (void) printf(gettext("%s: can not be downgraded; " - "it is already at version %u\n"), - zfs_get_name(zhp), version); - cb->cb_numfailed++; - } else { - cb->cb_numsamegraded++; - } - return (0); -} - -/* - * zfs upgrade - * zfs upgrade -v - * zfs upgrade [-r] [-V <version>] <-a | filesystem> - */ -static int -zfs_do_upgrade(int argc, char **argv) -{ - boolean_t all = B_FALSE; - boolean_t showversions = B_FALSE; - int ret = 0; - upgrade_cbdata_t cb = { 0 }; - int c; - int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; - - /* check options */ - while ((c = getopt(argc, argv, "rvV:a")) != -1) { - switch (c) { - case 'r': - flags |= ZFS_ITER_RECURSE; - break; - case 'v': - showversions = B_TRUE; - break; - case 'V': - if (zfs_prop_string_to_index(ZFS_PROP_VERSION, - optarg, &cb.cb_version) != 0) { - (void) fprintf(stderr, - gettext("invalid version %s\n"), optarg); - usage(B_FALSE); - } - break; - case 'a': - all = B_TRUE; - break; - case '?': - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version)) - usage(B_FALSE); - if (showversions && (flags & ZFS_ITER_RECURSE || all || - cb.cb_version || argc)) - usage(B_FALSE); - if ((all || argc) && (showversions)) - usage(B_FALSE); - if (all && argc) - usage(B_FALSE); - - if (showversions) { - /* Show info on available versions. 
*/ - (void) printf(gettext("The following filesystem versions are " - "supported:\n\n")); - (void) printf(gettext("VER DESCRIPTION\n")); - (void) printf("--- -----------------------------------------" - "---------------\n"); - (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); - (void) printf(gettext(" 2 Enhanced directory entries\n")); - (void) printf(gettext(" 3 Case insensitive and filesystem " - "user identifier (FUID)\n")); - (void) printf(gettext(" 4 userquota, groupquota " - "properties\n")); - (void) printf(gettext(" 5 System attributes\n")); - (void) printf(gettext("\nFor more information on a particular " - "version, including supported releases,\n")); - (void) printf("see the ZFS Administration Guide.\n\n"); - ret = 0; - } else if (argc || all) { - /* Upgrade filesystems */ - if (cb.cb_version == 0) - cb.cb_version = ZPL_VERSION; - ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, 0, upgrade_set_callback, &cb); - (void) printf(gettext("%llu filesystems upgraded\n"), - cb.cb_numupgraded); - if (cb.cb_numsamegraded) { - (void) printf(gettext("%llu filesystems already at " - "this version\n"), - cb.cb_numsamegraded); - } - if (cb.cb_numfailed != 0) - ret = 1; - } else { - /* List old-version filesystems */ - boolean_t found; - (void) printf(gettext("This system is currently running " - "ZFS filesystem version %llu.\n\n"), ZPL_VERSION); - - flags |= ZFS_ITER_RECURSE; - ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, 0, upgrade_list_callback, &cb); - - found = cb.cb_foundone; - cb.cb_foundone = B_FALSE; - cb.cb_newer = B_TRUE; - - ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, 0, upgrade_list_callback, &cb); - - if (!cb.cb_foundone && !found) { - (void) printf(gettext("All filesystems are " - "formatted with the current version.\n")); - } - } - - return (ret); -} - -/* - * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...] - * [-S field [-S field]...] 
[-t type[,...]] filesystem | snapshot - * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...] - * [-S field [-S field]...] [-t type[,...]] filesystem | snapshot - * - * -H Scripted mode; elide headers and separate columns by tabs. - * -i Translate SID to POSIX ID. - * -n Print numeric ID instead of user/group name. - * -o Control which fields to display. - * -p Use exact (parsable) numeric output. - * -s Specify sort columns, descending order. - * -S Specify sort columns, ascending order. - * -t Control which object types to display. - * - * Displays space consumed by, and quotas on, each user in the specified - * filesystem or snapshot. - */ - -/* us_field_types, us_field_hdr and us_field_names should be kept in sync */ -enum us_field_types { - USFIELD_TYPE, - USFIELD_NAME, - USFIELD_USED, - USFIELD_QUOTA -}; -static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; -static char *us_field_names[] = { "type", "name", "used", "quota" }; -#define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *)) - -#define USTYPE_PSX_GRP (1 << 0) -#define USTYPE_PSX_USR (1 << 1) -#define USTYPE_SMB_GRP (1 << 2) -#define USTYPE_SMB_USR (1 << 3) -#define USTYPE_ALL \ - (USTYPE_PSX_GRP | USTYPE_PSX_USR | USTYPE_SMB_GRP | USTYPE_SMB_USR) - -static int us_type_bits[] = { - USTYPE_PSX_GRP, - USTYPE_PSX_USR, - USTYPE_SMB_GRP, - USTYPE_SMB_USR, - USTYPE_ALL -}; -static char *us_type_names[] = { "posixgroup", "posixuser", "smbgroup", - "smbuser", "all" }; - -typedef struct us_node { - nvlist_t *usn_nvl; - uu_avl_node_t usn_avlnode; - uu_list_node_t usn_listnode; -} us_node_t; - -typedef struct us_cbdata { - nvlist_t **cb_nvlp; - uu_avl_pool_t *cb_avl_pool; - uu_avl_t *cb_avl; - boolean_t cb_numname; - boolean_t cb_nicenum; - boolean_t cb_sid2posix; - zfs_userquota_prop_t cb_prop; - zfs_sort_column_t *cb_sortcol; - size_t cb_width[USFIELD_LAST]; -} us_cbdata_t; - -static boolean_t us_populated = B_FALSE; - -typedef struct { - zfs_sort_column_t *si_sortcol; - 
boolean_t si_numname; -} us_sort_info_t; - -static int -us_field_index(char *field) -{ - int i; - - for (i = 0; i < USFIELD_LAST; i++) { - if (strcmp(field, us_field_names[i]) == 0) - return (i); - } - - return (-1); -} - -static int -us_compare(const void *larg, const void *rarg, void *unused) -{ - const us_node_t *l = larg; - const us_node_t *r = rarg; - us_sort_info_t *si = (us_sort_info_t *)unused; - zfs_sort_column_t *sortcol = si->si_sortcol; - boolean_t numname = si->si_numname; - nvlist_t *lnvl = l->usn_nvl; - nvlist_t *rnvl = r->usn_nvl; - int rc = 0; - boolean_t lvb, rvb; - - for (; sortcol != NULL; sortcol = sortcol->sc_next) { - char *lvstr = ""; - char *rvstr = ""; - uint32_t lv32 = 0; - uint32_t rv32 = 0; - uint64_t lv64 = 0; - uint64_t rv64 = 0; - zfs_prop_t prop = sortcol->sc_prop; - const char *propname = NULL; - boolean_t reverse = sortcol->sc_reverse; - - switch (prop) { - case ZFS_PROP_TYPE: - propname = "type"; - (void) nvlist_lookup_uint32(lnvl, propname, &lv32); - (void) nvlist_lookup_uint32(rnvl, propname, &rv32); - if (rv32 != lv32) - rc = (rv32 < lv32) ? 1 : -1; - break; - case ZFS_PROP_NAME: - propname = "name"; - if (numname) { -compare_nums: - (void) nvlist_lookup_uint64(lnvl, propname, - &lv64); - (void) nvlist_lookup_uint64(rnvl, propname, - &rv64); - if (rv64 != lv64) - rc = (rv64 < lv64) ? 1 : -1; - } else { - if ((nvlist_lookup_string(lnvl, propname, - &lvstr) == ENOENT) || - (nvlist_lookup_string(rnvl, propname, - &rvstr) == ENOENT)) { - goto compare_nums; - } - rc = strcmp(lvstr, rvstr); - } - break; - case ZFS_PROP_USED: - case ZFS_PROP_QUOTA: - if (!us_populated) - break; - if (prop == ZFS_PROP_USED) - propname = "used"; - else - propname = "quota"; - (void) nvlist_lookup_uint64(lnvl, propname, &lv64); - (void) nvlist_lookup_uint64(rnvl, propname, &rv64); - if (rv64 != lv64) - rc = (rv64 < lv64) ? 1 : -1; - break; - - default: - break; - } - - if (rc != 0) { - if (rc < 0) - return (reverse ? 1 : -1); - else - return (reverse ? 
-1 : 1); - } - } - - /* - * If entries still seem to be the same, check if they are of the same - * type (smbentity is added only if we are doing SID to POSIX ID - * translation where we can have duplicate type/name combinations). - */ - if (nvlist_lookup_boolean_value(lnvl, "smbentity", &lvb) == 0 && - nvlist_lookup_boolean_value(rnvl, "smbentity", &rvb) == 0 && - lvb != rvb) - return (lvb < rvb ? -1 : 1); - - return (0); -} - -static inline const char * -us_type2str(unsigned field_type) -{ - switch (field_type) { - case USTYPE_PSX_USR: - return ("POSIX User"); - case USTYPE_PSX_GRP: - return ("POSIX Group"); - case USTYPE_SMB_USR: - return ("SMB User"); - case USTYPE_SMB_GRP: - return ("SMB Group"); - default: - return ("Undefined"); - } -} - -static int -userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) -{ - us_cbdata_t *cb = (us_cbdata_t *)arg; - zfs_userquota_prop_t prop = cb->cb_prop; - char *name = NULL; - char *propname; - char sizebuf[32]; - us_node_t *node; - uu_avl_pool_t *avl_pool = cb->cb_avl_pool; - uu_avl_t *avl = cb->cb_avl; - uu_avl_index_t idx; - nvlist_t *props; - us_node_t *n; - zfs_sort_column_t *sortcol = cb->cb_sortcol; - unsigned type = 0; - const char *typestr; - size_t namelen; - size_t typelen; - size_t sizelen; - int typeidx, nameidx, sizeidx; - us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; - boolean_t smbentity = B_FALSE; - - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - node = safe_malloc(sizeof (us_node_t)); - uu_avl_node_init(node, &node->usn_avlnode, avl_pool); - node->usn_nvl = props; - - if (domain != NULL && domain[0] != '\0') { - /* SMB */ - char sid[MAXNAMELEN + 32]; - uid_t id; -#ifdef illumos - int err; - int flag = IDMAP_REQ_FLG_USE_CACHE; -#endif - - smbentity = B_TRUE; - - (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); - - if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { - type = USTYPE_SMB_GRP; -#ifdef illumos - err = sid_to_id(sid, B_FALSE, &id); 
-#endif - } else { - type = USTYPE_SMB_USR; -#ifdef illumos - err = sid_to_id(sid, B_TRUE, &id); -#endif - } - -#ifdef illumos - if (err == 0) { - rid = id; - if (!cb->cb_sid2posix) { - if (type == USTYPE_SMB_USR) { - (void) idmap_getwinnamebyuid(rid, flag, - &name, NULL); - } else { - (void) idmap_getwinnamebygid(rid, flag, - &name, NULL); - } - if (name == NULL) - name = sid; - } - } -#endif - } - - if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') { - /* POSIX or -i */ - if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { - type = USTYPE_PSX_GRP; - if (!cb->cb_numname) { - struct group *g; - - if ((g = getgrgid(rid)) != NULL) - name = g->gr_name; - } - } else { - type = USTYPE_PSX_USR; - if (!cb->cb_numname) { - struct passwd *p; - - if ((p = getpwuid(rid)) != NULL) - name = p->pw_name; - } - } - } - - /* - * Make sure that the type/name combination is unique when doing - * SID to POSIX ID translation (hence changing the type from SMB to - * POSIX). - */ - if (cb->cb_sid2posix && - nvlist_add_boolean_value(props, "smbentity", smbentity) != 0) - nomem(); - - /* Calculate/update width of TYPE field */ - typestr = us_type2str(type); - typelen = strlen(gettext(typestr)); - typeidx = us_field_index("type"); - if (typelen > cb->cb_width[typeidx]) - cb->cb_width[typeidx] = typelen; - if (nvlist_add_uint32(props, "type", type) != 0) - nomem(); - - /* Calculate/update width of NAME field */ - if ((cb->cb_numname && cb->cb_sid2posix) || name == NULL) { - if (nvlist_add_uint64(props, "name", rid) != 0) - nomem(); - namelen = snprintf(NULL, 0, "%u", rid); - } else { - if (nvlist_add_string(props, "name", name) != 0) - nomem(); - namelen = strlen(name); - } - nameidx = us_field_index("name"); - if (namelen > cb->cb_width[nameidx]) - cb->cb_width[nameidx] = namelen; - - /* - * Check if this type/name combination is in the list and update it; - * otherwise add new node to the list. 
- */ - if ((n = uu_avl_find(avl, node, &sortinfo, &idx)) == NULL) { - uu_avl_insert(avl, node, idx); - } else { - nvlist_free(props); - free(node); - node = n; - props = node->usn_nvl; - } - - /* Calculate/update width of USED/QUOTA fields */ - if (cb->cb_nicenum) - zfs_nicenum(space, sizebuf, sizeof (sizebuf)); - else - (void) snprintf(sizebuf, sizeof (sizebuf), "%llu", space); - sizelen = strlen(sizebuf); - if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) { - propname = "used"; - if (!nvlist_exists(props, "quota")) - (void) nvlist_add_uint64(props, "quota", 0); - } else { - propname = "quota"; - if (!nvlist_exists(props, "used")) - (void) nvlist_add_uint64(props, "used", 0); - } - sizeidx = us_field_index(propname); - if (sizelen > cb->cb_width[sizeidx]) - cb->cb_width[sizeidx] = sizelen; - - if (nvlist_add_uint64(props, propname, space) != 0) - nomem(); - - return (0); -} - -static void -print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, - size_t *width, us_node_t *node) -{ - nvlist_t *nvl = node->usn_nvl; - char valstr[MAXNAMELEN]; - boolean_t first = B_TRUE; - int cfield = 0; - int field; - uint32_t ustype; - - /* Check type */ - (void) nvlist_lookup_uint32(nvl, "type", &ustype); - if (!(ustype & types)) - return; - - while ((field = fields[cfield]) != USFIELD_LAST) { - nvpair_t *nvp = NULL; - data_type_t type; - uint32_t val32; - uint64_t val64; - char *strval = NULL; - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - if (strcmp(nvpair_name(nvp), - us_field_names[field]) == 0) - break; - } - - type = nvpair_type(nvp); - switch (type) { - case DATA_TYPE_UINT32: - (void) nvpair_value_uint32(nvp, &val32); - break; - case DATA_TYPE_UINT64: - (void) nvpair_value_uint64(nvp, &val64); - break; - case DATA_TYPE_STRING: - (void) nvpair_value_string(nvp, &strval); - break; - default: - (void) fprintf(stderr, "invalid data type\n"); - } - - switch (field) { - case USFIELD_TYPE: - strval = (char *)us_type2str(val32); - 
break; - case USFIELD_NAME: - if (type == DATA_TYPE_UINT64) { - (void) sprintf(valstr, "%llu", val64); - strval = valstr; - } - break; - case USFIELD_USED: - case USFIELD_QUOTA: - if (type == DATA_TYPE_UINT64) { - if (parsable) { - (void) sprintf(valstr, "%llu", val64); - } else { - zfs_nicenum(val64, valstr, - sizeof (valstr)); - } - if (field == USFIELD_QUOTA && - strcmp(valstr, "0") == 0) - strval = "none"; - else - strval = valstr; - } - break; - } - - if (!first) { - if (scripted) - (void) printf("\t"); - else - (void) printf(" "); - } - if (scripted) - (void) printf("%s", strval); - else if (field == USFIELD_TYPE || field == USFIELD_NAME) - (void) printf("%-*s", width[field], strval); - else - (void) printf("%*s", width[field], strval); - - first = B_FALSE; - cfield++; - } - - (void) printf("\n"); -} - -static void -print_us(boolean_t scripted, boolean_t parsable, int *fields, int types, - size_t *width, boolean_t rmnode, uu_avl_t *avl) -{ - us_node_t *node; - const char *col; - int cfield = 0; - int field; - - if (!scripted) { - boolean_t first = B_TRUE; - - while ((field = fields[cfield]) != USFIELD_LAST) { - col = gettext(us_field_hdr[field]); - if (field == USFIELD_TYPE || field == USFIELD_NAME) { - (void) printf(first ? "%-*s" : " %-*s", - width[field], col); - } else { - (void) printf(first ? 
"%*s" : " %*s", - width[field], col); - } - first = B_FALSE; - cfield++; - } - (void) printf("\n"); - } - - for (node = uu_avl_first(avl); node; node = uu_avl_next(avl, node)) { - print_us_node(scripted, parsable, fields, types, width, node); - if (rmnode) - nvlist_free(node->usn_nvl); - } -} - -static int -zfs_do_userspace(int argc, char **argv) -{ - zfs_handle_t *zhp; - zfs_userquota_prop_t p; - - uu_avl_pool_t *avl_pool; - uu_avl_t *avl_tree; - uu_avl_walk_t *walk; - char *delim; - char deffields[] = "type,name,used,quota"; - char *ofield = NULL; - char *tfield = NULL; - int cfield = 0; - int fields[256]; - int i; - boolean_t scripted = B_FALSE; - boolean_t prtnum = B_FALSE; - boolean_t parsable = B_FALSE; - boolean_t sid2posix = B_FALSE; - int ret = 0; - int c; - zfs_sort_column_t *sortcol = NULL; - int types = USTYPE_PSX_USR | USTYPE_SMB_USR; - us_cbdata_t cb; - us_node_t *node; - us_node_t *rmnode; - uu_list_pool_t *listpool; - uu_list_t *list; - uu_avl_index_t idx = 0; - uu_list_index_t idx2 = 0; - - if (argc < 2) - usage(B_FALSE); - - if (strcmp(argv[0], "groupspace") == 0) - /* Toggle default group types */ - types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; - - while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { - switch (c) { - case 'n': - prtnum = B_TRUE; - break; - case 'H': - scripted = B_TRUE; - break; - case 'p': - parsable = B_TRUE; - break; - case 'o': - ofield = optarg; - break; - case 's': - case 'S': - if (zfs_add_sort_column(&sortcol, optarg, - c == 's' ? 
B_FALSE : B_TRUE) != 0) { - (void) fprintf(stderr, - gettext("invalid field '%s'\n"), optarg); - usage(B_FALSE); - } - break; - case 't': - tfield = optarg; - break; - case 'i': - sid2posix = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing dataset name\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - /* Use default output fields if not specified using -o */ - if (ofield == NULL) - ofield = deffields; - do { - if ((delim = strchr(ofield, ',')) != NULL) - *delim = '\0'; - if ((fields[cfield++] = us_field_index(ofield)) == -1) { - (void) fprintf(stderr, gettext("invalid type '%s' " - "for -o option\n"), ofield); - return (-1); - } - if (delim != NULL) - ofield = delim + 1; - } while (delim != NULL); - fields[cfield] = USFIELD_LAST; - - /* Override output types (-t option) */ - if (tfield != NULL) { - types = 0; - - do { - boolean_t found = B_FALSE; - - if ((delim = strchr(tfield, ',')) != NULL) - *delim = '\0'; - for (i = 0; i < sizeof (us_type_bits) / sizeof (int); - i++) { - if (strcmp(tfield, us_type_names[i]) == 0) { - found = B_TRUE; - types |= us_type_bits[i]; - break; - } - } - if (!found) { - (void) fprintf(stderr, gettext("invalid type " - "'%s' for -t option\n"), tfield); - return (-1); - } - if (delim != NULL) - tfield = delim + 1; - } while (delim != NULL); - } - - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) - return (1); - - if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), - offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) - nomem(); - if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) - nomem(); - - /* 
Always add default sorting columns */ - (void) zfs_add_sort_column(&sortcol, "type", B_FALSE); - (void) zfs_add_sort_column(&sortcol, "name", B_FALSE); - - cb.cb_sortcol = sortcol; - cb.cb_numname = prtnum; - cb.cb_nicenum = !parsable; - cb.cb_avl_pool = avl_pool; - cb.cb_avl = avl_tree; - cb.cb_sid2posix = sid2posix; - - for (i = 0; i < USFIELD_LAST; i++) - cb.cb_width[i] = strlen(gettext(us_field_hdr[i])); - - for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { - if (((p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA) && - !(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) || - ((p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) && - !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP)))) - continue; - cb.cb_prop = p; - if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0) - return (ret); - } - - /* Sort the list */ - if ((node = uu_avl_first(avl_tree)) == NULL) - return (0); - - us_populated = B_TRUE; - - listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), - offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); - list = uu_list_create(listpool, NULL, UU_DEFAULT); - uu_list_node_init(node, &node->usn_listnode, listpool); - - while (node != NULL) { - rmnode = node; - node = uu_avl_next(avl_tree, node); - uu_avl_remove(avl_tree, rmnode); - if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) - uu_list_insert(list, rmnode, idx2); - } - - for (node = uu_list_first(list); node != NULL; - node = uu_list_next(list, node)) { - us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; - - if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) - uu_avl_insert(avl_tree, node, idx); - } - - uu_list_destroy(list); - uu_list_pool_destroy(listpool); - - /* Print and free node nvlist memory */ - print_us(scripted, parsable, fields, types, cb.cb_width, B_TRUE, - cb.cb_avl); - - zfs_free_sort_columns(sortcol); - - /* Clean up the AVL tree */ - if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) - nomem(); - - while ((node = uu_avl_walk_next(walk)) != NULL) { - 
uu_avl_remove(cb.cb_avl, node); - free(node); - } - - uu_avl_walk_end(walk); - uu_avl_destroy(avl_tree); - uu_avl_pool_destroy(avl_pool); - - return (ret); -} - -/* - * list [-Hp][-r|-d max] [-o property[,...]] [-s property] ... [-S property] ... - * [-t type[,...]] [filesystem|volume|snapshot] ... - * - * -H Scripted mode; elide headers and separate columns by tabs. - * -p Display values in parsable (literal) format. - * -r Recurse over all children. - * -d Limit recursion by depth. - * -o Control which fields to display. - * -s Specify sort columns, descending order. - * -S Specify sort columns, ascending order. - * -t Control which object types to display. - * - * When given no arguments, list all filesystems in the system. - * Otherwise, list the specified datasets, optionally recursing down them if - * '-r' is specified. - */ -typedef struct list_cbdata { - boolean_t cb_first; - boolean_t cb_literal; - boolean_t cb_scripted; - zprop_list_t *cb_proplist; -} list_cbdata_t; - -/* - * Given a list of columns to display, output appropriate headers for each one. 
- */ -static void -print_header(list_cbdata_t *cb) -{ - zprop_list_t *pl = cb->cb_proplist; - char headerbuf[ZFS_MAXPROPLEN]; - const char *header; - int i; - boolean_t first = B_TRUE; - boolean_t right_justify; - - for (; pl != NULL; pl = pl->pl_next) { - if (!first) { - (void) printf(" "); - } else { - first = B_FALSE; - } - - right_justify = B_FALSE; - if (pl->pl_prop != ZPROP_INVAL) { - header = zfs_prop_column_name(pl->pl_prop); - right_justify = zfs_prop_align_right(pl->pl_prop); - } else { - for (i = 0; pl->pl_user_prop[i] != '\0'; i++) - headerbuf[i] = toupper(pl->pl_user_prop[i]); - headerbuf[i] = '\0'; - header = headerbuf; - } - - if (pl->pl_next == NULL && !right_justify) - (void) printf("%s", header); - else if (right_justify) - (void) printf("%*s", pl->pl_width, header); - else - (void) printf("%-*s", pl->pl_width, header); - } - - (void) printf("\n"); -} - -/* - * Given a dataset and a list of fields, print out all the properties according - * to the described layout. - */ -static void -print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) -{ - zprop_list_t *pl = cb->cb_proplist; - boolean_t first = B_TRUE; - char property[ZFS_MAXPROPLEN]; - nvlist_t *userprops = zfs_get_user_props(zhp); - nvlist_t *propval; - char *propstr; - boolean_t right_justify; - - for (; pl != NULL; pl = pl->pl_next) { - if (!first) { - if (cb->cb_scripted) - (void) printf("\t"); - else - (void) printf(" "); - } else { - first = B_FALSE; - } - - if (pl->pl_prop == ZFS_PROP_NAME) { - (void) strlcpy(property, zfs_get_name(zhp), - sizeof (property)); - propstr = property; - right_justify = zfs_prop_align_right(pl->pl_prop); - } else if (pl->pl_prop != ZPROP_INVAL) { - if (zfs_prop_get(zhp, pl->pl_prop, property, - sizeof (property), NULL, NULL, 0, - cb->cb_literal) != 0) - propstr = "-"; - else - propstr = property; - right_justify = zfs_prop_align_right(pl->pl_prop); - } else if (zfs_prop_userquota(pl->pl_user_prop)) { - if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, - 
property, sizeof (property), cb->cb_literal) != 0) - propstr = "-"; - else - propstr = property; - right_justify = B_TRUE; - } else if (zfs_prop_written(pl->pl_user_prop)) { - if (zfs_prop_get_written(zhp, pl->pl_user_prop, - property, sizeof (property), cb->cb_literal) != 0) - propstr = "-"; - else - propstr = property; - right_justify = B_TRUE; - } else { - if (nvlist_lookup_nvlist(userprops, - pl->pl_user_prop, &propval) != 0) - propstr = "-"; - else - verify(nvlist_lookup_string(propval, - ZPROP_VALUE, &propstr) == 0); - right_justify = B_FALSE; - } - - /* - * If this is being called in scripted mode, or if this is the - * last column and it is left-justified, don't include a width - * format specifier. - */ - if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) - (void) printf("%s", propstr); - else if (right_justify) - (void) printf("%*s", pl->pl_width, propstr); - else - (void) printf("%-*s", pl->pl_width, propstr); - } - - (void) printf("\n"); -} - -/* - * Generic callback function to list a dataset or snapshot. 
- */ -static int -list_callback(zfs_handle_t *zhp, void *data) -{ - list_cbdata_t *cbp = data; - - if (cbp->cb_first) { - if (!cbp->cb_scripted) - print_header(cbp); - cbp->cb_first = B_FALSE; - } - - print_dataset(zhp, cbp); - - return (0); -} - -static int -zfs_do_list(int argc, char **argv) -{ - int c; - static char default_fields[] = - "name,used,available,referenced,mountpoint"; - int types = ZFS_TYPE_DATASET; - boolean_t types_specified = B_FALSE; - char *fields = NULL; - list_cbdata_t cb = { 0 }; - char *value; - int limit = 0; - int ret = 0; - zfs_sort_column_t *sortcol = NULL; - int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; - - /* check options */ - while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { - switch (c) { - case 'o': - fields = optarg; - break; - case 'p': - cb.cb_literal = B_TRUE; - flags |= ZFS_ITER_LITERAL_PROPS; - break; - case 'd': - limit = parse_depth(optarg, &flags); - break; - case 'r': - flags |= ZFS_ITER_RECURSE; - break; - case 'H': - cb.cb_scripted = B_TRUE; - break; - case 's': - if (zfs_add_sort_column(&sortcol, optarg, - B_FALSE) != 0) { - (void) fprintf(stderr, - gettext("invalid property '%s'\n"), optarg); - usage(B_FALSE); - } - break; - case 'S': - if (zfs_add_sort_column(&sortcol, optarg, - B_TRUE) != 0) { - (void) fprintf(stderr, - gettext("invalid property '%s'\n"), optarg); - usage(B_FALSE); - } - break; - case 't': - types = 0; - types_specified = B_TRUE; - flags &= ~ZFS_ITER_PROP_LISTSNAPS; - while (*optarg != '\0') { - static char *type_subopts[] = { "filesystem", - "volume", "snapshot", "snap", "bookmark", - "all", NULL }; - - switch (getsubopt(&optarg, type_subopts, - &value)) { - case 0: - types |= ZFS_TYPE_FILESYSTEM; - break; - case 1: - types |= ZFS_TYPE_VOLUME; - break; - case 2: - case 3: - types |= ZFS_TYPE_SNAPSHOT; - break; - case 4: - types |= ZFS_TYPE_BOOKMARK; - break; - case 5: - types = ZFS_TYPE_DATASET | - ZFS_TYPE_BOOKMARK; - break; - default: - (void) fprintf(stderr, - 
gettext("invalid type '%s'\n"), - suboptarg); - usage(B_FALSE); - } - } - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (fields == NULL) - fields = default_fields; - - /* - * If we are only going to list snapshot names and sort by name, - * then we can use faster version. - */ - if (strcmp(fields, "name") == 0 && zfs_sort_only_by_name(sortcol)) - flags |= ZFS_ITER_SIMPLE; - - /* - * If "-o space" and no types were specified, don't display snapshots. - */ - if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) - types &= ~ZFS_TYPE_SNAPSHOT; - - /* - * If the user specifies '-o all', the zprop_get_list() doesn't - * normally include the name of the dataset. For 'zfs list', we always - * want this property to be first. - */ - if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) - != 0) - usage(B_FALSE); - - cb.cb_first = B_TRUE; - - ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, - limit, list_callback, &cb); - - zprop_free_list(cb.cb_proplist); - zfs_free_sort_columns(sortcol); - - if (ret == 0 && cb.cb_first && !cb.cb_scripted) - (void) printf(gettext("no datasets available\n")); - - return (ret); -} - -/* - * zfs rename [-f] <fs | snap | vol> <fs | snap | vol> - * zfs rename [-f] -p <fs | vol> <fs | vol> - * zfs rename -r <snap> <snap> - * zfs rename <bmark> <bmark> - * zfs rename -u [-p] <fs> <fs> - * - * Renames the given dataset to another of the same type. - * - * The '-p' flag creates all the non-existing ancestors of the target first. 
- */ -/* ARGSUSED */ -static int -zfs_do_rename(int argc, char **argv) -{ - zfs_handle_t *zhp; - renameflags_t flags = { 0 }; - int c; - int ret = 0; - int types; - boolean_t parents = B_FALSE; - boolean_t bookmarks = B_FALSE; - char *snapshot = NULL; - - /* check options */ - while ((c = getopt(argc, argv, "fpru")) != -1) { - switch (c) { - case 'p': - parents = B_TRUE; - break; - case 'r': - flags.recurse = B_TRUE; - break; - case 'u': - flags.nounmount = B_TRUE; - break; - case 'f': - flags.forceunmount = B_TRUE; - break; - case '?': - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing source dataset " - "argument\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing target dataset " - "argument\n")); - usage(B_FALSE); - } - if (argc > 2) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - if (flags.recurse && parents) { - (void) fprintf(stderr, gettext("-p and -r options are mutually " - "exclusive\n")); - usage(B_FALSE); - } - - if (flags.recurse && strchr(argv[0], '@') == NULL) { - (void) fprintf(stderr, gettext("source dataset for recursive " - "rename must be a snapshot\n")); - usage(B_FALSE); - } - - if (flags.nounmount && parents) { - (void) fprintf(stderr, gettext("-u and -p options are mutually " - "exclusive\n")); - usage(B_FALSE); - } - - if (strchr(argv[0], '#') != NULL) - bookmarks = B_TRUE; - - if (bookmarks && (flags.nounmount || flags.recurse || - flags.forceunmount || parents)) { - (void) fprintf(stderr, gettext("options are not supported " - "for renaming bookmarks\n")); - usage(B_FALSE); - } - - if (flags.nounmount) - types = ZFS_TYPE_FILESYSTEM; - else if (parents) - types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; - else if (bookmarks) - types = ZFS_TYPE_BOOKMARK; - else - types = 
ZFS_TYPE_DATASET; - - if (flags.recurse) { - /* - * When we do recursive rename we are fine when the given - * snapshot for the given dataset doesn't exist - it can - * still exists below. - */ - - snapshot = strchr(argv[0], '@'); - assert(snapshot != NULL); - *snapshot = '\0'; - snapshot++; - } - - if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) - return (1); - - /* If we were asked and the name looks good, try to create ancestors. */ - if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) && - zfs_create_ancestors(g_zfs, argv[1]) != 0) { - zfs_close(zhp); - return (1); - } - - ret = (zfs_rename(zhp, snapshot, argv[1], flags) != 0); - - zfs_close(zhp); - return (ret); -} - -/* - * zfs promote <fs> - * - * Promotes the given clone fs to be the parent - */ -/* ARGSUSED */ -static int -zfs_do_promote(int argc, char **argv) -{ - zfs_handle_t *zhp; - int ret = 0; - - /* check options */ - if (argc > 1 && argv[1][0] == '-') { - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - argv[1][1]); - usage(B_FALSE); - } - - /* check number of arguments */ - if (argc < 2) { - (void) fprintf(stderr, gettext("missing clone filesystem" - " argument\n")); - usage(B_FALSE); - } - if (argc > 2) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return (1); - - ret = (zfs_promote(zhp) != 0); - - - zfs_close(zhp); - return (ret); -} - -/* - * zfs rollback [-rRf] <snapshot> - * - * -r Delete any intervening snapshots before doing rollback - * -R Delete any snapshots and their clones - * -f ignored for backwards compatability - * - * Given a filesystem, rollback to a specific snapshot, discarding any changes - * since then and making it the active dataset. If more recent snapshots exist, - * the command will complain unless the '-r' flag is given. 
- */ -typedef struct rollback_cbdata { - uint64_t cb_create; - uint8_t cb_younger_ds_printed; - boolean_t cb_first; - int cb_doclones; - char *cb_target; - int cb_error; - boolean_t cb_recurse; -} rollback_cbdata_t; - -static int -rollback_check_dependent(zfs_handle_t *zhp, void *data) -{ - rollback_cbdata_t *cbp = data; - - if (cbp->cb_first && cbp->cb_recurse) { - (void) fprintf(stderr, gettext("cannot rollback to " - "'%s': clones of previous snapshots exist\n"), - cbp->cb_target); - (void) fprintf(stderr, gettext("use '-R' to " - "force deletion of the following clones and " - "dependents:\n")); - cbp->cb_first = 0; - cbp->cb_error = 1; - } - - (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); - - zfs_close(zhp); - return (0); -} - -/* - * Report some snapshots/bookmarks more recent than the one specified. - * Used when '-r' is not specified. We reuse this same callback for the - * snapshot dependents - if 'cb_dependent' is set, then this is a - * dependent and we should report it without checking the transaction group. - */ -static int -rollback_check(zfs_handle_t *zhp, void *data) -{ - rollback_cbdata_t *cbp = data; - /* - * Max number of younger snapshots and/or bookmarks to display before - * we stop the iteration. 
- */ - const uint8_t max_younger = 32; - - if (cbp->cb_doclones) { - zfs_close(zhp); - return (0); - } - - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - if (cbp->cb_first && !cbp->cb_recurse) { - (void) fprintf(stderr, gettext("cannot " - "rollback to '%s': more recent snapshots " - "or bookmarks exist\n"), - cbp->cb_target); - (void) fprintf(stderr, gettext("use '-r' to " - "force deletion of the following " - "snapshots and bookmarks:\n")); - cbp->cb_first = 0; - cbp->cb_error = 1; - } - - if (cbp->cb_recurse) { - if (zfs_iter_dependents(zhp, B_TRUE, - rollback_check_dependent, cbp) != 0) { - zfs_close(zhp); - return (-1); - } - } else { - (void) fprintf(stderr, "%s\n", - zfs_get_name(zhp)); - cbp->cb_younger_ds_printed++; - } - } - zfs_close(zhp); - - if (cbp->cb_younger_ds_printed == max_younger) { - /* - * This non-recursive rollback is going to fail due to the - * presence of snapshots and/or bookmarks that are younger than - * the rollback target. - * We printed some of the offending objects, now we stop - * zfs_iter_snapshot/bookmark iteration so we can fail fast and - * avoid iterating over the rest of the younger objects - */ - (void) fprintf(stderr, gettext("Output limited to %d " - "snapshots/bookmarks\n"), max_younger); - return (-1); - } - return (0); -} - -static int -zfs_do_rollback(int argc, char **argv) -{ - int ret = 0; - int c; - boolean_t force = B_FALSE; - rollback_cbdata_t cb = { 0 }; - zfs_handle_t *zhp, *snap; - char parentname[ZFS_MAX_DATASET_NAME_LEN]; - char *delim; - uint64_t min_txg = 0; - - /* check options */ - while ((c = getopt(argc, argv, "rRf")) != -1) { - switch (c) { - case 'r': - cb.cb_recurse = 1; - break; - case 'R': - cb.cb_recurse = 1; - cb.cb_doclones = 1; - break; - case 'f': - force = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { 
- (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - /* open the snapshot */ - if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) - return (1); - - /* open the parent dataset */ - (void) strlcpy(parentname, argv[0], sizeof (parentname)); - verify((delim = strrchr(parentname, '@')) != NULL); - *delim = '\0'; - if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) { - zfs_close(snap); - return (1); - } - - /* - * Check for more recent snapshots and/or clones based on the presence - * of '-r' and '-R'. - */ - cb.cb_target = argv[0]; - cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); - cb.cb_first = B_TRUE; - cb.cb_error = 0; - - if (cb.cb_create > 0) - min_txg = cb.cb_create; - - if ((ret = zfs_iter_snapshots(zhp, B_FALSE, rollback_check, &cb, - min_txg, 0)) != 0) - goto out; - if ((ret = zfs_iter_bookmarks(zhp, rollback_check, &cb)) != 0) - goto out; - - if ((ret = cb.cb_error) != 0) - goto out; - - /* - * Rollback parent to the given snapshot. - */ - ret = zfs_rollback(zhp, snap, force); - -out: - zfs_close(snap); - zfs_close(zhp); - - if (ret == 0) - return (0); - else - return (1); -} - -/* - * zfs set property=value ... { fs | snap | vol } ... - * - * Sets the given properties for all datasets specified on the command line. 
- */ - -static int -set_callback(zfs_handle_t *zhp, void *data) -{ - nvlist_t *props = data; - - if (zfs_prop_set_list(zhp, props) != 0) { - switch (libzfs_errno(g_zfs)) { - case EZFS_MOUNTFAILED: - (void) fprintf(stderr, gettext("property may be set " - "but unable to remount filesystem\n")); - break; - case EZFS_SHARENFSFAILED: - (void) fprintf(stderr, gettext("property may be set " - "but unable to reshare filesystem\n")); - break; - } - return (1); - } - return (0); -} - -static int -zfs_do_set(int argc, char **argv) -{ - nvlist_t *props = NULL; - int ds_start = -1; /* argv idx of first dataset arg */ - int ret = 0; - - /* check for options */ - if (argc > 1 && argv[1][0] == '-') { - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - argv[1][1]); - usage(B_FALSE); - } - - /* check number of arguments */ - if (argc < 2) { - (void) fprintf(stderr, gettext("missing arguments\n")); - usage(B_FALSE); - } - if (argc < 3) { - if (strchr(argv[1], '=') == NULL) { - (void) fprintf(stderr, gettext("missing property=value " - "argument(s)\n")); - } else { - (void) fprintf(stderr, gettext("missing dataset " - "name(s)\n")); - } - usage(B_FALSE); - } - - /* validate argument order: prop=val args followed by dataset args */ - for (int i = 1; i < argc; i++) { - if (strchr(argv[i], '=') != NULL) { - if (ds_start > 0) { - /* out-of-order prop=val argument */ - (void) fprintf(stderr, gettext("invalid " - "argument order\n"), i); - usage(B_FALSE); - } - } else if (ds_start < 0) { - ds_start = i; - } - } - if (ds_start < 0) { - (void) fprintf(stderr, gettext("missing dataset name(s)\n")); - usage(B_FALSE); - } - - /* Populate a list of property settings */ - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - for (int i = 1; i < ds_start; i++) { - if ((ret = parseprop(props, argv[i])) != 0) - goto error; - } - - ret = zfs_for_each(argc - ds_start, argv + ds_start, 0, - ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, props); - -error: - nvlist_free(props); - return 
(ret); -} - -typedef struct snap_cbdata { - nvlist_t *sd_nvl; - boolean_t sd_recursive; - const char *sd_snapname; -} snap_cbdata_t; - -static int -zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) -{ - snap_cbdata_t *sd = arg; - char *name; - int rv = 0; - int error; - - if (sd->sd_recursive && - zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) != 0) { - zfs_close(zhp); - return (0); - } - - error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); - if (error == -1) - nomem(); - fnvlist_add_boolean(sd->sd_nvl, name); - free(name); - - if (sd->sd_recursive) - rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); - zfs_close(zhp); - return (rv); -} - -/* - * zfs snapshot [-r] [-o prop=value] ... <fs@snap> - * - * Creates a snapshot with the given name. While functionally equivalent to - * 'zfs create', it is a separate command to differentiate intent. - */ -static int -zfs_do_snapshot(int argc, char **argv) -{ - int ret = 0; - int c; - nvlist_t *props; - snap_cbdata_t sd = { 0 }; - boolean_t multiple_snaps = B_FALSE; - - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - /* check options */ - while ((c = getopt(argc, argv, "ro:")) != -1) { - switch (c) { - case 'o': - if (parseprop(props, optarg) != 0) - return (1); - break; - case 'r': - sd.sd_recursive = B_TRUE; - multiple_snaps = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - goto usage; - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - goto usage; - } - - if (argc > 1) - multiple_snaps = B_TRUE; - for (; argc > 0; argc--, argv++) { - char *atp; - zfs_handle_t *zhp; - - atp = strchr(argv[0], '@'); - if (atp == NULL) - goto usage; - *atp = '\0'; - sd.sd_snapname = atp + 1; - zhp = zfs_open(g_zfs, argv[0], - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == 
NULL) - goto usage; - if (zfs_snapshot_cb(zhp, &sd) != 0) - goto usage; - } - - ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); - nvlist_free(sd.sd_nvl); - nvlist_free(props); - if (ret != 0 && multiple_snaps) - (void) fprintf(stderr, gettext("no snapshots were created\n")); - return (ret != 0); - -usage: - nvlist_free(sd.sd_nvl); - nvlist_free(props); - usage(B_FALSE); - return (-1); -} - -/* - * Send a backup stream to stdout. - */ -static int -zfs_do_send(int argc, char **argv) -{ - char *fromname = NULL; - char *toname = NULL; - char *resume_token = NULL; - char *cp; - zfs_handle_t *zhp; - sendflags_t flags = { 0 }; - int c, err; - nvlist_t *dbgnv = NULL; - boolean_t extraverbose = B_FALSE; - - struct option long_options[] = { - {"replicate", no_argument, NULL, 'R'}, - {"props", no_argument, NULL, 'p'}, - {"parsable", no_argument, NULL, 'P'}, - {"dedup", no_argument, NULL, 'D'}, - {"verbose", no_argument, NULL, 'v'}, - {"dryrun", no_argument, NULL, 'n'}, - {"large-block", no_argument, NULL, 'L'}, - {"embed", no_argument, NULL, 'e'}, - {"resume", required_argument, NULL, 't'}, - {"compressed", no_argument, NULL, 'c'}, - {0, 0, 0, 0} - }; - - /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RbDpVvnPLet:c", long_options, - NULL)) != -1) { - switch (c) { - case 'i': - if (fromname) - usage(B_FALSE); - fromname = optarg; - break; - case 'I': - if (fromname) - usage(B_FALSE); - fromname = optarg; - flags.doall = B_TRUE; - break; - case 'R': - flags.replicate = B_TRUE; - break; - case 'p': - flags.props = B_TRUE; - break; - case 'P': - flags.parsable = B_TRUE; - flags.verbose = B_TRUE; - break; - case 'V': - flags.progress = B_TRUE; - flags.progressastitle = B_TRUE; - break; - case 'v': - if (flags.verbose) - extraverbose = B_TRUE; - flags.verbose = B_TRUE; - flags.progress = B_TRUE; - break; - case 'D': - flags.dedup = B_TRUE; - break; - case 'n': - flags.dryrun = B_TRUE; - break; - case 'L': - flags.largeblock = B_TRUE; - break; - case 'e': - 
flags.embed_data = B_TRUE; - break; - case 't': - resume_token = optarg; - break; - case 'c': - flags.compress = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - /*FALLTHROUGH*/ - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (resume_token != NULL) { - if (fromname != NULL || flags.replicate || flags.props || - flags.dedup) { - (void) fprintf(stderr, - gettext("invalid flags combined with -t\n")); - usage(B_FALSE); - } - if (argc != 0) { - (void) fprintf(stderr, gettext("no additional " - "arguments are permitted with -t\n")); - usage(B_FALSE); - } - } else { - if (argc < 1) { - (void) fprintf(stderr, - gettext("missing snapshot argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - } - - if (!flags.dryrun && isatty(STDOUT_FILENO)) { - (void) fprintf(stderr, - gettext("Error: Stream can not be written to a terminal.\n" - "You must redirect standard output.\n")); - return (1); - } - - if (resume_token != NULL) { - return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, - resume_token)); - } - - /* - * Special case sending a filesystem, or from a bookmark. - */ - if (strchr(argv[0], '@') == NULL || - (fromname && strchr(fromname, '#') != NULL)) { - char frombuf[ZFS_MAX_DATASET_NAME_LEN]; - - if (flags.replicate || flags.doall || flags.props || - flags.dedup || (strchr(argv[0], '@') == NULL && - (flags.dryrun || flags.verbose || flags.progress))) { - (void) fprintf(stderr, gettext("Error: " - "Unsupported flag with filesystem or bookmark.\n")); - return (1); - } - - zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET); - if (zhp == NULL) - return (1); - - if (fromname != NULL && - (fromname[0] == '#' || fromname[0] == '@')) { - /* - * Incremental source name begins with # or @. 
- * Default to same fs as target. - */ - (void) strncpy(frombuf, argv[0], sizeof (frombuf)); - cp = strchr(frombuf, '@'); - if (cp != NULL) - *cp = '\0'; - (void) strlcat(frombuf, fromname, sizeof (frombuf)); - fromname = frombuf; - } - err = zfs_send_one(zhp, fromname, STDOUT_FILENO, flags); - zfs_close(zhp); - return (err != 0); - } - - cp = strchr(argv[0], '@'); - *cp = '\0'; - toname = cp + 1; - zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return (1); - - /* - * If they specified the full path to the snapshot, chop off - * everything except the short name of the snapshot, but special - * case if they specify the origin. - */ - if (fromname && (cp = strchr(fromname, '@')) != NULL) { - char origin[ZFS_MAX_DATASET_NAME_LEN]; - zprop_source_t src; - - (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, - origin, sizeof (origin), &src, NULL, 0, B_FALSE); - - if (strcmp(origin, fromname) == 0) { - fromname = NULL; - flags.fromorigin = B_TRUE; - } else { - *cp = '\0'; - if (cp != fromname && strcmp(argv[0], fromname)) { - (void) fprintf(stderr, - gettext("incremental source must be " - "in same filesystem\n")); - usage(B_FALSE); - } - fromname = cp + 1; - if (strchr(fromname, '@') || strchr(fromname, '/')) { - (void) fprintf(stderr, - gettext("invalid incremental source\n")); - usage(B_FALSE); - } - } - } - - if (flags.replicate && fromname == NULL) - flags.doall = B_TRUE; - - err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, - extraverbose ? &dbgnv : NULL); - - if (extraverbose && dbgnv != NULL) { - /* - * dump_nvlist prints to stdout, but that's been - * redirected to a file. Make it print to stderr - * instead. - */ - (void) dup2(STDERR_FILENO, STDOUT_FILENO); - dump_nvlist(dbgnv, 0); - nvlist_free(dbgnv); - } - zfs_close(zhp); - - return (err != 0); -} - -/* - * Restore a backup stream from stdin. 
- */ -static int -zfs_do_receive(int argc, char **argv) -{ - int c, err = 0; - recvflags_t flags = { 0 }; - boolean_t abort_resumable = B_FALSE; - - nvlist_t *props; - nvpair_t *nvp = NULL; - - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - /* check options */ - while ((c = getopt(argc, argv, ":o:denuvMFsA")) != -1) { - switch (c) { - case 'o': - if (parseprop(props, optarg) != 0) - return (1); - break; - case 'd': - flags.isprefix = B_TRUE; - break; - case 'e': - flags.isprefix = B_TRUE; - flags.istail = B_TRUE; - break; - case 'n': - flags.dryrun = B_TRUE; - break; - case 'u': - flags.nomount = B_TRUE; - break; - case 'v': - flags.verbose = B_TRUE; - break; - case 's': - flags.resumable = B_TRUE; - break; - case 'F': - flags.force = B_TRUE; - break; - case 'M': - flags.forceunmount = B_TRUE; - break; - case 'A': - abort_resumable = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - while ((nvp = nvlist_next_nvpair(props, nvp))) { - if (strcmp(nvpair_name(nvp), "origin") != 0) { - (void) fprintf(stderr, gettext("invalid option")); - usage(B_FALSE); - } - } - - if (abort_resumable) { - if (flags.isprefix || flags.istail || flags.dryrun || - flags.resumable || flags.nomount) { - (void) fprintf(stderr, gettext("invalid option")); - usage(B_FALSE); - } - - char namebuf[ZFS_MAX_DATASET_NAME_LEN]; - (void) snprintf(namebuf, sizeof (namebuf), - "%s/%%recv", argv[0]); - - if (zfs_dataset_exists(g_zfs, namebuf, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { - zfs_handle_t *zhp = 
zfs_open(g_zfs, - namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return (1); - err = zfs_destroy(zhp, B_FALSE); - } else { - zfs_handle_t *zhp = zfs_open(g_zfs, - argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - usage(B_FALSE); - if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || - zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, - NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { - (void) fprintf(stderr, - gettext("'%s' does not have any " - "resumable receive state to abort\n"), - argv[0]); - return (1); - } - err = zfs_destroy(zhp, B_FALSE); - } - - return (err != 0); - } - - if (isatty(STDIN_FILENO)) { - (void) fprintf(stderr, - gettext("Error: Backup stream can not be read " - "from a terminal.\n" - "You must redirect standard input.\n")); - return (1); - } - err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); - - return (err != 0); -} - -/* - * allow/unallow stuff - */ -/* copied from zfs/sys/dsl_deleg.h */ -#define ZFS_DELEG_PERM_CREATE "create" -#define ZFS_DELEG_PERM_DESTROY "destroy" -#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" -#define ZFS_DELEG_PERM_ROLLBACK "rollback" -#define ZFS_DELEG_PERM_CLONE "clone" -#define ZFS_DELEG_PERM_PROMOTE "promote" -#define ZFS_DELEG_PERM_RENAME "rename" -#define ZFS_DELEG_PERM_MOUNT "mount" -#define ZFS_DELEG_PERM_SHARE "share" -#define ZFS_DELEG_PERM_SEND "send" -#define ZFS_DELEG_PERM_RECEIVE "receive" -#define ZFS_DELEG_PERM_ALLOW "allow" -#define ZFS_DELEG_PERM_USERPROP "userprop" -#define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? 
*/ -#define ZFS_DELEG_PERM_USERQUOTA "userquota" -#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" -#define ZFS_DELEG_PERM_USERUSED "userused" -#define ZFS_DELEG_PERM_GROUPUSED "groupused" -#define ZFS_DELEG_PERM_HOLD "hold" -#define ZFS_DELEG_PERM_RELEASE "release" -#define ZFS_DELEG_PERM_DIFF "diff" -#define ZFS_DELEG_PERM_BOOKMARK "bookmark" -#define ZFS_DELEG_PERM_REMAP "remap" - -#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE - -static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { - { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, - { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, - { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, - { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, - { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, - { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, - { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, - { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, - { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, - { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, - { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, - { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, - { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, - { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, - { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, - { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, - { ZFS_DELEG_PERM_REMAP, ZFS_DELEG_NOTE_REMAP }, - - { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, - { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, - { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, - { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, - { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, - { NULL, ZFS_DELEG_NOTE_NONE } -}; - -/* permission structure */ -typedef struct deleg_perm { - zfs_deleg_who_type_t dp_who_type; - const char *dp_name; - boolean_t dp_local; - boolean_t dp_descend; -} deleg_perm_t; - -/* */ -typedef struct deleg_perm_node { - deleg_perm_t dpn_perm; - - uu_avl_node_t dpn_avl_node; -} deleg_perm_node_t; 
- -typedef struct fs_perm fs_perm_t; - -/* permissions set */ -typedef struct who_perm { - zfs_deleg_who_type_t who_type; - const char *who_name; /* id */ - char who_ug_name[256]; /* user/group name */ - fs_perm_t *who_fsperm; /* uplink */ - - uu_avl_t *who_deleg_perm_avl; /* permissions */ -} who_perm_t; - -/* */ -typedef struct who_perm_node { - who_perm_t who_perm; - uu_avl_node_t who_avl_node; -} who_perm_node_t; - -typedef struct fs_perm_set fs_perm_set_t; -/* fs permissions */ -struct fs_perm { - const char *fsp_name; - - uu_avl_t *fsp_sc_avl; /* sets,create */ - uu_avl_t *fsp_uge_avl; /* user,group,everyone */ - - fs_perm_set_t *fsp_set; /* uplink */ -}; - -/* */ -typedef struct fs_perm_node { - fs_perm_t fspn_fsperm; - uu_avl_t *fspn_avl; - - uu_list_node_t fspn_list_node; -} fs_perm_node_t; - -/* top level structure */ -struct fs_perm_set { - uu_list_pool_t *fsps_list_pool; - uu_list_t *fsps_list; /* list of fs_perms */ - - uu_avl_pool_t *fsps_named_set_avl_pool; - uu_avl_pool_t *fsps_who_perm_avl_pool; - uu_avl_pool_t *fsps_deleg_perm_avl_pool; -}; - -static inline const char * -deleg_perm_type(zfs_deleg_note_t note) -{ - /* subcommands */ - switch (note) { - /* SUBCOMMANDS */ - /* OTHER */ - case ZFS_DELEG_NOTE_GROUPQUOTA: - case ZFS_DELEG_NOTE_GROUPUSED: - case ZFS_DELEG_NOTE_USERPROP: - case ZFS_DELEG_NOTE_USERQUOTA: - case ZFS_DELEG_NOTE_USERUSED: - /* other */ - return (gettext("other")); - default: - return (gettext("subcommand")); - } -} - -static int -who_type2weight(zfs_deleg_who_type_t who_type) -{ - int res; - switch (who_type) { - case ZFS_DELEG_NAMED_SET_SETS: - case ZFS_DELEG_NAMED_SET: - res = 0; - break; - case ZFS_DELEG_CREATE_SETS: - case ZFS_DELEG_CREATE: - res = 1; - break; - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - res = 2; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - res = 3; - break; - case ZFS_DELEG_EVERYONE_SETS: - case ZFS_DELEG_EVERYONE: - res = 4; - break; - default: - res = -1; - } - - return 
(res); -} - -/* ARGSUSED */ -static int -who_perm_compare(const void *larg, const void *rarg, void *unused) -{ - const who_perm_node_t *l = larg; - const who_perm_node_t *r = rarg; - zfs_deleg_who_type_t ltype = l->who_perm.who_type; - zfs_deleg_who_type_t rtype = r->who_perm.who_type; - int lweight = who_type2weight(ltype); - int rweight = who_type2weight(rtype); - int res = lweight - rweight; - if (res == 0) - res = strncmp(l->who_perm.who_name, r->who_perm.who_name, - ZFS_MAX_DELEG_NAME-1); - - if (res == 0) - return (0); - if (res > 0) - return (1); - else - return (-1); -} - -/* ARGSUSED */ -static int -deleg_perm_compare(const void *larg, const void *rarg, void *unused) -{ - const deleg_perm_node_t *l = larg; - const deleg_perm_node_t *r = rarg; - int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, - ZFS_MAX_DELEG_NAME-1); - - if (res == 0) - return (0); - - if (res > 0) - return (1); - else - return (-1); -} - -static inline void -fs_perm_set_init(fs_perm_set_t *fspset) -{ - bzero(fspset, sizeof (fs_perm_set_t)); - - if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", - sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), - NULL, UU_DEFAULT)) == NULL) - nomem(); - if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, - UU_DEFAULT)) == NULL) - nomem(); - - if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( - "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( - who_perm_node_t, who_avl_node), who_perm_compare, - UU_DEFAULT)) == NULL) - nomem(); - - if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( - "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( - who_perm_node_t, who_avl_node), who_perm_compare, - UU_DEFAULT)) == NULL) - nomem(); - - if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( - "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( - deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) - == NULL) - nomem(); -} - -static inline void 
fs_perm_fini(fs_perm_t *); -static inline void who_perm_fini(who_perm_t *); - -static inline void -fs_perm_set_fini(fs_perm_set_t *fspset) -{ - fs_perm_node_t *node = uu_list_first(fspset->fsps_list); - - while (node != NULL) { - fs_perm_node_t *next_node = - uu_list_next(fspset->fsps_list, node); - fs_perm_t *fsperm = &node->fspn_fsperm; - fs_perm_fini(fsperm); - uu_list_remove(fspset->fsps_list, node); - free(node); - node = next_node; - } - - uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); - uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); - uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); -} - -static inline void -deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, - const char *name) -{ - deleg_perm->dp_who_type = type; - deleg_perm->dp_name = name; -} - -static inline void -who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, - zfs_deleg_who_type_t type, const char *name) -{ - uu_avl_pool_t *pool; - pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; - - bzero(who_perm, sizeof (who_perm_t)); - - if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, - UU_DEFAULT)) == NULL) - nomem(); - - who_perm->who_type = type; - who_perm->who_name = name; - who_perm->who_fsperm = fsperm; -} - -static inline void -who_perm_fini(who_perm_t *who_perm) -{ - deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); - - while (node != NULL) { - deleg_perm_node_t *next_node = - uu_avl_next(who_perm->who_deleg_perm_avl, node); - - uu_avl_remove(who_perm->who_deleg_perm_avl, node); - free(node); - node = next_node; - } - - uu_avl_destroy(who_perm->who_deleg_perm_avl); -} - -static inline void -fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) -{ - uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; - uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; - - bzero(fsperm, sizeof (fs_perm_t)); - - if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) - == NULL) - nomem(); - - if 
((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) - == NULL) - nomem(); - - fsperm->fsp_set = fspset; - fsperm->fsp_name = fsname; -} - -static inline void -fs_perm_fini(fs_perm_t *fsperm) -{ - who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); - while (node != NULL) { - who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, - node); - who_perm_t *who_perm = &node->who_perm; - who_perm_fini(who_perm); - uu_avl_remove(fsperm->fsp_sc_avl, node); - free(node); - node = next_node; - } - - node = uu_avl_first(fsperm->fsp_uge_avl); - while (node != NULL) { - who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, - node); - who_perm_t *who_perm = &node->who_perm; - who_perm_fini(who_perm); - uu_avl_remove(fsperm->fsp_uge_avl, node); - free(node); - node = next_node; - } - - uu_avl_destroy(fsperm->fsp_sc_avl); - uu_avl_destroy(fsperm->fsp_uge_avl); -} - -static void -set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, - zfs_deleg_who_type_t who_type, const char *name, char locality) -{ - uu_avl_index_t idx = 0; - - deleg_perm_node_t *found_node = NULL; - deleg_perm_t *deleg_perm = &node->dpn_perm; - - deleg_perm_init(deleg_perm, who_type, name); - - if ((found_node = uu_avl_find(avl, node, NULL, &idx)) - == NULL) - uu_avl_insert(avl, node, idx); - else { - node = found_node; - deleg_perm = &node->dpn_perm; - } - - - switch (locality) { - case ZFS_DELEG_LOCAL: - deleg_perm->dp_local = B_TRUE; - break; - case ZFS_DELEG_DESCENDENT: - deleg_perm->dp_descend = B_TRUE; - break; - case ZFS_DELEG_NA: - break; - default: - assert(B_FALSE); /* invalid locality */ - } -} - -static inline int -parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) -{ - nvpair_t *nvp = NULL; - fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; - uu_avl_t *avl = who_perm->who_deleg_perm_avl; - zfs_deleg_who_type_t who_type = who_perm->who_type; - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - const char *name = nvpair_name(nvp); - 
data_type_t type = nvpair_type(nvp); - uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; - deleg_perm_node_t *node = - safe_malloc(sizeof (deleg_perm_node_t)); - - assert(type == DATA_TYPE_BOOLEAN); - - uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); - set_deleg_perm_node(avl, node, who_type, name, locality); - } - - return (0); -} - -static inline int -parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) -{ - nvpair_t *nvp = NULL; - fs_perm_set_t *fspset = fsperm->fsp_set; - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - nvlist_t *nvl2 = NULL; - const char *name = nvpair_name(nvp); - uu_avl_t *avl = NULL; - uu_avl_pool_t *avl_pool = NULL; - zfs_deleg_who_type_t perm_type = name[0]; - char perm_locality = name[1]; - const char *perm_name = name + 3; - boolean_t is_set = B_TRUE; - who_perm_t *who_perm = NULL; - - assert('$' == name[2]); - - if (nvpair_value_nvlist(nvp, &nvl2) != 0) - return (-1); - - switch (perm_type) { - case ZFS_DELEG_CREATE: - case ZFS_DELEG_CREATE_SETS: - case ZFS_DELEG_NAMED_SET: - case ZFS_DELEG_NAMED_SET_SETS: - avl_pool = fspset->fsps_named_set_avl_pool; - avl = fsperm->fsp_sc_avl; - break; - case ZFS_DELEG_USER: - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_GROUP: - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_EVERYONE: - case ZFS_DELEG_EVERYONE_SETS: - avl_pool = fspset->fsps_who_perm_avl_pool; - avl = fsperm->fsp_uge_avl; - break; - - default: - assert(!"unhandled zfs_deleg_who_type_t"); - } - - if (is_set) { - who_perm_node_t *found_node = NULL; - who_perm_node_t *node = safe_malloc( - sizeof (who_perm_node_t)); - who_perm = &node->who_perm; - uu_avl_index_t idx = 0; - - uu_avl_node_init(node, &node->who_avl_node, avl_pool); - who_perm_init(who_perm, fsperm, perm_type, perm_name); - - if ((found_node = uu_avl_find(avl, node, NULL, &idx)) - == NULL) { - if (avl == fsperm->fsp_uge_avl) { - uid_t rid = 0; - struct passwd *p = NULL; - struct group *g = NULL; - const char *nice_name = NULL; - - switch (perm_type) { - 
case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - rid = atoi(perm_name); - p = getpwuid(rid); - if (p) - nice_name = p->pw_name; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - rid = atoi(perm_name); - g = getgrgid(rid); - if (g) - nice_name = g->gr_name; - break; - - default: - break; - } - - if (nice_name != NULL) - (void) strlcpy( - node->who_perm.who_ug_name, - nice_name, 256); - else { - /* User or group unknown */ - (void) snprintf( - node->who_perm.who_ug_name, - sizeof ( - node->who_perm.who_ug_name), - "(unknown: %d)", rid); - } - } - - uu_avl_insert(avl, node, idx); - } else { - node = found_node; - who_perm = &node->who_perm; - } - } - - (void) parse_who_perm(who_perm, nvl2, perm_locality); - } - - return (0); -} - -static inline int -parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) -{ - nvpair_t *nvp = NULL; - uu_avl_index_t idx = 0; - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - nvlist_t *nvl2 = NULL; - const char *fsname = nvpair_name(nvp); - data_type_t type = nvpair_type(nvp); - fs_perm_t *fsperm = NULL; - fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); - if (node == NULL) - nomem(); - - fsperm = &node->fspn_fsperm; - - assert(DATA_TYPE_NVLIST == type); - - uu_list_node_init(node, &node->fspn_list_node, - fspset->fsps_list_pool); - - idx = uu_list_numnodes(fspset->fsps_list); - fs_perm_init(fsperm, fspset, fsname); - - if (nvpair_value_nvlist(nvp, &nvl2) != 0) - return (-1); - - (void) parse_fs_perm(fsperm, nvl2); - - uu_list_insert(fspset->fsps_list, node, idx); - } - - return (0); -} - -static inline const char * -deleg_perm_comment(zfs_deleg_note_t note) -{ - const char *str = ""; - - /* subcommands */ - switch (note) { - /* SUBCOMMANDS */ - case ZFS_DELEG_NOTE_ALLOW: - str = gettext("Must also have the permission that is being" - "\n\t\t\t\tallowed"); - break; - case ZFS_DELEG_NOTE_CLONE: - str = gettext("Must also have the 'create' ability and 'mount'" - "\n\t\t\t\tability in the origin file 
system"); - break; - case ZFS_DELEG_NOTE_CREATE: - str = gettext("Must also have the 'mount' ability"); - break; - case ZFS_DELEG_NOTE_DESTROY: - str = gettext("Must also have the 'mount' ability"); - break; - case ZFS_DELEG_NOTE_DIFF: - str = gettext("Allows lookup of paths within a dataset;" - "\n\t\t\t\tgiven an object number. Ordinary users need this" - "\n\t\t\t\tin order to use zfs diff"); - break; - case ZFS_DELEG_NOTE_HOLD: - str = gettext("Allows adding a user hold to a snapshot"); - break; - case ZFS_DELEG_NOTE_MOUNT: - str = gettext("Allows mount/umount of ZFS datasets"); - break; - case ZFS_DELEG_NOTE_PROMOTE: - str = gettext("Must also have the 'mount'\n\t\t\t\tand" - " 'promote' ability in the origin file system"); - break; - case ZFS_DELEG_NOTE_RECEIVE: - str = gettext("Must also have the 'mount' and 'create'" - " ability"); - break; - case ZFS_DELEG_NOTE_RELEASE: - str = gettext("Allows releasing a user hold which\n\t\t\t\t" - "might destroy the snapshot"); - break; - case ZFS_DELEG_NOTE_RENAME: - str = gettext("Must also have the 'mount' and 'create'" - "\n\t\t\t\tability in the new parent"); - break; - case ZFS_DELEG_NOTE_ROLLBACK: - str = gettext(""); - break; - case ZFS_DELEG_NOTE_SEND: - str = gettext(""); - break; - case ZFS_DELEG_NOTE_SHARE: - str = gettext("Allows sharing file systems over NFS or SMB" - "\n\t\t\t\tprotocols"); - break; - case ZFS_DELEG_NOTE_SNAPSHOT: - str = gettext(""); - break; -/* - * case ZFS_DELEG_NOTE_VSCAN: - * str = gettext(""); - * break; - */ - /* OTHER */ - case ZFS_DELEG_NOTE_GROUPQUOTA: - str = gettext("Allows accessing any groupquota@... property"); - break; - case ZFS_DELEG_NOTE_GROUPUSED: - str = gettext("Allows reading any groupused@... property"); - break; - case ZFS_DELEG_NOTE_USERPROP: - str = gettext("Allows changing any user property"); - break; - case ZFS_DELEG_NOTE_USERQUOTA: - str = gettext("Allows accessing any userquota@... 
property"); - break; - case ZFS_DELEG_NOTE_USERUSED: - str = gettext("Allows reading any userused@... property"); - break; - /* other */ - default: - str = ""; - } - - return (str); -} - -struct allow_opts { - boolean_t local; - boolean_t descend; - boolean_t user; - boolean_t group; - boolean_t everyone; - boolean_t create; - boolean_t set; - boolean_t recursive; /* unallow only */ - boolean_t prt_usage; - - boolean_t prt_perms; - char *who; - char *perms; - const char *dataset; -}; - -static inline int -prop_cmp(const void *a, const void *b) -{ - const char *str1 = *(const char **)a; - const char *str2 = *(const char **)b; - return (strcmp(str1, str2)); -} - -static void -allow_usage(boolean_t un, boolean_t requested, const char *msg) -{ - const char *opt_desc[] = { - "-h", gettext("show this help message and exit"), - "-l", gettext("set permission locally"), - "-d", gettext("set permission for descents"), - "-u", gettext("set permission for user"), - "-g", gettext("set permission for group"), - "-e", gettext("set permission for everyone"), - "-c", gettext("set create time permission"), - "-s", gettext("define permission set"), - /* unallow only */ - "-r", gettext("remove permissions recursively"), - }; - size_t unallow_size = sizeof (opt_desc) / sizeof (char *); - size_t allow_size = unallow_size - 2; - const char *props[ZFS_NUM_PROPS]; - int i; - size_t count = 0; - FILE *fp = requested ? stdout : stderr; - zprop_desc_t *pdtbl = zfs_prop_get_table(); - const char *fmt = gettext("%-16s %-14s\t%s\n"); - - (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : - HELP_ALLOW)); - (void) fprintf(fp, gettext("Options:\n")); - for (i = 0; i < (un ? 
unallow_size : allow_size); i++) { - const char *opt = opt_desc[i++]; - const char *optdsc = opt_desc[i]; - (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); - } - - (void) fprintf(fp, gettext("\nThe following permissions are " - "supported:\n\n")); - (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), - gettext("NOTES")); - for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { - const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; - zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; - const char *perm_type = deleg_perm_type(perm_note); - const char *perm_comment = deleg_perm_comment(perm_note); - (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); - } - - for (i = 0; i < ZFS_NUM_PROPS; i++) { - zprop_desc_t *pd = &pdtbl[i]; - if (pd->pd_visible != B_TRUE) - continue; - - if (pd->pd_attr == PROP_READONLY) - continue; - - props[count++] = pd->pd_name; - } - props[count] = NULL; - - qsort(props, count, sizeof (char *), prop_cmp); - - for (i = 0; i < count; i++) - (void) fprintf(fp, fmt, props[i], gettext("property"), ""); - - if (msg != NULL) - (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); - - exit(requested ? 0 : 2); -} - -static inline const char * -munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, - char **permsp) -{ - if (un && argc == expected_argc - 1) - *permsp = NULL; - else if (argc == expected_argc) - *permsp = argv[argc - 2]; - else - allow_usage(un, B_FALSE, - gettext("wrong number of parameters\n")); - - return (argv[argc - 1]); -} - -static void -parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) -{ - int uge_sum = opts->user + opts->group + opts->everyone; - int csuge_sum = opts->create + opts->set + uge_sum; - int ldcsuge_sum = csuge_sum + opts->local + opts->descend; - int all_sum = un ? 
ldcsuge_sum + opts->recursive : ldcsuge_sum; - - if (uge_sum > 1) - allow_usage(un, B_FALSE, - gettext("-u, -g, and -e are mutually exclusive\n")); - - if (opts->prt_usage) { - if (argc == 0 && all_sum == 0) - allow_usage(un, B_TRUE, NULL); - else - usage(B_FALSE); - } - - if (opts->set) { - if (csuge_sum > 1) - allow_usage(un, B_FALSE, - gettext("invalid options combined with -s\n")); - - opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); - if (argv[0][0] != '@') - allow_usage(un, B_FALSE, - gettext("invalid set name: missing '@' prefix\n")); - opts->who = argv[0]; - } else if (opts->create) { - if (ldcsuge_sum > 1) - allow_usage(un, B_FALSE, - gettext("invalid options combined with -c\n")); - opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); - } else if (opts->everyone) { - if (csuge_sum > 1) - allow_usage(un, B_FALSE, - gettext("invalid options combined with -e\n")); - opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); - } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") - == 0) { - opts->everyone = B_TRUE; - argc--; - argv++; - opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); - } else if (argc == 1 && !un) { - opts->prt_perms = B_TRUE; - opts->dataset = argv[argc-1]; - } else { - opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); - opts->who = argv[0]; - } - - if (!opts->local && !opts->descend) { - opts->local = B_TRUE; - opts->descend = B_TRUE; - } -} - -static void -store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, - const char *who, char *perms, nvlist_t *top_nvl) -{ - int i; - char ld[2] = { '\0', '\0' }; - char who_buf[MAXNAMELEN + 32]; - char base_type = '\0'; - char set_type = '\0'; - nvlist_t *base_nvl = NULL; - nvlist_t *set_nvl = NULL; - nvlist_t *nvl; - - if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); - if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - switch (type) { - case ZFS_DELEG_NAMED_SET_SETS: - case 
ZFS_DELEG_NAMED_SET: - set_type = ZFS_DELEG_NAMED_SET_SETS; - base_type = ZFS_DELEG_NAMED_SET; - ld[0] = ZFS_DELEG_NA; - break; - case ZFS_DELEG_CREATE_SETS: - case ZFS_DELEG_CREATE: - set_type = ZFS_DELEG_CREATE_SETS; - base_type = ZFS_DELEG_CREATE; - ld[0] = ZFS_DELEG_NA; - break; - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - set_type = ZFS_DELEG_USER_SETS; - base_type = ZFS_DELEG_USER; - if (local) - ld[0] = ZFS_DELEG_LOCAL; - if (descend) - ld[1] = ZFS_DELEG_DESCENDENT; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - set_type = ZFS_DELEG_GROUP_SETS; - base_type = ZFS_DELEG_GROUP; - if (local) - ld[0] = ZFS_DELEG_LOCAL; - if (descend) - ld[1] = ZFS_DELEG_DESCENDENT; - break; - case ZFS_DELEG_EVERYONE_SETS: - case ZFS_DELEG_EVERYONE: - set_type = ZFS_DELEG_EVERYONE_SETS; - base_type = ZFS_DELEG_EVERYONE; - if (local) - ld[0] = ZFS_DELEG_LOCAL; - if (descend) - ld[1] = ZFS_DELEG_DESCENDENT; - break; - - default: - assert(set_type != '\0' && base_type != '\0'); - } - - if (perms != NULL) { - char *curr = perms; - char *end = curr + strlen(perms); - - while (curr < end) { - char *delim = strchr(curr, ','); - if (delim == NULL) - delim = end; - else - *delim = '\0'; - - if (curr[0] == '@') - nvl = set_nvl; - else - nvl = base_nvl; - - (void) nvlist_add_boolean(nvl, curr); - if (delim != end) - *delim = ','; - curr = delim + 1; - } - - for (i = 0; i < 2; i++) { - char locality = ld[i]; - if (locality == 0) - continue; - - if (!nvlist_empty(base_nvl)) { - if (who != NULL) - (void) snprintf(who_buf, - sizeof (who_buf), "%c%c$%s", - base_type, locality, who); - else - (void) snprintf(who_buf, - sizeof (who_buf), "%c%c$", - base_type, locality); - - (void) nvlist_add_nvlist(top_nvl, who_buf, - base_nvl); - } - - - if (!nvlist_empty(set_nvl)) { - if (who != NULL) - (void) snprintf(who_buf, - sizeof (who_buf), "%c%c$%s", - set_type, locality, who); - else - (void) snprintf(who_buf, - sizeof (who_buf), "%c%c$", - set_type, locality); - - (void) 
nvlist_add_nvlist(top_nvl, who_buf, - set_nvl); - } - } - } else { - for (i = 0; i < 2; i++) { - char locality = ld[i]; - if (locality == 0) - continue; - - if (who != NULL) - (void) snprintf(who_buf, sizeof (who_buf), - "%c%c$%s", base_type, locality, who); - else - (void) snprintf(who_buf, sizeof (who_buf), - "%c%c$", base_type, locality); - (void) nvlist_add_boolean(top_nvl, who_buf); - - if (who != NULL) - (void) snprintf(who_buf, sizeof (who_buf), - "%c%c$%s", set_type, locality, who); - else - (void) snprintf(who_buf, sizeof (who_buf), - "%c%c$", set_type, locality); - (void) nvlist_add_boolean(top_nvl, who_buf); - } - } -} - -static int -construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) -{ - if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - if (opts->set) { - store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, - opts->descend, opts->who, opts->perms, *nvlp); - } else if (opts->create) { - store_allow_perm(ZFS_DELEG_CREATE, opts->local, - opts->descend, NULL, opts->perms, *nvlp); - } else if (opts->everyone) { - store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, - opts->descend, NULL, opts->perms, *nvlp); - } else { - char *curr = opts->who; - char *end = curr + strlen(curr); - - while (curr < end) { - const char *who; - zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; - char *endch; - char *delim = strchr(curr, ','); - char errbuf[256]; - char id[64]; - struct passwd *p = NULL; - struct group *g = NULL; - - uid_t rid; - if (delim == NULL) - delim = end; - else - *delim = '\0'; - - rid = (uid_t)strtol(curr, &endch, 0); - if (opts->user) { - who_type = ZFS_DELEG_USER; - if (*endch != '\0') - p = getpwnam(curr); - else - p = getpwuid(rid); - - if (p != NULL) - rid = p->pw_uid; - else if (*endch != '\0') { - (void) snprintf(errbuf, 256, gettext( - "invalid user %s\n"), curr); - allow_usage(un, B_TRUE, errbuf); - } - } else if (opts->group) { - who_type = ZFS_DELEG_GROUP; - if (*endch != '\0') - g = getgrnam(curr); - 
else - g = getgrgid(rid); - - if (g != NULL) - rid = g->gr_gid; - else if (*endch != '\0') { - (void) snprintf(errbuf, 256, gettext( - "invalid group %s\n"), curr); - allow_usage(un, B_TRUE, errbuf); - } - } else { - if (*endch != '\0') { - p = getpwnam(curr); - } else { - p = getpwuid(rid); - } - - if (p == NULL) { - if (*endch != '\0') { - g = getgrnam(curr); - } else { - g = getgrgid(rid); - } - } - - if (p != NULL) { - who_type = ZFS_DELEG_USER; - rid = p->pw_uid; - } else if (g != NULL) { - who_type = ZFS_DELEG_GROUP; - rid = g->gr_gid; - } else { - (void) snprintf(errbuf, 256, gettext( - "invalid user/group %s\n"), curr); - allow_usage(un, B_TRUE, errbuf); - } - } - - (void) sprintf(id, "%u", rid); - who = id; - - store_allow_perm(who_type, opts->local, - opts->descend, who, opts->perms, *nvlp); - curr = delim + 1; - } - } - - return (0); -} - -static void -print_set_creat_perms(uu_avl_t *who_avl) -{ - const char *sc_title[] = { - gettext("Permission sets:\n"), - gettext("Create time permissions:\n"), - NULL - }; - const char **title_ptr = sc_title; - who_perm_node_t *who_node = NULL; - int prev_weight = -1; - - for (who_node = uu_avl_first(who_avl); who_node != NULL; - who_node = uu_avl_next(who_avl, who_node)) { - uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; - zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; - const char *who_name = who_node->who_perm.who_name; - int weight = who_type2weight(who_type); - boolean_t first = B_TRUE; - deleg_perm_node_t *deleg_node; - - if (prev_weight != weight) { - (void) printf(*title_ptr++); - prev_weight = weight; - } - - if (who_name == NULL || strnlen(who_name, 1) == 0) - (void) printf("\t"); - else - (void) printf("\t%s ", who_name); - - for (deleg_node = uu_avl_first(avl); deleg_node != NULL; - deleg_node = uu_avl_next(avl, deleg_node)) { - if (first) { - (void) printf("%s", - deleg_node->dpn_perm.dp_name); - first = B_FALSE; - } else - (void) printf(",%s", - deleg_node->dpn_perm.dp_name); - } - - 
(void) printf("\n"); - } -} - -static void -print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, - const char *title) -{ - who_perm_node_t *who_node = NULL; - boolean_t prt_title = B_TRUE; - uu_avl_walk_t *walk; - - if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) - nomem(); - - while ((who_node = uu_avl_walk_next(walk)) != NULL) { - const char *who_name = who_node->who_perm.who_name; - const char *nice_who_name = who_node->who_perm.who_ug_name; - uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; - zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; - char delim = ' '; - deleg_perm_node_t *deleg_node; - boolean_t prt_who = B_TRUE; - - for (deleg_node = uu_avl_first(avl); - deleg_node != NULL; - deleg_node = uu_avl_next(avl, deleg_node)) { - if (local != deleg_node->dpn_perm.dp_local || - descend != deleg_node->dpn_perm.dp_descend) - continue; - - if (prt_who) { - const char *who = NULL; - if (prt_title) { - prt_title = B_FALSE; - (void) printf(title); - } - - switch (who_type) { - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_USER: - who = gettext("user"); - if (nice_who_name) - who_name = nice_who_name; - break; - case ZFS_DELEG_GROUP_SETS: - case ZFS_DELEG_GROUP: - who = gettext("group"); - if (nice_who_name) - who_name = nice_who_name; - break; - case ZFS_DELEG_EVERYONE_SETS: - case ZFS_DELEG_EVERYONE: - who = gettext("everyone"); - who_name = NULL; - break; - - default: - assert(who != NULL); - } - - prt_who = B_FALSE; - if (who_name == NULL) - (void) printf("\t%s", who); - else - (void) printf("\t%s %s", who, who_name); - } - - (void) printf("%c%s", delim, - deleg_node->dpn_perm.dp_name); - delim = ','; - } - - if (!prt_who) - (void) printf("\n"); - } - - uu_avl_walk_end(walk); -} - -static void -print_fs_perms(fs_perm_set_t *fspset) -{ - fs_perm_node_t *node = NULL; - char buf[MAXNAMELEN + 32]; - const char *dsname = buf; - - for (node = uu_list_first(fspset->fsps_list); node != NULL; - node = 
uu_list_next(fspset->fsps_list, node)) { - uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; - uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; - int left = 0; - - (void) snprintf(buf, sizeof (buf), - gettext("---- Permissions on %s "), - node->fspn_fsperm.fsp_name); - (void) printf(dsname); - left = 70 - strlen(buf); - while (left-- > 0) - (void) printf("-"); - (void) printf("\n"); - - print_set_creat_perms(sc_avl); - print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, - gettext("Local permissions:\n")); - print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, - gettext("Descendent permissions:\n")); - print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, - gettext("Local+Descendent permissions:\n")); - } -} - -static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; - -struct deleg_perms { - boolean_t un; - nvlist_t *nvl; -}; - -static int -set_deleg_perms(zfs_handle_t *zhp, void *data) -{ - struct deleg_perms *perms = (struct deleg_perms *)data; - zfs_type_t zfs_type = zfs_get_type(zhp); - - if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) - return (0); - - return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); -} - -static int -zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) -{ - zfs_handle_t *zhp; - nvlist_t *perm_nvl = NULL; - nvlist_t *update_perm_nvl = NULL; - int error = 1; - int c; - struct allow_opts opts = { 0 }; - - const char *optstr = un ? 
"ldugecsrh" : "ldugecsh"; - - /* check opts */ - while ((c = getopt(argc, argv, optstr)) != -1) { - switch (c) { - case 'l': - opts.local = B_TRUE; - break; - case 'd': - opts.descend = B_TRUE; - break; - case 'u': - opts.user = B_TRUE; - break; - case 'g': - opts.group = B_TRUE; - break; - case 'e': - opts.everyone = B_TRUE; - break; - case 's': - opts.set = B_TRUE; - break; - case 'c': - opts.create = B_TRUE; - break; - case 'r': - opts.recursive = B_TRUE; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case 'h': - opts.prt_usage = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check arguments */ - parse_allow_args(argc, argv, un, &opts); - - /* try to open the dataset */ - if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) == NULL) { - (void) fprintf(stderr, "Failed to open dataset: %s\n", - opts.dataset); - return (-1); - } - - if (zfs_get_fsacl(zhp, &perm_nvl) != 0) - goto cleanup2; - - fs_perm_set_init(&fs_perm_set); - if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { - (void) fprintf(stderr, "Failed to parse fsacl permissions\n"); - goto cleanup1; - } - - if (opts.prt_perms) - print_fs_perms(&fs_perm_set); - else { - (void) construct_fsacl_list(un, &opts, &update_perm_nvl); - if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) - goto cleanup0; - - if (un && opts.recursive) { - struct deleg_perms data = { un, update_perm_nvl }; - if (zfs_iter_filesystems(zhp, set_deleg_perms, - &data) != 0) - goto cleanup0; - } - } - - error = 0; - -cleanup0: - nvlist_free(perm_nvl); - nvlist_free(update_perm_nvl); -cleanup1: - fs_perm_set_fini(&fs_perm_set); -cleanup2: - zfs_close(zhp); - - return (error); -} - -static int -zfs_do_allow(int argc, char **argv) -{ - return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); -} - -static 
int -zfs_do_unallow(int argc, char **argv) -{ - return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); -} - -static int -zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) -{ - int errors = 0; - int i; - const char *tag; - boolean_t recursive = B_FALSE; - const char *opts = holding ? "rt" : "r"; - int c; - - /* check options */ - while ((c = getopt(argc, argv, opts)) != -1) { - switch (c) { - case 'r': - recursive = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 2) - usage(B_FALSE); - - tag = argv[0]; - --argc; - ++argv; - - if (holding && tag[0] == '.') { - /* tags starting with '.' are reserved for libzfs */ - (void) fprintf(stderr, gettext("tag may not start with '.'\n")); - usage(B_FALSE); - } - - for (i = 0; i < argc; ++i) { - zfs_handle_t *zhp; - char parent[ZFS_MAX_DATASET_NAME_LEN]; - const char *delim; - char *path = argv[i]; - - delim = strchr(path, '@'); - if (delim == NULL) { - (void) fprintf(stderr, - gettext("'%s' is not a snapshot\n"), path); - ++errors; - continue; - } - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; - - zhp = zfs_open(g_zfs, parent, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) { - ++errors; - continue; - } - if (holding) { - if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) - ++errors; - } else { - if (zfs_release(zhp, delim+1, tag, recursive) != 0) - ++errors; - } - zfs_close(zhp); - } - - return (errors != 0); -} - -/* - * zfs hold [-r] [-t] <tag> <snap> ... - * - * -r Recursively hold - * - * Apply a user-hold with the given tag to the list of snapshots. - */ -static int -zfs_do_hold(int argc, char **argv) -{ - return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); -} - -/* - * zfs release [-r] <tag> <snap> ... 
- * - * -r Recursively release - * - * Release a user-hold with the given tag from the list of snapshots. - */ -static int -zfs_do_release(int argc, char **argv) -{ - return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); -} - -typedef struct holds_cbdata { - boolean_t cb_recursive; - const char *cb_snapname; - nvlist_t **cb_nvlp; - size_t cb_max_namelen; - size_t cb_max_taglen; -} holds_cbdata_t; - -#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" -#define DATETIME_BUF_LEN (32) -/* - * - */ -static void -print_holds(boolean_t scripted, boolean_t literal, size_t nwidth, - size_t tagwidth, nvlist_t *nvl) -{ - int i; - nvpair_t *nvp = NULL; - char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; - const char *col; - - if (!scripted) { - for (i = 0; i < 3; i++) { - col = gettext(hdr_cols[i]); - if (i < 2) - (void) printf("%-*s ", i ? tagwidth : nwidth, - col); - else - (void) printf("%s\n", col); - } - } - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - char *zname = nvpair_name(nvp); - nvlist_t *nvl2; - nvpair_t *nvp2 = NULL; - (void) nvpair_value_nvlist(nvp, &nvl2); - while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { - char tsbuf[DATETIME_BUF_LEN]; - char *tagname = nvpair_name(nvp2); - uint64_t val = 0; - time_t time; - struct tm t; - - (void) nvpair_value_uint64(nvp2, &val); - if (literal) - snprintf(tsbuf, DATETIME_BUF_LEN, "%llu", val); - else { - time = (time_t)val; - (void) localtime_r(&time, &t); - (void) strftime(tsbuf, DATETIME_BUF_LEN, - gettext(STRFTIME_FMT_STR), &t); - } - - if (scripted) { - (void) printf("%s\t%s\t%s\n", zname, - tagname, tsbuf); - } else { - (void) printf("%-*s %-*s %s\n", nwidth, - zname, tagwidth, tagname, tsbuf); - } - } - } -} - -/* - * Generic callback function to list a dataset or snapshot. 
- */ -static int -holds_callback(zfs_handle_t *zhp, void *data) -{ - holds_cbdata_t *cbp = data; - nvlist_t *top_nvl = *cbp->cb_nvlp; - nvlist_t *nvl = NULL; - nvpair_t *nvp = NULL; - const char *zname = zfs_get_name(zhp); - size_t znamelen = strlen(zname); - - if (cbp->cb_recursive && cbp->cb_snapname != NULL) { - const char *snapname; - char *delim = strchr(zname, '@'); - if (delim == NULL) - return (0); - - snapname = delim + 1; - if (strcmp(cbp->cb_snapname, snapname)) - return (0); - } - - if (zfs_get_holds(zhp, &nvl) != 0) - return (-1); - - if (znamelen > cbp->cb_max_namelen) - cbp->cb_max_namelen = znamelen; - - while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { - const char *tag = nvpair_name(nvp); - size_t taglen = strlen(tag); - if (taglen > cbp->cb_max_taglen) - cbp->cb_max_taglen = taglen; - } - - return (nvlist_add_nvlist(top_nvl, zname, nvl)); -} - -/* - * zfs holds [-Hp] [-r | -d max] <dataset|snap> ... - * - * -H Suppress header output - * -p Output literal values - * -r Recursively search for holds - * -d max Limit depth of recursive search - */ -static int -zfs_do_holds(int argc, char **argv) -{ - int errors = 0; - int c; - int i; - boolean_t scripted = B_FALSE; - boolean_t literal = B_FALSE; - boolean_t recursive = B_FALSE; - const char *opts = "d:rHp"; - nvlist_t *nvl; - - int types = ZFS_TYPE_SNAPSHOT; - holds_cbdata_t cb = { 0 }; - - int limit = 0; - int ret = 0; - int flags = 0; - - /* check options */ - while ((c = getopt(argc, argv, opts)) != -1) { - switch (c) { - case 'd': - limit = parse_depth(optarg, &flags); - recursive = B_TRUE; - break; - case 'r': - recursive = B_TRUE; - break; - case 'H': - scripted = B_TRUE; - break; - case 'p': - literal = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - if (recursive) { - types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; - flags |= ZFS_ITER_RECURSE; - } - - argc -= optind; - argv += optind; - - /* check number of 
arguments */ - if (argc < 1) - usage(B_FALSE); - - if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); - - for (i = 0; i < argc; ++i) { - char *snapshot = argv[i]; - const char *delim; - const char *snapname = NULL; - - delim = strchr(snapshot, '@'); - if (delim != NULL) { - snapname = delim + 1; - if (recursive) - snapshot[delim - snapshot] = '\0'; - } - - cb.cb_recursive = recursive; - cb.cb_snapname = snapname; - cb.cb_nvlp = &nvl; - - /* - * 1. collect holds data, set format options - */ - ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, - holds_callback, &cb); - if (ret != 0) - ++errors; - } - - /* - * 2. print holds data - */ - print_holds(scripted, literal, cb.cb_max_namelen, cb.cb_max_taglen, - nvl); - - if (nvlist_empty(nvl)) - (void) printf(gettext("no datasets available\n")); - - nvlist_free(nvl); - - return (0 != errors); -} - -#define CHECK_SPINNER 30 -#define SPINNER_TIME 3 /* seconds */ -#define MOUNT_TIME 1 /* seconds */ - -typedef struct get_all_state { - boolean_t ga_verbose; - get_all_cb_t *ga_cbp; -} get_all_state_t; - -static int -get_one_dataset(zfs_handle_t *zhp, void *data) -{ - static char *spin[] = { "-", "\\", "|", "/" }; - static int spinval = 0; - static int spincheck = 0; - static time_t last_spin_time = (time_t)0; - get_all_state_t *state = data; - zfs_type_t type = zfs_get_type(zhp); - - if (state->ga_verbose) { - if (--spincheck < 0) { - time_t now = time(NULL); - if (last_spin_time + SPINNER_TIME < now) { - update_progress(spin[spinval++ % 4]); - last_spin_time = now; - } - spincheck = CHECK_SPINNER; - } - } - - /* - * Interate over any nested datasets. - */ - if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { - zfs_close(zhp); - return (1); - } - - /* - * Skip any datasets whose type does not match. 
- */ - if ((type & ZFS_TYPE_FILESYSTEM) == 0) { - zfs_close(zhp); - return (0); - } - libzfs_add_handle(state->ga_cbp, zhp); - assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc); - - return (0); -} - -static void -get_all_datasets(get_all_cb_t *cbp, boolean_t verbose) -{ - get_all_state_t state = { - .ga_verbose = verbose, - .ga_cbp = cbp - }; - - if (verbose) - set_progress_header(gettext("Reading ZFS config")); - (void) zfs_iter_root(g_zfs, get_one_dataset, &state); - - if (verbose) - finish_progress(gettext("done.")); -} - -/* - * Generic callback for sharing or mounting filesystems. Because the code is so - * similar, we have a common function with an extra parameter to determine which - * mode we are using. - */ -typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t; - -typedef struct share_mount_state { - share_mount_op_t sm_op; - boolean_t sm_verbose; - int sm_flags; - char *sm_options; - char *sm_proto; /* only valid for OP_SHARE */ - pthread_mutex_t sm_lock; /* protects the remaining fields */ - uint_t sm_total; /* number of filesystems to process */ - uint_t sm_done; /* number of filesystems processed */ - int sm_status; /* -1 if any of the share/mount operations failed */ -} share_mount_state_t; - -/* - * Share or mount a dataset. - */ -static int -share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, - boolean_t explicit, const char *options) -{ - char mountpoint[ZFS_MAXPROPLEN]; - char shareopts[ZFS_MAXPROPLEN]; - char smbshareopts[ZFS_MAXPROPLEN]; - const char *cmdname = op == OP_SHARE ? "share" : "mount"; - struct mnttab mnt; - uint64_t zoned, canmount; - boolean_t shared_nfs, shared_smb; - - assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); - - /* - * Check to make sure we can mount/share this dataset. If we - * are in the global zone and the filesystem is exported to a - * local zone, or if we are in a local zone and the - * filesystem is not exported, then it is an error. 
- */ - zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - - if (zoned && getzoneid() == GLOBAL_ZONEID) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "dataset is exported to a local zone\n"), cmdname, - zfs_get_name(zhp)); - return (1); - - } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "permission denied\n"), cmdname, - zfs_get_name(zhp)); - return (1); - } - - /* - * Ignore any filesystems which don't apply to us. This - * includes those with a legacy mountpoint, or those with - * legacy share options. - */ - verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, - sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, - sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); - - if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && - strcmp(smbshareopts, "off") == 0) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot share '%s': " - "legacy share\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("to " - "share this filesystem set " - "sharenfs property on\n")); - return (1); - } - - /* - * We cannot share or mount legacy filesystems. If the - * shareopts is non-legacy but the mountpoint is legacy, we - * treat it as a legacy share. 
- */ - if (strcmp(mountpoint, "legacy") == 0) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use %s(8) to " - "%s this filesystem\n"), cmdname, cmdname); - return (1); - } - - if (strcmp(mountpoint, "none") == 0) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': no " - "mountpoint set\n"), cmdname, zfs_get_name(zhp)); - return (1); - } - - /* - * canmount explicit outcome - * on no pass through - * on yes pass through - * off no return 0 - * off yes display error, return 1 - * noauto no return 0 - * noauto yes pass through - */ - canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); - if (canmount == ZFS_CANMOUNT_OFF) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "'canmount' property is set to 'off'\n"), cmdname, - zfs_get_name(zhp)); - return (1); - } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { - return (0); - } - - /* - * If this filesystem is inconsistent and has a receive resume - * token, we can not mount it. - */ - if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && - zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, - NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "Contains partially-completed state from " - "\"zfs receive -r\", which can be resumed with " - "\"zfs send -t\"\n"), - cmdname, zfs_get_name(zhp)); - return (1); - } - - /* - * At this point, we have verified that the mountpoint and/or - * shareopts are appropriate for auto management. If the - * filesystem is already mounted or shared, return (failing - * for explicit requests); otherwise mount or share the - * filesystem. 
- */ - switch (op) { - case OP_SHARE: - - shared_nfs = zfs_is_shared_nfs(zhp, NULL); - shared_smb = zfs_is_shared_smb(zhp, NULL); - - if ((shared_nfs && shared_smb) || - (shared_nfs && strcmp(shareopts, "on") == 0 && - strcmp(smbshareopts, "off") == 0) || - (shared_smb && strcmp(smbshareopts, "on") == 0 && - strcmp(shareopts, "off") == 0)) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot share " - "'%s': filesystem already shared\n"), - zfs_get_name(zhp)); - return (1); - } - - if (!zfs_is_mounted(zhp, NULL) && - zfs_mount(zhp, NULL, 0) != 0) - return (1); - - if (protocol == NULL) { - if (zfs_shareall(zhp) != 0) - return (1); - } else if (strcmp(protocol, "nfs") == 0) { - if (zfs_share_nfs(zhp)) - return (1); - } else if (strcmp(protocol, "smb") == 0) { - if (zfs_share_smb(zhp)) - return (1); - } else { - (void) fprintf(stderr, gettext("cannot share " - "'%s': invalid share type '%s' " - "specified\n"), - zfs_get_name(zhp), protocol); - return (1); - } - - break; - - case OP_MOUNT: - if (options == NULL) - mnt.mnt_mntopts = ""; - else - mnt.mnt_mntopts = (char *)options; - - if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && - zfs_is_mounted(zhp, NULL)) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot mount " - "'%s': filesystem already mounted\n"), - zfs_get_name(zhp)); - return (1); - } - - if (zfs_mount(zhp, options, flags) != 0) - return (1); - break; - } - - return (0); -} - -/* - * Reports progress in the form "(current/total)". Not thread-safe. 
- */ -static void -report_mount_progress(int current, int total) -{ - static time_t last_progress_time = 0; - time_t now = time(NULL); - char info[32]; - - /* display header if we're here for the first time */ - if (current == 1) { - set_progress_header(gettext("Mounting ZFS filesystems")); - } else if (current != total && last_progress_time + MOUNT_TIME >= now) { - /* too soon to report again */ - return; - } - - last_progress_time = now; - - (void) sprintf(info, "(%d/%d)", current, total); - - if (current == total) - finish_progress(info); - else - update_progress(info); -} - -/* - * zfs_foreach_mountpoint() callback that mounts or shares on filesystem and - * updates the progress meter - */ -static int -share_mount_one_cb(zfs_handle_t *zhp, void *arg) -{ - share_mount_state_t *sms = arg; - int ret; - - ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto, - B_FALSE, sms->sm_options); - - pthread_mutex_lock(&sms->sm_lock); - if (ret != 0) - sms->sm_status = ret; - sms->sm_done++; - if (sms->sm_verbose) - report_mount_progress(sms->sm_done, sms->sm_total); - pthread_mutex_unlock(&sms->sm_lock); - return (ret); -} - -static void -append_options(char *mntopts, char *newopts) -{ - int len = strlen(mntopts); - - /* original length plus new string to append plus 1 for the comma */ - if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) { - (void) fprintf(stderr, gettext("the opts argument for " - "'%c' option is too long (more than %d chars)\n"), - "-o", MNT_LINE_MAX); - usage(B_FALSE); - } - - if (*mntopts) - mntopts[len++] = ','; - - (void) strcpy(&mntopts[len], newopts); -} - -static int -share_mount(int op, int argc, char **argv) -{ - int do_all = 0; - boolean_t verbose = B_FALSE; - int c, ret = 0; - char *options = NULL; - int flags = 0; - - /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? 
":avo:O" : "a")) - != -1) { - switch (c) { - case 'a': - do_all = 1; - break; - case 'v': - verbose = B_TRUE; - break; - case 'o': - if (*optarg == '\0') { - (void) fprintf(stderr, gettext("empty mount " - "options (-o) specified\n")); - usage(B_FALSE); - } - - if (options == NULL) - options = safe_malloc(MNT_LINE_MAX + 1); - - /* option validation is done later */ - append_options(options, optarg); - break; - - case 'O': - warnx("no overlay mounts support on FreeBSD, ignoring"); - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (do_all) { - char *protocol = NULL; - - if (op == OP_SHARE && argc > 0) { - if (strcmp(argv[0], "nfs") != 0 && - strcmp(argv[0], "smb") != 0) { - (void) fprintf(stderr, gettext("share type " - "must be 'nfs' or 'smb'\n")); - usage(B_FALSE); - } - protocol = argv[0]; - argc--; - argv++; - } - - if (argc != 0) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - start_progress_timer(); - get_all_cb_t cb = { 0 }; - get_all_datasets(&cb, verbose); - - if (cb.cb_used == 0) { - if (options != NULL) - free(options); - return (0); - } - -#ifdef illumos - if (op == OP_SHARE) { - sa_init_selective_arg_t sharearg; - sharearg.zhandle_arr = cb.cb_handles; - sharearg.zhandle_len = cb.cb_used; - if ((ret = zfs_init_libshare_arg(g_zfs, - SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) { - (void) fprintf(stderr, gettext( - "Could not initialize libshare, %d"), ret); - return (ret); - } - } -#endif - share_mount_state_t share_mount_state = { 0 }; - share_mount_state.sm_op = op; - share_mount_state.sm_verbose = verbose; - share_mount_state.sm_flags = flags; - share_mount_state.sm_options = options; - share_mount_state.sm_proto = protocol; - 
share_mount_state.sm_total = cb.cb_used; - pthread_mutex_init(&share_mount_state.sm_lock, NULL); - - /* - * libshare isn't mt-safe, so only do the operation in parallel - * if we're mounting. - */ - zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, - share_mount_one_cb, &share_mount_state, op == OP_MOUNT); - ret = share_mount_state.sm_status; - - for (int i = 0; i < cb.cb_used; i++) - zfs_close(cb.cb_handles[i]); - free(cb.cb_handles); - } else if (argc == 0) { - struct mnttab entry; - - if ((op == OP_SHARE) || (options != NULL)) { - (void) fprintf(stderr, gettext("missing filesystem " - "argument (specify -a for all)\n")); - usage(B_FALSE); - } - - /* - * When mount is given no arguments, go through /etc/mnttab and - * display any active ZFS mounts. We hide any snapshots, since - * they are controlled automatically. - */ - rewind(mnttab_file); - while (getmntent(mnttab_file, &entry) == 0) { - if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || - strchr(entry.mnt_special, '@') != NULL) - continue; - - (void) printf("%-30s %s\n", entry.mnt_special, - entry.mnt_mountp); - } - - } else { - zfs_handle_t *zhp; - - if (argc > 1) { - (void) fprintf(stderr, - gettext("too many arguments\n")); - usage(B_FALSE); - } - - if ((zhp = zfs_open(g_zfs, argv[0], - ZFS_TYPE_FILESYSTEM)) == NULL) { - ret = 1; - } else { - ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, - options); - zfs_close(zhp); - } - } - - return (ret); -} - -/* - * zfs mount -a [nfs] - * zfs mount filesystem - * - * Mount all filesystems, or mount the given filesystem. - */ -static int -zfs_do_mount(int argc, char **argv) -{ - return (share_mount(OP_MOUNT, argc, argv)); -} - -/* - * zfs share -a [nfs | smb] - * zfs share filesystem - * - * Share all filesystems, or share the given filesystem. 
- */ -static int -zfs_do_share(int argc, char **argv) -{ - return (share_mount(OP_SHARE, argc, argv)); -} - -typedef struct unshare_unmount_node { - zfs_handle_t *un_zhp; - char *un_mountp; - uu_avl_node_t un_avlnode; -} unshare_unmount_node_t; - -/* ARGSUSED */ -static int -unshare_unmount_compare(const void *larg, const void *rarg, void *unused) -{ - const unshare_unmount_node_t *l = larg; - const unshare_unmount_node_t *r = rarg; - - return (strcmp(l->un_mountp, r->un_mountp)); -} - -/* - * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an - * absolute path, find the entry /etc/mnttab, verify that its a ZFS filesystem, - * and unmount it appropriately. - */ -static int -unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) -{ - zfs_handle_t *zhp; - int ret = 0; - struct stat64 statbuf; - struct extmnttab entry; - const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount"; - ino_t path_inode; - - /* - * Search for the path in /etc/mnttab. Rather than looking for the - * specific path, which can be fooled by non-standard paths (i.e. ".." - * or "//"), we stat() the path and search for the corresponding - * (major,minor) device pair. - */ - if (stat64(path, &statbuf) != 0) { - (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), - cmdname, path, strerror(errno)); - return (1); - } - path_inode = statbuf.st_ino; - - /* - * Search for the given (major,minor) pair in the mount table. 
- */ -#ifdef illumos - rewind(mnttab_file); - while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { - if (entry.mnt_major == major(statbuf.st_dev) && - entry.mnt_minor == minor(statbuf.st_dev)) - break; - } -#else - { - struct statfs sfs; - - if (statfs(path, &sfs) != 0) { - (void) fprintf(stderr, "%s: %s\n", path, - strerror(errno)); - ret = -1; - } - statfs2mnttab(&sfs, &entry); - } -#endif - if (ret != 0) { - if (op == OP_SHARE) { - (void) fprintf(stderr, gettext("cannot %s '%s': not " - "currently mounted\n"), cmdname, path); - return (1); - } - (void) fprintf(stderr, gettext("warning: %s not in mnttab\n"), - path); - if ((ret = umount2(path, flags)) != 0) - (void) fprintf(stderr, gettext("%s: %s\n"), path, - strerror(errno)); - return (ret != 0); - } - - if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { - (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " - "filesystem\n"), cmdname, path); - return (1); - } - - if ((zhp = zfs_open(g_zfs, entry.mnt_special, - ZFS_TYPE_FILESYSTEM)) == NULL) - return (1); - - ret = 1; - if (stat64(entry.mnt_mountp, &statbuf) != 0) { - (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), - cmdname, path, strerror(errno)); - goto out; - } else if (statbuf.st_ino != path_inode) { - (void) fprintf(stderr, gettext("cannot " - "%s '%s': not a mountpoint\n"), cmdname, path); - goto out; - } - - if (op == OP_SHARE) { - char nfs_mnt_prop[ZFS_MAXPROPLEN]; - char smbshare_prop[ZFS_MAXPROPLEN]; - - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, - sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop, - sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0); - - if (strcmp(nfs_mnt_prop, "off") == 0 && - strcmp(smbshare_prop, "off") == 0) { - (void) fprintf(stderr, gettext("cannot unshare " - "'%s': legacy share\n"), path); -#ifdef illumos - (void) fprintf(stderr, gettext("use " - "unshare(1M) to unshare this filesystem\n")); -#endif - } else if 
(!zfs_is_shared(zhp)) { - (void) fprintf(stderr, gettext("cannot unshare '%s': " - "not currently shared\n"), path); - } else { - ret = zfs_unshareall_bypath(zhp, path); - } - } else { - char mtpt_prop[ZFS_MAXPROPLEN]; - - verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop, - sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0); - - if (is_manual) { - ret = zfs_unmount(zhp, NULL, flags); - } else if (strcmp(mtpt_prop, "legacy") == 0) { - (void) fprintf(stderr, gettext("cannot unmount " - "'%s': legacy mountpoint\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use umount(8) " - "to unmount this filesystem\n")); - } else { - ret = zfs_unmountall(zhp, flags); - } - } - -out: - zfs_close(zhp); - - return (ret != 0); -} - -/* - * Generic callback for unsharing or unmounting a filesystem. - */ -static int -unshare_unmount(int op, int argc, char **argv) -{ - int do_all = 0; - int flags = 0; - int ret = 0; - int c; - zfs_handle_t *zhp; - char nfs_mnt_prop[ZFS_MAXPROPLEN]; - char sharesmb[ZFS_MAXPROPLEN]; - - /* check options */ - while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) { - switch (c) { - case 'a': - do_all = 1; - break; - case 'f': - flags = MS_FORCE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (do_all) { - /* - * We could make use of zfs_for_each() to walk all datasets in - * the system, but this would be very inefficient, especially - * since we would have to linearly search /etc/mnttab for each - * one. Instead, do one pass through /etc/mnttab looking for - * zfs entries and call zfs_unmount() for each one. - * - * Things get a little tricky if the administrator has created - * mountpoints beneath other ZFS filesystems. In this case, we - * have to unmount the deepest filesystems first. 
To accomplish - * this, we place all the mountpoints in an AVL tree sorted by - * the special type (dataset name), and walk the result in - * reverse to make sure to get any snapshots first. - */ - struct mnttab entry; - uu_avl_pool_t *pool; - uu_avl_t *tree = NULL; - unshare_unmount_node_t *node; - uu_avl_index_t idx; - uu_avl_walk_t *walk; - - if (argc != 0) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - if (((pool = uu_avl_pool_create("unmount_pool", - sizeof (unshare_unmount_node_t), - offsetof(unshare_unmount_node_t, un_avlnode), - unshare_unmount_compare, UU_DEFAULT)) == NULL) || - ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) - nomem(); - - rewind(mnttab_file); - while (getmntent(mnttab_file, &entry) == 0) { - - /* ignore non-ZFS entries */ - if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) - continue; - - /* ignore snapshots */ - if (strchr(entry.mnt_special, '@') != NULL) - continue; - - if ((zhp = zfs_open(g_zfs, entry.mnt_special, - ZFS_TYPE_FILESYSTEM)) == NULL) { - ret = 1; - continue; - } - - /* - * Ignore datasets that are excluded/restricted by - * parent pool name. 
- */ - if (zpool_skip_pool(zfs_get_pool_name(zhp))) { - zfs_close(zhp); - continue; - } - - switch (op) { - case OP_SHARE: - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, - nfs_mnt_prop, - sizeof (nfs_mnt_prop), - NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfs_mnt_prop, "off") != 0) - break; - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, - nfs_mnt_prop, - sizeof (nfs_mnt_prop), - NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfs_mnt_prop, "off") == 0) - continue; - break; - case OP_MOUNT: - /* Ignore legacy mounts */ - verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, - nfs_mnt_prop, - sizeof (nfs_mnt_prop), - NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfs_mnt_prop, "legacy") == 0) - continue; - /* Ignore canmount=noauto mounts */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == - ZFS_CANMOUNT_NOAUTO) - continue; - default: - break; - } - - node = safe_malloc(sizeof (unshare_unmount_node_t)); - node->un_zhp = zhp; - node->un_mountp = safe_strdup(entry.mnt_mountp); - - uu_avl_node_init(node, &node->un_avlnode, pool); - - if (uu_avl_find(tree, node, NULL, &idx) == NULL) { - uu_avl_insert(tree, node, idx); - } else { - zfs_close(node->un_zhp); - free(node->un_mountp); - free(node); - } - } - - /* - * Walk the AVL tree in reverse, unmounting each filesystem and - * removing it from the AVL tree in the process. 
- */ - if ((walk = uu_avl_walk_start(tree, - UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) - nomem(); - - while ((node = uu_avl_walk_next(walk)) != NULL) { - uu_avl_remove(tree, node); - - switch (op) { - case OP_SHARE: - if (zfs_unshareall_bypath(node->un_zhp, - node->un_mountp) != 0) - ret = 1; - break; - - case OP_MOUNT: - if (zfs_unmount(node->un_zhp, - node->un_mountp, flags) != 0) - ret = 1; - break; - } - - zfs_close(node->un_zhp); - free(node->un_mountp); - free(node); - } - - uu_avl_walk_end(walk); - uu_avl_destroy(tree); - uu_avl_pool_destroy(pool); - - } else { - if (argc != 1) { - if (argc == 0) - (void) fprintf(stderr, - gettext("missing filesystem argument\n")); - else - (void) fprintf(stderr, - gettext("too many arguments\n")); - usage(B_FALSE); - } - - /* - * We have an argument, but it may be a full path or a ZFS - * filesystem. Pass full paths off to unmount_path() (shared by - * manual_unmount), otherwise open the filesystem and pass to - * zfs_unmount(). - */ - if (argv[0][0] == '/') - return (unshare_unmount_path(op, argv[0], - flags, B_FALSE)); - - if ((zhp = zfs_open(g_zfs, argv[0], - ZFS_TYPE_FILESYSTEM)) == NULL) - return (1); - - verify(zfs_prop_get(zhp, op == OP_SHARE ? 
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, - nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, - NULL, 0, B_FALSE) == 0); - - switch (op) { - case OP_SHARE: - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, - nfs_mnt_prop, - sizeof (nfs_mnt_prop), - NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, - sharesmb, sizeof (sharesmb), NULL, NULL, - 0, B_FALSE) == 0); - - if (strcmp(nfs_mnt_prop, "off") == 0 && - strcmp(sharesmb, "off") == 0) { - (void) fprintf(stderr, gettext("cannot " - "unshare '%s': legacy share\n"), - zfs_get_name(zhp)); -#ifdef illumos - (void) fprintf(stderr, gettext("use " - "unshare(1M) to unshare this " - "filesystem\n")); -#endif - ret = 1; - } else if (!zfs_is_shared(zhp)) { - (void) fprintf(stderr, gettext("cannot " - "unshare '%s': not currently " - "shared\n"), zfs_get_name(zhp)); - ret = 1; - } else if (zfs_unshareall(zhp) != 0) { - ret = 1; - } - break; - - case OP_MOUNT: - if (strcmp(nfs_mnt_prop, "legacy") == 0) { - (void) fprintf(stderr, gettext("cannot " - "unmount '%s': legacy " - "mountpoint\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use " - "umount(8) to unmount this " - "filesystem\n")); - ret = 1; - } else if (!zfs_is_mounted(zhp, NULL)) { - (void) fprintf(stderr, gettext("cannot " - "unmount '%s': not currently " - "mounted\n"), - zfs_get_name(zhp)); - ret = 1; - } else if (zfs_unmountall(zhp, flags) != 0) { - ret = 1; - } - break; - } - - zfs_close(zhp); - } - - return (ret); -} - -/* - * zfs unmount -a - * zfs unmount filesystem - * - * Unmount all filesystems, or a specific ZFS filesystem. - */ -static int -zfs_do_unmount(int argc, char **argv) -{ - return (unshare_unmount(OP_MOUNT, argc, argv)); -} - -/* - * zfs unshare -a - * zfs unshare filesystem - * - * Unshare all filesystems, or a specific ZFS filesystem. 
- */ -static int -zfs_do_unshare(int argc, char **argv) -{ - return (unshare_unmount(OP_SHARE, argc, argv)); -} - -/* - * Attach/detach the given dataset to/from the given jail - */ -/* ARGSUSED */ -static int -do_jail(int argc, char **argv, int attach) -{ - zfs_handle_t *zhp; - int jailid, ret; - - /* check number of arguments */ - if (argc < 3) { - (void) fprintf(stderr, gettext("missing argument(s)\n")); - usage(B_FALSE); - } - if (argc > 3) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - jailid = jail_getid(argv[1]); - if (jailid < 0) { - (void) fprintf(stderr, gettext("invalid jail id or name\n")); - usage(B_FALSE); - } - - zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); - if (zhp == NULL) - return (1); - - ret = (zfs_jail(zhp, jailid, attach) != 0); - - zfs_close(zhp); - return (ret); -} - -/* - * zfs jail jailid filesystem - * - * Attach the given dataset to the given jail - */ -/* ARGSUSED */ -static int -zfs_do_jail(int argc, char **argv) -{ - - return (do_jail(argc, argv, 1)); -} - -/* - * zfs unjail jailid filesystem - * - * Detach the given dataset from the given jail - */ -/* ARGSUSED */ -static int -zfs_do_unjail(int argc, char **argv) -{ - - return (do_jail(argc, argv, 0)); -} - -/* - * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is - * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. 
- */ -static int -manual_mount(int argc, char **argv) -{ - zfs_handle_t *zhp; - char mountpoint[ZFS_MAXPROPLEN]; - char mntopts[MNT_LINE_MAX] = { '\0' }; - int ret = 0; - int c; - int flags = 0; - char *dataset, *path; - - /* check options */ - while ((c = getopt(argc, argv, ":mo:O")) != -1) { - switch (c) { - case 'o': - (void) strlcpy(mntopts, optarg, sizeof (mntopts)); - break; - case 'O': - flags |= MS_OVERLAY; - break; - case 'm': - flags |= MS_NOMNTTAB; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - (void) fprintf(stderr, gettext("usage: mount [-o opts] " - "<path>\n")); - return (2); - } - } - - argc -= optind; - argv += optind; - - /* check that we only have two arguments */ - if (argc != 2) { - if (argc == 0) - (void) fprintf(stderr, gettext("missing dataset " - "argument\n")); - else if (argc == 1) - (void) fprintf(stderr, - gettext("missing mountpoint argument\n")); - else - (void) fprintf(stderr, gettext("too many arguments\n")); - (void) fprintf(stderr, "usage: mount <dataset> <mountpoint>\n"); - return (2); - } - - dataset = argv[0]; - path = argv[1]; - - /* try to open the dataset */ - if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL) - return (1); - - (void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, B_FALSE); - - /* check for legacy mountpoint and complain appropriately */ - ret = 0; - if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) { - if (zmount(dataset, path, flags, MNTTYPE_ZFS, - NULL, 0, mntopts, sizeof (mntopts)) != 0) { - (void) fprintf(stderr, gettext("mount failed: %s\n"), - strerror(errno)); - ret = 1; - } - } else { - (void) fprintf(stderr, gettext("filesystem '%s' cannot be " - "mounted using 'mount -t zfs'\n"), dataset); - (void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' " - "instead.\n"), 
path); - (void) fprintf(stderr, gettext("If you must use 'mount -t zfs' " - "or /etc/fstab, use 'zfs set mountpoint=legacy'.\n")); - (void) fprintf(stderr, gettext("See zfs(8) for more " - "information.\n")); - ret = 1; - } - - return (ret); -} - -/* - * Called when invoked as /etc/fs/zfs/umount. Unlike a manual mount, we allow - * unmounts of non-legacy filesystems, as this is the dominant administrative - * interface. - */ -static int -manual_unmount(int argc, char **argv) -{ - int flags = 0; - int c; - - /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - case 'f': - flags = MS_FORCE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - (void) fprintf(stderr, gettext("usage: unmount [-f] " - "<path>\n")); - return (2); - } - } - - argc -= optind; - argv += optind; - - /* check arguments */ - if (argc != 1) { - if (argc == 0) - (void) fprintf(stderr, gettext("missing path " - "argument\n")); - else - (void) fprintf(stderr, gettext("too many arguments\n")); - (void) fprintf(stderr, gettext("usage: unmount [-f] <path>\n")); - return (2); - } - - return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); -} - -static int -find_command_idx(char *command, int *idx) -{ - int i; - - for (i = 0; i < NCOMMAND; i++) { - if (command_table[i].name == NULL) - continue; - - if (strcmp(command, command_table[i].name) == 0) { - *idx = i; - return (0); - } - } - return (1); -} - -static int -zfs_do_diff(int argc, char **argv) -{ - zfs_handle_t *zhp; - int flags = 0; - char *tosnap = NULL; - char *fromsnap = NULL; - char *atp, *copy; - int err = 0; - int c; - - while ((c = getopt(argc, argv, "FHt")) != -1) { - switch (c) { - case 'F': - flags |= ZFS_DIFF_CLASSIFY; - break; - case 'H': - flags |= ZFS_DIFF_PARSEABLE; - break; - case 't': - flags |= ZFS_DIFF_TIMESTAMP; - break; - default: - (void) fprintf(stderr, - gettext("invalid option '%c'\n"), optopt); - usage(B_FALSE); - } - } - - argc -= optind; - 
argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, - gettext("must provide at least one snapshot name\n")); - usage(B_FALSE); - } - - if (argc > 2) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - fromsnap = argv[0]; - tosnap = (argc == 2) ? argv[1] : NULL; - - copy = NULL; - if (*fromsnap != '@') - copy = strdup(fromsnap); - else if (tosnap) - copy = strdup(tosnap); - if (copy == NULL) - usage(B_FALSE); - - if ((atp = strchr(copy, '@')) != NULL) - *atp = '\0'; - - if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) - return (1); - - free(copy); - - /* - * Ignore SIGPIPE so that the library can give us - * information on any failure - */ - (void) sigignore(SIGPIPE); - - err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); - - zfs_close(zhp); - - return (err != 0); -} - -/* - * zfs remap <filesystem | volume> - * - * Remap the indirect blocks in the given fileystem or volume. - */ -static int -zfs_do_remap(int argc, char **argv) -{ - const char *fsname; - int err = 0; - int c; - - /* check options */ - while ((c = getopt(argc, argv, "")) != -1) { - switch (c) { - case '?': - (void) fprintf(stderr, - gettext("invalid option '%c'\n"), optopt); - usage(B_FALSE); - } - } - - if (argc != 2) { - (void) fprintf(stderr, gettext("wrong number of arguments\n")); - usage(B_FALSE); - } - - fsname = argv[1]; - err = zfs_remap_indirects(g_zfs, fsname); - - return (err); -} - -/* - * zfs bookmark <fs@snap> <fs#bmark> - * - * Creates a bookmark with the given name from the given snapshot. 
- */ -static int -zfs_do_bookmark(int argc, char **argv) -{ - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - zfs_handle_t *zhp; - nvlist_t *nvl; - int ret = 0; - int c; - - /* check options */ - while ((c = getopt(argc, argv, "")) != -1) { - switch (c) { - case '?': - (void) fprintf(stderr, - gettext("invalid option '%c'\n"), optopt); - goto usage; - } - } - - argc -= optind; - argv += optind; - - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - goto usage; - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing bookmark argument\n")); - goto usage; - } - - if (strchr(argv[1], '#') == NULL) { - (void) fprintf(stderr, - gettext("invalid bookmark name '%s' -- " - "must contain a '#'\n"), argv[1]); - goto usage; - } - - if (argv[0][0] == '@') { - /* - * Snapshot name begins with @. - * Default to same fs as bookmark. - */ - (void) strncpy(snapname, argv[1], sizeof (snapname)); - *strchr(snapname, '#') = '\0'; - (void) strlcat(snapname, argv[0], sizeof (snapname)); - } else { - (void) strncpy(snapname, argv[0], sizeof (snapname)); - } - zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT); - if (zhp == NULL) - goto usage; - zfs_close(zhp); - - - nvl = fnvlist_alloc(); - fnvlist_add_string(nvl, argv[1], snapname); - ret = lzc_bookmark(nvl, NULL); - fnvlist_free(nvl); - - if (ret != 0) { - const char *err_msg = NULL; - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot create bookmark '%s'"), argv[1]); - - switch (ret) { - case EXDEV: - err_msg = "bookmark is in a different pool"; - break; - case EEXIST: - err_msg = "bookmark exists"; - break; - case EINVAL: - err_msg = "invalid argument"; - break; - case ENOTSUP: - err_msg = "bookmark feature not enabled"; - break; - case ENOSPC: - err_msg = "out of space"; - break; - default: - (void) zfs_standard_error(g_zfs, ret, errbuf); - break; - } - if (err_msg != NULL) { - (void) fprintf(stderr, "%s: %s\n", 
errbuf, - dgettext(TEXT_DOMAIN, err_msg)); - } - } - - return (ret != 0); - -usage: - usage(B_FALSE); - return (-1); -} - -static int -zfs_do_channel_program(int argc, char **argv) -{ - int ret, fd; - char c; - char *progbuf, *filename, *poolname; - size_t progsize, progread; - nvlist_t *outnvl = NULL; - uint64_t instrlimit = ZCP_DEFAULT_INSTRLIMIT; - uint64_t memlimit = ZCP_DEFAULT_MEMLIMIT; - boolean_t sync_flag = B_TRUE, json_output = B_FALSE; - zpool_handle_t *zhp; - - /* check options */ - while (-1 != - (c = getopt(argc, argv, "jnt:(instr-limit)m:(memory-limit)"))) { - switch (c) { - case 't': - case 'm': { - uint64_t arg; - char *endp; - - errno = 0; - arg = strtoull(optarg, &endp, 0); - if (errno != 0 || *endp != '\0') { - (void) fprintf(stderr, gettext( - "invalid argument " - "'%s': expected integer\n"), optarg); - goto usage; - } - - if (c == 't') { - if (arg > ZCP_MAX_INSTRLIMIT || arg == 0) { - (void) fprintf(stderr, gettext( - "Invalid instruction limit: " - "%s\n"), optarg); - return (1); - } else { - instrlimit = arg; - } - } else { - ASSERT3U(c, ==, 'm'); - if (arg > ZCP_MAX_MEMLIMIT || arg == 0) { - (void) fprintf(stderr, gettext( - "Invalid memory limit: " - "%s\n"), optarg); - return (1); - } else { - memlimit = arg; - } - } - break; - } - case 'n': { - sync_flag = B_FALSE; - break; - } - case 'j': { - json_output = B_TRUE; - break; - } - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - goto usage; - } - } - - argc -= optind; - argv += optind; - - if (argc < 2) { - (void) fprintf(stderr, - gettext("invalid number of arguments\n")); - goto usage; - } - - poolname = argv[0]; - filename = argv[1]; - if (strcmp(filename, "-") == 0) { - fd = 0; - filename = "standard input"; - } else if ((fd = open(filename, O_RDONLY)) < 0) { - (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), - filename, strerror(errno)); - return (1); - } - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { - (void) fprintf(stderr, 
gettext("cannot open pool '%s'"), - poolname); - return (1); - } - zpool_close(zhp); - - /* - * Read in the channel program, expanding the program buffer as - * necessary. - */ - progread = 0; - progsize = 1024; - progbuf = safe_malloc(progsize); - do { - ret = read(fd, progbuf + progread, progsize - progread); - progread += ret; - if (progread == progsize && ret > 0) { - progsize *= 2; - progbuf = safe_realloc(progbuf, progsize); - } - } while (ret > 0); - - if (fd != 0) - (void) close(fd); - if (ret < 0) { - free(progbuf); - (void) fprintf(stderr, - gettext("cannot read '%s': %s\n"), - filename, strerror(errno)); - return (1); - } - progbuf[progread] = '\0'; - - /* - * Any remaining arguments are passed as arguments to the lua script as - * a string array: - * { - * "argv" -> [ "arg 1", ... "arg n" ], - * } - */ - nvlist_t *argnvl = fnvlist_alloc(); - fnvlist_add_string_array(argnvl, ZCP_ARG_CLIARGV, argv + 2, argc - 2); - - if (sync_flag) { - ret = lzc_channel_program(poolname, progbuf, - instrlimit, memlimit, argnvl, &outnvl); - } else { - ret = lzc_channel_program_nosync(poolname, progbuf, - instrlimit, memlimit, argnvl, &outnvl); - } - - if (ret != 0) { - /* - * On error, report the error message handed back by lua if one - * exists. Otherwise, generate an appropriate error message, - * falling back on strerror() for an unexpected return code. 
- */ - char *errstring = NULL; - const char *msg = gettext("Channel program execution failed"); - if (outnvl != NULL && nvlist_exists(outnvl, ZCP_RET_ERROR)) { - (void) nvlist_lookup_string(outnvl, - ZCP_RET_ERROR, &errstring); - if (errstring == NULL) - errstring = strerror(ret); - } else { - switch (ret) { - case EINVAL: - errstring = - "Invalid instruction or memory limit."; - break; - case ENOMEM: - errstring = "Return value too large."; - break; - case ENOSPC: - errstring = "Memory limit exhausted."; - break; -#ifdef illumos - case ETIME: -#else - case ETIMEDOUT: -#endif - errstring = "Timed out."; - break; - case EPERM: - errstring = "Permission denied. Channel " - "programs must be run as root."; - break; - default: - (void) zfs_standard_error(g_zfs, ret, msg); - } - } - if (errstring != NULL) - (void) fprintf(stderr, "%s:\n%s\n", msg, errstring); - } else { - if (json_output) { - (void) nvlist_print_json(stdout, outnvl); - } else if (nvlist_empty(outnvl)) { - (void) fprintf(stdout, gettext("Channel program fully " - "executed and did not produce output.\n")); - } else { - (void) fprintf(stdout, gettext("Channel program fully " - "executed and produced output:\n")); - dump_nvlist(outnvl, 4); - } - } - - free(progbuf); - fnvlist_free(outnvl); - fnvlist_free(argnvl); - return (ret != 0); - -usage: - usage(B_FALSE); - return (-1); -} - -int -main(int argc, char **argv) -{ - int ret = 0; - int i; - char *progname; - char *cmdname; - - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - opterr = 0; - - if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, gettext("internal error: failed to " - "initialize ZFS library\n")); - return (1); - } - - zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); - - libzfs_print_on_error(g_zfs, B_TRUE); - - if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) { - (void) fprintf(stderr, gettext("internal error: unable to " - "open %s\n"), MNTTAB); - return (1); - } - - /* - * This command also 
doubles as the /etc/fs mount and unmount program. - * Determine if we should take this behavior based on argv[0]. - */ - progname = basename(argv[0]); - if (strcmp(progname, "mount") == 0) { - ret = manual_mount(argc, argv); - } else if (strcmp(progname, "umount") == 0) { - ret = manual_unmount(argc, argv); - } else { - /* - * Make sure the user has specified some command. - */ - if (argc < 2) { - (void) fprintf(stderr, gettext("missing command\n")); - usage(B_FALSE); - } - - cmdname = argv[1]; - - /* - * The 'umount' command is an alias for 'unmount' - */ - if (strcmp(cmdname, "umount") == 0) - cmdname = "unmount"; - - /* - * The 'recv' command is an alias for 'receive' - */ - if (strcmp(cmdname, "recv") == 0) - cmdname = "receive"; - - /* - * The 'snap' command is an alias for 'snapshot' - */ - if (strcmp(cmdname, "snap") == 0) - cmdname = "snapshot"; - - /* - * Special case '-?' - */ - if (strcmp(cmdname, "-?") == 0) - usage(B_TRUE); - - /* - * Run the appropriate command. - */ - libzfs_mnttab_cache(g_zfs, B_TRUE); - if (find_command_idx(cmdname, &i) == 0) { - current_command = &command_table[i]; - ret = command_table[i].func(argc - 1, argv + 1); - } else if (strchr(cmdname, '=') != NULL) { - verify(find_command_idx("set", &i) == 0); - current_command = &command_table[i]; - ret = command_table[i].func(argc, argv); - } else { - (void) fprintf(stderr, gettext("unrecognized " - "command '%s'\n"), cmdname); - usage(B_FALSE); - } - libzfs_mnttab_cache(g_zfs, B_FALSE); - } - - (void) fclose(mnttab_file); - - if (ret == 0 && log_history) - (void) zpool_log_history(g_zfs, history_str); - - libzfs_fini(g_zfs); - - /* - * The 'ZFS_ABORT' environment variable causes us to dump core on exit - * for the purposes of running ::findleaks. 
- */ - if (getenv("ZFS_ABORT") != NULL) { - (void) printf("dumping core by request\n"); - abort(); - } - - return (ret); -} diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h deleted file mode 100644 index a56af59adb15..000000000000 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _ZFS_UTIL_H -#define _ZFS_UTIL_H - -#include <libzfs.h> - -#ifdef __cplusplus -extern "C" { -#endif - -void * safe_malloc(size_t size); -void nomem(void); -extern libzfs_handle_t *g_zfs; - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_UTIL_H */ diff --git a/cddl/contrib/opensolaris/cmd/zhack/zhack.c b/cddl/contrib/opensolaris/cmd/zhack/zhack.c deleted file mode 100644 index 20a0c60e6a18..000000000000 --- a/cddl/contrib/opensolaris/cmd/zhack/zhack.c +++ /dev/null @@ -1,535 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - */ - -/* - * zhack is a debugging tool that can write changes to ZFS pool using libzpool - * for testing purposes. Altering pools with zhack is unsupported and may - * result in corrupted pools. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <sys/zfs_context.h> -#include <sys/spa.h> -#include <sys/spa_impl.h> -#include <sys/dmu.h> -#include <sys/zap.h> -#include <sys/zfs_znode.h> -#include <sys/dsl_synctask.h> -#include <sys/vdev.h> -#include <sys/fs/zfs.h> -#include <sys/dmu_objset.h> -#include <sys/dsl_pool.h> -#include <sys/zio_checksum.h> -#include <sys/zio_compress.h> -#include <sys/zfeature.h> -#include <sys/dmu_tx.h> -#undef verify -#include <libzfs.h> - -extern boolean_t zfeature_checks_disable; - -const char cmdname[] = "zhack"; -libzfs_handle_t *g_zfs; -static importargs_t g_importargs; -static char *g_pool; -static boolean_t g_readonly; - -static void -usage(void) -{ - (void) fprintf(stderr, - "Usage: %s [-c cachefile] [-d dir] <subcommand> <args> ...\n" - "where <subcommand> <args> is one of the following:\n" - "\n", cmdname); - - (void) fprintf(stderr, - " feature stat <pool>\n" - " print information about enabled features\n" - " feature enable [-d desc] <pool> <feature>\n" - " add a new enabled feature to the pool\n" - " -d <desc> sets the feature's description\n" - " feature ref [-md] <pool> <feature>\n" - " change the refcount on the given feature\n" - " -d decrease instead of increase the refcount\n" - " -m add the feature to the label if increasing refcount\n" - "\n" - " <feature> : should be a feature guid\n"); - exit(1); -} - - -static void -fatal(spa_t *spa, void *tag, const char *fmt, ...) -{ - va_list ap; - - if (spa != NULL) { - spa_close(spa, tag); - (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE); - } - - va_start(ap, fmt); - (void) fprintf(stderr, "%s: ", cmdname); - (void) vfprintf(stderr, fmt, ap); - va_end(ap); - (void) fprintf(stderr, "\n"); - - exit(1); -} - -/* ARGSUSED */ -static int -space_delta_cb(dmu_object_type_t bonustype, void *data, - uint64_t *userp, uint64_t *groupp) -{ - /* - * Is it a valid type of object to track? 
- */ - if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) - return (ENOENT); - (void) fprintf(stderr, "modifying object that needs user accounting"); - abort(); - /* NOTREACHED */ -} - -/* - * Target is the dataset whose pool we want to open. - */ -static void -zhack_import(char *target, boolean_t readonly) -{ - nvlist_t *config; - nvlist_t *props; - int error; - - kernel_init(readonly ? FREAD : (FREAD | FWRITE)); - g_zfs = libzfs_init(); - ASSERT(g_zfs != NULL); - - dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); - - g_readonly = readonly; - g_importargs.unique = B_TRUE; - g_importargs.can_be_active = readonly; - g_pool = strdup(target); - - error = zpool_tryimport(g_zfs, target, &config, &g_importargs); - if (error) - fatal(NULL, FTAG, "cannot import '%s': %s", target, - libzfs_error_description(g_zfs)); - - props = NULL; - if (readonly) { - VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); - } - - zfeature_checks_disable = B_TRUE; - error = spa_import(target, config, props, - (readonly ? 
ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL)); - zfeature_checks_disable = B_FALSE; - if (error == EEXIST) - error = 0; - - if (error) - fatal(NULL, FTAG, "can't import '%s': %s", target, - strerror(error)); -} - -static void -zhack_spa_open(char *target, boolean_t readonly, void *tag, spa_t **spa) -{ - int err; - - zhack_import(target, readonly); - - zfeature_checks_disable = B_TRUE; - err = spa_open(target, spa, tag); - zfeature_checks_disable = B_FALSE; - - if (err != 0) - fatal(*spa, FTAG, "cannot open '%s': %s", target, - strerror(err)); - if (spa_version(*spa) < SPA_VERSION_FEATURES) { - fatal(*spa, FTAG, "'%s' has version %d, features not enabled", - target, (int)spa_version(*spa)); - } -} - -static void -dump_obj(objset_t *os, uint64_t obj, const char *name) -{ - zap_cursor_t zc; - zap_attribute_t za; - - (void) printf("%s_obj:\n", name); - - for (zap_cursor_init(&zc, os, obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - if (za.za_integer_length == 8) { - ASSERT(za.za_num_integers == 1); - (void) printf("\t%s = %llu\n", - za.za_name, (u_longlong_t)za.za_first_integer); - } else { - ASSERT(za.za_integer_length == 1); - char val[1024]; - VERIFY(zap_lookup(os, obj, za.za_name, - 1, sizeof (val), val) == 0); - (void) printf("\t%s = %s\n", za.za_name, val); - } - } - zap_cursor_fini(&zc); -} - -static void -dump_mos(spa_t *spa) -{ - nvlist_t *nv = spa->spa_label_features; - - (void) printf("label config:\n"); - for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL); - pair != NULL; - pair = nvlist_next_nvpair(nv, pair)) { - (void) printf("\t%s\n", nvpair_name(pair)); - } -} - -static void -zhack_do_feature_stat(int argc, char **argv) -{ - spa_t *spa; - objset_t *os; - char *target; - - argc--; - argv++; - - if (argc < 1) { - (void) fprintf(stderr, "error: missing pool name\n"); - usage(); - } - target = argv[0]; - - zhack_spa_open(target, B_TRUE, FTAG, &spa); - os = spa->spa_meta_objset; - - dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); 
- dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); - dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); - if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { - dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg"); - } - dump_mos(spa); - - spa_close(spa, FTAG); -} - -static void -zhack_feature_enable_sync(void *arg, dmu_tx_t *tx) -{ - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - zfeature_info_t *feature = arg; - - feature_enable_sync(spa, feature, tx); - - spa_history_log_internal(spa, "zhack enable feature", tx, - "guid=%s flags=%x", - feature->fi_guid, feature->fi_flags); -} - -static void -zhack_do_feature_enable(int argc, char **argv) -{ - char c; - char *desc, *target; - spa_t *spa; - objset_t *mos; - zfeature_info_t feature; - spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; - - /* - * Features are not added to the pool's label until their refcounts - * are incremented, so fi_mos can just be left as false for now. - */ - desc = NULL; - feature.fi_uname = "zhack"; - feature.fi_flags = 0; - feature.fi_depends = nodeps; - feature.fi_feature = SPA_FEATURE_NONE; - - optind = 1; - while ((c = getopt(argc, argv, "rmd:")) != -1) { - switch (c) { - case 'r': - feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; - break; - case 'd': - desc = strdup(optarg); - break; - default: - usage(); - break; - } - } - - if (desc == NULL) - desc = strdup("zhack injected"); - feature.fi_desc = desc; - - argc -= optind; - argv += optind; - - if (argc < 2) { - (void) fprintf(stderr, "error: missing feature or pool name\n"); - usage(); - } - target = argv[0]; - feature.fi_guid = argv[1]; - - if (!zfeature_is_valid_guid(feature.fi_guid)) - fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); - - zhack_spa_open(target, B_FALSE, FTAG, &spa); - mos = spa->spa_meta_objset; - - if (zfeature_is_supported(feature.fi_guid)) - fatal(spa, FTAG, "'%s' is a real feature, will not enable"); - if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) - fatal(spa, 
FTAG, "feature already enabled: %s", - feature.fi_guid); - - VERIFY0(dsl_sync_task(spa_name(spa), NULL, - zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL)); - - spa_close(spa, FTAG); - - free(desc); -} - -static void -feature_incr_sync(void *arg, dmu_tx_t *tx) -{ - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - zfeature_info_t *feature = arg; - uint64_t refcount; - - VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); - feature_sync(spa, feature, refcount + 1, tx); - spa_history_log_internal(spa, "zhack feature incr", tx, - "name=%s", feature->fi_guid); -} - -static void -feature_decr_sync(void *arg, dmu_tx_t *tx) -{ - spa_t *spa = dmu_tx_pool(tx)->dp_spa; - zfeature_info_t *feature = arg; - uint64_t refcount; - - VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); - feature_sync(spa, feature, refcount - 1, tx); - spa_history_log_internal(spa, "zhack feature decr", tx, - "name=%s", feature->fi_guid); -} - -static void -zhack_do_feature_ref(int argc, char **argv) -{ - char c; - char *target; - boolean_t decr = B_FALSE; - spa_t *spa; - objset_t *mos; - zfeature_info_t feature; - spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; - - /* - * fi_desc does not matter here because it was written to disk - * when the feature was enabled, but we need to properly set the - * feature for read or write based on the information we read off - * disk later. 
- */ - feature.fi_uname = "zhack"; - feature.fi_flags = 0; - feature.fi_desc = NULL; - feature.fi_depends = nodeps; - feature.fi_feature = SPA_FEATURE_NONE; - - optind = 1; - while ((c = getopt(argc, argv, "md")) != -1) { - switch (c) { - case 'm': - feature.fi_flags |= ZFEATURE_FLAG_MOS; - break; - case 'd': - decr = B_TRUE; - break; - default: - usage(); - break; - } - } - argc -= optind; - argv += optind; - - if (argc < 2) { - (void) fprintf(stderr, "error: missing feature or pool name\n"); - usage(); - } - target = argv[0]; - feature.fi_guid = argv[1]; - - if (!zfeature_is_valid_guid(feature.fi_guid)) - fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); - - zhack_spa_open(target, B_FALSE, FTAG, &spa); - mos = spa->spa_meta_objset; - - if (zfeature_is_supported(feature.fi_guid)) { - fatal(spa, FTAG, - "'%s' is a real feature, will not change refcount"); - } - - if (0 == zap_contains(mos, spa->spa_feat_for_read_obj, - feature.fi_guid)) { - feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT; - } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj, - feature.fi_guid)) { - feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; - } else { - fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid); - } - - if (decr) { - uint64_t count; - if (feature_get_refcount_from_disk(spa, &feature, - &count) == 0 && count != 0) { - fatal(spa, FTAG, "feature refcount already 0: %s", - feature.fi_guid); - } - } - - VERIFY0(dsl_sync_task(spa_name(spa), NULL, - decr ? 
feature_decr_sync : feature_incr_sync, &feature, - 5, ZFS_SPACE_CHECK_NORMAL)); - - spa_close(spa, FTAG); -} - -static int -zhack_do_feature(int argc, char **argv) -{ - char *subcommand; - - argc--; - argv++; - if (argc == 0) { - (void) fprintf(stderr, - "error: no feature operation specified\n"); - usage(); - } - - subcommand = argv[0]; - if (strcmp(subcommand, "stat") == 0) { - zhack_do_feature_stat(argc, argv); - } else if (strcmp(subcommand, "enable") == 0) { - zhack_do_feature_enable(argc, argv); - } else if (strcmp(subcommand, "ref") == 0) { - zhack_do_feature_ref(argc, argv); - } else { - (void) fprintf(stderr, "error: unknown subcommand: %s\n", - subcommand); - usage(); - } - - return (0); -} - -#define MAX_NUM_PATHS 1024 - -int -main(int argc, char **argv) -{ - extern void zfs_prop_init(void); - - char *path[MAX_NUM_PATHS]; - const char *subcommand; - int rv = 0; - char c; - - g_importargs.path = path; - - dprintf_setup(&argc, argv); - zfs_prop_init(); - - while ((c = getopt(argc, argv, "c:d:")) != -1) { - switch (c) { - case 'c': - g_importargs.cachefile = optarg; - break; - case 'd': - assert(g_importargs.paths < MAX_NUM_PATHS); - g_importargs.path[g_importargs.paths++] = optarg; - break; - default: - usage(); - break; - } - } - - argc -= optind; - argv += optind; - optind = 1; - - if (argc == 0) { - (void) fprintf(stderr, "error: no command specified\n"); - usage(); - } - - subcommand = argv[0]; - - if (strcmp(subcommand, "feature") == 0) { - rv = zhack_do_feature(argc, argv); - } else { - (void) fprintf(stderr, "error: unknown subcommand: %s\n", - subcommand); - usage(); - } - - if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) { - fatal(NULL, FTAG, "pool export failed; " - "changes may not be committed to disk\n"); - } - - libzfs_fini(g_zfs); - kernel_fini(); - - return (rv); -} diff --git a/cddl/contrib/opensolaris/cmd/zinject/translate.c b/cddl/contrib/opensolaris/cmd/zinject/translate.c deleted file mode 100644 index 
99a3d0ca4ff3..000000000000 --- a/cddl/contrib/opensolaris/cmd/zinject/translate.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. - */ - -#include <libzfs.h> - -#include <sys/zfs_context.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdarg.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <sys/file.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/param.h> -#include <sys/stat.h> - -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/dnode.h> -#include <sys/vdev_impl.h> - -#include "zinject.h" - -extern void kernel_init(int); -extern void kernel_fini(void); - -static int debug; - -static void -ziprintf(const char *fmt, ...) 
-{ - va_list ap; - - if (!debug) - return; - - va_start(ap, fmt); - (void) vprintf(fmt, ap); - va_end(ap); -} - -static void -compress_slashes(const char *src, char *dest) -{ - while (*src != '\0') { - *dest = *src++; - while (*dest == '/' && *src == '/') - ++src; - ++dest; - } - *dest = '\0'; -} - -/* - * Given a full path to a file, translate into a dataset name and a relative - * path within the dataset. 'dataset' must be at least MAXNAMELEN characters, - * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64 - * buffer, which we need later to get the object ID. - */ -static int -parse_pathname(const char *inpath, char *dataset, char *relpath, - struct stat64 *statbuf) -{ - struct statfs sfs; - const char *rel; - char fullpath[MAXPATHLEN]; - - compress_slashes(inpath, fullpath); - - if (fullpath[0] != '/') { - (void) fprintf(stderr, "invalid object '%s': must be full " - "path\n", fullpath); - usage(); - return (-1); - } - - if (strlen(fullpath) >= MAXPATHLEN) { - (void) fprintf(stderr, "invalid object; pathname too long\n"); - return (-1); - } - - if (stat64(fullpath, statbuf) != 0) { - (void) fprintf(stderr, "cannot open '%s': %s\n", - fullpath, strerror(errno)); - return (-1); - } - - if (statfs(fullpath, &sfs) == -1) { - (void) fprintf(stderr, "cannot find mountpoint for '%s': %s\n", - fullpath, strerror(errno)); - return (-1); - } - - if (strcmp(sfs.f_fstypename, MNTTYPE_ZFS) != 0) { - (void) fprintf(stderr, "invalid path '%s': not a ZFS " - "filesystem\n", fullpath); - return (-1); - } - - if (strncmp(fullpath, sfs.f_mntonname, strlen(sfs.f_mntonname)) != 0) { - (void) fprintf(stderr, "invalid path '%s': mountpoint " - "doesn't match path\n", fullpath); - return (-1); - } - - (void) strcpy(dataset, sfs.f_mntfromname); - - rel = fullpath + strlen(sfs.f_mntonname); - if (rel[0] == '/') - rel++; - (void) strcpy(relpath, rel); - - return (0); -} - -/* - * Convert from a (dataset, path) pair into a (objset, object) pair. 
Note that - * we grab the object number from the inode number, since looking this up via - * libzpool is a real pain. - */ -/* ARGSUSED */ -static int -object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, - zinject_record_t *record) -{ - objset_t *os; - int err; - - /* - * Before doing any libzpool operations, call sync() to ensure that the - * on-disk state is consistent with the in-core state. - */ - sync(); - - err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); - if (err != 0) { - (void) fprintf(stderr, "cannot open dataset '%s': %s\n", - dataset, strerror(err)); - return (-1); - } - - record->zi_objset = dmu_objset_id(os); - record->zi_object = statbuf->st_ino; - - dmu_objset_disown(os, FTAG); - - return (0); -} - -/* - * Calculate the real range based on the type, level, and range given. - */ -static int -calculate_range(const char *dataset, err_type_t type, int level, char *range, - zinject_record_t *record) -{ - objset_t *os = NULL; - dnode_t *dn = NULL; - int err; - int ret = -1; - - /* - * Determine the numeric range from the string. - */ - if (range == NULL) { - /* - * If range is unspecified, set the range to [0,-1], which - * indicates that the whole object should be treated as an - * error. - */ - record->zi_start = 0; - record->zi_end = -1ULL; - } else { - char *end; - - /* XXX add support for suffixes */ - record->zi_start = strtoull(range, &end, 10); - - - if (*end == '\0') - record->zi_end = record->zi_start + 1; - else if (*end == ',') - record->zi_end = strtoull(end + 1, &end, 10); - - if (*end != '\0') { - (void) fprintf(stderr, "invalid range '%s': must be " - "a numeric range of the form 'start[,end]'\n", - range); - goto out; - } - } - - switch (type) { - case TYPE_DATA: - break; - - case TYPE_DNODE: - /* - * If this is a request to inject faults into the dnode, then we - * must translate the current (objset,object) pair into an - * offset within the metadnode for the objset. 
Specifying any - * kind of range with type 'dnode' is illegal. - */ - if (range != NULL) { - (void) fprintf(stderr, "range cannot be specified when " - "type is 'dnode'\n"); - goto out; - } - - record->zi_start = record->zi_object * sizeof (dnode_phys_t); - record->zi_end = record->zi_start + sizeof (dnode_phys_t); - record->zi_object = 0; - break; - } - - /* - * Get the dnode associated with object, so we can calculate the block - * size. - */ - if ((err = dmu_objset_own(dataset, DMU_OST_ANY, - B_TRUE, FTAG, &os)) != 0) { - (void) fprintf(stderr, "cannot open dataset '%s': %s\n", - dataset, strerror(err)); - goto out; - } - - if (record->zi_object == 0) { - dn = DMU_META_DNODE(os); - } else { - err = dnode_hold(os, record->zi_object, FTAG, &dn); - if (err != 0) { - (void) fprintf(stderr, "failed to hold dnode " - "for object %llu\n", - (u_longlong_t)record->zi_object); - goto out; - } - } - - - ziprintf("data shift: %d\n", (int)dn->dn_datablkshift); - ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift); - - /* - * Translate range into block IDs. - */ - if (record->zi_start != 0 || record->zi_end != -1ULL) { - record->zi_start >>= dn->dn_datablkshift; - record->zi_end >>= dn->dn_datablkshift; - } - - /* - * Check level, and then translate level 0 blkids into ranges - * appropriate for level of indirection. 
- */ - record->zi_level = level; - if (level > 0) { - ziprintf("level 0 blkid range: [%llu, %llu]\n", - record->zi_start, record->zi_end); - - if (level >= dn->dn_nlevels) { - (void) fprintf(stderr, "level %d exceeds max level " - "of object (%d)\n", level, dn->dn_nlevels - 1); - goto out; - } - - if (record->zi_start != 0 || record->zi_end != 0) { - int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT; - - for (; level > 0; level--) { - record->zi_start >>= shift; - record->zi_end >>= shift; - } - } - } - - ret = 0; -out: - if (dn) { - if (dn != DMU_META_DNODE(os)) - dnode_rele(dn, FTAG); - } - if (os) - dmu_objset_disown(os, FTAG); - - return (ret); -} - -int -translate_record(err_type_t type, const char *object, const char *range, - int level, zinject_record_t *record, char *poolname, char *dataset) -{ - char path[MAXPATHLEN]; - char *slash; - struct stat64 statbuf; - int ret = -1; - - kernel_init(FREAD); - - debug = (getenv("ZINJECT_DEBUG") != NULL); - - ziprintf("translating: %s\n", object); - - if (MOS_TYPE(type)) { - /* - * MOS objects are treated specially. - */ - switch (type) { - case TYPE_MOS: - record->zi_type = 0; - break; - case TYPE_MOSDIR: - record->zi_type = DMU_OT_OBJECT_DIRECTORY; - break; - case TYPE_METASLAB: - record->zi_type = DMU_OT_OBJECT_ARRAY; - break; - case TYPE_CONFIG: - record->zi_type = DMU_OT_PACKED_NVLIST; - break; - case TYPE_BPOBJ: - record->zi_type = DMU_OT_BPOBJ; - break; - case TYPE_SPACEMAP: - record->zi_type = DMU_OT_SPACE_MAP; - break; - case TYPE_ERRLOG: - record->zi_type = DMU_OT_ERROR_LOG; - break; - } - - dataset[0] = '\0'; - (void) strcpy(poolname, object); - return (0); - } - - /* - * Convert a full path into a (dataset, file) pair. 
- */ - if (parse_pathname(object, dataset, path, &statbuf) != 0) - goto err; - - ziprintf(" dataset: %s\n", dataset); - ziprintf(" path: %s\n", path); - - /* - * Convert (dataset, file) into (objset, object) - */ - if (object_from_path(dataset, path, &statbuf, record) != 0) - goto err; - - ziprintf("raw objset: %llu\n", record->zi_objset); - ziprintf("raw object: %llu\n", record->zi_object); - - /* - * For the given object, calculate the real (type, level, range) - */ - if (calculate_range(dataset, type, level, (char *)range, record) != 0) - goto err; - - ziprintf(" objset: %llu\n", record->zi_objset); - ziprintf(" object: %llu\n", record->zi_object); - if (record->zi_start == 0 && - record->zi_end == -1ULL) - ziprintf(" range: all\n"); - else - ziprintf(" range: [%llu, %llu]\n", record->zi_start, - record->zi_end); - - /* - * Copy the pool name - */ - (void) strcpy(poolname, dataset); - if ((slash = strchr(poolname, '/')) != NULL) - *slash = '\0'; - - ret = 0; - -err: - kernel_fini(); - return (ret); -} - -int -translate_raw(const char *str, zinject_record_t *record) -{ - /* - * A raw bookmark of the form objset:object:level:blkid, where each - * number is a hexidecimal value. - */ - if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset, - (u_longlong_t *)&record->zi_object, &record->zi_level, - (u_longlong_t *)&record->zi_start) != 4) { - (void) fprintf(stderr, "bad raw spec '%s': must be of the form " - "'objset:object:level:blkid'\n", str); - return (-1); - } - - record->zi_end = record->zi_start; - - return (0); -} - -int -translate_device(const char *pool, const char *device, err_type_t label_type, - zinject_record_t *record) -{ - char *end; - zpool_handle_t *zhp; - nvlist_t *tgt; - boolean_t isspare, iscache; - - /* - * Given a device name or GUID, create an appropriate injection record - * with zi_guid set. 
- */ - if ((zhp = zpool_open(g_zfs, pool)) == NULL) - return (-1); - - record->zi_guid = strtoull(device, &end, 16); - if (record->zi_guid == 0 || *end != '\0') { - tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL); - - if (tgt == NULL) { - (void) fprintf(stderr, "cannot find device '%s' in " - "pool '%s'\n", device, pool); - return (-1); - } - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, - &record->zi_guid) == 0); - } - - /* - * Device faults can take on three different forms: - * 1). delayed or hanging I/O - * 2). zfs label faults - * 3). generic disk faults - */ - if (record->zi_timer != 0) { - record->zi_cmd = ZINJECT_DELAY_IO; - } else if (label_type != TYPE_INVAL) { - record->zi_cmd = ZINJECT_LABEL_FAULT; - } else { - record->zi_cmd = ZINJECT_DEVICE_FAULT; - } - - switch (label_type) { - case TYPE_LABEL_UBERBLOCK: - record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]); - record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1; - break; - case TYPE_LABEL_NVLIST: - record->zi_start = offsetof(vdev_label_t, vl_vdev_phys); - record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1; - break; - case TYPE_LABEL_PAD1: - record->zi_start = offsetof(vdev_label_t, vl_pad1); - record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; - break; - case TYPE_LABEL_PAD2: - record->zi_start = offsetof(vdev_label_t, vl_be); - record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; - break; - } - return (0); -} diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c deleted file mode 100644 index bf42bc483830..000000000000 --- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c +++ /dev/null @@ -1,1093 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 by Delphix. All rights reserved. - */ - -/* - * ZFS Fault Injector - * - * This userland component takes a set of options and uses libzpool to translate - * from a user-visible object type and name to an internal representation. - * There are two basic types of faults: device faults and data faults. - * - * - * DEVICE FAULTS - * - * Errors can be injected into a particular vdev using the '-d' option. This - * option takes a path or vdev GUID to uniquely identify the device within a - * pool. There are two types of errors that can be injected, EIO and ENXIO, - * that can be controlled through the '-e' option. The default is ENXIO. For - * EIO failures, any attempt to read data from the device will return EIO, but - * subsequent attempt to reopen the device will succeed. For ENXIO failures, - * any attempt to read from the device will return EIO, but any attempt to - * reopen the device will also return ENXIO. - * For label faults, the -L option must be specified. This allows faults - * to be injected into either the nvlist, uberblock, pad1, or pad2 region - * of all the labels for the specified device. 
- * - * This form of the command looks like: - * - * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool - * - * - * DATA FAULTS - * - * We begin with a tuple of the form: - * - * <type,level,range,object> - * - * type A string describing the type of data to target. Each type - * implicitly describes how to interpret 'object'. Currently, - * the following values are supported: - * - * data User data for a file - * dnode Dnode for a file or directory - * - * The following MOS objects are special. Instead of injecting - * errors on a particular object or blkid, we inject errors across - * all objects of the given type. - * - * mos Any data in the MOS - * mosdir object directory - * config pool configuration - * bpobj blkptr list - * spacemap spacemap - * metaslab metaslab - * errlog persistent error log - * - * level Object level. Defaults to '0', not applicable to all types. If - * a range is given, this corresponds to the indirect block - * corresponding to the specific range. - * - * range A numerical range [start,end) within the object. Defaults to - * the full size of the file. - * - * object A string describing the logical location of the object. For - * files and directories (currently the only supported types), - * this is the path of the object on disk. - * - * This is translated, via libzpool, into the following internal representation: - * - * <type,objset,object,level,range> - * - * These types should be self-explanatory. This tuple is then passed to the - * kernel via a special ioctl() to initiate fault injection for the given - * object. Note that 'type' is not strictly necessary for fault injection, but - * is used when translating existing faults into a human-readable string. 
- * - * - * The command itself takes one of the forms: - * - * zinject - * zinject <-a | -u pool> - * zinject -c <id|all> - * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] - * [-r range] <object> - * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool - * - * With no arguments, the command prints all currently registered injection - * handlers, with their numeric identifiers. - * - * The '-c' option will clear the given handler, or all handlers if 'all' is - * specified. - * - * The '-e' option takes a string describing the errno to simulate. This must - * be either 'io' or 'checksum'. In most cases this will result in the same - * behavior, but RAID-Z will produce a different set of ereports for this - * situation. - * - * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is - * specified, then the ARC cache is flushed appropriately. If '-u' is - * specified, then the underlying SPA is unloaded. Either of these flags can be - * specified independently of any other handlers. The '-m' flag automatically - * does an unmount and remount of the underlying dataset to aid in flushing the - * cache. - * - * The '-f' flag controls the frequency of errors injected, expressed as a - * integer percentage between 1 and 100. The default is 100. - * - * The this form is responsible for actually injecting the handler into the - * framework. It takes the arguments described above, translates them to the - * internal tuple using libzpool, and then issues an ioctl() to register the - * handler. - * - * The final form can target a specific bookmark, regardless of whether a - * human-readable interface has been designed. It allows developers to specify - * a particular block by number. 
- */ - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> - -#include <sys/fs/zfs.h> -#include <sys/param.h> -#include <sys/mount.h> - -#include <libzfs.h> -#include <libzfs_compat.h> - -#undef verify /* both libzfs.h and zfs_context.h want to define this */ - -#include "zinject.h" - -libzfs_handle_t *g_zfs; -int zfs_fd; - -#ifndef ECKSUM -#define ECKSUM EBADE -#endif - -static const char *errtable[TYPE_INVAL] = { - "data", - "dnode", - "mos", - "mosdir", - "metaslab", - "config", - "bpobj", - "spacemap", - "errlog", - "uber", - "nvlist", - "pad1", - "pad2" -}; - -static err_type_t -name_to_type(const char *arg) -{ - int i; - for (i = 0; i < TYPE_INVAL; i++) - if (strcmp(errtable[i], arg) == 0) - return (i); - - return (TYPE_INVAL); -} - -static const char * -type_to_name(uint64_t type) -{ - switch (type) { - case DMU_OT_OBJECT_DIRECTORY: - return ("mosdir"); - case DMU_OT_OBJECT_ARRAY: - return ("metaslab"); - case DMU_OT_PACKED_NVLIST: - return ("config"); - case DMU_OT_BPOBJ: - return ("bpobj"); - case DMU_OT_SPACE_MAP: - return ("spacemap"); - case DMU_OT_ERROR_LOG: - return ("errlog"); - default: - return ("-"); - } -} - - -/* - * Print usage message. - */ -void -usage(void) -{ - (void) printf( - "usage:\n" - "\n" - "\tzinject\n" - "\n" - "\t\tList all active injection records.\n" - "\n" - "\tzinject -c <id|all>\n" - "\n" - "\t\tClear the particular record (if given a numeric ID), or\n" - "\t\tall records if 'all' is specificed.\n" - "\n" - "\tzinject -p <function name> pool\n" - "\n" - "\t\tInject a panic fault at the specified function. Only \n" - "\t\tfunctions which call spa_vdev_config_exit(), or \n" - "\t\tspa_vdev_exit() will trigger a panic.\n" - "\n" - "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n" - "\t [-T <read|write|free|claim|all> pool\n" - "\n" - "\t\tInject a fault into a particular device or the device's\n" - "\t\tlabel. 
Label injection can either be 'nvlist', 'uber',\n " - "\t\t'pad1', or 'pad2'.\n" - "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" - "\n" - "\tzinject -d device -A <degrade|fault> pool\n" - "\n" - "\t\tPerform a specific action on a particular device\n" - "\n" - "\tzinject -d device -D latency:lanes pool\n" - "\n" - "\t\tAdd an artificial delay to IO requests on a particular\n" - "\t\tdevice, such that the requests take a minimum of 'latency'\n" - "\t\tmilliseconds to complete. Each delay has an associated\n" - "\t\tnumber of 'lanes' which defines the number of concurrent\n" - "\t\tIO requests that can be processed.\n" - "\n" - "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n" - "\t\tthe device will only be able to service a single IO request\n" - "\t\tat a time with each request taking 10 ms to complete. So,\n" - "\t\tif only a single request is submitted every 10 ms, the\n" - "\t\taverage latency will be 10 ms; but if more than one request\n" - "\t\tis submitted every 10 ms, the average latency will be more\n" - "\t\tthan 10 ms.\n" - "\n" - "\t\tSimilarly, if a delay of 10 ms is specified to have two\n" - "\t\tlanes (-D 10:2), then the device will be able to service\n" - "\t\ttwo requests at a time, each with a minimum latency of\n" - "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n" - "\t\tthe average latency will be 10 ms; but if more than two\n" - "\t\trequests are submitted every 10 ms, the average latency\n" - "\t\twill be more than 10 ms.\n" - "\n" - "\t\tAlso note, these delays are additive. So two invocations\n" - "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n" - "\t\tof '-D 10:2'. This also means, one can specify multiple\n" - "\t\tlanes with differing target latencies. 
For example, an\n" - "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n" - "\t\tcreate 3 lanes on the device; one lane with a latency\n" - "\t\tof 10 ms and two lanes with a 25 ms latency.\n" - "\n" - "\tzinject -I [-s <seconds> | -g <txgs>] pool\n" - "\n" - "\t\tCause the pool to stop writing blocks yet not\n" - "\t\treport errors for a duration. Simulates buggy hardware\n" - "\t\tthat fails to honor cache flush requests.\n" - "\t\tDefault duration is 30 seconds. The machine is panicked\n" - "\t\tat the end of the duration.\n" - "\n" - "\tzinject -b objset:object:level:blkid pool\n" - "\n" - "\t\tInject an error into pool 'pool' with the numeric bookmark\n" - "\t\tspecified by the remaining tuple. Each number is in\n" - "\t\thexidecimal, and only one block can be specified.\n" - "\n" - "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n" - "\t [-a] [-m] [-u] [-f freq] <object>\n" - "\n" - "\t\tInject an error into the object specified by the '-t' option\n" - "\t\tand the object descriptor. The 'object' parameter is\n" - "\t\tinterperted depending on the '-t' option.\n" - "\n" - "\t\t-q\tQuiet mode. Only print out the handler number added.\n" - "\t\t-e\tInject a specific error. Must be either 'io' or\n" - "\t\t\t'checksum'. Default is 'io'.\n" - "\t\t-l\tInject error at a particular block level. Default is " - "0.\n" - "\t\t-m\tAutomatically remount underlying filesystem.\n" - "\t\t-r\tInject error over a particular logical range of an\n" - "\t\t\tobject. Will be translated to the appropriate blkid\n" - "\t\t\trange according to the object's properties.\n" - "\t\t-a\tFlush the ARC cache. Can be specified without any\n" - "\t\t\tassociated object.\n" - "\t\t-u\tUnload the associated pool. Can be specified with only\n" - "\t\t\ta pool object.\n" - "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" - "\t\t\ta percentage between 1 and 100.\n" - "\n" - "\t-t data\t\tInject an error into the plain file contents of a\n" - "\t\t\tfile. 
The object must be specified as a complete path\n" - "\t\t\tto a file on a ZFS filesystem.\n" - "\n" - "\t-t dnode\tInject an error into the metadnode in the block\n" - "\t\t\tcorresponding to the dnode for a file or directory. The\n" - "\t\t\t'-r' option is incompatible with this mode. The object\n" - "\t\t\tis specified as a complete path to a file or directory\n" - "\t\t\ton a ZFS filesystem.\n" - "\n" - "\t-t <mos>\tInject errors into the MOS for objects of the given\n" - "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n" - "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n" - "\t\t\tthe poolname.\n"); -} - -static int -iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *), - void *data) -{ - zfs_cmd_t zc = { 0 }; - int ret; - - while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) - if ((ret = func((int)zc.zc_guid, zc.zc_name, - &zc.zc_inject_record, data)) != 0) - return (ret); - - if (errno != ENOENT) { - (void) fprintf(stderr, "Unable to list handlers: %s\n", - strerror(errno)); - return (-1); - } - - return (0); -} - -static int -print_data_handler(int id, const char *pool, zinject_record_t *record, - void *data) -{ - int *count = data; - - if (record->zi_guid != 0 || record->zi_func[0] != '\0') - return (0); - - if (*count == 0) { - (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n", - "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE"); - (void) printf("--- --------------- ------ " - "------ -------- --- ---------------\n"); - } - - *count += 1; - - (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool, - (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object, - type_to_name(record->zi_type), record->zi_level); - - if (record->zi_start == 0 && - record->zi_end == -1ULL) - (void) printf("all\n"); - else - (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start, - (u_longlong_t)record->zi_end); - - return (0); -} - -static int -print_device_handler(int id, const char *pool, 
zinject_record_t *record, - void *data) -{ - int *count = data; - - if (record->zi_guid == 0 || record->zi_func[0] != '\0') - return (0); - - if (record->zi_cmd == ZINJECT_DELAY_IO) - return (0); - - if (*count == 0) { - (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); - (void) printf("--- --------------- ----------------\n"); - } - - *count += 1; - - (void) printf("%3d %-15s %llx\n", id, pool, - (u_longlong_t)record->zi_guid); - - return (0); -} - -static int -print_delay_handler(int id, const char *pool, zinject_record_t *record, - void *data) -{ - int *count = data; - - if (record->zi_guid == 0 || record->zi_func[0] != '\0') - return (0); - - if (record->zi_cmd != ZINJECT_DELAY_IO) - return (0); - - if (*count == 0) { - (void) printf("%3s %-15s %-15s %-15s %s\n", - "ID", "POOL", "DELAY (ms)", "LANES", "GUID"); - (void) printf("--- --------------- --------------- " - "--------------- ----------------\n"); - } - - *count += 1; - - (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, - (u_longlong_t)NSEC2MSEC(record->zi_timer), - (u_longlong_t)record->zi_nlanes, - (u_longlong_t)record->zi_guid); - - return (0); -} - -static int -print_panic_handler(int id, const char *pool, zinject_record_t *record, - void *data) -{ - int *count = data; - - if (record->zi_func[0] == '\0') - return (0); - - if (*count == 0) { - (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); - (void) printf("--- --------------- ----------------\n"); - } - - *count += 1; - - (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); - - return (0); -} - -/* - * Print all registered error handlers. Returns the number of handlers - * registered. 
- */ -static int -print_all_handlers(void) -{ - int count = 0, total = 0; - - (void) iter_handlers(print_device_handler, &count); - if (count > 0) { - total += count; - (void) printf("\n"); - count = 0; - } - - (void) iter_handlers(print_delay_handler, &count); - if (count > 0) { - total += count; - (void) printf("\n"); - count = 0; - } - - (void) iter_handlers(print_data_handler, &count); - if (count > 0) { - total += count; - (void) printf("\n"); - count = 0; - } - - (void) iter_handlers(print_panic_handler, &count); - - return (count + total); -} - -/* ARGSUSED */ -static int -cancel_one_handler(int id, const char *pool, zinject_record_t *record, - void *data) -{ - zfs_cmd_t zc = { 0 }; - - zc.zc_guid = (uint64_t)id; - - if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { - (void) fprintf(stderr, "failed to remove handler %d: %s\n", - id, strerror(errno)); - return (1); - } - - return (0); -} - -/* - * Remove all fault injection handlers. - */ -static int -cancel_all_handlers(void) -{ - int ret = iter_handlers(cancel_one_handler, NULL); - - if (ret == 0) - (void) printf("removed all registered handlers\n"); - - return (ret); -} - -/* - * Remove a specific fault injection handler. - */ -static int -cancel_handler(int id) -{ - zfs_cmd_t zc = { 0 }; - - zc.zc_guid = (uint64_t)id; - - if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) { - (void) fprintf(stderr, "failed to remove handler %d: %s\n", - id, strerror(errno)); - return (1); - } - - (void) printf("removed handler %d\n", id); - - return (0); -} - -/* - * Register a new fault injection handler. 
- */ -static int -register_handler(const char *pool, int flags, zinject_record_t *record, - int quiet) -{ - zfs_cmd_t zc = { 0 }; - - (void) strcpy(zc.zc_name, pool); - zc.zc_inject_record = *record; - zc.zc_guid = flags; - - if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) { - (void) fprintf(stderr, "failed to add handler: %s\n", - strerror(errno)); - return (1); - } - - if (flags & ZINJECT_NULL) - return (0); - - if (quiet) { - (void) printf("%llu\n", (u_longlong_t)zc.zc_guid); - } else { - (void) printf("Added handler %llu with the following " - "properties:\n", (u_longlong_t)zc.zc_guid); - (void) printf(" pool: %s\n", pool); - if (record->zi_guid) { - (void) printf(" vdev: %llx\n", - (u_longlong_t)record->zi_guid); - } else if (record->zi_func[0] != '\0') { - (void) printf(" panic function: %s\n", - record->zi_func); - } else if (record->zi_duration > 0) { - (void) printf(" time: %lld seconds\n", - (u_longlong_t)record->zi_duration); - } else if (record->zi_duration < 0) { - (void) printf(" txgs: %lld \n", - (u_longlong_t)-record->zi_duration); - } else { - (void) printf("objset: %llu\n", - (u_longlong_t)record->zi_objset); - (void) printf("object: %llu\n", - (u_longlong_t)record->zi_object); - (void) printf(" type: %llu\n", - (u_longlong_t)record->zi_type); - (void) printf(" level: %d\n", record->zi_level); - if (record->zi_start == 0 && - record->zi_end == -1ULL) - (void) printf(" range: all\n"); - else - (void) printf(" range: [%llu, %llu)\n", - (u_longlong_t)record->zi_start, - (u_longlong_t)record->zi_end); - } - } - - return (0); -} - -int -perform_action(const char *pool, zinject_record_t *record, int cmd) -{ - zfs_cmd_t zc = { 0 }; - - ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED); - (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); - zc.zc_guid = record->zi_guid; - zc.zc_cookie = cmd; - - if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) - return (0); - - return (1); -} - -static int -parse_delay(char *str, uint64_t *delay, 
uint64_t *nlanes) -{ - unsigned long scan_delay; - unsigned long scan_nlanes; - - if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2) - return (1); - - /* - * We explicitly disallow a delay of zero here, because we key - * off this value being non-zero in translate_device(), to - * determine if the fault is a ZINJECT_DELAY_IO fault or not. - */ - if (scan_delay == 0) - return (1); - - /* - * The units for the CLI delay parameter is milliseconds, but - * the data passed to the kernel is interpreted as nanoseconds. - * Thus we scale the milliseconds to nanoseconds here, and this - * nanosecond value is used to pass the delay to the kernel. - */ - *delay = MSEC2NSEC(scan_delay); - *nlanes = scan_nlanes; - - return (0); -} - -int -main(int argc, char **argv) -{ - int c; - char *range = NULL; - char *cancel = NULL; - char *end; - char *raw = NULL; - char *device = NULL; - int level = 0; - int quiet = 0; - int error = 0; - int domount = 0; - int io_type = ZIO_TYPES; - int action = VDEV_STATE_UNKNOWN; - err_type_t type = TYPE_INVAL; - err_type_t label = TYPE_INVAL; - zinject_record_t record = { 0 }; - char pool[MAXNAMELEN]; - char dataset[MAXNAMELEN]; - zfs_handle_t *zhp; - int nowrites = 0; - int dur_txg = 0; - int dur_secs = 0; - int ret; - int flags = 0; - - if ((g_zfs = libzfs_init()) == NULL) { - (void) fprintf(stderr, "internal error: failed to " - "initialize ZFS library\n"); - return (1); - } - - libzfs_print_on_error(g_zfs, B_TRUE); - - if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { - (void) fprintf(stderr, "failed to open ZFS device\n"); - return (1); - } - - if (argc == 1) { - /* - * No arguments. Print the available handlers. If there are no - * available handlers, direct the user to '-h' for help - * information. 
- */ - if (print_all_handlers() == 0) { - (void) printf("No handlers registered.\n"); - (void) printf("Run 'zinject -h' for usage " - "information.\n"); - } - - return (0); - } - - while ((c = getopt(argc, argv, - ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) { - switch (c) { - case 'a': - flags |= ZINJECT_FLUSH_ARC; - break; - case 'A': - if (strcasecmp(optarg, "degrade") == 0) { - action = VDEV_STATE_DEGRADED; - } else if (strcasecmp(optarg, "fault") == 0) { - action = VDEV_STATE_FAULTED; - } else { - (void) fprintf(stderr, "invalid action '%s': " - "must be 'degrade' or 'fault'\n", optarg); - usage(); - return (1); - } - break; - case 'b': - raw = optarg; - break; - case 'c': - cancel = optarg; - break; - case 'd': - device = optarg; - break; - case 'D': - ret = parse_delay(optarg, &record.zi_timer, - &record.zi_nlanes); - if (ret != 0) { - (void) fprintf(stderr, "invalid i/o delay " - "value: '%s'\n", optarg); - usage(); - return (1); - } - break; - case 'e': - if (strcasecmp(optarg, "io") == 0) { - error = EIO; - } else if (strcasecmp(optarg, "checksum") == 0) { - error = ECKSUM; - } else if (strcasecmp(optarg, "nxio") == 0) { - error = ENXIO; - } else if (strcasecmp(optarg, "dtl") == 0) { - error = ECHILD; - } else { - (void) fprintf(stderr, "invalid error type " - "'%s': must be 'io', 'checksum' or " - "'nxio'\n", optarg); - usage(); - return (1); - } - break; - case 'f': - record.zi_freq = atoi(optarg); - if (record.zi_freq < 1 || record.zi_freq > 100) { - (void) fprintf(stderr, "frequency range must " - "be in the range (0, 100]\n"); - return (1); - } - break; - case 'F': - record.zi_failfast = B_TRUE; - break; - case 'g': - dur_txg = 1; - record.zi_duration = (int)strtol(optarg, &end, 10); - if (record.zi_duration <= 0 || *end != '\0') { - (void) fprintf(stderr, "invalid duration '%s': " - "must be a positive integer\n", optarg); - usage(); - return (1); - } - /* store duration of txgs as its negative */ - record.zi_duration *= -1; - break; - case 
'h': - usage(); - return (0); - case 'I': - /* default duration, if one hasn't yet been defined */ - nowrites = 1; - if (dur_secs == 0 && dur_txg == 0) - record.zi_duration = 30; - break; - case 'l': - level = (int)strtol(optarg, &end, 10); - if (*end != '\0') { - (void) fprintf(stderr, "invalid level '%s': " - "must be an integer\n", optarg); - usage(); - return (1); - } - break; - case 'm': - domount = 1; - break; - case 'p': - (void) strlcpy(record.zi_func, optarg, - sizeof (record.zi_func)); - record.zi_cmd = ZINJECT_PANIC; - break; - case 'q': - quiet = 1; - break; - case 'r': - range = optarg; - break; - case 's': - dur_secs = 1; - record.zi_duration = (int)strtol(optarg, &end, 10); - if (record.zi_duration <= 0 || *end != '\0') { - (void) fprintf(stderr, "invalid duration '%s': " - "must be a positive integer\n", optarg); - usage(); - return (1); - } - break; - case 'T': - if (strcasecmp(optarg, "read") == 0) { - io_type = ZIO_TYPE_READ; - } else if (strcasecmp(optarg, "write") == 0) { - io_type = ZIO_TYPE_WRITE; - } else if (strcasecmp(optarg, "free") == 0) { - io_type = ZIO_TYPE_FREE; - } else if (strcasecmp(optarg, "claim") == 0) { - io_type = ZIO_TYPE_CLAIM; - } else if (strcasecmp(optarg, "all") == 0) { - io_type = ZIO_TYPES; - } else { - (void) fprintf(stderr, "invalid I/O type " - "'%s': must be 'read', 'write', 'free', " - "'claim' or 'all'\n", optarg); - usage(); - return (1); - } - break; - case 't': - if ((type = name_to_type(optarg)) == TYPE_INVAL && - !MOS_TYPE(type)) { - (void) fprintf(stderr, "invalid type '%s'\n", - optarg); - usage(); - return (1); - } - break; - case 'u': - flags |= ZINJECT_UNLOAD_SPA; - break; - case 'L': - if ((label = name_to_type(optarg)) == TYPE_INVAL && - !LABEL_TYPE(type)) { - (void) fprintf(stderr, "invalid label type " - "'%s'\n", optarg); - usage(); - return (1); - } - break; - case ':': - (void) fprintf(stderr, "option -%c requires an " - "operand\n", optopt); - usage(); - return (1); - case '?': - (void) 
fprintf(stderr, "invalid option '%c'\n", - optopt); - usage(); - return (2); - } - } - - argc -= optind; - argv += optind; - - if (record.zi_duration != 0) - record.zi_cmd = ZINJECT_IGNORED_WRITES; - - if (cancel != NULL) { - /* - * '-c' is invalid with any other options. - */ - if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) { - (void) fprintf(stderr, "cancel (-c) incompatible with " - "any other options\n"); - usage(); - return (2); - } - if (argc != 0) { - (void) fprintf(stderr, "extraneous argument to '-c'\n"); - usage(); - return (2); - } - - if (strcmp(cancel, "all") == 0) { - return (cancel_all_handlers()); - } else { - int id = (int)strtol(cancel, &end, 10); - if (*end != '\0') { - (void) fprintf(stderr, "invalid handle id '%s':" - " must be an integer or 'all'\n", cancel); - usage(); - return (1); - } - return (cancel_handler(id)); - } - } - - if (device != NULL) { - /* - * Device (-d) injection uses a completely different mechanism - * for doing injection, so handle it separately here. 
- */ - if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) { - (void) fprintf(stderr, "device (-d) incompatible with " - "data error injection\n"); - usage(); - return (2); - } - - if (argc != 1) { - (void) fprintf(stderr, "device (-d) injection requires " - "a single pool name\n"); - usage(); - return (2); - } - - (void) strcpy(pool, argv[0]); - dataset[0] = '\0'; - - if (error == ECKSUM) { - (void) fprintf(stderr, "device error type must be " - "'io' or 'nxio'\n"); - return (1); - } - - record.zi_iotype = io_type; - if (translate_device(pool, device, label, &record) != 0) - return (1); - if (!error) - error = ENXIO; - - if (action != VDEV_STATE_UNKNOWN) - return (perform_action(pool, &record, action)); - - } else if (raw != NULL) { - if (range != NULL || type != TYPE_INVAL || level != 0 || - record.zi_cmd != ZINJECT_UNINITIALIZED) { - (void) fprintf(stderr, "raw (-b) format with " - "any other options\n"); - usage(); - return (2); - } - - if (argc != 1) { - (void) fprintf(stderr, "raw (-b) format expects a " - "single pool name\n"); - usage(); - return (2); - } - - (void) strcpy(pool, argv[0]); - dataset[0] = '\0'; - - if (error == ENXIO) { - (void) fprintf(stderr, "data error type must be " - "'checksum' or 'io'\n"); - return (1); - } - - record.zi_cmd = ZINJECT_DATA_FAULT; - if (translate_raw(raw, &record) != 0) - return (1); - if (!error) - error = EIO; - } else if (record.zi_cmd == ZINJECT_PANIC) { - if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || device != NULL) { - (void) fprintf(stderr, "panic (-p) incompatible with " - "other options\n"); - usage(); - return (2); - } - - if (argc < 1 || argc > 2) { - (void) fprintf(stderr, "panic (-p) injection requires " - "a single pool name and an optional id\n"); - usage(); - return (2); - } - - (void) strcpy(pool, argv[0]); - if (argv[1] != NULL) - record.zi_type = atoi(argv[1]); - dataset[0] = '\0'; - } else if (record.zi_cmd == 
ZINJECT_IGNORED_WRITES) { - if (nowrites == 0) { - (void) fprintf(stderr, "-s or -g meaningless " - "without -I (ignore writes)\n"); - usage(); - return (2); - } else if (dur_secs && dur_txg) { - (void) fprintf(stderr, "choose a duration either " - "in seconds (-s) or a number of txgs (-g) " - "but not both\n"); - usage(); - return (2); - } else if (argc != 1) { - (void) fprintf(stderr, "ignore writes (-I) " - "injection requires a single pool name\n"); - usage(); - return (2); - } - - (void) strcpy(pool, argv[0]); - dataset[0] = '\0'; - } else if (type == TYPE_INVAL) { - if (flags == 0) { - (void) fprintf(stderr, "at least one of '-b', '-d', " - "'-t', '-a', '-p', '-I' or '-u' " - "must be specified\n"); - usage(); - return (2); - } - - if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) { - (void) strcpy(pool, argv[0]); - dataset[0] = '\0'; - } else if (argc != 0) { - (void) fprintf(stderr, "extraneous argument for " - "'-f'\n"); - usage(); - return (2); - } - - flags |= ZINJECT_NULL; - } else { - if (argc != 1) { - (void) fprintf(stderr, "missing object\n"); - usage(); - return (2); - } - - if (error == ENXIO) { - (void) fprintf(stderr, "data error type must be " - "'checksum' or 'io'\n"); - return (1); - } - - record.zi_cmd = ZINJECT_DATA_FAULT; - if (translate_record(type, argv[0], range, level, &record, pool, - dataset) != 0) - return (1); - if (!error) - error = EIO; - } - - /* - * If this is pool-wide metadata, unmount everything. The ioctl() will - * unload the pool, so that we trigger spa-wide reopen of metadata next - * time we access the pool. 
- */ - if (dataset[0] != '\0' && domount) { - if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) - return (1); - - if (zfs_unmount(zhp, NULL, 0) != 0) - return (1); - } - - record.zi_error = error; - - ret = register_handler(pool, flags, &record, quiet); - - if (dataset[0] != '\0' && domount) - ret = (zfs_mount(zhp, NULL, 0) != 0); - - libzfs_fini(g_zfs); - - return (ret); -} diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.h b/cddl/contrib/opensolaris/cmd/zinject/zinject.h deleted file mode 100644 index 46fdcad8b31f..000000000000 --- a/cddl/contrib/opensolaris/cmd/zinject/zinject.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- */ - -#ifndef _ZINJECT_H -#define _ZINJECT_H - -#include <sys/zfs_ioctl.h> - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - TYPE_DATA, /* plain file contents */ - TYPE_DNODE, /* metadnode contents */ - TYPE_MOS, /* all MOS data */ - TYPE_MOSDIR, /* MOS object directory */ - TYPE_METASLAB, /* metaslab objects */ - TYPE_CONFIG, /* MOS config */ - TYPE_BPOBJ, /* block pointer list */ - TYPE_SPACEMAP, /* space map objects */ - TYPE_ERRLOG, /* persistent error log */ - TYPE_LABEL_UBERBLOCK, /* label specific uberblock */ - TYPE_LABEL_NVLIST, /* label specific nvlist */ - TYPE_LABEL_PAD1, /* label specific 8K pad1 area */ - TYPE_LABEL_PAD2, /* label specific 8K pad2 area */ - TYPE_INVAL -} err_type_t; - -#define MOS_TYPE(t) \ - ((t) >= TYPE_MOS && (t) < TYPE_LABEL_UBERBLOCK) - -#define LABEL_TYPE(t) \ - ((t) >= TYPE_LABEL_UBERBLOCK && (t) < TYPE_INVAL) - -int translate_record(err_type_t type, const char *object, const char *range, - int level, zinject_record_t *record, char *poolname, char *dataset); -int translate_raw(const char *raw, zinject_record_t *record); -int translate_device(const char *pool, const char *device, - err_type_t label_type, zinject_record_t *record); -void usage(void); - -extern libzfs_handle_t *g_zfs; - -#ifdef __cplusplus -} -#endif - -#endif /* _ZINJECT_H */ diff --git a/cddl/contrib/opensolaris/cmd/zlook/zlook.c b/cddl/contrib/opensolaris/cmd/zlook/zlook.c deleted file mode 100644 index 29a6559f9023..000000000000 --- a/cddl/contrib/opensolaris/cmd/zlook/zlook.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * This is a test program that uses ioctls to the ZFS Unit Test driver - * to perform readdirs or lookups using flags not normally available - * to user-land programs. This allows testing of the flags' - * behavior outside of a complicated consumer, such as the SMB driver. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <stropts.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/dirent.h> -#include <sys/attr.h> -#include <stddef.h> -#include <fcntl.h> -#include <string.h> -#include <time.h> - -#define _KERNEL - -#include <sys/fs/zut.h> -#include <sys/extdirent.h> - -#undef _KERNEL - -#define MAXBUF (64 * 1024) -#define BIGBUF 4096 -#define LILBUF (sizeof (dirent_t)) - -#define DIRENT_NAMELEN(reclen) \ - ((reclen) - (offsetof(dirent_t, d_name[0]))) - -static void -usage(char *pnam) -{ - (void) fprintf(stderr, "Usage:\n %s -l [-is] dir-to-look-in " - "file-in-dir [xfile-on-file]\n", pnam); - (void) fprintf(stderr, " %s -i [-ls] dir-to-look-in " - "file-in-dir [xfile-on-file]\n", pnam); - (void) fprintf(stderr, " %s -s [-il] dir-to-look-in " - "file-in-dir [xfile-on-file]\n", pnam); - (void) fprintf(stderr, "\t Perform a lookup\n"); - (void) fprintf(stderr, "\t -l == lookup\n"); - (void) fprintf(stderr, "\t -i == request FIGNORECASE\n"); - (void) fprintf(stderr, "\t -s == request stat(2) and xvattr info\n"); - (void) 
fprintf(stderr, " %s -r [-ea] [-b buffer-size-in-bytes] " - "dir-to-look-in [file-in-dir]\n", pnam); - (void) fprintf(stderr, " %s -e [-ra] [-b buffer-size-in-bytes] " - "dir-to-look-in [file-in-dir]\n", pnam); - (void) fprintf(stderr, " %s -a [-re] [-b buffer-size-in-bytes] " - "dir-to-look-in [file-in-dir]\n", pnam); - (void) fprintf(stderr, "\t Perform a readdir\n"); - (void) fprintf(stderr, "\t -r == readdir\n"); - (void) fprintf(stderr, "\t -e == request extended entries\n"); - (void) fprintf(stderr, "\t -a == request access filtering\n"); - (void) fprintf(stderr, "\t -b == buffer size (default 4K)\n"); - (void) fprintf(stderr, " %s -A path\n", pnam); - (void) fprintf(stderr, "\t Look up _PC_ACCESS_FILTERING " - "for path with pathconf(2)\n"); - (void) fprintf(stderr, " %s -E path\n", pnam); - (void) fprintf(stderr, "\t Look up _PC_SATTR_EXISTS " - "for path with pathconf(2)\n"); - (void) fprintf(stderr, " %s -S path\n", pnam); - (void) fprintf(stderr, "\t Look up _PC_SATTR_EXISTS " - "for path with pathconf(2)\n"); - exit(EINVAL); -} - -static void -print_extd_entries(zut_readdir_t *r) -{ - struct edirent *eodp; - char *bufstart; - - eodp = (edirent_t *)(uintptr_t)r->zr_buf; - bufstart = (char *)eodp; - while ((char *)eodp < bufstart + r->zr_bytes) { - char *blanks = " "; - int i = 0; - while (i < EDIRENT_NAMELEN(eodp->ed_reclen)) { - if (!eodp->ed_name[i]) - break; - (void) printf("%c", eodp->ed_name[i++]); - } - if (i < 16) - (void) printf("%.*s", 16 - i, blanks); - (void) printf("\t%x\n", eodp->ed_eflags); - eodp = (edirent_t *)((intptr_t)eodp + eodp->ed_reclen); - } -} - -static void -print_entries(zut_readdir_t *r) -{ - dirent64_t *dp; - char *bufstart; - - dp = (dirent64_t *)(intptr_t)r->zr_buf; - bufstart = (char *)dp; - while ((char *)dp < bufstart + r->zr_bytes) { - int i = 0; - while (i < DIRENT_NAMELEN(dp->d_reclen)) { - if (!dp->d_name[i]) - break; - (void) printf("%c", dp->d_name[i++]); - } - (void) printf("\n"); - dp = (dirent64_t 
*)((intptr_t)dp + dp->d_reclen); - } -} - -static void -print_stats(struct stat64 *sb) -{ - char timebuf[512]; - - (void) printf("st_mode\t\t\t%04lo\n", (unsigned long)sb->st_mode); - (void) printf("st_ino\t\t\t%llu\n", (unsigned long long)sb->st_ino); - (void) printf("st_nlink\t\t%lu\n", (unsigned long)sb->st_nlink); - (void) printf("st_uid\t\t\t%d\n", sb->st_uid); - (void) printf("st_gid\t\t\t%d\n", sb->st_gid); - (void) printf("st_size\t\t\t%lld\n", (long long)sb->st_size); - (void) printf("st_blksize\t\t%ld\n", (long)sb->st_blksize); - (void) printf("st_blocks\t\t%lld\n", (long long)sb->st_blocks); - - timebuf[0] = 0; - if (ctime_r(&sb->st_atime, timebuf, 512)) { - (void) printf("st_atime\t\t"); - (void) printf("%s", timebuf); - } - timebuf[0] = 0; - if (ctime_r(&sb->st_mtime, timebuf, 512)) { - (void) printf("st_mtime\t\t"); - (void) printf("%s", timebuf); - } - timebuf[0] = 0; - if (ctime_r(&sb->st_ctime, timebuf, 512)) { - (void) printf("st_ctime\t\t"); - (void) printf("%s", timebuf); - } -} - -static void -print_xvs(uint64_t xvs) -{ - uint_t bits; - int idx = 0; - - if (xvs == 0) - return; - - (void) printf("-------------------\n"); - (void) printf("Attribute bit(s) set:\n"); - (void) printf("-------------------\n"); - - bits = xvs & ((1 << F_ATTR_ALL) - 1); - while (bits) { - uint_t rest = bits >> 1; - if (bits & 1) { - (void) printf("%s", attr_to_name((f_attr_t)idx)); - if (rest) - (void) printf(", "); - } - idx++; - bits = rest; - } - (void) printf("\n"); -} - -int -main(int argc, char **argv) -{ - zut_lookup_t lk = {0}; - zut_readdir_t rd = {0}; - boolean_t checking = B_FALSE; - boolean_t looking = B_FALSE; - boolean_t reading = B_FALSE; - boolean_t bflag = B_FALSE; - long rddir_bufsize = BIGBUF; - int error = 0; - int check; - int fd; - int c; - - while ((c = getopt(argc, argv, "lisaerb:ASE")) != -1) { - switch (c) { - case 'l': - looking = B_TRUE; - break; - case 'i': - lk.zl_reqflags |= ZUT_IGNORECASE; - looking = B_TRUE; - break; - case 's': - 
lk.zl_reqflags |= ZUT_GETSTAT; - looking = B_TRUE; - break; - case 'a': - rd.zr_reqflags |= ZUT_ACCFILTER; - reading = B_TRUE; - break; - case 'e': - rd.zr_reqflags |= ZUT_EXTRDDIR; - reading = B_TRUE; - break; - case 'r': - reading = B_TRUE; - break; - case 'b': - reading = B_TRUE; - bflag = B_TRUE; - rddir_bufsize = strtol(optarg, NULL, 0); - break; - case 'A': - checking = B_TRUE; - check = _PC_ACCESS_FILTERING; - break; - case 'S': - checking = B_TRUE; - check = _PC_SATTR_ENABLED; - break; - case 'E': - checking = B_TRUE; - check = _PC_SATTR_EXISTS; - break; - case '?': - default: - usage(argv[0]); /* no return */ - } - } - - if ((checking && looking) || (checking && reading) || - (looking && reading) || (!reading && bflag) || - (!checking && !reading && !looking)) - usage(argv[0]); /* no return */ - - if (rddir_bufsize < LILBUF || rddir_bufsize > MAXBUF) { - (void) fprintf(stderr, "Sorry, buffer size " - "must be >= %d and less than or equal to %d bytes.\n", - (int)LILBUF, MAXBUF); - exit(EINVAL); - } - - if (checking) { - char pathbuf[MAXPATHLEN]; - long result; - - if (argc - optind < 1) - usage(argv[0]); /* no return */ - (void) strlcpy(pathbuf, argv[optind], MAXPATHLEN); - result = pathconf(pathbuf, check); - (void) printf("pathconf(2) check for %s\n", pathbuf); - switch (check) { - case _PC_SATTR_ENABLED: - (void) printf("System attributes "); - if (result != 0) - (void) printf("Enabled\n"); - else - (void) printf("Not enabled\n"); - break; - case _PC_SATTR_EXISTS: - (void) printf("System attributes "); - if (result != 0) - (void) printf("Exist\n"); - else - (void) printf("Do not exist\n"); - break; - case _PC_ACCESS_FILTERING: - (void) printf("Access filtering "); - if (result != 0) - (void) printf("Available\n"); - else - (void) printf("Not available\n"); - break; - } - return (result); - } - - if ((fd = open(ZUT_DEV, O_RDONLY)) < 0) { - perror(ZUT_DEV); - return (ENXIO); - } - - if (reading) { - char *buf; - - if (argc - optind < 1) - usage(argv[0]); 
/* no return */ - - (void) strlcpy(rd.zr_dir, argv[optind], MAXPATHLEN); - if (argc - optind > 1) { - (void) strlcpy(rd.zr_file, argv[optind + 1], - MAXNAMELEN); - rd.zr_reqflags |= ZUT_XATTR; - } - - if ((buf = malloc(rddir_bufsize)) == NULL) { - error = errno; - perror("malloc"); - (void) close(fd); - return (error); - } - - rd.zr_buf = (uint64_t)(uintptr_t)buf; - rd.zr_buflen = rddir_bufsize; - - while (!rd.zr_eof) { - int ierr; - - if ((ierr = ioctl(fd, ZUT_IOC_READDIR, &rd)) != 0) { - (void) fprintf(stderr, - "IOCTL error: %s (%d)\n", - strerror(ierr), ierr); - free(buf); - (void) close(fd); - return (ierr); - } - if (rd.zr_retcode) { - (void) fprintf(stderr, - "readdir result: %s (%d)\n", - strerror(rd.zr_retcode), rd.zr_retcode); - free(buf); - (void) close(fd); - return (rd.zr_retcode); - } - if (rd.zr_reqflags & ZUT_EXTRDDIR) - print_extd_entries(&rd); - else - print_entries(&rd); - } - free(buf); - } else { - int ierr; - - if (argc - optind < 2) - usage(argv[0]); /* no return */ - - (void) strlcpy(lk.zl_dir, argv[optind], MAXPATHLEN); - (void) strlcpy(lk.zl_file, argv[optind + 1], MAXNAMELEN); - if (argc - optind > 2) { - (void) strlcpy(lk.zl_xfile, - argv[optind + 2], MAXNAMELEN); - lk.zl_reqflags |= ZUT_XATTR; - } - - if ((ierr = ioctl(fd, ZUT_IOC_LOOKUP, &lk)) != 0) { - (void) fprintf(stderr, - "IOCTL error: %s (%d)\n", - strerror(ierr), ierr); - (void) close(fd); - return (ierr); - } - - (void) printf("\nLookup of "); - if (lk.zl_reqflags & ZUT_XATTR) { - (void) printf("extended attribute \"%s\" of ", - lk.zl_xfile); - } - (void) printf("file \"%s\" ", lk.zl_file); - (void) printf("in directory \"%s\" ", lk.zl_dir); - if (lk.zl_retcode) { - (void) printf("failed: %s (%d)\n", - strerror(lk.zl_retcode), lk.zl_retcode); - (void) close(fd); - return (lk.zl_retcode); - } - - (void) printf("succeeded.\n"); - if (lk.zl_reqflags & ZUT_IGNORECASE) { - (void) printf("----------------------------\n"); - (void) printf("dirent flags: 0x%0x\n", lk.zl_deflags); - 
(void) printf("real name: %s\n", lk.zl_real); - } - if (lk.zl_reqflags & ZUT_GETSTAT) { - (void) printf("----------------------------\n"); - print_stats(&lk.zl_statbuf); - print_xvs(lk.zl_xvattrs); - } - } - - (void) close(fd); - return (0); -} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 b/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 deleted file mode 100644 index 980d4da0e31b..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 +++ /dev/null @@ -1,674 +0,0 @@ -'\" te -.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. -.\" All Rights Reserved. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. -.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" Copyright (c) 2012, 2017 by Delphix. All rights reserved. -.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. -.\" Copyright (c) 2013, Joyent, Inc. All rights reserved. -.\" -.\" $FreeBSD$ -.\" -.Dd August 16, 2019 -.Dt ZPOOL-FEATURES 7 -.Os -.Sh NAME -.Nm zpool-features -.Nd ZFS pool feature descriptions -.Sh DESCRIPTION -ZFS pool on\-disk format versions are specified via "features" which replace -the old on\-disk format numbers (the last supported on\-disk format number is -28). 
-To enable a feature on a pool use the -.Cm upgrade -subcommand of the -.Xr zpool 8 -command, or set the -.Sy feature@feature_name -property to -.Ar enabled . -.Pp -The pool format does not affect file system version compatibility or the ability -to send file systems between pools. -.Pp -Since most features can be enabled independently of each other the on\-disk -format of the pool is specified by the set of all features marked as -.Sy active -on the pool. -If the pool was created by another software version this set may -include unsupported features. -.Ss Identifying features -Every feature has a guid of the form -.Sy com.example:feature_name . -The reverse DNS name ensures that the feature's guid is unique across all ZFS -implementations. -When unsupported features are encountered on a pool they will -be identified by their guids. -Refer to the documentation for the ZFS implementation that created the pool -for information about those features. -.Pp -Each supported feature also has a short name. -By convention a feature's short name is the portion of its guid which follows -the ':' (e.g. -.Sy com.example:feature_name -would have the short name -.Sy feature_name ), -however a feature's short name may differ across ZFS implementations if -following the convention would result in name conflicts. -.Ss Feature states -Features can be in one of three states: -.Bl -tag -width "XXXXXXXX" -.It Sy active -This feature's on\-disk format changes are in effect on the pool. -Support for this feature is required to import the pool in read\-write mode. -If this feature is not read-only compatible, support is also required to -import the pool in read\-only mode (see "Read\-only compatibility"). -.It Sy enabled -An administrator has marked this feature as enabled on the pool, but the -feature's on\-disk format changes have not been made yet. 
-The pool can still be imported by software that does not support this feature, -but changes may be made to the on\-disk format at any time which will move -the feature to the -.Sy active -state. -Some features may support returning to the -.Sy enabled -state after becoming -.Sy active . -See feature\-specific documentation for details. -.It Sy disabled -This feature's on\-disk format changes have not been made and will not be made -unless an administrator moves the feature to the -.Sy enabled -state. -Features cannot be disabled once they have been enabled. -.El -.Pp -The state of supported features is exposed through pool properties of the form -.Sy feature@short_name . -.Ss Read\-only compatibility -Some features may make on\-disk format changes that do not interfere with other -software's ability to read from the pool. -These features are referred to as "read\-only compatible". -If all unsupported features on a pool are read\-only compatible, the pool can -be imported in read\-only mode by setting the -.Sy readonly -property during import (see -.Xr zpool 8 -for details on importing pools). -.Ss Unsupported features -For each unsupported feature enabled on an imported pool a pool property -named -.Sy unsupported@feature_guid -will indicate why the import was allowed despite the unsupported feature. -Possible values for this property are: -.Bl -tag -width "XXXXXXXX" -.It Sy inactive -The feature is in the -.Sy enabled -state and therefore the pool's on\-disk format is still compatible with -software that does not support this feature. -.It Sy readonly -The feature is read\-only compatible and the pool has been imported in -read\-only mode. -.El -.Ss Feature dependencies -Some features depend on other features being enabled in order to function -properly. -Enabling a feature will automatically enable any features it depends on. 
-.Sh FEATURES -The following features are supported on this system: -.Bl -tag -width "XXXXXXXX" -.It Sy async_destroy -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:async_destroy" -.It GUID Ta com.delphix:async_destroy -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -Destroying a file system requires traversing all of its data in order to -return its used space to the pool. -Without -.Sy async_destroy -the file system is not fully removed until all space has been reclaimed. -If the destroy operation is interrupted by a reboot or power outage the next -attempt to open the pool will need to complete the destroy operation -synchronously. -.Pp -When -.Sy async_destroy -is enabled the file system's data will be reclaimed by a background process, -allowing the destroy operation to complete without traversing the entire file -system. -The background process is able to resume interrupted destroys after the pool -has been opened, eliminating the need to finish interrupted destroys as part -of the open operation. -The amount of space remaining to be reclaimed by the background process is -available through the -.Sy freeing -property. -.Pp -This feature is only -.Sy active -while -.Sy freeing -is non\-zero. -.It Sy empty_bpobj -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:empty_bpobj" -.It GUID Ta com.delphix:empty_bpobj -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -This feature increases the performance of creating and using a large number -of snapshots of a single filesystem or volume, and also reduces the disk -space required. -.Pp -When there are many snapshots, each snapshot uses many Block Pointer Objects -.Pq bpobj's -to track blocks associated with that snapshot. -However, in common use cases, most of these bpobj's are empty. -This feature allows us to create each bpobj on-demand, thus eliminating the -empty bpobjs. 
-.Pp -This feature is -.Sy active -while there are any filesystems, volumes, or snapshots which were created -after enabling this feature. -.It Sy filesystem_limits -.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:filesystem_limits" -.It GUID Ta com.joyent:filesystem_limits -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -This feature enables filesystem and snapshot limits. -These limits can be used -to control how many filesystems and/or snapshots can be created at the point in -the tree on which the limits are set. -.Pp -This feature is -.Sy active -once either of the limit properties has been -set on a dataset. -Once activated the feature is never deactivated. -.It Sy lz4_compress -.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:lz4_compress" -.It GUID Ta org.illumos:lz4_compress -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta none -.El -.Pp -.Sy lz4 -is a high-performance real-time compression algorithm that -features significantly faster compression and decompression as well as a -higher compression ratio than the older -.Sy lzjb -compression. -Typically, -.Sy lz4 -compression is approximately 50% faster on -compressible data and 200% faster on incompressible data than -.Sy lzjb . -It is also approximately 80% faster on decompression, while -giving approximately 10% better compression ratio. -.Pp -When the -.Sy lz4_compress -feature is set to -.Sy enabled , -the -administrator can turn on -.Sy lz4 -compression on any dataset on the -pool using the -.Xr zfs 8 -command. -Also, all newly written metadata -will be compressed with -.Sy lz4 -algorithm. -Since this feature is not read-only compatible, this -operation will render the pool unimportable on systems without support -for the -.Sy lz4_compress -feature. -Booting off of -.Sy lz4 --compressed root pools is supported. -.Pp -This feature becomes -.Sy active -as soon as it is enabled and will -never return to being -.Sy enabled . 
-.It Sy multi_vdev_crash_dump -.Bl -column "READ\-ONLY COMPATIBLE" "com.joyent:multi_vdev_crash_dump" -.It GUID Ta com.joyent:multi_vdev_crash_dump -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta none -.El -.Pp -This feature allows a dump device to be configured with a pool comprised -of multiple vdevs. -Those vdevs may be arranged in any mirrored or raidz -configuration. -.\" TODO: this is not yet supported on FreeBSD. -.\" .Pp -.\" When the -.\" .Sy multi_vdev_crash_dump -.\" feature is set to -.\" .Sy enabled , -.\" the administrator can use the -.\" .Xr dumpon 8 -.\" command to configure a -.\" dump device on a pool comprised of multiple vdevs. -.It Sy spacemap_histogram -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:spacemap_histogram" -.It GUID Ta com.delphix:spacemap_histogram -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -This feature allows ZFS to maintain more information about how free space -is organized within the pool. -If this feature is -.Sy enabled , -ZFS will -set this feature to -.Sy active -when a new space map object is created or -an existing space map is upgraded to the new format. -Once the feature is -.Sy active , -it will remain in that state until the pool is destroyed. -.It Sy extensible_dataset -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:extensible_dataset" -.It GUID Ta com.delphix:extensible_dataset -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta none -.El -.Pp -This feature allows more flexible use of internal ZFS data structures, -and exists for other features to depend on. -.Pp -This feature will be -.Sy active -when the first dependent feature uses it, -and will be returned to the -.Sy enabled -state when all datasets that use -this feature are destroyed. 
-.It Sy bookmarks -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:bookmarks" -.It GUID Ta com.delphix:bookmarks -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -This feature enables use of the -.Nm zfs -.Cm bookmark -subcommand. -.Pp -This feature is -.Sy active -while any bookmarks exist in the pool. -All bookmarks in the pool can be listed by running -.Nm zfs -.Cm list -.Fl t No bookmark Fl r Ar poolname . -.It Sy enabled_txg -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:enabled_txg" -.It GUID Ta com.delphix:enabled_txg -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -Once this feature is enabled ZFS records the transaction group number -in which new features are enabled. -This has no user-visible impact, -but other features may depend on this feature. -.Pp -This feature becomes -.Sy active -as soon as it is enabled and will -never return to being -.Sy enabled . -.It Sy hole_birth -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:hole_birth" -.It GUID Ta com.delphix:hole_birth -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta enabled_txg -.El -.Pp -This feature improves performance of incremental sends -.Pq Dq zfs send -i -and receives for objects with many holes. -The most common case of -hole-filled objects is zvols. -.Pp -An incremental send stream from snapshot -.Sy A -to snapshot -.Sy B -contains information about every block that changed between -.Sy A -and -.Sy B . -Blocks which did not change between those snapshots can be -identified and omitted from the stream using a piece of metadata called -the 'block birth time', but birth times are not recorded for holes -.Pq blocks filled only with zeroes . -Since holes created after -.Sy A -cannot be -distinguished from holes created before -.Sy A , -information about every -hole in the entire filesystem or zvol is included in the send stream. -.Pp -For workloads where holes are rare this is not a problem. 
-However, when -incrementally replicating filesystems or zvols with many holes -.Pq for example a zvol formatted with another filesystem -a lot of time will -be spent sending and receiving unnecessary information about holes that -already exist on the receiving side. -.Pp -Once the -.Sy hole_birth -feature has been enabled the block birth times -of all new holes will be recorded. -Incremental sends between snapshots -created after this feature is enabled will use this new metadata to avoid -sending information about holes that already exist on the receiving side. -.Pp -This feature becomes -.Sy active -as soon as it is enabled and will -never return to being -.Sy enabled . -.It Sy embedded_data -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:embedded_data" -.It GUID Ta com.delphix:embedded_data -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta none -.El -.Pp -This feature improves the performance and compression ratio of -highly-compressible blocks. -Blocks whose contents can compress to 112 bytes -or smaller can take advantage of this feature. -.Pp -When this feature is enabled, the contents of highly-compressible blocks are -stored in the block "pointer" itself -.Po a misnomer in this case, as it contains -the compressed data, rather than a pointer to its location on disk -.Pc . -Thus -the space of the block -.Pq one sector, typically 512 bytes or 4KB -is saved, -and no additional i/o is needed to read and write the data block. -.Pp -This feature becomes -.Sy active -as soon as it is enabled and will -never return to being -.Sy enabled . -.It Sy zpool_checkpoint -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:zpool_checkpoint" -.It GUID Ta com.delphix:zpool_checkpoint -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -This feature enables the "zpool checkpoint" subcommand that can -checkpoint the state of the pool at the time it was issued and later -rewind back to it or discard it. 
-.Pp -This feature becomes -.Sy active -when the "zpool checkpoint" command is used to checkpoint the pool. -The feature will only return to being -.Sy enabled -when the pool is rewound or the checkpoint has been discarded. -.It Sy device_removal -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:device_removal" -.It GUID Ta com.delphix:device_removal -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta none -.El -.Pp -This feature enables the "zpool remove" subcommand to remove top-level -vdevs, evacuating them to reduce the total size of the pool. -.Pp -This feature becomes -.Sy active -when the "zpool remove" command is used -on a top-level vdev, and will never return to being -.Sy enabled . -.It Sy obsolete_counts -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:obsolete_counts" -.It GUID Ta com.delphix:obsolete_counts -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta device_removal -.El -.Pp -This feature is an enhancement of device_removal, which will over time -reduce the memory used to track removed devices. When indirect blocks -are freed or remapped, we note that their part of the indirect mapping -is "obsolete", i.e. no longer needed. See also the "zfs remap" -subcommand in -.Xr zfs 8 . -.Pp -This feature becomes -.Sy active -when the "zpool remove" command is -used on a top-level vdev, and will never return to being -.Sy enabled . -.It Sy spacemap_v2 -.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:spacemap_v2" -.It GUID Ta com.delphix:spacemap_v2 -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -This feature enables the use of the new space map encoding which -consists of two words (instead of one) whenever it is advantageous. -The new encoding allows space maps to represent large regions of -space more efficiently on-disk while also increasing their maximum -addressable offset. -.Pp -This feature becomes -.Sy active -as soon as it is enabled and will -never return to being -.Sy enabled .
-.It Sy large_blocks -.Bl -column "READ\-ONLY COMPATIBLE" "org.open-zfs:large_block" -.It GUID Ta org.open-zfs:large_block -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -The -.Sy large_block -feature allows the record size on a dataset to be -set larger than 128KB. -.Pp -This feature becomes -.Sy active -once a -.Sy recordsize -property has been set larger than 128KB, and will return to being -.Sy enabled -once all filesystems that have ever had their recordsize larger than 128KB -are destroyed. -.Pp -Booting from datasets that use the -.Sy large_block -feature is supported by the -.Fx -boot loader. -.It Sy large_dnode -.Bl -column "READ\-ONLY COMPATIBLE" "org.zfsonlinux:large_dnode" -.It GUID Ta org.zfsonlinux:large_dnode -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -The -.Sy large_dnode -feature allows the size of dnodes in a dataset to be set larger than 512B. -.Pp -This feature becomes -.Sy active -once a dataset contains an object with a dnode larger than 512B, -which occurs as a result of setting the -.Sy dnodesize -dataset property to a value other than -.Sy legacy . -The feature will return to being -.Sy enabled -once all filesystems that have ever contained a dnode larger than 512B are -destroyed. -Large dnodes allow more data to be stored in the bonus buffer, thus potentially -improving performance by avoiding the use of spill blocks. -.It Sy sha512 -.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:sha512" -.It GUID Ta org.illumos:sha512 -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -The -.Sy sha512 -feature enables the use of the SHA-512/256 truncated hash algorithm -.Pq FIPS 180-4 -for checksum and dedup. 
-The native 64-bit arithmetic of SHA-512 provides an approximate 50% -performance boost over SHA-256 on 64-bit hardware and is thus a good -minimum-change replacement candidate for systems where hash performance is -important, but these systems cannot for whatever reason utilize the faster -.Sy skein -algorithms. -.Pp -When the -.Sy sha512 -feature is set to -.Sy enabled , -the administrator can turn on the -.Sy sha512 -checksum on any dataset using the -.Dl # zfs set checksum=sha512 Ar dataset -command. -This feature becomes -.Sy active -once a -.Sy checksum -property has been set to -.Sy sha512 , -and will return to being -.Sy enabled -once all filesystems that have ever had their checksum set to -.Sy sha512 -are destroyed. -.It Sy skein -.Bl -column "READ\-ONLY COMPATIBLE" "org.illumos:skein" -.It GUID Ta org.illumos:skein -.It READ\-ONLY COMPATIBLE Ta no -.It DEPENDENCIES Ta extensible_dataset -.El -.Pp -The -.Sy skein -feature enables the use of the Skein hash algorithm for checksum and dedup. -Skein is a high-performance secure hash algorithm that was a finalist in the -NIST SHA-3 competition. -It provides a very high security margin and high performance on 64-bit hardware -.Pq 80% faster than SHA-256 . -This implementation also utilizes the new salted checksumming functionality in -ZFS, which means that the checksum is pre-seeded with a secret 256-bit random -key -.Pq stored on the pool -before being fed the data block to be checksummed. -Thus the produced checksums are unique to a given pool, preventing hash -collision attacks on systems with dedup. -.Pp -When the -.Sy skein -feature is set to -.Sy enabled , -the administrator can turn on the -.Sy skein -checksum on any dataset using the -.Dl # zfs set checksum=skein Ar dataset -command. -This feature becomes -.Sy active -once a -.Sy checksum -property has been set to -.Sy skein , -and will return to being -.Sy enabled -once all filesystems that have ever had their checksum set to -.Sy skein -are destroyed. 
-Booting off of pools using -.Sy skein -is supported. -.It Sy allocation_classes -.Bl -column "READ\-ONLY COMPATIBLE" "com.intel:allocation_classes" -.It GUID Ta com.intel:allocation_classes -.It READ\-ONLY COMPATIBLE Ta yes -.It DEPENDENCIES Ta none -.El -.Pp -This feature enables support for separate allocation classes. -.Pp -This feature becomes -.Sy active -when a dedicated allocation class vdev -(dedup or special) is created with -.Dq zpool create -or -.Dq zpool add . -With device removal, it can be returned to the -.Sy enabled -state if all the top-level vdevs from an allocation class are removed. -.El -.Sh SEE ALSO -.Xr zpool 8 -.Sh AUTHORS -This manual page is a -.Xr mdoc 7 -reimplementation of the -.Tn illumos -manual page -.Em zpool-features(5) , -modified and customized for -.Fx -and licensed under the Common Development and Distribution License -.Pq Tn CDDL . -.Pp -The -.Xr mdoc 7 -implementation of this manual page was initially written by -.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 deleted file mode 100644 index f5caffb95d79..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 +++ /dev/null @@ -1,2485 +0,0 @@ -'\" te -.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. -.\" Copyright (c) 2013-2014, Xin Li <delphij@FreeBSD.org>. -.\" All Rights Reserved. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2011, Justin T. Gibbs <gibbs@FreeBSD.org> -.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org> -.\" Copyright (c) 2012, 2017 by Delphix. All Rights Reserved. -.\" Copyright 2017 Nexenta Systems, Inc. -.\" Copyright (c) 2017 Datto Inc. -.\" Copyright (c) 2017 George Melikov. All Rights Reserved. -.\" Copyright 2019 Joyent, Inc. -.\" -.\" $FreeBSD$ -.\" -.Dd February 25, 2020 -.Dt ZPOOL 8 -.Os -.Sh NAME -.Nm zpool -.Nd configures ZFS storage pools -.Sh SYNOPSIS -.Nm -.Op Fl \&? -.Nm -.Cm add -.Op Fl fgLnP -.Ar pool vdev ... -.Nm -.Cm attach -.Op Fl f -.Ar pool device new_device -.Nm -.Cm checkpoint -.Op Fl d, -discard -.Ar pool -.Nm -.Cm clear -.Op Fl F Op Fl n -.Ar pool -.Op Ar device -.Nm -.Cm create -.Op Fl fnd -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Op Fl O Ar file-system-property Ns = Ns Ar value -.Ar ... -.Op Fl m Ar mountpoint -.Op Fl R Ar root -.Op Fl t Ar tempname -.Ar pool vdev ... -.Nm -.Cm destroy -.Op Fl f -.Ar pool -.Nm -.Cm detach -.Ar pool device -.Nm -.Cm export -.Op Fl f -.Ar pool ... -.Nm -.Cm get -.Op Fl Hp -.Op Fl o Ar field Ns Op , Ns Ar ... -.Ar all | property Ns Op , Ns Ar ... -.Ar pool ... -.Nm -.Cm history -.Op Fl il -.Op Ar pool -.Ar ... -.Nm -.Cm import -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Nm -.Cm import -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Op Fl -rewind-to-checkpoint -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Op Fl f -.Op Fl m -.Op Fl N -.Op Fl R Ar root -.Op Fl F Op Fl n -.Fl a -.Nm -.Cm import -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... 
-.Op Fl -rewind-to-checkpoint -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Op Fl f -.Op Fl m -.Op Fl N -.Op Fl R Ar root -.Op Fl t -.Op Fl F Op Fl n -.Ar pool | id -.Op Ar newpool -.Nm -.Cm initialize -.Op Fl cs -.Ar pool -.Op Ar device Ns ... -.Nm -.Cm iostat -.Op Fl v -.Op Fl T Cm d Ns | Ns Cm u -.Op Fl gLP -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Nm -.Cm labelclear -.Op Fl f -.Ar device -.Nm -.Cm list -.Op Fl HgLpPv -.Op Fl o Ar property Ns Op , Ns Ar ... -.Op Fl T Cm d Ns | Ns Cm u -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Nm -.Cm offline -.Op Fl t -.Ar pool device ... -.Nm -.Cm online -.Op Fl e -.Ar pool device ... -.Nm -.Cm reguid -.Ar pool -.Nm -.Cm remove -.Op Fl np -.Ar pool device ... -.Nm -.Cm remove -.Fl s -.Ar pool -.Nm -.Cm reopen -.Ar pool -.Nm -.Cm replace -.Op Fl f -.Ar pool device -.Op Ar new_device -.Nm -.Cm scrub -.Op Fl s | Fl p -.Ar pool ... -.Nm -.Cm set -.Ar property Ns = Ns Ar value pool -.Nm -.Cm split -.Op Fl gLnP -.Op Fl R Ar altroot -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar pool newpool -.Op Ar device ... -.Nm -.Cm status -.Op Fl DgLPvx -.Op Fl T Cm d Ns | Ns Cm u -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Nm -.Cm sync -.Oo Ar pool Oc Ns ... -.Nm -.Cm upgrade -.Op Fl v -.Nm -.Cm upgrade -.Op Fl V Ar version -.Fl a | Ar pool ... -.Sh DESCRIPTION -The -.Nm -command configures -.Tn ZFS -storage pools. A storage pool is a collection of devices that provides physical -storage and data replication for -.Tn ZFS -datasets. -.Pp -All datasets within a storage pool share the same space. See -.Xr zfs 8 -for information on managing datasets. -.Ss Virtual Devices (vdevs) -A -.Qq virtual device -.Pq No vdev -describes a single device or a collection of devices organized according to -certain performance and fault characteristics. The following virtual devices -are supported: -.Bl -tag -width "XXXXXX" -.It Sy disk -A block device, typically located under -.Pa /dev . 
-.Tn ZFS -can use individual slices or partitions, though the recommended mode of -operation is to use whole disks. A disk can be specified by a full path to the -device or the -.Xr geom 4 -provider name. When given a whole disk, -.Tn ZFS -automatically labels the disk, if necessary. -.It Sy file -A regular file. The use of files as a backing store is strongly discouraged. It -is designed primarily for experimental purposes, as the fault tolerance of a -file is only as good as the file system of which it is a part. A file must be -specified by a full path. -.It Sy mirror -A mirror of two or more devices. Data is replicated in an identical fashion -across all components of a mirror. A mirror with -.Em N -disks of size -.Em X -can hold -.Em X -bytes and can withstand -.Pq Em N-1 -devices failing before data integrity is compromised. -.It Sy raidz -(or -.Sy raidz1 raidz2 raidz3 ) . -A variation on -.Sy RAID-5 -that allows for better distribution of parity and eliminates the -.Qq Sy RAID-5 -write hole (in which data and parity become inconsistent after a power loss). -Data and parity are striped across all disks within a -.No raidz -group. -.Pp -A -.No raidz -group can have single-, double- , or triple parity, meaning that the -.No raidz -group can sustain one, two, or three failures, respectively, without -losing any data. The -.Sy raidz1 No vdev -type specifies a single-parity -.No raidz -group; the -.Sy raidz2 No vdev -type specifies a double-parity -.No raidz -group; and the -.Sy raidz3 No vdev -type specifies a triple-parity -.No raidz -group. The -.Sy raidz No vdev -type is an alias for -.Sy raidz1 . -.Pp -A -.No raidz -group with -.Em N -disks of size -.Em X -with -.Em P -parity disks can hold approximately -.Sm off -.Pq Em N-P -*X -.Sm on -bytes and can withstand -.Em P -device(s) failing before data integrity is compromised. The minimum number of -devices in a -.No raidz -group is one more than the number of parity disks.
The -recommended number is between 3 and 9 to help increase performance. -.It Sy spare -A special -.No pseudo- Ns No vdev -which keeps track of available hot spares for a pool. -For more information, see the -.Qq Sx Hot Spares -section. -.It Sy log -A separate-intent log device. If more than one log device is specified, then -writes are load-balanced between devices. Log devices can be mirrored. However, -.No raidz -.No vdev -types are not supported for the intent log. For more information, -see the -.Qq Sx Intent Log -section. -.It Sy dedup -A device dedicated solely for allocating dedup data. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. -If more than one dedup device is specified, then allocations are load-balanced -between devices. -.It Sy special -A device dedicated solely for allocating various kinds of internal metadata, -and optionally small file data. -The redundancy of this device should match the redundancy of the other normal -devices in the pool. -If more than one special device is specified, then allocations are -load-balanced between devices. -.Pp -For more information on special allocations, see the -.Sx Special Allocation Class -section. -.It Sy cache -A device used to cache storage pool data. -A cache device cannot be configured as a mirror or raidz group. -For more information, see the -.Qq Sx Cache Devices -section. -.El -.Pp -Virtual devices cannot be nested, so a mirror or -.No raidz -virtual device can only -contain files or disks. Mirrors of mirrors (or other combinations) are not -allowed. -.Pp -A pool can have any number of virtual devices at the top of the configuration -(known as -.Qq root -.No vdev Ns s). -Data is dynamically distributed across all top-level devices to balance data -among devices. As new virtual devices are added, -.Tn ZFS -automatically places data on the newly available devices. 
-.Pp -Virtual devices are specified one at a time on the command line, separated by -whitespace. The keywords -.Qq mirror -and -.Qq raidz -are used to distinguish where a group ends and another begins. For example, the -following creates two root -.No vdev Ns s, -each a mirror of two disks: -.Bd -literal -offset 2n -.Li # Ic zpool create mypool mirror da0 da1 mirror da2 da3 -.Ed -.Ss Device Failure and Recovery -.Tn ZFS -supports a rich set of mechanisms for handling device failure and data -corruption. All metadata and data is checksummed, and -.Tn ZFS -automatically repairs bad data from a good copy when corruption is detected. -.Pp -In order to take advantage of these features, a pool must make use of some form -of redundancy, using either mirrored or -.No raidz -groups. While -.Tn ZFS -supports running in a non-redundant configuration, where each root -.No vdev -is simply a disk or file, this is strongly discouraged. A single case of bit -corruption can render some or all of your data unavailable. -.Pp -A pool's health status is described by one of three states: online, degraded, -or faulted. An online pool has all devices operating normally. A degraded pool -is one in which one or more devices have failed, but the data is still -available due to a redundant configuration. A faulted pool has corrupted -metadata, or one or more faulted devices, and insufficient replicas to continue -functioning. -.Pp -The health of the top-level -.No vdev , -such as mirror or -.No raidz -device, is -potentially impacted by the state of its associated -.No vdev Ns s, -or component devices. A top-level -.No vdev -or component device is in one of the following states: -.Bl -tag -width "DEGRADED" -.It Sy DEGRADED -One or more top-level -.No vdev Ns s -is in the degraded state because one or more -component devices are offline. Sufficient replicas exist to continue -functioning. 
-.Pp -One or more component devices is in the degraded or faulted state, but -sufficient replicas exist to continue functioning. The underlying conditions -are as follows: -.Bl -bullet -offset 2n -.It -The number of checksum errors exceeds acceptable levels and the device is -degraded as an indication that something may be wrong. -.Tn ZFS -continues to use the device as necessary. -.It -The number of -.Tn I/O -errors exceeds acceptable levels. The device could not be -marked as faulted because there are insufficient replicas to continue -functioning. -.El -.It Sy FAULTED -One or more top-level -.No vdev Ns s -is in the faulted state because one or more -component devices are offline. Insufficient replicas exist to continue -functioning. -.Pp -One or more component devices is in the faulted state, and insufficient -replicas exist to continue functioning. The underlying conditions are as -follows: -.Bl -bullet -offset 2n -.It -The device could be opened, but the contents did not match expected values. -.It -The number of -.Tn I/O -errors exceeds acceptable levels and the device is faulted to -prevent further use of the device. -.El -.It Sy OFFLINE -The device was explicitly taken offline by the -.Qq Nm Cm offline -command. -.It Sy ONLINE -The device is online and functioning. -.It Sy REMOVED -The device was physically removed while the system was running. Device removal -detection is hardware-dependent and may not be supported on all platforms. -.It Sy UNAVAIL -The device could not be opened. If a pool is imported when a device was -unavailable, then the device will be identified by a unique identifier instead -of its path since the path was never correct in the first place. -.El -.Pp -If a device is removed and later reattached to the system, -.Tn ZFS -attempts to put the device online automatically. Device attach detection is -hardware-dependent and might not be supported on all platforms. 
-.Ss Hot Spares -.Tn ZFS -allows devices to be associated with pools as -.Qq hot spares . -These devices are not actively used in the pool, but when an active device -fails, it is automatically replaced by a hot spare. To create a pool with hot -spares, specify a -.Qq spare -.No vdev -with any number of devices. For example, -.Bd -literal -offset 2n -.Li # Ic zpool create pool mirror da0 da1 spare da2 da3 -.Ed -.Pp -Spares can be shared across multiple pools, and can be added with the -.Qq Nm Cm add -command and removed with the -.Qq Nm Cm remove -command. Once a spare replacement is initiated, a new "spare" -.No vdev -is created -within the configuration that will remain there until the original device is -replaced. At this point, the hot spare becomes available again if another -device fails. -.Pp -If a pool has a shared spare that is currently being used, the pool can not be -exported since other pools may use this shared spare, which may lead to -potential data corruption. -.Pp -Shared spares add some risk. -If the pools are imported on different hosts, and both pools suffer a device -failure at the same time, both could attempt to use the spare at the same time. -This may not be detected, resulting in data corruption. -.Pp -An in-progress spare replacement can be cancelled by detaching the hot spare. -If the original faulted device is detached, then the hot spare assumes its -place in the configuration, and is removed from the spare list of all active -pools. -.Pp -Spares cannot replace log devices. -.Pp -This feature requires a userland helper. -FreeBSD provides -.Xr zfsd 8 -for this purpose. -It must be manually enabled by adding -.Va zfsd_enable="YES" -to -.Pa /etc/rc.conf . -.Ss Intent Log -The -.Tn ZFS -Intent Log -.Pq Tn ZIL -satisfies -.Tn POSIX -requirements for synchronous transactions. For instance, databases often -require their transactions to be on stable storage devices when returning from -a system call. 
-.Tn NFS -and other applications can also use -.Xr fsync 2 -to ensure data stability. By default, the intent log is allocated from blocks -within the main pool. However, it might be possible to get better performance -using separate intent log devices such as -.Tn NVRAM -or a dedicated disk. For example: -.Bd -literal -offset 2n -.Li # Ic zpool create pool da0 da1 log da2 -.Ed -.Pp -Multiple log devices can also be specified, and they can be mirrored. See the -.Sx EXAMPLES -section for an example of mirroring multiple log devices. -.Pp -Log devices can be added, replaced, attached, detached, imported and exported -as part of the larger pool. -Mirrored devices can be removed by specifying the top-level mirror vdev. -.Ss Cache devices -Devices can be added to a storage pool as "cache devices." These devices -provide an additional layer of caching between main memory and disk. For -read-heavy workloads, where the working set size is much larger than what can -be cached in main memory, using cache devices allows much more of this working -set to be served from low latency media. Using cache devices provides the -greatest performance improvement for random-read workloads of mostly static -content. -.Pp -To create a pool with cache devices, specify a "cache" -.No vdev -with any number of devices. For example: -.Bd -literal -offset 2n -.Li # Ic zpool create pool da0 da1 cache da2 da3 -.Ed -.Pp -Cache devices cannot be mirrored or part of a -.No raidz -configuration. If a read -error is encountered on a cache device, that read -.Tn I/O -is reissued to the original storage pool device, which might be part of a -mirrored or -.No raidz -configuration. -.Pp -The content of the cache devices is considered volatile, as is the case with -other system caches.
-.Ss Pool checkpoint -Before starting critical procedures that include destructive actions (e.g -.Nm zfs Cm destroy -), an administrator can checkpoint the pool's state and in the case of a -mistake or failure, rewind the entire pool back to the checkpoint. -Otherwise, the checkpoint can be discarded when the procedure has completed -successfully. -.Pp -A pool checkpoint can be thought of as a pool-wide snapshot and should be used -with care as it contains every part of the pool's state, from properties to vdev -configuration. -Thus, while a pool has a checkpoint certain operations are not allowed. -Specifically, vdev removal/attach/detach, mirror splitting, and -changing the pool's guid. -Adding a new vdev is supported but in the case of a rewind it will have to be -added again. -Finally, users of this feature should keep in mind that scrubs in a pool that -has a checkpoint do not repair checkpointed data. -.Pp -To create a checkpoint for a pool: -.Bd -literal -# zpool checkpoint pool -.Ed -.Pp -To later rewind to its checkpointed state, you need to first export it and -then rewind it during import: -.Bd -literal -# zpool export pool -# zpool import --rewind-to-checkpoint pool -.Ed -.Pp -To discard the checkpoint from a pool: -.Bd -literal -# zpool checkpoint -d pool -.Ed -.Pp -Dataset reservations (controlled by the -.Nm reservation -or -.Nm refreservation -zfs properties) may be unenforceable while a checkpoint exists, because the -checkpoint is allowed to consume the dataset's reservation. -Finally, data that is part of the checkpoint but has been freed in the -current state of the pool won't be scanned during a scrub. -.Ss Special Allocation Class -The allocations in the special class are dedicated to specific block types. -By default this includes all metadata, the indirect blocks of user data, and -any dedup data. -The class can also be provisioned to accept a limited percentage of small file -data blocks. 
-.Pp -A pool must always have at least one general (non-specified) vdev before -other devices can be assigned to the special class. -If the special class becomes full, then allocations intended for it will spill -back into the normal class. -.Pp -Dedup data can be excluded from the special class by setting the -.Sy vfs.zfs.ddt_data_is_special -sysctl to false (0). -.Pp -Inclusion of small file blocks in the special class is opt-in. -Each dataset can control the size of small file blocks allowed in the special -class by setting the -.Sy special_small_blocks -dataset property. -It defaults to zero so you must opt in by setting it to a non-zero value. -See -.Xr zfs 8 -for more info on setting this property. -.Ss Properties -Each pool has several properties associated with it. Some properties are -read-only statistics while others are configurable and change the behavior of -the pool. The following are read-only properties: -.Bl -tag -width "dedupratio" -.It Sy allocated -Amount of storage space used within the pool. -.It Sy capacity -Percentage of pool space used. This property can also be referred to by its -shortened column name, "cap". -.It Sy dedupratio -The deduplication ratio specified for a pool, expressed as a multiplier. -For example, a -.Sy dedupratio -value of 1.76 indicates that 1.76 units of data were stored but only 1 unit of disk space was actually consumed. See -.Xr zfs 8 -for a description of the deduplication feature. -.It Sy expandsize -Amount of uninitialized space within the pool or device that can be used to -increase the total capacity of the pool. -Uninitialized space consists of -any space on an EFI labeled vdev which has not been brought online -.Pq i.e. zpool online -e . -This space occurs when a LUN is dynamically expanded. -.It Sy fragmentation -The amount of fragmentation in the pool. -.It Sy free -Number of blocks within the pool that are not allocated.
-.It Sy freeing -After a file system or snapshot is destroyed, the space it was using is -returned to the pool asynchronously. -.Sy freeing -is the amount of space remaining to be reclaimed. -Over time -.Sy freeing -will decrease while -.Sy free -increases. -.It Sy guid -A unique identifier for the pool. -.It Sy health -The current health of the pool. Health can be -.Qq Sy ONLINE , -.Qq Sy DEGRADED , -.Qq Sy FAULTED , -.Qq Sy OFFLINE , -.Qq Sy REMOVED , -or -.Qq Sy UNAVAIL . -.It Sy size -Total size of the storage pool. -.It Sy unsupported@ Ns Ar feature_guid -Information about unsupported features that are enabled on the pool. -See -.Xr zpool-features 7 -for details. -.El -.Pp -The space usage properties report actual physical space available to the -storage pool. The physical space can be different from the total amount of -space that any contained datasets can actually use. The amount of space used in -a -.No raidz -configuration depends on the characteristics of the data being written. -In addition, -.Tn ZFS -reserves some space for internal accounting that the -.Xr zfs 8 -command takes into account, but the -.Xr zpool 8 -command does not. For non-full pools of a reasonable size, these effects should -be invisible. For small pools, or pools that are close to being completely -full, these discrepancies may become more noticeable. -.Pp -The following property can be set at creation time and import time: -.Bl -tag -width 2n -.It Sy altroot -Alternate root directory. If set, this directory is prepended to any mount -points within the pool. This can be used when examining an unknown pool where -the mount points cannot be trusted, or in an alternate boot environment, where -the typical paths are not valid. -.Sy altroot -is not a persistent property. It is valid only while the system is up. -Setting -.Sy altroot -defaults to using -.Cm cachefile=none , -though this may be overridden using an explicit setting. 
-.El -.Pp -The following property can only be set at import time: -.Bl -tag -width 2n -.It Sy readonly Ns = Ns Cm on No | Cm off -If set to -.Cm on , -pool will be imported in read-only mode with the following restrictions: -.Bl -bullet -offset 2n -.It -Synchronous data in the intent log will not be accessible -.It -Properties of the pool can not be changed -.It -Datasets of this pool can only be mounted read-only -.It -To write to a read-only pool, an export and import of the pool is required. -.El -.Pp -This property can also be referred to by its shortened column name, -.Sy rdonly . -.El -.Pp -The following properties can be set at creation time and import time, and later -changed with the -.Ic zpool set -command: -.Bl -tag -width 2n -.It Sy autoexpand Ns = Ns Cm on No | Cm off -Controls automatic pool expansion when the underlying LUN is grown. If set to -.Qq Cm on , -the pool will be resized according to the size of the expanded -device. If the device is part of a mirror or -.No raidz -then all devices within that -.No mirror/ Ns No raidz -group must be expanded before the new space is made available to -the pool. The default behavior is -.Qq off . -This property can also be referred to by its shortened column name, -.Sy expand . -.It Sy autoreplace Ns = Ns Cm on No | Cm off -Controls automatic device replacement. If set to -.Qq Cm off , -device replacement must be initiated by the administrator by using the -.Qq Nm Cm replace -command. If set to -.Qq Cm on , -any new device, found in the same -physical location as a device that previously belonged to the pool, is -automatically formatted and replaced. The default behavior is -.Qq Cm off . -This property can also be referred to by its shortened column name, "replace". -.It Sy bootfs Ns = Ns Ar pool Ns / Ns Ar dataset -Identifies the default bootable dataset for the root pool. This property is -expected to be set mainly by the installation and upgrade programs.
-.It Sy cachefile Ns = Ns Ar path No | Cm none -Controls the location of where the pool configuration is cached. Discovering -all pools on system startup requires a cached copy of the configuration data -that is stored on the root file system. All pools in this cache are -automatically imported when the system boots. Some environments, such as -install and clustering, need to cache this information in a different location -so that pools are not automatically imported. Setting this property caches the -pool configuration in a different location that can later be imported with -.Qq Nm Cm import Fl c . -Setting it to the special value -.Qq Cm none -creates a temporary pool that is never cached, and the special value -.Cm '' -(empty string) uses the default location. -.It Sy comment Ns = Ns Ar text -A text string consisting of printable ASCII characters that will be stored -such that it is available even if the pool becomes faulted. -An administrator can provide additional information about a pool using this -property. -.It Sy dedupditto Ns = Ns Ar number -Threshold for the number of block ditto copies. If the reference count for a -deduplicated block increases above this number, a new ditto copy of this block -is automatically stored. Default setting is -.Cm 0 -which causes no ditto copies to be created for deduplicated blocks. -The minimum legal nonzero setting is 100. -.It Sy delegation Ns = Ns Cm on No | Cm off -Controls whether a non-privileged user is granted access based on the dataset -permissions defined on the dataset. See -.Xr zfs 8 -for more information on -.Tn ZFS -delegated administration. -.It Sy failmode Ns = Ns Cm wait No | Cm continue No | Cm panic -Controls the system behavior in the event of catastrophic pool failure. This -condition is typically a result of a loss of connectivity to the underlying -storage device(s) or a failure of all devices within the pool.
The behavior of -such an event is determined as follows: -.Bl -tag -width indent -.It Sy wait -Blocks all -.Tn I/O -access until the device connectivity is recovered and the errors are cleared. -This is the default behavior. -.It Sy continue -Returns -.Em EIO -to any new write -.Tn I/O -requests but allows reads to any of the remaining healthy devices. Any write -requests that have yet to be committed to disk would be blocked. -.It Sy panic -Prints out a message to the console and generates a system crash dump. -.El -.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled -The value of this property is the current state of -.Ar feature_name . -The only valid value when setting this property is -.Sy enabled -which moves -.Ar feature_name -to the enabled state. -See -.Xr zpool-features 7 -for details on feature states. -.It Sy listsnapshots Ns = Ns Cm on No | Cm off -Controls whether information about snapshots associated with this pool is -output when -.Qq Nm zfs Cm list -is run without the -.Fl t -option. The default value is -.Cm off . -This property can also be referred to by its shortened name, -.Sy listsnaps . -.It Sy multihost Ns = Ns Sy on No | Sy off -Controls whether a pool activity check should be performed during -.Nm zpool Cm import . -When a pool is determined to be active it cannot be imported, even with the -.Fl f -option. -This property is intended to be used in failover configurations -where multiple hosts have access to a pool on shared storage. -.Pp -Multihost provides protection on import only. -It does not protect against an -individual device being used in multiple pools, regardless of the type of vdev. -See the discussion under -.Sy zpool create. -.Pp -When this property is on, periodic writes to storage occur to show the pool is -in use. -See -.Sy vfs.zfs.multihost_interval -sysctl. -In order to enable this property each host must set a unique hostid. -The default value is -.Sy off . 
-.It Sy version Ns = Ns Ar version -The current on-disk version of the pool. This can be increased, but never -decreased. The preferred method of updating pools is with the -.Qq Nm Cm upgrade -command, though this property can be used when a specific version is needed -for backwards compatibility. -Once feature flags is enabled on a pool this property will no longer have a -value. -.El -.Sh SUBCOMMANDS -All subcommands that modify state are logged persistently to the pool in their -original form. -.Pp -The -.Nm -command provides subcommands to create and destroy storage pools, add capacity -to storage pools, and provide information about the storage pools. The following -subcommands are supported: -.Bl -tag -width 2n -.It Xo -.Nm -.Op Fl \&? -.Xc -.Pp -Displays a help message. -.It Xo -.Nm -.Cm add -.Op Fl fgLnP -.Ar pool vdev ... -.Xc -.Pp -Adds the specified virtual devices to the given pool. The -.No vdev -specification is described in the -.Qq Sx Virtual Devices -section. The behavior of the -.Fl f -option, and the device checks performed are described in the -.Qq Nm Cm create -subcommand. -.Bl -tag -width indent -.It Fl f -Forces use of -.Ar vdev , -even if they appear in use or specify a conflicting replication level. -Not all devices can be overridden in this manner. -.It Fl g -Display -.Ar vdev , -GUIDs instead of the normal device names. -These GUIDs can be used in place of -device names for the zpool detach/offline/remove/replace commands. -.It Fl L -Display real paths for -.Ar vdev Ns s -resolving all symbolic links. -This can be used to look up the current block -device name regardless of the /dev/disk/ path used to open it. -.It Fl n -Displays the configuration that would be used without actually adding the -.Ar vdev Ns s. -The actual pool creation can still fail due to insufficient privileges or -device sharing. -.It Fl P -Display real paths for -.Ar vdev Ns s -instead of only the last component of the path. 
-This can be used in conjunction with the -.Fl L -flag. -.El -.It Xo -.Nm -.Cm attach -.Op Fl f -.Ar pool device new_device -.Xc -.Pp -Attaches -.Ar new_device -to an existing -.Sy zpool -device. The existing device cannot be part of a -.No raidz -configuration. If -.Ar device -is not currently part of a mirrored configuration, -.Ar device -automatically transforms into a two-way mirror of -.Ar device No and Ar new_device . -If -.Ar device -is part of a two-way mirror, attaching -.Ar new_device -creates a three-way mirror, and so on. In either case, -.Ar new_device -begins to resilver immediately. -.Bl -tag -width indent -.It Fl f -Forces use of -.Ar new_device , -even if it appears to be in use. Not all devices can be overridden in this -manner. -.El -.It Xo -.Nm -.Cm checkpoint -.Op Fl d, -discard -.Ar pool -.Xc -Checkpoints the current state of -.Ar pool -, which can be later restored by -.Nm zpool Cm import --rewind-to-checkpoint . -The existence of a checkpoint in a pool prohibits the following -.Nm zpool -commands: -.Cm remove , -.Cm attach , -.Cm detach , -.Cm split , -and -.Cm reguid . -In addition, it may break reservation boundaries if the pool lacks free -space. -The -.Nm zpool Cm status -command indicates the existence of a checkpoint or the progress of discarding a -checkpoint from a pool. -The -.Nm zpool Cm list -command reports how much space the checkpoint takes from the pool. -.Bl -tag -width Ds -.It Fl d, -discard -Discards an existing checkpoint from -.Ar pool . -.El -.It Xo -.Nm -.Cm clear -.Op Fl F Op Fl n -.Ar pool -.Op Ar device -.Xc -.Pp -Clears device errors in a pool. -If no arguments are specified, all device errors within the pool are cleared. -If one or more devices is specified, only those errors associated with the -specified device or devices are cleared. -If multihost is enabled, and the pool has been suspended, this will not -resume I/O.
-While the pool was suspended, it may have been imported on -another host, and resuming I/O could result in pool damage. -.Bl -tag -width indent -.It Fl F -Initiates recovery mode for an unopenable pool. Attempts to discard the last -few transactions in the pool to return it to an openable state. Not all damaged -pools can be recovered by using this option. If successful, the data from the -discarded transactions is irretrievably lost. -.It Fl n -Used in combination with the -.Fl F -flag. Check whether discarding transactions would make the pool openable, but -do not actually discard any transactions. -.El -.It Xo -.Nm -.Cm create -.Op Fl fnd -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Op Fl O Ar file-system-property Ns = Ns Ar value -.Ar ... -.Op Fl m Ar mountpoint -.Op Fl R Ar root -.Op Fl t Ar tempname -.Ar pool vdev ... -.Xc -.Pp -Creates a new storage pool containing the virtual devices specified on the -command line. The pool name must begin with a letter, and can only contain -alphanumeric characters as well as underscore ("_"), dash ("-"), and period -("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are -names beginning with the pattern "c[0-9]". The -.No vdev -specification is described in the -.Qq Sx Virtual Devices -section. -.Pp -The command attempts to verify that each device specified is accessible and not -currently in use by another subsystem. -However this check is not robust enough -to detect simultaneous attempts to use a new device in different pools, even if -.Sy multihost -is -.Sy enabled. -The -administrator must ensure that simultaneous invocations of any combination of -.Sy zpool replace , -.Sy zpool create , -.Sy zpool add , -or -.Sy zpool labelclear , -do not refer to the same device. -Using the same device in two pools will -result in pool corruption. -.Pp -There are some uses, such as being currently mounted, or specified as the -dedicated dump device, that prevents a device from ever being used by ZFS. 
-Other uses, such as having a preexisting UFS file system, can be overridden -with the -.Fl f -option. -.Pp -The command also checks that the replication strategy for the pool is -consistent. An attempt to combine redundant and non-redundant storage in a -single pool, or to mix disks and files, results in an error unless -.Fl f -is specified. The use of differently sized devices within a single -.No raidz -or mirror group is also flagged as an error unless -.Fl f -is specified. -.Pp -Unless the -.Fl R -option is specified, the default mount point is -.Qq Pa /pool . -The mount point must not exist or must be empty, or else the -root dataset cannot be mounted. This can be overridden with the -.Fl m -option. -.Pp -By default all supported features are enabled on the new pool unless the -.Fl d -option is specified. -.Bl -tag -width indent -.It Fl f -Forces use of -.Ar vdev Ns s, -even if they appear in use or specify a conflicting replication level. -Not all devices can be overridden in this manner. -.It Fl n -Displays the configuration that would be used without actually creating the -pool. The actual pool creation can still fail due to insufficient privileges or -device sharing. -.It Fl d -Do not enable any features on the new pool. -Individual features can be enabled by setting their corresponding properties -to -.Sy enabled -with the -.Fl o -option. -See -.Xr zpool-features 7 -for details about feature properties. -.It Xo -.Fl o Ar property Ns = Ns Ar value -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Xc -Sets the given pool properties. See the -.Qq Sx Properties -section for a list of valid properties that can be set. -.It Xo -.Fl O -.Ar file-system-property Ns = Ns Ar value -.Op Fl O Ar file-system-property Ns = Ns Ar value -.Ar ... -.Xc -Sets the given file system properties in the root file system of the pool. See -.Xr zfs 8 Properties -for a list of valid properties that -can be set. 
-.It Fl R Ar root -Equivalent to -.Qq Fl o Cm cachefile=none,altroot= Ns Pa root -.It Fl m Ar mountpoint -Sets the mount point for the root dataset. The default mount point is -.Qq Pa /pool -or -.Qq Cm altroot Ns Pa /pool -if -.Sy altroot -is specified. The mount point must be an absolute path, -.Qq Cm legacy , -or -.Qq Cm none . -For more information on dataset mount points, see -.Xr zfs 8 . -.It Fl t Ar tempname -Sets the in-core pool name to -.Pa tempname -while the on-disk name will be the name specified as the pool name -.Pa pool . -This will set the default -.Sy cachefile -property to -.Sy none . -This is intended to handle name space collisions when creating pools -for other systems, such as virtual machines or physical machines -whose pools live on network block devices. -.El -.It Xo -.Nm -.Cm destroy -.Op Fl f -.Ar pool -.Xc -.Pp -Destroys the given pool, freeing up any devices for other use. This command -tries to unmount any active datasets before destroying the pool. -.Bl -tag -width indent -.It Fl f -Forces any active datasets contained within the pool to be unmounted. -.El -.It Xo -.Nm -.Cm detach -.Ar pool device -.Xc -.Pp -Detaches -.Ar device -from a mirror. The operation is refused if there are no other valid replicas -of the data. -.It Xo -.Nm -.Cm export -.Op Fl f -.Ar pool ... -.Xc -.Pp -Exports the given pools from the system. All devices are marked as exported, -but are still considered in use by other subsystems. The devices can be moved -between systems (even those of different endianness) and imported as long as a -sufficient number of devices are present. -.Pp -Before exporting the pool, all datasets within the pool are unmounted. A pool -can not be exported if it has a shared spare that is currently being used. -.Pp -For pools to be portable, you must give the -.Nm -command whole disks, not just slices, so that -.Tn ZFS -can label the disks with portable -.Sy EFI -labels. 
Otherwise, disk drivers on platforms of different endianness will not -recognize the disks. -.Bl -tag -width indent -.It Fl f -Forcefully unmount all datasets, using the -.Qq Nm unmount Fl f -command. -.Pp -This command will forcefully export the pool even if it has a shared spare that -is currently being used. This may lead to potential data corruption. -.El -.It Xo -.Nm -.Cm get -.Op Fl Hp -.Op Fl o Ar field Ns Op , Ns Ar ... -.Ar all | property Ns Op , Ns Ar ... -.Ar pool ... -.Xc -.Pp -Retrieves the given list of properties (or all properties if -.Qq Cm all -is used) for the specified storage pool(s). These properties are displayed with -the following fields: -.Bl -column -offset indent "property" -.It name Ta Name of storage pool -.It property Ta Property name -.It value Ta Property value -.It source Ta Property source, either 'default' or 'local'. -.El -.Pp -See the -.Qq Sx Properties -section for more information on the available pool properties. -.It Fl H -Scripted mode. Do not display headers, and separate fields by a single tab -instead of arbitrary space. -.It Fl p -Display numbers in parsable (exact) values. -.It Fl o Ar field -A comma-separated list of columns to display. -.Sy name Ns , Ns -.Sy property Ns , Ns -.Sy value Ns , Ns -.Sy source -is the default value. -.It Xo -.Nm -.Cm history -.Op Fl il -.Op Ar pool -.Ar ... -.Xc -.Pp -Displays the command history of the specified pools or all pools if no pool is -specified. -.Bl -tag -width indent -.It Fl i -Displays internally logged -.Tn ZFS -events in addition to user initiated events. -.It Fl l -Displays log records in long format, which, in addition to the standard format, -includes the user name, the hostname, and the zone in which the operation was -performed. -.El -.It Xo -.Nm -.Cm import -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Xc -.Pp -Lists pools available to import. If the -.Fl d -option is not specified, this command searches for devices in -.Qq Pa /dev .
-The -.Fl d -option can be specified multiple times, and all directories are searched. If -the device appears to be part of an exported pool, this command displays a -summary of the pool with the name of the pool, a numeric identifier, as well as -the -.No vdev -layout and current health of the device for each device or file. -Destroyed pools, pools that were previously destroyed with the -.Qq Nm Cm destroy -command, are not listed unless the -.Fl D -option is specified. -.Pp -The numeric identifier is unique, and can be used instead of the pool name when -multiple exported pools of the same name are available. -.Bl -tag -width indent -.It Fl c Ar cachefile -Reads configuration from the given -.Ar cachefile -that was created with the -.Qq Sy cachefile -pool property. This -.Ar cachefile -is used instead of searching for devices. -.It Fl d Ar dir -Searches for devices or files in -.Ar dir . -The -.Fl d -option can be specified multiple times. -.It Fl D -Lists destroyed pools only. -.El -.It Xo -.Nm -.Cm import -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Op Fl f -.Op Fl m -.Op Fl N -.Op Fl R Ar root -.Op Fl F Op Fl n -.Fl a -.Xc -.Pp -Imports all pools found in the search directories. Identical to the previous -command, except that all pools with a sufficient number of devices available -are imported. Destroyed pools, pools that were previously destroyed with the -.Qq Nm Cm destroy -command, will not be imported unless the -.Fl D -option is specified. -.Bl -tag -width indent -.It Fl o Ar mntopts -Comma-separated list of mount options to use when mounting datasets within the -pool. See -.Xr zfs 8 -for a description of dataset properties and mount options. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property on the imported pool. See the -.Qq Sx Properties -section for more information on the available pool properties. 
-.It Fl c Ar cachefile -Reads configuration from the given -.Ar cachefile -that was created with the -.Qq Sy cachefile -pool property. This -.Ar cachefile -is used instead of searching for devices. -.It Fl d Ar dir -Searches for devices or files in -.Ar dir . -The -.Fl d -option can be specified multiple times. This option is incompatible with the -.Fl c -option. -.It Fl D -Imports destroyed pools only. The -.Fl f -option is also required. -.It Fl f -Forces import, even if the pool appears to be potentially active. -.It Fl m -Allows a pool to import when there is a missing log device. Recent transactions -can be lost because the log device will be discarded. -.It Fl N -Import the pool without mounting any file systems. -.It Fl R Ar root -Sets the -.Qq Sy cachefile -property to -.Qq Cm none -and the -.Qq Sy altroot -property to -.Qq Ar root -.It Fl F -Recovery mode for a non-importable pool. Attempt to return the pool to an -importable state by discarding the last few transactions. Not all damaged pools -can be recovered by using this option. If successful, the data from the -discarded transactions is irretrievably lost. This option is ignored if the -pool is importable or already imported. -.It Fl n -Used with the -.Fl F -recovery option. Determines whether a non-importable pool can be made -importable again, but does not actually perform the pool recovery. For more -details about pool recovery mode, see the -.Fl F -option, above. -.It Fl a -Searches for and imports all pools found. -.El -.It Xo -.Nm -.Cm import -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar ... -.Op Fl d Ar dir | Fl c Ar cachefile -.Op Fl D -.Op Fl f -.Op Fl m -.Op Fl N -.Op Fl R Ar root -.Op Fl t -.Op Fl F Op Fl n -.Ar pool | id -.Op Ar newpool -.Xc -.Pp -Imports a specific pool. A pool can be identified by its name or the numeric -identifier. If -.Ar newpool -is specified, the pool is imported using the name -.Ar newpool . 
-Otherwise, it is imported with the same name as its exported name. -.Pp -If a device is removed from a system without running -.Qq Nm Cm export -first, the device appears as potentially active. It cannot be determined if -this was a failed export, or whether the device is really in use from another -host. To import a pool in this state, the -.Fl f -option is required. -.Bl -tag -width indent -.It Fl o Ar mntopts -Comma-separated list of mount options to use when mounting datasets within the -pool. See -.Xr zfs 8 -for a description of dataset properties and mount options. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property on the imported pool. See the -.Qq Sx Properties -section for more information on the available pool properties. -.It Fl c Ar cachefile -Reads configuration from the given -.Ar cachefile -that was created with the -.Qq Sy cachefile -pool property. This -.Ar cachefile -is used instead of searching for devices. -.It Fl d Ar dir -Searches for devices or files in -.Ar dir . -The -.Fl d -option can be specified multiple times. This option is incompatible with the -.Fl c -option. -.It Fl D -Imports destroyed pools only. The -.Fl f -option is also required. -.It Fl f -Forces import, even if the pool appears to be potentially active. -.It Fl m -Allows a pool to import when there is a missing log device. Recent transactions -can be lost because the log device will be discarded. -.It Fl N -Import the pool without mounting any file systems. -.It Fl R Ar root -Equivalent to -.Qq Fl o Cm cachefile=none,altroot= Ns Pa root -.It Fl t -Used with -.Ar newpool . -Specifies that -.Ar newpool -is temporary. -Temporary pool names last until export. -Ensures that the original pool name will be used in all label updates and -therefore is retained upon export. -Will also set -.Sy cachefile -property to -.Sy none -when not explicitly specified. -.It Fl F -Recovery mode for a non-importable pool. 
Attempt to return the pool to an -importable state by discarding the last few transactions. Not all damaged pools -can be recovered by using this option. If successful, the data from the -discarded transactions is irretrievably lost. This option is ignored if the -pool is importable or already imported. -.It Fl n -Used with the -.Fl F -recovery option. Determines whether a non-importable pool can be made -importable again, but does not actually perform the pool recovery. For more -details about pool recovery mode, see the -.Fl F -option, above. -.It Fl -rewind-to-checkpoint -Rewinds pool to the checkpointed state. -Once the pool is imported with this flag there is no way to undo the rewind. -All changes and data that were written after the checkpoint are lost! -The only exception is when the -.Sy readonly -mounting option is enabled. -In this case, the checkpointed state of the pool is opened and an -administrator can see how the pool would look like if they were -to fully rewind. -.El -.It Xo -.Nm -.Cm initialize -.Op Fl cs -.Ar pool -.Op Ar device Ns ... -.Xc -Begins initializing by writing to all unallocated regions on the specified -devices, or all eligible devices in the pool if no individual devices are -specified. -Only leaf data or log devices may be initialized. -.Bl -tag -width Ds -.It Fl c, -cancel -Cancel initializing on the specified devices, or all eligible devices if none -are specified. -If one or more target devices are invalid or are not currently being -initialized, the command will fail and no cancellation will occur on any device. -.It Fl s -suspend -Suspend initializing on the specified devices, or all eligible devices if none -are specified. -If one or more target devices are invalid or are not currently being -initialized, the command will fail and no suspension will occur on any device. -Initializing can then be resumed by running -.Nm zpool Cm initialize -with no flags on the relevant target devices. 
-.El -.It Xo -.Nm -.Cm iostat -.Op Fl T Cm d Ns | Ns Cm u -.Op Fl gLPv -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Xc -.Pp -Displays -.Tn I/O -statistics for the given pools. When given an interval, the statistics are -printed every -.Ar interval -seconds until -.Sy Ctrl-C -is pressed. If no -.Ar pools -are specified, statistics for every pool in the system is shown. If -.Ar count -is specified, the command exits after -.Ar count -reports are printed. -.Bl -tag -width indent -.It Fl T Cm d Ns | Ns Cm u -Print a timestamp. -.Pp -Use modifier -.Cm d -for standard date format. See -.Xr date 1 . -Use modifier -.Cm u -for unixtime -.Pq equals Qq Ic date +%s . -.It Fl g -Display vdev GUIDs instead of the normal device names. -These GUIDs can be used in place of device names for the zpool -detach/offline/remove/replace commands. -.It Fl L -Display real paths for vdevs resolving all symbolic links. -This can be used to look up the current block device name regardless of the -.Pa /dev/disk/ -path used to open it. -.It Fl P -Display full paths for vdevs instead of only the last component of -the path. -This can be used in conjunction with the -.Fl L -flag. -.It Fl v -Verbose statistics. -Reports usage statistics for individual vdevs within the -pool, in addition to the pool-wide statistics. -.El -.It Xo -.Nm -.Cm labelclear -.Op Fl f -.Ar device -.Xc -.Pp -Removes -.Tn ZFS -label information from the specified -.Ar device . -The -.Ar device -must not be part of an active pool configuration. -.Bl -tag -width indent -.It Fl f -Treat exported or foreign devices as inactive. -.El -.It Xo -.Nm -.Cm list -.Op Fl HgLpPv -.Op Fl o Ar property Ns Op , Ns Ar ... -.Op Fl T Cm d Ns | Ns Cm u -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Xc -.Pp -Lists the given pools along with a health status and space usage. If no -.Ar pools -are specified, all pools in the system are listed. 
-.Pp -When given an interval, the output is printed every -.Ar interval -seconds until -.Sy Ctrl-C -is pressed. If -.Ar count -is specified, the command exits after -.Ar count -reports are printed. -.Bl -tag -width indent -.It Fl T Cm d Ns | Ns Cm u -Print a timestamp. -.Pp -Use modifier -.Cm d -for standard date format. See -.Xr date 1 . -Use modifier -.Cm u -for unixtime -.Pq equals Qq Ic date +%s . -.It Fl g -Display vdev GUIDs instead of the normal device names. -These GUIDs can be used in place of device names for the zpool -detach/offline/remove/replace commands. -.It Fl H -Scripted mode. Do not display headers, and separate fields by a single tab -instead of arbitrary space. -.It Fl L -Display real paths for vdevs resolving all symbolic links. -This can be used to look up the current block device name regardless of the -/dev/disk/ path used to open it. -.It Fl p -Display numbers in parsable -.Pq exact -values. -.It Fl P -Display full paths for vdevs instead of only the last component of -the path. -This can be used in conjunction with the -.Fl L -flag. -.It Fl v -Verbose statistics. Reports usage statistics for individual -.Em vdevs -within -the pool, in addition to the pool-wide statistics. -.It Fl o Ar property Ns Op , Ns Ar ... -Comma-separated list of properties to display. See the -.Qq Sx Properties -section for a list of valid properties. The default list is -.Sy name , -.Sy size , -.Sy allocated , -.Sy free , -.Sy checkpoint , -.Sy expandsize , -.Sy fragmentation , -.Sy capacity , -.Sy dedupratio , -.Sy health , -.Sy altroot . -.It Fl T Cm d Ns | Ns Cm u -Print a timestamp. -.Pp -Use modifier -.Cm d -for standard date format. See -.Xr date 1 . -Use modifier -.Cm u -for unixtime -.Pq equals Qq Ic date +%s . -.El -.It Xo -.Nm -.Cm offline -.Op Fl t -.Ar pool device ... -.Xc -.Pp -Takes the specified physical device offline. While the -.Ar device -is offline, no attempt is made to read or write to the device. -.Bl -tag -width indent -.It Fl t -Temporary. 
Upon reboot, the specified physical device reverts to its previous -state. -.El -.It Xo -.Nm -.Cm online -.Op Fl e -.Ar pool device ... -.Xc -.Pp -Brings the specified physical device online. -.Pp -This command is not applicable to spares or cache devices. -.Bl -tag -width indent -.It Fl e -Expand the device to use all available space. If the device is part of a mirror -or -.No raidz -then all devices must be expanded before the new space will become -available to the pool. -.El -.It Xo -.Nm -.Cm reguid -.Ar pool -.Xc -.Pp -Generates a new unique identifier for the pool. You must ensure that all -devices in this pool are online and healthy before performing this action. -.It Xo -.Nm -.Cm remove -.Op Fl np -.Ar pool device ... -.Xc -.Pp -Removes the specified device from the pool. -This command currently only supports removing hot spares, cache, log -devices and mirrored top-level vdevs (mirror of leaf devices); but not raidz. -.Pp -Removing a top-level vdev reduces the total amount of space in the storage pool. -The specified device will be evacuated by copying all allocated space from it to -the other devices in the pool. -In this case, the -.Nm zpool Cm remove -command initiates the removal and returns, while the evacuation continues in -the background. -The removal progress can be monitored with -.Nm zpool Cm status. -This feature must be enabled to be used, see -.Xr zpool-features 7 -.Pp -A mirrored top-level device (log or data) can be removed by specifying the -top-level mirror for the same. -Non-log devices or data devices that are part of a mirrored configuration can -be removed using the -.Qq Nm Cm detach -command. -.Bl -tag -width Ds -.It Fl n -Do not actually perform the removal ("no-op"). -Instead, print the estimated amount of memory that will be used by the -mapping table after the removal completes. -This is nonzero only for top-level vdevs. 
-.El -.Bl -tag -width Ds -.It Fl p -Used in conjunction with the -.Fl n -flag, displays numbers as parsable (exact) values. -.El -.It Xo -.Nm -.Cm remove -.Fl s -.Ar pool -.Xc -.Pp -Stops and cancels an in-progress removal of a top-level vdev. -.It Xo -.Nm -.Cm reopen -.Ar pool -.Xc -.Pp -Reopen all the vdevs associated with the pool. -.It Xo -.Nm -.Cm replace -.Op Fl f -.Ar pool device -.Op Ar new_device -.Xc -.Pp -Replaces -.Ar old_device -with -.Ar new_device . -This is equivalent to attaching -.Ar new_device , -waiting for it to resilver, and then detaching -.Ar old_device . -.Pp -The size of -.Ar new_device -must be greater than or equal to the minimum size -of all the devices in a mirror or -.No raidz -configuration. -.Pp -.Ar new_device -is required if the pool is not redundant. If -.Ar new_device -is not specified, it defaults to -.Ar old_device . -This form of replacement is useful after an existing disk has failed and has -been physically replaced. In this case, the new disk may have the same -.Pa /dev -path as the old device, even though it is actually a different disk. -.Tn ZFS -recognizes this. -.Bl -tag -width indent -.It Fl f -Forces use of -.Ar new_device , -even if its appears to be in use. Not all devices can be overridden in this -manner. -.El -.It Xo -.Nm -.Cm scrub -.Op Fl s | Fl p -.Ar pool ... -.Xc -.Pp -Begins a scrub or resumes a paused scrub. -The scrub examines all data in the specified pools to verify that it checksums -correctly. -For replicated -.Pq mirror or raidz -devices, ZFS automatically repairs any damage discovered during the scrub. -The -.Nm zpool Cm status -command reports the progress of the scrub and summarizes the results of the -scrub upon completion. -.Pp -Scrubbing and resilvering are very similar operations. 
-The difference is that resilvering only examines data that ZFS knows to be out -of date -.Po -for example, when attaching a new device to a mirror or replacing an existing -device -.Pc , -whereas scrubbing examines all data to discover silent errors due to hardware -faults or disk failure. -.Pp -Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows -one at a time. -If a scrub is paused, the -.Nm zpool Cm scrub -resumes it. -If a resilver is in progress, ZFS does not allow a scrub to be started until the -resilver completes. -.Bl -tag -width Ds -.It Fl s -Stop scrubbing. -.El -.Bl -tag -width Ds -.It Fl p -Pause scrubbing. -Scrub pause state and progress are periodically synced to disk. -If the system is restarted or pool is exported during a paused scrub, -even after import, scrub will remain paused until it is resumed. -Once resumed the scrub will pick up from the place where it was last -checkpointed to disk. -To resume a paused scrub issue -.Nm zpool Cm scrub -again. -.El -.It Xo -.Nm -.Cm set -.Ar property Ns = Ns Ar value pool -.Xc -.Pp -Sets the given property on the specified pool. See the -.Qq Sx Properties -section for more information on what properties can be set and acceptable -values. -.It Xo -.Nm -.Cm split -.Op Fl gLnP -.Op Fl R Ar altroot -.Op Fl o Ar mntopts -.Op Fl o Ar property Ns = Ns Ar value -.Ar pool newpool -.Op Ar device ... -.Xc -.Pp -Splits off one disk from each mirrored top-level -.No vdev -in a pool and creates a new pool from the split-off disks. The original pool -must be made up of one or more mirrors and must not be in the process of -resilvering. The -.Cm split -subcommand chooses the last device in each mirror -.No vdev -unless overridden by a device specification on the command line. 
-.Pp -When using a -.Ar device -argument, -.Cm split -includes the specified device(s) in a new pool and, should any devices remain -unspecified, assigns the last device in each mirror -.No vdev -to that pool, as it does normally. If you are uncertain about the outcome of a -.Cm split -command, use the -.Fl n -("dry-run") option to ensure your command will have the effect you intend. -.Bl -tag -width indent -.It Fl R Ar altroot -Automatically import the newly created pool after splitting, using the -specified -.Ar altroot -parameter for the new pool's alternate root. See the -.Sy altroot -description in the -.Qq Sx Properties -section, above. -.It Fl g -Display vdev GUIDs instead of the normal device names. -These GUIDs can be used in place of device names for the zpool -detach/offline/remove/replace commands. -.It Fl L -Display real paths for vdevs resolving all symbolic links. -This can be used to look up the current block device name regardless of the -.Pa /dev/disk/ -path used to open it. -.It Fl n -Displays the configuration that would be created without actually splitting the -pool. The actual pool split could still fail due to insufficient privileges or -device status. -.It Fl o Ar mntopts -Comma-separated list of mount options to use when mounting datasets within the -pool. See -.Xr zfs 8 -for a description of dataset properties and mount options. Valid only in -conjunction with the -.Fl R -option. -.It Fl o Ar property Ns = Ns Ar value -Sets the specified property on the new pool. See the -.Qq Sx Properties -section, above, for more information on the available pool properties. -.It Fl P -Display full paths for vdevs instead of only the last component of -the path. -This can be used in conjunction with the -.Fl L -flag. -.El -.It Xo -.Nm -.Cm status -.Op Fl DgLPvx -.Op Fl T Cm d Ns | Ns Cm u -.Op Ar pool -.Ar ... -.Op Ar interval Op Ar count -.Xc -.Pp -Displays the detailed health status for the given pools. 
If no -.Ar pool -is specified, then the status of each pool in the system is displayed. For more -information on pool and device health, see the -.Qq Sx Device Failure and Recovery -section. -.Pp -When given an interval, the output is printed every -.Ar interval -seconds until -.Sy Ctrl-C -is pressed. If -.Ar count -is specified, the command exits after -.Ar count -reports are printed. -.Pp -If a scrub or resilver is in progress, this command reports the percentage -done and the estimated time to completion. Both of these are only approximate, -because the amount of data in the pool and the other workloads on the system -can change. -.Bl -tag -width indent -.It Fl D -Display a histogram of deduplication statistics, showing the allocated -.Pq physically present on disk -and referenced -.Pq logically referenced in the pool -block counts and sizes by reference count. -.It Fl g -Display vdev GUIDs instead of the normal device names. -These GUIDs can be used in place of device names for the zpool -detach/offline/remove/replace commands. -.It Fl L -Display real paths for vdevs resolving all symbolic links. -This can be used to look up the current block device name regardless of the -.Pa /dev/disk/ -path used to open it. -.It Fl P -Display full paths for vdevs instead of only the last component of -the path. -This can be used in conjunction with the -.Fl L -flag. -.It Fl T Cm d Ns | Ns Cm u -Print a timestamp. -.Pp -Use modifier -.Cm d -for standard date format. See -.Xr date 1 . -Use modifier -.Cm u -for unixtime -.Pq equals Qq Ic date +%s . -.It Fl v -Displays verbose data error information, printing out a complete list of all -data errors since the last complete pool scrub. -.It Fl x -Only display status for pools that are exhibiting errors or are otherwise -unavailable. -Warnings about pools not using the latest on-disk format, having non-native -block size or disabled features will not be included. -.El -.It Xo -.Nm -.Cm sync -.Oo Ar pool Oc Ns ... 
-.Xc -Forces all in-core dirty data to be written to the primary pool storage and -not the ZIL. -It will also update administrative information including quota reporting. -Without arguments, -.Nm zpool Cm sync -will sync all pools on the system. -Otherwise, it will only sync the specified -.Ar pool . -.It Xo -.Nm -.Cm upgrade -.Op Fl v -.Xc -.Pp -Displays pools which do not have all supported features enabled and pools -formatted using a legacy -.Tn ZFS -version number. -These pools can continue to be used, but some features may not be available. -Use -.Nm Cm upgrade Fl a -to enable all features on all pools. -.Bl -tag -width indent -.It Fl v -Displays legacy -.Tn ZFS -versions supported by the current software. -See -.Xr zpool-features 7 -for a description of feature flags features supported by the current software. -.El -.It Xo -.Nm -.Cm upgrade -.Op Fl V Ar version -.Fl a | Ar pool ... -.Xc -.Pp -Enables all supported features on the given pool. -Once this is done, the pool will no longer be accessible on systems that do -not support feature flags. -See -.Xr zpool-features 7 -for details on compatibility with systems that support feature flags, but do -not support all features enabled on the pool. -.Bl -tag -width indent -.It Fl a -Enables all supported features on all pools. -.It Fl V Ar version -Upgrade to the specified legacy version. If the -.Fl V -flag is specified, no features will be enabled on the pool. -This option can only be used to increase version number up to the last -supported legacy version number. -.El -.El -.Sh EXIT STATUS -The following exit values are returned: -.Bl -tag -offset 2n -width 2n -.It 0 -Successful completion. -.It 1 -An error occurred. -.It 2 -Invalid command line options were specified. -.El -.Sh ENVIRONMENT VARIABLES -.Bl -tag -width "ZPOOL_VDEV_NAME_FOLLOW_LINKS" -.It Ev ZPOOL_VDEV_NAME_GUID -Cause -.Nm zpool -subcommands to output vdev guids by default. 
-This behavior is identical to the -.Nm zpool status -g -command line option. -.It Ev ZPOOL_VDEV_NAME_FOLLOW_LINKS -Cause -.Nm zpool -subcommands to follow links for vdev names by default. -This behavior is identical to the -.Nm zpool status -L -command line option. -.It Ev ZPOOL_VDEV_NAME_PATH -Cause -.Nm zpool -subcommands to output full vdev path names by default. -This behavior is identical to the -.Nm zpool status -P -command line option. -.El -.Sh EXAMPLES -.Bl -tag -width 0n -.It Sy Example 1 No Creating a RAID-Z Storage Pool -.Pp -The following command creates a pool with a single -.No raidz -root -.No vdev -that consists of six disks. -.Bd -literal -offset 2n -.Li # Ic zpool create tank raidz da0 da1 da2 da3 da4 da5 -.Ed -.It Sy Example 2 No Creating a Mirrored Storage Pool -.Pp -The following command creates a pool with two mirrors, where each mirror -contains two disks. -.Bd -literal -offset 2n -.Li # Ic zpool create tank mirror da0 da1 mirror da2 da3 -.Ed -.It Sy Example 3 No Creating a Tn ZFS No Storage Pool by Using Partitions -.Pp -The following command creates an unmirrored pool using two GPT partitions. -.Bd -literal -offset 2n -.Li # Ic zpool create tank da0p3 da1p3 -.Ed -.It Sy Example 4 No Creating a Tn ZFS No Storage Pool by Using Files -.Pp -The following command creates an unmirrored pool using files. While not -recommended, a pool based on files can be useful for experimental purposes. -.Bd -literal -offset 2n -.Li # Ic zpool create tank /path/to/file/a /path/to/file/b -.Ed -.It Sy Example 5 No Adding a Mirror to a Tn ZFS No Storage Pool -.Pp -The following command adds two mirrored disks to the pool -.Em tank , -assuming the pool is already made up of two-way mirrors. The additional space -is immediately available to any datasets within the pool. 
-.Bd -literal -offset 2n -.Li # Ic zpool add tank mirror da2 da3 -.Ed -.It Sy Example 6 No Listing Available Tn ZFS No Storage Pools -.Pp -The following command lists all available pools on the system. -.Bd -literal -offset 2n -.Li # Ic zpool list -NAME SIZE ALLOC FREE FRAG EXPANDSZ CAP DEDUP HEALTH ALTROOT -pool 2.70T 473G 2.24T 33% - 17% 1.00x ONLINE - -test 1.98G 89.5K 1.98G 48% - 0% 1.00x ONLINE - -.Ed -.It Sy Example 7 No Listing All Properties for a Pool -.Pp -The following command lists all the properties for a pool. -.Bd -literal -offset 2n -.Li # Ic zpool get all pool -pool size 2.70T - -pool capacity 17% - -pool altroot - default -pool health ONLINE - -pool guid 2501120270416322443 default -pool version 28 default -pool bootfs pool/root local -pool delegation on default -pool autoreplace off default -pool cachefile - default -pool failmode wait default -pool listsnapshots off default -pool autoexpand off default -pool dedupditto 0 default -pool dedupratio 1.00x - -pool free 2.24T - -pool allocated 473G - -pool readonly off - -.Ed -.It Sy Example 8 No Destroying a Tn ZFS No Storage Pool -.Pp -The following command destroys the pool -.Qq Em tank -and any datasets contained within. -.Bd -literal -offset 2n -.Li # Ic zpool destroy -f tank -.Ed -.It Sy Example 9 No Exporting a Tn ZFS No Storage Pool -.Pp -The following command exports the devices in pool -.Em tank -so that they can be relocated or later imported. -.Bd -literal -offset 2n -.Li # Ic zpool export tank -.Ed -.It Sy Example 10 No Importing a Tn ZFS No Storage Pool -.Pp -The following command displays available pools, and then imports the pool -.Qq Em tank -for use on the system. -.Pp -The results from this command are similar to the following: -.Bd -literal -offset 2n -.Li # Ic zpool import - - pool: tank - id: 15451357997522795478 - state: ONLINE -action: The pool can be imported using its name or numeric identifier. 
-config: - - tank ONLINE - mirror ONLINE - da0 ONLINE - da1 ONLINE -.Ed -.It Xo -.Sy Example 11 -Upgrading All -.Tn ZFS -Storage Pools to the Current Version -.Xc -.Pp -The following command upgrades all -.Tn ZFS -Storage pools to the current version of -the software. -.Bd -literal -offset 2n -.Li # Ic zpool upgrade -a -This system is currently running ZFS pool version 28. -.Ed -.It Sy Example 12 No Managing Hot Spares -.Pp -The following command creates a new pool with an available hot spare: -.Bd -literal -offset 2n -.Li # Ic zpool create tank mirror da0 da1 spare da2 -.Ed -.Pp -If one of the disks were to fail, the pool would be reduced to the degraded -state. The failed device can be replaced using the following command: -.Bd -literal -offset 2n -.Li # Ic zpool replace tank da0 da2 -.Ed -.Pp -Once the data has been resilvered, the spare is automatically removed and is -made available should another device fails. The hot spare can be permanently -removed from the pool using the following command: -.Bd -literal -offset 2n -.Li # Ic zpool remove tank da2 -.Ed -.It Xo -.Sy Example 13 -Creating a -.Tn ZFS -Pool with Mirrored Separate Intent Logs -.Xc -.Pp -The following command creates a -.Tn ZFS -storage pool consisting of two, two-way -mirrors and mirrored log devices: -.Bd -literal -offset 2n -.Li # Ic zpool create pool mirror da0 da1 mirror da2 da3 log mirror da4 da5 -.Ed -.It Sy Example 14 No Adding Cache Devices to a Tn ZFS No Pool -.Pp -The following command adds two disks for use as cache devices to a -.Tn ZFS -storage pool: -.Bd -literal -offset 2n -.Li # Ic zpool add pool cache da2 da3 -.Ed -.Pp -Once added, the cache devices gradually fill with content from main memory. -Depending on the size of your cache devices, it could take over an hour for -them to fill. 
Capacity and reads can be monitored using the -.Cm iostat -subcommand as follows: -.Bd -literal -offset 2n -.Li # Ic zpool iostat -v pool 5 -.Ed -.It Xo -.Sy Example 15 -Displaying expanded space on a device -.Xc -.Pp -The following command dipslays the detailed information for the -.Em data -pool. -This pool is comprised of a single -.Em raidz -vdev where one of its -devices increased its capacity by 10GB. -In this example, the pool will not -be able to utilized this extra capacity until all the devices under the -.Em raidz -vdev have been expanded. -.Bd -literal -offset 2n -.Li # Ic zpool list -v data -NAME SIZE ALLOC FREE FRAG EXPANDSZ CAP DEDUP HEALTH ALTROOT -data 23.9G 14.6G 9.30G 48% - 61% 1.00x ONLINE - - raidz1 23.9G 14.6G 9.30G 48% - - ada0 - - - - - - ada1 - - - - 10G - ada2 - - - - - -.Ed -.It Xo -.Sy Example 16 -Removing a Mirrored top-level (Log or Data) Device -.Xc -.Pp -The following commands remove the mirrored log device -.Sy mirror-2 -and mirrored top-level data device -.Sy mirror-1 . -.Pp -Given this configuration: -.Bd -literal -offset 2n - pool: tank - state: ONLINE - scrub: none requested - config: - - NAME STATE READ WRITE CKSUM - tank ONLINE 0 0 0 - mirror-0 ONLINE 0 0 0 - da0 ONLINE 0 0 0 - da1 ONLINE 0 0 0 - mirror-1 ONLINE 0 0 0 - da2 ONLINE 0 0 0 - da3 ONLINE 0 0 0 - logs - mirror-2 ONLINE 0 0 0 - da4 ONLINE 0 0 0 - da5 ONLINE 0 0 0 -.Ed -.Pp -The command to remove the mirrored log -.Em mirror-2 -is: -.Bd -literal -offset 2n -.Li # Ic zpool remove tank mirror-2 -.Ed -.Pp -The command to remove the mirrored data -.Em mirror-1 -is: -.Bd -literal -offset 2n -.Li # Ic zpool remove tank mirror-1 -.Ed -.It Xo -.Sy Example 17 -Recovering a Faulted -.Tn ZFS -Pool -.Xc -.Pp -If a pool is faulted but recoverable, a message indicating this state is -provided by -.Qq Nm Cm status -if the pool was cached (see the -.Fl c Ar cachefile -argument above), or as part of the error output from a failed -.Qq Nm Cm import -of the pool. 
-.Pp -Recover a cached pool with the -.Qq Nm Cm clear -command: -.Bd -literal -offset 2n -.Li # Ic zpool clear -F data -Pool data returned to its state as of Tue Sep 08 13:23:35 2009. -Discarded approximately 29 seconds of transactions. -.Ed -.Pp -If the pool configuration was not cached, use -.Qq Nm Cm import -with the recovery mode flag: -.Bd -literal -offset 2n -.Li # Ic zpool import -F data -Pool data returned to its state as of Tue Sep 08 13:23:35 2009. -Discarded approximately 29 seconds of transactions. -.Ed -.El -.Sh SEE ALSO -.Xr zpool-features 7 , -.Xr zfs 8 , -.Xr zfsd 8 -.Sh HISTORY -The -.Nm -utility first appeared in -.Fx 7.0 . -.Sh AUTHORS -This manual page is a -.Xr mdoc 7 -reimplementation of the -.Tn OpenSolaris -manual page -.Em zpool(1M) , -modified and customized for -.Fx -and licensed under the Common Development and Distribution License -.Pq Tn CDDL . -.Pp -The -.Xr mdoc 7 -implementation of this manual page was initially written by -.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c deleted file mode 100644 index 2f7de933ed41..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. - */ - -#include <solaris.h> -#include <libintl.h> -#include <libuutil.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> - -#include <libzfs.h> - -#include "zpool_util.h" - -/* - * Private interface for iterating over pools specified on the command line. - * Most consumers will call for_each_pool, but in order to support iostat, we - * allow fined grained control through the zpool_list_t interface. - */ - -typedef struct zpool_node { - zpool_handle_t *zn_handle; - uu_avl_node_t zn_avlnode; - int zn_mark; -} zpool_node_t; - -struct zpool_list { - boolean_t zl_findall; - uu_avl_t *zl_avl; - uu_avl_pool_t *zl_pool; - zprop_list_t **zl_proplist; -}; - -/* ARGSUSED */ -static int -zpool_compare(const void *larg, const void *rarg, void *unused) -{ - zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle; - zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle; - const char *lname = zpool_get_name(l); - const char *rname = zpool_get_name(r); - - return (strcmp(lname, rname)); -} - -/* - * Callback function for pool_list_get(). Adds the given pool to the AVL tree - * of known pools. 
- */ -static int -add_pool(zpool_handle_t *zhp, void *data) -{ - zpool_list_t *zlp = data; - zpool_node_t *node = safe_malloc(sizeof (zpool_node_t)); - uu_avl_index_t idx; - - node->zn_handle = zhp; - uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool); - if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) { - if (zlp->zl_proplist && - zpool_expand_proplist(zhp, zlp->zl_proplist) != 0) { - zpool_close(zhp); - free(node); - return (-1); - } - uu_avl_insert(zlp->zl_avl, node, idx); - } else { - zpool_close(zhp); - free(node); - return (-1); - } - - return (0); -} - -/* - * Create a list of pools based on the given arguments. If we're given no - * arguments, then iterate over all pools in the system and add them to the AVL - * tree. Otherwise, add only those pool explicitly specified on the command - * line. - */ -zpool_list_t * -pool_list_get(int argc, char **argv, zprop_list_t **proplist, int *err) -{ - zpool_list_t *zlp; - - zlp = safe_malloc(sizeof (zpool_list_t)); - - zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t), - offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT); - - if (zlp->zl_pool == NULL) - zpool_no_memory(); - - if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL, - UU_DEFAULT)) == NULL) - zpool_no_memory(); - - zlp->zl_proplist = proplist; - - if (argc == 0) { - (void) zpool_iter(g_zfs, add_pool, zlp); - zlp->zl_findall = B_TRUE; - } else { - int i; - - for (i = 0; i < argc; i++) { - zpool_handle_t *zhp; - - if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != - NULL) { - if (add_pool(zhp, zlp) != 0) - *err = B_TRUE; - } else { - *err = B_TRUE; - } - } - } - - return (zlp); -} - -/* - * Search for any new pools, adding them to the list. We only add pools when no - * options were given on the command line. Otherwise, we keep the list fixed as - * those that were explicitly specified. 
- */ -void -pool_list_update(zpool_list_t *zlp) -{ - if (zlp->zl_findall) - (void) zpool_iter(g_zfs, add_pool, zlp); -} - -/* - * Iterate over all pools in the list, executing the callback for each - */ -int -pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func, - void *data) -{ - zpool_node_t *node, *next_node; - int ret = 0; - - for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) { - next_node = uu_avl_next(zlp->zl_avl, node); - if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL || - unavail) - ret |= func(node->zn_handle, data); - } - - return (ret); -} - -/* - * Remove the given pool from the list. When running iostat, we want to remove - * those pools that no longer exist. - */ -void -pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp) -{ - zpool_node_t search, *node; - - search.zn_handle = zhp; - if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) { - uu_avl_remove(zlp->zl_avl, node); - zpool_close(node->zn_handle); - free(node); - } -} - -/* - * Free all the handles associated with this list. - */ -void -pool_list_free(zpool_list_t *zlp) -{ - uu_avl_walk_t *walk; - zpool_node_t *node; - - if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory")); - exit(1); - } - - while ((node = uu_avl_walk_next(walk)) != NULL) { - uu_avl_remove(zlp->zl_avl, node); - zpool_close(node->zn_handle); - free(node); - } - - uu_avl_walk_end(walk); - uu_avl_destroy(zlp->zl_avl); - uu_avl_pool_destroy(zlp->zl_pool); - - free(zlp); -} - -/* - * Returns the number of elements in the pool list. - */ -int -pool_list_count(zpool_list_t *zlp) -{ - return (uu_avl_numnodes(zlp->zl_avl)); -} - -/* - * High level function which iterates over all pools given on the command line, - * using the pool_list_* interfaces. 
- */ -int -for_each_pool(int argc, char **argv, boolean_t unavail, - zprop_list_t **proplist, zpool_iter_f func, void *data) -{ - zpool_list_t *list; - int ret = 0; - - if ((list = pool_list_get(argc, argv, proplist, &ret)) == NULL) - return (1); - - if (pool_list_iter(list, unavail, func, data) != 0) - ret = 1; - - pool_list_free(list); - - return (ret); -} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c deleted file mode 100644 index 35c3db7893df..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c +++ /dev/null @@ -1,6742 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2012 by Frederik Wessels. All rights reserved. - * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. - * Copyright 2016 Nexenta Systems, Inc. - * Copyright (c) 2017 Datto Inc. - * Copyright (c) 2017, Intel Corporation. 
- */ - -#include <solaris.h> -#include <assert.h> -#include <ctype.h> -#include <dirent.h> -#include <errno.h> -#include <fcntl.h> -#include <getopt.h> -#include <libgen.h> -#include <libintl.h> -#include <libuutil.h> -#include <locale.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <priv.h> -#include <pwd.h> -#include <zone.h> -#include <sys/time.h> -#include <zfs_prop.h> -#include <sys/fs/zfs.h> -#include <sys/stat.h> -#include <sys/debug.h> - -#include <libzfs.h> - -#include "zpool_util.h" -#include "zfs_comutil.h" -#include "zfeature_common.h" - -#include "statcommon.h" - -libzfs_handle_t *g_zfs; - -static int zpool_do_create(int, char **); -static int zpool_do_destroy(int, char **); - -static int zpool_do_add(int, char **); -static int zpool_do_remove(int, char **); -static int zpool_do_labelclear(int, char **); - -static int zpool_do_checkpoint(int, char **); - -static int zpool_do_list(int, char **); -static int zpool_do_iostat(int, char **); -static int zpool_do_status(int, char **); - -static int zpool_do_online(int, char **); -static int zpool_do_offline(int, char **); -static int zpool_do_clear(int, char **); -static int zpool_do_reopen(int, char **); - -static int zpool_do_reguid(int, char **); - -static int zpool_do_attach(int, char **); -static int zpool_do_detach(int, char **); -static int zpool_do_replace(int, char **); -static int zpool_do_split(int, char **); - -static int zpool_do_initialize(int, char **); -static int zpool_do_scrub(int, char **); - -static int zpool_do_import(int, char **); -static int zpool_do_export(int, char **); - -static int zpool_do_upgrade(int, char **); - -static int zpool_do_history(int, char **); - -static int zpool_do_get(int, char **); -static int zpool_do_set(int, char **); - -static int zpool_do_sync(int, char **); - -/* - * These libumem hooks provide a reasonable set of defaults for the allocator's - * debugging facilities. 
- */ - -#ifdef DEBUG -const char * -_umem_debug_init(void) -{ - return ("default,verbose"); /* $UMEM_DEBUG setting */ -} - -const char * -_umem_logging_init(void) -{ - return ("fail,contents"); /* $UMEM_LOGGING setting */ -} -#endif - -typedef enum { - HELP_ADD, - HELP_ATTACH, - HELP_CLEAR, - HELP_CREATE, - HELP_CHECKPOINT, - HELP_DESTROY, - HELP_DETACH, - HELP_EXPORT, - HELP_HISTORY, - HELP_IMPORT, - HELP_IOSTAT, - HELP_LABELCLEAR, - HELP_LIST, - HELP_OFFLINE, - HELP_ONLINE, - HELP_REPLACE, - HELP_REMOVE, - HELP_INITIALIZE, - HELP_SCRUB, - HELP_STATUS, - HELP_UPGRADE, - HELP_GET, - HELP_SET, - HELP_SPLIT, - HELP_SYNC, - HELP_REGUID, - HELP_REOPEN -} zpool_help_t; - - -typedef struct zpool_command { - const char *name; - int (*func)(int, char **); - zpool_help_t usage; -} zpool_command_t; - -/* - * Master command table. Each ZFS command has a name, associated function, and - * usage message. The usage messages need to be internationalized, so we have - * to have a function to return the usage message based on a command index. - * - * These commands are organized according to how they are displayed in the usage - * message. An empty command (one with a NULL name) indicates an empty line in - * the generic usage message. 
- */ -static zpool_command_t command_table[] = { - { "create", zpool_do_create, HELP_CREATE }, - { "destroy", zpool_do_destroy, HELP_DESTROY }, - { NULL }, - { "add", zpool_do_add, HELP_ADD }, - { "remove", zpool_do_remove, HELP_REMOVE }, - { NULL }, - { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, - { NULL }, - { "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT }, - { NULL }, - { "list", zpool_do_list, HELP_LIST }, - { "iostat", zpool_do_iostat, HELP_IOSTAT }, - { "status", zpool_do_status, HELP_STATUS }, - { NULL }, - { "online", zpool_do_online, HELP_ONLINE }, - { "offline", zpool_do_offline, HELP_OFFLINE }, - { "clear", zpool_do_clear, HELP_CLEAR }, - { "reopen", zpool_do_reopen, HELP_REOPEN }, - { NULL }, - { "attach", zpool_do_attach, HELP_ATTACH }, - { "detach", zpool_do_detach, HELP_DETACH }, - { "replace", zpool_do_replace, HELP_REPLACE }, - { "split", zpool_do_split, HELP_SPLIT }, - { NULL }, - { "initialize", zpool_do_initialize, HELP_INITIALIZE }, - { "scrub", zpool_do_scrub, HELP_SCRUB }, - { NULL }, - { "import", zpool_do_import, HELP_IMPORT }, - { "export", zpool_do_export, HELP_EXPORT }, - { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, - { "reguid", zpool_do_reguid, HELP_REGUID }, - { NULL }, - { "history", zpool_do_history, HELP_HISTORY }, - { "get", zpool_do_get, HELP_GET }, - { "set", zpool_do_set, HELP_SET }, - { "sync", zpool_do_sync, HELP_SYNC }, -}; - -#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) - -#define VDEV_ALLOC_CLASS_LOGS "logs" - -static zpool_command_t *current_command; -static char history_str[HIS_MAX_RECORD_LEN]; -static boolean_t log_history = B_TRUE; -static uint_t timestamp_fmt = NODATE; - -static const char * -get_usage(zpool_help_t idx) -{ - switch (idx) { - case HELP_ADD: - return (gettext("\tadd [-fgLnP] <pool> <vdev> ...\n")); - case HELP_ATTACH: - return (gettext("\tattach [-f] <pool> <device> " - "<new-device>\n")); - case HELP_CLEAR: - return (gettext("\tclear [-nF] <pool> 
[device]\n")); - case HELP_CREATE: - return (gettext("\tcreate [-fnd] [-B] " - "[-o property=value] ... \n" - "\t [-O file-system-property=value] ...\n" - "\t [-m mountpoint] [-R root] [-t tempname] " - "<pool> <vdev> ...\n")); - case HELP_CHECKPOINT: - return (gettext("\tcheckpoint [--discard] <pool> ...\n")); - case HELP_DESTROY: - return (gettext("\tdestroy [-f] <pool>\n")); - case HELP_DETACH: - return (gettext("\tdetach <pool> <device>\n")); - case HELP_EXPORT: - return (gettext("\texport [-f] <pool> ...\n")); - case HELP_HISTORY: - return (gettext("\thistory [-il] [<pool>] ...\n")); - case HELP_IMPORT: - return (gettext("\timport [-d dir] [-D]\n" - "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " - "[-R root] [-F [-n]] -a\n" - "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " - "[-R root] [-F [-n]] [-t]\n" - "\t [--rewind-to-checkpoint] <pool | id> [newpool]\n")); - case HELP_IOSTAT: - return (gettext("\tiostat [-gLPv] [-T d|u] [pool] ... " - "[interval [count]]\n")); - case HELP_LABELCLEAR: - return (gettext("\tlabelclear [-f] <vdev>\n")); - case HELP_LIST: - return (gettext("\tlist [-gHLpPv] [-o property[,...]] " - "[-T d|u] [pool] ... [interval [count]]\n")); - case HELP_OFFLINE: - return (gettext("\toffline [-t] <pool> <device> ...\n")); - case HELP_ONLINE: - return (gettext("\tonline [-e] <pool> <device> ...\n")); - case HELP_REPLACE: - return (gettext("\treplace [-f] <pool> <device> " - "[new-device]\n")); - case HELP_REMOVE: - return (gettext("\tremove [-nps] <pool> <device> ...\n")); - case HELP_REOPEN: - return (gettext("\treopen <pool>\n")); - case HELP_INITIALIZE: - return (gettext("\tinitialize [-cs] <pool> [<device> ...]\n")); - case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] <pool> ...\n")); - case HELP_STATUS: - return (gettext("\tstatus [-DgLPvx] [-T d|u] [pool] ... 
" - "[interval [count]]\n")); - case HELP_UPGRADE: - return (gettext("\tupgrade [-v]\n" - "\tupgrade [-V version] <-a | pool ...>\n")); - case HELP_GET: - return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " - "<\"all\" | property[,...]> <pool> ...\n")); - case HELP_SET: - return (gettext("\tset <property=value> <pool> \n")); - case HELP_SPLIT: - return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n" - "\t [-o property=value] <pool> <newpool> " - "[<device> ...]\n")); - case HELP_REGUID: - return (gettext("\treguid <pool>\n")); - case HELP_SYNC: - return (gettext("\tsync [pool] ...\n")); - } - - abort(); - /* NOTREACHED */ -} - - -/* - * Callback routine that will print out a pool property value. - */ -static int -print_prop_cb(int prop, void *cb) -{ - FILE *fp = cb; - - (void) fprintf(fp, "\t%-19s ", zpool_prop_to_name(prop)); - - if (zpool_prop_readonly(prop)) - (void) fprintf(fp, " NO "); - else - (void) fprintf(fp, " YES "); - - if (zpool_prop_values(prop) == NULL) - (void) fprintf(fp, "-\n"); - else - (void) fprintf(fp, "%s\n", zpool_prop_values(prop)); - - return (ZPROP_CONT); -} - -/* - * Display usage message. If we're inside a command, display only the usage for - * that command. Otherwise, iterate over the entire command table and display - * a complete usage message. - */ -void -usage(boolean_t requested) -{ - FILE *fp = requested ? 
stdout : stderr; - - if (current_command == NULL) { - int i; - - (void) fprintf(fp, gettext("usage: zpool command args ...\n")); - (void) fprintf(fp, - gettext("where 'command' is one of the following:\n\n")); - - for (i = 0; i < NCOMMAND; i++) { - if (command_table[i].name == NULL) - (void) fprintf(fp, "\n"); - else - (void) fprintf(fp, "%s", - get_usage(command_table[i].usage)); - } - } else { - (void) fprintf(fp, gettext("usage:\n")); - (void) fprintf(fp, "%s", get_usage(current_command->usage)); - } - - if (current_command != NULL && - ((strcmp(current_command->name, "set") == 0) || - (strcmp(current_command->name, "get") == 0) || - (strcmp(current_command->name, "list") == 0))) { - - (void) fprintf(fp, - gettext("\nthe following properties are supported:\n")); - - (void) fprintf(fp, "\n\t%-19s %s %s\n\n", - "PROPERTY", "EDIT", "VALUES"); - - /* Iterate over all properties */ - (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, - ZFS_TYPE_POOL); - - (void) fprintf(fp, "\t%-19s ", "feature@..."); - (void) fprintf(fp, "YES disabled | enabled | active\n"); - - (void) fprintf(fp, gettext("\nThe feature@ properties must be " - "appended with a feature name.\nSee zpool-features(7).\n")); - } - - /* - * See comments at end of main(). - */ - if (getenv("ZFS_ABORT") != NULL) { - (void) printf("dumping core by request\n"); - abort(); - } - - exit(requested ? 
0 : 2); -} - -/* - * print a pool vdev config for dry runs - */ -static void -print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent, - const char *match, int name_flags) -{ - nvlist_t **child; - uint_t c, children; - char *vname; - boolean_t printed = B_FALSE; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - if (name != NULL) - (void) printf("\t%*s%s\n", indent, "", name); - return; - } - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; - char *class = ""; - - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if (is_log) - class = VDEV_ALLOC_BIAS_LOG; - (void) nvlist_lookup_string(child[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, &class); - if (strcmp(match, class) != 0) - continue; - - if (!printed && name != NULL) { - (void) printf("\t%*s%s\n", indent, "", name); - printed = B_TRUE; - } - vname = zpool_vdev_name(g_zfs, zhp, child[c], name_flags); - print_vdev_tree(zhp, vname, child[c], indent + 2, "", - name_flags); - free(vname); - } -} - -static boolean_t -prop_list_contains_feature(nvlist_t *proplist) -{ - nvpair_t *nvp; - for (nvp = nvlist_next_nvpair(proplist, NULL); NULL != nvp; - nvp = nvlist_next_nvpair(proplist, nvp)) { - if (zpool_prop_feature(nvpair_name(nvp))) - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * Add a property pair (name, string-value) into a property nvlist. 
- */ -static int -add_prop_list(const char *propname, char *propval, nvlist_t **props, - boolean_t poolprop) -{ - zpool_prop_t prop = ZPROP_INVAL; - zfs_prop_t fprop; - nvlist_t *proplist; - const char *normnm; - char *strval; - - if (*props == NULL && - nvlist_alloc(props, NV_UNIQUE_NAME, 0) != 0) { - (void) fprintf(stderr, - gettext("internal error: out of memory\n")); - return (1); - } - - proplist = *props; - - if (poolprop) { - const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION); - - if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL && - !zpool_prop_feature(propname)) { - (void) fprintf(stderr, gettext("property '%s' is " - "not a valid pool property\n"), propname); - return (2); - } - - /* - * feature@ properties and version should not be specified - * at the same time. - */ - if ((prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname) && - nvlist_exists(proplist, vname)) || - (prop == ZPOOL_PROP_VERSION && - prop_list_contains_feature(proplist))) { - (void) fprintf(stderr, gettext("'feature@' and " - "'version' properties cannot be specified " - "together\n")); - return (2); - } - - - if (zpool_prop_feature(propname)) - normnm = propname; - else - normnm = zpool_prop_to_name(prop); - } else { - if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { - normnm = zfs_prop_to_name(fprop); - } else { - normnm = propname; - } - } - - if (nvlist_lookup_string(proplist, normnm, &strval) == 0 && - prop != ZPOOL_PROP_CACHEFILE) { - (void) fprintf(stderr, gettext("property '%s' " - "specified multiple times\n"), propname); - return (2); - } - - if (nvlist_add_string(proplist, normnm, propval) != 0) { - (void) fprintf(stderr, gettext("internal " - "error: out of memory\n")); - return (1); - } - - return (0); -} - -/* - * Set a default property pair (name, string-value) in a property nvlist - */ -static int -add_prop_list_default(const char *propname, char *propval, nvlist_t **props, - boolean_t poolprop) -{ - char *pval; - - if 
(nvlist_lookup_string(*props, propname, &pval) == 0) - return (0); - - return (add_prop_list(propname, propval, props, poolprop)); -} - -/* - * zpool add [-fgLnP] [-o property=value] <pool> <vdev> ... - * - * -f Force addition of devices, even if they appear in use - * -g Display guid for individual vdev name. - * -L Follow links when resolving vdev path name. - * -n Do not add the devices, but display the resulting layout if - * they were to be added. - * -P Display full path for vdev name. - * - * Adds the given vdevs to 'pool'. As with create, the bulk of this work is - * handled by get_vdev_spec(), which constructs the nvlist needed to pass to - * libzfs. - */ -int -zpool_do_add(int argc, char **argv) -{ - boolean_t force = B_FALSE; - boolean_t dryrun = B_FALSE; - int name_flags = 0; - int c; - nvlist_t *nvroot; - char *poolname; - zpool_boot_label_t boot_type; - uint64_t boot_size; - int ret; - zpool_handle_t *zhp; - nvlist_t *config; - - /* check options */ - while ((c = getopt(argc, argv, "fgLnP")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - case 'g': - name_flags |= VDEV_NAME_GUID; - break; - case 'L': - name_flags |= VDEV_NAME_FOLLOW_LINKS; - break; - case 'n': - dryrun = B_TRUE; - break; - case 'P': - name_flags |= VDEV_NAME_PATH; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing vdev specification\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - - argc--; - argv++; - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - if ((config = zpool_get_config(zhp, NULL)) == NULL) { - (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), - poolname); - zpool_close(zhp); - return (1); - } - - if 
(zpool_is_bootable(zhp)) - boot_type = ZPOOL_COPY_BOOT_LABEL; - else - boot_type = ZPOOL_NO_BOOT_LABEL; - - /* pass off to get_vdev_spec for processing */ - boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); - nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun, - boot_type, boot_size, argc, argv); - if (nvroot == NULL) { - zpool_close(zhp); - return (1); - } - - if (dryrun) { - nvlist_t *poolnvroot; - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &poolnvroot) == 0); - - (void) printf(gettext("would update '%s' to the following " - "configuration:\n"), zpool_get_name(zhp)); - - /* print original main pool and new tree */ - print_vdev_tree(zhp, poolname, poolnvroot, 0, "", - name_flags | VDEV_NAME_TYPE_ID); - print_vdev_tree(zhp, NULL, nvroot, 0, "", name_flags); - - /* print other classes: 'dedup', 'special', and 'log' */ - print_vdev_tree(zhp, "dedup", poolnvroot, 0, - VDEV_ALLOC_BIAS_DEDUP, name_flags); - print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_DEDUP, - name_flags); - - print_vdev_tree(zhp, "special", poolnvroot, 0, - VDEV_ALLOC_BIAS_SPECIAL, name_flags); - print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_SPECIAL, - name_flags); - - print_vdev_tree(zhp, "logs", poolnvroot, 0, VDEV_ALLOC_BIAS_LOG, - name_flags); - print_vdev_tree(zhp, NULL, nvroot, 0, VDEV_ALLOC_BIAS_LOG, - name_flags); - - ret = 0; - } else { - ret = (zpool_add(zhp, nvroot) != 0); - } - - nvlist_free(nvroot); - zpool_close(zhp); - - return (ret); -} - -/* - * zpool remove <pool> <vdev> ... - * - * Removes the given vdev from the pool. 
- */ -int -zpool_do_remove(int argc, char **argv) -{ - char *poolname; - int i, ret = 0; - zpool_handle_t *zhp; - boolean_t stop = B_FALSE; - boolean_t noop = B_FALSE; - boolean_t parsable = B_FALSE; - char c; - - /* check options */ - while ((c = getopt(argc, argv, "nps")) != -1) { - switch (c) { - case 'n': - noop = B_TRUE; - break; - case 'p': - parsable = B_TRUE; - break; - case 's': - stop = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - if (stop && noop) { - (void) fprintf(stderr, gettext("stop request ignored\n")); - return (0); - } - - if (stop) { - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - if (zpool_vdev_remove_cancel(zhp) != 0) - ret = 1; - } else { - if (argc < 2) { - (void) fprintf(stderr, gettext("missing device\n")); - usage(B_FALSE); - } - - for (i = 1; i < argc; i++) { - if (noop) { - uint64_t size; - - if (zpool_vdev_indirect_size(zhp, argv[i], - &size) != 0) { - ret = 1; - break; - } - if (parsable) { - (void) printf("%s %llu\n", - argv[i], size); - } else { - char valstr[32]; - zfs_nicenum(size, valstr, - sizeof (valstr)); - (void) printf("Memory that will be " - "used after removing %s: %s\n", - argv[i], valstr); - } - } else { - if (zpool_vdev_remove(zhp, argv[i]) != 0) - ret = 1; - } - } - } - - return (ret); -} - -/* - * zpool labelclear [-f] <vdev> - * - * -f Force clearing the label for the vdevs which are members of - * the exported or foreign pools. - * - * Verifies that the vdev is not active and zeros out the label information - * on the device. 
- */ -int -zpool_do_labelclear(int argc, char **argv) -{ - char vdev[MAXPATHLEN]; - char *name = NULL; - struct stat st; - int c, fd, ret = 0; - nvlist_t *config; - pool_state_t state; - boolean_t inuse = B_FALSE; - boolean_t force = B_FALSE; - - /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - default: - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get vdev name */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing vdev name\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - /* - * Check if we were given absolute path and use it as is. - * Otherwise if the provided vdev name doesn't point to a file, - * try prepending dsk path and appending s0. - */ - (void) strlcpy(vdev, argv[0], sizeof (vdev)); - if (vdev[0] != '/' && stat(vdev, &st) != 0) { - char *s; - - (void) snprintf(vdev, sizeof (vdev), "%s/%s", -#ifdef illumos - ZFS_DISK_ROOT, argv[0]); - if ((s = strrchr(argv[0], 's')) == NULL || - !isdigit(*(s + 1))) - (void) strlcat(vdev, "s0", sizeof (vdev)); -#else - "/dev", argv[0]); -#endif - if (stat(vdev, &st) != 0) { - (void) fprintf(stderr, gettext( - "failed to find device %s, try specifying absolute " - "path instead\n"), argv[0]); - return (1); - } - } - - if ((fd = open(vdev, O_RDWR)) < 0) { - (void) fprintf(stderr, gettext("failed to open %s: %s\n"), - vdev, strerror(errno)); - return (1); - } - - if (zpool_read_label(fd, &config) != 0) { - (void) fprintf(stderr, - gettext("failed to read label from %s\n"), vdev); - return (1); - } - nvlist_free(config); - - ret = zpool_in_use(g_zfs, fd, &state, &name, &inuse); - if (ret != 0) { - (void) fprintf(stderr, - gettext("failed to check state for %s\n"), vdev); - return (1); - } - - if (!inuse) - goto wipe_label; - - switch (state) { - default: - case 
POOL_STATE_ACTIVE: - case POOL_STATE_SPARE: - case POOL_STATE_L2CACHE: - (void) fprintf(stderr, gettext( - "%s is a member (%s) of pool \"%s\"\n"), - vdev, zpool_pool_state_to_name(state), name); - ret = 1; - goto errout; - - case POOL_STATE_EXPORTED: - if (force) - break; - (void) fprintf(stderr, gettext( - "use '-f' to override the following error:\n" - "%s is a member of exported pool \"%s\"\n"), - vdev, name); - ret = 1; - goto errout; - - case POOL_STATE_POTENTIALLY_ACTIVE: - if (force) - break; - (void) fprintf(stderr, gettext( - "use '-f' to override the following error:\n" - "%s is a member of potentially active pool \"%s\"\n"), - vdev, name); - ret = 1; - goto errout; - - case POOL_STATE_DESTROYED: - /* inuse should never be set for a destroyed pool */ - assert(0); - break; - } - -wipe_label: - ret = zpool_clear_label(fd); - if (ret != 0) { - (void) fprintf(stderr, - gettext("failed to clear label for %s\n"), vdev); - } - -errout: - free(name); - (void) close(fd); - - return (ret); -} - -/* - * zpool create [-fnd] [-B] [-o property=value] ... - * [-O file-system-property=value] ... - * [-R root] [-m mountpoint] [-t tempname] <pool> <dev> ... - * - * -B Create boot partition. - * -f Force creation, even if devices appear in use - * -n Do not create the pool, but display the resulting layout if it - * were to be created. - * -R Create a pool under an alternate root - * -m Set default mountpoint for the root dataset. By default it's - * '/<pool>' - * -t Use the temporary name until the pool is exported. - * -o Set property=value. - * -d Don't automatically enable all supported pool features - * (individual features can be enabled with -o). - * -O Set fsproperty=value in the pool's root file system - * - * Creates the named pool according to the given vdev specification. The - * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. 
Once - * we get the nvlist back from get_vdev_spec(), we either print out the contents - * (if '-n' was specified), or pass it to libzfs to do the creation. - */ - -#define SYSTEM256 (256 * 1024 * 1024) -int -zpool_do_create(int argc, char **argv) -{ - boolean_t force = B_FALSE; - boolean_t dryrun = B_FALSE; - boolean_t enable_all_pool_feat = B_TRUE; - zpool_boot_label_t boot_type = ZPOOL_NO_BOOT_LABEL; - uint64_t boot_size = 0; - int c; - nvlist_t *nvroot = NULL; - char *poolname; - char *tname = NULL; - int ret = 1; - char *altroot = NULL; - char *mountpoint = NULL; - nvlist_t *fsprops = NULL; - nvlist_t *props = NULL; - char *propval; - - /* check options */ - while ((c = getopt(argc, argv, ":fndBR:m:o:O:t:")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - case 'n': - dryrun = B_TRUE; - break; - case 'd': - enable_all_pool_feat = B_FALSE; - break; - case 'B': -#ifdef illumos - /* - * We should create the system partition. - * Also make sure the size is set. - */ - boot_type = ZPOOL_CREATE_BOOT_LABEL; - if (boot_size == 0) - boot_size = SYSTEM256; - break; -#else - (void) fprintf(stderr, - gettext("option '%c' is not supported\n"), - optopt); - goto badusage; -#endif - case 'R': - altroot = optarg; - if (add_prop_list(zpool_prop_to_name( - ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) - goto errout; - if (add_prop_list_default(zpool_prop_to_name( - ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) - goto errout; - break; - case 'm': - /* Equivalent to -O mountpoint=optarg */ - mountpoint = optarg; - break; - case 'o': - if ((propval = strchr(optarg, '=')) == NULL) { - (void) fprintf(stderr, gettext("missing " - "'=' for -o option\n")); - goto errout; - } - *propval = '\0'; - propval++; - - if (add_prop_list(optarg, propval, &props, B_TRUE)) - goto errout; - - /* - * Get bootsize value for make_root_vdev(). 
- */ - if (zpool_name_to_prop(optarg) == ZPOOL_PROP_BOOTSIZE) { - if (zfs_nicestrtonum(g_zfs, propval, - &boot_size) < 0 || boot_size == 0) { - (void) fprintf(stderr, - gettext("bad boot partition size " - "'%s': %s\n"), propval, - libzfs_error_description(g_zfs)); - goto errout; - } - } - - /* - * If the user is creating a pool that doesn't support - * feature flags, don't enable any features. - */ - if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) { - char *end; - u_longlong_t ver; - - ver = strtoull(propval, &end, 10); - if (*end == '\0' && - ver < SPA_VERSION_FEATURES) { - enable_all_pool_feat = B_FALSE; - } - } - if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT) - altroot = propval; - break; - case 'O': - if ((propval = strchr(optarg, '=')) == NULL) { - (void) fprintf(stderr, gettext("missing " - "'=' for -O option\n")); - goto errout; - } - *propval = '\0'; - propval++; - - /* - * Mountpoints are checked and then added later. - * Uniquely among properties, they can be specified - * more than once, to avoid conflict with -m. - */ - if (0 == strcmp(optarg, - zfs_prop_to_name(ZFS_PROP_MOUNTPOINT))) { - mountpoint = propval; - } else if (add_prop_list(optarg, propval, &fsprops, - B_FALSE)) { - goto errout; - } - break; - case 't': - /* - * Sanity check temporary pool name. 
- */ - if (strchr(optarg, '/') != NULL) { - (void) fprintf(stderr, gettext("cannot create " - "'%s': invalid character '/' in temporary " - "name\n"), optarg); - (void) fprintf(stderr, gettext("use 'zfs " - "create' to create a dataset\n")); - goto errout; - } - - if (add_prop_list(zpool_prop_to_name( - ZPOOL_PROP_TNAME), optarg, &props, B_TRUE)) - goto errout; - if (add_prop_list_default(zpool_prop_to_name( - ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) - goto errout; - tname = optarg; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - goto badusage; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - goto badusage; - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - goto badusage; - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing vdev specification\n")); - goto badusage; - } - - poolname = argv[0]; - - /* - * As a special case, check for use of '/' in the name, and direct the - * user to use 'zfs create' instead. - */ - if (strchr(poolname, '/') != NULL) { - (void) fprintf(stderr, gettext("cannot create '%s': invalid " - "character '/' in pool name\n"), poolname); - (void) fprintf(stderr, gettext("use 'zfs create' to " - "create a dataset\n")); - goto errout; - } - - /* - * Make sure the bootsize is set when ZPOOL_CREATE_BOOT_LABEL is used, - * and not set otherwise. 
- */ - if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { - const char *propname; - char *strptr, *buf = NULL; - int rv; - - propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); - if (nvlist_lookup_string(props, propname, &strptr) != 0) { - (void) asprintf(&buf, "%" PRIu64, boot_size); - if (buf == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory\n")); - goto errout; - } - rv = add_prop_list(propname, buf, &props, B_TRUE); - free(buf); - if (rv != 0) - goto errout; - } - } else { - const char *propname; - char *strptr; - - propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); - if (nvlist_lookup_string(props, propname, &strptr) == 0) { - (void) fprintf(stderr, gettext("error: setting boot " - "partition size requires option '-B'\n")); - goto errout; - } - } - - /* pass off to get_vdev_spec for bulk processing */ - nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun, - boot_type, boot_size, argc - 1, argv + 1); - if (nvroot == NULL) - goto errout; - - /* make_root_vdev() allows 0 toplevel children if there are spares */ - if (!zfs_allocatable_devs(nvroot)) { - (void) fprintf(stderr, gettext("invalid vdev " - "specification: at least one toplevel vdev must be " - "specified\n")); - goto errout; - } - - if (altroot != NULL && altroot[0] != '/') { - (void) fprintf(stderr, gettext("invalid alternate root '%s': " - "must be an absolute path\n"), altroot); - goto errout; - } - - /* - * Check the validity of the mountpoint and direct the user to use the - * '-m' mountpoint option if it looks like its in use. - * Ignore the checks if the '-f' option is given. 
- */ - if (!force && (mountpoint == NULL || - (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && - strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0))) { - char buf[MAXPATHLEN]; - DIR *dirp; - - if (mountpoint && mountpoint[0] != '/') { - (void) fprintf(stderr, gettext("invalid mountpoint " - "'%s': must be an absolute path, 'legacy', or " - "'none'\n"), mountpoint); - goto errout; - } - - if (mountpoint == NULL) { - if (altroot != NULL) - (void) snprintf(buf, sizeof (buf), "%s/%s", - altroot, poolname); - else - (void) snprintf(buf, sizeof (buf), "/%s", - poolname); - } else { - if (altroot != NULL) - (void) snprintf(buf, sizeof (buf), "%s%s", - altroot, mountpoint); - else - (void) snprintf(buf, sizeof (buf), "%s", - mountpoint); - } - - if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { - (void) fprintf(stderr, gettext("mountpoint '%s' : " - "%s\n"), buf, strerror(errno)); - (void) fprintf(stderr, gettext("use '-m' " - "option to provide a different default\n")); - goto errout; - } else if (dirp) { - int count = 0; - - while (count < 3 && readdir(dirp) != NULL) - count++; - (void) closedir(dirp); - - if (count > 2) { - (void) fprintf(stderr, gettext("mountpoint " - "'%s' exists and is not empty\n"), buf); - (void) fprintf(stderr, gettext("use '-m' " - "option to provide a " - "different default\n")); - goto errout; - } - } - } - - /* - * Now that the mountpoint's validity has been checked, ensure that - * the property is set appropriately prior to creating the pool. - */ - if (mountpoint != NULL) { - ret = add_prop_list(zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), - mountpoint, &fsprops, B_FALSE); - if (ret != 0) - goto errout; - } - - ret = 1; - if (dryrun) { - /* - * For a dry run invocation, print out a basic message and run - * through all the vdevs in the list and print out in an - * appropriate hierarchy. 
- */ - (void) printf(gettext("would create '%s' with the " - "following layout:\n\n"), poolname); - - print_vdev_tree(NULL, poolname, nvroot, 0, "", 0); - print_vdev_tree(NULL, "dedup", nvroot, 0, - VDEV_ALLOC_BIAS_DEDUP, 0); - print_vdev_tree(NULL, "special", nvroot, 0, - VDEV_ALLOC_BIAS_SPECIAL, 0); - print_vdev_tree(NULL, "logs", nvroot, 0, - VDEV_ALLOC_BIAS_LOG, 0); - - ret = 0; - } else { - /* - * Hand off to libzfs. - */ - if (enable_all_pool_feat) { - spa_feature_t i; - for (i = 0; i < SPA_FEATURES; i++) { - char propname[MAXPATHLEN]; - zfeature_info_t *feat = &spa_feature_table[i]; - - (void) snprintf(propname, sizeof (propname), - "feature@%s", feat->fi_uname); - - /* - * Skip feature if user specified it manually - * on the command line. - */ - if (nvlist_exists(props, propname)) - continue; - - ret = add_prop_list(propname, - ZFS_FEATURE_ENABLED, &props, B_TRUE); - if (ret != 0) - goto errout; - } - } - - ret = 1; - if (zpool_create(g_zfs, poolname, - nvroot, props, fsprops) == 0) { - zfs_handle_t *pool = zfs_open(g_zfs, - tname ? tname : poolname, ZFS_TYPE_FILESYSTEM); - if (pool != NULL) { - if (zfs_mount(pool, NULL, 0) == 0) - ret = zfs_shareall(pool); - zfs_close(pool); - } - } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { - (void) fprintf(stderr, gettext("pool name may have " - "been omitted\n")); - } - } - -errout: - nvlist_free(nvroot); - nvlist_free(fsprops); - nvlist_free(props); - return (ret); -badusage: - nvlist_free(fsprops); - nvlist_free(props); - usage(B_FALSE); - return (2); -} - -/* - * zpool destroy <pool> - * - * -f Forcefully unmount any datasets - * - * Destroy the given pool. Automatically unmounts any datasets in the pool. 
- */ -int -zpool_do_destroy(int argc, char **argv) -{ - boolean_t force = B_FALSE; - int c; - char *pool; - zpool_handle_t *zhp; - int ret; - - /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - pool = argv[0]; - - if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { - /* - * As a special case, check for use of '/' in the name, and - * direct the user to use 'zfs destroy' instead. - */ - if (strchr(pool, '/') != NULL) - (void) fprintf(stderr, gettext("use 'zfs destroy' to " - "destroy a dataset\n")); - return (1); - } - - if (zpool_disable_datasets(zhp, force) != 0) { - (void) fprintf(stderr, gettext("could not destroy '%s': " - "could not unmount datasets\n"), zpool_get_name(zhp)); - return (1); - } - - /* The history must be logged as part of the export */ - log_history = B_FALSE; - - ret = (zpool_destroy(zhp, history_str) != 0); - - zpool_close(zhp); - - return (ret); -} - -/* - * zpool export [-f] <pool> ... - * - * -f Forcefully unmount datasets - * - * Export the given pools. By default, the command will attempt to cleanly - * unmount any active datasets within the pool. If the '-f' flag is specified, - * then the datasets will be forcefully unmounted. 
- */ -int -zpool_do_export(int argc, char **argv) -{ - boolean_t force = B_FALSE; - boolean_t hardforce = B_FALSE; - int c; - zpool_handle_t *zhp; - int ret; - int i; - - /* check options */ - while ((c = getopt(argc, argv, "fF")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - case 'F': - hardforce = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* check arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool argument\n")); - usage(B_FALSE); - } - - ret = 0; - for (i = 0; i < argc; i++) { - if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) { - ret = 1; - continue; - } - - if (zpool_disable_datasets(zhp, force) != 0) { - ret = 1; - zpool_close(zhp); - continue; - } - - /* The history must be logged as part of the export */ - log_history = B_FALSE; - - if (hardforce) { - if (zpool_export_force(zhp, history_str) != 0) - ret = 1; - } else if (zpool_export(zhp, force, history_str) != 0) { - ret = 1; - } - - zpool_close(zhp); - } - - return (ret); -} - -/* - * Given a vdev configuration, determine the maximum width needed for the device - * name column. 
- */ -static int -max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max, - int name_flags) -{ - char *name; - nvlist_t **child; - uint_t c, children; - int ret; - - name = zpool_vdev_name(g_zfs, zhp, nv, name_flags | VDEV_NAME_TYPE_ID); - if (strlen(name) + depth > max) - max = strlen(name) + depth; - - free(name); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if ((ret = max_width(zhp, child[c], depth + 2, - max, name_flags)) > max) - max = ret; - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if ((ret = max_width(zhp, child[c], depth + 2, - max, name_flags)) > max) - max = ret; - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if ((ret = max_width(zhp, child[c], depth + 2, - max, name_flags)) > max) - max = ret; - } - - return (max); -} - -typedef struct spare_cbdata { - uint64_t cb_guid; - zpool_handle_t *cb_zhp; -} spare_cbdata_t; - -static boolean_t -find_vdev(nvlist_t *nv, uint64_t search) -{ - uint64_t guid; - nvlist_t **child; - uint_t c, children; - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && - search == guid) - return (B_TRUE); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev(child[c], search)) - return (B_TRUE); - } - - return (B_FALSE); -} - -static int -find_spare(zpool_handle_t *zhp, void *data) -{ - spare_cbdata_t *cbp = data; - nvlist_t *config, *nvroot; - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - if (find_vdev(nvroot, cbp->cb_guid)) { - cbp->cb_zhp = zhp; - return (1); - } - - zpool_close(zhp); - return (0); -} - -typedef struct status_cbdata { - int cb_count; - int cb_name_flags; - int cb_namewidth; - boolean_t 
cb_allpools; - boolean_t cb_verbose; - boolean_t cb_explain; - boolean_t cb_first; - boolean_t cb_dedup_stats; - boolean_t cb_print_status; -} status_cbdata_t; - -/* - * Print out configuration state as requested by status_callback. - */ -static void -print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, - nvlist_t *nv, int depth, boolean_t isspare) -{ - nvlist_t **child; - uint_t c, vsc, children; - pool_scan_stat_t *ps = NULL; - vdev_stat_t *vs; - char rbuf[6], wbuf[6], cbuf[6]; - char *vname; - uint64_t notpresent; - uint64_t ashift; - spare_cbdata_t spare_cb; - const char *state; - char *type; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - children = 0; - - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); - - if (strcmp(type, VDEV_TYPE_INDIRECT) == 0) - return; - - state = zpool_state_to_name(vs->vs_state, vs->vs_aux); - if (isspare) { - /* - * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for - * online drives. 
- */ - if (vs->vs_aux == VDEV_AUX_SPARED) - state = "INUSE"; - else if (vs->vs_state == VDEV_STATE_HEALTHY) - state = "AVAIL"; - } - - (void) printf("\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth, - name, state); - - if (!isspare) { - zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); - zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); - zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); - (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); - } - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, - &notpresent) == 0 || - vs->vs_state <= VDEV_STATE_CANT_OPEN) { - char *path; - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) - (void) printf(" was %s", path); - } else if (vs->vs_aux != 0) { - (void) printf(" "); - - switch (vs->vs_aux) { - case VDEV_AUX_OPEN_FAILED: - (void) printf(gettext("cannot open")); - break; - - case VDEV_AUX_BAD_GUID_SUM: - (void) printf(gettext("missing device")); - break; - - case VDEV_AUX_NO_REPLICAS: - (void) printf(gettext("insufficient replicas")); - break; - - case VDEV_AUX_VERSION_NEWER: - (void) printf(gettext("newer version")); - break; - - case VDEV_AUX_UNSUP_FEAT: - (void) printf(gettext("unsupported feature(s)")); - break; - - case VDEV_AUX_ASHIFT_TOO_BIG: - (void) printf(gettext("unsupported minimum blocksize")); - break; - - case VDEV_AUX_SPARED: - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, - &spare_cb.cb_guid) == 0); - if (zpool_iter(g_zfs, find_spare, &spare_cb) == 1) { - if (strcmp(zpool_get_name(spare_cb.cb_zhp), - zpool_get_name(zhp)) == 0) - (void) printf(gettext("currently in " - "use")); - else - (void) printf(gettext("in use by " - "pool '%s'"), - zpool_get_name(spare_cb.cb_zhp)); - zpool_close(spare_cb.cb_zhp); - } else { - (void) printf(gettext("currently in use")); - } - break; - - case VDEV_AUX_ERR_EXCEEDED: - (void) printf(gettext("too many errors")); - break; - - case VDEV_AUX_IO_FAILURE: - (void) printf(gettext("experienced I/O failures")); - break; - - case 
VDEV_AUX_BAD_LOG: - (void) printf(gettext("bad intent log")); - break; - - case VDEV_AUX_EXTERNAL: - (void) printf(gettext("external device fault")); - break; - - case VDEV_AUX_SPLIT_POOL: - (void) printf(gettext("split into new pool")); - break; - - case VDEV_AUX_ACTIVE: - (void) printf(gettext("currently in use")); - break; - - case VDEV_AUX_CHILDREN_OFFLINE: - (void) printf(gettext("all children offline")); - break; - - default: - (void) printf(gettext("corrupted data")); - break; - } - } else if (children == 0 && !isspare && - VDEV_STAT_VALID(vs_physical_ashift, vsc) && - vs->vs_configured_ashift < vs->vs_physical_ashift) { - (void) printf( - gettext(" block size: %dB configured, %dB native"), - 1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift); - } - - (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, - (uint64_t **)&ps, &c); - - if (ps != NULL && ps->pss_state == DSS_SCANNING && - vs->vs_scan_processed != 0 && children == 0) { - (void) printf(gettext(" (%s)"), - (ps->pss_func == POOL_SCAN_RESILVER) ? 
- "resilvering" : "repairing"); - } - - if ((vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE || - vs->vs_initialize_state == VDEV_INITIALIZE_SUSPENDED || - vs->vs_initialize_state == VDEV_INITIALIZE_COMPLETE) && - !vs->vs_scan_removing) { - char zbuf[1024]; - char tbuf[256]; - struct tm zaction_ts; - - time_t t = vs->vs_initialize_action_time; - int initialize_pct = 100; - if (vs->vs_initialize_state != VDEV_INITIALIZE_COMPLETE) { - initialize_pct = (vs->vs_initialize_bytes_done * 100 / - (vs->vs_initialize_bytes_est + 1)); - } - - (void) localtime_r(&t, &zaction_ts); - (void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts); - - switch (vs->vs_initialize_state) { - case VDEV_INITIALIZE_SUSPENDED: - (void) snprintf(zbuf, sizeof (zbuf), - ", suspended, started at %s", tbuf); - break; - case VDEV_INITIALIZE_ACTIVE: - (void) snprintf(zbuf, sizeof (zbuf), - ", started at %s", tbuf); - break; - case VDEV_INITIALIZE_COMPLETE: - (void) snprintf(zbuf, sizeof (zbuf), - ", completed at %s", tbuf); - break; - } - - (void) printf(gettext(" (%d%% initialized%s)"), - initialize_pct, zbuf); - } - - (void) printf("\n"); - - for (c = 0; c < children; c++) { - uint64_t islog = B_FALSE, ishole = B_FALSE; - - /* Don't print logs or holes here */ - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &islog); - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, - &ishole); - if (islog || ishole) - continue; - /* Only print normal classes here */ - if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) - continue; - - vname = zpool_vdev_name(g_zfs, zhp, child[c], - cb->cb_name_flags | VDEV_NAME_TYPE_ID); - print_status_config(zhp, cb, vname, child[c], depth + 2, - isspare); - free(vname); - } -} - -/* - * Print the configuration of an exported pool. Iterate over all vdevs in the - * pool, printing out the name and status for each one. 
- */ -static void -print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv, - int depth) -{ - nvlist_t **child; - uint_t c, children; - vdev_stat_t *vs; - char *type, *vname; - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_MISSING) == 0 || - strcmp(type, VDEV_TYPE_HOLE) == 0) - return; - - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); - - (void) printf("\t%*s%-*s", depth, "", cb->cb_namewidth - depth, name); - (void) printf(" %s", zpool_state_to_name(vs->vs_state, vs->vs_aux)); - - if (vs->vs_aux != 0) { - (void) printf(" "); - - switch (vs->vs_aux) { - case VDEV_AUX_OPEN_FAILED: - (void) printf(gettext("cannot open")); - break; - - case VDEV_AUX_BAD_GUID_SUM: - (void) printf(gettext("missing device")); - break; - - case VDEV_AUX_NO_REPLICAS: - (void) printf(gettext("insufficient replicas")); - break; - - case VDEV_AUX_VERSION_NEWER: - (void) printf(gettext("newer version")); - break; - - case VDEV_AUX_UNSUP_FEAT: - (void) printf(gettext("unsupported feature(s)")); - break; - - case VDEV_AUX_ERR_EXCEEDED: - (void) printf(gettext("too many errors")); - break; - - case VDEV_AUX_ACTIVE: - (void) printf(gettext("currently in use")); - break; - - case VDEV_AUX_CHILDREN_OFFLINE: - (void) printf(gettext("all children offline")); - break; - - default: - (void) printf(gettext("corrupted data")); - break; - } - } - (void) printf("\n"); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return; - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; - - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if (is_log) - continue; - if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) - continue; - - vname = zpool_vdev_name(g_zfs, NULL, child[c], - cb->cb_name_flags | VDEV_NAME_TYPE_ID); - print_import_config(cb, vname, child[c], depth + 2); - free(vname); - } - - if 
(nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0) { - (void) printf(gettext("\tcache\n")); - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, NULL, child[c], - cb->cb_name_flags); - (void) printf("\t %s\n", vname); - free(vname); - } - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, - &child, &children) == 0) { - (void) printf(gettext("\tspares\n")); - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, NULL, child[c], - cb->cb_name_flags); - (void) printf("\t %s\n", vname); - free(vname); - } - } -} - -/* - * Print specialized class vdevs. - * - * These are recorded as top level vdevs in the main pool child array - * but with "is_log" set to 1 or an "alloc_bias" string. We use either - * print_status_config() or print_import_config() to print the top level - * class vdevs then any of their children (eg mirrored slogs) are printed - * recursively - which works because only the top level vdev is marked. - */ -static void -print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, - const char *class) -{ - uint_t c, children; - nvlist_t **child; - boolean_t printed = B_FALSE; - - assert(zhp != NULL || !cb->cb_verbose); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, - &children) != 0) - return; - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; - char *bias = NULL; - char *type = NULL; - - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - - if (is_log) { - bias = VDEV_ALLOC_CLASS_LOGS; - } else { - (void) nvlist_lookup_string(child[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); - (void) nvlist_lookup_string(child[c], - ZPOOL_CONFIG_TYPE, &type); - } - - if (bias == NULL || strcmp(bias, class) != 0) - continue; - if (!is_log && strcmp(type, VDEV_TYPE_INDIRECT) == 0) - continue; - - if (!printed) { - (void) printf("\t%s\t\n", gettext(class)); - printed = B_TRUE; - } - - char *name = zpool_vdev_name(g_zfs, zhp, child[c], - 
cb->cb_name_flags | VDEV_NAME_TYPE_ID); - if (cb->cb_print_status) - print_status_config(zhp, cb, name, child[c], 2, - B_FALSE); - else - print_import_config(cb, name, child[c], 2); - free(name); - } -} - -/* - * Display the status for the given pool. - */ -static void -show_import(nvlist_t *config) -{ - uint64_t pool_state; - vdev_stat_t *vs; - char *name; - uint64_t guid; - uint64_t hostid = 0; - char *msgid; - char *hostname = "unknown"; - nvlist_t *nvroot, *nvinfo; - int reason; - const char *health; - uint_t vsc; - char *comment; - status_cbdata_t cb = { 0 }; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &guid) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &pool_state) == 0); - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - health = zpool_state_to_name(vs->vs_state, vs->vs_aux); - - reason = zpool_import_status(config, &msgid); - - (void) printf(gettext(" pool: %s\n"), name); - (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); - (void) printf(gettext(" state: %s"), health); - if (pool_state == POOL_STATE_DESTROYED) - (void) printf(gettext(" (DESTROYED)")); - (void) printf("\n"); - - switch (reason) { - case ZPOOL_STATUS_MISSING_DEV_R: - case ZPOOL_STATUS_MISSING_DEV_NR: - case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext(" status: One or more devices are " - "missing from the system.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_LABEL_R: - case ZPOOL_STATUS_CORRUPT_LABEL_NR: - (void) printf(gettext(" status: One or more devices contains " - "corrupted data.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_DATA: - (void) printf( - gettext(" status: The pool data is corrupted.\n")); - break; - - case ZPOOL_STATUS_OFFLINE_DEV: - (void) printf(gettext(" status: One or more devices " - "are 
offlined.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_POOL: - (void) printf(gettext(" status: The pool metadata is " - "corrupted.\n")); - break; - - case ZPOOL_STATUS_VERSION_OLDER: - (void) printf(gettext(" status: The pool is formatted using a " - "legacy on-disk version.\n")); - break; - - case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext(" status: The pool is formatted using an " - "incompatible version.\n")); - break; - - case ZPOOL_STATUS_FEAT_DISABLED: - (void) printf(gettext(" status: Some supported features are " - "not enabled on the pool.\n")); - break; - - case ZPOOL_STATUS_UNSUP_FEAT_READ: - (void) printf(gettext("status: The pool uses the following " - "feature(s) not supported on this system:\n")); - zpool_print_unsup_feat(config); - break; - - case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - (void) printf(gettext("status: The pool can only be accessed " - "in read-only mode on this system. It\n\tcannot be " - "accessed in read-write mode because it uses the " - "following\n\tfeature(s) not supported on this system:\n")); - zpool_print_unsup_feat(config); - break; - - case ZPOOL_STATUS_HOSTID_ACTIVE: - (void) printf(gettext(" status: The pool is currently " - "imported by another system.\n")); - break; - - case ZPOOL_STATUS_HOSTID_REQUIRED: - (void) printf(gettext(" status: The pool has the " - "multihost property on. 
It cannot\n\tbe safely imported " - "when the system hostid is not set.\n")); - break; - - case ZPOOL_STATUS_HOSTID_MISMATCH: - (void) printf(gettext(" status: The pool was last accessed by " - "another system.\n")); - break; - - case ZPOOL_STATUS_FAULTED_DEV_R: - case ZPOOL_STATUS_FAULTED_DEV_NR: - (void) printf(gettext(" status: One or more devices are " - "faulted.\n")); - break; - - case ZPOOL_STATUS_BAD_LOG: - (void) printf(gettext(" status: An intent log record cannot be " - "read.\n")); - break; - - case ZPOOL_STATUS_RESILVERING: - (void) printf(gettext(" status: One or more devices were being " - "resilvered.\n")); - break; - - case ZPOOL_STATUS_NON_NATIVE_ASHIFT: - (void) printf(gettext("status: One or more devices were " - "configured to use a non-native block size.\n" - "\tExpect reduced performance.\n")); - break; - - default: - /* - * No other status can be seen when importing pools. - */ - assert(reason == ZPOOL_STATUS_OK); - } - - /* - * Print out an action according to the overall state of the pool. - */ - if (vs->vs_state == VDEV_STATE_HEALTHY) { - if (reason == ZPOOL_STATUS_VERSION_OLDER || - reason == ZPOOL_STATUS_FEAT_DISABLED) { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric identifier, " - "though\n\tsome features will not be available " - "without an explicit 'zpool upgrade'.\n")); - } else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric " - "identifier and\n\tthe '-f' flag.\n")); - } else { - (void) printf(gettext(" action: The pool can be " - "imported using its name or numeric " - "identifier.\n")); - } - } else if (vs->vs_state == VDEV_STATE_DEGRADED) { - (void) printf(gettext(" action: The pool can be imported " - "despite missing or damaged devices. 
The\n\tfault " - "tolerance of the pool may be compromised if imported.\n")); - } else { - switch (reason) { - case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext(" action: The pool cannot be " - "imported. Access the pool on a system running " - "newer\n\tsoftware, or recreate the pool from " - "backup.\n")); - break; - case ZPOOL_STATUS_UNSUP_FEAT_READ: - (void) printf(gettext("action: The pool cannot be " - "imported. Access the pool on a system that " - "supports\n\tthe required feature(s), or recreate " - "the pool from backup.\n")); - break; - case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - (void) printf(gettext("action: The pool cannot be " - "imported in read-write mode. Import the pool " - "with\n" - "\t\"-o readonly=on\", access the pool on a system " - "that supports the\n\trequired feature(s), or " - "recreate the pool from backup.\n")); - break; - case ZPOOL_STATUS_MISSING_DEV_R: - case ZPOOL_STATUS_MISSING_DEV_NR: - case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext(" action: The pool cannot be " - "imported. Attach the missing\n\tdevices and try " - "again.\n")); - break; - case ZPOOL_STATUS_HOSTID_ACTIVE: - VERIFY0(nvlist_lookup_nvlist(config, - ZPOOL_CONFIG_LOAD_INFO, &nvinfo)); - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) - hostname = fnvlist_lookup_string(nvinfo, - ZPOOL_CONFIG_MMP_HOSTNAME); - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) - hostid = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_HOSTID); - - (void) printf(gettext(" action: The pool must be " - "exported from %s (hostid=%lx)\n\tbefore it " - "can be safely imported.\n"), hostname, - (unsigned long) hostid); - break; - case ZPOOL_STATUS_HOSTID_REQUIRED: - (void) printf(gettext(" action: Check the SMF " - "svc:/system/hostid service.\n")); - break; - default: - (void) printf(gettext(" action: The pool cannot be " - "imported due to damaged devices or data.\n")); - } - } - - /* Print the comment attached to the pool. 
*/ - if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) - (void) printf(gettext("comment: %s\n"), comment); - - /* - * If the state is "closed" or "can't open", and the aux state - * is "corrupt data": - */ - if (((vs->vs_state == VDEV_STATE_CLOSED) || - (vs->vs_state == VDEV_STATE_CANT_OPEN)) && - (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) { - if (pool_state == POOL_STATE_DESTROYED) - (void) printf(gettext("\tThe pool was destroyed, " - "but can be imported using the '-Df' flags.\n")); - else if (pool_state != POOL_STATE_EXPORTED) - (void) printf(gettext("\tThe pool may be active on " - "another system, but can be imported using\n\t" - "the '-f' flag.\n")); - } - - if (msgid != NULL) - (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), - msgid); - - (void) printf(gettext(" config:\n\n")); - - cb.cb_namewidth = max_width(NULL, nvroot, 0, 0, 0); - if (cb.cb_namewidth < 10) - cb.cb_namewidth = 10; - - print_import_config(&cb, name, nvroot, 0); - - print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_DEDUP); - print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_SPECIAL); - print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_CLASS_LOGS); - - if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { - (void) printf(gettext("\n\tAdditional devices are known to " - "be part of this pool, though their\n\texact " - "configuration cannot be determined.\n")); - } -} - -static boolean_t -zfs_force_import_required(nvlist_t *config) -{ - uint64_t state; - uint64_t hostid = 0; - nvlist_t *nvinfo; - - state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); - - if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) - return (B_TRUE); - - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) { - mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_STATE); - - if (mmp_state != MMP_STATE_INACTIVE) - return 
(B_TRUE); - } - - return (B_FALSE); -} - -/* - * Perform the import for the given configuration. This passes the heavy - * lifting off to zpool_import_props(), and then mounts the datasets contained - * within the pool. - */ -static int -do_import(nvlist_t *config, const char *newname, const char *mntopts, - nvlist_t *props, int flags) -{ - zpool_handle_t *zhp; - char *name; - uint64_t version; - - name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); - version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION); - - if (!SPA_VERSION_IS_SUPPORTED(version)) { - (void) fprintf(stderr, gettext("cannot import '%s': pool " - "is formatted using an unsupported ZFS version\n"), name); - return (1); - } else if (zfs_force_import_required(config) && - !(flags & ZFS_IMPORT_ANY_HOST)) { - mmp_state_t mmp_state = MMP_STATE_INACTIVE; - nvlist_t *nvinfo; - - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) - mmp_state = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_STATE); - - if (mmp_state == MMP_STATE_ACTIVE) { - char *hostname = "<unknown>"; - uint64_t hostid = 0; - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) - hostname = fnvlist_lookup_string(nvinfo, - ZPOOL_CONFIG_MMP_HOSTNAME); - - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) - hostid = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_HOSTID); - - (void) fprintf(stderr, gettext("cannot import '%s': " - "pool is imported on %s (hostid: " - "0x%lx)\nExport the pool on the other system, " - "then run 'zpool import'.\n"), - name, hostname, (unsigned long) hostid); - } else if (mmp_state == MMP_STATE_NO_HOSTID) { - (void) fprintf(stderr, gettext("Cannot import '%s': " - "pool has the multihost property on and the\n" - "system's hostid is not set.\n"), name); - } else { - char *hostname = "<unknown>"; - uint64_t timestamp = 0; - uint64_t hostid = 0; - - if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) - hostname = 
fnvlist_lookup_string(config, - ZPOOL_CONFIG_HOSTNAME); - - if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP)) - timestamp = fnvlist_lookup_uint64(config, - ZPOOL_CONFIG_TIMESTAMP); - - if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) - hostid = fnvlist_lookup_uint64(config, - ZPOOL_CONFIG_HOSTID); - - (void) fprintf(stderr, gettext("cannot import '%s': " - "pool was previously in use from another system.\n" - "Last accessed by %s (hostid=%lx) at %s" - "The pool can be imported, use 'zpool import -f' " - "to import the pool.\n"), name, hostname, - (unsigned long)hostid, ctime((time_t *)&timestamp)); - - } - - return (1); - } - - if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) - return (1); - - if (newname != NULL) - name = (char *)newname; - - if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) - return (1); - - if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && - !(flags & ZFS_IMPORT_ONLY) && - zpool_enable_datasets(zhp, mntopts, 0) != 0) { - zpool_close(zhp); - return (1); - } - - zpool_close(zhp); - return (0); -} - -/* - * zpool checkpoint <pool> - * checkpoint --discard <pool> - * - * -d Discard the checkpoint from a checkpointed - * --discard pool. - * - * Checkpoints the specified pool, by taking a "snapshot" of its - * current state. A pool can only have one checkpoint at a time.
- */ -int -zpool_do_checkpoint(int argc, char **argv) -{ - boolean_t discard; - char *pool; - zpool_handle_t *zhp; - int c, err; - - struct option long_options[] = { - {"discard", no_argument, NULL, 'd'}, - {0, 0, 0, 0} - }; - - discard = B_FALSE; - while ((c = getopt_long(argc, argv, ":d", long_options, NULL)) != -1) { - switch (c) { - case 'd': - discard = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool argument\n")); - usage(B_FALSE); - } - - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - pool = argv[0]; - - if ((zhp = zpool_open(g_zfs, pool)) == NULL) { - /* As a special case, check for use of '/' in the name */ - if (strchr(pool, '/') != NULL) - (void) fprintf(stderr, gettext("'zpool checkpoint' " - "doesn't work on datasets. To save the state " - "of a dataset from a specific point in time " - "please use 'zfs snapshot'\n")); - return (1); - } - - if (discard) - err = (zpool_discard_checkpoint(zhp) != 0); - else - err = (zpool_checkpoint(zhp) != 0); - - zpool_close(zhp); - - return (err); -} - -#define CHECKPOINT_OPT 1024 - -/* - * zpool import [-d dir] [-D] - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] - * [-d dir | -c cachefile] [-f] -a - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] - * [-d dir | -c cachefile] [-f] [-n] [-F] [-t] - * <pool | id> [newpool] - * - * -c Read pool information from a cachefile instead of searching - * devices. - * - * -d Scan in a specific directory, other than /dev/dsk. More than - * one directory can be specified using multiple '-d' options. - * - * -D Scan for previously destroyed pools or import all or only - * specified destroyed pools. - * - * -R Temporarily import the pool, with all mountpoints relative to - * the given root. 
The pool will remain exported when the machine - * is rebooted. - * - * -V Import even in the presence of faulted vdevs. This is an - * intentionally undocumented option for testing purposes, and - * treats the pool configuration as complete, leaving any bad - * vdevs in the FAULTED state. In other words, it does verbatim - * import. - * - * -f Force import, even if it appears that the pool is active. - * - * -F Attempt rewind if necessary. - * - * -n See if rewind would work, but don't actually rewind. - * - * -N Import the pool but don't mount datasets. - * - * -t Use newpool as a temporary pool name instead of renaming - * the pool. - * - * -T Specify a starting txg to use for import. This option is - * intentionally undocumented option for testing purposes. - * - * -a Import all pools found. - * - * -o Set property=value and/or temporary mount options (without '='). - * - * --rewind-to-checkpoint - * Import the pool and revert back to the checkpoint. - * - * The import command scans for pools to import, and import pools based on pool - * name and GUID. The pool can also be renamed as part of the import process. 
- */ -int -zpool_do_import(int argc, char **argv) -{ - char **searchdirs = NULL; - int nsearch = 0; - int c; - int err = 0; - nvlist_t *pools = NULL; - boolean_t do_all = B_FALSE; - boolean_t do_destroyed = B_FALSE; - char *mntopts = NULL; - nvpair_t *elem; - nvlist_t *config; - uint64_t searchguid = 0; - char *searchname = NULL; - char *propval; - nvlist_t *found_config; - nvlist_t *policy = NULL; - nvlist_t *props = NULL; - boolean_t first; - int flags = ZFS_IMPORT_NORMAL; - uint32_t rewind_policy = ZPOOL_NO_REWIND; - boolean_t dryrun = B_FALSE; - boolean_t do_rewind = B_FALSE; - boolean_t xtreme_rewind = B_FALSE; - uint64_t pool_state, txg = -1ULL; - char *cachefile = NULL; - importargs_t idata = { 0 }; - char *endptr; - - - struct option long_options[] = { - {"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT}, - {0, 0, 0, 0} - }; - - /* check options */ - while ((c = getopt_long(argc, argv, ":aCc:d:DEfFmnNo:rR:tT:VX", - long_options, NULL)) != -1) { - switch (c) { - case 'a': - do_all = B_TRUE; - break; - case 'c': - cachefile = optarg; - break; - case 'd': - if (searchdirs == NULL) { - searchdirs = safe_malloc(sizeof (char *)); - } else { - char **tmp = safe_malloc((nsearch + 1) * - sizeof (char *)); - bcopy(searchdirs, tmp, nsearch * - sizeof (char *)); - free(searchdirs); - searchdirs = tmp; - } - searchdirs[nsearch++] = optarg; - break; - case 'D': - do_destroyed = B_TRUE; - break; - case 'f': - flags |= ZFS_IMPORT_ANY_HOST; - break; - case 'F': - do_rewind = B_TRUE; - break; - case 'm': - flags |= ZFS_IMPORT_MISSING_LOG; - break; - case 'n': - dryrun = B_TRUE; - break; - case 'N': - flags |= ZFS_IMPORT_ONLY; - break; - case 'o': - if ((propval = strchr(optarg, '=')) != NULL) { - *propval = '\0'; - propval++; - if (add_prop_list(optarg, propval, - &props, B_TRUE)) - goto error; - } else { - mntopts = optarg; - } - break; - case 'R': - if (add_prop_list(zpool_prop_to_name( - ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) - goto error; - if 
(add_prop_list_default(zpool_prop_to_name( - ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) - goto error; - break; - case 't': - flags |= ZFS_IMPORT_TEMP_NAME; - if (add_prop_list_default(zpool_prop_to_name( - ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) - goto error; - break; - case 'T': - errno = 0; - txg = strtoull(optarg, &endptr, 0); - if (errno != 0 || *endptr != '\0') { - (void) fprintf(stderr, - gettext("invalid txg value\n")); - usage(B_FALSE); - } - rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND; - break; - case 'V': - flags |= ZFS_IMPORT_VERBATIM; - break; - case 'X': - xtreme_rewind = B_TRUE; - break; - case CHECKPOINT_OPT: - flags |= ZFS_IMPORT_CHECKPOINT; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (cachefile && nsearch != 0) { - (void) fprintf(stderr, gettext("-c is incompatible with -d\n")); - usage(B_FALSE); - } - - if ((dryrun || xtreme_rewind) && !do_rewind) { - (void) fprintf(stderr, - gettext("-n or -X only meaningful with -F\n")); - usage(B_FALSE); - } - if (dryrun) - rewind_policy = ZPOOL_TRY_REWIND; - else if (do_rewind) - rewind_policy = ZPOOL_DO_REWIND; - if (xtreme_rewind) - rewind_policy |= ZPOOL_EXTREME_REWIND; - - /* In the future, we can capture further policy and include it here */ - if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || - nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, txg) != 0 || - nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, - rewind_policy) != 0) - goto error; - - if (searchdirs == NULL) { - searchdirs = safe_malloc(sizeof (char *)); - searchdirs[0] = "/dev"; - nsearch = 1; - } - - /* check argument count */ - if (do_all) { - if (argc != 0) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - } else { - if (argc > 2) { - 
(void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - /* - * Check for the SYS_CONFIG privilege. We do this explicitly - * here because otherwise any attempt to discover pools will - * silently fail. - */ - if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { - (void) fprintf(stderr, gettext("cannot " - "discover pools: permission denied\n")); - free(searchdirs); - nvlist_free(policy); - return (1); - } - } - - /* - * Depending on the arguments given, we do one of the following: - * - * <none> Iterate through all pools and display information about - * each one. - * - * -a Iterate through all pools and try to import each one. - * - * <id> Find the pool that corresponds to the given GUID/pool - * name and import that one. - * - * -D Above options applies only to destroyed pools. - */ - if (argc != 0) { - char *endptr; - - errno = 0; - searchguid = strtoull(argv[0], &endptr, 10); - if (errno != 0 || *endptr != '\0') { - searchname = argv[0]; - searchguid = 0; - } - found_config = NULL; - - /* - * User specified a name or guid. Ensure it's unique. 
- */ - idata.unique = B_TRUE; - } - - - idata.path = searchdirs; - idata.paths = nsearch; - idata.poolname = searchname; - idata.guid = searchguid; - idata.cachefile = cachefile; - idata.policy = policy; - - pools = zpool_search_import(g_zfs, &idata); - - if (pools != NULL && idata.exists && - (argc == 1 || strcmp(argv[0], argv[1]) == 0)) { - (void) fprintf(stderr, gettext("cannot import '%s': " - "a pool with that name already exists\n"), - argv[0]); - (void) fprintf(stderr, gettext("use the form 'zpool import " - "[-t] <pool | id> <newpool>' to give it a new temporary " - "or permanent name\n")); - err = 1; - } else if (pools == NULL && idata.exists) { - (void) fprintf(stderr, gettext("cannot import '%s': " - "a pool with that name is already created/imported,\n"), - argv[0]); - (void) fprintf(stderr, gettext("and no additional pools " - "with that name were found\n")); - err = 1; - } else if (pools == NULL) { - if (argc != 0) { - (void) fprintf(stderr, gettext("cannot import '%s': " - "no such pool available\n"), argv[0]); - } - err = 1; - } - - if (err == 1) { - free(searchdirs); - nvlist_free(policy); - return (1); - } - - /* - * At this point we have a list of import candidate configs. Even if - * we were searching by pool name or guid, we still need to - * post-process the list to deal with pool state and possible - * duplicate names. 
- */ - err = 0; - elem = NULL; - first = B_TRUE; - while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - - verify(nvpair_value_nvlist(elem, &config) == 0); - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &pool_state) == 0); - if (!do_destroyed && pool_state == POOL_STATE_DESTROYED) - continue; - if (do_destroyed && pool_state != POOL_STATE_DESTROYED) - continue; - - verify(nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY, - policy) == 0); - - if (argc == 0) { - if (first) - first = B_FALSE; - else if (!do_all) - (void) printf("\n"); - - if (do_all) { - err |= do_import(config, NULL, mntopts, - props, flags); - } else { - show_import(config); - } - } else if (searchname != NULL) { - char *name; - - /* - * We are searching for a pool based on name. - */ - verify(nvlist_lookup_string(config, - ZPOOL_CONFIG_POOL_NAME, &name) == 0); - - if (strcmp(name, searchname) == 0) { - if (found_config != NULL) { - (void) fprintf(stderr, gettext( - "cannot import '%s': more than " - "one matching pool\n"), searchname); - (void) fprintf(stderr, gettext( - "import by numeric ID instead\n")); - err = B_TRUE; - } - found_config = config; - } - } else { - uint64_t guid; - - /* - * Search for a pool by guid. - */ - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_POOL_GUID, &guid) == 0); - - if (guid == searchguid) - found_config = config; - } - } - - /* - * If we were searching for a specific pool, verify that we found a - * pool, and then do the import. - */ - if (argc != 0 && err == 0) { - if (found_config == NULL) { - (void) fprintf(stderr, gettext("cannot import '%s': " - "no such pool available\n"), argv[0]); - err = B_TRUE; - } else { - err |= do_import(found_config, argc == 1 ? NULL : - argv[1], mntopts, props, flags); - } - } - - /* - * If we were just looking for pools, report an error if none were - * found. 
- */ - if (argc == 0 && first) - (void) fprintf(stderr, - gettext("no pools available to import\n")); - -error: - nvlist_free(props); - nvlist_free(pools); - nvlist_free(policy); - free(searchdirs); - - return (err ? 1 : 0); -} - -/* - * zpool sync [-f] [pool] ... - * - * -f (undocumented) force uberblock (and config including zpool cache file) - * update. - * - * Sync the specified pool(s). - * Without arguments "zpool sync" will sync all pools. - * This command initiates TXG sync(s) and will return after the TXG(s) commit. - * - */ -static int -zpool_do_sync(int argc, char **argv) -{ - int ret; - boolean_t force = B_FALSE; - - /* check options */ - while ((ret = getopt(argc, argv, "f")) != -1) { - switch (ret) { - case 'f': - force = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* if argc == 0 we will execute zpool_sync_one on all pools */ - ret = for_each_pool(argc, argv, B_FALSE, NULL, zpool_sync_one, &force); - - return (ret); -} - -typedef struct iostat_cbdata { - boolean_t cb_verbose; - int cb_name_flags; - int cb_namewidth; - int cb_iteration; - boolean_t cb_scripted; - zpool_list_t *cb_list; -} iostat_cbdata_t; - -static void -print_iostat_separator(iostat_cbdata_t *cb) -{ - int i = 0; - - for (i = 0; i < cb->cb_namewidth; i++) - (void) printf("-"); - (void) printf(" ----- ----- ----- ----- ----- -----\n"); -} - -static void -print_iostat_header(iostat_cbdata_t *cb) -{ - (void) printf("%*s capacity operations bandwidth\n", - cb->cb_namewidth, ""); - (void) printf("%-*s alloc free read write read write\n", - cb->cb_namewidth, "pool"); - print_iostat_separator(cb); -} - -/* - * Display a single statistic. 
- */ -static void -print_one_stat(uint64_t value) -{ - char buf[64]; - - zfs_nicenum(value, buf, sizeof (buf)); - (void) printf(" %5s", buf); -} - -static const char *class_name[] = { - VDEV_ALLOC_BIAS_DEDUP, - VDEV_ALLOC_BIAS_SPECIAL, - VDEV_ALLOC_CLASS_LOGS -}; - -/* - * Print out all the statistics for the given vdev. This can either be the - * toplevel configuration, or called recursively. If 'name' is NULL, then this - * is a verbose output, and we don't want to display the toplevel pool stats. - * - * Returns the number of stat lines printed. - */ -static unsigned int -print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, - nvlist_t *newnv, iostat_cbdata_t *cb, int depth) -{ - nvlist_t **oldchild, **newchild; - uint_t c, children; - vdev_stat_t *oldvs, *newvs; - vdev_stat_t zerovs = { 0 }; - char *vname; - int ret = 0; - uint64_t tdelta; - double scale; - - if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) - return (ret); - - if (oldnv != NULL) { - verify(nvlist_lookup_uint64_array(oldnv, - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0); - } else { - oldvs = &zerovs; - } - - verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&newvs, &c) == 0); - - if (strlen(name) + depth > cb->cb_namewidth) - (void) printf("%*s%s", depth, "", name); - else - (void) printf("%*s%s%*s", depth, "", name, - (int)(cb->cb_namewidth - strlen(name) - depth), ""); - - tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; - - if (tdelta == 0) - scale = 1.0; - else - scale = (double)NANOSEC / tdelta; - - /* only toplevel vdevs have capacity stats */ - if (newvs->vs_space == 0) { - (void) printf(" - -"); - } else { - print_one_stat(newvs->vs_alloc); - print_one_stat(newvs->vs_space - newvs->vs_alloc); - } - - print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] - - oldvs->vs_ops[ZIO_TYPE_READ]))); - - print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] - - oldvs->vs_ops[ZIO_TYPE_WRITE]))); - - 
print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] - - oldvs->vs_bytes[ZIO_TYPE_READ]))); - - print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] - - oldvs->vs_bytes[ZIO_TYPE_WRITE]))); - - (void) printf("\n"); - - if (!cb->cb_verbose) - return (ret); - - if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN, - &newchild, &children) != 0) - return (ret); - - if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN, - &oldchild, &c) != 0) - return (ret); - - /* - * print normal top-level devices - */ - for (c = 0; c < children; c++) { - uint64_t ishole = B_FALSE, islog = B_FALSE; - - (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_HOLE, - &ishole); - - (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, - &islog); - - if (ishole || islog) - continue; - - if (nvlist_exists(newchild[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) - continue; - - vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); - print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, - newchild[c], cb, depth + 2); - free(vname); - } - - /* - * print all other top-level devices - */ - for (uint_t n = 0; n < 3; n++) { - for (c = 0; c < children; c++) { - uint64_t islog = B_FALSE; - char *bias = NULL; - char *type = NULL; - - (void) nvlist_lookup_uint64(newchild[c], - ZPOOL_CONFIG_IS_LOG, &islog); - if (islog) { - bias = VDEV_ALLOC_CLASS_LOGS; - } else { - (void) nvlist_lookup_string(newchild[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); - (void) nvlist_lookup_string(newchild[c], - ZPOOL_CONFIG_TYPE, &type); - } - if (bias == NULL || strcmp(bias, class_name[n]) != 0) - continue; - if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0) - continue; - - vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); - ret += print_vdev_stats(zhp, vname, oldnv ? 
- oldchild[c] : NULL, newchild[c], cb, depth + 2); - free(vname); - } - - } - - /* - * Include level 2 ARC devices in iostat output - */ - if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE, - &newchild, &children) != 0) - return (ret); - - if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE, - &oldchild, &c) != 0) - return (ret); - - if (children > 0) { - (void) printf("%-*s - - - - - " - "-\n", cb->cb_namewidth, "cache"); - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, zhp, newchild[c], - cb->cb_name_flags); - print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, - newchild[c], cb, depth + 2); - free(vname); - } - } - - return (ret); -} - -static int -refresh_iostat(zpool_handle_t *zhp, void *data) -{ - iostat_cbdata_t *cb = data; - boolean_t missing; - - /* - * If the pool has disappeared, remove it from the list and continue. - */ - if (zpool_refresh_stats(zhp, &missing) != 0) - return (-1); - - if (missing) - pool_list_remove(cb->cb_list, zhp); - - return (0); -} - -/* - * Callback to print out the iostats for the given pool. - */ -int -print_iostat(zpool_handle_t *zhp, void *data) -{ - iostat_cbdata_t *cb = data; - nvlist_t *oldconfig, *newconfig; - nvlist_t *oldnvroot, *newnvroot; - - newconfig = zpool_get_config(zhp, &oldconfig); - - if (cb->cb_iteration == 1) - oldconfig = NULL; - - verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, - &newnvroot) == 0); - - if (oldconfig == NULL) - oldnvroot = NULL; - else - verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE, - &oldnvroot) == 0); - - /* - * Print out the statistics for the pool. 
- */ - print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); - - if (cb->cb_verbose) - print_iostat_separator(cb); - - return (0); -} - -int -get_namewidth(zpool_handle_t *zhp, void *data) -{ - iostat_cbdata_t *cb = data; - nvlist_t *config, *nvroot; - - if ((config = zpool_get_config(zhp, NULL)) != NULL) { - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if (!cb->cb_verbose) - cb->cb_namewidth = strlen(zpool_get_name(zhp)); - else - cb->cb_namewidth = max_width(zhp, nvroot, 0, - cb->cb_namewidth, cb->cb_name_flags); - } - - /* - * The width must fall into the range [10,38]. The upper limit is the - * maximum we can have and still fit in 80 columns. - */ - if (cb->cb_namewidth < 10) - cb->cb_namewidth = 10; - if (cb->cb_namewidth > 38) - cb->cb_namewidth = 38; - - return (0); -} - -/* - * Parse the input string, get the 'interval' and 'count' value if there is one. - */ -static void -get_interval_count(int *argcp, char **argv, unsigned long *iv, - unsigned long *cnt) -{ - unsigned long interval = 0, count = 0; - int argc = *argcp, errno; - - /* - * Determine if the last argument is an integer or a pool name - */ - if (argc > 0 && isdigit(argv[argc - 1][0])) { - char *end; - - errno = 0; - interval = strtoul(argv[argc - 1], &end, 10); - - if (*end == '\0' && errno == 0) { - if (interval == 0) { - (void) fprintf(stderr, gettext("interval " - "cannot be zero\n")); - usage(B_FALSE); - } - /* - * Ignore the last parameter - */ - argc--; - } else { - /* - * If this is not a valid number, just plow on. The - * user will get a more informative error message later - * on. - */ - interval = 0; - } - } - - /* - * If the last argument is also an integer, then we have both a count - * and an interval. 
- */ - if (argc > 0 && isdigit(argv[argc - 1][0])) { - char *end; - - errno = 0; - count = interval; - interval = strtoul(argv[argc - 1], &end, 10); - - if (*end == '\0' && errno == 0) { - if (interval == 0) { - (void) fprintf(stderr, gettext("interval " - "cannot be zero\n")); - usage(B_FALSE); - } - - /* - * Ignore the last parameter - */ - argc--; - } else { - interval = 0; - } - } - - *iv = interval; - *cnt = count; - *argcp = argc; -} - -static void -get_timestamp_arg(char c) -{ - if (c == 'u') - timestamp_fmt = UDATE; - else if (c == 'd') - timestamp_fmt = DDATE; - else - usage(B_FALSE); -} - -/* - * zpool iostat [-gLPv] [-T d|u] [pool] ... [interval [count]] - * - * -g Display guid for individual vdev name. - * -L Follow links when resolving vdev path name. - * -P Display full path for vdev name. - * -v Display statistics for individual vdevs - * -T Display a timestamp in date(1) or Unix format - * - * This command can be tricky because we want to be able to deal with pool - * creation/destruction as well as vdev configuration changes. The bulk of this - * processing is handled by the pool_list_* routines in zpool_iter.c. We rely - * on pool_list_update() to detect the addition of new pools. Configuration - * changes are all handled within libzfs. 
- */ -int -zpool_do_iostat(int argc, char **argv) -{ - int c; - int ret; - int npools; - unsigned long interval = 0, count = 0; - zpool_list_t *list; - boolean_t verbose = B_FALSE; - boolean_t guid = B_FALSE; - boolean_t follow_links = B_FALSE; - boolean_t full_name = B_FALSE; - iostat_cbdata_t cb = { 0 }; - - /* check options */ - while ((c = getopt(argc, argv, "gLPT:v")) != -1) { - switch (c) { - case 'g': - guid = B_TRUE; - break; - case 'L': - follow_links = B_TRUE; - break; - case 'P': - full_name = B_TRUE; - break; - case 'T': - get_timestamp_arg(*optarg); - break; - case 'v': - verbose = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - get_interval_count(&argc, argv, &interval, &count); - - /* - * Construct the list of all interesting pools. - */ - ret = 0; - if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL) - return (1); - - if (pool_list_count(list) == 0 && argc != 0) { - pool_list_free(list); - return (1); - } - - if (pool_list_count(list) == 0 && interval == 0) { - pool_list_free(list); - (void) fprintf(stderr, gettext("no pools available\n")); - return (1); - } - - /* - * Enter the main iostat loop. - */ - cb.cb_list = list; - cb.cb_verbose = verbose; - if (guid) - cb.cb_name_flags |= VDEV_NAME_GUID; - if (follow_links) - cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; - if (full_name) - cb.cb_name_flags |= VDEV_NAME_PATH; - cb.cb_iteration = 0; - cb.cb_namewidth = 0; - - for (;;) { - pool_list_update(list); - - if ((npools = pool_list_count(list)) == 0) - break; - - /* - * Refresh all statistics. This is done as an explicit step - * before calculating the maximum name width, so that any - * configuration changes are properly accounted for. - */ - (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb); - - /* - * Iterate over all pools to determine the maximum width - * for the pool / device name column across all pools. 
- */ - cb.cb_namewidth = 0; - (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); - - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); - - /* - * If it's the first time, or verbose mode, print the header. - */ - if (++cb.cb_iteration == 1 || verbose) - print_iostat_header(&cb); - - (void) pool_list_iter(list, B_FALSE, print_iostat, &cb); - - /* - * If there's more than one pool, and we're not in verbose mode - * (which prints a separator for us), then print a separator. - */ - if (npools > 1 && !verbose) - print_iostat_separator(&cb); - - if (verbose) - (void) printf("\n"); - - /* - * Flush the output so that redirection to a file isn't buffered - * indefinitely. - */ - (void) fflush(stdout); - - if (interval == 0) - break; - - if (count != 0 && --count == 0) - break; - - (void) sleep(interval); - } - - pool_list_free(list); - - return (ret); -} - -typedef struct list_cbdata { - boolean_t cb_verbose; - int cb_name_flags; - int cb_namewidth; - boolean_t cb_scripted; - zprop_list_t *cb_proplist; - boolean_t cb_literal; -} list_cbdata_t; - - -/* - * Given a list of columns to display, output appropriate headers for each one. - */ -static void -print_header(list_cbdata_t *cb) -{ - zprop_list_t *pl = cb->cb_proplist; - char headerbuf[ZPOOL_MAXPROPLEN]; - const char *header; - boolean_t first = B_TRUE; - boolean_t right_justify; - size_t width = 0; - - for (; pl != NULL; pl = pl->pl_next) { - width = pl->pl_width; - if (first && cb->cb_verbose) { - /* - * Reset the width to accommodate the verbose listing - * of devices. 
- */ - width = cb->cb_namewidth; - } - - if (!first) - (void) printf(" "); - else - first = B_FALSE; - - right_justify = B_FALSE; - if (pl->pl_prop != ZPROP_INVAL) { - header = zpool_prop_column_name(pl->pl_prop); - right_justify = zpool_prop_align_right(pl->pl_prop); - } else { - int i; - - for (i = 0; pl->pl_user_prop[i] != '\0'; i++) - headerbuf[i] = toupper(pl->pl_user_prop[i]); - headerbuf[i] = '\0'; - header = headerbuf; - } - - if (pl->pl_next == NULL && !right_justify) - (void) printf("%s", header); - else if (right_justify) - (void) printf("%*s", width, header); - else - (void) printf("%-*s", width, header); - - } - - (void) printf("\n"); -} - -/* - * Given a pool and a list of properties, print out all the properties according - * to the described layout. Used by zpool_do_list(). - */ -static void -print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) -{ - zprop_list_t *pl = cb->cb_proplist; - boolean_t first = B_TRUE; - char property[ZPOOL_MAXPROPLEN]; - char *propstr; - boolean_t right_justify; - size_t width; - - for (; pl != NULL; pl = pl->pl_next) { - - width = pl->pl_width; - if (first && cb->cb_verbose) { - /* - * Reset the width to accommodate the verbose listing - * of devices. 
- */ - width = cb->cb_namewidth; - } - - if (!first) { - if (cb->cb_scripted) - (void) printf("\t"); - else - (void) printf(" "); - } else { - first = B_FALSE; - } - - right_justify = B_FALSE; - if (pl->pl_prop != ZPROP_INVAL) { - if (zpool_get_prop(zhp, pl->pl_prop, property, - sizeof (property), NULL, cb->cb_literal) != 0) - propstr = "-"; - else - propstr = property; - - right_justify = zpool_prop_align_right(pl->pl_prop); - } else if ((zpool_prop_feature(pl->pl_user_prop) || - zpool_prop_unsupported(pl->pl_user_prop)) && - zpool_prop_get_feature(zhp, pl->pl_user_prop, property, - sizeof (property)) == 0) { - propstr = property; - } else { - propstr = "-"; - } - - - /* - * If this is being called in scripted mode, or if this is the - * last column and it is left-justified, don't include a width - * format specifier. - */ - if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) - (void) printf("%s", propstr); - else if (right_justify) - (void) printf("%*s", width, propstr); - else - (void) printf("%-*s", width, propstr); - } - - (void) printf("\n"); -} - -static void -print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted, - boolean_t valid) -{ - char propval[64]; - boolean_t fixed; - size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); - - switch (prop) { - case ZPOOL_PROP_EXPANDSZ: - case ZPOOL_PROP_CHECKPOINT: - if (value == 0) - (void) strlcpy(propval, "-", sizeof (propval)); - else - zfs_nicenum(value, propval, sizeof (propval)); - break; - case ZPOOL_PROP_FRAGMENTATION: - if (value == ZFS_FRAG_INVALID) { - (void) strlcpy(propval, "-", sizeof (propval)); - } else { - (void) snprintf(propval, sizeof (propval), "%llu%%", - value); - } - break; - case ZPOOL_PROP_CAPACITY: - (void) snprintf(propval, sizeof (propval), - value < 1000 ? "%1.2f%%" : value < 10000 ? 
- "%2.1f%%" : "%3.0f%%", value / 100.0); - break; - default: - zfs_nicenum(value, propval, sizeof (propval)); - } - - if (!valid) - (void) strlcpy(propval, "-", sizeof (propval)); - - if (scripted) - (void) printf("\t%s", propval); - else - (void) printf(" %*s", width, propval); -} - -/* - * print static default line per vdev - */ -void -print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, - list_cbdata_t *cb, int depth) -{ - nvlist_t **child; - vdev_stat_t *vs; - uint_t c, children; - char *vname; - boolean_t scripted = cb->cb_scripted; - uint64_t islog = B_FALSE; - char *dashes = "%-*s - - - - - -\n"; - - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); - - if (name != NULL) { - boolean_t toplevel = (vs->vs_space != 0); - uint64_t cap; - - if (strcmp(name, VDEV_TYPE_INDIRECT) == 0) - return; - - if (scripted) - (void) printf("\t%s", name); - else if (strlen(name) + depth > cb->cb_namewidth) - (void) printf("%*s%s", depth, "", name); - else - (void) printf("%*s%s%*s", depth, "", name, - (int)(cb->cb_namewidth - strlen(name) - depth), ""); - - /* - * Print the properties for the individual vdevs. Some - * properties are only applicable to toplevel vdevs. The - * 'toplevel' boolean value is passed to the print_one_column() - * to indicate that the value is valid. - */ - print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted, - toplevel); - print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, scripted, - toplevel); - print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, - scripted, toplevel); - print_one_column(ZPOOL_PROP_CHECKPOINT, - vs->vs_checkpoint_space, scripted, toplevel); - print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted, - B_TRUE); - print_one_column(ZPOOL_PROP_FRAGMENTATION, - vs->vs_fragmentation, scripted, - (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel)); - cap = (vs->vs_space == 0) ? 
0 : - (vs->vs_alloc * 10000 / vs->vs_space); - print_one_column(ZPOOL_PROP_CAPACITY, cap, scripted, toplevel); - (void) printf("\n"); - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return; - - /* list the normal vdevs first */ - for (c = 0; c < children; c++) { - uint64_t ishole = B_FALSE; - - if (nvlist_lookup_uint64(child[c], - ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) - continue; - - if (nvlist_lookup_uint64(child[c], - ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) - continue; - - if (nvlist_exists(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS)) - continue; - - vname = zpool_vdev_name(g_zfs, zhp, child[c], - cb->cb_name_flags); - print_list_stats(zhp, vname, child[c], cb, depth + 2); - free(vname); - } - - /* list the classes: 'logs', 'dedup', and 'special' */ - for (uint_t n = 0; n < 3; n++) { - boolean_t printed = B_FALSE; - - for (c = 0; c < children; c++) { - char *bias = NULL; - char *type = NULL; - - if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &islog) == 0 && islog) { - bias = VDEV_ALLOC_CLASS_LOGS; - } else { - (void) nvlist_lookup_string(child[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, &bias); - (void) nvlist_lookup_string(child[c], - ZPOOL_CONFIG_TYPE, &type); - } - if (bias == NULL || strcmp(bias, class_name[n]) != 0) - continue; - if (!islog && strcmp(type, VDEV_TYPE_INDIRECT) == 0) - continue; - - if (!printed) { - /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, - class_name[n]); - printed = B_TRUE; - } - vname = zpool_vdev_name(g_zfs, zhp, child[c], - cb->cb_name_flags); - print_list_stats(zhp, vname, child[c], cb, depth + 2); - free(vname); - } - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0 && children > 0) { - /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "cache"); - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, zhp, child[c], - cb->cb_name_flags); - print_list_stats(zhp, 
vname, child[c], cb, depth + 2); - free(vname); - } - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, - &children) == 0 && children > 0) { - /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "spare"); - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, zhp, child[c], - cb->cb_name_flags); - print_list_stats(zhp, vname, child[c], cb, depth + 2); - free(vname); - } - } -} - -/* - * Generic callback function to list a pool. - */ -int -list_callback(zpool_handle_t *zhp, void *data) -{ - list_cbdata_t *cbp = data; - nvlist_t *config; - nvlist_t *nvroot; - - config = zpool_get_config(zhp, NULL); - - if (cbp->cb_verbose) { - config = zpool_get_config(zhp, NULL); - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - } - - if (cbp->cb_verbose) - cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0, - cbp->cb_name_flags); - - print_pool(zhp, cbp); - - if (cbp->cb_verbose) - print_list_stats(zhp, NULL, nvroot, cbp, 0); - - return (0); -} - -/* - * zpool list [-gHLP] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]] - * - * -g Display guid for individual vdev name. - * -H Scripted mode. Don't display headers, and separate properties - * by a single tab. - * -L Follow links when resolving vdev path name. - * -o List of properties to display. Defaults to - * "name,size,allocated,free,expandsize,fragmentation,capacity," - * "dedupratio,health,altroot" - * -p Diplay values in parsable (exact) format. - * -P Display full path for vdev name. - * -T Display a timestamp in date(1) or Unix format - * - * List all pools in the system, whether or not they're healthy. Output space - * statistics for each one, as well as health status summary. 
- */ -int -zpool_do_list(int argc, char **argv) -{ - int c; - int ret; - list_cbdata_t cb = { 0 }; - static char default_props[] = - "name,size,allocated,free,checkpoint,expandsize,fragmentation," - "capacity,dedupratio,health,altroot"; - char *props = default_props; - unsigned long interval = 0, count = 0; - zpool_list_t *list; - boolean_t first = B_TRUE; - - /* check options */ - while ((c = getopt(argc, argv, ":gHLo:pPT:v")) != -1) { - switch (c) { - case 'g': - cb.cb_name_flags |= VDEV_NAME_GUID; - break; - case 'H': - cb.cb_scripted = B_TRUE; - break; - case 'L': - cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; - break; - case 'o': - props = optarg; - break; - case 'P': - cb.cb_name_flags |= VDEV_NAME_PATH; - break; - case 'p': - cb.cb_literal = B_TRUE; - break; - case 'T': - get_timestamp_arg(*optarg); - break; - case 'v': - cb.cb_verbose = B_TRUE; - cb.cb_namewidth = 8; /* 8 until precalc is avail */ - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - get_interval_count(&argc, argv, &interval, &count); - - if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) - usage(B_FALSE); - - for (;;) { - if ((list = pool_list_get(argc, argv, &cb.cb_proplist, - &ret)) == NULL) - return (1); - - if (pool_list_count(list) == 0) - break; - - cb.cb_namewidth = 0; - (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); - - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); - - if (!cb.cb_scripted && (first || cb.cb_verbose)) { - print_header(&cb); - first = B_FALSE; - } - ret = pool_list_iter(list, B_TRUE, list_callback, &cb); - - if (interval == 0) - break; - - if (count != 0 && --count == 0) - break; - - pool_list_free(list); - (void) sleep(interval); - } - - if (argc == 0 && !cb.cb_scripted && pool_list_count(list) 
== 0) { - (void) printf(gettext("no pools available\n")); - ret = 0; - } - - pool_list_free(list); - zprop_free_list(cb.cb_proplist); - return (ret); -} - -static int -zpool_do_attach_or_replace(int argc, char **argv, int replacing) -{ - boolean_t force = B_FALSE; - int c; - nvlist_t *nvroot; - char *poolname, *old_disk, *new_disk; - zpool_handle_t *zhp; - zpool_boot_label_t boot_type; - uint64_t boot_size; - int ret; - - /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - case 'f': - force = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - - if (argc < 2) { - (void) fprintf(stderr, - gettext("missing <device> specification\n")); - usage(B_FALSE); - } - - old_disk = argv[1]; - - if (argc < 3) { - if (!replacing) { - (void) fprintf(stderr, - gettext("missing <new_device> specification\n")); - usage(B_FALSE); - } - new_disk = old_disk; - argc -= 1; - argv += 1; - } else { - new_disk = argv[2]; - argc -= 2; - argv += 2; - } - - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - if (zpool_get_config(zhp, NULL) == NULL) { - (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), - poolname); - zpool_close(zhp); - return (1); - } - - if (zpool_is_bootable(zhp)) - boot_type = ZPOOL_COPY_BOOT_LABEL; - else - boot_type = ZPOOL_NO_BOOT_LABEL; - - boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); - nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE, - boot_type, boot_size, argc, argv); - if (nvroot == NULL) { - zpool_close(zhp); - return (1); - } - - ret = zpool_vdev_attach(zhp, old_disk, new_disk, 
nvroot, replacing); - - nvlist_free(nvroot); - zpool_close(zhp); - - return (ret); -} - -/* - * zpool replace [-f] <pool> <device> <new_device> - * - * -f Force attach, even if <new_device> appears to be in use. - * - * Replace <device> with <new_device>. - */ -/* ARGSUSED */ -int -zpool_do_replace(int argc, char **argv) -{ - return (zpool_do_attach_or_replace(argc, argv, B_TRUE)); -} - -/* - * zpool attach [-f] <pool> <device> <new_device> - * - * -f Force attach, even if <new_device> appears to be in use. - * - * Attach <new_device> to the mirror containing <device>. If <device> is not - * part of a mirror, then <device> will be transformed into a mirror of - * <device> and <new_device>. In either case, <new_device> will begin life - * with a DTL of [0, now], and will immediately begin to resilver itself. - */ -int -zpool_do_attach(int argc, char **argv) -{ - return (zpool_do_attach_or_replace(argc, argv, B_FALSE)); -} - -/* - * zpool detach [-f] <pool> <device> - * - * -f Force detach of <device>, even if DTLs argue against it - * (not supported yet) - * - * Detach a device from a mirror. The operation will be refused if <device> - * is the last device in the mirror, or if the DTLs indicate that this device - * has the only valid copy of some data. 
- */ -/* ARGSUSED */ -int -zpool_do_detach(int argc, char **argv) -{ - int c; - char *poolname, *path; - zpool_handle_t *zhp; - int ret; - - /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - case 'f': - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - } - - if (argc < 2) { - (void) fprintf(stderr, - gettext("missing <device> specification\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - path = argv[1]; - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - ret = zpool_vdev_detach(zhp, path); - - zpool_close(zhp); - - return (ret); -} - -/* - * zpool split [-gLnP] [-o prop=val] ... - * [-o mntopt] ... - * [-R altroot] <pool> <newpool> [<device> ...] - * - * -g Display guid for individual vdev name. - * -L Follow links when resolving vdev path name. - * -n Do not split the pool, but display the resulting layout if - * it were to be split. - * -o Set property=value, or set mount options. - * -P Display full path for vdev name. - * -R Mount the split-off pool under an alternate root. - * - * Splits the named pool and gives it the new pool name. Devices to be split - * off may be listed, provided that no more than one device is specified - * per top-level vdev mirror. The newly split pool is left in an exported - * state unless -R is specified. - * - * Restrictions: the top-level of the pool pool must only be made up of - * mirrors; all devices in the pool must be healthy; no device may be - * undergoing a resilvering operation. 
- */ -int -zpool_do_split(int argc, char **argv) -{ - char *srcpool, *newpool, *propval; - char *mntopts = NULL; - splitflags_t flags; - int c, ret = 0; - zpool_handle_t *zhp; - nvlist_t *config, *props = NULL; - - flags.dryrun = B_FALSE; - flags.import = B_FALSE; - flags.name_flags = 0; - - /* check options */ - while ((c = getopt(argc, argv, ":gLR:no:P")) != -1) { - switch (c) { - case 'g': - flags.name_flags |= VDEV_NAME_GUID; - break; - case 'L': - flags.name_flags |= VDEV_NAME_FOLLOW_LINKS; - break; - case 'R': - flags.import = B_TRUE; - if (add_prop_list( - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg, - &props, B_TRUE) != 0) { - nvlist_free(props); - usage(B_FALSE); - } - break; - case 'n': - flags.dryrun = B_TRUE; - break; - case 'o': - if ((propval = strchr(optarg, '=')) != NULL) { - *propval = '\0'; - propval++; - if (add_prop_list(optarg, propval, - &props, B_TRUE) != 0) { - nvlist_free(props); - usage(B_FALSE); - } - } else { - mntopts = optarg; - } - break; - case 'P': - flags.name_flags |= VDEV_NAME_PATH; - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - break; - } - } - - if (!flags.import && mntopts != NULL) { - (void) fprintf(stderr, gettext("setting mntopts is only " - "valid when importing the pool\n")); - usage(B_FALSE); - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("Missing pool name\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("Missing new pool name\n")); - usage(B_FALSE); - } - - srcpool = argv[0]; - newpool = argv[1]; - - argc -= 2; - argv += 2; - - if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) - return (1); - - config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv); - if (config == NULL) { - ret = 1; - } else { - if (flags.dryrun) { - (void) 
printf(gettext("would create '%s' with the " - "following layout:\n\n"), newpool); - print_vdev_tree(NULL, newpool, config, 0, "", - flags.name_flags); - } - nvlist_free(config); - } - - zpool_close(zhp); - - if (ret != 0 || flags.dryrun || !flags.import) - return (ret); - - /* - * The split was successful. Now we need to open the new - * pool and import it. - */ - if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) - return (1); - if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && - zpool_enable_datasets(zhp, mntopts, 0) != 0) { - ret = 1; - (void) fprintf(stderr, gettext("Split was successful, but " - "the datasets could not all be mounted\n")); - (void) fprintf(stderr, gettext("Try doing '%s' with a " - "different altroot\n"), "zpool import"); - } - zpool_close(zhp); - - return (ret); -} - - - -/* - * zpool online <pool> <device> ... - */ -int -zpool_do_online(int argc, char **argv) -{ - int c, i; - char *poolname; - zpool_handle_t *zhp; - int ret = 0; - vdev_state_t newstate; - int flags = 0; - - /* check options */ - while ((c = getopt(argc, argv, "et")) != -1) { - switch (c) { - case 'e': - flags |= ZFS_ONLINE_EXPAND; - break; - case 't': - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing device name\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - for (i = 1; i < argc; i++) { - if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { - if (newstate != VDEV_STATE_HEALTHY) { - (void) printf(gettext("warning: device '%s' " - "onlined, but remains in faulted state\n"), - argv[i]); - if (newstate == VDEV_STATE_FAULTED) - (void) printf(gettext("use 'zpool " - "clear' to restore a 
faulted " - "device\n")); - else - (void) printf(gettext("use 'zpool " - "replace' to replace devices " - "that are no longer present\n")); - } - } else { - ret = 1; - } - } - - zpool_close(zhp); - - return (ret); -} - -/* - * zpool offline [-ft] <pool> <device> ... - * - * -f Force the device into the offline state, even if doing - * so would appear to compromise pool availability. - * (not supported yet) - * - * -t Only take the device off-line temporarily. The offline - * state will not be persistent across reboots. - */ -/* ARGSUSED */ -int -zpool_do_offline(int argc, char **argv) -{ - int c, i; - char *poolname; - zpool_handle_t *zhp; - int ret = 0; - boolean_t istmp = B_FALSE; - - /* check options */ - while ((c = getopt(argc, argv, "ft")) != -1) { - switch (c) { - case 't': - istmp = B_TRUE; - break; - case 'f': - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - if (argc < 2) { - (void) fprintf(stderr, gettext("missing device name\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - for (i = 1; i < argc; i++) { - if (zpool_vdev_offline(zhp, argv[i], istmp) != 0) - ret = 1; - } - - zpool_close(zhp); - - return (ret); -} - -/* - * zpool clear <pool> [device] - * - * Clear all errors associated with a pool or a particular device. 
- */ -int -zpool_do_clear(int argc, char **argv) -{ - int c; - int ret = 0; - boolean_t dryrun = B_FALSE; - boolean_t do_rewind = B_FALSE; - boolean_t xtreme_rewind = B_FALSE; - uint32_t rewind_policy = ZPOOL_NO_REWIND; - nvlist_t *policy = NULL; - zpool_handle_t *zhp; - char *pool, *device; - - /* check options */ - while ((c = getopt(argc, argv, "FnX")) != -1) { - switch (c) { - case 'F': - do_rewind = B_TRUE; - break; - case 'n': - dryrun = B_TRUE; - break; - case 'X': - xtreme_rewind = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - - if (argc > 2) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - if ((dryrun || xtreme_rewind) && !do_rewind) { - (void) fprintf(stderr, - gettext("-n or -X only meaningful with -F\n")); - usage(B_FALSE); - } - if (dryrun) - rewind_policy = ZPOOL_TRY_REWIND; - else if (do_rewind) - rewind_policy = ZPOOL_DO_REWIND; - if (xtreme_rewind) - rewind_policy |= ZPOOL_EXTREME_REWIND; - - /* In future, further rewind policy choices can be passed along here */ - if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || - nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, - rewind_policy) != 0) { - return (1); - } - - pool = argv[0]; - device = argc == 2 ? 
argv[1] : NULL; - - if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { - nvlist_free(policy); - return (1); - } - - if (zpool_clear(zhp, device, policy) != 0) - ret = 1; - - zpool_close(zhp); - - nvlist_free(policy); - - return (ret); -} - -/* - * zpool reguid <pool> - */ -int -zpool_do_reguid(int argc, char **argv) -{ - int c; - char *poolname; - zpool_handle_t *zhp; - int ret = 0; - - /* check options */ - while ((c = getopt(argc, argv, "")) != -1) { - switch (c) { - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - /* get pool name and check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - poolname = argv[0]; - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) - return (1); - - ret = zpool_reguid(zhp); - - zpool_close(zhp); - return (ret); -} - - -/* - * zpool reopen <pool> - * - * Reopen the pool so that the kernel can update the sizes of all vdevs. 
- */ -int -zpool_do_reopen(int argc, char **argv) -{ - int c; - int ret = 0; - zpool_handle_t *zhp; - char *pool; - - /* check options */ - while ((c = getopt(argc, argv, "")) != -1) { - switch (c) { - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc--; - argv++; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); - } - - pool = argv[0]; - if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) - return (1); - - ret = zpool_reopen(zhp); - zpool_close(zhp); - return (ret); -} - -typedef struct scrub_cbdata { - int cb_type; - int cb_argc; - char **cb_argv; - pool_scrub_cmd_t cb_scrub_cmd; -} scrub_cbdata_t; - -static boolean_t -zpool_has_checkpoint(zpool_handle_t *zhp) -{ - nvlist_t *config, *nvroot; - - config = zpool_get_config(zhp, NULL); - - if (config != NULL) { - pool_checkpoint_stat_t *pcs = NULL; - uint_t c; - - nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); - (void) nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); - - if (pcs == NULL || pcs->pcs_state == CS_NONE) - return (B_FALSE); - - assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS || - pcs->pcs_state == CS_CHECKPOINT_DISCARDING); - return (B_TRUE); - } - - return (B_FALSE); -} - -int -scrub_callback(zpool_handle_t *zhp, void *data) -{ - scrub_cbdata_t *cb = data; - int err; - - /* - * Ignore faulted pools. 
- */ - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - (void) fprintf(stderr, gettext("cannot scrub '%s': pool is " - "currently unavailable\n"), zpool_get_name(zhp)); - return (1); - } - - err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); - - if (err == 0 && zpool_has_checkpoint(zhp) && - cb->cb_type == POOL_SCAN_SCRUB) { - (void) printf(gettext("warning: will not scrub state that " - "belongs to the checkpoint of pool '%s'\n"), - zpool_get_name(zhp)); - } - - return (err != 0); -} - -/* - * zpool scrub [-s | -p] <pool> ... - * - * -s Stop. Stops any in-progress scrub. - * -p Pause. Pause in-progress scrub. - */ -int -zpool_do_scrub(int argc, char **argv) -{ - int c; - scrub_cbdata_t cb; - - cb.cb_type = POOL_SCAN_SCRUB; - cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; - - /* check options */ - while ((c = getopt(argc, argv, "sp")) != -1) { - switch (c) { - case 's': - cb.cb_type = POOL_SCAN_NONE; - break; - case 'p': - cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - if (cb.cb_type == POOL_SCAN_NONE && - cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { - (void) fprintf(stderr, gettext("invalid option combination: " - "-s and -p are mutually exclusive\n")); - usage(B_FALSE); - } - - cb.cb_argc = argc; - cb.cb_argv = argv; - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - } - - return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); -} - -static void -zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res) -{ - uint_t children = 0; - nvlist_t **child; - uint_t i; - - (void) nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &child, &children); - - if (children == 0) { - char *path = zpool_vdev_name(g_zfs, zhp, nvroot, B_FALSE); - fnvlist_add_boolean(res, path); - free(path); - return; - } - - for (i = 0; i < children; i++) { - 
zpool_collect_leaves(zhp, child[i], res); - } -} - -/* - * zpool initialize [-cs] <pool> [<vdev> ...] - * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool - * if none specified. - * - * -c Cancel. Ends active initializing. - * -s Suspend. Initializing can then be restarted with no flags. - */ -int -zpool_do_initialize(int argc, char **argv) -{ - int c; - char *poolname; - zpool_handle_t *zhp; - nvlist_t *vdevs; - int err = 0; - - struct option long_options[] = { - {"cancel", no_argument, NULL, 'c'}, - {"suspend", no_argument, NULL, 's'}, - {0, 0, 0, 0} - }; - - pool_initialize_func_t cmd_type = POOL_INITIALIZE_DO; - while ((c = getopt_long(argc, argv, "cs", long_options, NULL)) != -1) { - switch (c) { - case 'c': - if (cmd_type != POOL_INITIALIZE_DO) { - (void) fprintf(stderr, gettext("-c cannot be " - "combined with other options\n")); - usage(B_FALSE); - } - cmd_type = POOL_INITIALIZE_CANCEL; - break; - case 's': - if (cmd_type != POOL_INITIALIZE_DO) { - (void) fprintf(stderr, gettext("-s cannot be " - "combined with other options\n")); - usage(B_FALSE); - } - cmd_type = POOL_INITIALIZE_SUSPEND; - break; - case '?': - if (optopt != 0) { - (void) fprintf(stderr, - gettext("invalid option '%c'\n"), optopt); - } else { - (void) fprintf(stderr, - gettext("invalid option '%s'\n"), - argv[optind - 1]); - } - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(B_FALSE); - return (-1); - } - - poolname = argv[0]; - zhp = zpool_open(g_zfs, poolname); - if (zhp == NULL) - return (-1); - - vdevs = fnvlist_alloc(); - if (argc == 1) { - /* no individual leaf vdevs specified, so add them all */ - nvlist_t *config = zpool_get_config(zhp, NULL); - nvlist_t *nvroot = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE); - zpool_collect_leaves(zhp, nvroot, vdevs); - } else { - int i; - for (i = 1; i < argc; i++) { - fnvlist_add_boolean(vdevs, 
argv[i]); - } - } - - err = zpool_initialize(zhp, cmd_type, vdevs); - - fnvlist_free(vdevs); - zpool_close(zhp); - - return (err); -} - -/* - * Print out detailed scrub status. - */ -static void -print_scan_status(pool_scan_stat_t *ps) -{ - time_t start, end, pause; - uint64_t total_secs_left; - uint64_t elapsed, secs_left, mins_left, hours_left, days_left; - uint64_t pass_scanned, scanned, pass_issued, issued, total; - uint_t scan_rate, issue_rate; - double fraction_done; - char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7]; - char srate_buf[7], irate_buf[7]; - - (void) printf(gettext(" scan: ")); - - /* If there's never been a scan, there's not much to say. */ - if (ps == NULL || ps->pss_func == POOL_SCAN_NONE || - ps->pss_func >= POOL_SCAN_FUNCS) { - (void) printf(gettext("none requested\n")); - return; - } - - start = ps->pss_start_time; - end = ps->pss_end_time; - pause = ps->pss_pass_scrub_pause; - - zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); - - assert(ps->pss_func == POOL_SCAN_SCRUB || - ps->pss_func == POOL_SCAN_RESILVER); - - /* Scan is finished or canceled. 
*/ - if (ps->pss_state == DSS_FINISHED) { - total_secs_left = end - start; - days_left = total_secs_left / 60 / 60 / 24; - hours_left = (total_secs_left / 60 / 60) % 24; - mins_left = (total_secs_left / 60) % 60; - secs_left = (total_secs_left % 60); - - if (ps->pss_func == POOL_SCAN_SCRUB) { - (void) printf(gettext("scrub repaired %s " - "in %llu days %02llu:%02llu:%02llu " - "with %llu errors on %s"), processed_buf, - (u_longlong_t)days_left, (u_longlong_t)hours_left, - (u_longlong_t)mins_left, (u_longlong_t)secs_left, - (u_longlong_t)ps->pss_errors, ctime(&end)); - } else if (ps->pss_func == POOL_SCAN_RESILVER) { - (void) printf(gettext("resilvered %s " - "in %llu days %02llu:%02llu:%02llu " - "with %llu errors on %s"), processed_buf, - (u_longlong_t)days_left, (u_longlong_t)hours_left, - (u_longlong_t)mins_left, (u_longlong_t)secs_left, - (u_longlong_t)ps->pss_errors, ctime(&end)); - - } - - return; - } else if (ps->pss_state == DSS_CANCELED) { - if (ps->pss_func == POOL_SCAN_SCRUB) { - (void) printf(gettext("scrub canceled on %s"), - ctime(&end)); - } else if (ps->pss_func == POOL_SCAN_RESILVER) { - (void) printf(gettext("resilver canceled on %s"), - ctime(&end)); - } - return; - } - - assert(ps->pss_state == DSS_SCANNING); - - /* Scan is in progress. Resilvers can't be paused. 
*/ - if (ps->pss_func == POOL_SCAN_SCRUB) { - if (pause == 0) { - (void) printf(gettext("scrub in progress since %s"), - ctime(&start)); - } else { - (void) printf(gettext("scrub paused since %s"), - ctime(&pause)); - (void) printf(gettext("\tscrub started on %s"), - ctime(&start)); - } - } else if (ps->pss_func == POOL_SCAN_RESILVER) { - (void) printf(gettext("resilver in progress since %s"), - ctime(&start)); - } - - scanned = ps->pss_examined; - pass_scanned = ps->pss_pass_exam; - issued = ps->pss_issued; - pass_issued = ps->pss_pass_issued; - total = ps->pss_to_examine; - - /* we are only done with a block once we have issued the IO for it */ - fraction_done = (double)issued / total; - - /* elapsed time for this pass, rounding up to 1 if it's 0 */ - elapsed = time(NULL) - ps->pss_pass_start; - elapsed -= ps->pss_pass_scrub_spent_paused; - elapsed = (elapsed != 0) ? elapsed : 1; - - scan_rate = pass_scanned / elapsed; - issue_rate = pass_issued / elapsed; - total_secs_left = (issue_rate != 0) ? 
- ((total - issued) / issue_rate) : UINT64_MAX; - - days_left = total_secs_left / 60 / 60 / 24; - hours_left = (total_secs_left / 60 / 60) % 24; - mins_left = (total_secs_left / 60) % 60; - secs_left = (total_secs_left % 60); - - /* format all of the numbers we will be reporting */ - zfs_nicenum(scanned, scanned_buf, sizeof (scanned_buf)); - zfs_nicenum(issued, issued_buf, sizeof (issued_buf)); - zfs_nicenum(total, total_buf, sizeof (total_buf)); - zfs_nicenum(scan_rate, srate_buf, sizeof (srate_buf)); - zfs_nicenum(issue_rate, irate_buf, sizeof (irate_buf)); - - /* doo not print estimated time if we have a paused scrub */ - if (pause == 0) { - (void) printf(gettext("\t%s scanned at %s/s, " - "%s issued at %s/s, %s total\n"), - scanned_buf, srate_buf, issued_buf, irate_buf, total_buf); - } else { - (void) printf(gettext("\t%s scanned, %s issued, %s total\n"), - scanned_buf, issued_buf, total_buf); - } - - if (ps->pss_func == POOL_SCAN_RESILVER) { - (void) printf(gettext("\t%s resilvered, %.2f%% done"), - processed_buf, 100 * fraction_done); - } else if (ps->pss_func == POOL_SCAN_SCRUB) { - (void) printf(gettext("\t%s repaired, %.2f%% done"), - processed_buf, 100 * fraction_done); - } - - if (pause == 0) { - if (issue_rate >= 10 * 1024 * 1024) { - (void) printf(gettext(", %llu days " - "%02llu:%02llu:%02llu to go\n"), - (u_longlong_t)days_left, (u_longlong_t)hours_left, - (u_longlong_t)mins_left, (u_longlong_t)secs_left); - } else { - (void) printf(gettext(", no estimated " - "completion time\n")); - } - } else { - (void) printf(gettext("\n")); - } -} - -/* - * As we don't scrub checkpointed blocks, we want to warn the - * user that we skipped scanning some blocks if a checkpoint exists - * or existed at any time during the scan. 
- */ -static void -print_checkpoint_scan_warning(pool_scan_stat_t *ps, pool_checkpoint_stat_t *pcs) -{ - if (ps == NULL || pcs == NULL) - return; - - if (pcs->pcs_state == CS_NONE || - pcs->pcs_state == CS_CHECKPOINT_DISCARDING) - return; - - assert(pcs->pcs_state == CS_CHECKPOINT_EXISTS); - - if (ps->pss_state == DSS_NONE) - return; - - if ((ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) && - ps->pss_end_time < pcs->pcs_start_time) - return; - - if (ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED) { - (void) printf(gettext(" scan warning: skipped blocks " - "that are only referenced by the checkpoint.\n")); - } else { - assert(ps->pss_state == DSS_SCANNING); - (void) printf(gettext(" scan warning: skipping blocks " - "that are only referenced by the checkpoint.\n")); - } -} - -/* - * Print out detailed removal status. - */ -static void -print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs) -{ - char copied_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; - time_t start, end; - nvlist_t *config, *nvroot; - nvlist_t **child; - uint_t children; - char *vdev_name; - - if (prs == NULL || prs->prs_state == DSS_NONE) - return; - - /* - * Determine name of vdev. - */ - config = zpool_get_config(zhp, NULL); - nvroot = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_VDEV_TREE); - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0); - assert(prs->prs_removing_vdev < children); - vdev_name = zpool_vdev_name(g_zfs, zhp, - child[prs->prs_removing_vdev], B_TRUE); - - (void) printf(gettext("remove: ")); - - start = prs->prs_start_time; - end = prs->prs_end_time; - zfs_nicenum(prs->prs_copied, copied_buf, sizeof (copied_buf)); - - /* - * Removal is finished or canceled. 
- */ - if (prs->prs_state == DSS_FINISHED) { - uint64_t minutes_taken = (end - start) / 60; - - (void) printf(gettext("Removal of vdev %llu copied %s " - "in %lluh%um, completed on %s"), - (longlong_t)prs->prs_removing_vdev, - copied_buf, - (u_longlong_t)(minutes_taken / 60), - (uint_t)(minutes_taken % 60), - ctime((time_t *)&end)); - } else if (prs->prs_state == DSS_CANCELED) { - (void) printf(gettext("Removal of %s canceled on %s"), - vdev_name, ctime(&end)); - } else { - uint64_t copied, total, elapsed, mins_left, hours_left; - double fraction_done; - uint_t rate; - - assert(prs->prs_state == DSS_SCANNING); - - /* - * Removal is in progress. - */ - (void) printf(gettext( - "Evacuation of %s in progress since %s"), - vdev_name, ctime(&start)); - - copied = prs->prs_copied > 0 ? prs->prs_copied : 1; - total = prs->prs_to_copy; - fraction_done = (double)copied / total; - - /* elapsed time for this pass */ - elapsed = time(NULL) - prs->prs_start_time; - elapsed = elapsed > 0 ? elapsed : 1; - rate = copied / elapsed; - rate = rate > 0 ? 
rate : 1; - mins_left = ((total - copied) / rate) / 60; - hours_left = mins_left / 60; - - zfs_nicenum(copied, examined_buf, sizeof (examined_buf)); - zfs_nicenum(total, total_buf, sizeof (total_buf)); - zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); - - /* - * do not print estimated time if hours_left is more than - * 30 days - */ - (void) printf(gettext(" %s copied out of %s at %s/s, " - "%.2f%% done"), - examined_buf, total_buf, rate_buf, 100 * fraction_done); - if (hours_left < (30 * 24)) { - (void) printf(gettext(", %lluh%um to go\n"), - (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); - } else { - (void) printf(gettext( - ", (copy is slow, no estimated time)\n")); - } - } - - if (prs->prs_mapping_memory > 0) { - char mem_buf[7]; - zfs_nicenum(prs->prs_mapping_memory, mem_buf, sizeof (mem_buf)); - (void) printf(gettext(" %s memory used for " - "removed device mappings\n"), - mem_buf); - } -} - -static void -print_checkpoint_status(pool_checkpoint_stat_t *pcs) -{ - time_t start; - char space_buf[7]; - - if (pcs == NULL || pcs->pcs_state == CS_NONE) - return; - - (void) printf(gettext("checkpoint: ")); - - start = pcs->pcs_start_time; - zfs_nicenum(pcs->pcs_space, space_buf, sizeof (space_buf)); - - if (pcs->pcs_state == CS_CHECKPOINT_EXISTS) { - char *date = ctime(&start); - - /* - * ctime() adds a newline at the end of the generated - * string, thus the weird format specifier and the - * strlen() call used to chop it off from the output. 
- */ - (void) printf(gettext("created %.*s, consumes %s\n"), - strlen(date) - 1, date, space_buf); - return; - } - - assert(pcs->pcs_state == CS_CHECKPOINT_DISCARDING); - - (void) printf(gettext("discarding, %s remaining.\n"), - space_buf); -} - -static void -print_error_log(zpool_handle_t *zhp) -{ - nvlist_t *nverrlist = NULL; - nvpair_t *elem; - char *pathname; - size_t len = MAXPATHLEN * 2; - - if (zpool_get_errlog(zhp, &nverrlist) != 0) { - (void) printf("errors: List of errors unavailable " - "(insufficient privileges)\n"); - return; - } - - (void) printf("errors: Permanent errors have been " - "detected in the following files:\n\n"); - - pathname = safe_malloc(len); - elem = NULL; - while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { - nvlist_t *nv; - uint64_t dsobj, obj; - - verify(nvpair_value_nvlist(elem, &nv) == 0); - verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, - &dsobj) == 0); - verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, - &obj) == 0); - zpool_obj_to_path(zhp, dsobj, obj, pathname, len); - (void) printf("%7s %s\n", "", pathname); - } - free(pathname); - nvlist_free(nverrlist); -} - -static void -print_spares(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **spares, - uint_t nspares) -{ - uint_t i; - char *name; - - if (nspares == 0) - return; - - (void) printf(gettext("\tspares\n")); - - for (i = 0; i < nspares; i++) { - name = zpool_vdev_name(g_zfs, zhp, spares[i], - cb->cb_name_flags); - print_status_config(zhp, cb, name, spares[i], 2, B_TRUE); - free(name); - } -} - -static void -print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache, - uint_t nl2cache) -{ - uint_t i; - char *name; - - if (nl2cache == 0) - return; - - (void) printf(gettext("\tcache\n")); - - for (i = 0; i < nl2cache; i++) { - name = zpool_vdev_name(g_zfs, zhp, l2cache[i], - cb->cb_name_flags); - print_status_config(zhp, cb, name, l2cache[i], 2, B_FALSE); - free(name); - } -} - -static void -print_dedup_stats(nvlist_t *config) -{ - 
ddt_histogram_t *ddh; - ddt_stat_t *dds; - ddt_object_t *ddo; - uint_t c; - - /* - * If the pool was faulted then we may not have been able to - * obtain the config. Otherwise, if we have anything in the dedup - * table continue processing the stats. - */ - if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, - (uint64_t **)&ddo, &c) != 0) - return; - - (void) printf("\n"); - (void) printf(gettext(" dedup: ")); - if (ddo->ddo_count == 0) { - (void) printf(gettext("no DDT entries\n")); - return; - } - - (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n", - (u_longlong_t)ddo->ddo_count, - (u_longlong_t)ddo->ddo_dspace, - (u_longlong_t)ddo->ddo_mspace); - - verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, - (uint64_t **)&dds, &c) == 0); - verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, - (uint64_t **)&ddh, &c) == 0); - zpool_dump_ddt(dds, ddh); -} - -/* - * Display a summary of pool status. Displays a summary such as: - * - * pool: tank - * status: DEGRADED - * reason: One or more devices ... - * see: http://illumos.org/msg/ZFS-xxxx-01 - * config: - * mirror DEGRADED - * c1t0d0 OK - * c2t0d0 UNAVAIL - * - * When given the '-v' option, we print out the complete config. If the '-e' - * option is specified, then we print out error rate information as well. - */ -int -status_callback(zpool_handle_t *zhp, void *data) -{ - status_cbdata_t *cbp = data; - nvlist_t *config, *nvroot; - char *msgid; - int reason; - const char *health; - uint_t c; - vdev_stat_t *vs; - - config = zpool_get_config(zhp, NULL); - reason = zpool_get_status(zhp, &msgid); - - cbp->cb_count++; - - /* - * If we were given 'zpool status -x', only report those pools with - * problems. 
- */ - if (cbp->cb_explain && - (reason == ZPOOL_STATUS_OK || - reason == ZPOOL_STATUS_VERSION_OLDER || - reason == ZPOOL_STATUS_NON_NATIVE_ASHIFT || - reason == ZPOOL_STATUS_FEAT_DISABLED)) { - if (!cbp->cb_allpools) { - (void) printf(gettext("pool '%s' is healthy\n"), - zpool_get_name(zhp)); - if (cbp->cb_first) - cbp->cb_first = B_FALSE; - } - return (0); - } - - if (cbp->cb_first) - cbp->cb_first = B_FALSE; - else - (void) printf("\n"); - - nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); - health = zpool_state_to_name(vs->vs_state, vs->vs_aux); - - (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); - (void) printf(gettext(" state: %s\n"), health); - - switch (reason) { - case ZPOOL_STATUS_MISSING_DEV_R: - (void) printf(gettext("status: One or more devices could not " - "be opened. Sufficient replicas exist for\n\tthe pool to " - "continue functioning in a degraded state.\n")); - (void) printf(gettext("action: Attach the missing device and " - "online it using 'zpool online'.\n")); - break; - - case ZPOOL_STATUS_MISSING_DEV_NR: - (void) printf(gettext("status: One or more devices could not " - "be opened. There are insufficient\n\treplicas for the " - "pool to continue functioning.\n")); - (void) printf(gettext("action: Attach the missing device and " - "online it using 'zpool online'.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_LABEL_R: - (void) printf(gettext("status: One or more devices could not " - "be used because the label is missing or\n\tinvalid. " - "Sufficient replicas exist for the pool to continue\n\t" - "functioning in a degraded state.\n")); - (void) printf(gettext("action: Replace the device using " - "'zpool replace'.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_LABEL_NR: - (void) printf(gettext("status: One or more devices could not " - "be used because the label is missing \n\tor invalid. 
" - "There are insufficient replicas for the pool to " - "continue\n\tfunctioning.\n")); - zpool_explain_recover(zpool_get_handle(zhp), - zpool_get_name(zhp), reason, config); - break; - - case ZPOOL_STATUS_FAILING_DEV: - (void) printf(gettext("status: One or more devices has " - "experienced an unrecoverable error. An\n\tattempt was " - "made to correct the error. Applications are " - "unaffected.\n")); - (void) printf(gettext("action: Determine if the device needs " - "to be replaced, and clear the errors\n\tusing " - "'zpool clear' or replace the device with 'zpool " - "replace'.\n")); - break; - - case ZPOOL_STATUS_OFFLINE_DEV: - (void) printf(gettext("status: One or more devices has " - "been taken offline by the administrator.\n\tSufficient " - "replicas exist for the pool to continue functioning in " - "a\n\tdegraded state.\n")); - (void) printf(gettext("action: Online the device using " - "'zpool online' or replace the device with\n\t'zpool " - "replace'.\n")); - break; - - case ZPOOL_STATUS_REMOVED_DEV: - (void) printf(gettext("status: One or more devices has " - "been removed by the administrator.\n\tSufficient " - "replicas exist for the pool to continue functioning in " - "a\n\tdegraded state.\n")); - (void) printf(gettext("action: Online the device using " - "'zpool online' or replace the device with\n\t'zpool " - "replace'.\n")); - break; - - case ZPOOL_STATUS_RESILVERING: - (void) printf(gettext("status: One or more devices is " - "currently being resilvered. The pool will\n\tcontinue " - "to function, possibly in a degraded state.\n")); - (void) printf(gettext("action: Wait for the resilver to " - "complete.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_DATA: - (void) printf(gettext("status: One or more devices has " - "experienced an error resulting in data\n\tcorruption. " - "Applications may be affected.\n")); - (void) printf(gettext("action: Restore the file in question " - "if possible. 
Otherwise restore the\n\tentire pool from " - "backup.\n")); - break; - - case ZPOOL_STATUS_CORRUPT_POOL: - (void) printf(gettext("status: The pool metadata is corrupted " - "and the pool cannot be opened.\n")); - zpool_explain_recover(zpool_get_handle(zhp), - zpool_get_name(zhp), reason, config); - break; - - case ZPOOL_STATUS_VERSION_OLDER: - (void) printf(gettext("status: The pool is formatted using a " - "legacy on-disk format. The pool can\n\tstill be used, " - "but some features are unavailable.\n")); - (void) printf(gettext("action: Upgrade the pool using 'zpool " - "upgrade'. Once this is done, the\n\tpool will no longer " - "be accessible on software that does not support feature\n" - "\tflags.\n")); - break; - - case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext("status: The pool has been upgraded to a " - "newer, incompatible on-disk version.\n\tThe pool cannot " - "be accessed on this system.\n")); - (void) printf(gettext("action: Access the pool from a system " - "running more recent software, or\n\trestore the pool from " - "backup.\n")); - break; - - case ZPOOL_STATUS_FEAT_DISABLED: - (void) printf(gettext("status: Some supported features are not " - "enabled on the pool. The pool can\n\tstill be used, but " - "some features are unavailable.\n")); - (void) printf(gettext("action: Enable all features using " - "'zpool upgrade'. Once this is done,\n\tthe pool may no " - "longer be accessible by software that does not support\n\t" - "the features. 
See zpool-features(7) for details.\n")); - break; - - case ZPOOL_STATUS_UNSUP_FEAT_READ: - (void) printf(gettext("status: The pool cannot be accessed on " - "this system because it uses the\n\tfollowing feature(s) " - "not supported on this system:\n")); - zpool_print_unsup_feat(config); - (void) printf("\n"); - (void) printf(gettext("action: Access the pool from a system " - "that supports the required feature(s),\n\tor restore the " - "pool from backup.\n")); - break; - - case ZPOOL_STATUS_UNSUP_FEAT_WRITE: - (void) printf(gettext("status: The pool can only be accessed " - "in read-only mode on this system. It\n\tcannot be " - "accessed in read-write mode because it uses the " - "following\n\tfeature(s) not supported on this system:\n")); - zpool_print_unsup_feat(config); - (void) printf("\n"); - (void) printf(gettext("action: The pool cannot be accessed in " - "read-write mode. Import the pool with\n" - "\t\"-o readonly=on\", access the pool from a system that " - "supports the\n\trequired feature(s), or restore the " - "pool from backup.\n")); - break; - - case ZPOOL_STATUS_FAULTED_DEV_R: - (void) printf(gettext("status: One or more devices are " - "faulted in response to persistent errors.\n\tSufficient " - "replicas exist for the pool to continue functioning " - "in a\n\tdegraded state.\n")); - (void) printf(gettext("action: Replace the faulted device, " - "or use 'zpool clear' to mark the device\n\trepaired.\n")); - break; - - case ZPOOL_STATUS_FAULTED_DEV_NR: - (void) printf(gettext("status: One or more devices are " - "faulted in response to persistent errors. There are " - "insufficient replicas for the pool to\n\tcontinue " - "functioning.\n")); - (void) printf(gettext("action: Destroy and re-create the pool " - "from a backup source. 
Manually marking the device\n" - "\trepaired using 'zpool clear' may allow some data " - "to be recovered.\n")); - break; - - case ZPOOL_STATUS_IO_FAILURE_MMP: - (void) printf(gettext("status: The pool is suspended because " - "multihost writes failed or were delayed;\n\tanother " - "system could import the pool undetected.\n")); - (void) printf(gettext("action: Make sure the pool's devices " - "are connected, then reboot your system and\n\timport the " - "pool.\n")); - break; - - case ZPOOL_STATUS_IO_FAILURE_WAIT: - case ZPOOL_STATUS_IO_FAILURE_CONTINUE: - (void) printf(gettext("status: One or more devices are " - "faulted in response to IO failures.\n")); - (void) printf(gettext("action: Make sure the affected devices " - "are connected, then run 'zpool clear'.\n")); - break; - - case ZPOOL_STATUS_BAD_LOG: - (void) printf(gettext("status: An intent log record " - "could not be read.\n" - "\tWaiting for adminstrator intervention to fix the " - "faulted pool.\n")); - (void) printf(gettext("action: Either restore the affected " - "device(s) and run 'zpool online',\n" - "\tor ignore the intent log records by running " - "'zpool clear'.\n")); - break; - - case ZPOOL_STATUS_NON_NATIVE_ASHIFT: - (void) printf(gettext("status: One or more devices are " - "configured to use a non-native block size.\n" - "\tExpect reduced performance.\n")); - (void) printf(gettext("action: Replace affected devices with " - "devices that support the\n\tconfigured block size, or " - "migrate data to a properly configured\n\tpool.\n")); - break; - - default: - /* - * The remaining errors can't actually be generated, yet. 
- */ - assert(reason == ZPOOL_STATUS_OK); - } - - if (msgid != NULL) - (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), - msgid); - - if (config != NULL) { - uint64_t nerr; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - pool_checkpoint_stat_t *pcs = NULL; - pool_scan_stat_t *ps = NULL; - pool_removal_stat_t *prs = NULL; - - (void) nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); - (void) nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); - (void) nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); - - print_scan_status(ps); - print_checkpoint_scan_warning(ps, pcs); - print_removal_status(zhp, prs); - print_checkpoint_status(pcs); - - cbp->cb_namewidth = max_width(zhp, nvroot, 0, 0, - cbp->cb_name_flags); - if (cbp->cb_namewidth < 10) - cbp->cb_namewidth = 10; - - (void) printf(gettext("config:\n\n")); - (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), - cbp->cb_namewidth, "NAME", "STATE", "READ", "WRITE", - "CKSUM"); - - print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0, - B_FALSE); - - print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP); - print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL); - print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_CLASS_LOGS); - - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) - print_l2cache(zhp, cbp, l2cache, nl2cache); - - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) - print_spares(zhp, cbp, spares, nspares); - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, - &nerr) == 0) { - nvlist_t *nverrlist = NULL; - - /* - * If the approximate error count is small, get a - * precise count by fetching the entire log and - * uniquifying the results. 
- */ - if (nerr > 0 && nerr < 100 && !cbp->cb_verbose && - zpool_get_errlog(zhp, &nverrlist) == 0) { - nvpair_t *elem; - - elem = NULL; - nerr = 0; - while ((elem = nvlist_next_nvpair(nverrlist, - elem)) != NULL) { - nerr++; - } - } - nvlist_free(nverrlist); - - (void) printf("\n"); - - if (nerr == 0) - (void) printf(gettext("errors: No known data " - "errors\n")); - else if (!cbp->cb_verbose) - (void) printf(gettext("errors: %llu data " - "errors, use '-v' for a list\n"), - (u_longlong_t)nerr); - else - print_error_log(zhp); - } - - if (cbp->cb_dedup_stats) - print_dedup_stats(config); - } else { - (void) printf(gettext("config: The configuration cannot be " - "determined.\n")); - } - - return (0); -} - -/* - * zpool status [-gLPvx] [-T d|u] [pool] ... [interval [count]] - * - * -g Display guid for individual vdev name. - * -L Follow links when resolving vdev path name. - * -P Display full path for vdev name. - * -v Display complete error logs - * -x Display only pools with potential problems - * -D Display dedup status (undocumented) - * -T Display a timestamp in date(1) or Unix format - * - * Describes the health status of all pools or some subset. 
- */ -int -zpool_do_status(int argc, char **argv) -{ - int c; - int ret; - unsigned long interval = 0, count = 0; - status_cbdata_t cb = { 0 }; - - /* check options */ - while ((c = getopt(argc, argv, "gLPvxDT:")) != -1) { - switch (c) { - case 'g': - cb.cb_name_flags |= VDEV_NAME_GUID; - break; - case 'L': - cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; - break; - case 'P': - cb.cb_name_flags |= VDEV_NAME_PATH; - break; - case 'v': - cb.cb_verbose = B_TRUE; - break; - case 'x': - cb.cb_explain = B_TRUE; - break; - case 'D': - cb.cb_dedup_stats = B_TRUE; - break; - case 'T': - get_timestamp_arg(*optarg); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - get_interval_count(&argc, argv, &interval, &count); - - if (argc == 0) - cb.cb_allpools = B_TRUE; - - cb.cb_first = B_TRUE; - cb.cb_print_status = B_TRUE; - - for (;;) { - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); - - ret = for_each_pool(argc, argv, B_TRUE, NULL, - status_callback, &cb); - - if (argc == 0 && cb.cb_count == 0) - (void) printf(gettext("no pools available\n")); - else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) - (void) printf(gettext("all pools are healthy\n")); - - if (ret != 0) - return (ret); - - if (interval == 0) - break; - - if (count != 0 && --count == 0) - break; - - (void) sleep(interval); - } - - return (0); -} - -typedef struct upgrade_cbdata { - boolean_t cb_first; - boolean_t cb_unavail; - char cb_poolname[ZFS_MAX_DATASET_NAME_LEN]; - int cb_argc; - uint64_t cb_version; - char **cb_argv; -} upgrade_cbdata_t; - -#ifdef __FreeBSD__ -static int -is_root_pool(zpool_handle_t *zhp) -{ - static struct statfs sfs; - static char *poolname = NULL; - static boolean_t stated = B_FALSE; - char *slash; - - if (!stated) { - stated = B_TRUE; - if (statfs("/", &sfs) == -1) { - (void) fprintf(stderr, - "Unable to stat root file system: %s.\n", - strerror(errno)); - return 
(0); - } - if (strcmp(sfs.f_fstypename, "zfs") != 0) - return (0); - poolname = sfs.f_mntfromname; - if ((slash = strchr(poolname, '/')) != NULL) - *slash = '\0'; - } - return (poolname != NULL && strcmp(poolname, zpool_get_name(zhp)) == 0); -} - -static void -root_pool_upgrade_check(zpool_handle_t *zhp, char *poolname, int size) -{ - - if (poolname[0] == '\0' && is_root_pool(zhp)) - (void) strlcpy(poolname, zpool_get_name(zhp), size); -} -#endif /* FreeBSD */ - -static int -upgrade_version(zpool_handle_t *zhp, uint64_t version) -{ - int ret; - nvlist_t *config; - uint64_t oldversion; - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &oldversion) == 0); - - assert(SPA_VERSION_IS_SUPPORTED(oldversion)); - assert(oldversion < version); - - ret = zpool_upgrade(zhp, version); - if (ret != 0) - return (ret); - - if (version >= SPA_VERSION_FEATURES) { - (void) printf(gettext("Successfully upgraded " - "'%s' from version %llu to feature flags.\n"), - zpool_get_name(zhp), oldversion); - } else { - (void) printf(gettext("Successfully upgraded " - "'%s' from version %llu to version %llu.\n"), - zpool_get_name(zhp), oldversion, version); - } - - return (0); -} - -static int -upgrade_enable_all(zpool_handle_t *zhp, int *countp) -{ - int i, ret, count; - boolean_t firstff = B_TRUE; - nvlist_t *enabled = zpool_get_features(zhp); - - count = 0; - for (i = 0; i < SPA_FEATURES; i++) { - const char *fname = spa_feature_table[i].fi_uname; - const char *fguid = spa_feature_table[i].fi_guid; - if (!nvlist_exists(enabled, fguid)) { - char *propname; - verify(-1 != asprintf(&propname, "feature@%s", fname)); - ret = zpool_set_prop(zhp, propname, - ZFS_FEATURE_ENABLED); - if (ret != 0) { - free(propname); - return (ret); - } - count++; - - if (firstff) { - (void) printf(gettext("Enabled the " - "following features on '%s':\n"), - zpool_get_name(zhp)); - firstff = B_FALSE; - } - (void) printf(gettext(" %s\n"), fname); - free(propname); - 
} - } - - if (countp != NULL) - *countp = count; - return (0); -} - -static int -upgrade_cb(zpool_handle_t *zhp, void *arg) -{ - upgrade_cbdata_t *cbp = arg; - nvlist_t *config; - uint64_t version; - boolean_t printnl = B_FALSE; - int ret; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " - "currently unavailable.\n\n"), zpool_get_name(zhp)); - cbp->cb_unavail = B_TRUE; - /* Allow iteration to continue. */ - return (0); - } - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &version) == 0); - - assert(SPA_VERSION_IS_SUPPORTED(version)); - - if (version < cbp->cb_version) { - cbp->cb_first = B_FALSE; - ret = upgrade_version(zhp, cbp->cb_version); - if (ret != 0) - return (ret); -#ifdef __FreeBSD__ - root_pool_upgrade_check(zhp, cbp->cb_poolname, - sizeof(cbp->cb_poolname)); -#endif /* __FreeBSD__ */ - printnl = B_TRUE; - -#ifdef illumos - /* - * If they did "zpool upgrade -a", then we could - * be doing ioctls to different pools. We need - * to log this history once to each pool, and bypass - * the normal history logging that happens in main(). - */ - (void) zpool_log_history(g_zfs, history_str); - log_history = B_FALSE; -#endif - } - - if (cbp->cb_version >= SPA_VERSION_FEATURES) { - int count; - ret = upgrade_enable_all(zhp, &count); - if (ret != 0) - return (ret); - - if (count > 0) { - cbp->cb_first = B_FALSE; - printnl = B_TRUE; -#ifdef __FreeBSD__ - root_pool_upgrade_check(zhp, cbp->cb_poolname, - sizeof(cbp->cb_poolname)); -#endif /* __FreeBSD__ */ - /* - * If they did "zpool upgrade -a", then we could - * be doing ioctls to different pools. We need - * to log this history once to each pool, and bypass - * the normal history logging that happens in main(). 
- */ - (void) zpool_log_history(g_zfs, history_str); - log_history = B_FALSE; - } - } - - if (printnl) { - (void) printf(gettext("\n")); - } - - return (0); -} - -static int -upgrade_list_unavail(zpool_handle_t *zhp, void *arg) -{ - upgrade_cbdata_t *cbp = arg; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - if (cbp->cb_first) { - (void) fprintf(stderr, gettext("The following pools " - "are unavailable and cannot be upgraded as this " - "time.\n\n")); - (void) fprintf(stderr, gettext("POOL\n")); - (void) fprintf(stderr, gettext("------------\n")); - cbp->cb_first = B_FALSE; - } - (void) printf(gettext("%s\n"), zpool_get_name(zhp)); - cbp->cb_unavail = B_TRUE; - } - return (0); -} - -static int -upgrade_list_older_cb(zpool_handle_t *zhp, void *arg) -{ - upgrade_cbdata_t *cbp = arg; - nvlist_t *config; - uint64_t version; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - /* - * This will have been reported by upgrade_list_unavail so - * just allow iteration to continue. - */ - cbp->cb_unavail = B_TRUE; - return (0); - } - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &version) == 0); - - assert(SPA_VERSION_IS_SUPPORTED(version)); - - if (version < SPA_VERSION_FEATURES) { - if (cbp->cb_first) { - (void) printf(gettext("The following pools are " - "formatted with legacy version numbers and can\n" - "be upgraded to use feature flags. 
After " - "being upgraded, these pools\nwill no " - "longer be accessible by software that does not " - "support feature\nflags.\n\n")); - (void) printf(gettext("VER POOL\n")); - (void) printf(gettext("--- ------------\n")); - cbp->cb_first = B_FALSE; - } - - (void) printf("%2llu %s\n", (u_longlong_t)version, - zpool_get_name(zhp)); - } - - return (0); -} - -static int -upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) -{ - upgrade_cbdata_t *cbp = arg; - nvlist_t *config; - uint64_t version; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - /* - * This will have been reported by upgrade_list_unavail so - * just allow iteration to continue. - */ - cbp->cb_unavail = B_TRUE; - return (0); - } - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &version) == 0); - - if (version >= SPA_VERSION_FEATURES) { - int i; - boolean_t poolfirst = B_TRUE; - nvlist_t *enabled = zpool_get_features(zhp); - - for (i = 0; i < SPA_FEATURES; i++) { - const char *fguid = spa_feature_table[i].fi_guid; - const char *fname = spa_feature_table[i].fi_uname; - if (!nvlist_exists(enabled, fguid)) { - if (cbp->cb_first) { - (void) printf(gettext("\nSome " - "supported features are not " - "enabled on the following pools. " - "Once a\nfeature is enabled the " - "pool may become incompatible with " - "software\nthat does not support " - "the feature. 
See " - "zpool-features(7) for " - "details.\n\n")); - (void) printf(gettext("POOL " - "FEATURE\n")); - (void) printf(gettext("------" - "---------\n")); - cbp->cb_first = B_FALSE; - } - - if (poolfirst) { - (void) printf(gettext("%s\n"), - zpool_get_name(zhp)); - poolfirst = B_FALSE; - } - - (void) printf(gettext(" %s\n"), fname); - } - } - } - - return (0); -} - -/* ARGSUSED */ -static int -upgrade_one(zpool_handle_t *zhp, void *data) -{ - boolean_t printnl = B_FALSE; - upgrade_cbdata_t *cbp = data; - uint64_t cur_version; - int ret; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " - "is currently unavailable.\n\n"), zpool_get_name(zhp)); - cbp->cb_unavail = B_TRUE; - return (1); - } - - if (strcmp("log", zpool_get_name(zhp)) == 0) { - (void) printf(gettext("'log' is now a reserved word\n" - "Pool 'log' must be renamed using export and import" - " to upgrade.\n\n")); - return (1); - } - - cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); - if (cur_version > cbp->cb_version) { - (void) printf(gettext("Pool '%s' is already formatted " - "using more current version '%llu'.\n\n"), - zpool_get_name(zhp), cur_version); - return (0); - } - - if (cbp->cb_version != SPA_VERSION && cur_version == cbp->cb_version) { - (void) printf(gettext("Pool '%s' is already formatted " - "using version %llu.\n\n"), zpool_get_name(zhp), - cbp->cb_version); - return (0); - } - - if (cur_version != cbp->cb_version) { - printnl = B_TRUE; - ret = upgrade_version(zhp, cbp->cb_version); - if (ret != 0) - return (ret); -#ifdef __FreeBSD__ - root_pool_upgrade_check(zhp, cbp->cb_poolname, - sizeof(cbp->cb_poolname)); -#endif /* __FreeBSD__ */ - } - - if (cbp->cb_version >= SPA_VERSION_FEATURES) { - int count = 0; - ret = upgrade_enable_all(zhp, &count); - if (ret != 0) - return (ret); - - if (count != 0) { - printnl = B_TRUE; -#ifdef __FreeBSD__ - root_pool_upgrade_check(zhp, cbp->cb_poolname, - 
sizeof(cbp->cb_poolname)); -#endif /* __FreeBSD __*/ - } else if (cur_version == SPA_VERSION) { - (void) printf(gettext("Pool '%s' already has all " - "supported features enabled.\n\n"), - zpool_get_name(zhp)); - } - } - - if (printnl) { - (void) printf(gettext("\n")); - } - - return (0); -} - -/* - * zpool upgrade - * zpool upgrade -v - * zpool upgrade [-V version] <-a | pool ...> - * - * With no arguments, display downrev'd ZFS pool available for upgrade. - * Individual pools can be upgraded by specifying the pool, and '-a' will - * upgrade all pools. - */ -int -zpool_do_upgrade(int argc, char **argv) -{ - int c; - upgrade_cbdata_t cb = { 0 }; - int ret = 0; - boolean_t showversions = B_FALSE; - boolean_t upgradeall = B_FALSE; - char *end; - - - /* check options */ - while ((c = getopt(argc, argv, ":avV:")) != -1) { - switch (c) { - case 'a': - upgradeall = B_TRUE; - break; - case 'v': - showversions = B_TRUE; - break; - case 'V': - cb.cb_version = strtoll(optarg, &end, 10); - if (*end != '\0' || - !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) { - (void) fprintf(stderr, - gettext("invalid version '%s'\n"), optarg); - usage(B_FALSE); - } - break; - case ':': - (void) fprintf(stderr, gettext("missing argument for " - "'%c' option\n"), optopt); - usage(B_FALSE); - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - cb.cb_argc = argc; - cb.cb_argv = argv; - argc -= optind; - argv += optind; - - if (cb.cb_version == 0) { - cb.cb_version = SPA_VERSION; - } else if (!upgradeall && argc == 0) { - (void) fprintf(stderr, gettext("-V option is " - "incompatible with other arguments\n")); - usage(B_FALSE); - } - - if (showversions) { - if (upgradeall || argc != 0) { - (void) fprintf(stderr, gettext("-v option is " - "incompatible with other arguments\n")); - usage(B_FALSE); - } - } else if (upgradeall) { - if (argc != 0) { - (void) fprintf(stderr, gettext("-a option should not " - "be used along with a pool 
name\n")); - usage(B_FALSE); - } - } - - (void) printf(gettext("This system supports ZFS pool feature " - "flags.\n\n")); - if (showversions) { - int i; - - (void) printf(gettext("The following features are " - "supported:\n\n")); - (void) printf(gettext("FEAT DESCRIPTION\n")); - (void) printf("----------------------------------------------" - "---------------\n"); - for (i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *fi = &spa_feature_table[i]; - const char *ro = - (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? - " (read-only compatible)" : ""; - - (void) printf("%-37s%s\n", fi->fi_uname, ro); - (void) printf(" %s\n", fi->fi_desc); - } - (void) printf("\n"); - - (void) printf(gettext("The following legacy versions are also " - "supported:\n\n")); - (void) printf(gettext("VER DESCRIPTION\n")); - (void) printf("--- -----------------------------------------" - "---------------\n"); - (void) printf(gettext(" 1 Initial ZFS version\n")); - (void) printf(gettext(" 2 Ditto blocks " - "(replicated metadata)\n")); - (void) printf(gettext(" 3 Hot spares and double parity " - "RAID-Z\n")); - (void) printf(gettext(" 4 zpool history\n")); - (void) printf(gettext(" 5 Compression using the gzip " - "algorithm\n")); - (void) printf(gettext(" 6 bootfs pool property\n")); - (void) printf(gettext(" 7 Separate intent log devices\n")); - (void) printf(gettext(" 8 Delegated administration\n")); - (void) printf(gettext(" 9 refquota and refreservation " - "properties\n")); - (void) printf(gettext(" 10 Cache devices\n")); - (void) printf(gettext(" 11 Improved scrub performance\n")); - (void) printf(gettext(" 12 Snapshot properties\n")); - (void) printf(gettext(" 13 snapused property\n")); - (void) printf(gettext(" 14 passthrough-x aclinherit\n")); - (void) printf(gettext(" 15 user/group space accounting\n")); - (void) printf(gettext(" 16 stmf property support\n")); - (void) printf(gettext(" 17 Triple-parity RAID-Z\n")); - (void) printf(gettext(" 18 Snapshot user holds\n")); - (void) 
printf(gettext(" 19 Log device removal\n")); - (void) printf(gettext(" 20 Compression using zle " - "(zero-length encoding)\n")); - (void) printf(gettext(" 21 Deduplication\n")); - (void) printf(gettext(" 22 Received properties\n")); - (void) printf(gettext(" 23 Slim ZIL\n")); - (void) printf(gettext(" 24 System attributes\n")); - (void) printf(gettext(" 25 Improved scrub stats\n")); - (void) printf(gettext(" 26 Improved snapshot deletion " - "performance\n")); - (void) printf(gettext(" 27 Improved snapshot creation " - "performance\n")); - (void) printf(gettext(" 28 Multiple vdev replacements\n")); - (void) printf(gettext("\nFor more information on a particular " - "version, including supported releases,\n")); - (void) printf(gettext("see the ZFS Administration Guide.\n\n")); - } else if (argc == 0 && upgradeall) { - cb.cb_first = B_TRUE; - ret = zpool_iter(g_zfs, upgrade_cb, &cb); - if (ret == 0 && cb.cb_first) { - if (cb.cb_version == SPA_VERSION) { - (void) printf(gettext("All %spools are already " - "formatted using feature flags.\n\n"), - cb.cb_unavail ? gettext("available ") : ""); - (void) printf(gettext("Every %sfeature flags " - "pool already has all supported features " - "enabled.\n"), - cb.cb_unavail ? gettext("available ") : ""); - } else { - (void) printf(gettext("All pools are already " - "formatted with version %llu or higher.\n"), - cb.cb_version); - } - } - } else if (argc == 0) { - cb.cb_first = B_TRUE; - ret = zpool_iter(g_zfs, upgrade_list_unavail, &cb); - assert(ret == 0); - - if (!cb.cb_first) { - (void) fprintf(stderr, "\n"); - } - - cb.cb_first = B_TRUE; - ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb); - assert(ret == 0); - - if (cb.cb_first) { - (void) printf(gettext("All %spools are formatted using " - "feature flags.\n\n"), cb.cb_unavail ? 
- gettext("available ") : ""); - } else { - (void) printf(gettext("\nUse 'zpool upgrade -v' " - "for a list of available legacy versions.\n")); - } - - cb.cb_first = B_TRUE; - ret = zpool_iter(g_zfs, upgrade_list_disabled_cb, &cb); - assert(ret == 0); - - if (cb.cb_first) { - (void) printf(gettext("Every %sfeature flags pool has " - "all supported features enabled.\n"), - cb.cb_unavail ? gettext("available ") : ""); - } else { - (void) printf(gettext("\n")); - } - } else { - ret = for_each_pool(argc, argv, B_TRUE, NULL, - upgrade_one, &cb); - } - - if (cb.cb_poolname[0] != '\0') { - (void) printf( - "If you boot from pool '%s', don't forget to update boot code.\n" - "Assuming you use GPT partitioning and da0 is your boot disk\n" - "the following command will do it:\n" - "\n" - "\tgpart bootcode -b /boot/pmbr -p /boot/gptzfsboot -i 1 da0\n\n", - cb.cb_poolname); - } - - return (ret); -} - -typedef struct hist_cbdata { - boolean_t first; - boolean_t longfmt; - boolean_t internal; -} hist_cbdata_t; - -static void -print_history_records(nvlist_t *nvhis, hist_cbdata_t *cb) -{ - nvlist_t **records; - uint_t numrecords; - int i; - - verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, - &records, &numrecords) == 0); - for (i = 0; i < numrecords; i++) { - nvlist_t *rec = records[i]; - char tbuf[30] = ""; - - if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { - time_t tsec; - struct tm t; - - tsec = fnvlist_lookup_uint64(records[i], - ZPOOL_HIST_TIME); - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - } - - if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { - (void) printf("%s %s", tbuf, - fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); - } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { - int ievent = - fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); - if (!cb->internal) - continue; - if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { - (void) printf("%s unrecognized record:\n", - tbuf); - dump_nvlist(rec, 4); - continue; - } - (void) printf("%s 
[internal %s txg:%lld] %s", tbuf, - zfs_history_event_names[ievent], - fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), - fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); - } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { - if (!cb->internal) - continue; - (void) printf("%s [txg:%lld] %s", tbuf, - fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), - fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); - if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { - (void) printf(" %s (%llu)", - fnvlist_lookup_string(rec, - ZPOOL_HIST_DSNAME), - fnvlist_lookup_uint64(rec, - ZPOOL_HIST_DSID)); - } - (void) printf(" %s", fnvlist_lookup_string(rec, - ZPOOL_HIST_INT_STR)); - } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { - if (!cb->internal) - continue; - (void) printf("%s ioctl %s\n", tbuf, - fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); - if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { - (void) printf(" input:\n"); - dump_nvlist(fnvlist_lookup_nvlist(rec, - ZPOOL_HIST_INPUT_NVL), 8); - } - if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { - (void) printf(" output:\n"); - dump_nvlist(fnvlist_lookup_nvlist(rec, - ZPOOL_HIST_OUTPUT_NVL), 8); - } - if (nvlist_exists(rec, ZPOOL_HIST_ERRNO)) { - (void) printf(" errno: %lld\n", - fnvlist_lookup_int64(rec, - ZPOOL_HIST_ERRNO)); - } - } else { - if (!cb->internal) - continue; - (void) printf("%s unrecognized record:\n", tbuf); - dump_nvlist(rec, 4); - } - - if (!cb->longfmt) { - (void) printf("\n"); - continue; - } - (void) printf(" ["); - if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { - uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); - struct passwd *pwd = getpwuid(who); - (void) printf("user %d ", (int)who); - if (pwd != NULL) - (void) printf("(%s) ", pwd->pw_name); - } - if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { - (void) printf("on %s", - fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); - } - if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { - (void) printf(":%s", - fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); - } - (void) printf("]"); - (void) 
printf("\n"); - } -} - -/* - * Print out the command history for a specific pool. - */ -static int -get_history_one(zpool_handle_t *zhp, void *data) -{ - nvlist_t *nvhis; - int ret; - hist_cbdata_t *cb = (hist_cbdata_t *)data; - uint64_t off = 0; - boolean_t eof = B_FALSE; - - cb->first = B_FALSE; - - (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); - - while (!eof) { - if ((ret = zpool_get_history(zhp, &nvhis, &off, &eof)) != 0) - return (ret); - - print_history_records(nvhis, cb); - nvlist_free(nvhis); - } - (void) printf("\n"); - - return (ret); -} - -/* - * zpool history <pool> - * - * Displays the history of commands that modified pools. - */ -int -zpool_do_history(int argc, char **argv) -{ - hist_cbdata_t cbdata = { 0 }; - int ret; - int c; - - cbdata.first = B_TRUE; - /* check options */ - while ((c = getopt(argc, argv, "li")) != -1) { - switch (c) { - case 'l': - cbdata.longfmt = B_TRUE; - break; - case 'i': - cbdata.internal = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - argc -= optind; - argv += optind; - - ret = for_each_pool(argc, argv, B_FALSE, NULL, get_history_one, - &cbdata); - - if (argc == 0 && cbdata.first == B_TRUE) { - (void) printf(gettext("no pools available\n")); - return (0); - } - - return (ret); -} - -static int -get_callback(zpool_handle_t *zhp, void *data) -{ - zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; - char value[MAXNAMELEN]; - zprop_source_t srctype; - zprop_list_t *pl; - - for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { - - /* - * Skip the special fake placeholder. This will also skip - * over the name property when 'all' is specified. 
- */ - if (pl->pl_prop == ZPOOL_PROP_NAME && - pl == cbp->cb_proplist) - continue; - - if (pl->pl_prop == ZPROP_INVAL && - (zpool_prop_feature(pl->pl_user_prop) || - zpool_prop_unsupported(pl->pl_user_prop))) { - srctype = ZPROP_SRC_LOCAL; - - if (zpool_prop_get_feature(zhp, pl->pl_user_prop, - value, sizeof (value)) == 0) { - zprop_print_one_property(zpool_get_name(zhp), - cbp, pl->pl_user_prop, value, srctype, - NULL, NULL); - } - } else { - if (zpool_get_prop(zhp, pl->pl_prop, value, - sizeof (value), &srctype, cbp->cb_literal) != 0) - continue; - - zprop_print_one_property(zpool_get_name(zhp), cbp, - zpool_prop_to_name(pl->pl_prop), value, srctype, - NULL, NULL); - } - } - return (0); -} - -/* - * zpool get [-Hp] [-o "all" | field[,...]] <"all" | property[,...]> <pool> ... - * - * -H Scripted mode. Don't display headers, and separate properties - * by a single tab. - * -o List of columns to display. Defaults to - * "name,property,value,source". - * -p Diplay values in parsable (exact) format. - * - * Get properties of pools in the system. Output space statistics - * for each one as well as other attributes. - */ -int -zpool_do_get(int argc, char **argv) -{ - zprop_get_cbdata_t cb = { 0 }; - zprop_list_t fake_name = { 0 }; - int ret; - int c, i; - char *value; - - cb.cb_first = B_TRUE; - - /* - * Set up default columns and sources. 
- */ - cb.cb_sources = ZPROP_SRC_ALL; - cb.cb_columns[0] = GET_COL_NAME; - cb.cb_columns[1] = GET_COL_PROPERTY; - cb.cb_columns[2] = GET_COL_VALUE; - cb.cb_columns[3] = GET_COL_SOURCE; - cb.cb_type = ZFS_TYPE_POOL; - - /* check options */ - while ((c = getopt(argc, argv, ":Hpo:")) != -1) { - switch (c) { - case 'p': - cb.cb_literal = B_TRUE; - break; - case 'H': - cb.cb_scripted = B_TRUE; - break; - case 'o': - bzero(&cb.cb_columns, sizeof (cb.cb_columns)); - i = 0; - while (*optarg != '\0') { - static char *col_subopts[] = - { "name", "property", "value", "source", - "all", NULL }; - - if (i == ZFS_GET_NCOLS) { - (void) fprintf(stderr, gettext("too " - "many fields given to -o " - "option\n")); - usage(B_FALSE); - } - - switch (getsubopt(&optarg, col_subopts, - &value)) { - case 0: - cb.cb_columns[i++] = GET_COL_NAME; - break; - case 1: - cb.cb_columns[i++] = GET_COL_PROPERTY; - break; - case 2: - cb.cb_columns[i++] = GET_COL_VALUE; - break; - case 3: - cb.cb_columns[i++] = GET_COL_SOURCE; - break; - case 4: - if (i > 0) { - (void) fprintf(stderr, - gettext("\"all\" conflicts " - "with specific fields " - "given to -o option\n")); - usage(B_FALSE); - } - cb.cb_columns[0] = GET_COL_NAME; - cb.cb_columns[1] = GET_COL_PROPERTY; - cb.cb_columns[2] = GET_COL_VALUE; - cb.cb_columns[3] = GET_COL_SOURCE; - i = ZFS_GET_NCOLS; - break; - default: - (void) fprintf(stderr, - gettext("invalid column name " - "'%s'\n"), suboptarg); - usage(B_FALSE); - } - } - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; - - if (argc < 1) { - (void) fprintf(stderr, gettext("missing property " - "argument\n")); - usage(B_FALSE); - } - - if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist, - ZFS_TYPE_POOL) != 0) - usage(B_FALSE); - - argc--; - argv++; - - if (cb.cb_proplist != NULL) { - fake_name.pl_prop = ZPOOL_PROP_NAME; - fake_name.pl_width = strlen(gettext("NAME")); - fake_name.pl_next = 
cb.cb_proplist; - cb.cb_proplist = &fake_name; - } - - ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, - get_callback, &cb); - - if (cb.cb_proplist == &fake_name) - zprop_free_list(fake_name.pl_next); - else - zprop_free_list(cb.cb_proplist); - - return (ret); -} - -typedef struct set_cbdata { - char *cb_propname; - char *cb_value; - boolean_t cb_any_successful; -} set_cbdata_t; - -int -set_callback(zpool_handle_t *zhp, void *data) -{ - int error; - set_cbdata_t *cb = (set_cbdata_t *)data; - - error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value); - - if (!error) - cb->cb_any_successful = B_TRUE; - - return (error); -} - -int -zpool_do_set(int argc, char **argv) -{ - set_cbdata_t cb = { 0 }; - int error; - - if (argc > 1 && argv[1][0] == '-') { - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - argv[1][1]); - usage(B_FALSE); - } - - if (argc < 2) { - (void) fprintf(stderr, gettext("missing property=value " - "argument\n")); - usage(B_FALSE); - } - - if (argc < 3) { - (void) fprintf(stderr, gettext("missing pool name\n")); - usage(B_FALSE); - } - - if (argc > 3) { - (void) fprintf(stderr, gettext("too many pool names\n")); - usage(B_FALSE); - } - - cb.cb_propname = argv[1]; - cb.cb_value = strchr(cb.cb_propname, '='); - if (cb.cb_value == NULL) { - (void) fprintf(stderr, gettext("missing value in " - "property=value argument\n")); - usage(B_FALSE); - } - - *(cb.cb_value) = '\0'; - cb.cb_value++; - - error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, - set_callback, &cb); - - return (error); -} - -static int -find_command_idx(char *command, int *idx) -{ - int i; - - for (i = 0; i < NCOMMAND; i++) { - if (command_table[i].name == NULL) - continue; - - if (strcmp(command, command_table[i].name) == 0) { - *idx = i; - return (0); - } - } - return (1); -} - -int -main(int argc, char **argv) -{ - int ret = 0; - int i; - char *cmdname; - - (void) setlocale(LC_ALL, ""); - (void) textdomain(TEXT_DOMAIN); - - if ((g_zfs = libzfs_init()) == NULL) { 
- (void) fprintf(stderr, gettext("internal error: failed to " - "initialize ZFS library\n")); - return (1); - } - - libzfs_print_on_error(g_zfs, B_TRUE); - - opterr = 0; - - /* - * Make sure the user has specified some command. - */ - if (argc < 2) { - (void) fprintf(stderr, gettext("missing command\n")); - usage(B_FALSE); - } - - cmdname = argv[1]; - - /* - * Special case '-?' - */ - if (strcmp(cmdname, "-?") == 0) - usage(B_TRUE); - - zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); - - /* - * Run the appropriate command. - */ - if (find_command_idx(cmdname, &i) == 0) { - current_command = &command_table[i]; - ret = command_table[i].func(argc - 1, argv + 1); - } else if (strchr(cmdname, '=')) { - verify(find_command_idx("set", &i) == 0); - current_command = &command_table[i]; - ret = command_table[i].func(argc, argv); - } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) { - /* - * 'freeze' is a vile debugging abomination, so we treat - * it as such. - */ - zfs_cmd_t zc = { 0 }; - (void) strlcpy(zc.zc_name, argv[2], sizeof (zc.zc_name)); - return (!!zfs_ioctl(g_zfs, ZFS_IOC_POOL_FREEZE, &zc)); - } else { - (void) fprintf(stderr, gettext("unrecognized " - "command '%s'\n"), cmdname); - usage(B_FALSE); - } - - if (ret == 0 && log_history) - (void) zpool_log_history(g_zfs, history_str); - - libzfs_fini(g_zfs); - - /* - * The 'ZFS_ABORT' environment variable causes us to dump core on exit - * for the purposes of running ::findleaks. 
- */ - if (getenv("ZFS_ABORT") != NULL) { - (void) printf("dumping core by request\n"); - abort(); - } - - return (ret); -} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c deleted file mode 100644 index c7a002efb17c..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <errno.h> -#include <libgen.h> -#include <libintl.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> - -#include "zpool_util.h" - -/* - * Utility function to guarantee malloc() success. - */ -void * -safe_malloc(size_t size) -{ - void *data; - - if ((data = calloc(1, size)) == NULL) { - (void) fprintf(stderr, "internal error: out of memory\n"); - exit(1); - } - - return (data); -} - -/* - * Display an out of memory error message and abort the current program. 
- */ -void -zpool_no_memory(void) -{ - assert(errno == ENOMEM); - (void) fprintf(stderr, - gettext("internal error: out of memory\n")); - exit(1); -} - -/* - * Return the number of logs in supplied nvlist - */ -uint_t -num_logs(nvlist_t *nv) -{ - uint_t nlogs = 0; - uint_t c, children; - nvlist_t **child; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return (0); - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; - - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if (is_log) - nlogs++; - } - return (nlogs); -} diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h deleted file mode 100644 index 118029a22866..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- */ - -#ifndef ZPOOL_UTIL_H -#define ZPOOL_UTIL_H - -#include <libnvpair.h> -#include <libzfs.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Basic utility functions - */ -void *safe_malloc(size_t); -void zpool_no_memory(void); -uint_t num_logs(nvlist_t *nv); - -/* - * Virtual device functions - */ - -nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, - uint64_t boot_size, int argc, char **argv); -nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, - nvlist_t *props, splitflags_t flags, int argc, char **argv); - -/* - * Pool list functions - */ -int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, - zpool_iter_f, void *); - -typedef struct zpool_list zpool_list_t; - -zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *); -void pool_list_update(zpool_list_t *); -int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *); -void pool_list_free(zpool_list_t *); -int pool_list_count(zpool_list_t *); -void pool_list_remove(zpool_list_t *, zpool_handle_t *); - -extern libzfs_handle_t *g_zfs; - -#ifdef __cplusplus -} -#endif - -#endif /* ZPOOL_UTIL_H */ diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c deleted file mode 100644 index 43d66d2263e0..000000000000 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c +++ /dev/null @@ -1,1729 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2018 by Delphix. All rights reserved. - * Copyright (c) 2016, 2017 Intel Corporation. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. - */ - -/* - * Functions to convert between a list of vdevs and an nvlist representing the - * configuration. Each entry in the list can be one of: - * - * Device vdevs - * disk=(path=..., devid=...) - * file=(path=...) - * - * Group vdevs - * raidz[1|2]=(...) - * mirror=(...) - * - * Hot spares - * - * While the underlying implementation supports it, group vdevs cannot contain - * other group vdevs. All userland verification of devices is contained within - * this file. If successful, the nvlist returned can be passed directly to the - * kernel; we've done as much verification as possible in userland. - * - * Hot spares are a special case, and passed down as an array of disk vdevs, at - * the same level as the root of the vdev tree. - * - * The only function exported by this file is 'make_root_vdev'. The - * function performs several passes: - * - * 1. Construct the vdev specification. Performs syntax validation and - * makes sure each device is valid. - * 2. Check for devices in use. Using libdiskmgt, makes sure that no - * devices are also in use. Some can be overridden using the 'force' - * flag, others cannot. - * 3. Check for replication errors if the 'force' flag is not specified. - * validates that the replication level is consistent across the - * entire pool. - * 4. Call libzfs to label any whole disks with an EFI label. 
- */ - -#include <assert.h> -#include <devid.h> -#include <errno.h> -#include <fcntl.h> -#include <libintl.h> -#include <libnvpair.h> -#include <limits.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <paths.h> -#include <sys/stat.h> -#include <sys/disk.h> -#include <sys/mntent.h> -#include <libgeom.h> - -#include "zpool_util.h" - -#define BACKUP_SLICE "s2" - -/* - * For any given vdev specification, we can have multiple errors. The - * vdev_error() function keeps track of whether we have seen an error yet, and - * prints out a header if its the first error we've seen. - */ -boolean_t error_seen; -boolean_t is_force; - -/*PRINTFLIKE1*/ -static void -vdev_error(const char *fmt, ...) -{ - va_list ap; - - if (!error_seen) { - (void) fprintf(stderr, gettext("invalid vdev specification\n")); - if (!is_force) - (void) fprintf(stderr, gettext("use '-f' to override " - "the following errors:\n")); - else - (void) fprintf(stderr, gettext("the following errors " - "must be manually repaired:\n")); - error_seen = B_TRUE; - } - - va_start(ap, fmt); - (void) vfprintf(stderr, fmt, ap); - va_end(ap); -} - -#ifdef illumos -static void -libdiskmgt_error(int error) -{ - /* - * ENXIO/ENODEV is a valid error message if the device doesn't live in - * /dev/dsk. Don't bother printing an error message in this case. - */ - if (error == ENXIO || error == ENODEV) - return; - - (void) fprintf(stderr, gettext("warning: device in use checking " - "failed: %s\n"), strerror(error)); -} - -/* - * Validate a device, passing the bulk of the work off to libdiskmgt. 
- */ -static int -check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) -{ - char *msg; - int error = 0; - dm_who_type_t who; - - if (force) - who = DM_WHO_ZPOOL_FORCE; - else if (isspare) - who = DM_WHO_ZPOOL_SPARE; - else - who = DM_WHO_ZPOOL; - - if (dm_inuse((char *)path, &msg, who, &error) || error) { - if (error != 0) { - libdiskmgt_error(error); - return (0); - } else { - vdev_error("%s", msg); - free(msg); - return (-1); - } - } - - /* - * If we're given a whole disk, ignore overlapping slices since we're - * about to label it anyway. - */ - error = 0; - if (!wholedisk && !force && - (dm_isoverlapping((char *)path, &msg, &error) || error)) { - if (error == 0) { - /* dm_isoverlapping returned -1 */ - vdev_error(gettext("%s overlaps with %s\n"), path, msg); - free(msg); - return (-1); - } else if (error != ENODEV) { - /* libdiskmgt's devcache only handles physical drives */ - libdiskmgt_error(error); - return (0); - } - } - - return (0); -} - - -/* - * Validate a whole disk. Iterate over all slices on the disk and make sure - * that none is in use by calling check_slice(). - */ -static int -check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) -{ - dm_descriptor_t *drive, *media, *slice; - int err = 0; - int i; - int ret; - - /* - * Get the drive associated with this disk. This should never fail, - * because we already have an alias handle open for the device. - */ - if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, - &err)) == NULL || *drive == NULL) { - if (err) - libdiskmgt_error(err); - return (0); - } - - if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, - &err)) == NULL) { - dm_free_descriptors(drive); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(drive); - - /* - * It is possible that the user has specified a removable media drive, - * and the media is not present. 
- */ - if (*media == NULL) { - dm_free_descriptors(media); - vdev_error(gettext("'%s' has no media in drive\n"), name); - return (-1); - } - - if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, - &err)) == NULL) { - dm_free_descriptors(media); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(media); - - ret = 0; - - /* - * Iterate over all slices and report any errors. We don't care about - * overlapping slices because we are using the whole disk. - */ - for (i = 0; slice[i] != NULL; i++) { - char *name = dm_get_name(slice[i], &err); - - if (check_slice(name, force, B_TRUE, isspare) != 0) - ret = -1; - - dm_free_name(name); - } - - dm_free_descriptors(slice); - return (ret); -} - -/* - * Validate a device. - */ -static int -check_device(const char *path, boolean_t force, boolean_t isspare) -{ - dm_descriptor_t desc; - int err; - char *dev; - - /* - * For whole disks, libdiskmgt does not include the leading dev path. - */ - dev = strrchr(path, '/'); - assert(dev != NULL); - dev++; - if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { - err = check_disk(path, desc, force, isspare); - dm_free_descriptor(desc); - return (err); - } - - return (check_slice(path, force, B_FALSE, isspare)); -} -#endif /* illumos */ - -/* - * Check that a file is valid. All we can do in this case is check that it's - * not in use by another pool, and not in use by swap. - */ -static int -check_file(const char *file, boolean_t force, boolean_t isspare) -{ - char *name; - int fd; - int ret = 0; - int err; - pool_state_t state; - boolean_t inuse; - -#ifdef illumos - if (dm_inuse_swap(file, &err)) { - if (err) - libdiskmgt_error(err); - else - vdev_error(gettext("%s is currently used by swap. 
" - "Please see swap(1M).\n"), file); - return (-1); - } -#endif - - if ((fd = open(file, O_RDONLY)) < 0) - return (0); - - if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { - const char *desc; - - switch (state) { - case POOL_STATE_ACTIVE: - desc = gettext("active"); - break; - - case POOL_STATE_EXPORTED: - desc = gettext("exported"); - break; - - case POOL_STATE_POTENTIALLY_ACTIVE: - desc = gettext("potentially active"); - break; - - default: - desc = gettext("unknown"); - break; - } - - /* - * Allow hot spares to be shared between pools. - */ - if (state == POOL_STATE_SPARE && isspare) - return (0); - - if (state == POOL_STATE_ACTIVE || - state == POOL_STATE_SPARE || !force) { - switch (state) { - case POOL_STATE_SPARE: - vdev_error(gettext("%s is reserved as a hot " - "spare for pool %s\n"), file, name); - break; - default: - vdev_error(gettext("%s is part of %s pool " - "'%s'\n"), file, desc, name); - break; - } - ret = -1; - } - - free(name); - } - - (void) close(fd); - return (ret); -} - -static int -check_device(const char *name, boolean_t force, boolean_t isspare) -{ - char path[MAXPATHLEN]; - - if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0) - snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name); - else - strlcpy(path, name, sizeof(path)); - - return (check_file(path, force, isspare)); -} - -/* - * By "whole disk" we mean an entire physical disk (something we can - * label, toggle the write cache on, etc.) as opposed to the full - * capacity of a pseudo-device such as lofi or did. We act as if we - * are labeling the disk, which should be a pretty good test of whether - * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if - * it isn't. 
- */ -static boolean_t -is_whole_disk(const char *arg) -{ -#ifdef illumos - struct dk_gpt *label; - int fd; - char path[MAXPATHLEN]; - - (void) snprintf(path, sizeof (path), "%s%s%s", - ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) - return (B_FALSE); - if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { - (void) close(fd); - return (B_FALSE); - } - efi_free(label); - (void) close(fd); - return (B_TRUE); -#else - int fd; - - fd = g_open(arg, 0); - if (fd >= 0) { - g_close(fd); - return (B_TRUE); - } - return (B_FALSE); -#endif -} - -/* - * Create a leaf vdev. Determine if this is a file or a device. If it's a - * device, fill in the device id to make a complete nvlist. Valid forms for a - * leaf vdev are: - * - * /dev/dsk/xxx Complete disk path - * /xxx Full path to file - * xxx Shorthand for /dev/dsk/xxx - */ -static nvlist_t * -make_leaf_vdev(const char *arg, uint64_t is_log) -{ - char path[MAXPATHLEN]; - struct stat64 statbuf; - nvlist_t *vdev = NULL; - char *type = NULL; - boolean_t wholedisk = B_FALSE; - - /* - * Determine what type of vdev this is, and put the full path into - * 'path'. We detect whether this is a device of file afterwards by - * checking the st_mode of the file. - */ - if (arg[0] == '/') { - /* - * Complete device or file path. Exact type is determined by - * examining the file descriptor afterwards. - */ - wholedisk = is_whole_disk(arg); - if (!wholedisk && (stat64(arg, &statbuf) != 0)) { - (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - arg, strerror(errno)); - return (NULL); - } - - (void) strlcpy(path, arg, sizeof (path)); - } else { - /* - * This may be a short path for a device, or it could be total - * gibberish. Check to see if it's a known device in - * /dev/dsk/. As part of this check, see if we've been given a - * an entire disk (minus the slice number). 
- */ - if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) - strlcpy(path, arg, sizeof (path)); - else - snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg); - wholedisk = is_whole_disk(path); - if (!wholedisk && (stat64(path, &statbuf) != 0)) { - /* - * If we got ENOENT, then the user gave us - * gibberish, so try to direct them with a - * reasonable error message. Otherwise, - * regurgitate strerror() since it's the best we - * can do. - */ - if (errno == ENOENT) { - (void) fprintf(stderr, - gettext("cannot open '%s': no such " - "GEOM provider\n"), arg); - (void) fprintf(stderr, - gettext("must be a full path or " - "shorthand device name\n")); - return (NULL); - } else { - (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - path, strerror(errno)); - return (NULL); - } - } - } - -#ifdef __FreeBSD__ - if (S_ISCHR(statbuf.st_mode)) { - statbuf.st_mode &= ~S_IFCHR; - statbuf.st_mode |= S_IFBLK; - wholedisk = B_FALSE; - } -#endif - - /* - * Determine whether this is a device or a file. - */ - if (wholedisk || S_ISBLK(statbuf.st_mode)) { - type = VDEV_TYPE_DISK; - } else if (S_ISREG(statbuf.st_mode)) { - type = VDEV_TYPE_FILE; - } else { - (void) fprintf(stderr, gettext("cannot use '%s': must be a " - "GEOM provider or regular file\n"), path); - return (NULL); - } - - /* - * Finally, we have the complete device or file, and we know that it is - * acceptable to use. Construct the nvlist to describe this vdev. All - * vdevs have a 'path' element, and devices also have a 'devid' element. 
- */ - verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); - verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); - verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0); - if (is_log) - verify(nvlist_add_string(vdev, ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_LOG) == 0); - if (strcmp(type, VDEV_TYPE_DISK) == 0) - verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, - (uint64_t)wholedisk) == 0); - -#ifdef have_devid - /* - * For a whole disk, defer getting its devid until after labeling it. - */ - if (S_ISBLK(statbuf.st_mode) && !wholedisk) { - /* - * Get the devid for the device. - */ - int fd; - ddi_devid_t devid; - char *minor = NULL, *devid_str = NULL; - - if ((fd = open(path, O_RDONLY)) < 0) { - (void) fprintf(stderr, gettext("cannot open '%s': " - "%s\n"), path, strerror(errno)); - nvlist_free(vdev); - return (NULL); - } - - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0 && - (devid_str = devid_str_encode(devid, minor)) != - NULL) { - verify(nvlist_add_string(vdev, - ZPOOL_CONFIG_DEVID, devid_str) == 0); - } - if (devid_str != NULL) - devid_str_free(devid_str); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - - (void) close(fd); - } -#endif - - return (vdev); -} - -/* - * Go through and verify the replication level of the pool is consistent. - * Performs the following checks: - * - * For the new spec, verifies that devices in mirrors and raidz are the - * same size. - * - * If the current configuration already has inconsistent replication - * levels, ignore any other potential problems in the new spec. - * - * Otherwise, make sure that the current spec (if there is one) and the new - * spec have consistent replication levels. - * - * If there is no current spec (create), make sure new spec has at least - * one general purpose vdev. 
- */ -typedef struct replication_level { - char *zprl_type; - uint64_t zprl_children; - uint64_t zprl_parity; -} replication_level_t; - -#define ZPOOL_FUZZ (16 * 1024 * 1024) - -static boolean_t -is_raidz_mirror(replication_level_t *a, replication_level_t *b, - replication_level_t **raidz, replication_level_t **mirror) -{ - if (strcmp(a->zprl_type, "raidz") == 0 && - strcmp(b->zprl_type, "mirror") == 0) { - *raidz = a; - *mirror = b; - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * Given a list of toplevel vdevs, return the current replication level. If - * the config is inconsistent, then NULL is returned. If 'fatal' is set, then - * an error message will be displayed for each self-inconsistent vdev. - */ -static replication_level_t * -get_replication(nvlist_t *nvroot, boolean_t fatal) -{ - nvlist_t **top; - uint_t t, toplevels; - nvlist_t **child; - uint_t c, children; - nvlist_t *nv; - char *type; - replication_level_t lastrep = {0}; - replication_level_t rep; - replication_level_t *ret; - replication_level_t *raidz, *mirror; - boolean_t dontreport; - - ret = safe_malloc(sizeof (replication_level_t)); - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - - for (t = 0; t < toplevels; t++) { - uint64_t is_log = B_FALSE; - - nv = top[t]; - - /* - * For separate logs we ignore the top level vdev replication - * constraints. - */ - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); - if (is_log) - continue; - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, - &type) == 0); - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - /* - * This is a 'file' or 'disk' vdev. - */ - rep.zprl_type = type; - rep.zprl_children = 1; - rep.zprl_parity = 0; - } else { - uint64_t vdev_size; - - /* - * This is a mirror or RAID-Z vdev. Go through and make - * sure the contents are all the same (files vs. disks), - * keeping track of the number of elements in the - * process. 
- * - * We also check that the size of each vdev (if it can - * be determined) is the same. - */ - rep.zprl_type = type; - rep.zprl_children = 0; - - if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { - verify(nvlist_lookup_uint64(nv, - ZPOOL_CONFIG_NPARITY, - &rep.zprl_parity) == 0); - assert(rep.zprl_parity != 0); - } else { - rep.zprl_parity = 0; - } - - /* - * The 'dontreport' variable indicates that we've - * already reported an error for this spec, so don't - * bother doing it again. - */ - type = NULL; - dontreport = 0; - vdev_size = -1ULL; - for (c = 0; c < children; c++) { - boolean_t is_replacing, is_spare; - nvlist_t *cnv = child[c]; - char *path; - struct stat64 statbuf; - uint64_t size = -1ULL; - char *childtype; - int fd, err; - - rep.zprl_children++; - - verify(nvlist_lookup_string(cnv, - ZPOOL_CONFIG_TYPE, &childtype) == 0); - - /* - * If this is a replacing or spare vdev, then - * get the real first child of the vdev. - */ - is_replacing = strcmp(childtype, - VDEV_TYPE_REPLACING) == 0; - is_spare = strcmp(childtype, - VDEV_TYPE_SPARE) == 0; - if (is_replacing || is_spare) { - nvlist_t **rchild; - uint_t rchildren; - - verify(nvlist_lookup_nvlist_array(cnv, - ZPOOL_CONFIG_CHILDREN, &rchild, - &rchildren) == 0); - assert((is_replacing && rchildren == 2) - || (is_spare && rchildren >= 2)); - cnv = rchild[0]; - - verify(nvlist_lookup_string(cnv, - ZPOOL_CONFIG_TYPE, - &childtype) == 0); - if (strcmp(childtype, - VDEV_TYPE_SPARE) == 0) { - /* We have a replacing vdev with - * a spare child. Get the first - * real child of the spare - */ - verify( - nvlist_lookup_nvlist_array( - cnv, - ZPOOL_CONFIG_CHILDREN, - &rchild, - &rchildren) == 0); - assert(rchildren >= 2); - cnv = rchild[0]; - } - } - - verify(nvlist_lookup_string(cnv, - ZPOOL_CONFIG_PATH, &path) == 0); - - /* - * If we have a raidz/mirror that combines disks - * with files, report it as an error. 
- */ - if (!dontreport && type != NULL && - strcmp(type, childtype) != 0) { - if (ret != NULL) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication " - "level: %s contains both " - "files and devices\n"), - rep.zprl_type); - else - return (NULL); - dontreport = B_TRUE; - } - - /* - * According to stat(2), the value of 'st_size' - * is undefined for block devices and character - * devices. But there is no effective way to - * determine the real size in userland. - * - * Instead, we'll take advantage of an - * implementation detail of spec_size(). If the - * device is currently open, then we (should) - * return a valid size. - * - * If we still don't get a valid size (indicated - * by a size of 0 or MAXOFFSET_T), then ignore - * this device altogether. - */ - if ((fd = open(path, O_RDONLY)) >= 0) { - err = fstat64(fd, &statbuf); - (void) close(fd); - } else { - err = stat64(path, &statbuf); - } - - if (err != 0 || - statbuf.st_size == 0 || - statbuf.st_size == MAXOFFSET_T) - continue; - - size = statbuf.st_size; - - /* - * Also make sure that devices and - * slices have a consistent size. If - * they differ by a significant amount - * (~16MB) then report an error. - */ - if (!dontreport && - (vdev_size != -1ULL && - (labs(size - vdev_size) > - ZPOOL_FUZZ))) { - if (ret != NULL) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "%s contains devices of " - "different sizes\n"), - rep.zprl_type); - else - return (NULL); - dontreport = B_TRUE; - } - - type = childtype; - vdev_size = size; - } - } - - /* - * At this point, we have the replication of the last toplevel - * vdev in 'rep'. Compare it to 'lastrep' to see if it is - * different. - */ - if (lastrep.zprl_type != NULL) { - if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) || - is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) { - /* - * Accepted raidz and mirror when they can - * handle the same number of disk failures. 
- */ - if (raidz->zprl_parity != - mirror->zprl_children - 1) { - if (ret != NULL) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication " - "level: " - "%s and %s vdevs with " - "different redundancy, " - "%llu vs. %llu (%llu-way) " - "are present\n"), - raidz->zprl_type, - mirror->zprl_type, - raidz->zprl_parity, - mirror->zprl_children - 1, - mirror->zprl_children); - else - return (NULL); - } - } else if (strcmp(lastrep.zprl_type, rep.zprl_type) != - 0) { - if (ret != NULL) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication level: " - "both %s and %s vdevs are " - "present\n"), - lastrep.zprl_type, rep.zprl_type); - else - return (NULL); - } else if (lastrep.zprl_parity != rep.zprl_parity) { - if (ret) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication level: " - "both %llu and %llu device parity " - "%s vdevs are present\n"), - lastrep.zprl_parity, - rep.zprl_parity, - rep.zprl_type); - else - return (NULL); - } else if (lastrep.zprl_children != rep.zprl_children) { - if (ret) - free(ret); - ret = NULL; - if (fatal) - vdev_error(gettext( - "mismatched replication level: " - "both %llu-way and %llu-way %s " - "vdevs are present\n"), - lastrep.zprl_children, - rep.zprl_children, - rep.zprl_type); - else - return (NULL); - } - } - lastrep = rep; - } - - if (ret != NULL) - *ret = rep; - - return (ret); -} - -/* - * Check the replication level of the vdev spec against the current pool. Calls - * get_replication() to make sure the new spec is self-consistent. If the pool - * has a consistent replication level, then we ignore any errors. Otherwise, - * report any difference between the two. 
- */ -static int -check_replication(nvlist_t *config, nvlist_t *newroot) -{ - nvlist_t **child; - uint_t children; - replication_level_t *current = NULL, *new; - replication_level_t *raidz, *mirror; - int ret; - - /* - * If we have a current pool configuration, check to see if it's - * self-consistent. If not, simply return success. - */ - if (config != NULL) { - nvlist_t *nvroot; - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if ((current = get_replication(nvroot, B_FALSE)) == NULL) - return (0); - } - /* - * for spares there may be no children, and therefore no - * replication level to check - */ - if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) || (children == 0)) { - free(current); - return (0); - } - - /* - * If all we have is logs then there's no replication level to check. - */ - if (num_logs(newroot) == children) { - free(current); - return (0); - } - - /* - * Get the replication level of the new vdev spec, reporting any - * inconsistencies found. - */ - if ((new = get_replication(newroot, B_TRUE)) == NULL) { - free(current); - return (-1); - } - - /* - * Check to see if the new vdev spec matches the replication level of - * the current pool. - */ - ret = 0; - if (current != NULL) { - if (is_raidz_mirror(current, new, &raidz, &mirror) || - is_raidz_mirror(new, current, &raidz, &mirror)) { - if (raidz->zprl_parity != mirror->zprl_children - 1) { - vdev_error(gettext( - "mismatched replication level: pool and " - "new vdev with different redundancy, %s " - "and %s vdevs, %llu vs. 
%llu (%llu-way)\n"), - raidz->zprl_type, - mirror->zprl_type, - raidz->zprl_parity, - mirror->zprl_children - 1, - mirror->zprl_children); - ret = -1; - } - } else if (strcmp(current->zprl_type, new->zprl_type) != 0) { - vdev_error(gettext( - "mismatched replication level: pool uses %s " - "and new vdev is %s\n"), - current->zprl_type, new->zprl_type); - ret = -1; - } else if (current->zprl_parity != new->zprl_parity) { - vdev_error(gettext( - "mismatched replication level: pool uses %llu " - "device parity and new vdev uses %llu\n"), - current->zprl_parity, new->zprl_parity); - ret = -1; - } else if (current->zprl_children != new->zprl_children) { - vdev_error(gettext( - "mismatched replication level: pool uses %llu-way " - "%s and new vdev uses %llu-way %s\n"), - current->zprl_children, current->zprl_type, - new->zprl_children, new->zprl_type); - ret = -1; - } - } - - free(new); - if (current != NULL) - free(current); - - return (ret); -} - -#ifdef illumos -/* - * Go through and find any whole disks in the vdev specification, labelling them - * as appropriate. When constructing the vdev spec, we were unable to open this - * device in order to provide a devid. Now that we have labelled the disk and - * know the pool slice is valid, we can construct the devid now. - * - * If the disk was already labeled with an EFI label, we will have gotten the - * devid already (because we were able to open the whole disk). Otherwise, we - * need to get the devid after we label the disk. 
- */ -static int -make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type, - uint64_t boot_size) -{ - nvlist_t **child; - uint_t c, children; - char *type, *path, *diskname; - char buf[MAXPATHLEN]; - uint64_t wholedisk; - int fd; - int ret; - int slice; - ddi_devid_t devid; - char *minor = NULL, *devid_str = NULL; - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - - if (strcmp(type, VDEV_TYPE_DISK) != 0) - return (0); - - /* - * We have a disk device. Get the path to the device - * and see if it's a whole disk by appending the backup - * slice and stat()ing the device. - */ - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - - diskname = strrchr(path, '/'); - assert(diskname != NULL); - diskname++; - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) { - /* - * This is not whole disk, return error if - * boot partition creation was requested - */ - if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { - (void) fprintf(stderr, - gettext("creating boot partition is only " - "supported on whole disk vdevs: %s\n"), - diskname); - return (-1); - } - return (0); - } - - ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type, - boot_size, &slice); - if (ret == -1) - return (ret); - - /* - * Fill in the devid, now that we've labeled the disk. 
- */ - (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice); - if ((fd = open(buf, O_RDONLY)) < 0) { - (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - buf, strerror(errno)); - return (-1); - } - - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0 && - (devid_str = devid_str_encode(devid, minor)) != - NULL) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_DEVID, devid_str) == 0); - } - if (devid_str != NULL) - devid_str_free(devid_str); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - - /* - * Update the path to refer to the pool slice. The presence of - * the 'whole_disk' field indicates to the CLI that we should - * chop off the slice number when displaying the device in - * future output. - */ - verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); - - (void) close(fd); - - return (0); - } - - /* illumos kernel does not support booting from multi-vdev pools. */ - if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) { - if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) { - (void) fprintf(stderr, gettext("boot pool " - "can not have more than one vdev\n")); - return (-1); - } - } - - for (c = 0; c < children; c++) { - ret = make_disks(zhp, child[c], boot_type, boot_size); - if (ret != 0) - return (ret); - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, - &child, &children) == 0) - for (c = 0; c < children; c++) { - ret = make_disks(zhp, child[c], boot_type, boot_size); - if (ret != 0) - return (ret); - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0) - for (c = 0; c < children; c++) { - ret = make_disks(zhp, child[c], boot_type, boot_size); - if (ret != 0) - return (ret); - } - - return (0); -} -#endif /* illumos */ - -/* - * Determine if the given path is a hot spare within the given configuration. 
- */ -static boolean_t -is_spare(nvlist_t *config, const char *path) -{ - int fd; - pool_state_t state; - char *name = NULL; - nvlist_t *label; - uint64_t guid, spareguid; - nvlist_t *nvroot; - nvlist_t **spares; - uint_t i, nspares; - boolean_t inuse; - - if ((fd = open(path, O_RDONLY)) < 0) - return (B_FALSE); - - if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || - !inuse || - state != POOL_STATE_SPARE || - zpool_read_label(fd, &label) != 0) { - free(name); - (void) close(fd); - return (B_FALSE); - } - free(name); - (void) close(fd); - - verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); - nvlist_free(label); - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - for (i = 0; i < nspares; i++) { - verify(nvlist_lookup_uint64(spares[i], - ZPOOL_CONFIG_GUID, &spareguid) == 0); - if (spareguid == guid) - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* - * Go through and find any devices that are in use. We rely on libdiskmgt for - * the majority of this task. - */ -static boolean_t -is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, - boolean_t replacing, boolean_t isspare) -{ - nvlist_t **child; - uint_t c, children; - char *type, *path; - int ret = 0; - char buf[MAXPATHLEN]; - uint64_t wholedisk; - boolean_t anyinuse = B_FALSE; - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - - /* - * As a generic check, we look to see if this is a replace of a - * hot spare within the same pool. If so, we allow it - * regardless of what libdiskmgt or zpool_in_use() says. 
- */ - if (replacing) { -#ifdef illumos - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) == 0 && wholedisk) - (void) snprintf(buf, sizeof (buf), "%ss0", - path); - else -#endif - (void) strlcpy(buf, path, sizeof (buf)); - - if (is_spare(config, buf)) - return (B_FALSE); - } - - if (strcmp(type, VDEV_TYPE_DISK) == 0) - ret = check_device(path, force, isspare); - else if (strcmp(type, VDEV_TYPE_FILE) == 0) - ret = check_file(path, force, isspare); - - return (ret != 0); - } - - for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, - B_FALSE)) - anyinuse = B_TRUE; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, - &child, &children) == 0) - for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, - B_TRUE)) - anyinuse = B_TRUE; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0) - for (c = 0; c < children; c++) - if (is_device_in_use(config, child[c], force, replacing, - B_FALSE)) - anyinuse = B_TRUE; - - return (anyinuse); -} - -static const char * -is_grouping(const char *type, int *mindev, int *maxdev) -{ - if (strncmp(type, "raidz", 5) == 0) { - const char *p = type + 5; - char *end; - long nparity; - - if (*p == '\0') { - nparity = 1; - } else if (*p == '0') { - return (NULL); /* no zero prefixes allowed */ - } else { - errno = 0; - nparity = strtol(p, &end, 10); - if (errno != 0 || nparity < 1 || nparity >= 255 || - *end != '\0') - return (NULL); - } - - if (mindev != NULL) - *mindev = nparity + 1; - if (maxdev != NULL) - *maxdev = 255; - return (VDEV_TYPE_RAIDZ); - } - - if (maxdev != NULL) - *maxdev = INT_MAX; - - if (strcmp(type, "mirror") == 0) { - if (mindev != NULL) - *mindev = 2; - return (VDEV_TYPE_MIRROR); - } - - if (strcmp(type, "spare") == 0) { - if (mindev != NULL) - *mindev = 1; - return (VDEV_TYPE_SPARE); - } - - if (strcmp(type, "log") == 0) { - if (mindev != NULL) - *mindev = 1; - return (VDEV_TYPE_LOG); - } - 
- if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 || - strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { - if (mindev != NULL) - *mindev = 1; - return (type); - } - - if (strcmp(type, "cache") == 0) { - if (mindev != NULL) - *mindev = 1; - return (VDEV_TYPE_L2CACHE); - } - - return (NULL); -} - -/* - * Construct a syntactically valid vdev specification, - * and ensure that all devices and files exist and can be opened. - * Note: we don't bother freeing anything in the error paths - * because the program is just going to exit anyway. - */ -nvlist_t * -construct_spec(int argc, char **argv) -{ - nvlist_t *nvroot, *nv, **top, **spares, **l2cache; - int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; - const char *type; - uint64_t is_log, is_special, is_dedup; - boolean_t seen_logs; - - top = NULL; - toplevels = 0; - spares = NULL; - l2cache = NULL; - nspares = 0; - nlogs = 0; - nl2cache = 0; - is_log = is_special = is_dedup = B_FALSE; - seen_logs = B_FALSE; - - while (argc > 0) { - nv = NULL; - - /* - * If it's a mirror or raidz, the subsequent arguments are - * its leaves -- until we encounter the next mirror or raidz. - */ - if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) { - nvlist_t **child = NULL; - int c, children = 0; - - if (strcmp(type, VDEV_TYPE_SPARE) == 0) { - if (spares != NULL) { - (void) fprintf(stderr, - gettext("invalid vdev " - "specification: 'spare' can be " - "specified only once\n")); - return (NULL); - } - is_log = is_special = is_dedup = B_FALSE; - } - - if (strcmp(type, VDEV_TYPE_LOG) == 0) { - if (seen_logs) { - (void) fprintf(stderr, - gettext("invalid vdev " - "specification: 'log' can be " - "specified only once\n")); - return (NULL); - } - seen_logs = B_TRUE; - is_log = B_TRUE; - is_special = B_FALSE; - is_dedup = B_FALSE; - argc--; - argv++; - /* - * A log is not a real grouping device. - * We just set is_log and continue. 
- */ - continue; - } - - if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) { - is_special = B_TRUE; - is_log = B_FALSE; - is_dedup = B_FALSE; - argc--; - argv++; - continue; - } - - if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) { - is_dedup = B_TRUE; - is_log = B_FALSE; - is_special = B_FALSE; - argc--; - argv++; - continue; - } - - if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { - if (l2cache != NULL) { - (void) fprintf(stderr, - gettext("invalid vdev " - "specification: 'cache' can be " - "specified only once\n")); - return (NULL); - } - is_log = is_special = is_dedup = B_FALSE; - } - - if (is_log || is_special || is_dedup) { - if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { - (void) fprintf(stderr, - gettext("invalid vdev " - "specification: unsupported '%s' " - "device: %s\n"), is_log ? "log" : - "special", type); - return (NULL); - } - nlogs++; - } - - for (c = 1; c < argc; c++) { - if (is_grouping(argv[c], NULL, NULL) != NULL) - break; - children++; - child = realloc(child, - children * sizeof (nvlist_t *)); - if (child == NULL) - zpool_no_memory(); - if ((nv = make_leaf_vdev(argv[c], B_FALSE)) - == NULL) - return (NULL); - child[children - 1] = nv; - } - - if (children < mindev) { - (void) fprintf(stderr, gettext("invalid vdev " - "specification: %s requires at least %d " - "devices\n"), argv[0], mindev); - return (NULL); - } - - if (children > maxdev) { - (void) fprintf(stderr, gettext("invalid vdev " - "specification: %s supports no more than " - "%d devices\n"), argv[0], maxdev); - return (NULL); - } - - argc -= c; - argv += c; - - if (strcmp(type, VDEV_TYPE_SPARE) == 0) { - spares = child; - nspares = children; - continue; - } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { - l2cache = child; - nl2cache = children; - continue; - } else { - /* create a top-level vdev with children */ - verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, - 0) == 0); - verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, - type) == 0); - verify(nvlist_add_uint64(nv, - ZPOOL_CONFIG_IS_LOG, is_log) == 
0); - if (is_log) - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_LOG) == 0); - if (is_special) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_SPECIAL) == 0); - } - if (is_dedup) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_DEDUP) == 0); - } - if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { - verify(nvlist_add_uint64(nv, - ZPOOL_CONFIG_NPARITY, - mindev - 1) == 0); - } - verify(nvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, - children) == 0); - - for (c = 0; c < children; c++) - nvlist_free(child[c]); - free(child); - } - } else { - /* - * We have a device. Pass off to make_leaf_vdev() to - * construct the appropriate nvlist describing the vdev. - */ - if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL) - return (NULL); - if (is_log) - nlogs++; - if (is_special) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_SPECIAL) == 0); - } - if (is_dedup) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_ALLOCATION_BIAS, - VDEV_ALLOC_BIAS_DEDUP) == 0); - } - argc--; - argv++; - } - - toplevels++; - top = realloc(top, toplevels * sizeof (nvlist_t *)); - if (top == NULL) - zpool_no_memory(); - top[toplevels - 1] = nv; - } - - if (toplevels == 0 && nspares == 0 && nl2cache == 0) { - (void) fprintf(stderr, gettext("invalid vdev " - "specification: at least one toplevel vdev must be " - "specified\n")); - return (NULL); - } - - if (seen_logs && nlogs == 0) { - (void) fprintf(stderr, gettext("invalid vdev specification: " - "log requires at least 1 device\n")); - return (NULL); - } - - /* - * Finally, create nvroot and add all top-level vdevs to it. 
- */ - verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) == 0); - verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - top, toplevels) == 0); - if (nspares != 0) - verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - spares, nspares) == 0); - if (nl2cache != 0) - verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - l2cache, nl2cache) == 0); - - for (t = 0; t < toplevels; t++) - nvlist_free(top[t]); - for (t = 0; t < nspares; t++) - nvlist_free(spares[t]); - for (t = 0; t < nl2cache; t++) - nvlist_free(l2cache[t]); - if (spares) - free(spares); - if (l2cache) - free(l2cache); - free(top); - - return (nvroot); -} - -nvlist_t * -split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, - splitflags_t flags, int argc, char **argv) -{ - nvlist_t *newroot = NULL, **child; - uint_t c, children; -#ifdef illumos - zpool_boot_label_t boot_type; -#endif - - if (argc > 0) { - if ((newroot = construct_spec(argc, argv)) == NULL) { - (void) fprintf(stderr, gettext("Unable to build a " - "pool from the specified devices\n")); - return (NULL); - } - -#ifdef illumos - if (zpool_is_bootable(zhp)) - boot_type = ZPOOL_COPY_BOOT_LABEL; - else - boot_type = ZPOOL_NO_BOOT_LABEL; - - if (!flags.dryrun && - make_disks(zhp, newroot, boot_type, 0) != 0) { - nvlist_free(newroot); - return (NULL); - } -#endif - - /* avoid any tricks in the spec */ - verify(nvlist_lookup_nvlist_array(newroot, - ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); - for (c = 0; c < children; c++) { - char *path; - const char *type; - int min, max; - - verify(nvlist_lookup_string(child[c], - ZPOOL_CONFIG_PATH, &path) == 0); - if ((type = is_grouping(path, &min, &max)) != NULL) { - (void) fprintf(stderr, gettext("Cannot use " - "'%s' as a device for splitting\n"), type); - nvlist_free(newroot); - return (NULL); - } - } - } - - if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) { - 
nvlist_free(newroot); - return (NULL); - } - - return (newroot); -} - -static int -num_normal_vdevs(nvlist_t *nvroot) -{ - nvlist_t **top; - uint_t t, toplevels, normal = 0; - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - - for (t = 0; t < toplevels; t++) { - uint64_t log = B_FALSE; - - (void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log); - if (log) - continue; - if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS)) - continue; - - normal++; - } - - return (normal); -} - -/* - * Get and validate the contents of the given vdev specification. This ensures - * that the nvlist returned is well-formed, that all the devices exist, and that - * they are not currently in use by any other known consumer. The 'poolconfig' - * parameter is the current configuration of the pool when adding devices - * existing pool, and is used to perform additional checks, such as changing the - * replication level of the pool. It can be 'NULL' to indicate that this is a - * new pool. The 'force' flag controls whether devices should be forcefully - * added, even if they appear in use. - */ -nvlist_t * -make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, - uint64_t boot_size, int argc, char **argv) -{ - nvlist_t *newroot; - nvlist_t *poolconfig = NULL; - is_force = force; - - /* - * Construct the vdev specification. If this is successful, we know - * that we have a valid specification, and that all devices can be - * opened. - */ - if ((newroot = construct_spec(argc, argv)) == NULL) - return (NULL); - - if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) - return (NULL); - - /* - * Validate each device to make sure that its not shared with another - * subsystem. We do this even if 'force' is set, because there are some - * uses (such as a dedicated dump device) that even '-f' cannot - * override. 
- */ - if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) { - nvlist_free(newroot); - return (NULL); - } - - /* - * Check the replication level of the given vdevs and report any errors - * found. We include the existing pool spec, if any, as we need to - * catch changes against the existing replication level. - */ - if (check_rep && check_replication(poolconfig, newroot) != 0) { - nvlist_free(newroot); - return (NULL); - } - -#ifdef illumos - /* - * On pool create the new vdev spec must have one normal vdev. - */ - if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) { - vdev_error(gettext("at least one general top-level vdev must " - "be specified\n")); - nvlist_free(newroot); - return (NULL); - } - - /* - * Run through the vdev specification and label any whole disks found. - */ - if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { - nvlist_free(newroot); - return (NULL); - } -#endif - - return (newroot); -} diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1 b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1 deleted file mode 100644 index 3e3050283313..000000000000 --- a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1 +++ /dev/null @@ -1,76 +0,0 @@ -'\" te -.\" Copyright (c) 2011, Martin Matuska <mm@FreeBSD.org>. -.\" All Rights Reserved. -.\" -.\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). -.\" You may not use this file except in compliance with the License. -.\" -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -.\" or http://www.opensolaris.org/os/licensing. -.\" See the License for the specific language governing permissions -.\" and limitations under the License. -.\" -.\" When distributing Covered Code, include this CDDL HEADER in each -.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-.\" If applicable, add the following below this CDDL HEADER, with the -.\" fields enclosed by brackets "[]" replaced with your own identifying -.\" information: Portions Copyright [yyyy] [name of copyright owner] -.\" -.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2013, Delphix. All Rights Reserved. -.\" -.\" $FreeBSD$ -.\" -.Dd February 25, 2020 -.Dt ZSTREAMDUMP 8 -.Os -.Sh NAME -.Nm zstreamdump -.Nd filter data in zfs send stream -.Sh SYNOPSIS -.Nm -.Op Fl C -.Op Fl d -.Op Fl v -.Sh DESCRIPTION -The -.Nm -utility reads from the output of the -.Qq Nm zfs Cm send -command, then displays headers and some statistics from that output. See -.Xr zfs 8 . -.Pp -The following options are supported: -.Bl -tag -width indent -.It Fl C -Suppress the validation of checksums. -.It Fl d -Dump contents of blocks modified, implies verbose. -.It Fl v -Verbose. Dump all headers, not only begin and end headers. -.El -.Sh SEE ALSO -.Xr zfs 8 -.Sh HISTORY -The -.Nm -utility first appeared in -.Fx 7.0 . -.Sh AUTHORS -This manual page is a -.Xr mdoc 7 -reimplementation of the -.Tn OpenSolaris -manual page -.Em zstreamdump(1M) , -modified and customized for -.Fx -and licensed under the -.Tn Common Development and Distribution License -.Pq Tn CDDL . -.Pp -The -.Xr mdoc 7 -implementation of this manual page was initially written by -.An Martin Matuska Aq mm@FreeBSD.org . diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c deleted file mode 100644 index 51c4c8e0e649..000000000000 --- a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c +++ /dev/null @@ -1,644 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2014 Integros [integros.com] - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. - */ - -#include <ctype.h> -#include <libnvpair.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <stddef.h> - -#include <sys/dmu.h> -#include <sys/zfs_ioctl.h> -#include <sys/zio.h> -#include <zfs_fletcher.h> - -/* - * If dump mode is enabled, the number of bytes to print per line - */ -#define BYTES_PER_LINE 16 -/* - * If dump mode is enabled, the number of bytes to group together, separated - * by newlines or spaces - */ -#define DUMP_GROUPING 4 - -uint64_t total_write_size = 0; -uint64_t total_stream_len = 0; -FILE *send_stream = 0; -boolean_t do_byteswap = B_FALSE; -boolean_t do_cksum = B_TRUE; - -static void -usage(void) -{ - (void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n"); - (void) fprintf(stderr, "\t -v -- verbose\n"); - (void) fprintf(stderr, "\t -C -- suppress checksum verification\n"); - (void) fprintf(stderr, "\t -d -- dump contents of blocks modified, " - "implies verbose\n"); - exit(1); -} - -static void * -safe_malloc(size_t size) -{ - void *rv = malloc(size); - if (rv == NULL) { - (void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n", - size); - 
abort(); - } - return (rv); -} - -/* - * ssread - send stream read. - * - * Read while computing incremental checksum - */ -static size_t -ssread(void *buf, size_t len, zio_cksum_t *cksum) -{ - size_t outlen; - - if ((outlen = fread(buf, len, 1, send_stream)) == 0) - return (0); - - if (do_cksum) { - if (do_byteswap) - fletcher_4_incremental_byteswap(buf, len, cksum); - else - fletcher_4_incremental_native(buf, len, cksum); - } - total_stream_len += len; - return (outlen); -} - -static size_t -read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) -{ - ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), - ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); - size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); - if (r == 0) - return (0); - zio_cksum_t saved_cksum = *cksum; - r = ssread(&drr->drr_u.drr_checksum.drr_checksum, - sizeof (zio_cksum_t), cksum); - if (r == 0) - return (0); - if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && - !ZIO_CHECKSUM_EQUAL(saved_cksum, - drr->drr_u.drr_checksum.drr_checksum)) { - fprintf(stderr, "invalid checksum\n"); - (void) printf("Incorrect checksum in record header.\n"); - (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", - saved_cksum.zc_word[0], - saved_cksum.zc_word[1], - saved_cksum.zc_word[2], - saved_cksum.zc_word[3]); - return (0); - } - return (sizeof (*drr)); -} - -/* - * Print part of a block in ASCII characters - */ -static void -print_ascii_block(char *subbuf, int length) -{ - int i; - - for (i = 0; i < length; i++) { - char char_print = isprint(subbuf[i]) ? 
subbuf[i] : '.'; - if (i != 0 && i % DUMP_GROUPING == 0) { - (void) printf(" "); - } - (void) printf("%c", char_print); - } - (void) printf("\n"); -} - -/* - * print_block - Dump the contents of a modified block to STDOUT - * - * Assume that buf has capacity evenly divisible by BYTES_PER_LINE - */ -static void -print_block(char *buf, int length) -{ - int i; - /* - * Start printing ASCII characters at a constant offset, after - * the hex prints. Leave 3 characters per byte on a line (2 digit - * hex number plus 1 space) plus spaces between characters and - * groupings. - */ - int ascii_start = BYTES_PER_LINE * 3 + - BYTES_PER_LINE / DUMP_GROUPING + 2; - - for (i = 0; i < length; i += BYTES_PER_LINE) { - int j; - int this_line_length = MIN(BYTES_PER_LINE, length - i); - int print_offset = 0; - - for (j = 0; j < this_line_length; j++) { - int buf_offset = i + j; - - /* - * Separate every DUMP_GROUPING bytes by a space. - */ - if (buf_offset % DUMP_GROUPING == 0) { - print_offset += printf(" "); - } - - /* - * Print the two-digit hex value for this byte. 
- */ - unsigned char hex_print = buf[buf_offset]; - print_offset += printf("%02x ", hex_print); - } - - (void) printf("%*s", ascii_start - print_offset, " "); - - print_ascii_block(buf + i, this_line_length); - } -} - -int -main(int argc, char *argv[]) -{ - char *buf = safe_malloc(SPA_MAXBLOCKSIZE); - uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; - uint64_t total_records = 0; - dmu_replay_record_t thedrr; - dmu_replay_record_t *drr = &thedrr; - struct drr_begin *drrb = &thedrr.drr_u.drr_begin; - struct drr_end *drre = &thedrr.drr_u.drr_end; - struct drr_object *drro = &thedrr.drr_u.drr_object; - struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects; - struct drr_write *drrw = &thedrr.drr_u.drr_write; - struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; - struct drr_free *drrf = &thedrr.drr_u.drr_free; - struct drr_spill *drrs = &thedrr.drr_u.drr_spill; - struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; - struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; - char c; - boolean_t verbose = B_FALSE; - boolean_t very_verbose = B_FALSE; - boolean_t first = B_TRUE; - /* - * dump flag controls whether the contents of any modified data blocks - * are printed to the console during processing of the stream. Warning: - * for large streams, this can obviously lead to massive prints. 
- */ - boolean_t dump = B_FALSE; - int err; - zio_cksum_t zc = { 0 }; - zio_cksum_t pcksum = { 0 }; - - while ((c = getopt(argc, argv, ":vCd")) != -1) { - switch (c) { - case 'C': - do_cksum = B_FALSE; - break; - case 'v': - if (verbose) - very_verbose = B_TRUE; - verbose = B_TRUE; - break; - case 'd': - dump = B_TRUE; - verbose = B_TRUE; - very_verbose = B_TRUE; - break; - case ':': - (void) fprintf(stderr, - "missing argument for '%c' option\n", optopt); - usage(); - break; - case '?': - (void) fprintf(stderr, "invalid option '%c'\n", - optopt); - usage(); - break; - } - } - - if (isatty(STDIN_FILENO)) { - (void) fprintf(stderr, - "Error: Backup stream can not be read " - "from a terminal.\n" - "You must redirect standard input.\n"); - exit(1); - } - - send_stream = stdin; - pcksum = zc; - while (read_hdr(drr, &zc)) { - - /* - * If this is the first DMU record being processed, check for - * the magic bytes and figure out the endian-ness based on them. - */ - if (first) { - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { - do_byteswap = B_TRUE; - if (do_cksum) { - ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); - /* - * recalculate header checksum now - * that we know it needs to be - * byteswapped. - */ - fletcher_4_incremental_byteswap(drr, - sizeof (dmu_replay_record_t), &zc); - } - } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) { - (void) fprintf(stderr, "Invalid stream " - "(bad magic number)\n"); - exit(1); - } - first = B_FALSE; - } - if (do_byteswap) { - drr->drr_type = BSWAP_32(drr->drr_type); - drr->drr_payloadlen = - BSWAP_32(drr->drr_payloadlen); - } - - /* - * At this point, the leading fields of the replay record - * (drr_type and drr_payloadlen) have been byte-swapped if - * necessary, but the rest of the data structure (the - * union of type-specific structures) is still in its - * original state. 
- */ - if (drr->drr_type >= DRR_NUMTYPES) { - (void) printf("INVALID record found: type 0x%x\n", - drr->drr_type); - (void) printf("Aborting.\n"); - exit(1); - } - - drr_record_count[drr->drr_type]++; - total_records++; - - switch (drr->drr_type) { - case DRR_BEGIN: - if (do_byteswap) { - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = - BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = - BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_flags = BSWAP_32(drrb->drr_flags); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = - BSWAP_64(drrb->drr_fromguid); - } - - (void) printf("BEGIN record\n"); - (void) printf("\thdrtype = %lld\n", - DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo)); - (void) printf("\tfeatures = %llx\n", - DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo)); - (void) printf("\tmagic = %llx\n", - (u_longlong_t)drrb->drr_magic); - (void) printf("\tcreation_time = %llx\n", - (u_longlong_t)drrb->drr_creation_time); - (void) printf("\ttype = %u\n", drrb->drr_type); - (void) printf("\tflags = 0x%x\n", drrb->drr_flags); - (void) printf("\ttoguid = %llx\n", - (u_longlong_t)drrb->drr_toguid); - (void) printf("\tfromguid = %llx\n", - (u_longlong_t)drrb->drr_fromguid); - (void) printf("\ttoname = %s\n", drrb->drr_toname); - if (verbose) - (void) printf("\n"); - - if (drr->drr_payloadlen != 0) { - nvlist_t *nv; - int sz = drr->drr_payloadlen; - - if (sz > SPA_MAXBLOCKSIZE) { - free(buf); - buf = safe_malloc(sz); - } - (void) ssread(buf, sz, &zc); - if (ferror(send_stream)) - perror("fread"); - err = nvlist_unpack(buf, sz, &nv, 0); - if (err) - perror(strerror(err)); - nvlist_print(stdout, nv); - nvlist_free(nv); - } - break; - - case DRR_END: - if (do_byteswap) { - drre->drr_checksum.zc_word[0] = - BSWAP_64(drre->drr_checksum.zc_word[0]); - drre->drr_checksum.zc_word[1] = - BSWAP_64(drre->drr_checksum.zc_word[1]); - drre->drr_checksum.zc_word[2] = - 
BSWAP_64(drre->drr_checksum.zc_word[2]); - drre->drr_checksum.zc_word[3] = - BSWAP_64(drre->drr_checksum.zc_word[3]); - } - /* - * We compare against the *previous* checksum - * value, because the stored checksum is of - * everything before the DRR_END record. - */ - if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum, - pcksum)) { - (void) printf("Expected checksum differs from " - "checksum in stream.\n"); - (void) printf("Expected checksum = " - "%llx/%llx/%llx/%llx\n", - pcksum.zc_word[0], - pcksum.zc_word[1], - pcksum.zc_word[2], - pcksum.zc_word[3]); - } - (void) printf("END checksum = %llx/%llx/%llx/%llx\n", - drre->drr_checksum.zc_word[0], - drre->drr_checksum.zc_word[1], - drre->drr_checksum.zc_word[2], - drre->drr_checksum.zc_word[3]); - - ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); - break; - - case DRR_OBJECT: - if (do_byteswap) { - drro->drr_object = BSWAP_64(drro->drr_object); - drro->drr_type = BSWAP_32(drro->drr_type); - drro->drr_bonustype = - BSWAP_32(drro->drr_bonustype); - drro->drr_blksz = BSWAP_32(drro->drr_blksz); - drro->drr_bonuslen = - BSWAP_32(drro->drr_bonuslen); - drro->drr_toguid = BSWAP_64(drro->drr_toguid); - } - if (verbose) { - (void) printf("OBJECT object = %" PRIu64 - " type = %u bonustype = %u blksz = %u" - " bonuslen = %u dn_slots = %u\n", - drro->drr_object, - drro->drr_type, - drro->drr_bonustype, - drro->drr_blksz, - drro->drr_bonuslen, - drro->drr_dn_slots); - } - if (drro->drr_bonuslen > 0) { - (void) ssread(buf, - P2ROUNDUP(drro->drr_bonuslen, 8), &zc); - if (dump) { - print_block(buf, - P2ROUNDUP(drro->drr_bonuslen, 8)); - } - } - break; - - case DRR_FREEOBJECTS: - if (do_byteswap) { - drrfo->drr_firstobj = - BSWAP_64(drrfo->drr_firstobj); - drrfo->drr_numobjs = - BSWAP_64(drrfo->drr_numobjs); - drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid); - } - if (verbose) { - (void) printf("FREEOBJECTS firstobj = %llu " - "numobjs = %llu\n", - (u_longlong_t)drrfo->drr_firstobj, - (u_longlong_t)drrfo->drr_numobjs); - } - break; - - case 
DRR_WRITE: - if (do_byteswap) { - drrw->drr_object = BSWAP_64(drrw->drr_object); - drrw->drr_type = BSWAP_32(drrw->drr_type); - drrw->drr_offset = BSWAP_64(drrw->drr_offset); - drrw->drr_logical_size = - BSWAP_64(drrw->drr_logical_size); - drrw->drr_toguid = BSWAP_64(drrw->drr_toguid); - drrw->drr_key.ddk_prop = - BSWAP_64(drrw->drr_key.ddk_prop); - drrw->drr_compressed_size = - BSWAP_64(drrw->drr_compressed_size); - } - - uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); - - /* - * If this is verbose and/or dump output, - * print info on the modified block - */ - if (verbose) { - (void) printf("WRITE object = %llu type = %u " - "checksum type = %u compression type = %u\n" - " offset = %llu logical_size = %llu " - "compressed_size = %llu " - "payload_size = %llu " - "props = %llx\n", - (u_longlong_t)drrw->drr_object, - drrw->drr_type, - drrw->drr_checksumtype, - drrw->drr_compressiontype, - (u_longlong_t)drrw->drr_offset, - (u_longlong_t)drrw->drr_logical_size, - (u_longlong_t)drrw->drr_compressed_size, - (u_longlong_t)payload_size, - (u_longlong_t)drrw->drr_key.ddk_prop); - } - - /* - * Read the contents of the block in from STDIN to buf - */ - (void) ssread(buf, payload_size, &zc); - /* - * If in dump mode - */ - if (dump) { - print_block(buf, payload_size); - } - total_write_size += payload_size; - break; - - case DRR_WRITE_BYREF: - if (do_byteswap) { - drrwbr->drr_object = - BSWAP_64(drrwbr->drr_object); - drrwbr->drr_offset = - BSWAP_64(drrwbr->drr_offset); - drrwbr->drr_length = - BSWAP_64(drrwbr->drr_length); - drrwbr->drr_toguid = - BSWAP_64(drrwbr->drr_toguid); - drrwbr->drr_refguid = - BSWAP_64(drrwbr->drr_refguid); - drrwbr->drr_refobject = - BSWAP_64(drrwbr->drr_refobject); - drrwbr->drr_refoffset = - BSWAP_64(drrwbr->drr_refoffset); - drrwbr->drr_key.ddk_prop = - BSWAP_64(drrwbr->drr_key.ddk_prop); - } - if (verbose) { - (void) printf("WRITE_BYREF object = %llu " - "checksum type = %u props = %llx\n" - " offset = %llu length = %llu\n" - "toguid = 
%llx refguid = %llx\n" - " refobject = %llu refoffset = %llu\n", - (u_longlong_t)drrwbr->drr_object, - drrwbr->drr_checksumtype, - (u_longlong_t)drrwbr->drr_key.ddk_prop, - (u_longlong_t)drrwbr->drr_offset, - (u_longlong_t)drrwbr->drr_length, - (u_longlong_t)drrwbr->drr_toguid, - (u_longlong_t)drrwbr->drr_refguid, - (u_longlong_t)drrwbr->drr_refobject, - (u_longlong_t)drrwbr->drr_refoffset); - } - break; - - case DRR_FREE: - if (do_byteswap) { - drrf->drr_object = BSWAP_64(drrf->drr_object); - drrf->drr_offset = BSWAP_64(drrf->drr_offset); - drrf->drr_length = BSWAP_64(drrf->drr_length); - } - if (verbose) { - (void) printf("FREE object = %llu " - "offset = %llu length = %lld\n", - (u_longlong_t)drrf->drr_object, - (u_longlong_t)drrf->drr_offset, - (longlong_t)drrf->drr_length); - } - break; - case DRR_SPILL: - if (do_byteswap) { - drrs->drr_object = BSWAP_64(drrs->drr_object); - drrs->drr_length = BSWAP_64(drrs->drr_length); - } - if (verbose) { - (void) printf("SPILL block for object = %llu " - "length = %llu\n", drrs->drr_object, - drrs->drr_length); - } - (void) ssread(buf, drrs->drr_length, &zc); - if (dump) { - print_block(buf, drrs->drr_length); - } - break; - case DRR_WRITE_EMBEDDED: - if (do_byteswap) { - drrwe->drr_object = - BSWAP_64(drrwe->drr_object); - drrwe->drr_offset = - BSWAP_64(drrwe->drr_offset); - drrwe->drr_length = - BSWAP_64(drrwe->drr_length); - drrwe->drr_toguid = - BSWAP_64(drrwe->drr_toguid); - drrwe->drr_lsize = - BSWAP_32(drrwe->drr_lsize); - drrwe->drr_psize = - BSWAP_32(drrwe->drr_psize); - } - if (verbose) { - (void) printf("WRITE_EMBEDDED object = %llu " - "offset = %llu length = %llu\n" - " toguid = %llx comp = %u etype = %u " - "lsize = %u psize = %u\n", - (u_longlong_t)drrwe->drr_object, - (u_longlong_t)drrwe->drr_offset, - (u_longlong_t)drrwe->drr_length, - (u_longlong_t)drrwe->drr_toguid, - drrwe->drr_compression, - drrwe->drr_etype, - drrwe->drr_lsize, - drrwe->drr_psize); - } - (void) ssread(buf, - 
P2ROUNDUP(drrwe->drr_psize, 8), &zc); - break; - } - if (drr->drr_type != DRR_BEGIN && very_verbose) { - (void) printf(" checksum = %llx/%llx/%llx/%llx\n", - (longlong_t)drrc->drr_checksum.zc_word[0], - (longlong_t)drrc->drr_checksum.zc_word[1], - (longlong_t)drrc->drr_checksum.zc_word[2], - (longlong_t)drrc->drr_checksum.zc_word[3]); - } - pcksum = zc; - } - free(buf); - - /* Print final summary */ - - (void) printf("SUMMARY:\n"); - (void) printf("\tTotal DRR_BEGIN records = %lld\n", - (u_longlong_t)drr_record_count[DRR_BEGIN]); - (void) printf("\tTotal DRR_END records = %lld\n", - (u_longlong_t)drr_record_count[DRR_END]); - (void) printf("\tTotal DRR_OBJECT records = %lld\n", - (u_longlong_t)drr_record_count[DRR_OBJECT]); - (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); - (void) printf("\tTotal DRR_WRITE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE]); - (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); - (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); - (void) printf("\tTotal DRR_FREE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREE]); - (void) printf("\tTotal DRR_SPILL records = %lld\n", - (u_longlong_t)drr_record_count[DRR_SPILL]); - (void) printf("\tTotal records = %lld\n", - (u_longlong_t)total_records); - (void) printf("\tTotal write size = %lld (0x%llx)\n", - (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); - (void) printf("\tTotal stream length = %lld (0x%llx)\n", - (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); - return (0); -} diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c deleted file mode 100644 index 65a4858b95d9..000000000000 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ /dev/null @@ -1,7135 +0,0 @@ -/* - * CDDL HEADER START - * - * The 
contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017, Intel Corporation. - * Copyright 2017 RackTop Systems. - */ - -/* - * The objective of this program is to provide a DMU/ZAP/SPA stress test - * that runs entirely in userland, is easy to use, and easy to extend. - * - * The overall design of the ztest program is as follows: - * - * (1) For each major functional area (e.g. adding vdevs to a pool, - * creating and destroying datasets, reading and writing objects, etc) - * we have a simple routine to test that functionality. These - * individual routines do not have to do anything "stressful". 
- * - * (2) We turn these simple functionality tests into a stress test by - * running them all in parallel, with as many threads as desired, - * and spread across as many datasets, objects, and vdevs as desired. - * - * (3) While all this is happening, we inject faults into the pool to - * verify that self-healing data really works. - * - * (4) Every time we open a dataset, we change its checksum and compression - * functions. Thus even individual objects vary from block to block - * in which checksum they use and whether they're compressed. - * - * (5) To verify that we never lose on-disk consistency after a crash, - * we run the entire test in a child of the main process. - * At random times, the child self-immolates with a SIGKILL. - * This is the software equivalent of pulling the power cord. - * The parent then runs the test again, using the existing - * storage pool, as many times as desired. If backwards compatibility - * testing is enabled ztest will sometimes run the "older" version - * of ztest after a SIGKILL. - * - * (6) To verify that we don't have future leaks or temporal incursions, - * many of the functional tests record the transaction group number - * as part of their data. When reading old data, they verify that - * the transaction group number is less than the current, open txg. - * If you add a new test, please do this if applicable. - * - * When run with no arguments, ztest runs for about five minutes and - * produces no output if successful. To get a little bit of information, - * specify -V. To get more information, specify -VV, and so on. - * - * To turn this into an overnight stress test, use -T to specify run time. - * - * You can ask more more vdevs [-v], datasets [-d], or threads [-t] - * to increase the pool capacity, fanout, and overall stress level. - * - * Use the -k option to set the desired frequency of kills. 
- * - * When ztest invokes itself it passes all relevant information through a - * temporary file which is mmap-ed in the child process. This allows shared - * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always - * stored at offset 0 of this file and contains information on the size and - * number of shared structures in the file. The information stored in this file - * must remain backwards compatible with older versions of ztest so that - * ztest can invoke them during backwards compatibility testing (-B). - */ - -#include <sys/zfs_context.h> -#include <sys/spa.h> -#include <sys/dmu.h> -#include <sys/txg.h> -#include <sys/dbuf.h> -#include <sys/zap.h> -#include <sys/dmu_objset.h> -#include <sys/poll.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/wait.h> -#include <sys/mman.h> -#include <sys/resource.h> -#include <sys/zio.h> -#include <sys/zil.h> -#include <sys/zil_impl.h> -#include <sys/vdev_impl.h> -#include <sys/vdev_file.h> -#include <sys/vdev_initialize.h> -#include <sys/spa_impl.h> -#include <sys/metaslab_impl.h> -#include <sys/dsl_prop.h> -#include <sys/dsl_dataset.h> -#include <sys/dsl_destroy.h> -#include <sys/dsl_scan.h> -#include <sys/zio_checksum.h> -#include <sys/refcount.h> -#include <sys/zfeature.h> -#include <sys/dsl_userhold.h> -#include <sys/abd.h> -#include <stdio.h> -#include <stdio_ext.h> -#include <stdlib.h> -#include <unistd.h> -#include <signal.h> -#include <umem.h> -#include <dlfcn.h> -#include <ctype.h> -#include <math.h> -#include <errno.h> -#include <sys/fs/zfs.h> -#include <libnvpair.h> -#include <libzfs.h> -#include <libcmdutils.h> - -static int ztest_fd_data = -1; -static int ztest_fd_rand = -1; - -typedef struct ztest_shared_hdr { - uint64_t zh_hdr_size; - uint64_t zh_opts_size; - uint64_t zh_size; - uint64_t zh_stats_size; - uint64_t zh_stats_count; - uint64_t zh_ds_size; - uint64_t zh_ds_count; -} ztest_shared_hdr_t; - -static ztest_shared_hdr_t *ztest_shared_hdr; - -enum ztest_class_state 
{ - ZTEST_VDEV_CLASS_OFF, - ZTEST_VDEV_CLASS_ON, - ZTEST_VDEV_CLASS_RND -}; - -typedef struct ztest_shared_opts { - char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; - char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; - char zo_alt_ztest[MAXNAMELEN]; - char zo_alt_libpath[MAXNAMELEN]; - uint64_t zo_vdevs; - uint64_t zo_vdevtime; - size_t zo_vdev_size; - int zo_ashift; - int zo_mirrors; - int zo_raidz; - int zo_raidz_parity; - int zo_datasets; - int zo_threads; - uint64_t zo_passtime; - uint64_t zo_killrate; - int zo_verbose; - int zo_init; - uint64_t zo_time; - uint64_t zo_maxloops; - uint64_t zo_metaslab_force_ganging; - int zo_mmp_test; - int zo_special_vdevs; -} ztest_shared_opts_t; - -static const ztest_shared_opts_t ztest_opts_defaults = { - .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, - .zo_dir = { '/', 't', 'm', 'p', '\0' }, - .zo_alt_ztest = { '\0' }, - .zo_alt_libpath = { '\0' }, - .zo_vdevs = 5, - .zo_ashift = SPA_MINBLOCKSHIFT, - .zo_mirrors = 2, - .zo_raidz = 4, - .zo_raidz_parity = 1, - .zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */ - .zo_datasets = 7, - .zo_threads = 23, - .zo_passtime = 60, /* 60 seconds */ - .zo_killrate = 70, /* 70% kill rate */ - .zo_verbose = 0, - .zo_mmp_test = 0, - .zo_init = 1, - .zo_time = 300, /* 5 minutes */ - .zo_maxloops = 50, /* max loops during spa_freeze() */ - .zo_metaslab_force_ganging = 32 << 10, - .zo_special_vdevs = ZTEST_VDEV_CLASS_RND, -}; - -extern uint64_t metaslab_force_ganging; -extern uint64_t metaslab_df_alloc_threshold; -extern uint64_t zfs_deadman_synctime_ms; -extern int metaslab_preload_limit; -extern boolean_t zfs_compressed_arc_enabled; -extern boolean_t zfs_abd_scatter_enabled; -extern int dmu_object_alloc_chunk_shift; -extern boolean_t zfs_force_some_double_word_sm_entries; -extern unsigned long zfs_reconstruct_indirect_damage_fraction; - -static ztest_shared_opts_t *ztest_shared_opts; -static ztest_shared_opts_t ztest_opts; - -typedef struct ztest_shared_ds { - uint64_t zd_seq; -} ztest_shared_ds_t; - 
-static ztest_shared_ds_t *ztest_shared_ds; -#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) - -#define BT_MAGIC 0x123456789abcdefULL -#define MAXFAULTS() \ - (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) - -enum ztest_io_type { - ZTEST_IO_WRITE_TAG, - ZTEST_IO_WRITE_PATTERN, - ZTEST_IO_WRITE_ZEROES, - ZTEST_IO_TRUNCATE, - ZTEST_IO_SETATTR, - ZTEST_IO_REWRITE, - ZTEST_IO_TYPES -}; - -typedef struct ztest_block_tag { - uint64_t bt_magic; - uint64_t bt_objset; - uint64_t bt_object; - uint64_t bt_dnodesize; - uint64_t bt_offset; - uint64_t bt_gen; - uint64_t bt_txg; - uint64_t bt_crtxg; -} ztest_block_tag_t; - -typedef struct bufwad { - uint64_t bw_index; - uint64_t bw_txg; - uint64_t bw_data; -} bufwad_t; - -/* - * It would be better to use a rangelock_t per object. Unfortunately - * the rangelock_t is not a drop-in replacement for rl_t, because we - * still need to map from object ID to rangelock_t. - */ -typedef enum { - RL_READER, - RL_WRITER, - RL_APPEND -} rl_type_t; - -typedef struct rll { - void *rll_writer; - int rll_readers; - kmutex_t rll_lock; - kcondvar_t rll_cv; -} rll_t; - -typedef struct rl { - uint64_t rl_object; - uint64_t rl_offset; - uint64_t rl_size; - rll_t *rl_lock; -} rl_t; - -#define ZTEST_RANGE_LOCKS 64 -#define ZTEST_OBJECT_LOCKS 64 - -/* - * Object descriptor. Used as a template for object lookup/create/remove. - */ -typedef struct ztest_od { - uint64_t od_dir; - uint64_t od_object; - dmu_object_type_t od_type; - dmu_object_type_t od_crtype; - uint64_t od_blocksize; - uint64_t od_crblocksize; - uint64_t od_crdnodesize; - uint64_t od_gen; - uint64_t od_crgen; - char od_name[ZFS_MAX_DATASET_NAME_LEN]; -} ztest_od_t; - -/* - * Per-dataset state. 
- */ -typedef struct ztest_ds { - ztest_shared_ds_t *zd_shared; - objset_t *zd_os; - krwlock_t zd_zilog_lock; - zilog_t *zd_zilog; - ztest_od_t *zd_od; /* debugging aid */ - char zd_name[ZFS_MAX_DATASET_NAME_LEN]; - kmutex_t zd_dirobj_lock; - rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; - rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; -} ztest_ds_t; - -/* - * Per-iteration state. - */ -typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); - -typedef struct ztest_info { - ztest_func_t *zi_func; /* test function */ - uint64_t zi_iters; /* iterations per execution */ - uint64_t *zi_interval; /* execute every <interval> seconds */ -} ztest_info_t; - -typedef struct ztest_shared_callstate { - uint64_t zc_count; /* per-pass count */ - uint64_t zc_time; /* per-pass time */ - uint64_t zc_next; /* next time to call this function */ -} ztest_shared_callstate_t; - -static ztest_shared_callstate_t *ztest_shared_callstate; -#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) - -/* - * Note: these aren't static because we want dladdr() to work. 
- */ -ztest_func_t ztest_dmu_read_write; -ztest_func_t ztest_dmu_write_parallel; -ztest_func_t ztest_dmu_object_alloc_free; -ztest_func_t ztest_dmu_object_next_chunk; -ztest_func_t ztest_dmu_commit_callbacks; -ztest_func_t ztest_zap; -ztest_func_t ztest_zap_parallel; -ztest_func_t ztest_zil_commit; -ztest_func_t ztest_zil_remount; -ztest_func_t ztest_dmu_read_write_zcopy; -ztest_func_t ztest_dmu_objset_create_destroy; -ztest_func_t ztest_dmu_prealloc; -ztest_func_t ztest_fzap; -ztest_func_t ztest_dmu_snapshot_create_destroy; -ztest_func_t ztest_dsl_prop_get_set; -ztest_func_t ztest_spa_prop_get_set; -ztest_func_t ztest_spa_create_destroy; -ztest_func_t ztest_fault_inject; -ztest_func_t ztest_ddt_repair; -ztest_func_t ztest_dmu_snapshot_hold; -ztest_func_t ztest_mmp_enable_disable; -ztest_func_t ztest_scrub; -ztest_func_t ztest_dsl_dataset_promote_busy; -ztest_func_t ztest_vdev_attach_detach; -ztest_func_t ztest_vdev_LUN_growth; -ztest_func_t ztest_vdev_add_remove; -ztest_func_t ztest_vdev_class_add; -ztest_func_t ztest_vdev_aux_add_remove; -ztest_func_t ztest_split_pool; -ztest_func_t ztest_reguid; -ztest_func_t ztest_spa_upgrade; -ztest_func_t ztest_device_removal; -ztest_func_t ztest_remap_blocks; -ztest_func_t ztest_spa_checkpoint_create_discard; -ztest_func_t ztest_initialize; -ztest_func_t ztest_verify_dnode_bt; - -uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ -uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ -uint64_t zopt_often = 1ULL * NANOSEC; /* every second */ -uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ -uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ - -ztest_info_t ztest_info[] = { - { ztest_dmu_read_write, 1, &zopt_always }, - { ztest_dmu_write_parallel, 10, &zopt_always }, - { ztest_dmu_object_alloc_free, 1, &zopt_always }, - { ztest_dmu_object_next_chunk, 1, &zopt_sometimes }, - { ztest_dmu_commit_callbacks, 1, &zopt_always }, - { ztest_zap, 30, &zopt_always }, - { 
ztest_zap_parallel, 100, &zopt_always }, - { ztest_split_pool, 1, &zopt_always }, - { ztest_zil_commit, 1, &zopt_incessant }, - { ztest_zil_remount, 1, &zopt_sometimes }, - { ztest_dmu_read_write_zcopy, 1, &zopt_often }, - { ztest_dmu_objset_create_destroy, 1, &zopt_often }, - { ztest_dsl_prop_get_set, 1, &zopt_often }, - { ztest_spa_prop_get_set, 1, &zopt_sometimes }, -#if 0 - { ztest_dmu_prealloc, 1, &zopt_sometimes }, -#endif - { ztest_fzap, 1, &zopt_sometimes }, - { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, - { ztest_spa_create_destroy, 1, &zopt_sometimes }, - { ztest_fault_inject, 1, &zopt_incessant }, - { ztest_ddt_repair, 1, &zopt_sometimes }, - { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, - { ztest_mmp_enable_disable, 1, &zopt_sometimes }, - { ztest_reguid, 1, &zopt_rarely }, - { ztest_scrub, 1, &zopt_often }, - { ztest_spa_upgrade, 1, &zopt_rarely }, - { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, - { ztest_vdev_attach_detach, 1, &zopt_incessant }, - { ztest_vdev_LUN_growth, 1, &zopt_rarely }, - { ztest_vdev_add_remove, 1, - &ztest_opts.zo_vdevtime }, - { ztest_vdev_class_add, 1, - &ztest_opts.zo_vdevtime }, - { ztest_vdev_aux_add_remove, 1, - &ztest_opts.zo_vdevtime }, - { ztest_device_removal, 1, &zopt_sometimes }, - { ztest_remap_blocks, 1, &zopt_sometimes }, - { ztest_spa_checkpoint_create_discard, 1, &zopt_rarely }, - { ztest_initialize, 1, &zopt_sometimes }, - { ztest_verify_dnode_bt, 1, &zopt_sometimes } -}; - -#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) - -/* - * The following struct is used to hold a list of uncalled commit callbacks. - * The callbacks are ordered by txg number. - */ -typedef struct ztest_cb_list { - kmutex_t zcl_callbacks_lock; - list_t zcl_callbacks; -} ztest_cb_list_t; - -/* - * Stuff we need to share writably between parent and child. 
- */ -typedef struct ztest_shared { - boolean_t zs_do_init; - hrtime_t zs_proc_start; - hrtime_t zs_proc_stop; - hrtime_t zs_thread_start; - hrtime_t zs_thread_stop; - hrtime_t zs_thread_kill; - uint64_t zs_enospc_count; - uint64_t zs_vdev_next_leaf; - uint64_t zs_vdev_aux; - uint64_t zs_alloc; - uint64_t zs_space; - uint64_t zs_splits; - uint64_t zs_mirrors; - uint64_t zs_metaslab_sz; - uint64_t zs_metaslab_df_alloc_threshold; - uint64_t zs_guid; -} ztest_shared_t; - -#define ID_PARALLEL -1ULL - -static char ztest_dev_template[] = "%s/%s.%llua"; -static char ztest_aux_template[] = "%s/%s.%s.%llu"; -ztest_shared_t *ztest_shared; - -static spa_t *ztest_spa = NULL; -static ztest_ds_t *ztest_ds; - -static kmutex_t ztest_vdev_lock; -static boolean_t ztest_device_removal_active = B_FALSE; -static kmutex_t ztest_checkpoint_lock; - -/* - * The ztest_name_lock protects the pool and dataset namespace used by - * the individual tests. To modify the namespace, consumers must grab - * this lock as writer. Grabbing the lock as reader will ensure that the - * namespace does not change while the lock is held. - */ -static krwlock_t ztest_name_lock; - -static boolean_t ztest_dump_core = B_TRUE; -static boolean_t ztest_exiting; - -/* Global commit callback list */ -static ztest_cb_list_t zcl; - -enum ztest_object { - ZTEST_META_DNODE = 0, - ZTEST_DIROBJ, - ZTEST_OBJECTS -}; - -static void usage(boolean_t) __NORETURN; - -/* - * These libumem hooks provide a reasonable set of defaults for the allocator's - * debugging facilities. - */ -const char * -_umem_debug_init() -{ - return ("default,verbose"); /* $UMEM_DEBUG setting */ -} - -const char * -_umem_logging_init(void) -{ - return ("fail,contents"); /* $UMEM_LOGGING setting */ -} - -#define FATAL_MSG_SZ 1024 - -char *fatal_msg; - -static void -fatal(int do_perror, char *message, ...) 
-{ - va_list args; - int save_errno = errno; - char buf[FATAL_MSG_SZ]; - - (void) fflush(stdout); - - va_start(args, message); - (void) sprintf(buf, "ztest: "); - /* LINTED */ - (void) vsprintf(buf + strlen(buf), message, args); - va_end(args); - if (do_perror) { - (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), - ": %s", strerror(save_errno)); - } - (void) fprintf(stderr, "%s\n", buf); - fatal_msg = buf; /* to ease debugging */ - if (ztest_dump_core) - abort(); - exit(3); -} - -static int -str2shift(const char *buf) -{ - const char *ends = "BKMGTPEZ"; - int i; - - if (buf[0] == '\0') - return (0); - for (i = 0; i < strlen(ends); i++) { - if (toupper(buf[0]) == ends[i]) - break; - } - if (i == strlen(ends)) { - (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", - buf); - usage(B_FALSE); - } - if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { - return (10*i); - } - (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); - usage(B_FALSE); - /* NOTREACHED */ -} - -static uint64_t -nicenumtoull(const char *buf) -{ - char *end; - uint64_t val; - - val = strtoull(buf, &end, 0); - if (end == buf) { - (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); - usage(B_FALSE); - } else if (end[0] == '.') { - double fval = strtod(buf, &end); - fval *= pow(2, str2shift(end)); - if (fval > UINT64_MAX) { - (void) fprintf(stderr, "ztest: value too large: %s\n", - buf); - usage(B_FALSE); - } - val = (uint64_t)fval; - } else { - int shift = str2shift(end); - if (shift >= 64 || (val << shift) >> shift != val) { - (void) fprintf(stderr, "ztest: value too large: %s\n", - buf); - usage(B_FALSE); - } - val <<= shift; - } - return (val); -} - -static void -usage(boolean_t requested) -{ - const ztest_shared_opts_t *zo = &ztest_opts_defaults; - - char nice_vdev_size[NN_NUMBUF_SZ]; - char nice_force_ganging[NN_NUMBUF_SZ]; - FILE *fp = requested ? 
stdout : stderr; - - nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size)); - nicenum(zo->zo_metaslab_force_ganging, nice_force_ganging, - sizeof (nice_force_ganging)); - - (void) fprintf(fp, "Usage: %s\n" - "\t[-v vdevs (default: %llu)]\n" - "\t[-s size_of_each_vdev (default: %s)]\n" - "\t[-a alignment_shift (default: %d)] use 0 for random\n" - "\t[-m mirror_copies (default: %d)]\n" - "\t[-r raidz_disks (default: %d)]\n" - "\t[-R raidz_parity (default: %d)]\n" - "\t[-d datasets (default: %d)]\n" - "\t[-t threads (default: %d)]\n" - "\t[-g gang_block_threshold (default: %s)]\n" - "\t[-i init_count (default: %d)] initialize pool i times\n" - "\t[-k kill_percentage (default: %llu%%)]\n" - "\t[-p pool_name (default: %s)]\n" - "\t[-f dir (default: %s)] file directory for vdev files\n" - "\t[-M] Multi-host simulate pool imported on remote host\n" - "\t[-V] verbose (use multiple times for ever more blather)\n" - "\t[-E] use existing pool instead of creating new one\n" - "\t[-T time (default: %llu sec)] total run time\n" - "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" - "\t[-P passtime (default: %llu sec)] time per pass\n" - "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" - "\t[-C vdev class state (default: random)] special=on|off|random\n" - "\t[-o variable=value] ... set global variable to an unsigned\n" - "\t 32-bit integer value\n" - "\t[-h] (print help)\n" - "", - zo->zo_pool, - (u_longlong_t)zo->zo_vdevs, /* -v */ - nice_vdev_size, /* -s */ - zo->zo_ashift, /* -a */ - zo->zo_mirrors, /* -m */ - zo->zo_raidz, /* -r */ - zo->zo_raidz_parity, /* -R */ - zo->zo_datasets, /* -d */ - zo->zo_threads, /* -t */ - nice_force_ganging, /* -g */ - zo->zo_init, /* -i */ - (u_longlong_t)zo->zo_killrate, /* -k */ - zo->zo_pool, /* -p */ - zo->zo_dir, /* -f */ - (u_longlong_t)zo->zo_time, /* -T */ - (u_longlong_t)zo->zo_maxloops, /* -F */ - (u_longlong_t)zo->zo_passtime); - exit(requested ? 
0 : 1); -} - - -static void -ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo) -{ - char name[32]; - char *value; - int state = ZTEST_VDEV_CLASS_RND; - - (void) strlcpy(name, input, sizeof (name)); - - value = strchr(name, '='); - if (value == NULL) { - (void) fprintf(stderr, "missing value in property=value " - "'-C' argument (%s)\n", input); - usage(B_FALSE); - } - *(value) = '\0'; - value++; - - if (strcmp(value, "on") == 0) { - state = ZTEST_VDEV_CLASS_ON; - } else if (strcmp(value, "off") == 0) { - state = ZTEST_VDEV_CLASS_OFF; - } else if (strcmp(value, "random") == 0) { - state = ZTEST_VDEV_CLASS_RND; - } else { - (void) fprintf(stderr, "invalid property value '%s'\n", value); - usage(B_FALSE); - } - - if (strcmp(name, "special") == 0) { - zo->zo_special_vdevs = state; - } else { - (void) fprintf(stderr, "invalid property name '%s'\n", name); - usage(B_FALSE); - } - if (zo->zo_verbose >= 3) - (void) printf("%s vdev state is '%s'\n", name, value); -} - -static void -process_options(int argc, char **argv) -{ - char *path; - ztest_shared_opts_t *zo = &ztest_opts; - - int opt; - uint64_t value; - char altdir[MAXNAMELEN] = { 0 }; - - bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); - - while ((opt = getopt(argc, argv, - "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:")) != EOF) { - value = 0; - switch (opt) { - case 'v': - case 's': - case 'a': - case 'm': - case 'r': - case 'R': - case 'd': - case 't': - case 'g': - case 'i': - case 'k': - case 'T': - case 'P': - case 'F': - value = nicenumtoull(optarg); - } - switch (opt) { - case 'v': - zo->zo_vdevs = value; - break; - case 's': - zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); - break; - case 'a': - zo->zo_ashift = value; - break; - case 'm': - zo->zo_mirrors = value; - break; - case 'r': - zo->zo_raidz = MAX(1, value); - break; - case 'R': - zo->zo_raidz_parity = MIN(MAX(value, 1), 3); - break; - case 'd': - zo->zo_datasets = MAX(1, value); - break; - case 't': - zo->zo_threads = MAX(1, value); - 
break; - case 'g': - zo->zo_metaslab_force_ganging = - MAX(SPA_MINBLOCKSIZE << 1, value); - break; - case 'i': - zo->zo_init = value; - break; - case 'k': - zo->zo_killrate = value; - break; - case 'p': - (void) strlcpy(zo->zo_pool, optarg, - sizeof (zo->zo_pool)); - break; - case 'f': - path = realpath(optarg, NULL); - if (path == NULL) { - (void) fprintf(stderr, "error: %s: %s\n", - optarg, strerror(errno)); - usage(B_FALSE); - } else { - (void) strlcpy(zo->zo_dir, path, - sizeof (zo->zo_dir)); - } - break; - case 'M': - zo->zo_mmp_test = 1; - break; - case 'V': - zo->zo_verbose++; - break; - case 'E': - zo->zo_init = 0; - break; - case 'T': - zo->zo_time = value; - break; - case 'P': - zo->zo_passtime = MAX(1, value); - break; - case 'F': - zo->zo_maxloops = MAX(1, value); - break; - case 'B': - (void) strlcpy(altdir, optarg, sizeof (altdir)); - break; - case 'C': - ztest_parse_name_value(optarg, zo); - break; - case 'o': - if (set_global_var(optarg) != 0) - usage(B_FALSE); - break; - case 'h': - usage(B_TRUE); - break; - case '?': - default: - usage(B_FALSE); - break; - } - } - - zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); - - zo->zo_vdevtime = - (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs : - UINT64_MAX >> 2); - - if (strlen(altdir) > 0) { - char *cmd; - char *realaltdir; - char *bin; - char *ztest; - char *isa; - int isalen; - - cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); - realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); - - VERIFY(NULL != realpath(getexecname(), cmd)); - if (0 != access(altdir, F_OK)) { - ztest_dump_core = B_FALSE; - fatal(B_TRUE, "invalid alternate ztest path: %s", - altdir); - } - VERIFY(NULL != realpath(altdir, realaltdir)); - - /* - * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". - * We want to extract <isa> to determine if we should use - * 32 or 64 bit binaries. 
- */ - bin = strstr(cmd, "/usr/bin/"); - ztest = strstr(bin, "/ztest"); - isa = bin + 9; - isalen = ztest - isa; - (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), - "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); - (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), - "%s/usr/lib/%.*s", realaltdir, isalen, isa); - - if (0 != access(zo->zo_alt_ztest, X_OK)) { - ztest_dump_core = B_FALSE; - fatal(B_TRUE, "invalid alternate ztest: %s", - zo->zo_alt_ztest); - } else if (0 != access(zo->zo_alt_libpath, X_OK)) { - ztest_dump_core = B_FALSE; - fatal(B_TRUE, "invalid alternate lib directory %s", - zo->zo_alt_libpath); - } - - umem_free(cmd, MAXPATHLEN); - umem_free(realaltdir, MAXPATHLEN); - } -} - -static void -ztest_kill(ztest_shared_t *zs) -{ - zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); - zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); - - /* - * Before we kill off ztest, make sure that the config is updated. - * See comment above spa_write_cachefile(). 
- */ - mutex_enter(&spa_namespace_lock); - spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); - mutex_exit(&spa_namespace_lock); - - zfs_dbgmsg_print(FTAG); - (void) kill(getpid(), SIGKILL); -} - -static uint64_t -ztest_random(uint64_t range) -{ - uint64_t r; - - ASSERT3S(ztest_fd_rand, >=, 0); - - if (range == 0) - return (0); - - if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) - fatal(1, "short read from /dev/urandom"); - - return (r % range); -} - -/* ARGSUSED */ -static void -ztest_record_enospc(const char *s) -{ - ztest_shared->zs_enospc_count++; -} - -static uint64_t -ztest_get_ashift(void) -{ - if (ztest_opts.zo_ashift == 0) - return (SPA_MINBLOCKSHIFT + ztest_random(5)); - return (ztest_opts.zo_ashift); -} - -static nvlist_t * -make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) -{ - char pathbuf[MAXPATHLEN]; - uint64_t vdev; - nvlist_t *file; - - if (ashift == 0) - ashift = ztest_get_ashift(); - - if (path == NULL) { - path = pathbuf; - - if (aux != NULL) { - vdev = ztest_shared->zs_vdev_aux; - (void) snprintf(path, sizeof (pathbuf), - ztest_aux_template, ztest_opts.zo_dir, - pool == NULL ? ztest_opts.zo_pool : pool, - aux, vdev); - } else { - vdev = ztest_shared->zs_vdev_next_leaf++; - (void) snprintf(path, sizeof (pathbuf), - ztest_dev_template, ztest_opts.zo_dir, - pool == NULL ? 
ztest_opts.zo_pool : pool, vdev); - } - } - - if (size != 0) { - int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); - if (fd == -1) - fatal(1, "can't open %s", path); - if (ftruncate(fd, size) != 0) - fatal(1, "can't ftruncate %s", path); - (void) close(fd); - } - - VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); - VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); - VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); - - return (file); -} - -static nvlist_t * -make_vdev_raidz(char *path, char *aux, char *pool, size_t size, - uint64_t ashift, int r) -{ - nvlist_t *raidz, **child; - int c; - - if (r < 2) - return (make_vdev_file(path, aux, pool, size, ashift)); - child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); - - for (c = 0; c < r; c++) - child[c] = make_vdev_file(path, aux, pool, size, ashift); - - VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_RAIDZ) == 0); - VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, - ztest_opts.zo_raidz_parity) == 0); - VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, - child, r) == 0); - - for (c = 0; c < r; c++) - nvlist_free(child[c]); - - umem_free(child, r * sizeof (nvlist_t *)); - - return (raidz); -} - -static nvlist_t * -make_vdev_mirror(char *path, char *aux, char *pool, size_t size, - uint64_t ashift, int r, int m) -{ - nvlist_t *mirror, **child; - int c; - - if (m < 1) - return (make_vdev_raidz(path, aux, pool, size, ashift, r)); - - child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); - - for (c = 0; c < m; c++) - child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); - - VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_MIRROR) == 0); - VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, - child, m) == 0); - - for (c = 0; c < 
m; c++) - nvlist_free(child[c]); - - umem_free(child, m * sizeof (nvlist_t *)); - - return (mirror); -} - -static nvlist_t * -make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, - const char *class, int r, int m, int t) -{ - nvlist_t *root, **child; - int c; - boolean_t log; - - ASSERT(t > 0); - - log = (class != NULL && strcmp(class, "log") == 0); - - child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); - - for (c = 0; c < t; c++) { - child[c] = make_vdev_mirror(path, aux, pool, size, ashift, - r, m); - VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - log) == 0); - - if (class != NULL && class[0] != '\0') { - ASSERT(m > 1 || log); /* expecting a mirror */ - VERIFY(nvlist_add_string(child[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, class) == 0); - } - } - - VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); - VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, - child, t) == 0); - - for (c = 0; c < t; c++) - nvlist_free(child[c]); - - umem_free(child, t * sizeof (nvlist_t *)); - - return (root); -} - -/* - * Find a random spa version. Returns back a random spa version in the - * range [initial_version, SPA_VERSION_FEATURES]. - */ -static uint64_t -ztest_random_spa_version(uint64_t initial_version) -{ - uint64_t version = initial_version; - - if (version <= SPA_VERSION_BEFORE_FEATURES) { - version = version + - ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); - } - - if (version > SPA_VERSION_BEFORE_FEATURES) - version = SPA_VERSION_FEATURES; - - ASSERT(SPA_VERSION_IS_SUPPORTED(version)); - return (version); -} - -static int -ztest_random_blocksize(void) -{ - uint64_t block_shift; - - ASSERT(ztest_spa->spa_max_ashift != 0); - - /* - * Choose a block size >= the ashift. - * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. 
- */ - int maxbs = SPA_OLD_MAXBLOCKSHIFT; - if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE) - maxbs = 20; - block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1); - return (1 << (SPA_MINBLOCKSHIFT + block_shift)); -} - -static int -ztest_random_dnodesize(void) -{ - int slots; - int max_slots = spa_maxdnodesize(ztest_spa) >> DNODE_SHIFT; - - if (max_slots == DNODE_MIN_SLOTS) - return (DNODE_MIN_SIZE); - - /* - * Weight the random distribution more heavily toward smaller - * dnode sizes since that is more likely to reflect real-world - * usage. - */ - ASSERT3U(max_slots, >, 4); - switch (ztest_random(10)) { - case 0: - slots = 5 + ztest_random(max_slots - 4); - break; - case 1 ... 4: - slots = 2 + ztest_random(3); - break; - default: - slots = 1; - break; - } - - return (slots << DNODE_SHIFT); -} - -static int -ztest_random_ibshift(void) -{ - return (DN_MIN_INDBLKSHIFT + - ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); -} - -static uint64_t -ztest_random_vdev_top(spa_t *spa, boolean_t log_ok) -{ - uint64_t top; - vdev_t *rvd = spa->spa_root_vdev; - vdev_t *tvd; - - ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); - - do { - top = ztest_random(rvd->vdev_children); - tvd = rvd->vdev_child[top]; - } while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) || - tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); - - return (top); -} - -static uint64_t -ztest_random_dsl_prop(zfs_prop_t prop) -{ - uint64_t value; - - do { - value = zfs_prop_random_value(prop, ztest_random(-1ULL)); - } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); - - return (value); -} - -static int -ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, - boolean_t inherit) -{ - const char *propname = zfs_prop_to_name(prop); - const char *valname; - char setpoint[MAXPATHLEN]; - uint64_t curval; - int error; - - error = dsl_prop_set_int(osname, propname, - (inherit ? 
ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); - - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - return (error); - } - ASSERT0(error); - - VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); - - if (ztest_opts.zo_verbose >= 6) { - VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); - (void) printf("%s %s = %s at '%s'\n", - osname, propname, valname, setpoint); - } - - return (error); -} - -static int -ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) -{ - spa_t *spa = ztest_spa; - nvlist_t *props = NULL; - int error; - - VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); - - error = spa_prop_set(spa, props); - - nvlist_free(props); - - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - return (error); - } - ASSERT0(error); - - return (error); -} - -static void -ztest_rll_init(rll_t *rll) -{ - rll->rll_writer = NULL; - rll->rll_readers = 0; - mutex_init(&rll->rll_lock, NULL, USYNC_THREAD, NULL); - cv_init(&rll->rll_cv, NULL, USYNC_THREAD, NULL); -} - -static void -ztest_rll_destroy(rll_t *rll) -{ - ASSERT(rll->rll_writer == NULL); - ASSERT(rll->rll_readers == 0); - mutex_destroy(&rll->rll_lock); - cv_destroy(&rll->rll_cv); -} - -static void -ztest_rll_lock(rll_t *rll, rl_type_t type) -{ - mutex_enter(&rll->rll_lock); - - if (type == RL_READER) { - while (rll->rll_writer != NULL) - cv_wait(&rll->rll_cv, &rll->rll_lock); - rll->rll_readers++; - } else { - while (rll->rll_writer != NULL || rll->rll_readers) - cv_wait(&rll->rll_cv, &rll->rll_lock); - rll->rll_writer = curthread; - } - - mutex_exit(&rll->rll_lock); -} - -static void -ztest_rll_unlock(rll_t *rll) -{ - mutex_enter(&rll->rll_lock); - - if (rll->rll_writer) { - ASSERT(rll->rll_readers == 0); - rll->rll_writer = NULL; - } else { - ASSERT(rll->rll_readers != 0); - ASSERT(rll->rll_writer == NULL); - rll->rll_readers--; - } - - if (rll->rll_writer == NULL && rll->rll_readers == 0) - 
cv_broadcast(&rll->rll_cv); - - mutex_exit(&rll->rll_lock); -} - -static void -ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) -{ - rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; - - ztest_rll_lock(rll, type); -} - -static void -ztest_object_unlock(ztest_ds_t *zd, uint64_t object) -{ - rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; - - ztest_rll_unlock(rll); -} - -static rl_t * -ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, - uint64_t size, rl_type_t type) -{ - uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); - rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; - rl_t *rl; - - rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); - rl->rl_object = object; - rl->rl_offset = offset; - rl->rl_size = size; - rl->rl_lock = rll; - - ztest_rll_lock(rll, type); - - return (rl); -} - -static void -ztest_range_unlock(rl_t *rl) -{ - rll_t *rll = rl->rl_lock; - - ztest_rll_unlock(rll); - - umem_free(rl, sizeof (*rl)); -} - -static void -ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) -{ - zd->zd_os = os; - zd->zd_zilog = dmu_objset_zil(os); - zd->zd_shared = szd; - dmu_objset_name(os, zd->zd_name); - - if (zd->zd_shared != NULL) - zd->zd_shared->zd_seq = 0; - - rw_init(&zd->zd_zilog_lock, NULL, USYNC_THREAD, NULL); - mutex_init(&zd->zd_dirobj_lock, NULL, USYNC_THREAD, NULL); - - for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) - ztest_rll_init(&zd->zd_object_lock[l]); - - for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_init(&zd->zd_range_lock[l]); -} - -static void -ztest_zd_fini(ztest_ds_t *zd) -{ - mutex_destroy(&zd->zd_dirobj_lock); - - for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) - ztest_rll_destroy(&zd->zd_object_lock[l]); - - for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) - ztest_rll_destroy(&zd->zd_range_lock[l]); -} - -#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? 
TXG_NOWAIT : TXG_WAIT) - -static uint64_t -ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) -{ - uint64_t txg; - int error; - - /* - * Attempt to assign tx to some transaction group. - */ - error = dmu_tx_assign(tx, txg_how); - if (error) { - if (error == ERESTART) { - ASSERT(txg_how == TXG_NOWAIT); - dmu_tx_wait(tx); - } else { - ASSERT3U(error, ==, ENOSPC); - ztest_record_enospc(tag); - } - dmu_tx_abort(tx); - return (0); - } - txg = dmu_tx_get_txg(tx); - ASSERT(txg != 0); - return (txg); -} - -static void -ztest_pattern_set(void *buf, uint64_t size, uint64_t value) -{ - uint64_t *ip = buf; - uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); - - while (ip < ip_end) - *ip++ = value; -} - -static boolean_t -ztest_pattern_match(void *buf, uint64_t size, uint64_t value) -{ - uint64_t *ip = buf; - uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); - uint64_t diff = 0; - - while (ip < ip_end) - diff |= (value - *ip++); - - return (diff == 0); -} - -static void -ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, - uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg, - uint64_t crtxg) -{ - bt->bt_magic = BT_MAGIC; - bt->bt_objset = dmu_objset_id(os); - bt->bt_object = object; - bt->bt_dnodesize = dnodesize; - bt->bt_offset = offset; - bt->bt_gen = gen; - bt->bt_txg = txg; - bt->bt_crtxg = crtxg; -} - -static void -ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, - uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg, - uint64_t crtxg) -{ - ASSERT3U(bt->bt_magic, ==, BT_MAGIC); - ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); - ASSERT3U(bt->bt_object, ==, object); - ASSERT3U(bt->bt_dnodesize, ==, dnodesize); - ASSERT3U(bt->bt_offset, ==, offset); - ASSERT3U(bt->bt_gen, <=, gen); - ASSERT3U(bt->bt_txg, <=, txg); - ASSERT3U(bt->bt_crtxg, ==, crtxg); -} - -static ztest_block_tag_t * -ztest_bt_bonus(dmu_buf_t *db) -{ - dmu_object_info_t doi; - ztest_block_tag_t 
*bt; - - dmu_object_info_from_db(db, &doi); - ASSERT3U(doi.doi_bonus_size, <=, db->db_size); - ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); - bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); - - return (bt); -} - -/* - * Generate a token to fill up unused bonus buffer space. Try to make - * it unique to the object, generation, and offset to verify that data - * is not getting overwritten by data from other dnodes. - */ -#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \ - (((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset)) - -/* - * Fill up the unused bonus buffer region before the block tag with a - * verifiable pattern. Filling the whole bonus area with non-zero data - * helps ensure that all dnode traversal code properly skips the - * interior regions of large dnodes. - */ -void -ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj, - objset_t *os, uint64_t gen) -{ - uint64_t *bonusp; - - ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8)); - - for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) { - uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os), - gen, bonusp - (uint64_t *)db->db_data); - *bonusp = token; - } -} - -/* - * Verify that the unused area of a bonus buffer is filled with the - * expected tokens. 
- */ -void -ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj, - objset_t *os, uint64_t gen) -{ - uint64_t *bonusp; - - for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) { - uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os), - gen, bonusp - (uint64_t *)db->db_data); - VERIFY3U(*bonusp, ==, token); - } -} - -/* - * ZIL logging ops - */ - -#define lrz_type lr_mode -#define lrz_blocksize lr_uid -#define lrz_ibshift lr_gid -#define lrz_bonustype lr_rdev -#define lrz_dnodesize lr_crtime[1] - -static void -ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) -{ - char *name = (void *)(lr + 1); /* name follows lr */ - size_t namesize = strlen(name) + 1; - itx_t *itx; - - if (zil_replaying(zd->zd_zilog, tx)) - return; - - itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); - bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, - sizeof (*lr) + namesize - sizeof (lr_t)); - - zil_itx_assign(zd->zd_zilog, itx, tx); -} - -static void -ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) -{ - char *name = (void *)(lr + 1); /* name follows lr */ - size_t namesize = strlen(name) + 1; - itx_t *itx; - - if (zil_replaying(zd->zd_zilog, tx)) - return; - - itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); - bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, - sizeof (*lr) + namesize - sizeof (lr_t)); - - itx->itx_oid = object; - zil_itx_assign(zd->zd_zilog, itx, tx); -} - -static void -ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) -{ - itx_t *itx; - itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); - - if (zil_replaying(zd->zd_zilog, tx)) - return; - - if (lr->lr_length > zil_max_log_data(zd->zd_zilog)) - write_state = WR_INDIRECT; - - itx = zil_itx_create(TX_WRITE, - sizeof (*lr) + (write_state == WR_COPIED ? 
lr->lr_length : 0)); - - if (write_state == WR_COPIED && - dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, - ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { - zil_itx_destroy(itx); - itx = zil_itx_create(TX_WRITE, sizeof (*lr)); - write_state = WR_NEED_COPY; - } - itx->itx_private = zd; - itx->itx_wr_state = write_state; - itx->itx_sync = (ztest_random(8) == 0); - - bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, - sizeof (*lr) - sizeof (lr_t)); - - zil_itx_assign(zd->zd_zilog, itx, tx); -} - -static void -ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) -{ - itx_t *itx; - - if (zil_replaying(zd->zd_zilog, tx)) - return; - - itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); - bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, - sizeof (*lr) - sizeof (lr_t)); - - itx->itx_sync = B_FALSE; - zil_itx_assign(zd->zd_zilog, itx, tx); -} - -static void -ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) -{ - itx_t *itx; - - if (zil_replaying(zd->zd_zilog, tx)) - return; - - itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); - bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, - sizeof (*lr) - sizeof (lr_t)); - - itx->itx_sync = B_FALSE; - zil_itx_assign(zd->zd_zilog, itx, tx); -} - -/* - * ZIL replay ops - */ -static int -ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap) -{ - ztest_ds_t *zd = arg1; - lr_create_t *lr = arg2; - char *name = (void *)(lr + 1); /* name follows lr */ - objset_t *os = zd->zd_os; - ztest_block_tag_t *bbt; - dmu_buf_t *db; - dmu_tx_t *tx; - uint64_t txg; - int error = 0; - int bonuslen; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - ASSERT(lr->lr_doid == ZTEST_DIROBJ); - ASSERT(name[0] != '\0'); - - tx = dmu_tx_create(os); - - dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); - - if (lr->lrz_type == DMU_OT_ZAP_OTHER) { - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); - } else { - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - } - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if 
(txg == 0) - return (ENOSPC); - - ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); - bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize); - - if (lr->lrz_type == DMU_OT_ZAP_OTHER) { - if (lr->lr_foid == 0) { - lr->lr_foid = zap_create_dnsize(os, - lr->lrz_type, lr->lrz_bonustype, - bonuslen, lr->lrz_dnodesize, tx); - } else { - error = zap_create_claim_dnsize(os, lr->lr_foid, - lr->lrz_type, lr->lrz_bonustype, - bonuslen, lr->lrz_dnodesize, tx); - } - } else { - if (lr->lr_foid == 0) { - lr->lr_foid = dmu_object_alloc_dnsize(os, - lr->lrz_type, 0, lr->lrz_bonustype, - bonuslen, lr->lrz_dnodesize, tx); - } else { - error = dmu_object_claim_dnsize(os, lr->lr_foid, - lr->lrz_type, 0, lr->lrz_bonustype, - bonuslen, lr->lrz_dnodesize, tx); - } - } - - if (error) { - ASSERT3U(error, ==, EEXIST); - ASSERT(zd->zd_zilog->zl_replay); - dmu_tx_commit(tx); - return (error); - } - - ASSERT(lr->lr_foid != 0); - - if (lr->lrz_type != DMU_OT_ZAP_OTHER) - VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, - lr->lrz_blocksize, lr->lrz_ibshift, tx)); - - VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); - bbt = ztest_bt_bonus(db); - dmu_buf_will_dirty(db, tx); - ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL, - lr->lr_gen, txg, txg); - ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen); - dmu_buf_rele(db, FTAG); - - VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, - &lr->lr_foid, tx)); - - (void) ztest_log_create(zd, tx, lr); - - dmu_tx_commit(tx); - - return (0); -} - -static int -ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap) -{ - ztest_ds_t *zd = arg1; - lr_remove_t *lr = arg2; - char *name = (void *)(lr + 1); /* name follows lr */ - objset_t *os = zd->zd_os; - dmu_object_info_t doi; - dmu_tx_t *tx; - uint64_t object, txg; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - ASSERT(lr->lr_doid == ZTEST_DIROBJ); - ASSERT(name[0] != '\0'); - - VERIFY3U(0, ==, - zap_lookup(os, lr->lr_doid, 
name, sizeof (object), 1, &object)); - ASSERT(object != 0); - - ztest_object_lock(zd, object, RL_WRITER); - - VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); - - tx = dmu_tx_create(os); - - dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); - dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if (txg == 0) { - ztest_object_unlock(zd, object); - return (ENOSPC); - } - - if (doi.doi_type == DMU_OT_ZAP_OTHER) { - VERIFY3U(0, ==, zap_destroy(os, object, tx)); - } else { - VERIFY3U(0, ==, dmu_object_free(os, object, tx)); - } - - VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); - - (void) ztest_log_remove(zd, tx, lr, object); - - dmu_tx_commit(tx); - - ztest_object_unlock(zd, object); - - return (0); -} - -static int -ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) -{ - ztest_ds_t *zd = arg1; - lr_write_t *lr = arg2; - objset_t *os = zd->zd_os; - void *data = lr + 1; /* data follows lr */ - uint64_t offset, length; - ztest_block_tag_t *bt = data; - ztest_block_tag_t *bbt; - uint64_t gen, txg, lrtxg, crtxg; - dmu_object_info_t doi; - dmu_tx_t *tx; - dmu_buf_t *db; - arc_buf_t *abuf = NULL; - rl_t *rl; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - offset = lr->lr_offset; - length = lr->lr_length; - - /* If it's a dmu_sync() block, write the whole block */ - if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { - uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); - if (length < blocksize) { - offset -= offset % blocksize; - length = blocksize; - } - } - - if (bt->bt_magic == BSWAP_64(BT_MAGIC)) - byteswap_uint64_array(bt, sizeof (*bt)); - - if (bt->bt_magic != BT_MAGIC) - bt = NULL; - - ztest_object_lock(zd, lr->lr_foid, RL_READER); - rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); - - VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); - - dmu_object_info_from_db(db, &doi); - - bbt = ztest_bt_bonus(db); - ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); - gen = 
bbt->bt_gen; - crtxg = bbt->bt_crtxg; - lrtxg = lr->lr_common.lrc_txg; - - tx = dmu_tx_create(os); - - dmu_tx_hold_write(tx, lr->lr_foid, offset, length); - - if (ztest_random(8) == 0 && length == doi.doi_data_block_size && - P2PHASE(offset, length) == 0) - abuf = dmu_request_arcbuf(db, length); - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if (txg == 0) { - if (abuf != NULL) - dmu_return_arcbuf(abuf); - dmu_buf_rele(db, FTAG); - ztest_range_unlock(rl); - ztest_object_unlock(zd, lr->lr_foid); - return (ENOSPC); - } - - if (bt != NULL) { - /* - * Usually, verify the old data before writing new data -- - * but not always, because we also want to verify correct - * behavior when the data was not recently read into cache. - */ - ASSERT(offset % doi.doi_data_block_size == 0); - if (ztest_random(4) != 0) { - int prefetch = ztest_random(2) ? - DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; - ztest_block_tag_t rbt; - - VERIFY(dmu_read(os, lr->lr_foid, offset, - sizeof (rbt), &rbt, prefetch) == 0); - if (rbt.bt_magic == BT_MAGIC) { - ztest_bt_verify(&rbt, os, lr->lr_foid, 0, - offset, gen, txg, crtxg); - } - } - - /* - * Writes can appear to be newer than the bonus buffer because - * the ztest_get_data() callback does a dmu_read() of the - * open-context data, which may be different than the data - * as it was when the write was generated. - */ - if (zd->zd_zilog->zl_replay) { - ztest_bt_verify(bt, os, lr->lr_foid, 0, offset, - MAX(gen, bt->bt_gen), MAX(txg, lrtxg), - bt->bt_crtxg); - } - - /* - * Set the bt's gen/txg to the bonus buffer's gen/txg - * so that all of the usual ASSERTs will work. 
- */ - ztest_bt_generate(bt, os, lr->lr_foid, 0, offset, gen, txg, - crtxg); - } - - if (abuf == NULL) { - dmu_write(os, lr->lr_foid, offset, length, data, tx); - } else { - bcopy(data, abuf->b_data, length); - dmu_assign_arcbuf(db, offset, abuf, tx); - } - - (void) ztest_log_write(zd, tx, lr); - - dmu_buf_rele(db, FTAG); - - dmu_tx_commit(tx); - - ztest_range_unlock(rl); - ztest_object_unlock(zd, lr->lr_foid); - - return (0); -} - -static int -ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) -{ - ztest_ds_t *zd = arg1; - lr_truncate_t *lr = arg2; - objset_t *os = zd->zd_os; - dmu_tx_t *tx; - uint64_t txg; - rl_t *rl; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - ztest_object_lock(zd, lr->lr_foid, RL_READER); - rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, - RL_WRITER); - - tx = dmu_tx_create(os); - - dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if (txg == 0) { - ztest_range_unlock(rl); - ztest_object_unlock(zd, lr->lr_foid); - return (ENOSPC); - } - - VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, - lr->lr_length, tx) == 0); - - (void) ztest_log_truncate(zd, tx, lr); - - dmu_tx_commit(tx); - - ztest_range_unlock(rl); - ztest_object_unlock(zd, lr->lr_foid); - - return (0); -} - -static int -ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) -{ - ztest_ds_t *zd = arg1; - lr_setattr_t *lr = arg2; - objset_t *os = zd->zd_os; - dmu_tx_t *tx; - dmu_buf_t *db; - ztest_block_tag_t *bbt; - uint64_t txg, lrtxg, crtxg, dnodesize; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - ztest_object_lock(zd, lr->lr_foid, RL_WRITER); - - VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); - - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, lr->lr_foid); - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if (txg == 0) { - dmu_buf_rele(db, FTAG); - ztest_object_unlock(zd, lr->lr_foid); - return (ENOSPC); - } - - bbt = 
ztest_bt_bonus(db); - ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); - crtxg = bbt->bt_crtxg; - lrtxg = lr->lr_common.lrc_txg; - dnodesize = bbt->bt_dnodesize; - - if (zd->zd_zilog->zl_replay) { - ASSERT(lr->lr_size != 0); - ASSERT(lr->lr_mode != 0); - ASSERT(lrtxg != 0); - } else { - /* - * Randomly change the size and increment the generation. - */ - lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * - sizeof (*bbt); - lr->lr_mode = bbt->bt_gen + 1; - ASSERT(lrtxg == 0); - } - - /* - * Verify that the current bonus buffer is not newer than our txg. - */ - ztest_bt_verify(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode, - MAX(txg, lrtxg), crtxg); - - dmu_buf_will_dirty(db, tx); - - ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); - ASSERT3U(lr->lr_size, <=, db->db_size); - VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); - bbt = ztest_bt_bonus(db); - - ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode, - txg, crtxg); - ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen); - - dmu_buf_rele(db, FTAG); - - (void) ztest_log_setattr(zd, tx, lr); - - dmu_tx_commit(tx); - - ztest_object_unlock(zd, lr->lr_foid); - - return (0); -} - -zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { - NULL, /* 0 no such transaction type */ - ztest_replay_create, /* TX_CREATE */ - NULL, /* TX_MKDIR */ - NULL, /* TX_MKXATTR */ - NULL, /* TX_SYMLINK */ - ztest_replay_remove, /* TX_REMOVE */ - NULL, /* TX_RMDIR */ - NULL, /* TX_LINK */ - NULL, /* TX_RENAME */ - ztest_replay_write, /* TX_WRITE */ - ztest_replay_truncate, /* TX_TRUNCATE */ - ztest_replay_setattr, /* TX_SETATTR */ - NULL, /* TX_ACL */ - NULL, /* TX_CREATE_ACL */ - NULL, /* TX_CREATE_ATTR */ - NULL, /* TX_CREATE_ACL_ATTR */ - NULL, /* TX_MKDIR_ACL */ - NULL, /* TX_MKDIR_ATTR */ - NULL, /* TX_MKDIR_ACL_ATTR */ - NULL, /* TX_WRITE2 */ -}; - -/* - * ZIL get_data callbacks - */ - -/* ARGSUSED */ -static void -ztest_get_done(zgd_t *zgd, int error) -{ - ztest_ds_t *zd = zgd->zgd_private; - uint64_t 
object = ((rl_t *)zgd->zgd_lr)->rl_object; - - if (zgd->zgd_db) - dmu_buf_rele(zgd->zgd_db, zgd); - - ztest_range_unlock((rl_t *)zgd->zgd_lr); - ztest_object_unlock(zd, object); - - umem_free(zgd, sizeof (*zgd)); -} - -static int -ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, - zio_t *zio) -{ - ztest_ds_t *zd = arg; - objset_t *os = zd->zd_os; - uint64_t object = lr->lr_foid; - uint64_t offset = lr->lr_offset; - uint64_t size = lr->lr_length; - uint64_t txg = lr->lr_common.lrc_txg; - uint64_t crtxg; - dmu_object_info_t doi; - dmu_buf_t *db; - zgd_t *zgd; - int error; - - ASSERT3P(lwb, !=, NULL); - ASSERT3P(zio, !=, NULL); - ASSERT3U(size, !=, 0); - - ztest_object_lock(zd, object, RL_READER); - error = dmu_bonus_hold(os, object, FTAG, &db); - if (error) { - ztest_object_unlock(zd, object); - return (error); - } - - crtxg = ztest_bt_bonus(db)->bt_crtxg; - - if (crtxg == 0 || crtxg > txg) { - dmu_buf_rele(db, FTAG); - ztest_object_unlock(zd, object); - return (ENOENT); - } - - dmu_object_info_from_db(db, &doi); - dmu_buf_rele(db, FTAG); - db = NULL; - - zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); - zgd->zgd_lwb = lwb; - zgd->zgd_private = zd; - - if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, - object, offset, size, RL_READER); - - error = dmu_read(os, object, offset, size, buf, - DMU_READ_NO_PREFETCH); - ASSERT(error == 0); - } else { - size = doi.doi_data_block_size; - if (ISP2(size)) { - offset = P2ALIGN(offset, size); - } else { - ASSERT(offset < size); - offset = 0; - } - - zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd, - object, offset, size, RL_READER); - - error = dmu_buf_hold(os, object, offset, zgd, &db, - DMU_READ_NO_PREFETCH); - - if (error == 0) { - blkptr_t *bp = &lr->lr_blkptr; - - zgd->zgd_db = db; - zgd->zgd_bp = bp; - - ASSERT(db->db_offset == offset); - ASSERT(db->db_size == size); - - error = dmu_sync(zio, lr->lr_common.lrc_txg, - ztest_get_done, zgd); - - 
if (error == 0) - return (0); - } - } - - ztest_get_done(zgd, error); - - return (error); -} - -static void * -ztest_lr_alloc(size_t lrsize, char *name) -{ - char *lr; - size_t namesize = name ? strlen(name) + 1 : 0; - - lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); - - if (name) - bcopy(name, lr + lrsize, namesize); - - return (lr); -} - -void -ztest_lr_free(void *lr, size_t lrsize, char *name) -{ - size_t namesize = name ? strlen(name) + 1 : 0; - - umem_free(lr, lrsize + namesize); -} - -/* - * Lookup a bunch of objects. Returns the number of objects not found. - */ -static int -ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) -{ - int missing = 0; - int error; - - ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); - - for (int i = 0; i < count; i++, od++) { - od->od_object = 0; - error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, - sizeof (uint64_t), 1, &od->od_object); - if (error) { - ASSERT(error == ENOENT); - ASSERT(od->od_object == 0); - missing++; - } else { - dmu_buf_t *db; - ztest_block_tag_t *bbt; - dmu_object_info_t doi; - - ASSERT(od->od_object != 0); - ASSERT(missing == 0); /* there should be no gaps */ - - ztest_object_lock(zd, od->od_object, RL_READER); - VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, - od->od_object, FTAG, &db)); - dmu_object_info_from_db(db, &doi); - bbt = ztest_bt_bonus(db); - ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); - od->od_type = doi.doi_type; - od->od_blocksize = doi.doi_data_block_size; - od->od_gen = bbt->bt_gen; - dmu_buf_rele(db, FTAG); - ztest_object_unlock(zd, od->od_object); - } - } - - return (missing); -} - -static int -ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) -{ - int missing = 0; - - ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); - - for (int i = 0; i < count; i++, od++) { - if (missing) { - od->od_object = 0; - missing++; - continue; - } - - lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); - - lr->lr_doid = od->od_dir; - lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ - lr->lrz_type = 
od->od_crtype; - lr->lrz_blocksize = od->od_crblocksize; - lr->lrz_ibshift = ztest_random_ibshift(); - lr->lrz_bonustype = DMU_OT_UINT64_OTHER; - lr->lrz_dnodesize = od->od_crdnodesize; - lr->lr_gen = od->od_crgen; - lr->lr_crtime[0] = time(NULL); - - if (ztest_replay_create(zd, lr, B_FALSE) != 0) { - ASSERT(missing == 0); - od->od_object = 0; - missing++; - } else { - od->od_object = lr->lr_foid; - od->od_type = od->od_crtype; - od->od_blocksize = od->od_crblocksize; - od->od_gen = od->od_crgen; - ASSERT(od->od_object != 0); - } - - ztest_lr_free(lr, sizeof (*lr), od->od_name); - } - - return (missing); -} - -static int -ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) -{ - int missing = 0; - int error; - - ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); - - od += count - 1; - - for (int i = count - 1; i >= 0; i--, od--) { - if (missing) { - missing++; - continue; - } - - /* - * No object was found. - */ - if (od->od_object == 0) - continue; - - lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); - - lr->lr_doid = od->od_dir; - - if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { - ASSERT3U(error, ==, ENOSPC); - missing++; - } else { - od->od_object = 0; - } - ztest_lr_free(lr, sizeof (*lr), od->od_name); - } - - return (missing); -} - -static int -ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, - void *data) -{ - lr_write_t *lr; - int error; - - lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); - - lr->lr_foid = object; - lr->lr_offset = offset; - lr->lr_length = size; - lr->lr_blkoff = 0; - BP_ZERO(&lr->lr_blkptr); - - bcopy(data, lr + 1, size); - - error = ztest_replay_write(zd, lr, B_FALSE); - - ztest_lr_free(lr, sizeof (*lr) + size, NULL); - - return (error); -} - -static int -ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) -{ - lr_truncate_t *lr; - int error; - - lr = ztest_lr_alloc(sizeof (*lr), NULL); - - lr->lr_foid = object; - lr->lr_offset = offset; - lr->lr_length = size; - - 
error = ztest_replay_truncate(zd, lr, B_FALSE); - - ztest_lr_free(lr, sizeof (*lr), NULL); - - return (error); -} - -static int -ztest_setattr(ztest_ds_t *zd, uint64_t object) -{ - lr_setattr_t *lr; - int error; - - lr = ztest_lr_alloc(sizeof (*lr), NULL); - - lr->lr_foid = object; - lr->lr_size = 0; - lr->lr_mode = 0; - - error = ztest_replay_setattr(zd, lr, B_FALSE); - - ztest_lr_free(lr, sizeof (*lr), NULL); - - return (error); -} - -static void -ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) -{ - objset_t *os = zd->zd_os; - dmu_tx_t *tx; - uint64_t txg; - rl_t *rl; - - txg_wait_synced(dmu_objset_pool(os), 0); - - ztest_object_lock(zd, object, RL_READER); - rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); - - tx = dmu_tx_create(os); - - dmu_tx_hold_write(tx, object, offset, size); - - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - - if (txg != 0) { - dmu_prealloc(os, object, offset, size, tx); - dmu_tx_commit(tx); - txg_wait_synced(dmu_objset_pool(os), txg); - } else { - (void) dmu_free_long_range(os, object, offset, size); - } - - ztest_range_unlock(rl); - ztest_object_unlock(zd, object); -} - -static void -ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) -{ - int err; - ztest_block_tag_t wbt; - dmu_object_info_t doi; - enum ztest_io_type io_type; - uint64_t blocksize; - void *data; - - VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); - blocksize = doi.doi_data_block_size; - data = umem_alloc(blocksize, UMEM_NOFAIL); - - /* - * Pick an i/o type at random, biased toward writing block tags. 
- */ - io_type = ztest_random(ZTEST_IO_TYPES); - if (ztest_random(2) == 0) - io_type = ZTEST_IO_WRITE_TAG; - - rw_enter(&zd->zd_zilog_lock, RW_READER); - - switch (io_type) { - - case ZTEST_IO_WRITE_TAG: - ztest_bt_generate(&wbt, zd->zd_os, object, doi.doi_dnodesize, - offset, 0, 0, 0); - (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); - break; - - case ZTEST_IO_WRITE_PATTERN: - (void) memset(data, 'a' + (object + offset) % 5, blocksize); - if (ztest_random(2) == 0) { - /* - * Induce fletcher2 collisions to ensure that - * zio_ddt_collision() detects and resolves them - * when using fletcher2-verify for deduplication. - */ - ((uint64_t *)data)[0] ^= 1ULL << 63; - ((uint64_t *)data)[4] ^= 1ULL << 63; - } - (void) ztest_write(zd, object, offset, blocksize, data); - break; - - case ZTEST_IO_WRITE_ZEROES: - bzero(data, blocksize); - (void) ztest_write(zd, object, offset, blocksize, data); - break; - - case ZTEST_IO_TRUNCATE: - (void) ztest_truncate(zd, object, offset, blocksize); - break; - - case ZTEST_IO_SETATTR: - (void) ztest_setattr(zd, object); - break; - - case ZTEST_IO_REWRITE: - rw_enter(&ztest_name_lock, RW_READER); - err = ztest_dsl_prop_set_uint64(zd->zd_name, - ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), - B_FALSE); - VERIFY(err == 0 || err == ENOSPC); - err = ztest_dsl_prop_set_uint64(zd->zd_name, - ZFS_PROP_COMPRESSION, - ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), - B_FALSE); - VERIFY(err == 0 || err == ENOSPC); - rw_exit(&ztest_name_lock); - - VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, - DMU_READ_NO_PREFETCH)); - - (void) ztest_write(zd, object, offset, blocksize, data); - break; - } - - rw_exit(&zd->zd_zilog_lock); - - umem_free(data, blocksize); -} - -/* - * Initialize an object description template. 
- */ -static void -ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, - dmu_object_type_t type, uint64_t blocksize, uint64_t dnodesize, - uint64_t gen) -{ - od->od_dir = ZTEST_DIROBJ; - od->od_object = 0; - - od->od_crtype = type; - od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); - od->od_crdnodesize = dnodesize ? dnodesize : ztest_random_dnodesize(); - od->od_crgen = gen; - - od->od_type = DMU_OT_NONE; - od->od_blocksize = 0; - od->od_gen = 0; - - (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", - tag, (int64_t)id, index); -} - -/* - * Lookup or create the objects for a test using the od template. - * If the objects do not all exist, or if 'remove' is specified, - * remove any existing objects and create new ones. Otherwise, - * use the existing objects. - */ -static int -ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) -{ - int count = size / sizeof (*od); - int rv = 0; - - mutex_enter(&zd->zd_dirobj_lock); - if ((ztest_lookup(zd, od, count) != 0 || remove) && - (ztest_remove(zd, od, count) != 0 || - ztest_create(zd, od, count) != 0)) - rv = -1; - zd->zd_od = od; - mutex_exit(&zd->zd_dirobj_lock); - - return (rv); -} - -/* ARGSUSED */ -void -ztest_zil_commit(ztest_ds_t *zd, uint64_t id) -{ - zilog_t *zilog = zd->zd_zilog; - - rw_enter(&zd->zd_zilog_lock, RW_READER); - - zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); - - /* - * Remember the committed values in zd, which is in parent/child - * shared memory. If we die, the next iteration of ztest_run() - * will verify that the log really does contain this record. - */ - mutex_enter(&zilog->zl_lock); - ASSERT(zd->zd_shared != NULL); - ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); - zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; - mutex_exit(&zilog->zl_lock); - - rw_exit(&zd->zd_zilog_lock); -} - -/* - * This function is designed to simulate the operations that occur during a - * mount/unmount operation. 
We hold the dataset across these operations in an - * attempt to expose any implicit assumptions about ZIL management. - */ -/* ARGSUSED */ -void -ztest_zil_remount(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - - /* - * We grab the zd_dirobj_lock to ensure that no other thread is - * updating the zil (i.e. adding in-memory log records) and the - * zd_zilog_lock to block any I/O. - */ - mutex_enter(&zd->zd_dirobj_lock); - rw_enter(&zd->zd_zilog_lock, RW_WRITER); - - /* zfsvfs_teardown() */ - zil_close(zd->zd_zilog); - - /* zfsvfs_setup() */ - VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); - zil_replay(os, zd, ztest_replay_vector); - - rw_exit(&zd->zd_zilog_lock); - mutex_exit(&zd->zd_dirobj_lock); -} - -/* - * Verify that we can't destroy an active pool, create an existing pool, - * or create a pool with a bad vdev spec. - */ -/* ARGSUSED */ -void -ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_opts_t *zo = &ztest_opts; - spa_t *spa; - nvlist_t *nvroot; - - if (zo->zo_mmp_test) - return; - - /* - * Attempt to create using a bad file. - */ - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); - VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL)); - nvlist_free(nvroot); - - /* - * Attempt to create using a bad mirror. - */ - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1); - VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); - nvlist_free(nvroot); - - /* - * Attempt to create an existing pool. It shouldn't matter - * what's in the nvroot; we should fail with EEXIST. 
- */ - rw_enter(&ztest_name_lock, RW_READER); - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); - nvlist_free(nvroot); - VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); - VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); - spa_close(spa, FTAG); - - rw_exit(&ztest_name_lock); -} - -/* - * Start and then stop the MMP threads to ensure the startup and shutdown code - * works properly. Actual protection and property-related code tested via ZTS. - */ -/* ARGSUSED */ -void -ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_opts_t *zo = &ztest_opts; - spa_t *spa = ztest_spa; - - if (zo->zo_mmp_test) - return; - - /* - * Since enabling MMP involves setting a property, it could not be done - * while the pool is suspended. - */ - if (spa_suspended(spa)) - return; - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - mutex_enter(&spa->spa_props_lock); - - zfs_multihost_fail_intervals = 0; - - if (!spa_multihost(spa)) { - spa->spa_multihost = B_TRUE; - mmp_thread_start(spa); - } - - mutex_exit(&spa->spa_props_lock); - spa_config_exit(spa, SCL_CONFIG, FTAG); - - txg_wait_synced(spa_get_dsl(spa), 0); - mmp_signal_all_threads(); - txg_wait_synced(spa_get_dsl(spa), 0); - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - mutex_enter(&spa->spa_props_lock); - - if (spa_multihost(spa)) { - mmp_thread_stop(spa); - spa->spa_multihost = B_FALSE; - } - - mutex_exit(&spa->spa_props_lock); - spa_config_exit(spa, SCL_CONFIG, FTAG); -} - -/* ARGSUSED */ -void -ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa; - uint64_t initial_version = SPA_VERSION_INITIAL; - uint64_t version, newversion; - nvlist_t *nvroot, *props; - char *name; - - if (ztest_opts.zo_mmp_test) - return; - - mutex_enter(&ztest_vdev_lock); - name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); - - /* - * Clean up from previous runs. 
- */ - (void) spa_destroy(name); - - nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, - NULL, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); - - /* - * If we're configuring a RAIDZ device then make sure that the - * the initial version is capable of supporting that feature. - */ - switch (ztest_opts.zo_raidz_parity) { - case 0: - case 1: - initial_version = SPA_VERSION_INITIAL; - break; - case 2: - initial_version = SPA_VERSION_RAIDZ2; - break; - case 3: - initial_version = SPA_VERSION_RAIDZ3; - break; - } - - /* - * Create a pool with a spa version that can be upgraded. Pick - * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. - */ - do { - version = ztest_random_spa_version(initial_version); - } while (version > SPA_VERSION_BEFORE_FEATURES); - - props = fnvlist_alloc(); - fnvlist_add_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY0(spa_create(name, nvroot, props, NULL)); - fnvlist_free(nvroot); - fnvlist_free(props); - - VERIFY0(spa_open(name, &spa, FTAG)); - VERIFY3U(spa_version(spa), ==, version); - newversion = ztest_random_spa_version(version + 1); - - if (ztest_opts.zo_verbose >= 4) { - (void) printf("upgrading spa version from %llu to %llu\n", - (u_longlong_t)version, (u_longlong_t)newversion); - } - - spa_upgrade(spa, newversion); - VERIFY3U(spa_version(spa), >, version); - VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, - zpool_prop_to_name(ZPOOL_PROP_VERSION))); - spa_close(spa, FTAG); - - strfree(name); - mutex_exit(&ztest_vdev_lock); -} - -static void -ztest_spa_checkpoint(spa_t *spa) -{ - ASSERT(MUTEX_HELD(&ztest_checkpoint_lock)); - - int error = spa_checkpoint(spa->spa_name); - - switch (error) { - case 0: - case ZFS_ERR_DEVRM_IN_PROGRESS: - case ZFS_ERR_DISCARDING_CHECKPOINT: - case ZFS_ERR_CHECKPOINT_EXISTS: - break; - case ENOSPC: - ztest_record_enospc(FTAG); - break; - default: - fatal(0, "spa_checkpoint(%s) = %d", spa->spa_name, error); - } -} - -static void 
-ztest_spa_discard_checkpoint(spa_t *spa) -{ - ASSERT(MUTEX_HELD(&ztest_checkpoint_lock)); - - int error = spa_checkpoint_discard(spa->spa_name); - - switch (error) { - case 0: - case ZFS_ERR_DISCARDING_CHECKPOINT: - case ZFS_ERR_NO_CHECKPOINT: - break; - default: - fatal(0, "spa_discard_checkpoint(%s) = %d", - spa->spa_name, error); - } - -} - -/* ARGSUSED */ -void -ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - - mutex_enter(&ztest_checkpoint_lock); - if (ztest_random(2) == 0) { - ztest_spa_checkpoint(spa); - } else { - ztest_spa_discard_checkpoint(spa); - } - mutex_exit(&ztest_checkpoint_lock); -} - - -static vdev_t * -vdev_lookup_by_path(vdev_t *vd, const char *path) -{ - vdev_t *mvd; - - if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) - return (vd); - - for (int c = 0; c < vd->vdev_children; c++) - if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != - NULL) - return (mvd); - - return (NULL); -} - -/* - * Find the first available hole which can be used as a top-level. - */ -int -find_vdev_hole(spa_t *spa) -{ - vdev_t *rvd = spa->spa_root_vdev; - int c; - - ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); - - for (c = 0; c < rvd->vdev_children; c++) { - vdev_t *cvd = rvd->vdev_child[c]; - - if (cvd->vdev_ishole) - break; - } - return (c); -} - -/* - * Verify that vdev_add() works as expected. - */ -/* ARGSUSED */ -void -ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - uint64_t leaves; - uint64_t guid; - nvlist_t *nvroot; - int error; - - if (ztest_opts.zo_mmp_test) - return; - - mutex_enter(&ztest_vdev_lock); - leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; - - /* - * If we have slogs then remove them 1/4 of the time. 
- */ - if (spa_has_slogs(spa) && ztest_random(4) == 0) { - metaslab_group_t *mg; - - /* - * find the first real slog in log allocation class - */ - mg = spa_log_class(spa)->mc_rotor; - while (!mg->mg_vd->vdev_islog) - mg = mg->mg_next; - - guid = mg->mg_vd->vdev_guid; - - spa_config_exit(spa, SCL_VDEV, FTAG); - - /* - * We have to grab the zs_name_lock as writer to - * prevent a race between removing a slog (dmu_objset_find) - * and destroying a dataset. Removing the slog will - * grab a reference on the dataset which may cause - * dmu_objset_destroy() to fail with EBUSY thus - * leaving the dataset in an inconsistent state. - */ - rw_enter(&ztest_name_lock, RW_WRITER); - error = spa_vdev_remove(spa, guid, B_FALSE); - rw_exit(&ztest_name_lock); - - switch (error) { - case 0: - case EEXIST: - case ZFS_ERR_CHECKPOINT_EXISTS: - case ZFS_ERR_DISCARDING_CHECKPOINT: - break; - default: - fatal(0, "spa_vdev_remove() = %d", error); - } - } else { - spa_config_exit(spa, SCL_VDEV, FTAG); - - /* - * Make 1/4 of the devices be log devices - */ - nvroot = make_vdev_root(NULL, NULL, NULL, - ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ? - "log" : NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1); - - error = spa_vdev_add(spa, nvroot); - nvlist_free(nvroot); - - switch (error) { - case 0: - break; - case ENOSPC: - ztest_record_enospc("spa_vdev_add"); - break; - default: - fatal(0, "spa_vdev_add() = %d", error); - } - } - - mutex_exit(&ztest_vdev_lock); -} - -/* ARGSUSED */ -void -ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - uint64_t leaves; - nvlist_t *nvroot; - const char *class = (ztest_random(2) == 0) ? 
- VDEV_ALLOC_BIAS_SPECIAL : VDEV_ALLOC_BIAS_DEDUP; - int error; - - /* - * By default add a special vdev 50% of the time - */ - if ((ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_OFF) || - (ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_RND && - ztest_random(2) == 0)) { - return; - } - - mutex_enter(&ztest_vdev_lock); - - /* Only test with mirrors */ - if (zs->zs_mirrors < 2) { - mutex_exit(&ztest_vdev_lock); - return; - } - - /* requires feature@allocation_classes */ - if (!spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES)) { - mutex_exit(&ztest_vdev_lock); - return; - } - - leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; - spa_config_exit(spa, SCL_VDEV, FTAG); - - nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, - class, ztest_opts.zo_raidz, zs->zs_mirrors, 1); - - error = spa_vdev_add(spa, nvroot); - nvlist_free(nvroot); - - if (error == ENOSPC) - ztest_record_enospc("spa_vdev_add"); - else if (error != 0) - fatal(0, "spa_vdev_add() = %d", error); - - /* - * 50% of the time allow small blocks in the special class - */ - if (error == 0 && - spa_special_class(spa)->mc_groups == 1 && ztest_random(2) == 0) { - if (ztest_opts.zo_verbose >= 3) - (void) printf("Enabling special VDEV small blocks\n"); - (void) ztest_dsl_prop_set_uint64(zd->zd_name, - ZFS_PROP_SPECIAL_SMALL_BLOCKS, 32768, B_FALSE); - } - - mutex_exit(&ztest_vdev_lock); - - if (ztest_opts.zo_verbose >= 3) { - metaslab_class_t *mc; - - if (strcmp(class, VDEV_ALLOC_BIAS_SPECIAL) == 0) - mc = spa_special_class(spa); - else - mc = spa_dedup_class(spa); - (void) printf("Added a %s mirrored vdev (of %d)\n", - class, (int)mc->mc_groups); - } -} - -/* - * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
- */ -/* ARGSUSED */ -void -ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - vdev_t *rvd = spa->spa_root_vdev; - spa_aux_vdev_t *sav; - char *aux; - uint64_t guid = 0; - int error; - - if (ztest_opts.zo_mmp_test) - return; - - if (ztest_random(2) == 0) { - sav = &spa->spa_spares; - aux = ZPOOL_CONFIG_SPARES; - } else { - sav = &spa->spa_l2cache; - aux = ZPOOL_CONFIG_L2CACHE; - } - - mutex_enter(&ztest_vdev_lock); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - if (sav->sav_count != 0 && ztest_random(4) == 0) { - /* - * Pick a random device to remove. - */ - guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; - } else { - /* - * Find an unused device we can add. - */ - zs->zs_vdev_aux = 0; - for (;;) { - char path[MAXPATHLEN]; - int c; - (void) snprintf(path, sizeof (path), ztest_aux_template, - ztest_opts.zo_dir, ztest_opts.zo_pool, aux, - zs->zs_vdev_aux); - for (c = 0; c < sav->sav_count; c++) - if (strcmp(sav->sav_vdevs[c]->vdev_path, - path) == 0) - break; - if (c == sav->sav_count && - vdev_lookup_by_path(rvd, path) == NULL) - break; - zs->zs_vdev_aux++; - } - } - - spa_config_exit(spa, SCL_VDEV, FTAG); - - if (guid == 0) { - /* - * Add a new device. - */ - nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, - (ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1); - error = spa_vdev_add(spa, nvroot); - - switch (error) { - case 0: - break; - default: - fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); - } - nvlist_free(nvroot); - } else { - /* - * Remove an existing device. Sometimes, dirty its - * vdev state first to make sure we handle removal - * of devices that have pending state changes. 
- */ - if (ztest_random(2) == 0) - (void) vdev_online(spa, guid, 0, NULL); - - error = spa_vdev_remove(spa, guid, B_FALSE); - - switch (error) { - case 0: - case EBUSY: - case ZFS_ERR_CHECKPOINT_EXISTS: - case ZFS_ERR_DISCARDING_CHECKPOINT: - break; - default: - fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); - } - } - - mutex_exit(&ztest_vdev_lock); -} - -/* - * split a pool if it has mirror tlvdevs - */ -/* ARGSUSED */ -void -ztest_split_pool(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - vdev_t *rvd = spa->spa_root_vdev; - nvlist_t *tree, **child, *config, *split, **schild; - uint_t c, children, schildren = 0, lastlogid = 0; - int error = 0; - - if (ztest_opts.zo_mmp_test) - return; - - mutex_enter(&ztest_vdev_lock); - - /* ensure we have a useable config; mirrors of raidz aren't supported */ - if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { - mutex_exit(&ztest_vdev_lock); - return; - } - - /* clean up the old pool, if any */ - (void) spa_destroy("splitp"); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - /* generate a config from the existing config */ - mutex_enter(&spa->spa_props_lock); - VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, - &tree) == 0); - mutex_exit(&spa->spa_props_lock); - - VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0); - - schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); - for (c = 0; c < children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - nvlist_t **mchild; - uint_t mchildren; - - if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { - VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, - 0) == 0); - VERIFY(nvlist_add_string(schild[schildren], - ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); - VERIFY(nvlist_add_uint64(schild[schildren], - ZPOOL_CONFIG_IS_HOLE, 1) == 0); - if (lastlogid == 0) - lastlogid = schildren; - ++schildren; - continue; - } - lastlogid = 0; - 
VERIFY(nvlist_lookup_nvlist_array(child[c], - ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); - VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); - } - - /* OK, create a config that can be used to split */ - VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) == 0); - VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, - lastlogid != 0 ? lastlogid : schildren) == 0); - - VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); - - for (c = 0; c < schildren; c++) - nvlist_free(schild[c]); - free(schild); - nvlist_free(split); - - spa_config_exit(spa, SCL_VDEV, FTAG); - - rw_enter(&ztest_name_lock, RW_WRITER); - error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); - rw_exit(&ztest_name_lock); - - nvlist_free(config); - - if (error == 0) { - (void) printf("successful split - results:\n"); - mutex_enter(&spa_namespace_lock); - show_pool_stats(spa); - show_pool_stats(spa_lookup("splitp")); - mutex_exit(&spa_namespace_lock); - ++zs->zs_splits; - --zs->zs_mirrors; - } - mutex_exit(&ztest_vdev_lock); -} - -/* - * Verify that we can attach and detach devices. 
- */ -/* ARGSUSED */ -void -ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - spa_aux_vdev_t *sav = &spa->spa_spares; - vdev_t *rvd = spa->spa_root_vdev; - vdev_t *oldvd, *newvd, *pvd; - nvlist_t *root; - uint64_t leaves; - uint64_t leaf, top; - uint64_t ashift = ztest_get_ashift(); - uint64_t oldguid, pguid; - uint64_t oldsize, newsize; - char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; - int replacing; - int oldvd_has_siblings = B_FALSE; - int newvd_is_spare = B_FALSE; - int oldvd_is_log; - int error, expected_error; - - if (ztest_opts.zo_mmp_test) - return; - - mutex_enter(&ztest_vdev_lock); - leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - - /* - * If a vdev is in the process of being removed, its removal may - * finish while we are in progress, leading to an unexpected error - * value. Don't bother trying to attach while we are in the middle - * of removal. - */ - if (ztest_device_removal_active) { - spa_config_exit(spa, SCL_ALL, FTAG); - mutex_exit(&ztest_vdev_lock); - return; - } - - /* - * Decide whether to do an attach or a replace. - */ - replacing = ztest_random(2); - - /* - * Pick a random top-level vdev. - */ - top = ztest_random_vdev_top(spa, B_TRUE); - - /* - * Pick a random leaf within it. - */ - leaf = ztest_random(leaves); - - /* - * Locate this vdev. 
- */ - oldvd = rvd->vdev_child[top]; - - /* pick a child from the mirror */ - if (zs->zs_mirrors >= 1) { - ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); - ASSERT(oldvd->vdev_children >= zs->zs_mirrors); - oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; - } - - /* pick a child out of the raidz group */ - if (ztest_opts.zo_raidz > 1) { - ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); - ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); - oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; - } - - /* - * If we're already doing an attach or replace, oldvd may be a - * mirror vdev -- in which case, pick a random child. - */ - while (oldvd->vdev_children != 0) { - oldvd_has_siblings = B_TRUE; - ASSERT(oldvd->vdev_children >= 2); - oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; - } - - oldguid = oldvd->vdev_guid; - oldsize = vdev_get_min_asize(oldvd); - oldvd_is_log = oldvd->vdev_top->vdev_islog; - (void) strcpy(oldpath, oldvd->vdev_path); - pvd = oldvd->vdev_parent; - pguid = pvd->vdev_guid; - - /* - * If oldvd has siblings, then half of the time, detach it. - */ - if (oldvd_has_siblings && ztest_random(2) == 0) { - spa_config_exit(spa, SCL_ALL, FTAG); - error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); - if (error != 0 && error != ENODEV && error != EBUSY && - error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS && - error != ZFS_ERR_DISCARDING_CHECKPOINT) - fatal(0, "detach (%s) returned %d", oldpath, error); - mutex_exit(&ztest_vdev_lock); - return; - } - - /* - * For the new vdev, choose with equal probability between the two - * standard paths (ending in either 'a' or 'b') or a random hot spare. 
- */ - if (sav->sav_count != 0 && ztest_random(3) == 0) { - newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; - newvd_is_spare = B_TRUE; - (void) strcpy(newpath, newvd->vdev_path); - } else { - (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, - ztest_opts.zo_dir, ztest_opts.zo_pool, - top * leaves + leaf); - if (ztest_random(2) == 0) - newpath[strlen(newpath) - 1] = 'b'; - newvd = vdev_lookup_by_path(rvd, newpath); - } - - if (newvd) { - /* - * Reopen to ensure the vdev's asize field isn't stale. - */ - vdev_reopen(newvd); - newsize = vdev_get_min_asize(newvd); - } else { - /* - * Make newsize a little bigger or smaller than oldsize. - * If it's smaller, the attach should fail. - * If it's larger, and we're doing a replace, - * we should get dynamic LUN growth when we're done. - */ - newsize = 10 * oldsize / (9 + ztest_random(3)); - } - - /* - * If pvd is not a mirror or root, the attach should fail with ENOTSUP, - * unless it's a replace; in that case any non-replacing parent is OK. - * - * If newvd is already part of the pool, it should fail with EBUSY. - * - * If newvd is too small, it should fail with EOVERFLOW. - */ - if (pvd->vdev_ops != &vdev_mirror_ops && - pvd->vdev_ops != &vdev_root_ops && (!replacing || - pvd->vdev_ops == &vdev_replacing_ops || - pvd->vdev_ops == &vdev_spare_ops)) - expected_error = ENOTSUP; - else if (newvd_is_spare && (!replacing || oldvd_is_log)) - expected_error = ENOTSUP; - else if (newvd == oldvd) - expected_error = replacing ? 0 : EBUSY; - else if (vdev_lookup_by_path(rvd, newpath) != NULL) - expected_error = EBUSY; - else if (newsize < oldsize) - expected_error = EOVERFLOW; - else if (ashift > oldvd->vdev_top->vdev_ashift) - expected_error = EDOM; - else - expected_error = 0; - - spa_config_exit(spa, SCL_ALL, FTAG); - - /* - * Build the nvlist describing newpath. - */ - root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? 
newsize : 0, - ashift, NULL, 0, 0, 1); - - error = spa_vdev_attach(spa, oldguid, root, replacing); - - nvlist_free(root); - - /* - * If our parent was the replacing vdev, but the replace completed, - * then instead of failing with ENOTSUP we may either succeed, - * fail with ENODEV, or fail with EOVERFLOW. - */ - if (expected_error == ENOTSUP && - (error == 0 || error == ENODEV || error == EOVERFLOW)) - expected_error = error; - - /* - * If someone grew the LUN, the replacement may be too small. - */ - if (error == EOVERFLOW || error == EBUSY) - expected_error = error; - - if (error == ZFS_ERR_CHECKPOINT_EXISTS || - error == ZFS_ERR_DISCARDING_CHECKPOINT) - expected_error = error; - - /* XXX workaround 6690467 */ - if (error != expected_error && expected_error != EBUSY) { - fatal(0, "attach (%s %llu, %s %llu, %d) " - "returned %d, expected %d", - oldpath, oldsize, newpath, - newsize, replacing, error, expected_error); - } - - mutex_exit(&ztest_vdev_lock); -} - -/* ARGSUSED */ -void -ztest_device_removal(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - vdev_t *vd; - uint64_t guid; - int error; - - mutex_enter(&ztest_vdev_lock); - - if (ztest_device_removal_active) { - mutex_exit(&ztest_vdev_lock); - return; - } - - /* - * Remove a random top-level vdev and wait for removal to finish. - */ - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE)); - guid = vd->vdev_guid; - spa_config_exit(spa, SCL_VDEV, FTAG); - - error = spa_vdev_remove(spa, guid, B_FALSE); - if (error == 0) { - ztest_device_removal_active = B_TRUE; - mutex_exit(&ztest_vdev_lock); - - while (spa->spa_vdev_removal != NULL) - txg_wait_synced(spa_get_dsl(spa), 0); - } else { - mutex_exit(&ztest_vdev_lock); - return; - } - - /* - * The pool needs to be scrubbed after completing device removal. 
- * Failure to do so may result in checksum errors due to the - * strategy employed by ztest_fault_inject() when selecting which - * offset are redundant and can be damaged. - */ - error = spa_scan(spa, POOL_SCAN_SCRUB); - if (error == 0) { - while (dsl_scan_scrubbing(spa_get_dsl(spa))) - txg_wait_synced(spa_get_dsl(spa), 0); - } - - mutex_enter(&ztest_vdev_lock); - ztest_device_removal_active = B_FALSE; - mutex_exit(&ztest_vdev_lock); -} - -/* - * Callback function which expands the physical size of the vdev. - */ -vdev_t * -grow_vdev(vdev_t *vd, void *arg) -{ - spa_t *spa = vd->vdev_spa; - size_t *newsize = arg; - size_t fsize; - int fd; - - ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); - ASSERT(vd->vdev_ops->vdev_op_leaf); - - if ((fd = open(vd->vdev_path, O_RDWR)) == -1) - return (vd); - - fsize = lseek(fd, 0, SEEK_END); - (void) ftruncate(fd, *newsize); - - if (ztest_opts.zo_verbose >= 6) { - (void) printf("%s grew from %lu to %lu bytes\n", - vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); - } - (void) close(fd); - return (NULL); -} - -/* - * Callback function which expands a given vdev by calling vdev_online(). - */ -/* ARGSUSED */ -vdev_t * -online_vdev(vdev_t *vd, void *arg) -{ - spa_t *spa = vd->vdev_spa; - vdev_t *tvd = vd->vdev_top; - uint64_t guid = vd->vdev_guid; - uint64_t generation = spa->spa_config_generation + 1; - vdev_state_t newstate = VDEV_STATE_UNKNOWN; - int error; - - ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); - ASSERT(vd->vdev_ops->vdev_op_leaf); - - /* Calling vdev_online will initialize the new metaslabs */ - spa_config_exit(spa, SCL_STATE, spa); - error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); - spa_config_enter(spa, SCL_STATE, spa, RW_READER); - - /* - * If vdev_online returned an error or the underlying vdev_open - * failed then we abort the expand. The only way to know that - * vdev_open fails is by checking the returned newstate. 
- */ - if (error || newstate != VDEV_STATE_HEALTHY) { - if (ztest_opts.zo_verbose >= 5) { - (void) printf("Unable to expand vdev, state %llu, " - "error %d\n", (u_longlong_t)newstate, error); - } - return (vd); - } - ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); - - /* - * Since we dropped the lock we need to ensure that we're - * still talking to the original vdev. It's possible this - * vdev may have been detached/replaced while we were - * trying to online it. - */ - if (generation != spa->spa_config_generation) { - if (ztest_opts.zo_verbose >= 5) { - (void) printf("vdev configuration has changed, " - "guid %llu, state %llu, expected gen %llu, " - "got gen %llu\n", - (u_longlong_t)guid, - (u_longlong_t)tvd->vdev_state, - (u_longlong_t)generation, - (u_longlong_t)spa->spa_config_generation); - } - return (vd); - } - return (NULL); -} - -/* - * Traverse the vdev tree calling the supplied function. - * We continue to walk the tree until we either have walked all - * children or we receive a non-NULL return from the callback. - * If a NULL callback is passed, then we just return back the first - * leaf vdev we encounter. - */ -vdev_t * -vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) -{ - if (vd->vdev_ops->vdev_op_leaf) { - if (func == NULL) - return (vd); - else - return (func(vd, arg)); - } - - for (uint_t c = 0; c < vd->vdev_children; c++) { - vdev_t *cvd = vd->vdev_child[c]; - if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) - return (cvd); - } - return (NULL); -} - -/* - * Verify that dynamic LUN growth works as expected. 
- */ -/* ARGSUSED */ -void -ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - vdev_t *vd, *tvd; - metaslab_class_t *mc; - metaslab_group_t *mg; - size_t psize, newsize; - uint64_t top; - uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; - - mutex_enter(&ztest_checkpoint_lock); - mutex_enter(&ztest_vdev_lock); - spa_config_enter(spa, SCL_STATE, spa, RW_READER); - - /* - * If there is a vdev removal in progress, it could complete while - * we are running, in which case we would not be able to verify - * that the metaslab_class space increased (because it decreases - * when the device removal completes). - */ - if (ztest_device_removal_active) { - spa_config_exit(spa, SCL_STATE, spa); - mutex_exit(&ztest_vdev_lock); - mutex_exit(&ztest_checkpoint_lock); - return; - } - - top = ztest_random_vdev_top(spa, B_TRUE); - - tvd = spa->spa_root_vdev->vdev_child[top]; - mg = tvd->vdev_mg; - mc = mg->mg_class; - old_ms_count = tvd->vdev_ms_count; - old_class_space = metaslab_class_get_space(mc); - - /* - * Determine the size of the first leaf vdev associated with - * our top-level device. - */ - vd = vdev_walk_tree(tvd, NULL, NULL); - ASSERT3P(vd, !=, NULL); - ASSERT(vd->vdev_ops->vdev_op_leaf); - - psize = vd->vdev_psize; - - /* - * We only try to expand the vdev if it's healthy, less than 4x its - * original size, and it has a valid psize. - */ - if (tvd->vdev_state != VDEV_STATE_HEALTHY || - psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { - spa_config_exit(spa, SCL_STATE, spa); - mutex_exit(&ztest_vdev_lock); - mutex_exit(&ztest_checkpoint_lock); - return; - } - ASSERT(psize > 0); - newsize = psize + MAX(psize / 8, SPA_MAXBLOCKSIZE); - ASSERT3U(newsize, >, psize); - - if (ztest_opts.zo_verbose >= 6) { - (void) printf("Expanding LUN %s from %lu to %lu\n", - vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); - } - - /* - * Growing the vdev is a two step process: - * 1). expand the physical size (i.e. relabel) - * 2). 
online the vdev to create the new metaslabs - */ - if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || - vdev_walk_tree(tvd, online_vdev, NULL) != NULL || - tvd->vdev_state != VDEV_STATE_HEALTHY) { - if (ztest_opts.zo_verbose >= 5) { - (void) printf("Could not expand LUN because " - "the vdev configuration changed.\n"); - } - spa_config_exit(spa, SCL_STATE, spa); - mutex_exit(&ztest_vdev_lock); - mutex_exit(&ztest_checkpoint_lock); - return; - } - - spa_config_exit(spa, SCL_STATE, spa); - - /* - * Expanding the LUN will update the config asynchronously, - * thus we must wait for the async thread to complete any - * pending tasks before proceeding. - */ - for (;;) { - boolean_t done; - mutex_enter(&spa->spa_async_lock); - done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); - mutex_exit(&spa->spa_async_lock); - if (done) - break; - txg_wait_synced(spa_get_dsl(spa), 0); - (void) poll(NULL, 0, 100); - } - - spa_config_enter(spa, SCL_STATE, spa, RW_READER); - - tvd = spa->spa_root_vdev->vdev_child[top]; - new_ms_count = tvd->vdev_ms_count; - new_class_space = metaslab_class_get_space(mc); - - if (tvd->vdev_mg != mg || mg->mg_class != mc) { - if (ztest_opts.zo_verbose >= 5) { - (void) printf("Could not verify LUN expansion due to " - "intervening vdev offline or remove.\n"); - } - spa_config_exit(spa, SCL_STATE, spa); - mutex_exit(&ztest_vdev_lock); - mutex_exit(&ztest_checkpoint_lock); - return; - } - - /* - * Make sure we were able to grow the vdev. - */ - if (new_ms_count <= old_ms_count) { - fatal(0, "LUN expansion failed: ms_count %llu < %llu\n", - old_ms_count, new_ms_count); - } - - /* - * Make sure we were able to grow the pool. 
- */ - if (new_class_space <= old_class_space) { - fatal(0, "LUN expansion failed: class_space %llu < %llu\n", - old_class_space, new_class_space); - } - - if (ztest_opts.zo_verbose >= 5) { - char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ]; - - nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf)); - nicenum(new_class_space, newnumbuf, sizeof (newnumbuf)); - (void) printf("%s grew from %s to %s\n", - spa->spa_name, oldnumbuf, newnumbuf); - } - - spa_config_exit(spa, SCL_STATE, spa); - mutex_exit(&ztest_vdev_lock); - mutex_exit(&ztest_checkpoint_lock); -} - -/* - * Verify that dmu_objset_{create,destroy,open,close} work as expected. - */ -/* ARGSUSED */ -static void -ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) -{ - /* - * Create the objects common to all ztest datasets. - */ - VERIFY(zap_create_claim(os, ZTEST_DIROBJ, - DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); -} - -static int -ztest_dataset_create(char *dsname) -{ - uint64_t zilset = ztest_random(100); - int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, - ztest_objset_create_cb, NULL); - - if (err || zilset < 80) - return (err); - - if (ztest_opts.zo_verbose >= 6) - (void) printf("Setting dataset %s to sync always\n", dsname); - return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, - ZFS_SYNC_ALWAYS, B_FALSE)); -} - -/* ARGSUSED */ -static int -ztest_objset_destroy_cb(const char *name, void *arg) -{ - objset_t *os; - dmu_object_info_t doi; - int error; - - /* - * Verify that the dataset contains a directory object. - */ - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); - error = dmu_object_info(os, ZTEST_DIROBJ, &doi); - if (error != ENOENT) { - /* We could have crashed in the middle of destroying it */ - ASSERT0(error); - ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); - ASSERT3S(doi.doi_physical_blocks_512, >=, 0); - } - dmu_objset_disown(os, FTAG); - - /* - * Destroy the dataset. 
- */ - if (strchr(name, '@') != NULL) { - VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); - } else { - VERIFY0(dsl_destroy_head(name)); - } - return (0); -} - -static boolean_t -ztest_snapshot_create(char *osname, uint64_t id) -{ - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - int error; - - (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); - - error = dmu_objset_snapshot_one(osname, snapname); - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - return (B_FALSE); - } - if (error != 0 && error != EEXIST) { - fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, - snapname, error); - } - return (B_TRUE); -} - -static boolean_t -ztest_snapshot_destroy(char *osname, uint64_t id) -{ - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - int error; - - (void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname, - (u_longlong_t)id); - - error = dsl_destroy_snapshot(snapname, B_FALSE); - if (error != 0 && error != ENOENT) - fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); - return (B_TRUE); -} - -/* ARGSUSED */ -void -ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) -{ - ztest_ds_t zdtmp; - int iters; - int error; - objset_t *os, *os2; - char name[ZFS_MAX_DATASET_NAME_LEN]; - zilog_t *zilog; - - rw_enter(&ztest_name_lock, RW_READER); - - (void) snprintf(name, sizeof (name), "%s/temp_%llu", - ztest_opts.zo_pool, (u_longlong_t)id); - - /* - * If this dataset exists from a previous run, process its replay log - * half of the time. If we don't replay it, then dmu_objset_destroy() - * (invoked from ztest_objset_destroy_cb()) should just throw it away. - */ - if (ztest_random(2) == 0 && - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { - ztest_zd_init(&zdtmp, NULL, os); - zil_replay(os, &zdtmp, ztest_replay_vector); - ztest_zd_fini(&zdtmp); - dmu_objset_disown(os, FTAG); - } - - /* - * There may be an old instance of the dataset we're about to - * create lying around from a previous run. 
If so, destroy it - * and all of its snapshots. - */ - (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, - DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); - - /* - * Verify that the destroyed dataset is no longer in the namespace. - */ - VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, - FTAG, &os)); - - /* - * Verify that we can create a new dataset. - */ - error = ztest_dataset_create(name); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - rw_exit(&ztest_name_lock); - return; - } - fatal(0, "dmu_objset_create(%s) = %d", name, error); - } - - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); - - ztest_zd_init(&zdtmp, NULL, os); - - /* - * Open the intent log for it. - */ - zilog = zil_open(os, ztest_get_data); - - /* - * Put some objects in there, do a little I/O to them, - * and randomly take a couple of snapshots along the way. - */ - iters = ztest_random(5); - for (int i = 0; i < iters; i++) { - ztest_dmu_object_alloc_free(&zdtmp, id); - if (ztest_random(iters) == 0) - (void) ztest_snapshot_create(name, i); - } - - /* - * Verify that we cannot create an existing dataset. - */ - VERIFY3U(EEXIST, ==, - dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); - - /* - * Verify that we can hold an objset that is also owned. - */ - VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); - dmu_objset_rele(os2, FTAG); - - /* - * Verify that we cannot own an objset that is already owned. - */ - VERIFY3U(EBUSY, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); - - zil_close(zilog); - dmu_objset_disown(os, FTAG); - ztest_zd_fini(&zdtmp); - - rw_exit(&ztest_name_lock); -} - -/* - * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 
- */ -void -ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) -{ - rw_enter(&ztest_name_lock, RW_READER); - (void) ztest_snapshot_destroy(zd->zd_name, id); - (void) ztest_snapshot_create(zd->zd_name, id); - rw_exit(&ztest_name_lock); -} - -/* - * Cleanup non-standard snapshots and clones. - */ -void -ztest_dsl_dataset_cleanup(char *osname, uint64_t id) -{ - char snap1name[ZFS_MAX_DATASET_NAME_LEN]; - char clone1name[ZFS_MAX_DATASET_NAME_LEN]; - char snap2name[ZFS_MAX_DATASET_NAME_LEN]; - char clone2name[ZFS_MAX_DATASET_NAME_LEN]; - char snap3name[ZFS_MAX_DATASET_NAME_LEN]; - int error; - - (void) snprintf(snap1name, sizeof (snap1name), - "%s@s1_%llu", osname, id); - (void) snprintf(clone1name, sizeof (clone1name), - "%s/c1_%llu", osname, id); - (void) snprintf(snap2name, sizeof (snap2name), - "%s@s2_%llu", clone1name, id); - (void) snprintf(clone2name, sizeof (clone2name), - "%s/c2_%llu", osname, id); - (void) snprintf(snap3name, sizeof (snap3name), - "%s@s3_%llu", clone1name, id); - - error = dsl_destroy_head(clone2name); - if (error && error != ENOENT) - fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); - error = dsl_destroy_snapshot(snap3name, B_FALSE); - if (error && error != ENOENT) - fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); - error = dsl_destroy_snapshot(snap2name, B_FALSE); - if (error && error != ENOENT) - fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); - error = dsl_destroy_head(clone1name); - if (error && error != ENOENT) - fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); - error = dsl_destroy_snapshot(snap1name, B_FALSE); - if (error && error != ENOENT) - fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); -} - -/* - * Verify dsl_dataset_promote handles EBUSY - */ -void -ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os; - char snap1name[ZFS_MAX_DATASET_NAME_LEN]; - char clone1name[ZFS_MAX_DATASET_NAME_LEN]; - char snap2name[ZFS_MAX_DATASET_NAME_LEN]; - 
char clone2name[ZFS_MAX_DATASET_NAME_LEN]; - char snap3name[ZFS_MAX_DATASET_NAME_LEN]; - char *osname = zd->zd_name; - int error; - - rw_enter(&ztest_name_lock, RW_READER); - - ztest_dsl_dataset_cleanup(osname, id); - - (void) snprintf(snap1name, sizeof (snap1name), - "%s@s1_%llu", osname, id); - (void) snprintf(clone1name, sizeof (clone1name), - "%s/c1_%llu", osname, id); - (void) snprintf(snap2name, sizeof (snap2name), - "%s@s2_%llu", clone1name, id); - (void) snprintf(clone2name, sizeof (clone2name), - "%s/c2_%llu", osname, id); - (void) snprintf(snap3name, sizeof (snap3name), - "%s@s3_%llu", clone1name, id); - - error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); - if (error && error != EEXIST) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - goto out; - } - fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); - } - - error = dmu_objset_clone(clone1name, snap1name); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - goto out; - } - fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); - } - - error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); - if (error && error != EEXIST) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - goto out; - } - fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); - } - - error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); - if (error && error != EEXIST) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - goto out; - } - fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); - } - - error = dmu_objset_clone(clone2name, snap3name); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc(FTAG); - goto out; - } - fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); - } - - error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); - if (error) - fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); - error = dsl_dataset_promote(clone2name, NULL); - if (error == ENOSPC) { - dmu_objset_disown(os, 
FTAG); - ztest_record_enospc(FTAG); - goto out; - } - if (error != EBUSY) - fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, - error); - dmu_objset_disown(os, FTAG); - -out: - ztest_dsl_dataset_cleanup(osname, id); - - rw_exit(&ztest_name_lock); -} - -/* - * Verify that dmu_object_{alloc,free} work as expected. - */ -void -ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) -{ - ztest_od_t od[4]; - int batchsize = sizeof (od) / sizeof (od[0]); - - for (int b = 0; b < batchsize; b++) { - ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, - 0, 0, 0); - } - - /* - * Destroy the previous batch of objects, create a new batch, - * and do some I/O on the new objects. - */ - if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) - return; - - while (ztest_random(4 * batchsize) != 0) - ztest_io(zd, od[ztest_random(batchsize)].od_object, - ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); -} - -/* - * Rewind the global allocator to verify object allocation backfilling. - */ -void -ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift; - uint64_t object; - - /* - * Rewind the global allocator randomly back to a lower object number - * to force backfilling and reclamation of recently freed dnodes. - */ - mutex_enter(&os->os_obj_lock); - object = ztest_random(os->os_obj_next_chunk); - os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk); - mutex_exit(&os->os_obj_lock); -} - -/* - * Verify that dmu_{read,write} work as expected. 
- */ -void -ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[2]; - dmu_tx_t *tx; - int i, freeit, error; - uint64_t n, s, txg; - bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; - uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; - uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); - uint64_t regions = 997; - uint64_t stride = 123456789ULL; - uint64_t width = 40; - int free_percent = 5; - - /* - * This test uses two objects, packobj and bigobj, that are always - * updated together (i.e. in the same tx) so that their contents are - * in sync and can be compared. Their contents relate to each other - * in a simple way: packobj is a dense array of 'bufwad' structures, - * while bigobj is a sparse array of the same bufwads. Specifically, - * for any index n, there are three bufwads that should be identical: - * - * packobj, at offset n * sizeof (bufwad_t) - * bigobj, at the head of the nth chunk - * bigobj, at the tail of the nth chunk - * - * The chunk size is arbitrary. It doesn't have to be a power of two, - * and it doesn't have any relation to the object blocksize. - * The only requirement is that it can hold at least two bufwads. - * - * Normally, we write the bufwad to each of these locations. - * However, free_percent of the time we instead write zeroes to - * packobj and perform a dmu_free_range() on bigobj. By comparing - * bigobj to packobj, we can verify that the DMU is correctly - * tracking which parts of an object are allocated and free, - * and that the contents of the allocated blocks are correct. - */ - - /* - * Read the directory info. If it's the first time, set things up. 
- */ - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, - chunksize); - ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, - chunksize); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - bigobj = od[0].od_object; - packobj = od[1].od_object; - chunksize = od[0].od_gen; - ASSERT(chunksize == od[1].od_gen); - - /* - * Prefetch a random chunk of the big object. - * Our aim here is to get some async reads in flight - * for blocks that we may free below; the DMU should - * handle this race correctly. - */ - n = ztest_random(regions) * stride + ztest_random(width); - s = 1 + ztest_random(2 * width - 1); - dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, - ZIO_PRIORITY_SYNC_READ); - - /* - * Pick a random index and compute the offsets into packobj and bigobj. - */ - n = ztest_random(regions) * stride + ztest_random(width); - s = 1 + ztest_random(width - 1); - - packoff = n * sizeof (bufwad_t); - packsize = s * sizeof (bufwad_t); - - bigoff = n * chunksize; - bigsize = s * chunksize; - - packbuf = umem_alloc(packsize, UMEM_NOFAIL); - bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); - - /* - * free_percent of the time, free a range of bigobj rather than - * overwriting it. - */ - freeit = (ztest_random(100) < free_percent); - - /* - * Read the current contents of our objects. - */ - error = dmu_read(os, packobj, packoff, packsize, packbuf, - DMU_READ_PREFETCH); - ASSERT0(error); - error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, - DMU_READ_PREFETCH); - ASSERT0(error); - - /* - * Get a tx for the mods to both packobj and bigobj. - */ - tx = dmu_tx_create(os); - - dmu_tx_hold_write(tx, packobj, packoff, packsize); - - if (freeit) - dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); - else - dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); - - /* This accounts for setting the checksum/compression. 
*/ - dmu_tx_hold_bonus(tx, bigobj); - - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) { - umem_free(packbuf, packsize); - umem_free(bigbuf, bigsize); - return; - } - - enum zio_checksum cksum; - do { - cksum = (enum zio_checksum) - ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); - } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); - dmu_object_set_checksum(os, bigobj, cksum, tx); - - enum zio_compress comp; - do { - comp = (enum zio_compress) - ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); - } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); - dmu_object_set_compress(os, bigobj, comp, tx); - - /* - * For each index from n to n + s, verify that the existing bufwad - * in packobj matches the bufwads at the head and tail of the - * corresponding chunk in bigobj. Then update all three bufwads - * with the new values we want to write out. - */ - for (i = 0; i < s; i++) { - /* LINTED */ - pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); - /* LINTED */ - bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); - /* LINTED */ - bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; - - ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); - ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); - - if (pack->bw_txg > txg) - fatal(0, "future leak: got %llx, open txg is %llx", - pack->bw_txg, txg); - - if (pack->bw_data != 0 && pack->bw_index != n + i) - fatal(0, "wrong index: got %llx, wanted %llx+%llx", - pack->bw_index, n, i); - - if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) - fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); - - if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) - fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); - - if (freeit) { - bzero(pack, sizeof (bufwad_t)); - } else { - pack->bw_index = n + i; - pack->bw_txg = txg; - pack->bw_data = 1 + ztest_random(-2ULL); - } - *bigH = *pack; - *bigT = *pack; - } - - /* - * We've verified all the old bufwads, and made new ones. - * Now write them out. 
- */ - dmu_write(os, packobj, packoff, packsize, packbuf, tx); - - if (freeit) { - if (ztest_opts.zo_verbose >= 7) { - (void) printf("freeing offset %llx size %llx" - " txg %llx\n", - (u_longlong_t)bigoff, - (u_longlong_t)bigsize, - (u_longlong_t)txg); - } - VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); - } else { - if (ztest_opts.zo_verbose >= 7) { - (void) printf("writing offset %llx size %llx" - " txg %llx\n", - (u_longlong_t)bigoff, - (u_longlong_t)bigsize, - (u_longlong_t)txg); - } - dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); - } - - dmu_tx_commit(tx); - - /* - * Sanity check the stuff we just wrote. - */ - { - void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); - void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); - - VERIFY(0 == dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); - VERIFY(0 == dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); - - ASSERT(bcmp(packbuf, packcheck, packsize) == 0); - ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); - - umem_free(packcheck, packsize); - umem_free(bigcheck, bigsize); - } - - umem_free(packbuf, packsize); - umem_free(bigbuf, bigsize); -} - -void -compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, - uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) -{ - uint64_t i; - bufwad_t *pack; - bufwad_t *bigH; - bufwad_t *bigT; - - /* - * For each index from n to n + s, verify that the existing bufwad - * in packobj matches the bufwads at the head and tail of the - * corresponding chunk in bigobj. Then update all three bufwads - * with the new values we want to write out. 
- */ - for (i = 0; i < s; i++) { - /* LINTED */ - pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); - /* LINTED */ - bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); - /* LINTED */ - bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; - - ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); - ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); - - if (pack->bw_txg > txg) - fatal(0, "future leak: got %llx, open txg is %llx", - pack->bw_txg, txg); - - if (pack->bw_data != 0 && pack->bw_index != n + i) - fatal(0, "wrong index: got %llx, wanted %llx+%llx", - pack->bw_index, n, i); - - if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) - fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); - - if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) - fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); - - pack->bw_index = n + i; - pack->bw_txg = txg; - pack->bw_data = 1 + ztest_random(-2ULL); - - *bigH = *pack; - *bigT = *pack; - } -} - -void -ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[2]; - dmu_tx_t *tx; - uint64_t i; - int error; - uint64_t n, s, txg; - bufwad_t *packbuf, *bigbuf; - uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; - uint64_t blocksize = ztest_random_blocksize(); - uint64_t chunksize = blocksize; - uint64_t regions = 997; - uint64_t stride = 123456789ULL; - uint64_t width = 9; - dmu_buf_t *bonus_db; - arc_buf_t **bigbuf_arcbufs; - dmu_object_info_t doi; - - /* - * This test uses two objects, packobj and bigobj, that are always - * updated together (i.e. in the same tx) so that their contents are - * in sync and can be compared. Their contents relate to each other - * in a simple way: packobj is a dense array of 'bufwad' structures, - * while bigobj is a sparse array of the same bufwads. 
Specifically, - * for any index n, there are three bufwads that should be identical: - * - * packobj, at offset n * sizeof (bufwad_t) - * bigobj, at the head of the nth chunk - * bigobj, at the tail of the nth chunk - * - * The chunk size is set equal to bigobj block size so that - * dmu_assign_arcbuf() can be tested for object updates. - */ - - /* - * Read the directory info. If it's the first time, set things up. - */ - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, - 0, 0); - ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, - chunksize); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - bigobj = od[0].od_object; - packobj = od[1].od_object; - blocksize = od[0].od_blocksize; - chunksize = blocksize; - ASSERT(chunksize == od[1].od_gen); - - VERIFY(dmu_object_info(os, bigobj, &doi) == 0); - VERIFY(ISP2(doi.doi_data_block_size)); - VERIFY(chunksize == doi.doi_data_block_size); - VERIFY(chunksize >= 2 * sizeof (bufwad_t)); - - /* - * Pick a random index and compute the offsets into packobj and bigobj. - */ - n = ztest_random(regions) * stride + ztest_random(width); - s = 1 + ztest_random(width - 1); - - packoff = n * sizeof (bufwad_t); - packsize = s * sizeof (bufwad_t); - - bigoff = n * chunksize; - bigsize = s * chunksize; - - packbuf = umem_zalloc(packsize, UMEM_NOFAIL); - bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); - - VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); - - bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); - - /* - * Iteration 0 test zcopy for DB_UNCACHED dbufs. - * Iteration 1 test zcopy to already referenced dbufs. - * Iteration 2 test zcopy to dirty dbuf in the same txg. - * Iteration 3 test zcopy to dbuf dirty in previous txg. - * Iteration 4 test zcopy when dbuf is no longer dirty. - * Iteration 5 test zcopy when it can't be done. - * Iteration 6 one more zcopy write. 
- */ - for (i = 0; i < 7; i++) { - uint64_t j; - uint64_t off; - - /* - * In iteration 5 (i == 5) use arcbufs - * that don't match bigobj blksz to test - * dmu_assign_arcbuf() when it can't directly - * assign an arcbuf to a dbuf. - */ - for (j = 0; j < s; j++) { - if (i != 5) { - bigbuf_arcbufs[j] = - dmu_request_arcbuf(bonus_db, chunksize); - } else { - bigbuf_arcbufs[2 * j] = - dmu_request_arcbuf(bonus_db, chunksize / 2); - bigbuf_arcbufs[2 * j + 1] = - dmu_request_arcbuf(bonus_db, chunksize / 2); - } - } - - /* - * Get a tx for the mods to both packobj and bigobj. - */ - tx = dmu_tx_create(os); - - dmu_tx_hold_write(tx, packobj, packoff, packsize); - dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); - - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) { - umem_free(packbuf, packsize); - umem_free(bigbuf, bigsize); - for (j = 0; j < s; j++) { - if (i != 5) { - dmu_return_arcbuf(bigbuf_arcbufs[j]); - } else { - dmu_return_arcbuf( - bigbuf_arcbufs[2 * j]); - dmu_return_arcbuf( - bigbuf_arcbufs[2 * j + 1]); - } - } - umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); - dmu_buf_rele(bonus_db, FTAG); - return; - } - - /* - * 50% of the time don't read objects in the 1st iteration to - * test dmu_assign_arcbuf() for the case when there're no - * existing dbufs for the specified offsets. - */ - if (i != 0 || ztest_random(2) != 0) { - error = dmu_read(os, packobj, packoff, - packsize, packbuf, DMU_READ_PREFETCH); - ASSERT0(error); - error = dmu_read(os, bigobj, bigoff, bigsize, - bigbuf, DMU_READ_PREFETCH); - ASSERT0(error); - } - compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, - n, chunksize, txg); - - /* - * We've verified all the old bufwads, and made new ones. - * Now write them out. 
- */ - dmu_write(os, packobj, packoff, packsize, packbuf, tx); - if (ztest_opts.zo_verbose >= 7) { - (void) printf("writing offset %llx size %llx" - " txg %llx\n", - (u_longlong_t)bigoff, - (u_longlong_t)bigsize, - (u_longlong_t)txg); - } - for (off = bigoff, j = 0; j < s; j++, off += chunksize) { - dmu_buf_t *dbt; - if (i != 5) { - bcopy((caddr_t)bigbuf + (off - bigoff), - bigbuf_arcbufs[j]->b_data, chunksize); - } else { - bcopy((caddr_t)bigbuf + (off - bigoff), - bigbuf_arcbufs[2 * j]->b_data, - chunksize / 2); - bcopy((caddr_t)bigbuf + (off - bigoff) + - chunksize / 2, - bigbuf_arcbufs[2 * j + 1]->b_data, - chunksize / 2); - } - - if (i == 1) { - VERIFY(dmu_buf_hold(os, bigobj, off, - FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); - } - if (i != 5) { - dmu_assign_arcbuf(bonus_db, off, - bigbuf_arcbufs[j], tx); - } else { - dmu_assign_arcbuf(bonus_db, off, - bigbuf_arcbufs[2 * j], tx); - dmu_assign_arcbuf(bonus_db, - off + chunksize / 2, - bigbuf_arcbufs[2 * j + 1], tx); - } - if (i == 1) { - dmu_buf_rele(dbt, FTAG); - } - } - dmu_tx_commit(tx); - - /* - * Sanity check the stuff we just wrote. 
- */ - { - void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); - void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); - - VERIFY(0 == dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); - VERIFY(0 == dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); - - ASSERT(bcmp(packbuf, packcheck, packsize) == 0); - ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); - - umem_free(packcheck, packsize); - umem_free(bigcheck, bigsize); - } - if (i == 2) { - txg_wait_open(dmu_objset_pool(os), 0); - } else if (i == 3) { - txg_wait_synced(dmu_objset_pool(os), 0); - } - } - - dmu_buf_rele(bonus_db, FTAG); - umem_free(packbuf, packsize); - umem_free(bigbuf, bigsize); - umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); -} - -/* ARGSUSED */ -void -ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) -{ - ztest_od_t od[1]; - uint64_t offset = (1ULL << (ztest_random(20) + 43)) + - (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); - - /* - * Have multiple threads write to large offsets in an object - * to verify that parallel writes to an object -- even to the - * same blocks within the object -- doesn't cause any trouble. 
- */ - ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, - 0, 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - while (ztest_random(10) != 0) - ztest_io(zd, od[0].od_object, offset); -} - -void -ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) -{ - ztest_od_t od[1]; - uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + - (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); - uint64_t count = ztest_random(20) + 1; - uint64_t blocksize = ztest_random_blocksize(); - void *data; - - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, - 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) - return; - - if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) - return; - - ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); - - data = umem_zalloc(blocksize, UMEM_NOFAIL); - - while (ztest_random(count) != 0) { - uint64_t randoff = offset + (ztest_random(count) * blocksize); - if (ztest_write(zd, od[0].od_object, randoff, blocksize, - data) != 0) - break; - while (ztest_random(4) != 0) - ztest_io(zd, od[0].od_object, randoff); - } - - umem_free(data, blocksize); -} - -/* - * Verify that zap_{create,destroy,add,remove,update} work as expected. 
- */ -#define ZTEST_ZAP_MIN_INTS 1 -#define ZTEST_ZAP_MAX_INTS 4 -#define ZTEST_ZAP_MAX_PROPS 1000 - -void -ztest_zap(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[1]; - uint64_t object; - uint64_t txg, last_txg; - uint64_t value[ZTEST_ZAP_MAX_INTS]; - uint64_t zl_ints, zl_intsize, prop; - int i, ints; - dmu_tx_t *tx; - char propname[100], txgname[100]; - int error; - char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; - - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) - return; - - object = od[0].od_object; - - /* - * Generate a known hash collision, and verify that - * we can lookup and remove both entries. - */ - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, object, B_TRUE, NULL); - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) - return; - for (i = 0; i < 2; i++) { - value[i] = i; - VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), - 1, &value[i], tx)); - } - for (i = 0; i < 2; i++) { - VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], - sizeof (uint64_t), 1, &value[i], tx)); - VERIFY3U(0, ==, - zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); - ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); - ASSERT3U(zl_ints, ==, 1); - } - for (i = 0; i < 2; i++) { - VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); - } - dmu_tx_commit(tx); - - /* - * Generate a buch of random entries. - */ - ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); - - prop = ztest_random(ZTEST_ZAP_MAX_PROPS); - (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); - (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); - bzero(value, sizeof (value)); - last_txg = 0; - - /* - * If these zap entries already exist, validate their contents. 
- */ - error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); - if (error == 0) { - ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); - ASSERT3U(zl_ints, ==, 1); - - VERIFY(zap_lookup(os, object, txgname, zl_intsize, - zl_ints, &last_txg) == 0); - - VERIFY(zap_length(os, object, propname, &zl_intsize, - &zl_ints) == 0); - - ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); - ASSERT3U(zl_ints, ==, ints); - - VERIFY(zap_lookup(os, object, propname, zl_intsize, - zl_ints, value) == 0); - - for (i = 0; i < ints; i++) { - ASSERT3U(value[i], ==, last_txg + object + i); - } - } else { - ASSERT3U(error, ==, ENOENT); - } - - /* - * Atomically update two entries in our zap object. - * The first is named txg_%llu, and contains the txg - * in which the property was last updated. The second - * is named prop_%llu, and the nth element of its value - * should be txg + object + n. - */ - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, object, B_TRUE, NULL); - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) - return; - - if (last_txg > txg) - fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); - - for (i = 0; i < ints; i++) - value[i] = txg + object + i; - - VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), - 1, &txg, tx)); - VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), - ints, value, tx)); - - dmu_tx_commit(tx); - - /* - * Remove a random pair of entries. 
- */ - prop = ztest_random(ZTEST_ZAP_MAX_PROPS); - (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); - (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); - - error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); - - if (error == ENOENT) - return; - - ASSERT0(error); - - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, object, B_TRUE, NULL); - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) - return; - VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); - VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); - dmu_tx_commit(tx); -} - -/* - * Testcase to test the upgrading of a microzap to fatzap. - */ -void -ztest_fzap(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[1]; - uint64_t object, txg; - - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) - return; - - object = od[0].od_object; - - /* - * Add entries to this ZAP and make sure it spills over - * and gets upgraded to a fatzap. Also, since we are adding - * 2050 entries we should see ptrtbl growth and leaf-block split. 
- */ - for (int i = 0; i < 2050; i++) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - uint64_t value = i; - dmu_tx_t *tx; - int error; - - (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", - id, value); - - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, object, B_TRUE, name); - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) - return; - error = zap_add(os, object, name, sizeof (uint64_t), 1, - &value, tx); - ASSERT(error == 0 || error == EEXIST); - dmu_tx_commit(tx); - } -} - -/* ARGSUSED */ -void -ztest_zap_parallel(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[1]; - uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; - dmu_tx_t *tx; - int i, namelen, error; - int micro = ztest_random(2); - char name[20], string_value[20]; - void *data; - - ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, - 0, 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - object = od[0].od_object; - - /* - * Generate a random name of the form 'xxx.....' where each - * x is a random printable character and the dots are dots. - * There are 94 such characters, and the name length goes from - * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. - */ - namelen = ztest_random(sizeof (name) - 5) + 5 + 1; - - for (i = 0; i < 3; i++) - name[i] = '!' + ztest_random('~' - '!' + 1); - for (; i < namelen - 1; i++) - name[i] = '.'; - name[i] = '\0'; - - if ((namelen & 1) || micro) { - wsize = sizeof (txg); - wc = 1; - data = &txg; - } else { - wsize = 1; - wc = namelen; - data = string_value; - } - - count = -1ULL; - VERIFY0(zap_count(os, object, &count)); - ASSERT(count != -1ULL); - - /* - * Select an operation: length, lookup, add, update, remove. 
- */ - i = ztest_random(5); - - if (i >= 2) { - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, object, B_TRUE, NULL); - txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); - if (txg == 0) - return; - bcopy(name, string_value, namelen); - } else { - tx = NULL; - txg = 0; - bzero(string_value, namelen); - } - - switch (i) { - - case 0: - error = zap_length(os, object, name, &zl_wsize, &zl_wc); - if (error == 0) { - ASSERT3U(wsize, ==, zl_wsize); - ASSERT3U(wc, ==, zl_wc); - } else { - ASSERT3U(error, ==, ENOENT); - } - break; - - case 1: - error = zap_lookup(os, object, name, wsize, wc, data); - if (error == 0) { - if (data == string_value && - bcmp(name, data, namelen) != 0) - fatal(0, "name '%s' != val '%s' len %d", - name, data, namelen); - } else { - ASSERT3U(error, ==, ENOENT); - } - break; - - case 2: - error = zap_add(os, object, name, wsize, wc, data, tx); - ASSERT(error == 0 || error == EEXIST); - break; - - case 3: - VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); - break; - - case 4: - error = zap_remove(os, object, name, tx); - ASSERT(error == 0 || error == ENOENT); - break; - } - - if (tx != NULL) - dmu_tx_commit(tx); -} - -/* - * Commit callback data. 
- */ -typedef struct ztest_cb_data { - list_node_t zcd_node; - uint64_t zcd_txg; - int zcd_expected_err; - boolean_t zcd_added; - boolean_t zcd_called; - spa_t *zcd_spa; -} ztest_cb_data_t; - -/* This is the actual commit callback function */ -static void -ztest_commit_callback(void *arg, int error) -{ - ztest_cb_data_t *data = arg; - uint64_t synced_txg; - - VERIFY(data != NULL); - VERIFY3S(data->zcd_expected_err, ==, error); - VERIFY(!data->zcd_called); - - synced_txg = spa_last_synced_txg(data->zcd_spa); - if (data->zcd_txg > synced_txg) - fatal(0, "commit callback of txg %" PRIu64 " called prematurely" - ", last synced txg = %" PRIu64 "\n", data->zcd_txg, - synced_txg); - - data->zcd_called = B_TRUE; - - if (error == ECANCELED) { - ASSERT0(data->zcd_txg); - ASSERT(!data->zcd_added); - - /* - * The private callback data should be destroyed here, but - * since we are going to check the zcd_called field after - * dmu_tx_abort(), we will destroy it there. - */ - return; - } - - /* Was this callback added to the global callback list? */ - if (!data->zcd_added) - goto out; - - ASSERT3U(data->zcd_txg, !=, 0); - - /* Remove our callback from the list */ - mutex_enter(&zcl.zcl_callbacks_lock); - list_remove(&zcl.zcl_callbacks, data); - mutex_exit(&zcl.zcl_callbacks_lock); - -out: - umem_free(data, sizeof (ztest_cb_data_t)); -} - -/* Allocate and initialize callback data structure */ -static ztest_cb_data_t * -ztest_create_cb_data(objset_t *os, uint64_t txg) -{ - ztest_cb_data_t *cb_data; - - cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); - - cb_data->zcd_txg = txg; - cb_data->zcd_spa = dmu_objset_spa(os); - - return (cb_data); -} - -/* - * If a number of txgs equal to this threshold have been created after a commit - * callback has been registered but not called, then we assume there is an - * implementation bug. - */ -#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) - -/* - * Commit callback test. 
- */ -void -ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - ztest_od_t od[1]; - dmu_tx_t *tx; - ztest_cb_data_t *cb_data[3], *tmp_cb; - uint64_t old_txg, txg; - int i, error; - - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - tx = dmu_tx_create(os); - - cb_data[0] = ztest_create_cb_data(os, 0); - dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); - - dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); - - /* Every once in a while, abort the transaction on purpose */ - if (ztest_random(100) == 0) - error = -1; - - if (!error) - error = dmu_tx_assign(tx, TXG_NOWAIT); - - txg = error ? 0 : dmu_tx_get_txg(tx); - - cb_data[0]->zcd_txg = txg; - cb_data[1] = ztest_create_cb_data(os, txg); - dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); - - if (error) { - /* - * It's not a strict requirement to call the registered - * callbacks from inside dmu_tx_abort(), but that's what - * it's supposed to happen in the current implementation - * so we will check for that. - */ - for (i = 0; i < 2; i++) { - cb_data[i]->zcd_expected_err = ECANCELED; - VERIFY(!cb_data[i]->zcd_called); - } - - dmu_tx_abort(tx); - - for (i = 0; i < 2; i++) { - VERIFY(cb_data[i]->zcd_called); - umem_free(cb_data[i], sizeof (ztest_cb_data_t)); - } - - return; - } - - cb_data[2] = ztest_create_cb_data(os, txg); - dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); - - /* - * Read existing data to make sure there isn't a future leak. 
- */ - VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), - &old_txg, DMU_READ_PREFETCH)); - - if (old_txg > txg) - fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, - old_txg, txg); - - dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); - - mutex_enter(&zcl.zcl_callbacks_lock); - - /* - * Since commit callbacks don't have any ordering requirement and since - * it is theoretically possible for a commit callback to be called - * after an arbitrary amount of time has elapsed since its txg has been - * synced, it is difficult to reliably determine whether a commit - * callback hasn't been called due to high load or due to a flawed - * implementation. - * - * In practice, we will assume that if after a certain number of txgs a - * commit callback hasn't been called, then most likely there's an - * implementation bug.. - */ - tmp_cb = list_head(&zcl.zcl_callbacks); - if (tmp_cb != NULL && - (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { - fatal(0, "Commit callback threshold exceeded, oldest txg: %" - PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); - } - - /* - * Let's find the place to insert our callbacks. - * - * Even though the list is ordered by txg, it is possible for the - * insertion point to not be the end because our txg may already be - * quiescing at this point and other callbacks in the open txg - * (from other objsets) may have sneaked in. 
- */ - tmp_cb = list_tail(&zcl.zcl_callbacks); - while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) - tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); - - /* Add the 3 callbacks to the list */ - for (i = 0; i < 3; i++) { - if (tmp_cb == NULL) - list_insert_head(&zcl.zcl_callbacks, cb_data[i]); - else - list_insert_after(&zcl.zcl_callbacks, tmp_cb, - cb_data[i]); - - cb_data[i]->zcd_added = B_TRUE; - VERIFY(!cb_data[i]->zcd_called); - - tmp_cb = cb_data[i]; - } - - mutex_exit(&zcl.zcl_callbacks_lock); - - dmu_tx_commit(tx); -} - -/* - * Visit each object in the dataset. Verify that its properties - * are consistent what was stored in the block tag when it was created, - * and that its unused bonus buffer space has not been overwritten. - */ -void -ztest_verify_dnode_bt(ztest_ds_t *zd, uint64_t id) -{ - objset_t *os = zd->zd_os; - uint64_t obj; - int err = 0; - - for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { - ztest_block_tag_t *bt = NULL; - dmu_object_info_t doi; - dmu_buf_t *db; - - if (dmu_bonus_hold(os, obj, FTAG, &db) != 0) - continue; - - dmu_object_info_from_db(db, &doi); - if (doi.doi_bonus_size >= sizeof (*bt)) - bt = ztest_bt_bonus(db); - - if (bt && bt->bt_magic == BT_MAGIC) { - ztest_bt_verify(bt, os, obj, doi.doi_dnodesize, - bt->bt_offset, bt->bt_gen, bt->bt_txg, - bt->bt_crtxg); - ztest_verify_unused_bonus(db, bt, obj, os, bt->bt_gen); - } - - dmu_buf_rele(db, FTAG); - } -} - -/* ARGSUSED */ -void -ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) -{ - zfs_prop_t proplist[] = { - ZFS_PROP_CHECKSUM, - ZFS_PROP_COMPRESSION, - ZFS_PROP_COPIES, - ZFS_PROP_DEDUP - }; - - rw_enter(&ztest_name_lock, RW_READER); - - for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) - (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], - ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); - - rw_exit(&ztest_name_lock); -} - -/* ARGSUSED */ -void -ztest_remap_blocks(ztest_ds_t *zd, uint64_t id) -{ - rw_enter(&ztest_name_lock, 
RW_READER); - - int error = dmu_objset_remap_indirects(zd->zd_name); - if (error == ENOSPC) - error = 0; - ASSERT0(error); - - rw_exit(&ztest_name_lock); -} - -/* ARGSUSED */ -void -ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) -{ - nvlist_t *props = NULL; - - rw_enter(&ztest_name_lock, RW_READER); - - (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, - ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); - - VERIFY0(spa_prop_get(ztest_spa, &props)); - - if (ztest_opts.zo_verbose >= 6) - dump_nvlist(props, 4); - - nvlist_free(props); - - rw_exit(&ztest_name_lock); -} - -static int -user_release_one(const char *snapname, const char *holdname) -{ - nvlist_t *snaps, *holds; - int error; - - snaps = fnvlist_alloc(); - holds = fnvlist_alloc(); - fnvlist_add_boolean(holds, holdname); - fnvlist_add_nvlist(snaps, snapname, holds); - fnvlist_free(holds); - error = dsl_dataset_user_release(snaps, NULL); - fnvlist_free(snaps); - return (error); -} - -/* - * Test snapshot hold/release and deferred destroy. - */ -void -ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) -{ - int error; - objset_t *os = zd->zd_os; - objset_t *origin; - char snapname[100]; - char fullname[100]; - char clonename[100]; - char tag[100]; - char osname[ZFS_MAX_DATASET_NAME_LEN]; - nvlist_t *holds; - - rw_enter(&ztest_name_lock, RW_READER); - - dmu_objset_name(os, osname); - - (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); - (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); - (void) snprintf(clonename, sizeof (clonename), - "%s/ch1_%llu", osname, id); - (void) snprintf(tag, sizeof (tag), "tag_%llu", id); - - /* - * Clean up from any previous run. 
- */ - error = dsl_destroy_head(clonename); - if (error != ENOENT) - ASSERT0(error); - error = user_release_one(fullname, tag); - if (error != ESRCH && error != ENOENT) - ASSERT0(error); - error = dsl_destroy_snapshot(fullname, B_FALSE); - if (error != ENOENT) - ASSERT0(error); - - /* - * Create snapshot, clone it, mark snap for deferred destroy, - * destroy clone, verify snap was also destroyed. - */ - error = dmu_objset_snapshot_one(osname, snapname); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc("dmu_objset_snapshot"); - goto out; - } - fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); - } - - error = dmu_objset_clone(clonename, fullname); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc("dmu_objset_clone"); - goto out; - } - fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); - } - - error = dsl_destroy_snapshot(fullname, B_TRUE); - if (error) { - fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", - fullname, error); - } - - error = dsl_destroy_head(clonename); - if (error) - fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); - - error = dmu_objset_hold(fullname, FTAG, &origin); - if (error != ENOENT) - fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); - - /* - * Create snapshot, add temporary hold, verify that we can't - * destroy a held snapshot, mark for deferred destroy, - * release hold, verify snapshot was destroyed. 
- */ - error = dmu_objset_snapshot_one(osname, snapname); - if (error) { - if (error == ENOSPC) { - ztest_record_enospc("dmu_objset_snapshot"); - goto out; - } - fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); - } - - holds = fnvlist_alloc(); - fnvlist_add_string(holds, fullname, tag); - error = dsl_dataset_user_hold(holds, 0, NULL); - fnvlist_free(holds); - - if (error == ENOSPC) { - ztest_record_enospc("dsl_dataset_user_hold"); - goto out; - } else if (error) { - fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", - fullname, tag, error); - } - - error = dsl_destroy_snapshot(fullname, B_FALSE); - if (error != EBUSY) { - fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", - fullname, error); - } - - error = dsl_destroy_snapshot(fullname, B_TRUE); - if (error) { - fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", - fullname, error); - } - - error = user_release_one(fullname, tag); - if (error) - fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); - - VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); - -out: - rw_exit(&ztest_name_lock); -} - -/* - * Inject random faults into the on-disk data. - */ -/* ARGSUSED */ -void -ztest_fault_inject(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - int fd; - uint64_t offset; - uint64_t leaves; - uint64_t bad = 0x1990c0ffeedecadeULL; - uint64_t top, leaf; - char path0[MAXPATHLEN]; - char pathrand[MAXPATHLEN]; - size_t fsize; - int bshift = SPA_MAXBLOCKSHIFT + 2; - int iters = 1000; - int maxfaults; - int mirror_save; - vdev_t *vd0 = NULL; - uint64_t guid0 = 0; - boolean_t islog = B_FALSE; - - mutex_enter(&ztest_vdev_lock); - - /* - * Device removal is in progress, fault injection must be disabled - * until it completes and the pool is scrubbed. The fault injection - * strategy for damaging blocks does not take in to account evacuated - * blocks which may have already been damaged. 
- */ - if (ztest_device_removal_active) { - mutex_exit(&ztest_vdev_lock); - return; - } - - maxfaults = MAXFAULTS(); - leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; - mirror_save = zs->zs_mirrors; - mutex_exit(&ztest_vdev_lock); - - ASSERT(leaves >= 1); - - /* - * Grab the name lock as reader. There are some operations - * which don't like to have their vdevs changed while - * they are in progress (i.e. spa_change_guid). Those - * operations will have grabbed the name lock as writer. - */ - rw_enter(&ztest_name_lock, RW_READER); - - /* - * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. - */ - spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); - - if (ztest_random(2) == 0) { - /* - * Inject errors on a normal data device or slog device. - */ - top = ztest_random_vdev_top(spa, B_TRUE); - leaf = ztest_random(leaves) + zs->zs_splits; - - /* - * Generate paths to the first leaf in this top-level vdev, - * and to the random leaf we selected. We'll induce transient - * write failures and random online/offline activity on leaf 0, - * and we'll write random garbage to the randomly chosen leaf. - */ - (void) snprintf(path0, sizeof (path0), ztest_dev_template, - ztest_opts.zo_dir, ztest_opts.zo_pool, - top * leaves + zs->zs_splits); - (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, - ztest_opts.zo_dir, ztest_opts.zo_pool, - top * leaves + leaf); - - vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); - if (vd0 != NULL && vd0->vdev_top->vdev_islog) - islog = B_TRUE; - - /* - * If the top-level vdev needs to be resilvered - * then we only allow faults on the device that is - * resilvering. - */ - if (vd0 != NULL && maxfaults != 1 && - (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || - vd0->vdev_resilver_txg != 0)) { - /* - * Make vd0 explicitly claim to be unreadable, - * or unwriteable, or reach behind its back - * and close the underlying fd. 
We can do this if - * maxfaults == 0 because we'll fail and reexecute, - * and we can do it if maxfaults >= 2 because we'll - * have enough redundancy. If maxfaults == 1, the - * combination of this with injection of random data - * corruption below exceeds the pool's fault tolerance. - */ - vdev_file_t *vf = vd0->vdev_tsd; - - zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d", - (long long)vd0->vdev_id, (int)maxfaults); - - if (vf != NULL && ztest_random(3) == 0) { - (void) close(vf->vf_vnode->v_fd); - vf->vf_vnode->v_fd = -1; - } else if (ztest_random(2) == 0) { - vd0->vdev_cant_read = B_TRUE; - } else { - vd0->vdev_cant_write = B_TRUE; - } - guid0 = vd0->vdev_guid; - } - } else { - /* - * Inject errors on an l2cache device. - */ - spa_aux_vdev_t *sav = &spa->spa_l2cache; - - if (sav->sav_count == 0) { - spa_config_exit(spa, SCL_STATE, FTAG); - rw_exit(&ztest_name_lock); - return; - } - vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; - guid0 = vd0->vdev_guid; - (void) strcpy(path0, vd0->vdev_path); - (void) strcpy(pathrand, vd0->vdev_path); - - leaf = 0; - leaves = 1; - maxfaults = INT_MAX; /* no limit on cache devices */ - } - - spa_config_exit(spa, SCL_STATE, FTAG); - rw_exit(&ztest_name_lock); - - /* - * If we can tolerate two or more faults, or we're dealing - * with a slog, randomly online/offline vd0. - */ - if ((maxfaults >= 2 || islog) && guid0 != 0) { - if (ztest_random(10) < 6) { - int flags = (ztest_random(2) == 0 ? - ZFS_OFFLINE_TEMPORARY : 0); - - /* - * We have to grab the zs_name_lock as writer to - * prevent a race between offlining a slog and - * destroying a dataset. Offlining the slog will - * grab a reference on the dataset which may cause - * dmu_objset_destroy() to fail with EBUSY thus - * leaving the dataset in an inconsistent state. 
- */ - if (islog) - rw_enter(&ztest_name_lock, RW_WRITER); - - VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); - - if (islog) - rw_exit(&ztest_name_lock); - } else { - /* - * Ideally we would like to be able to randomly - * call vdev_[on|off]line without holding locks - * to force unpredictable failures but the side - * effects of vdev_[on|off]line prevent us from - * doing so. We grab the ztest_vdev_lock here to - * prevent a race between injection testing and - * aux_vdev removal. - */ - mutex_enter(&ztest_vdev_lock); - (void) vdev_online(spa, guid0, 0, NULL); - mutex_exit(&ztest_vdev_lock); - } - } - - if (maxfaults == 0) - return; - - /* - * We have at least single-fault tolerance, so inject data corruption. - */ - fd = open(pathrand, O_RDWR); - - if (fd == -1) /* we hit a gap in the device namespace */ - return; - - fsize = lseek(fd, 0, SEEK_END); - - while (--iters != 0) { - /* - * The offset must be chosen carefully to ensure that - * we do not inject a given logical block with errors - * on two different leaf devices, because ZFS can not - * tolerate that (if maxfaults==1). - * - * We divide each leaf into chunks of size - * (# leaves * SPA_MAXBLOCKSIZE * 4). Within each chunk - * there is a series of ranges to which we can inject errors. - * Each range can accept errors on only a single leaf vdev. - * The error injection ranges are separated by ranges - * which we will not inject errors on any device (DMZs). - * Each DMZ must be large enough such that a single block - * can not straddle it, so that a single block can not be - * a target in two different injection ranges (on different - * leaf vdevs). 
- * - * For example, with 3 leaves, each chunk looks like: - * 0 to 32M: injection range for leaf 0 - * 32M to 64M: DMZ - no injection allowed - * 64M to 96M: injection range for leaf 1 - * 96M to 128M: DMZ - no injection allowed - * 128M to 160M: injection range for leaf 2 - * 160M to 192M: DMZ - no injection allowed - */ - offset = ztest_random(fsize / (leaves << bshift)) * - (leaves << bshift) + (leaf << bshift) + - (ztest_random(1ULL << (bshift - 1)) & -8ULL); - - /* - * Only allow damage to the labels at one end of the vdev. - * - * If all labels are damaged, the device will be totally - * inaccessible, which will result in loss of data, - * because we also damage (parts of) the other side of - * the mirror/raidz. - * - * Additionally, we will always have both an even and an - * odd label, so that we can handle crashes in the - * middle of vdev_config_sync(). - */ - if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE) - continue; - - /* - * The two end labels are stored at the "end" of the disk, but - * the end of the disk (vdev_psize) is aligned to - * sizeof (vdev_label_t). - */ - uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t)); - if ((leaf & 1) == 1 && - offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE) - continue; - - mutex_enter(&ztest_vdev_lock); - if (mirror_save != zs->zs_mirrors) { - mutex_exit(&ztest_vdev_lock); - (void) close(fd); - return; - } - - if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) - fatal(1, "can't inject bad word at 0x%llx in %s", - offset, pathrand); - - mutex_exit(&ztest_vdev_lock); - - if (ztest_opts.zo_verbose >= 7) - (void) printf("injected bad word into %s," - " offset 0x%llx\n", pathrand, (u_longlong_t)offset); - } - - (void) close(fd); -} - -/* - * Verify that DDT repair works as expected. 
- */ -void -ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) -{ - ztest_shared_t *zs = ztest_shared; - spa_t *spa = ztest_spa; - objset_t *os = zd->zd_os; - ztest_od_t od[1]; - uint64_t object, blocksize, txg, pattern, psize; - enum zio_checksum checksum = spa_dedup_checksum(spa); - dmu_buf_t *db; - dmu_tx_t *tx; - abd_t *abd; - blkptr_t blk; - int copies = 2 * ZIO_DEDUPDITTO_MIN; - - blocksize = ztest_random_blocksize(); - blocksize = MIN(blocksize, 2048); /* because we write so many */ - - ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, - 0, 0); - - if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) - return; - - /* - * Take the name lock as writer to prevent anyone else from changing - * the pool and dataset properies we need to maintain during this test. - */ - rw_enter(&ztest_name_lock, RW_WRITER); - - if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, - B_FALSE) != 0 || - ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, - B_FALSE) != 0) { - rw_exit(&ztest_name_lock); - return; - } - - dmu_objset_stats_t dds; - dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - dmu_objset_fast_stat(os, &dds); - dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - - object = od[0].od_object; - blocksize = od[0].od_blocksize; - pattern = zs->zs_guid ^ dds.dds_guid; - - ASSERT(object != 0); - - tx = dmu_tx_create(os); - dmu_tx_hold_write(tx, object, 0, copies * blocksize); - txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); - if (txg == 0) { - rw_exit(&ztest_name_lock); - return; - } - - /* - * Write all the copies of our block. 
- */ - for (int i = 0; i < copies; i++) { - uint64_t offset = i * blocksize; - int error = dmu_buf_hold(os, object, offset, FTAG, &db, - DMU_READ_NO_PREFETCH); - if (error != 0) { - fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", - os, (long long)object, (long long) offset, error); - } - ASSERT(db->db_offset == offset); - ASSERT(db->db_size == blocksize); - ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || - ztest_pattern_match(db->db_data, db->db_size, 0ULL)); - dmu_buf_will_fill(db, tx); - ztest_pattern_set(db->db_data, db->db_size, pattern); - dmu_buf_rele(db, FTAG); - } - - dmu_tx_commit(tx); - txg_wait_synced(spa_get_dsl(spa), txg); - - /* - * Find out what block we got. - */ - VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, - DMU_READ_NO_PREFETCH)); - blk = *((dmu_buf_impl_t *)db)->db_blkptr; - dmu_buf_rele(db, FTAG); - - /* - * Damage the block. Dedup-ditto will save us when we read it later. - */ - psize = BP_GET_PSIZE(&blk); - abd = abd_alloc_linear(psize, B_TRUE); - ztest_pattern_set(abd_to_buf(abd), psize, ~pattern); - - (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, - abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, - ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); - - abd_free(abd); - - rw_exit(&ztest_name_lock); -} - -/* - * Scrub the pool. - */ -/* ARGSUSED */ -void -ztest_scrub(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - - /* - * Scrub in progress by device removal. - */ - if (ztest_device_removal_active) - return; - - (void) spa_scan(spa, POOL_SCAN_SCRUB); - (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ - (void) spa_scan(spa, POOL_SCAN_SCRUB); -} - -/* - * Change the guid for the pool. 
- */ -/* ARGSUSED */ -void -ztest_reguid(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - uint64_t orig, load; - int error; - - if (ztest_opts.zo_mmp_test) - return; - - orig = spa_guid(spa); - load = spa_load_guid(spa); - - rw_enter(&ztest_name_lock, RW_WRITER); - error = spa_change_guid(spa); - rw_exit(&ztest_name_lock); - - if (error != 0) - return; - - if (ztest_opts.zo_verbose >= 4) { - (void) printf("Changed guid old %llu -> %llu\n", - (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); - } - - VERIFY3U(orig, !=, spa_guid(spa)); - VERIFY3U(load, ==, spa_load_guid(spa)); -} - -static vdev_t * -ztest_random_concrete_vdev_leaf(vdev_t *vd) -{ - if (vd == NULL) - return (NULL); - - if (vd->vdev_children == 0) - return (vd); - - vdev_t *eligible[vd->vdev_children]; - int eligible_idx = 0, i; - for (i = 0; i < vd->vdev_children; i++) { - vdev_t *cvd = vd->vdev_child[i]; - if (cvd->vdev_top->vdev_removing) - continue; - if (cvd->vdev_children > 0 || - (vdev_is_concrete(cvd) && !cvd->vdev_detached)) { - eligible[eligible_idx++] = cvd; - } - } - VERIFY(eligible_idx > 0); - - uint64_t child_no = ztest_random(eligible_idx); - return (ztest_random_concrete_vdev_leaf(eligible[child_no])); -} - -/* ARGSUSED */ -void -ztest_initialize(ztest_ds_t *zd, uint64_t id) -{ - spa_t *spa = ztest_spa; - int error = 0; - - mutex_enter(&ztest_vdev_lock); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - /* Random leaf vdev */ - vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev); - if (rand_vd == NULL) { - spa_config_exit(spa, SCL_VDEV, FTAG); - mutex_exit(&ztest_vdev_lock); - return; - } - - /* - * The random vdev we've selected may change as soon as we - * drop the spa_config_lock. We create local copies of things - * we're interested in. 
- */ - uint64_t guid = rand_vd->vdev_guid; - char *path = strdup(rand_vd->vdev_path); - boolean_t active = rand_vd->vdev_initialize_thread != NULL; - - zfs_dbgmsg("vd %p, guid %llu", rand_vd, guid); - spa_config_exit(spa, SCL_VDEV, FTAG); - - uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS); - error = spa_vdev_initialize(spa, guid, cmd); - switch (cmd) { - case POOL_INITIALIZE_CANCEL: - if (ztest_opts.zo_verbose >= 4) { - (void) printf("Cancel initialize %s", path); - if (!active) - (void) printf(" failed (no initialize active)"); - (void) printf("\n"); - } - break; - case POOL_INITIALIZE_DO: - if (ztest_opts.zo_verbose >= 4) { - (void) printf("Start initialize %s", path); - if (active && error == 0) - (void) printf(" failed (already active)"); - else if (error != 0) - (void) printf(" failed (error %d)", error); - (void) printf("\n"); - } - break; - case POOL_INITIALIZE_SUSPEND: - if (ztest_opts.zo_verbose >= 4) { - (void) printf("Suspend initialize %s", path); - if (!active) - (void) printf(" failed (no initialize active)"); - (void) printf("\n"); - } - break; - } - free(path); - mutex_exit(&ztest_vdev_lock); -} - -/* - * Verify pool integrity by running zdb. - */ -static void -ztest_run_zdb(char *pool) -{ - int status; - char zdb[MAXPATHLEN + MAXNAMELEN + 20]; - char zbuf[1024]; - char *bin; - char *ztest; - char *isa; - int isalen; - FILE *fp; - - strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); - - /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ - bin = strstr(zdb, "/usr/bin/"); - ztest = strstr(bin, "/ztest"); - isa = bin + 8; - isalen = ztest - isa; - isa = strdup(isa); - /* LINTED */ - (void) sprintf(bin, - "/usr/sbin%.*s/zdb -bcc%s%s -G -d -U %s " - "-o zfs_reconstruct_indirect_combinations_max=65536 %s", - isalen, - isa, - ztest_opts.zo_verbose >= 3 ? "s" : "", - ztest_opts.zo_verbose >= 4 ? 
"v" : "", - spa_config_path, - pool); - free(isa); - - if (ztest_opts.zo_verbose >= 5) - (void) printf("Executing %s\n", strstr(zdb, "zdb ")); - - fp = popen(zdb, "r"); - assert(fp != NULL); - - while (fgets(zbuf, sizeof (zbuf), fp) != NULL) - if (ztest_opts.zo_verbose >= 3) - (void) printf("%s", zbuf); - - status = pclose(fp); - - if (status == 0) - return; - - ztest_dump_core = 0; - if (WIFEXITED(status)) - fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); - else - fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); -} - -static void -ztest_walk_pool_directory(char *header) -{ - spa_t *spa = NULL; - - if (ztest_opts.zo_verbose >= 6) - (void) printf("%s\n", header); - - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) - if (ztest_opts.zo_verbose >= 6) - (void) printf("\t%s\n", spa_name(spa)); - mutex_exit(&spa_namespace_lock); -} - -static void -ztest_spa_import_export(char *oldname, char *newname) -{ - nvlist_t *config, *newconfig; - uint64_t pool_guid; - spa_t *spa; - int error; - - if (ztest_opts.zo_verbose >= 4) { - (void) printf("import/export: old = %s, new = %s\n", - oldname, newname); - } - - /* - * Clean up from previous runs. - */ - (void) spa_destroy(newname); - - /* - * Get the pool's configuration and guid. - */ - VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); - - /* - * Kick off a scrub to tickle scrub/export races. - */ - if (ztest_random(2) == 0) - (void) spa_scan(spa, POOL_SCAN_SCRUB); - - pool_guid = spa_guid(spa); - spa_close(spa, FTAG); - - ztest_walk_pool_directory("pools before export"); - - /* - * Export it. - */ - VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); - - ztest_walk_pool_directory("pools after export"); - - /* - * Try to import it. - */ - newconfig = spa_tryimport(config); - ASSERT(newconfig != NULL); - nvlist_free(newconfig); - - /* - * Import it under the new name. 
- */ - error = spa_import(newname, config, NULL, 0); - if (error != 0) { - dump_nvlist(config, 0); - fatal(B_FALSE, "couldn't import pool %s as %s: error %u", - oldname, newname, error); - } - - ztest_walk_pool_directory("pools after import"); - - /* - * Try to import it again -- should fail with EEXIST. - */ - VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); - - /* - * Try to import it under a different name -- should fail with EEXIST. - */ - VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); - - /* - * Verify that the pool is no longer visible under the old name. - */ - VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); - - /* - * Verify that we can open and close the pool using the new name. - */ - VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); - ASSERT(pool_guid == spa_guid(spa)); - spa_close(spa, FTAG); - - nvlist_free(config); -} - -static void -ztest_resume(spa_t *spa) -{ - if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) - (void) printf("resuming from suspended state\n"); - spa_vdev_state_enter(spa, SCL_NONE); - vdev_clear(spa, NULL); - (void) spa_vdev_state_exit(spa, NULL, 0); - (void) zio_resume(spa); -} - -static void * -ztest_resume_thread(void *arg) -{ - spa_t *spa = arg; - - while (!ztest_exiting) { - if (spa_suspended(spa)) - ztest_resume(spa); - (void) poll(NULL, 0, 100); - - /* - * Periodically change the zfs_compressed_arc_enabled setting. - */ - if (ztest_random(10) == 0) - zfs_compressed_arc_enabled = ztest_random(2); - - /* - * Periodically change the zfs_abd_scatter_enabled setting. 
- */ - if (ztest_random(10) == 0) - zfs_abd_scatter_enabled = ztest_random(2); - } - return (NULL); -} - -static void * -ztest_deadman_thread(void *arg) -{ - ztest_shared_t *zs = arg; - spa_t *spa = ztest_spa; - hrtime_t delta, total = 0; - - for (;;) { - delta = zs->zs_thread_stop - zs->zs_thread_start + - MSEC2NSEC(zfs_deadman_synctime_ms); - - (void) poll(NULL, 0, (int)NSEC2MSEC(delta)); - - /* - * If the pool is suspended then fail immediately. Otherwise, - * check to see if the pool is making any progress. If - * vdev_deadman() discovers that there hasn't been any recent - * I/Os then it will end up aborting the tests. - */ - if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { - fatal(0, "aborting test after %llu seconds because " - "pool has transitioned to a suspended state.", - zfs_deadman_synctime_ms / 1000); - return (NULL); - } - vdev_deadman(spa->spa_root_vdev); - - total += zfs_deadman_synctime_ms/1000; - (void) printf("ztest has been running for %lld seconds\n", - total); - } -} - -static void -ztest_execute(int test, ztest_info_t *zi, uint64_t id) -{ - ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; - ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); - hrtime_t functime = gethrtime(); - - for (int i = 0; i < zi->zi_iters; i++) - zi->zi_func(zd, id); - - functime = gethrtime() - functime; - - atomic_add_64(&zc->zc_count, 1); - atomic_add_64(&zc->zc_time, functime); - - if (ztest_opts.zo_verbose >= 4) { - Dl_info dli; - (void) dladdr((void *)zi->zi_func, &dli); - (void) printf("%6.2f sec in %s\n", - (double)functime / NANOSEC, dli.dli_sname); - } -} - -static void * -ztest_thread(void *arg) -{ - int rand; - uint64_t id = (uintptr_t)arg; - ztest_shared_t *zs = ztest_shared; - uint64_t call_next; - hrtime_t now; - ztest_info_t *zi; - ztest_shared_callstate_t *zc; - - while ((now = gethrtime()) < zs->zs_thread_stop) { - /* - * See if it's time to force a crash. 
- */ - if (now > zs->zs_thread_kill) - ztest_kill(zs); - - /* - * If we're getting ENOSPC with some regularity, stop. - */ - if (zs->zs_enospc_count > 10) - break; - - /* - * Pick a random function to execute. - */ - rand = ztest_random(ZTEST_FUNCS); - zi = &ztest_info[rand]; - zc = ZTEST_GET_SHARED_CALLSTATE(rand); - call_next = zc->zc_next; - - if (now >= call_next && - atomic_cas_64(&zc->zc_next, call_next, call_next + - ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { - ztest_execute(rand, zi, id); - } - } - - return (NULL); -} - -static void -ztest_dataset_name(char *dsname, char *pool, int d) -{ - (void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d); -} - -static void -ztest_dataset_destroy(int d) -{ - char name[ZFS_MAX_DATASET_NAME_LEN]; - - ztest_dataset_name(name, ztest_opts.zo_pool, d); - - if (ztest_opts.zo_verbose >= 3) - (void) printf("Destroying %s to free up space\n", name); - - /* - * Cleanup any non-standard clones and snapshots. In general, - * ztest thread t operates on dataset (t % zopt_datasets), - * so there may be more than one thing to clean up. - */ - for (int t = d; t < ztest_opts.zo_threads; - t += ztest_opts.zo_datasets) { - ztest_dsl_dataset_cleanup(name, t); - } - - (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, - DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); -} - -static void -ztest_dataset_dirobj_verify(ztest_ds_t *zd) -{ - uint64_t usedobjs, dirobjs, scratch; - - /* - * ZTEST_DIROBJ is the object directory for the entire dataset. - * Therefore, the number of objects in use should equal the - * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. - * If not, we have an object leak. - * - * Note that we can only check this in ztest_dataset_open(), - * when the open-context and syncing-context values agree. - * That's because zap_count() returns the open-context value, - * while dmu_objset_space() returns the rootbp fill count. 
- */ - VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); - dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); - ASSERT3U(dirobjs + 1, ==, usedobjs); -} - -static int -ztest_dataset_open(int d) -{ - ztest_ds_t *zd = &ztest_ds[d]; - uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; - objset_t *os; - zilog_t *zilog; - char name[ZFS_MAX_DATASET_NAME_LEN]; - int error; - - ztest_dataset_name(name, ztest_opts.zo_pool, d); - - rw_enter(&ztest_name_lock, RW_READER); - - error = ztest_dataset_create(name); - if (error == ENOSPC) { - rw_exit(&ztest_name_lock); - ztest_record_enospc(FTAG); - return (error); - } - ASSERT(error == 0 || error == EEXIST); - - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); - rw_exit(&ztest_name_lock); - - ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); - - zilog = zd->zd_zilog; - - if (zilog->zl_header->zh_claim_lr_seq != 0 && - zilog->zl_header->zh_claim_lr_seq < committed_seq) - fatal(0, "missing log records: claimed %llu < committed %llu", - zilog->zl_header->zh_claim_lr_seq, committed_seq); - - ztest_dataset_dirobj_verify(zd); - - zil_replay(os, zd, ztest_replay_vector); - - ztest_dataset_dirobj_verify(zd); - - if (ztest_opts.zo_verbose >= 6) - (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", - zd->zd_name, - (u_longlong_t)zilog->zl_parse_blk_count, - (u_longlong_t)zilog->zl_parse_lr_count, - (u_longlong_t)zilog->zl_replaying_seq); - - zilog = zil_open(os, ztest_get_data); - - if (zilog->zl_replaying_seq != 0 && - zilog->zl_replaying_seq < committed_seq) - fatal(0, "missing log records: replayed %llu < committed %llu", - zilog->zl_replaying_seq, committed_seq); - - return (0); -} - -static void -ztest_dataset_close(int d) -{ - ztest_ds_t *zd = &ztest_ds[d]; - - zil_close(zd->zd_zilog); - dmu_objset_disown(zd->zd_os, zd); - - ztest_zd_fini(zd); -} - -/* - * Kick off threads to run tests on all datasets in parallel. 
- */ -static void -ztest_run(ztest_shared_t *zs) -{ - thread_t *tid; - spa_t *spa; - objset_t *os; - thread_t resume_tid; - int error; - - ztest_exiting = B_FALSE; - - /* - * Initialize parent/child shared state. - */ - mutex_init(&ztest_checkpoint_lock, NULL, USYNC_THREAD, NULL); - mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL); - rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); - - zs->zs_thread_start = gethrtime(); - zs->zs_thread_stop = - zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; - zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); - zs->zs_thread_kill = zs->zs_thread_stop; - if (ztest_random(100) < ztest_opts.zo_killrate) { - zs->zs_thread_kill -= - ztest_random(ztest_opts.zo_passtime * NANOSEC); - } - - mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL); - - list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), - offsetof(ztest_cb_data_t, zcd_node)); - - /* - * Open our pool. - */ - kernel_init(FREAD | FWRITE); - VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); - metaslab_preload_limit = ztest_random(20) + 1; - ztest_spa = spa; - - dmu_objset_stats_t dds; - VERIFY0(dmu_objset_own(ztest_opts.zo_pool, - DMU_OST_ANY, B_TRUE, FTAG, &os)); - dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - dmu_objset_fast_stat(os, &dds); - dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - zs->zs_guid = dds.dds_guid; - dmu_objset_disown(os, FTAG); - - spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; - - /* - * We don't expect the pool to suspend unless maxfaults == 0, - * in which case ztest_fault_inject() temporarily takes away - * the only valid replica. - */ - if (MAXFAULTS() == 0) - spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; - else - spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; - - /* - * Create a thread to periodically resume suspended I/O. - */ - VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, - &resume_tid) == 0); - - /* - * Create a deadman thread to abort() if we hang. 
- */ - VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, - NULL) == 0); - - /* - * Verify that we can safely inquire about any object, - * whether it's allocated or not. To make it interesting, - * we probe a 5-wide window around each power of two. - * This hits all edge cases, including zero and the max. - */ - for (int t = 0; t < 64; t++) { - for (int d = -5; d <= 5; d++) { - error = dmu_object_info(spa->spa_meta_objset, - (1ULL << t) + d, NULL); - ASSERT(error == 0 || error == ENOENT || - error == EINVAL); - } - } - - /* - * If we got any ENOSPC errors on the previous run, destroy something. - */ - if (zs->zs_enospc_count != 0) { - int d = ztest_random(ztest_opts.zo_datasets); - ztest_dataset_destroy(d); - } - zs->zs_enospc_count = 0; - - tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), - UMEM_NOFAIL); - - if (ztest_opts.zo_verbose >= 4) - (void) printf("starting main threads...\n"); - - /* - * Kick off all the tests that run in parallel. - */ - for (int t = 0; t < ztest_opts.zo_threads; t++) { - if (t < ztest_opts.zo_datasets && - ztest_dataset_open(t) != 0) - return; - VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, - THR_BOUND, &tid[t]) == 0); - } - - /* - * Wait for all of the tests to complete. We go in reverse order - * so we don't close datasets while threads are still using them. 
- */ - for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { - VERIFY(thr_join(tid[t], NULL, NULL) == 0); - if (t < ztest_opts.zo_datasets) - ztest_dataset_close(t); - } - - txg_wait_synced(spa_get_dsl(spa), 0); - - zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); - zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); - zfs_dbgmsg_print(FTAG); - - umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); - - /* Kill the resume thread */ - ztest_exiting = B_TRUE; - VERIFY(thr_join(resume_tid, NULL, NULL) == 0); - ztest_resume(spa); - - /* - * Right before closing the pool, kick off a bunch of async I/O; - * spa_close() should wait for it to complete. - */ - for (uint64_t object = 1; object < 50; object++) { - dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, - ZIO_PRIORITY_SYNC_READ); - } - - spa_close(spa, FTAG); - - /* - * Verify that we can loop over all pools. - */ - mutex_enter(&spa_namespace_lock); - for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) - if (ztest_opts.zo_verbose > 3) - (void) printf("spa_next: found %s\n", spa_name(spa)); - mutex_exit(&spa_namespace_lock); - - /* - * Verify that we can export the pool and reimport it under a - * different name. 
- */ - if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - (void) snprintf(name, sizeof (name), "%s_import", - ztest_opts.zo_pool); - ztest_spa_import_export(ztest_opts.zo_pool, name); - ztest_spa_import_export(name, ztest_opts.zo_pool); - } - - kernel_fini(); - - list_destroy(&zcl.zcl_callbacks); - - mutex_destroy(&zcl.zcl_callbacks_lock); - - rw_destroy(&ztest_name_lock); - mutex_destroy(&ztest_vdev_lock); - mutex_destroy(&ztest_checkpoint_lock); -} - -static void -ztest_freeze(void) -{ - ztest_ds_t *zd = &ztest_ds[0]; - spa_t *spa; - int numloops = 0; - - if (ztest_opts.zo_verbose >= 3) - (void) printf("testing spa_freeze()...\n"); - - kernel_init(FREAD | FWRITE); - VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); - VERIFY3U(0, ==, ztest_dataset_open(0)); - ztest_spa = spa; - - /* - * Force the first log block to be transactionally allocated. - * We have to do this before we freeze the pool -- otherwise - * the log chain won't be anchored. - */ - while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { - ztest_dmu_object_alloc_free(zd, 0); - zil_commit(zd->zd_zilog, 0); - } - - txg_wait_synced(spa_get_dsl(spa), 0); - - /* - * Freeze the pool. This stops spa_sync() from doing anything, - * so that the only way to record changes from now on is the ZIL. - */ - spa_freeze(spa); - - /* - * Because it is hard to predict how much space a write will actually - * require beforehand, we leave ourselves some fudge space to write over - * capacity. - */ - uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2; - - /* - * Run tests that generate log records but don't alter the pool config - * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). - * We do a txg_wait_synced() after each iteration to force the txg - * to increase well beyond the last synced value in the uberblock. - * The ZIL should be OK with that. 
- * - * Run a random number of times less than zo_maxloops and ensure we do - * not run out of space on the pool. - */ - while (ztest_random(10) != 0 && - numloops++ < ztest_opts.zo_maxloops && - metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) { - ztest_od_t od; - ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0); - VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE)); - ztest_io(zd, od.od_object, - ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); - txg_wait_synced(spa_get_dsl(spa), 0); - } - - /* - * Commit all of the changes we just generated. - */ - zil_commit(zd->zd_zilog, 0); - txg_wait_synced(spa_get_dsl(spa), 0); - - /* - * Close our dataset and close the pool. - */ - ztest_dataset_close(0); - spa_close(spa, FTAG); - kernel_fini(); - - /* - * Open and close the pool and dataset to induce log replay. - */ - kernel_init(FREAD | FWRITE); - VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); - ASSERT(spa_freeze_txg(spa) == UINT64_MAX); - VERIFY3U(0, ==, ztest_dataset_open(0)); - ztest_dataset_close(0); - - ztest_spa = spa; - txg_wait_synced(spa_get_dsl(spa), 0); - ztest_reguid(NULL, 0); - - spa_close(spa, FTAG); - kernel_fini(); -} - -void -print_time(hrtime_t t, char *timebuf) -{ - hrtime_t s = t / NANOSEC; - hrtime_t m = s / 60; - hrtime_t h = m / 60; - hrtime_t d = h / 24; - - s -= m * 60; - m -= h * 60; - h -= d * 24; - - timebuf[0] = '\0'; - - if (d) - (void) sprintf(timebuf, - "%llud%02lluh%02llum%02llus", d, h, m, s); - else if (h) - (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); - else if (m) - (void) sprintf(timebuf, "%llum%02llus", m, s); - else - (void) sprintf(timebuf, "%llus", s); -} - -static nvlist_t * -make_random_props() -{ - nvlist_t *props; - - VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - - if (ztest_random(2) == 0) - return (props); - VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); - - return (props); -} - -/* - * Import a storage pool with the given name. 
- */ -static void -ztest_import(ztest_shared_t *zs) -{ - libzfs_handle_t *hdl; - importargs_t args = { 0 }; - spa_t *spa; - nvlist_t *cfg = NULL; - int nsearch = 1; - char *searchdirs[nsearch]; - char *name = ztest_opts.zo_pool; - int flags = ZFS_IMPORT_MISSING_LOG; - int error; - - mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); - - kernel_init(FREAD | FWRITE); - hdl = libzfs_init(); - - searchdirs[0] = ztest_opts.zo_dir; - args.paths = nsearch; - args.path = searchdirs; - args.can_be_active = B_FALSE; - - error = zpool_tryimport(hdl, name, &cfg, &args); - if (error) - (void) fatal(0, "No pools found\n"); - - VERIFY0(spa_import(name, cfg, NULL, flags)); - VERIFY0(spa_open(name, &spa, FTAG)); - zs->zs_metaslab_sz = - 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; - spa_close(spa, FTAG); - - libzfs_fini(hdl); - kernel_fini(); - - if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); - ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); - } - - rw_destroy(&ztest_name_lock); - mutex_destroy(&ztest_vdev_lock); -} - -/* - * Create a storage pool with the given name and initial vdev size. - * Then test spa_freeze() functionality. - */ -static void -ztest_init(ztest_shared_t *zs) -{ - spa_t *spa; - nvlist_t *nvroot, *props; - - mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL); - mutex_init(&ztest_checkpoint_lock, NULL, USYNC_THREAD, NULL); - rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); - - kernel_init(FREAD | FWRITE); - - /* - * Create the storage pool. 
- */ - (void) spa_destroy(ztest_opts.zo_pool); - ztest_shared->zs_vdev_next_leaf = 0; - zs->zs_splits = 0; - zs->zs_mirrors = ztest_opts.zo_mirrors; - nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, - NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1); - props = make_random_props(); - for (int i = 0; i < SPA_FEATURES; i++) { - char buf[1024]; - (void) snprintf(buf, sizeof (buf), "feature@%s", - spa_feature_table[i].fi_uname); - VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); - } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); - nvlist_free(nvroot); - nvlist_free(props); - - VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); - zs->zs_metaslab_sz = - 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; - - spa_close(spa, FTAG); - - kernel_fini(); - - if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); - ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); - } - - rw_destroy(&ztest_name_lock); - mutex_destroy(&ztest_vdev_lock); - mutex_destroy(&ztest_checkpoint_lock); -} - -static void -setup_data_fd(void) -{ - static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; - - ztest_fd_data = mkstemp(ztest_name_data); - ASSERT3S(ztest_fd_data, >=, 0); - (void) unlink(ztest_name_data); -} - - -static int -shared_data_size(ztest_shared_hdr_t *hdr) -{ - int size; - - size = hdr->zh_hdr_size; - size += hdr->zh_opts_size; - size += hdr->zh_size; - size += hdr->zh_stats_size * hdr->zh_stats_count; - size += hdr->zh_ds_size * hdr->zh_ds_count; - - return (size); -} - -static void -setup_hdr(void) -{ - int size; - ztest_shared_hdr_t *hdr; - - hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), - PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); - ASSERT(hdr != MAP_FAILED); - - VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); - - hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); - hdr->zh_opts_size = sizeof (ztest_shared_opts_t); - hdr->zh_size = sizeof (ztest_shared_t); - 
hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); - hdr->zh_stats_count = ZTEST_FUNCS; - hdr->zh_ds_size = sizeof (ztest_shared_ds_t); - hdr->zh_ds_count = ztest_opts.zo_datasets; - - size = shared_data_size(hdr); - VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); - - (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); -} - -static void -setup_data(void) -{ - int size, offset; - ztest_shared_hdr_t *hdr; - uint8_t *buf; - - hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), - PROT_READ, MAP_SHARED, ztest_fd_data, 0); - ASSERT(hdr != MAP_FAILED); - - size = shared_data_size(hdr); - - (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); - hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), - PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); - ASSERT(hdr != MAP_FAILED); - buf = (uint8_t *)hdr; - - offset = hdr->zh_hdr_size; - ztest_shared_opts = (void *)&buf[offset]; - offset += hdr->zh_opts_size; - ztest_shared = (void *)&buf[offset]; - offset += hdr->zh_size; - ztest_shared_callstate = (void *)&buf[offset]; - offset += hdr->zh_stats_size * hdr->zh_stats_count; - ztest_shared_ds = (void *)&buf[offset]; -} - -static boolean_t -exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) -{ - pid_t pid; - int status; - char *cmdbuf = NULL; - - pid = fork(); - - if (cmd == NULL) { - cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); - (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); - cmd = cmdbuf; - } - - if (pid == -1) - fatal(1, "fork failed"); - - if (pid == 0) { /* child */ - char *emptyargv[2] = { cmd, NULL }; - char fd_data_str[12]; - - struct rlimit rl = { 1024, 1024 }; - (void) setrlimit(RLIMIT_NOFILE, &rl); - - (void) close(ztest_fd_rand); - VERIFY3U(11, >=, - snprintf(fd_data_str, 12, "%d", ztest_fd_data)); - VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); - - (void) enable_extended_FILE_stdio(-1, -1); - if (libpath != NULL) - VERIFY(0 == setenv("LD_LIBRARY_PATH", 
libpath, 1)); -#ifdef illumos - (void) execv(cmd, emptyargv); -#else - (void) execvp(cmd, emptyargv); -#endif - ztest_dump_core = B_FALSE; - fatal(B_TRUE, "exec failed: %s", cmd); - } - - if (cmdbuf != NULL) { - umem_free(cmdbuf, MAXPATHLEN); - cmd = NULL; - } - - while (waitpid(pid, &status, 0) != pid) - continue; - if (statusp != NULL) - *statusp = status; - - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - (void) fprintf(stderr, "child exited with code %d\n", - WEXITSTATUS(status)); - exit(2); - } - return (B_FALSE); - } else if (WIFSIGNALED(status)) { - if (!ignorekill || WTERMSIG(status) != SIGKILL) { - (void) fprintf(stderr, "child died with signal %d\n", - WTERMSIG(status)); - exit(3); - } - return (B_TRUE); - } else { - (void) fprintf(stderr, "something strange happened to child\n"); - exit(4); - /* NOTREACHED */ - } -} - -static void -ztest_run_init(void) -{ - ztest_shared_t *zs = ztest_shared; - - /* - * Blow away any existing copy of zpool.cache - */ - (void) remove(spa_config_path); - - if (ztest_opts.zo_init == 0) { - if (ztest_opts.zo_verbose >= 1) - (void) printf("Importing pool %s\n", - ztest_opts.zo_pool); - ztest_import(zs); - return; - } - - /* - * Create and initialize our storage pool. 
- */ - for (int i = 1; i <= ztest_opts.zo_init; i++) { - bzero(zs, sizeof (ztest_shared_t)); - if (ztest_opts.zo_verbose >= 3 && - ztest_opts.zo_init != 1) { - (void) printf("ztest_init(), pass %d\n", i); - } - ztest_init(zs); - } -} - -int -main(int argc, char **argv) -{ - int kills = 0; - int iters = 0; - int older = 0; - int newer = 0; - ztest_shared_t *zs; - ztest_info_t *zi; - ztest_shared_callstate_t *zc; - char timebuf[100]; - char numbuf[NN_NUMBUF_SZ]; - char *cmd; - boolean_t hasalt; - char *fd_data_str = getenv("ZTEST_FD_DATA"); - - (void) setvbuf(stdout, NULL, _IOLBF, 0); - - dprintf_setup(&argc, argv); - zfs_deadman_synctime_ms = 300000; - /* - * As two-word space map entries may not come up often (especially - * if pool and vdev sizes are small) we want to force at least some - * of them so the feature get tested. - */ - zfs_force_some_double_word_sm_entries = B_TRUE; - - /* - * Verify that even extensively damaged split blocks with many - * segments can be reconstructed in a reasonable amount of time - * when reconstruction is known to be possible. 
- */ - zfs_reconstruct_indirect_damage_fraction = 4; - - ztest_fd_rand = open("/dev/urandom", O_RDONLY); - ASSERT3S(ztest_fd_rand, >=, 0); - - if (!fd_data_str) { - process_options(argc, argv); - - setup_data_fd(); - setup_hdr(); - setup_data(); - bcopy(&ztest_opts, ztest_shared_opts, - sizeof (*ztest_shared_opts)); - } else { - ztest_fd_data = atoi(fd_data_str); - setup_data(); - bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); - } - ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); - - /* Override location of zpool.cache */ - VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache", - ztest_opts.zo_dir), !=, -1); - - ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), - UMEM_NOFAIL); - zs = ztest_shared; - - if (fd_data_str) { - metaslab_force_ganging = ztest_opts.zo_metaslab_force_ganging; - metaslab_df_alloc_threshold = - zs->zs_metaslab_df_alloc_threshold; - - if (zs->zs_do_init) - ztest_run_init(); - else - ztest_run(zs); - exit(0); - } - - hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); - - if (ztest_opts.zo_verbose >= 1) { - (void) printf("%llu vdevs, %d datasets, %d threads," - " %llu seconds...\n", - (u_longlong_t)ztest_opts.zo_vdevs, - ztest_opts.zo_datasets, - ztest_opts.zo_threads, - (u_longlong_t)ztest_opts.zo_time); - } - - cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - (void) strlcpy(cmd, getexecname(), MAXNAMELEN); - - zs->zs_do_init = B_TRUE; - if (strlen(ztest_opts.zo_alt_ztest) != 0) { - if (ztest_opts.zo_verbose >= 1) { - (void) printf("Executing older ztest for " - "initialization: %s\n", ztest_opts.zo_alt_ztest); - } - VERIFY(!exec_child(ztest_opts.zo_alt_ztest, - ztest_opts.zo_alt_libpath, B_FALSE, NULL)); - } else { - VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); - } - zs->zs_do_init = B_FALSE; - - zs->zs_proc_start = gethrtime(); - zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; - - for (int f = 0; f < ZTEST_FUNCS; f++) { - zi = &ztest_info[f]; - zc = 
ZTEST_GET_SHARED_CALLSTATE(f); - if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) - zc->zc_next = UINT64_MAX; - else - zc->zc_next = zs->zs_proc_start + - ztest_random(2 * zi->zi_interval[0] + 1); - } - - /* - * Run the tests in a loop. These tests include fault injection - * to verify that self-healing data works, and forced crashes - * to verify that we never lose on-disk consistency. - */ - while (gethrtime() < zs->zs_proc_stop) { - int status; - boolean_t killed; - - /* - * Initialize the workload counters for each function. - */ - for (int f = 0; f < ZTEST_FUNCS; f++) { - zc = ZTEST_GET_SHARED_CALLSTATE(f); - zc->zc_count = 0; - zc->zc_time = 0; - } - - /* Set the allocation switch size */ - zs->zs_metaslab_df_alloc_threshold = - ztest_random(zs->zs_metaslab_sz / 4) + 1; - - if (!hasalt || ztest_random(2) == 0) { - if (hasalt && ztest_opts.zo_verbose >= 1) { - (void) printf("Executing newer ztest: %s\n", - cmd); - } - newer++; - killed = exec_child(cmd, NULL, B_TRUE, &status); - } else { - if (hasalt && ztest_opts.zo_verbose >= 1) { - (void) printf("Executing older ztest: %s\n", - ztest_opts.zo_alt_ztest); - } - older++; - killed = exec_child(ztest_opts.zo_alt_ztest, - ztest_opts.zo_alt_libpath, B_TRUE, &status); - } - - if (killed) - kills++; - iters++; - - if (ztest_opts.zo_verbose >= 1) { - hrtime_t now = gethrtime(); - - now = MIN(now, zs->zs_proc_stop); - print_time(zs->zs_proc_stop - now, timebuf); - nicenum(zs->zs_space, numbuf, sizeof (numbuf)); - - (void) printf("Pass %3d, %8s, %3llu ENOSPC, " - "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", - iters, - WIFEXITED(status) ? 
"Complete" : "SIGKILL", - (u_longlong_t)zs->zs_enospc_count, - 100.0 * zs->zs_alloc / zs->zs_space, - numbuf, - 100.0 * (now - zs->zs_proc_start) / - (ztest_opts.zo_time * NANOSEC), timebuf); - } - - if (ztest_opts.zo_verbose >= 2) { - (void) printf("\nWorkload summary:\n\n"); - (void) printf("%7s %9s %s\n", - "Calls", "Time", "Function"); - (void) printf("%7s %9s %s\n", - "-----", "----", "--------"); - for (int f = 0; f < ZTEST_FUNCS; f++) { - Dl_info dli; - - zi = &ztest_info[f]; - zc = ZTEST_GET_SHARED_CALLSTATE(f); - print_time(zc->zc_time, timebuf); - (void) dladdr((void *)zi->zi_func, &dli); - (void) printf("%7llu %9s %s\n", - (u_longlong_t)zc->zc_count, timebuf, - dli.dli_sname); - } - (void) printf("\n"); - } - - if (!ztest_opts.zo_mmp_test) - ztest_run_zdb(ztest_opts.zo_pool); - } - - if (ztest_opts.zo_verbose >= 1) { - if (hasalt) { - (void) printf("%d runs of older ztest: %s\n", older, - ztest_opts.zo_alt_ztest); - (void) printf("%d runs of newer ztest: %s\n", newer, - cmd); - } - (void) printf("%d killed, %d completed, %.0f%% kill rate\n", - kills, iters - kills, (100.0 * kills) / MAX(1, iters)); - } - - umem_free(cmd, MAXNAMELEN); - - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c b/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c index 836eeccb8274..a66661fd9ab8 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c @@ -24,6 +24,7 @@ * Use is subject to license terms. 
*/ +#include <sys/types.h> #include <unistd.h> #include <fcntl.h> #include <dlfcn.h> diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c index 05f2785e6600..8f32890057f0 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c @@ -31,6 +31,7 @@ #include <assert.h> #include <elf.h> +#include <sys/types.h> #include <fcntl.h> #include <gelf.h> #include <limits.h> diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c index 0a3a10a76954..97da0c3a5ac2 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c @@ -77,7 +77,6 @@ #include <netdb.h> #include <netinet/in.h> #include <arpa/inet.h> -#include <arpa/nameser.h> #include <dt_module.h> #include <dt_printf.h> diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c index f7b4684b01d0..57a7db4ad0fd 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c @@ -44,12 +44,19 @@ #include <netdb.h> #include <netinet/in.h> #include <arpa/inet.h> -#include <arpa/nameser.h> - +#include <sys/byteorder.h> #include <dt_printf.h> #include <dt_string.h> #include <dt_impl.h> +#ifndef NS_IN6ADDRSZ +#define NS_IN6ADDRSZ 16 +#endif + +#ifndef NS_INADDRSZ +#define NS_INADDRSZ 4 +#endif + /*ARGSUSED*/ static int pfcheck_addr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c deleted file mode 100644 index c6fbfe97a9af..000000000000 --- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c +++ /dev/null @@ -1,1286 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file 
are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#include <solaris.h> -#include <inttypes.h> -#include <unistd.h> -#include <string.h> -#include <libintl.h> -#include <stdarg.h> -#include "libnvpair.h" - -/* - * libnvpair - A tools library for manipulating <name, value> pairs. - * - * This library provides routines packing an unpacking nv pairs - * for transporting data across process boundaries, transporting - * between kernel and userland, and possibly saving onto disk files. - */ - -/* - * Print control structure. 
- */ - -#define DEFINEOP(opname, vtype) \ - struct { \ - int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \ - const char *, vtype); \ - void *arg; \ - } opname - -#define DEFINEARROP(opname, vtype) \ - struct { \ - int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \ - const char *, vtype, uint_t); \ - void *arg; \ - } opname - -struct nvlist_printops { - DEFINEOP(print_boolean, int); - DEFINEOP(print_boolean_value, boolean_t); - DEFINEOP(print_byte, uchar_t); - DEFINEOP(print_int8, int8_t); - DEFINEOP(print_uint8, uint8_t); - DEFINEOP(print_int16, int16_t); - DEFINEOP(print_uint16, uint16_t); - DEFINEOP(print_int32, int32_t); - DEFINEOP(print_uint32, uint32_t); - DEFINEOP(print_int64, int64_t); - DEFINEOP(print_uint64, uint64_t); - DEFINEOP(print_double, double); - DEFINEOP(print_string, char *); - DEFINEOP(print_hrtime, hrtime_t); - DEFINEOP(print_nvlist, nvlist_t *); - DEFINEARROP(print_boolean_array, boolean_t *); - DEFINEARROP(print_byte_array, uchar_t *); - DEFINEARROP(print_int8_array, int8_t *); - DEFINEARROP(print_uint8_array, uint8_t *); - DEFINEARROP(print_int16_array, int16_t *); - DEFINEARROP(print_uint16_array, uint16_t *); - DEFINEARROP(print_int32_array, int32_t *); - DEFINEARROP(print_uint32_array, uint32_t *); - DEFINEARROP(print_int64_array, int64_t *); - DEFINEARROP(print_uint64_array, uint64_t *); - DEFINEARROP(print_string_array, char **); - DEFINEARROP(print_nvlist_array, nvlist_t **); -}; - -struct nvlist_prtctl { - FILE *nvprt_fp; /* output destination */ - enum nvlist_indent_mode nvprt_indent_mode; /* see above */ - int nvprt_indent; /* absolute indent, or tab depth */ - int nvprt_indentinc; /* indent or tab increment */ - const char *nvprt_nmfmt; /* member name format, max one %s */ - const char *nvprt_eomfmt; /* after member format, e.g. "\n" */ - const char *nvprt_btwnarrfmt; /* between array members */ - int nvprt_btwnarrfmt_nl; /* nvprt_eoamfmt includes newline? 
*/ - struct nvlist_printops *nvprt_dfltops; - struct nvlist_printops *nvprt_custops; -}; - -#define DFLTPRTOP(pctl, type) \ - ((pctl)->nvprt_dfltops->print_##type.op) - -#define DFLTPRTOPARG(pctl, type) \ - ((pctl)->nvprt_dfltops->print_##type.arg) - -#define CUSTPRTOP(pctl, type) \ - ((pctl)->nvprt_custops->print_##type.op) - -#define CUSTPRTOPARG(pctl, type) \ - ((pctl)->nvprt_custops->print_##type.arg) - -#define RENDER(pctl, type, nvl, name, val) \ - { \ - int done = 0; \ - if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \ - done = CUSTPRTOP(pctl, type)(pctl, \ - CUSTPRTOPARG(pctl, type), nvl, name, val); \ - } \ - if (!done) { \ - (void) DFLTPRTOP(pctl, type)(pctl, \ - DFLTPRTOPARG(pctl, type), nvl, name, val); \ - } \ - (void) fprintf(pctl->nvprt_fp, pctl->nvprt_eomfmt); \ - } - -#define ARENDER(pctl, type, nvl, name, arrp, count) \ - { \ - int done = 0; \ - if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \ - done = CUSTPRTOP(pctl, type)(pctl, \ - CUSTPRTOPARG(pctl, type), nvl, name, arrp, count); \ - } \ - if (!done) { \ - (void) DFLTPRTOP(pctl, type)(pctl, \ - DFLTPRTOPARG(pctl, type), nvl, name, arrp, count); \ - } \ - (void) fprintf(pctl->nvprt_fp, pctl->nvprt_eomfmt); \ - } - -static void nvlist_print_with_indent(nvlist_t *, nvlist_prtctl_t); - -/* - * ====================================================================== - * | | - * | Indentation | - * | | - * ====================================================================== - */ - -static void -indent(nvlist_prtctl_t pctl, int onemore) -{ - int depth; - - switch (pctl->nvprt_indent_mode) { - case NVLIST_INDENT_ABS: - (void) fprintf(pctl->nvprt_fp, "%*s", - pctl->nvprt_indent + onemore * pctl->nvprt_indentinc, ""); - break; - - case NVLIST_INDENT_TABBED: - depth = pctl->nvprt_indent + onemore; - while (depth-- > 0) - (void) fprintf(pctl->nvprt_fp, "\t"); - } -} - -/* - * ====================================================================== - * | | - * | Default nvlist member rendering 
functions. | - * | | - * ====================================================================== - */ - -/* - * Generate functions to print single-valued nvlist members. - * - * type_and_variant - suffix to form function name - * vtype - C type for the member value - * ptype - C type to cast value to for printing - * vfmt - format string for pair value, e.g "%d" or "0x%llx" - */ - -#define NVLIST_PRTFUNC(type_and_variant, vtype, ptype, vfmt) \ -static int \ -nvprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \ - nvlist_t *nvl, const char *name, vtype value) \ -{ \ - FILE *fp = pctl->nvprt_fp; \ - NOTE(ARGUNUSED(private)) \ - NOTE(ARGUNUSED(nvl)) \ - indent(pctl, 1); \ - (void) fprintf(fp, pctl->nvprt_nmfmt, name); \ - (void) fprintf(fp, vfmt, (ptype)value); \ - return (1); \ -} - -NVLIST_PRTFUNC(boolean, int, int, "%d") -NVLIST_PRTFUNC(boolean_value, boolean_t, int, "%d") -NVLIST_PRTFUNC(byte, uchar_t, uchar_t, "0x%2.2x") -NVLIST_PRTFUNC(int8, int8_t, int, "%d") -NVLIST_PRTFUNC(uint8, uint8_t, uint8_t, "0x%x") -NVLIST_PRTFUNC(int16, int16_t, int16_t, "%d") -NVLIST_PRTFUNC(uint16, uint16_t, uint16_t, "0x%x") -NVLIST_PRTFUNC(int32, int32_t, int32_t, "%d") -NVLIST_PRTFUNC(uint32, uint32_t, uint32_t, "0x%x") -NVLIST_PRTFUNC(int64, int64_t, longlong_t, "%lld") -NVLIST_PRTFUNC(uint64, uint64_t, u_longlong_t, "0x%llx") -NVLIST_PRTFUNC(double, double, double, "0x%f") -NVLIST_PRTFUNC(string, char *, char *, "%s") -NVLIST_PRTFUNC(hrtime, hrtime_t, hrtime_t, "0x%llx") - -/* - * Generate functions to print array-valued nvlist members. 
- */ - -#define NVLIST_ARRPRTFUNC(type_and_variant, vtype, ptype, vfmt) \ -static int \ -nvaprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \ - nvlist_t *nvl, const char *name, vtype *valuep, uint_t count) \ -{ \ - FILE *fp = pctl->nvprt_fp; \ - uint_t i; \ - NOTE(ARGUNUSED(private)) \ - NOTE(ARGUNUSED(nvl)) \ - for (i = 0; i < count; i++) { \ - if (i == 0 || pctl->nvprt_btwnarrfmt_nl) { \ - indent(pctl, 1); \ - (void) fprintf(fp, pctl->nvprt_nmfmt, name); \ - if (pctl->nvprt_btwnarrfmt_nl) \ - (void) fprintf(fp, "[%d]: ", i); \ - } \ - if (i != 0) \ - (void) fprintf(fp, pctl->nvprt_btwnarrfmt); \ - (void) fprintf(fp, vfmt, (ptype)valuep[i]); \ - } \ - return (1); \ -} - -NVLIST_ARRPRTFUNC(boolean_array, boolean_t, boolean_t, "%d") -NVLIST_ARRPRTFUNC(byte_array, uchar_t, uchar_t, "0x%2.2x") -NVLIST_ARRPRTFUNC(int8_array, int8_t, int8_t, "%d") -NVLIST_ARRPRTFUNC(uint8_array, uint8_t, uint8_t, "0x%x") -NVLIST_ARRPRTFUNC(int16_array, int16_t, int16_t, "%d") -NVLIST_ARRPRTFUNC(uint16_array, uint16_t, uint16_t, "0x%x") -NVLIST_ARRPRTFUNC(int32_array, int32_t, int32_t, "%d") -NVLIST_ARRPRTFUNC(uint32_array, uint32_t, uint32_t, "0x%x") -NVLIST_ARRPRTFUNC(int64_array, int64_t, longlong_t, "%lld") -NVLIST_ARRPRTFUNC(uint64_array, uint64_t, u_longlong_t, "0x%llx") -NVLIST_ARRPRTFUNC(string_array, char *, char *, "%s") - -/*ARGSUSED*/ -static int -nvprint_nvlist(nvlist_prtctl_t pctl, void *private, - nvlist_t *nvl, const char *name, nvlist_t *value) -{ - FILE *fp = pctl->nvprt_fp; - - indent(pctl, 1); - (void) fprintf(fp, "%s = (embedded nvlist)\n", name); - - pctl->nvprt_indent += pctl->nvprt_indentinc; - nvlist_print_with_indent(value, pctl); - pctl->nvprt_indent -= pctl->nvprt_indentinc; - - indent(pctl, 1); - (void) fprintf(fp, "(end %s)\n", name); - - return (1); -} - -/*ARGSUSED*/ -static int -nvaprint_nvlist_array(nvlist_prtctl_t pctl, void *private, - nvlist_t *nvl, const char *name, nvlist_t **valuep, uint_t count) -{ - FILE *fp = pctl->nvprt_fp; - 
uint_t i; - - indent(pctl, 1); - (void) fprintf(fp, "%s = (array of embedded nvlists)\n", name); - - for (i = 0; i < count; i++) { - indent(pctl, 1); - (void) fprintf(fp, "(start %s[%d])\n", name, i); - - pctl->nvprt_indent += pctl->nvprt_indentinc; - nvlist_print_with_indent(valuep[i], pctl); - pctl->nvprt_indent -= pctl->nvprt_indentinc; - - indent(pctl, 1); - (void) fprintf(fp, "(end %s[%d])\n", name, i); - } - - return (1); -} - -/* - * ====================================================================== - * | | - * | Interfaces that allow control over formatting. | - * | | - * ====================================================================== - */ - -void -nvlist_prtctl_setdest(nvlist_prtctl_t pctl, FILE *fp) -{ - pctl->nvprt_fp = fp; -} - -FILE * -nvlist_prtctl_getdest(nvlist_prtctl_t pctl) -{ - return (pctl->nvprt_fp); -} - - -void -nvlist_prtctl_setindent(nvlist_prtctl_t pctl, enum nvlist_indent_mode mode, - int start, int inc) -{ - if (mode < NVLIST_INDENT_ABS || mode > NVLIST_INDENT_TABBED) - mode = NVLIST_INDENT_TABBED; - - if (start < 0) - start = 0; - - if (inc < 0) - inc = 1; - - pctl->nvprt_indent_mode = mode; - pctl->nvprt_indent = start; - pctl->nvprt_indentinc = inc; -} - -void -nvlist_prtctl_doindent(nvlist_prtctl_t pctl, int onemore) -{ - indent(pctl, onemore); -} - - -void -nvlist_prtctl_setfmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which, - const char *fmt) -{ - switch (which) { - case NVLIST_FMT_MEMBER_NAME: - if (fmt == NULL) - fmt = "%s = "; - pctl->nvprt_nmfmt = fmt; - break; - - case NVLIST_FMT_MEMBER_POSTAMBLE: - if (fmt == NULL) - fmt = "\n"; - pctl->nvprt_eomfmt = fmt; - break; - - case NVLIST_FMT_BTWN_ARRAY: - if (fmt == NULL) { - pctl->nvprt_btwnarrfmt = " "; - pctl->nvprt_btwnarrfmt_nl = 0; - } else { - pctl->nvprt_btwnarrfmt = fmt; - pctl->nvprt_btwnarrfmt_nl = (strstr(fmt, "\n") != NULL); - } - break; - - default: - break; - } -} - - -void -nvlist_prtctl_dofmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which, ...) 
-{ - FILE *fp = pctl->nvprt_fp; - va_list ap; - char *name; - - va_start(ap, which); - - switch (which) { - case NVLIST_FMT_MEMBER_NAME: - name = va_arg(ap, char *); - (void) fprintf(fp, pctl->nvprt_nmfmt, name); - break; - - case NVLIST_FMT_MEMBER_POSTAMBLE: - (void) fprintf(fp, pctl->nvprt_eomfmt); - break; - - case NVLIST_FMT_BTWN_ARRAY: - (void) fprintf(fp, pctl->nvprt_btwnarrfmt); \ - break; - - default: - break; - } - - va_end(ap); -} - -/* - * ====================================================================== - * | | - * | Interfaces to allow appointment of replacement rendering functions.| - * | | - * ====================================================================== - */ - -#define NVLIST_PRINTCTL_REPLACE(type, vtype) \ -void \ -nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \ - int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype), \ - void *private) \ -{ \ - CUSTPRTOP(pctl, type) = func; \ - CUSTPRTOPARG(pctl, type) = private; \ -} - -NVLIST_PRINTCTL_REPLACE(boolean, int) -NVLIST_PRINTCTL_REPLACE(boolean_value, boolean_t) -NVLIST_PRINTCTL_REPLACE(byte, uchar_t) -NVLIST_PRINTCTL_REPLACE(int8, int8_t) -NVLIST_PRINTCTL_REPLACE(uint8, uint8_t) -NVLIST_PRINTCTL_REPLACE(int16, int16_t) -NVLIST_PRINTCTL_REPLACE(uint16, uint16_t) -NVLIST_PRINTCTL_REPLACE(int32, int32_t) -NVLIST_PRINTCTL_REPLACE(uint32, uint32_t) -NVLIST_PRINTCTL_REPLACE(int64, int64_t) -NVLIST_PRINTCTL_REPLACE(uint64, uint64_t) -NVLIST_PRINTCTL_REPLACE(double, double) -NVLIST_PRINTCTL_REPLACE(string, char *) -NVLIST_PRINTCTL_REPLACE(hrtime, hrtime_t) -NVLIST_PRINTCTL_REPLACE(nvlist, nvlist_t *) - -#define NVLIST_PRINTCTL_AREPLACE(type, vtype) \ -void \ -nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \ - int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, \ - uint_t), void *private) \ -{ \ - CUSTPRTOP(pctl, type) = func; \ - CUSTPRTOPARG(pctl, type) = private; \ -} - -NVLIST_PRINTCTL_AREPLACE(boolean_array, boolean_t *) -NVLIST_PRINTCTL_AREPLACE(byte_array, 
uchar_t *) -NVLIST_PRINTCTL_AREPLACE(int8_array, int8_t *) -NVLIST_PRINTCTL_AREPLACE(uint8_array, uint8_t *) -NVLIST_PRINTCTL_AREPLACE(int16_array, int16_t *) -NVLIST_PRINTCTL_AREPLACE(uint16_array, uint16_t *) -NVLIST_PRINTCTL_AREPLACE(int32_array, int32_t *) -NVLIST_PRINTCTL_AREPLACE(uint32_array, uint32_t *) -NVLIST_PRINTCTL_AREPLACE(int64_array, int64_t *) -NVLIST_PRINTCTL_AREPLACE(uint64_array, uint64_t *) -NVLIST_PRINTCTL_AREPLACE(string_array, char **) -NVLIST_PRINTCTL_AREPLACE(nvlist_array, nvlist_t **) - -/* - * ====================================================================== - * | | - * | Interfaces to manage nvlist_prtctl_t cookies. | - * | | - * ====================================================================== - */ - - -static const struct nvlist_printops defprtops = { - { nvprint_boolean, NULL }, - { nvprint_boolean_value, NULL }, - { nvprint_byte, NULL }, - { nvprint_int8, NULL }, - { nvprint_uint8, NULL }, - { nvprint_int16, NULL }, - { nvprint_uint16, NULL }, - { nvprint_int32, NULL }, - { nvprint_uint32, NULL }, - { nvprint_int64, NULL }, - { nvprint_uint64, NULL }, - { nvprint_double, NULL }, - { nvprint_string, NULL }, - { nvprint_hrtime, NULL }, - { nvprint_nvlist, NULL }, - { nvaprint_boolean_array, NULL }, - { nvaprint_byte_array, NULL }, - { nvaprint_int8_array, NULL }, - { nvaprint_uint8_array, NULL }, - { nvaprint_int16_array, NULL }, - { nvaprint_uint16_array, NULL }, - { nvaprint_int32_array, NULL }, - { nvaprint_uint32_array, NULL }, - { nvaprint_int64_array, NULL }, - { nvaprint_uint64_array, NULL }, - { nvaprint_string_array, NULL }, - { nvaprint_nvlist_array, NULL }, -}; - -static void -prtctl_defaults(FILE *fp, struct nvlist_prtctl *pctl, - struct nvlist_printops *ops) -{ - pctl->nvprt_fp = fp; - pctl->nvprt_indent_mode = NVLIST_INDENT_TABBED; - pctl->nvprt_indent = 0; - pctl->nvprt_indentinc = 1; - pctl->nvprt_nmfmt = "%s = "; - pctl->nvprt_eomfmt = "\n"; - pctl->nvprt_btwnarrfmt = " "; - pctl->nvprt_btwnarrfmt_nl = 0; - 
- pctl->nvprt_dfltops = (struct nvlist_printops *)&defprtops; - pctl->nvprt_custops = ops; -} - -nvlist_prtctl_t -nvlist_prtctl_alloc(void) -{ - struct nvlist_prtctl *pctl; - struct nvlist_printops *ops; - - if ((pctl = malloc(sizeof (*pctl))) == NULL) - return (NULL); - - if ((ops = calloc(1, sizeof (*ops))) == NULL) { - free(pctl); - return (NULL); - } - - prtctl_defaults(stdout, pctl, ops); - - return (pctl); -} - -void -nvlist_prtctl_free(nvlist_prtctl_t pctl) -{ - if (pctl != NULL) { - free(pctl->nvprt_custops); - free(pctl); - } -} - -/* - * ====================================================================== - * | | - * | Top-level print request interfaces. | - * | | - * ====================================================================== - */ - -/* - * nvlist_print - Prints elements in an event buffer - */ -static void -nvlist_print_with_indent(nvlist_t *nvl, nvlist_prtctl_t pctl) -{ - FILE *fp = pctl->nvprt_fp; - char *name; - uint_t nelem; - nvpair_t *nvp; - - if (nvl == NULL) - return; - - indent(pctl, 0); - (void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl)); - - nvp = nvlist_next_nvpair(nvl, NULL); - - while (nvp) { - data_type_t type = nvpair_type(nvp); - - name = nvpair_name(nvp); - nelem = 0; - - switch (type) { - case DATA_TYPE_BOOLEAN: { - RENDER(pctl, boolean, nvl, name, 1); - break; - } - case DATA_TYPE_BOOLEAN_VALUE: { - boolean_t val; - (void) nvpair_value_boolean_value(nvp, &val); - RENDER(pctl, boolean_value, nvl, name, val); - break; - } - case DATA_TYPE_BYTE: { - uchar_t val; - (void) nvpair_value_byte(nvp, &val); - RENDER(pctl, byte, nvl, name, val); - break; - } - case DATA_TYPE_INT8: { - int8_t val; - (void) nvpair_value_int8(nvp, &val); - RENDER(pctl, int8, nvl, name, val); - break; - } - case DATA_TYPE_UINT8: { - uint8_t val; - (void) nvpair_value_uint8(nvp, &val); - RENDER(pctl, uint8, nvl, name, val); - break; - } - case DATA_TYPE_INT16: { - int16_t val; - (void) nvpair_value_int16(nvp, &val); - RENDER(pctl, int16, nvl, 
name, val); - break; - } - case DATA_TYPE_UINT16: { - uint16_t val; - (void) nvpair_value_uint16(nvp, &val); - RENDER(pctl, uint16, nvl, name, val); - break; - } - case DATA_TYPE_INT32: { - int32_t val; - (void) nvpair_value_int32(nvp, &val); - RENDER(pctl, int32, nvl, name, val); - break; - } - case DATA_TYPE_UINT32: { - uint32_t val; - (void) nvpair_value_uint32(nvp, &val); - RENDER(pctl, uint32, nvl, name, val); - break; - } - case DATA_TYPE_INT64: { - int64_t val; - (void) nvpair_value_int64(nvp, &val); - RENDER(pctl, int64, nvl, name, val); - break; - } - case DATA_TYPE_UINT64: { - uint64_t val; - (void) nvpair_value_uint64(nvp, &val); - RENDER(pctl, uint64, nvl, name, val); - break; - } - case DATA_TYPE_DOUBLE: { - double val; - (void) nvpair_value_double(nvp, &val); - RENDER(pctl, double, nvl, name, val); - break; - } - case DATA_TYPE_STRING: { - char *val; - (void) nvpair_value_string(nvp, &val); - RENDER(pctl, string, nvl, name, val); - break; - } - case DATA_TYPE_BOOLEAN_ARRAY: { - boolean_t *val; - (void) nvpair_value_boolean_array(nvp, &val, &nelem); - ARENDER(pctl, boolean_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_BYTE_ARRAY: { - uchar_t *val; - (void) nvpair_value_byte_array(nvp, &val, &nelem); - ARENDER(pctl, byte_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_INT8_ARRAY: { - int8_t *val; - (void) nvpair_value_int8_array(nvp, &val, &nelem); - ARENDER(pctl, int8_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_UINT8_ARRAY: { - uint8_t *val; - (void) nvpair_value_uint8_array(nvp, &val, &nelem); - ARENDER(pctl, uint8_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_INT16_ARRAY: { - int16_t *val; - (void) nvpair_value_int16_array(nvp, &val, &nelem); - ARENDER(pctl, int16_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_UINT16_ARRAY: { - uint16_t *val; - (void) nvpair_value_uint16_array(nvp, &val, &nelem); - ARENDER(pctl, uint16_array, nvl, name, val, nelem); - break; - } - case 
DATA_TYPE_INT32_ARRAY: { - int32_t *val; - (void) nvpair_value_int32_array(nvp, &val, &nelem); - ARENDER(pctl, int32_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_UINT32_ARRAY: { - uint32_t *val; - (void) nvpair_value_uint32_array(nvp, &val, &nelem); - ARENDER(pctl, uint32_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_INT64_ARRAY: { - int64_t *val; - (void) nvpair_value_int64_array(nvp, &val, &nelem); - ARENDER(pctl, int64_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_UINT64_ARRAY: { - uint64_t *val; - (void) nvpair_value_uint64_array(nvp, &val, &nelem); - ARENDER(pctl, uint64_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_STRING_ARRAY: { - char **val; - (void) nvpair_value_string_array(nvp, &val, &nelem); - ARENDER(pctl, string_array, nvl, name, val, nelem); - break; - } - case DATA_TYPE_HRTIME: { - hrtime_t val; - (void) nvpair_value_hrtime(nvp, &val); - RENDER(pctl, hrtime, nvl, name, val); - break; - } - case DATA_TYPE_NVLIST: { - nvlist_t *val; - (void) nvpair_value_nvlist(nvp, &val); - RENDER(pctl, nvlist, nvl, name, val); - break; - } - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **val; - (void) nvpair_value_nvlist_array(nvp, &val, &nelem); - ARENDER(pctl, nvlist_array, nvl, name, val, nelem); - break; - } - default: - (void) fprintf(fp, " unknown data type (%d)", type); - break; - } - nvp = nvlist_next_nvpair(nvl, nvp); - } -} - -void -nvlist_print(FILE *fp, nvlist_t *nvl) -{ - struct nvlist_prtctl pc; - - prtctl_defaults(fp, &pc, NULL); - nvlist_print_with_indent(nvl, &pc); -} - -void -nvlist_prt(nvlist_t *nvl, nvlist_prtctl_t pctl) -{ - nvlist_print_with_indent(nvl, pctl); -} - -#define NVP(elem, type, vtype, ptype, format) { \ - vtype value; \ -\ - (void) nvpair_value_##type(elem, &value); \ - (void) printf("%*s%s: " format "\n", indent, "", \ - nvpair_name(elem), (ptype)value); \ -} - -#define NVPA(elem, type, vtype, ptype, format) { \ - uint_t i, count; \ - vtype *value; \ -\ - (void) 
nvpair_value_##type(elem, &value, &count); \ - for (i = 0; i < count; i++) { \ - (void) printf("%*s%s[%d]: " format "\n", indent, "", \ - nvpair_name(elem), i, (ptype)value[i]); \ - } \ -} - -/* - * Similar to nvlist_print() but handles arrays slightly differently. - */ -void -dump_nvlist(nvlist_t *list, int indent) -{ - nvpair_t *elem = NULL; - boolean_t bool_value; - boolean_t *bool_array_value; - nvlist_t *nvlist_value; - nvlist_t **nvlist_array_value; - uint_t i, count; - - if (list == NULL) { - return; - } - - while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { - switch (nvpair_type(elem)) { - case DATA_TYPE_BOOLEAN: - (void) printf("%*s%s\n", indent, "", nvpair_name(elem)); - break; - - case DATA_TYPE_BOOLEAN_VALUE: - (void) nvpair_value_boolean_value(elem, &bool_value); - (void) printf("%*s%s: %s\n", indent, "", - nvpair_name(elem), bool_value ? "true" : "false"); - break; - - case DATA_TYPE_BYTE: - NVP(elem, byte, uchar_t, int, "%u"); - break; - - case DATA_TYPE_INT8: - NVP(elem, int8, int8_t, int, "%d"); - break; - - case DATA_TYPE_UINT8: - NVP(elem, uint8, uint8_t, int, "%u"); - break; - - case DATA_TYPE_INT16: - NVP(elem, int16, int16_t, int, "%d"); - break; - - case DATA_TYPE_UINT16: - NVP(elem, uint16, uint16_t, int, "%u"); - break; - - case DATA_TYPE_INT32: - NVP(elem, int32, int32_t, long, "%ld"); - break; - - case DATA_TYPE_UINT32: - NVP(elem, uint32, uint32_t, ulong_t, "%lu"); - break; - - case DATA_TYPE_INT64: - NVP(elem, int64, int64_t, longlong_t, "%lld"); - break; - - case DATA_TYPE_UINT64: - NVP(elem, uint64, uint64_t, u_longlong_t, "%llu"); - break; - - case DATA_TYPE_STRING: - NVP(elem, string, char *, char *, "'%s'"); - break; - - case DATA_TYPE_BOOLEAN_ARRAY: - (void) nvpair_value_boolean_array(elem, - &bool_array_value, &count); - for (i = 0; i < count; i++) { - (void) printf("%*s%s[%d]: %s\n", indent, "", - nvpair_name(elem), i, - bool_array_value[i] ? 
"true" : "false"); - } - break; - - case DATA_TYPE_BYTE_ARRAY: - NVPA(elem, byte_array, uchar_t, int, "%u"); - break; - - case DATA_TYPE_INT8_ARRAY: - NVPA(elem, int8_array, int8_t, int, "%d"); - break; - - case DATA_TYPE_UINT8_ARRAY: - NVPA(elem, uint8_array, uint8_t, int, "%u"); - break; - - case DATA_TYPE_INT16_ARRAY: - NVPA(elem, int16_array, int16_t, int, "%d"); - break; - - case DATA_TYPE_UINT16_ARRAY: - NVPA(elem, uint16_array, uint16_t, int, "%u"); - break; - - case DATA_TYPE_INT32_ARRAY: - NVPA(elem, int32_array, int32_t, long, "%ld"); - break; - - case DATA_TYPE_UINT32_ARRAY: - NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu"); - break; - - case DATA_TYPE_INT64_ARRAY: - NVPA(elem, int64_array, int64_t, longlong_t, "%lld"); - break; - - case DATA_TYPE_UINT64_ARRAY: - NVPA(elem, uint64_array, uint64_t, u_longlong_t, - "%llu"); - break; - - case DATA_TYPE_STRING_ARRAY: - NVPA(elem, string_array, char *, char *, "'%s'"); - break; - - case DATA_TYPE_NVLIST: - (void) nvpair_value_nvlist(elem, &nvlist_value); - (void) printf("%*s%s:\n", indent, "", - nvpair_name(elem)); - dump_nvlist(nvlist_value, indent + 4); - break; - - case DATA_TYPE_NVLIST_ARRAY: - (void) nvpair_value_nvlist_array(elem, - &nvlist_array_value, &count); - for (i = 0; i < count; i++) { - (void) printf("%*s%s[%u]:\n", indent, "", - nvpair_name(elem), i); - dump_nvlist(nvlist_array_value[i], indent + 4); - } - break; - - default: - (void) printf(dgettext(TEXT_DOMAIN, "bad config type " - "%d for %s\n"), nvpair_type(elem), - nvpair_name(elem)); - } - } -} - -/* - * ====================================================================== - * | | - * | Misc private interface. | - * | | - * ====================================================================== - */ - -/* - * Determine if string 'value' matches 'nvp' value. The 'value' string is - * converted, depending on the type of 'nvp', prior to match. For numeric - * types, a radix independent sscanf conversion of 'value' is used. 
If 'nvp' - * is an array type, 'ai' is the index into the array against which we are - * checking for match. If nvp is of DATA_TYPE_STRING*, the caller can pass - * in a regex_t compilation of value in 'value_regex' to trigger regular - * expression string match instead of simple strcmp(). - * - * Return 1 on match, 0 on no-match, and -1 on error. If the error is - * related to value syntax error and 'ep' is non-NULL, *ep will point into - * the 'value' string at the location where the error exists. - * - * NOTE: It may be possible to move the non-regex_t version of this into - * common code used by library/kernel/boot. - */ -int -nvpair_value_match_regex(nvpair_t *nvp, int ai, - char *value, regex_t *value_regex, char **ep) -{ - char *evalue; - uint_t a_len; - int sr; - - if (ep) - *ep = NULL; - - if ((nvp == NULL) || (value == NULL)) - return (-1); /* error fail match - invalid args */ - - /* make sure array and index combination make sense */ - if ((nvpair_type_is_array(nvp) && (ai < 0)) || - (!nvpair_type_is_array(nvp) && (ai >= 0))) - return (-1); /* error fail match - bad index */ - - /* non-string values should be single 'chunk' */ - if ((nvpair_type(nvp) != DATA_TYPE_STRING) && - (nvpair_type(nvp) != DATA_TYPE_STRING_ARRAY)) { - value += strspn(value, " \t"); - evalue = value + strcspn(value, " \t"); - if (*evalue) { - if (ep) - *ep = evalue; - return (-1); /* error fail match - syntax */ - } - } - - sr = EOF; - switch (nvpair_type(nvp)) { - case DATA_TYPE_STRING: { - char *val; - - /* check string value for match */ - if (nvpair_value_string(nvp, &val) == 0) { - if (value_regex) { - if (regexec(value_regex, val, - (size_t)0, NULL, 0) == 0) - return (1); /* match */ - } else { - if (strcmp(value, val) == 0) - return (1); /* match */ - } - } - break; - } - case DATA_TYPE_STRING_ARRAY: { - char **val_array; - - /* check indexed string value of array for match */ - if ((nvpair_value_string_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len)) { - if 
(value_regex) { - if (regexec(value_regex, val_array[ai], - (size_t)0, NULL, 0) == 0) - return (1); - } else { - if (strcmp(value, val_array[ai]) == 0) - return (1); - } - } - break; - } - case DATA_TYPE_BYTE: { - uchar_t val, val_arg; - - /* scanf uchar_t from value and check for match */ - sr = sscanf(value, "%c", &val_arg); - if ((sr == 1) && (nvpair_value_byte(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_BYTE_ARRAY: { - uchar_t *val_array, val_arg; - - - /* check indexed value of array for match */ - sr = sscanf(value, "%c", &val_arg); - if ((sr == 1) && - (nvpair_value_byte_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT8: { - int8_t val, val_arg; - - /* scanf int8_t from value and check for match */ - sr = sscanf(value, "%"SCNi8, &val_arg); - if ((sr == 1) && - (nvpair_value_int8(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT8_ARRAY: { - int8_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi8, &val_arg); - if ((sr == 1) && - (nvpair_value_int8_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT8: { - uint8_t val, val_arg; - - /* scanf uint8_t from value and check for match */ - sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint8(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT8_ARRAY: { - uint8_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint8_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT16: { - int16_t val, val_arg; - - /* scanf int16_t from value and check for match */ - sr = 
sscanf(value, "%"SCNi16, &val_arg); - if ((sr == 1) && - (nvpair_value_int16(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT16_ARRAY: { - int16_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi16, &val_arg); - if ((sr == 1) && - (nvpair_value_int16_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT16: { - uint16_t val, val_arg; - - /* scanf uint16_t from value and check for match */ - sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint16(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT16_ARRAY: { - uint16_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint16_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT32: { - int32_t val, val_arg; - - /* scanf int32_t from value and check for match */ - sr = sscanf(value, "%"SCNi32, &val_arg); - if ((sr == 1) && - (nvpair_value_int32(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT32_ARRAY: { - int32_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi32, &val_arg); - if ((sr == 1) && - (nvpair_value_int32_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT32: { - uint32_t val, val_arg; - - /* scanf uint32_t from value and check for match */ - sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint32(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT32_ARRAY: { - uint32_t *val_array, val_arg; - - /* check indexed value of array for match 
*/ - sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint32_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT64: { - int64_t val, val_arg; - - /* scanf int64_t from value and check for match */ - sr = sscanf(value, "%"SCNi64, &val_arg); - if ((sr == 1) && - (nvpair_value_int64(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_INT64_ARRAY: { - int64_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi64, &val_arg); - if ((sr == 1) && - (nvpair_value_int64_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT64: { - uint64_t val_arg, val; - - /* scanf uint64_t from value and check for match */ - sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint64(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_UINT64_ARRAY: { - uint64_t *val_array, val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg); - if ((sr == 1) && - (nvpair_value_uint64_array(nvp, &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case DATA_TYPE_BOOLEAN_VALUE: { - int32_t val_arg; - boolean_t val; - - /* scanf boolean_t from value and check for match */ - sr = sscanf(value, "%"SCNi32, &val_arg); - if ((sr == 1) && - (nvpair_value_boolean_value(nvp, &val) == 0) && - (val == val_arg)) - return (1); - break; - } - case DATA_TYPE_BOOLEAN_ARRAY: { - boolean_t *val_array; - int32_t val_arg; - - /* check indexed value of array for match */ - sr = sscanf(value, "%"SCNi32, &val_arg); - if ((sr == 1) && - (nvpair_value_boolean_array(nvp, - &val_array, &a_len) == 0) && - (ai < a_len) && - (val_array[ai] == val_arg)) - return (1); - break; - } - case 
DATA_TYPE_HRTIME: - case DATA_TYPE_NVLIST: - case DATA_TYPE_NVLIST_ARRAY: - case DATA_TYPE_BOOLEAN: - case DATA_TYPE_DOUBLE: - case DATA_TYPE_UNKNOWN: - default: - /* - * unknown/unsupported data type - */ - return (-1); /* error fail match */ - } - - /* - * check to see if sscanf failed conversion, return approximate - * pointer to problem - */ - if (sr != 1) { - if (ep) - *ep = value; - return (-1); /* error fail match - syntax */ - } - - return (0); /* fail match */ -} - -int -nvpair_value_match(nvpair_t *nvp, int ai, char *value, char **ep) -{ - return (nvpair_value_match_regex(nvp, ai, value, NULL, ep)); -} diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h deleted file mode 100644 index b05669e506ba..000000000000 --- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
- */ - -#ifndef _LIBNVPAIR_H -#define _LIBNVPAIR_H - -#include <sys/nvpair.h> -#include <stdlib.h> -#include <stdio.h> -#include <regex.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * All interfaces described in this file are private to Solaris, and - * are subject to change at any time and without notice. The public - * nvlist/nvpair interfaces, as documented in manpage sections 3NVPAIR, - * are all imported from <sys/nvpair.h> included above. - */ - -extern int nvpair_value_match(nvpair_t *, int, char *, char **); -extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, - char **); - -extern void nvlist_print(FILE *, nvlist_t *); -extern int nvlist_print_json(FILE *, nvlist_t *); -extern void dump_nvlist(nvlist_t *, int); - -/* - * Private nvlist printing interface that allows the caller some control - * over output rendering (as opposed to nvlist_print and dump_nvlist). - * - * Obtain an opaque nvlist_prtctl_t cookie using nvlist_prtctl_alloc - * (NULL on failure); on return the cookie is set up for default formatting - * and rendering. Quote the cookie in subsequent customisation functions and - * then pass the cookie to nvlist_prt to render the nvlist. Finally, - * use nvlist_prtctl_free to release the cookie. - * - * For all nvlist_lookup_xxx and nvlist_lookup_xxx_array functions - * we have a corresponding brace of functions that appoint replacement - * rendering functions: - * - * extern void nvlist_prtctl_xxx(nvlist_prtctl_t, - * void (*)(nvlist_prtctl_t ctl, void *private, const char *name, - * xxxtype value)) - * - * and - * - * extern void nvlist_prtctl_xxx_array(nvlist_prtctl_t, - * void (*)(nvlist_prtctl_t ctl, void *private, const char *name, - * xxxtype value, uint_t count)) - * - * where xxxtype is the C datatype corresponding to xxx, eg int8_t for "int8" - * and char * for "string". 
The function that is appointed to render the - * specified datatype receives as arguments the cookie, the nvlist - * member name, the value of that member (or a pointer for array function), - * and (for array rendering functions) a count of the number of elements. - */ - -typedef struct nvlist_prtctl *nvlist_prtctl_t; /* opaque */ - -enum nvlist_indent_mode { - NVLIST_INDENT_ABS, /* Absolute indentation */ - NVLIST_INDENT_TABBED /* Indent with tabstops */ -}; - -extern nvlist_prtctl_t nvlist_prtctl_alloc(void); -extern void nvlist_prtctl_free(nvlist_prtctl_t); -extern void nvlist_prt(nvlist_t *, nvlist_prtctl_t); - -/* Output stream */ -extern void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *); -extern FILE *nvlist_prtctl_getdest(nvlist_prtctl_t); - -/* Indentation mode, start indent, indent increment; default tabbed/0/1 */ -extern void nvlist_prtctl_setindent(nvlist_prtctl_t, enum nvlist_indent_mode, - int, int); -extern void nvlist_prtctl_doindent(nvlist_prtctl_t, int); - -enum nvlist_prtctl_fmt { - NVLIST_FMT_MEMBER_NAME, /* name fmt; default "%s = " */ - NVLIST_FMT_MEMBER_POSTAMBLE, /* after nvlist member; default "\n" */ - NVLIST_FMT_BTWN_ARRAY /* between array members; default " " */ -}; - -extern void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, - const char *); -extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...); - -/* - * Function prototypes for interfaces that appoint a new rendering function - * for single-valued nvlist members. - * - * A replacement function receives arguments as follows: - * - * nvlist_prtctl_t Print control structure; do not change preferences - * for this object from a print callback function. - * - * void * The function-private cookie argument registered - * when the replacement function was appointed. - * - * nvlist_t * The full nvlist that is being processed. 
The - * rendering function is called to render a single - * member (name and value passed as below) but it may - * want to reference or incorporate other aspects of - * the full nvlist. - * - * const char * Member name to render - * - * valtype Value of the member to render - * - * The function must return non-zero if it has rendered output for this - * member, or 0 if it wants to default to standard rendering for this - * one member. - */ - -#define NVLIST_PRINTCTL_SVDECL(funcname, valtype) \ - extern void funcname(nvlist_prtctl_t, \ - int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, valtype), \ - void *) - -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean, int); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean_value, boolean_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_byte, uchar_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int8, int8_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint8, uint8_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int16, int16_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint16, uint16_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int32, int32_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint32, uint32_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int64, int64_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint64, uint64_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_double, double); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_string, char *); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_hrtime, hrtime_t); -NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_nvlist, nvlist_t *); - -#undef NVLIST_PRINTCTL_SVDECL /* was just for "clarity" above */ - -/* - * Function prototypes for interfaces that appoint a new rendering function - * for array-valued nvlist members. - * - * One additional argument is taken: uint_t for the number of array elements - * - * Return values as above. 
- */ -#define NVLIST_PRINTCTL_AVDECL(funcname, vtype) \ - extern void funcname(nvlist_prtctl_t, \ - int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, uint_t), \ - void *) - -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_boolean_array, boolean_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_byte_array, uchar_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int8_array, int8_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint8_array, uint8_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int16_array, int16_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint16_array, uint16_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int32_array, int32_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint32_array, uint32_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int64_array, int64_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint64_array, uint64_t *); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_string_array, char **); -NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_nvlist_array, nvlist_t **); - -#undef NVLIST_PRINTCTL_AVDECL /* was just for "clarity" above */ - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBNVPAIR_H */ diff --git a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c deleted file mode 100644 index 1aefc1004daf..000000000000 --- a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/nvpair.h> -#include <stdlib.h> - -/*ARGSUSED*/ -static void * -nv_alloc_sys(nv_alloc_t *nva, size_t size) -{ - return (malloc(size)); -} - -/*ARGSUSED*/ -static void -nv_free_sys(nv_alloc_t *nva, void *buf, size_t size) -{ - free(buf); -} - -const nv_alloc_ops_t system_ops_def = { - NULL, /* nv_ao_init() */ - NULL, /* nv_ao_fini() */ - nv_alloc_sys, /* nv_ao_alloc() */ - nv_free_sys, /* nv_ao_free() */ - NULL /* nv_ao_reset() */ -}; - -nv_alloc_t nv_alloc_nosleep_def = { - &system_ops_def, - NULL -}; - -nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def; diff --git a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c deleted file mode 100644 index b687a2f5761a..000000000000 --- a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c +++ /dev/null @@ -1,406 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ -/* - * Copyright (c) 2014, Joyent, Inc. - * Copyright (c) 2017 by Delphix. All rights reserved. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <wchar.h> -#include <sys/debug.h> - -#include "libnvpair.h" - -#define FPRINTF(fp, ...) \ - do { \ - if (fprintf(fp, __VA_ARGS__) < 0) \ - return (-1); \ - } while (0) - -/* - * When formatting a string for JSON output we must escape certain characters, - * as described in RFC4627. This applies to both member names and - * DATA_TYPE_STRING values. - * - * This function will only operate correctly if the following conditions are - * met: - * - * 1. The input String is encoded in the current locale. - * - * 2. The current locale includes the Basic Multilingual Plane (plane 0) - * as defined in the Unicode standard. - * - * The output will be entirely 7-bit ASCII (as a subset of UTF-8) with all - * representable Unicode characters included in their escaped numeric form. - */ -static int -nvlist_print_json_string(FILE *fp, const char *input) -{ - mbstate_t mbr; - wchar_t c; - size_t sz; - - bzero(&mbr, sizeof (mbr)); - - FPRINTF(fp, "\""); - while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) { - switch (c) { - case '"': - FPRINTF(fp, "\\\""); - break; - case '\n': - FPRINTF(fp, "\\n"); - break; - case '\r': - FPRINTF(fp, "\\r"); - break; - case '\\': - FPRINTF(fp, "\\\\"); - break; - case '\f': - FPRINTF(fp, "\\f"); - break; - case '\t': - FPRINTF(fp, "\\t"); - break; - case '\b': - FPRINTF(fp, "\\b"); - break; - default: - if ((c >= 0x00 && c <= 0x1f) || - (c > 0x7f && c <= 0xffff)) { - /* - * Render both Control Characters and Unicode - * characters in the Basic Multilingual Plane - * as JSON-escaped multibyte characters. - */ - FPRINTF(fp, "\\u%04x", (int)(0xffff & c)); - } else if (c >= 0x20 && c <= 0x7f) { - /* - * Render other 7-bit ASCII characters directly - * and drop other, unrepresentable characters. 
- */ - FPRINTF(fp, "%c", (int)(0xff & c)); - } - break; - } - input += sz; - } - - if (sz == (size_t)-1 || sz == (size_t)-2) { - /* - * We last read an invalid multibyte character sequence, - * so return an error. - */ - return (-1); - } - - FPRINTF(fp, "\""); - return (0); -} - -/* - * Dump a JSON-formatted representation of an nvlist to the provided FILE *. - * This routine does not output any new-lines or additional whitespace other - * than that contained in strings, nor does it call fflush(3C). - */ -int -nvlist_print_json(FILE *fp, nvlist_t *nvl) -{ - nvpair_t *curr; - boolean_t first = B_TRUE; - - FPRINTF(fp, "{"); - - for (curr = nvlist_next_nvpair(nvl, NULL); curr; - curr = nvlist_next_nvpair(nvl, curr)) { - data_type_t type = nvpair_type(curr); - - if (!first) - FPRINTF(fp, ","); - else - first = B_FALSE; - - if (nvlist_print_json_string(fp, nvpair_name(curr)) == -1) - return (-1); - FPRINTF(fp, ":"); - - switch (type) { - case DATA_TYPE_STRING: { - char *string = fnvpair_value_string(curr); - if (nvlist_print_json_string(fp, string) == -1) - return (-1); - break; - } - - case DATA_TYPE_BOOLEAN: { - FPRINTF(fp, "true"); - break; - } - - case DATA_TYPE_BOOLEAN_VALUE: { - FPRINTF(fp, "%s", fnvpair_value_boolean_value(curr) == - B_TRUE ? 
"true" : "false"); - break; - } - - case DATA_TYPE_BYTE: { - FPRINTF(fp, "%hhu", fnvpair_value_byte(curr)); - break; - } - - case DATA_TYPE_INT8: { - FPRINTF(fp, "%hhd", fnvpair_value_int8(curr)); - break; - } - - case DATA_TYPE_UINT8: { - FPRINTF(fp, "%hhu", fnvpair_value_uint8_t(curr)); - break; - } - - case DATA_TYPE_INT16: { - FPRINTF(fp, "%hd", fnvpair_value_int16(curr)); - break; - } - - case DATA_TYPE_UINT16: { - FPRINTF(fp, "%hu", fnvpair_value_uint16(curr)); - break; - } - - case DATA_TYPE_INT32: { - FPRINTF(fp, "%d", fnvpair_value_int32(curr)); - break; - } - - case DATA_TYPE_UINT32: { - FPRINTF(fp, "%u", fnvpair_value_uint32(curr)); - break; - } - - case DATA_TYPE_INT64: { - FPRINTF(fp, "%lld", - (long long)fnvpair_value_int64(curr)); - break; - } - - case DATA_TYPE_UINT64: { - FPRINTF(fp, "%llu", - (unsigned long long)fnvpair_value_uint64(curr)); - break; - } - - case DATA_TYPE_HRTIME: { - hrtime_t val; - VERIFY0(nvpair_value_hrtime(curr, &val)); - FPRINTF(fp, "%llu", (unsigned long long)val); - break; - } - - case DATA_TYPE_DOUBLE: { - double val; - VERIFY0(nvpair_value_double(curr, &val)); - FPRINTF(fp, "%f", val); - break; - } - - case DATA_TYPE_NVLIST: { - if (nvlist_print_json(fp, - fnvpair_value_nvlist(curr)) == -1) - return (-1); - break; - } - - case DATA_TYPE_STRING_ARRAY: { - char **val; - uint_t valsz, i; - VERIFY0(nvpair_value_string_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - if (nvlist_print_json_string(fp, val[i]) == -1) - return (-1); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **val; - uint_t valsz, i; - VERIFY0(nvpair_value_nvlist_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - if (nvlist_print_json(fp, val[i]) == -1) - return (-1); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_BOOLEAN_ARRAY: { - boolean_t *val; - uint_t valsz, i; - 
VERIFY0(nvpair_value_boolean_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, val[i] == B_TRUE ? - "true" : "false"); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_BYTE_ARRAY: { - uchar_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_byte_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hhu", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_UINT8_ARRAY: { - uint8_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_uint8_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hhu", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_INT8_ARRAY: { - int8_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_int8_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hhd", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_UINT16_ARRAY: { - uint16_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_uint16_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hu", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_INT16_ARRAY: { - int16_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_int16_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%hd", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_UINT32_ARRAY: { - uint32_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_uint32_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%u", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_INT32_ARRAY: { - int32_t *val; - uint_t valsz, i; - 
VERIFY0(nvpair_value_int32_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%d", val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_UINT64_ARRAY: { - uint64_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_uint64_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%llu", - (unsigned long long)val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_INT64_ARRAY: { - int64_t *val; - uint_t valsz, i; - VERIFY0(nvpair_value_int64_array(curr, &val, &valsz)); - FPRINTF(fp, "["); - for (i = 0; i < valsz; i++) { - if (i > 0) - FPRINTF(fp, ","); - FPRINTF(fp, "%lld", (long long)val[i]); - } - FPRINTF(fp, "]"); - break; - } - - case DATA_TYPE_UNKNOWN: - case DATA_TYPE_DONTCARE: - return (-1); - } - - } - - FPRINTF(fp, "}"); - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h deleted file mode 100644 index 7a5f8a8570c6..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h +++ /dev/null @@ -1,391 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _LIBUUTIL_H -#define _LIBUUTIL_H - -#include <solaris.h> -#include <sys/types.h> -#include <stdarg.h> -#include <stdio.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Standard flags codes. - */ -#define UU_DEFAULT 0 - -/* - * Standard error codes. - */ -#define UU_ERROR_NONE 0 /* no error */ -#define UU_ERROR_INVALID_ARGUMENT 1 /* invalid argument */ -#define UU_ERROR_UNKNOWN_FLAG 2 /* passed flag invalid */ -#define UU_ERROR_NO_MEMORY 3 /* out of memory */ -#define UU_ERROR_CALLBACK_FAILED 4 /* callback-initiated error */ -#define UU_ERROR_NOT_SUPPORTED 5 /* operation not supported */ -#define UU_ERROR_EMPTY 6 /* no value provided */ -#define UU_ERROR_UNDERFLOW 7 /* value is too small */ -#define UU_ERROR_OVERFLOW 8 /* value is too value */ -#define UU_ERROR_INVALID_CHAR 9 /* value contains unexpected char */ -#define UU_ERROR_INVALID_DIGIT 10 /* value contains digit not in base */ - -#define UU_ERROR_SYSTEM 99 /* underlying system error */ -#define UU_ERROR_UNKNOWN 100 /* error status not known */ - -/* - * Standard program exit codes. - */ -#define UU_EXIT_OK (*(uu_exit_ok())) -#define UU_EXIT_FATAL (*(uu_exit_fatal())) -#define UU_EXIT_USAGE (*(uu_exit_usage())) - -/* - * Exit status profiles. - */ -#define UU_PROFILE_DEFAULT 0 -#define UU_PROFILE_LAUNCHER 1 - -/* - * Error reporting functions. - */ -uint32_t uu_error(void); -const char *uu_strerror(uint32_t); - -/* - * Program notification functions. 
- */ -extern void uu_alt_exit(int); -extern const char *uu_setpname(char *); -extern const char *uu_getpname(void); -/*PRINTFLIKE1*/ -extern void uu_warn(const char *, ...); -extern void uu_vwarn(const char *, va_list); -/*PRINTFLIKE1*/ -extern void uu_die(const char *, ...) __NORETURN; -extern void uu_vdie(const char *, va_list) __NORETURN; -/*PRINTFLIKE2*/ -extern void uu_xdie(int, const char *, ...) __NORETURN; -extern void uu_vxdie(int, const char *, va_list) __NORETURN; - -/* - * Exit status functions (not to be used directly) - */ -extern int *uu_exit_ok(void); -extern int *uu_exit_fatal(void); -extern int *uu_exit_usage(void); - -/* - * string->number conversions - */ -extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t); -extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t); - -/* - * Debug print facility functions. - */ -typedef struct uu_dprintf uu_dprintf_t; - -typedef enum { - UU_DPRINTF_SILENT, - UU_DPRINTF_FATAL, - UU_DPRINTF_WARNING, - UU_DPRINTF_NOTICE, - UU_DPRINTF_INFO, - UU_DPRINTF_DEBUG -} uu_dprintf_severity_t; - -extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t, - uint_t); -/*PRINTFLIKE3*/ -extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t, - const char *, ...); -extern void uu_dprintf_destroy(uu_dprintf_t *); -extern const char *uu_dprintf_getname(uu_dprintf_t *); - -/* - * Identifier test flags and function. - */ -#define UU_NAME_DOMAIN 0x1 /* allow SUNW, or com.sun, prefix */ -#define UU_NAME_PATH 0x2 /* allow '/'-delimited paths */ - -int uu_check_name(const char *, uint_t); - -/* - * File creation functions. - */ -extern int uu_open_tmp(const char *dir, uint_t uflags); - -/* - * Convenience functions. 
- */ -#define UU_NELEM(a) (sizeof (a) / sizeof ((a)[0])) - -/*PRINTFLIKE1*/ -extern char *uu_msprintf(const char *format, ...); -extern void *uu_zalloc(size_t); -extern char *uu_strdup(const char *); -extern void uu_free(void *); - -extern boolean_t uu_strcaseeq(const char *a, const char *b); -extern boolean_t uu_streq(const char *a, const char *b); -extern char *uu_strndup(const char *s, size_t n); -extern boolean_t uu_strbw(const char *a, const char *b); -extern void *uu_memdup(const void *buf, size_t sz); -extern void uu_dump(FILE *out, const char *prefix, const void *buf, size_t len); - -/* - * Comparison function type definition. - * Developers should be careful in their use of the _private argument. If you - * break interface guarantees, you get undefined behavior. - */ -typedef int uu_compare_fn_t(const void *__left, const void *__right, - void *__private); - -/* - * Walk variant flags. - * A data structure need not provide support for all variants and - * combinations. Refer to the appropriate documentation. - */ -#define UU_WALK_ROBUST 0x00000001 /* walk can survive removes */ -#define UU_WALK_REVERSE 0x00000002 /* reverse walk order */ - -#define UU_WALK_PREORDER 0x00000010 /* walk tree in pre-order */ -#define UU_WALK_POSTORDER 0x00000020 /* walk tree in post-order */ - -/* - * Walk callback function return codes. - */ -#define UU_WALK_ERROR -1 -#define UU_WALK_NEXT 0 -#define UU_WALK_DONE 1 - -/* - * Walk callback function type definition. - */ -typedef int uu_walk_fn_t(void *_elem, void *_private); - -/* - * lists: opaque structures - */ -typedef struct uu_list_pool uu_list_pool_t; -typedef struct uu_list uu_list_t; - -typedef struct uu_list_node { - uintptr_t uln_opaque[2]; -} uu_list_node_t; - -typedef struct uu_list_walk uu_list_walk_t; - -typedef uintptr_t uu_list_index_t; - -/* - * lists: interface - * - * basic usage: - * typedef struct foo { - * ... - * uu_list_node_t foo_node; - * ... 
- * } foo_t; - * - * static int - * foo_compare(void *l_arg, void *r_arg, void *private) - * { - * foo_t *l = l_arg; - * foo_t *r = r_arg; - * - * if (... l greater than r ...) - * return (1); - * if (... l less than r ...) - * return (-1); - * return (0); - * } - * - * ... - * // at initialization time - * foo_pool = uu_list_pool_create("foo_pool", - * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, - * debugging? 0 : UU_AVL_POOL_DEBUG); - * ... - */ -uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t, - uu_compare_fn_t *, uint32_t); -#define UU_LIST_POOL_DEBUG 0x00000001 - -void uu_list_pool_destroy(uu_list_pool_t *); - -/* - * usage: - * - * foo_t *a; - * a = malloc(sizeof(*a)); - * uu_list_node_init(a, &a->foo_list, pool); - * ... - * uu_list_node_fini(a, &a->foo_list, pool); - * free(a); - */ -void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *); -void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *); - -uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t); -#define UU_LIST_DEBUG 0x00000001 -#define UU_LIST_SORTED 0x00000002 /* list is sorted */ - -void uu_list_destroy(uu_list_t *); /* list must be empty */ - -size_t uu_list_numnodes(uu_list_t *); - -void *uu_list_first(uu_list_t *); -void *uu_list_last(uu_list_t *); - -void *uu_list_next(uu_list_t *, void *); -void *uu_list_prev(uu_list_t *, void *); - -int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t); - -uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t); -void *uu_list_walk_next(uu_list_walk_t *); -void uu_list_walk_end(uu_list_walk_t *); - -void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *); -void uu_list_insert(uu_list_t *, void *, uu_list_index_t); - -void *uu_list_nearest_next(uu_list_t *, uu_list_index_t); -void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t); - -void *uu_list_teardown(uu_list_t *, void **); - -void uu_list_remove(uu_list_t *, void *); - -/* - * lists: interfaces for non-sorted 
lists only - */ -int uu_list_insert_before(uu_list_t *, void *_target, void *_elem); -int uu_list_insert_after(uu_list_t *, void *_target, void *_elem); - -/* - * avl trees: opaque structures - */ -typedef struct uu_avl_pool uu_avl_pool_t; -typedef struct uu_avl uu_avl_t; - -typedef struct uu_avl_node { -#ifdef _LP64 - uintptr_t uan_opaque[3]; -#else - uintptr_t uan_opaque[4]; -#endif -} uu_avl_node_t; - -typedef struct uu_avl_walk uu_avl_walk_t; - -typedef uintptr_t uu_avl_index_t; - -/* - * avl trees: interface - * - * basic usage: - * typedef struct foo { - * ... - * uu_avl_node_t foo_node; - * ... - * } foo_t; - * - * static int - * foo_compare(void *l_arg, void *r_arg, void *private) - * { - * foo_t *l = l_arg; - * foo_t *r = r_arg; - * - * if (... l greater than r ...) - * return (1); - * if (... l less than r ...) - * return (-1); - * return (0); - * } - * - * ... - * // at initialization time - * foo_pool = uu_avl_pool_create("foo_pool", - * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, - * debugging? 0 : UU_AVL_POOL_DEBUG); - * ... - */ -uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t, - uu_compare_fn_t *, uint32_t); -#define UU_AVL_POOL_DEBUG 0x00000001 - -void uu_avl_pool_destroy(uu_avl_pool_t *); - -/* - * usage: - * - * foo_t *a; - * a = malloc(sizeof(*a)); - * uu_avl_node_init(a, &a->foo_avl, pool); - * ... 
- * uu_avl_node_fini(a, &a->foo_avl, pool); - * free(a); - */ -void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *); -void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *); - -uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t); -#define UU_AVL_DEBUG 0x00000001 - -void uu_avl_destroy(uu_avl_t *); /* list must be empty */ - -size_t uu_avl_numnodes(uu_avl_t *); - -void *uu_avl_first(uu_avl_t *); -void *uu_avl_last(uu_avl_t *); - -void *uu_avl_next(uu_avl_t *, void *); -void *uu_avl_prev(uu_avl_t *, void *); - -int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t); - -uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t); -void *uu_avl_walk_next(uu_avl_walk_t *); -void uu_avl_walk_end(uu_avl_walk_t *); - -void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *); -void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t); - -void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t); -void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t); - -void *uu_avl_teardown(uu_avl_t *, void **); - -void uu_avl_remove(uu_avl_t *, void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBUUTIL_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h deleted file mode 100644 index 9ebaaedfd237..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBUUTIL_COMMON_H -#define _LIBUUTIL_COMMON_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <libuutil.h> -#include <libuutil_impl.h> - -#endif /* _LIBUUTIL_COMMON_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h deleted file mode 100644 index 9466e5974581..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _LIBUUTIL_IMPL_H -#define _LIBUUTIL_IMPL_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <libuutil.h> -#include <pthread.h> - -#include <sys/avl_impl.h> -#include <sys/byteorder.h> - -#ifdef __cplusplus -extern "C" { -#endif - -void uu_set_error(uint_t); -#pragma rarely_called(uu_set_error) - -/*PRINTFLIKE1*/ -void uu_panic(const char *format, ...); -#pragma rarely_called(uu_panic) - -struct uu_dprintf { - char *uud_name; - uu_dprintf_severity_t uud_severity; - uint_t uud_flags; -}; - -/* - * For debugging purposes, libuutil keeps around linked lists of all uu_lists - * and uu_avls, along with pointers to their parents. These can cause false - * negatives when looking for memory leaks, so we encode the pointers by - * storing them with swapped endianness; this is not perfect, but it's about - * the best we can do without wasting a lot of space. - */ -#ifdef _LP64 -#define UU_PTR_ENCODE(ptr) BSWAP_64((uintptr_t)(void *)(ptr)) -#else -#define UU_PTR_ENCODE(ptr) BSWAP_32((uintptr_t)(void *)(ptr)) -#endif - -#define UU_PTR_DECODE(ptr) ((void *)UU_PTR_ENCODE(ptr)) - -/* - * uu_list structures - */ -typedef struct uu_list_node_impl { - struct uu_list_node_impl *uln_next; - struct uu_list_node_impl *uln_prev; -} uu_list_node_impl_t; - -struct uu_list_walk { - uu_list_walk_t *ulw_next; - uu_list_walk_t *ulw_prev; - - uu_list_t *ulw_list; - int8_t ulw_dir; - uint8_t ulw_robust; - uu_list_node_impl_t *ulw_next_result; -}; - -struct uu_list { - uintptr_t ul_next_enc; - uintptr_t ul_prev_enc; - - uu_list_pool_t *ul_pool; - uintptr_t ul_parent_enc; /* encoded parent pointer */ - size_t ul_offset; - size_t ul_numnodes; - uint8_t ul_debug; - uint8_t ul_sorted; - uint8_t ul_index; /* mark for uu_list_index_ts */ - - uu_list_node_impl_t ul_null_node; - uu_list_walk_t ul_null_walk; /* for robust walkers */ -}; - -#define UU_LIST_PTR(ptr) ((uu_list_t *)UU_PTR_DECODE(ptr)) - -#define UU_LIST_POOL_MAXNAME 64 - -struct uu_list_pool { - uu_list_pool_t *ulp_next; - 
uu_list_pool_t *ulp_prev; - - char ulp_name[UU_LIST_POOL_MAXNAME]; - size_t ulp_nodeoffset; - size_t ulp_objsize; - uu_compare_fn_t *ulp_cmp; - uint8_t ulp_debug; - uint8_t ulp_last_index; - pthread_mutex_t ulp_lock; /* protects null_list */ - uu_list_t ulp_null_list; -}; - -/* - * uu_avl structures - */ -typedef struct avl_node uu_avl_node_impl_t; - -struct uu_avl_walk { - uu_avl_walk_t *uaw_next; - uu_avl_walk_t *uaw_prev; - - uu_avl_t *uaw_avl; - void *uaw_next_result; - int8_t uaw_dir; - uint8_t uaw_robust; -}; - -struct uu_avl { - uintptr_t ua_next_enc; - uintptr_t ua_prev_enc; - - uu_avl_pool_t *ua_pool; - uintptr_t ua_parent_enc; - uint8_t ua_debug; - uint8_t ua_index; /* mark for uu_avl_index_ts */ - - struct avl_tree ua_tree; - uu_avl_walk_t ua_null_walk; -}; - -#define UU_AVL_PTR(x) ((uu_avl_t *)UU_PTR_DECODE(x)) - -#define UU_AVL_POOL_MAXNAME 64 - -struct uu_avl_pool { - uu_avl_pool_t *uap_next; - uu_avl_pool_t *uap_prev; - - char uap_name[UU_AVL_POOL_MAXNAME]; - size_t uap_nodeoffset; - size_t uap_objsize; - uu_compare_fn_t *uap_cmp; - uint8_t uap_debug; - uint8_t uap_last_index; - pthread_mutex_t uap_lock; /* protects null_avl */ - uu_avl_t uap_null_avl; -}; - -/* - * atfork() handlers - */ -void uu_avl_lockup(void); -void uu_avl_release(void); - -void uu_list_lockup(void); -void uu_list_release(void); - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBUUTIL_IMPL_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c deleted file mode 100644 index 2bef759d525e..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -#include "libuutil_common.h" - -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -void * -uu_zalloc(size_t n) -{ - void *p = malloc(n); - - if (p == NULL) { - uu_set_error(UU_ERROR_SYSTEM); - return (NULL); - } - - (void) memset(p, 0, n); - - return (p); -} - -void -uu_free(void *p) -{ - free(p); -} - -char * -uu_strdup(const char *str) -{ - char *buf = NULL; - - if (str != NULL) { - size_t sz; - - sz = strlen(str) + 1; - buf = uu_zalloc(sz); - if (buf != NULL) - (void) memcpy(buf, str, sz); - } - return (buf); -} - -/* - * Duplicate up to n bytes of a string. Kind of sort of like - * strdup(strlcpy(s, n)). - */ -char * -uu_strndup(const char *s, size_t n) -{ - size_t len; - char *p; - - len = strnlen(s, n); - p = uu_zalloc(len + 1); - if (p == NULL) - return (NULL); - - if (len > 0) - (void) memcpy(p, s, len); - p[len] = '\0'; - - return (p); -} - -/* - * Duplicate a block of memory. Combines malloc with memcpy, much as - * strdup combines malloc, strlen, and strcpy. - */ -void * -uu_memdup(const void *buf, size_t sz) -{ - void *p; - - p = uu_zalloc(sz); - if (p == NULL) - return (NULL); - (void) memcpy(p, buf, sz); - return (p); -} - -char * -uu_msprintf(const char *format, ...) 
-{ - va_list args; - char attic[1]; - uint_t M, m; - char *b; - - va_start(args, format); - M = vsnprintf(attic, 1, format, args); - va_end(args); - - for (;;) { - m = M; - if ((b = uu_zalloc(m + 1)) == NULL) - return (NULL); - - va_start(args, format); - M = vsnprintf(b, m + 1, format, args); - va_end(args); - - if (M == m) - break; /* sizes match */ - - uu_free(b); - } - - return (b); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c deleted file mode 100644 index 5e78ececeec9..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c +++ /dev/null @@ -1,570 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/avl.h> - -static uu_avl_pool_t uu_null_apool = { &uu_null_apool, &uu_null_apool }; -static pthread_mutex_t uu_apool_list_lock = PTHREAD_MUTEX_INITIALIZER; - -/* - * The index mark change on every insert and delete, to catch stale - * references. - * - * We leave the low bit alone, since the avl code uses it. - */ -#define INDEX_MAX (sizeof (uintptr_t) - 2) -#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 2 : ((m) + 2) & INDEX_MAX) - -#define INDEX_DECODE(i) ((i) & ~INDEX_MAX) -#define INDEX_ENCODE(p, n) (((n) & ~INDEX_MAX) | (p)->ua_index) -#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ua_index) -#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0) - -/* - * When an element is inactive (not in a tree), we keep a marked pointer to - * its containing pool in its first word, and a NULL pointer in its second. - * - * On insert, we use these to verify that it comes from the correct pool. 
- */ -#define NODE_ARRAY(p, n) ((uintptr_t *)((uintptr_t)(n) + \ - (pp)->uap_nodeoffset)) - -#define POOL_TO_MARKER(pp) (((uintptr_t)(pp) | 1)) - -#define DEAD_MARKER 0xc4 - -uu_avl_pool_t * -uu_avl_pool_create(const char *name, size_t objsize, size_t nodeoffset, - uu_compare_fn_t *compare_func, uint32_t flags) -{ - uu_avl_pool_t *pp, *next, *prev; - - if (name == NULL || - uu_check_name(name, UU_NAME_DOMAIN) == -1 || - nodeoffset + sizeof (uu_avl_node_t) > objsize || - compare_func == NULL) { - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (NULL); - } - - if (flags & ~UU_AVL_POOL_DEBUG) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - pp = uu_zalloc(sizeof (uu_avl_pool_t)); - if (pp == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - (void) strlcpy(pp->uap_name, name, sizeof (pp->uap_name)); - pp->uap_nodeoffset = nodeoffset; - pp->uap_objsize = objsize; - pp->uap_cmp = compare_func; - if (flags & UU_AVL_POOL_DEBUG) - pp->uap_debug = 1; - pp->uap_last_index = 0; - - (void) pthread_mutex_init(&pp->uap_lock, NULL); - - pp->uap_null_avl.ua_next_enc = UU_PTR_ENCODE(&pp->uap_null_avl); - pp->uap_null_avl.ua_prev_enc = UU_PTR_ENCODE(&pp->uap_null_avl); - - (void) pthread_mutex_lock(&uu_apool_list_lock); - pp->uap_next = next = &uu_null_apool; - pp->uap_prev = prev = next->uap_prev; - next->uap_prev = pp; - prev->uap_next = pp; - (void) pthread_mutex_unlock(&uu_apool_list_lock); - - return (pp); -} - -void -uu_avl_pool_destroy(uu_avl_pool_t *pp) -{ - if (pp->uap_debug) { - if (pp->uap_null_avl.ua_next_enc != - UU_PTR_ENCODE(&pp->uap_null_avl) || - pp->uap_null_avl.ua_prev_enc != - UU_PTR_ENCODE(&pp->uap_null_avl)) { - uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has " - "outstanding avls, or is corrupt.\n", - (int)sizeof (pp->uap_name), pp->uap_name, - (void *)pp); - } - } - (void) pthread_mutex_lock(&uu_apool_list_lock); - pp->uap_next->uap_prev = pp->uap_prev; - pp->uap_prev->uap_next = pp->uap_next; - (void) 
pthread_mutex_unlock(&uu_apool_list_lock); - (void) pthread_mutex_destroy(&pp->uap_lock); - pp->uap_prev = NULL; - pp->uap_next = NULL; - uu_free(pp); -} - -void -uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp) -{ - uintptr_t *na = (uintptr_t *)np; - - if (pp->uap_debug) { - uintptr_t offset = (uintptr_t)np - (uintptr_t)base; - if (offset + sizeof (*np) > pp->uap_objsize) { - uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): " - "offset %ld doesn't fit in object (size %ld)\n", - base, (void *)np, (void *)pp, pp->uap_name, - (long)offset, (long)pp->uap_objsize); - } - if (offset != pp->uap_nodeoffset) { - uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): " - "offset %ld doesn't match pool's offset (%ld)\n", - base, (void *)np, (void *)pp, pp->uap_name, - (long)offset, (long)pp->uap_objsize); - } - } - - na[0] = POOL_TO_MARKER(pp); - na[1] = 0; -} - -void -uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp) -{ - uintptr_t *na = (uintptr_t *)np; - - if (pp->uap_debug) { - if (na[0] == DEAD_MARKER && na[1] == DEAD_MARKER) { - uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): " - "node already finied\n", - base, (void *)np, (void *)pp, pp->uap_name); - } - if (na[0] != POOL_TO_MARKER(pp) || na[1] != 0) { - uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): " - "node corrupt, in tree, or in different pool\n", - base, (void *)np, (void *)pp, pp->uap_name); - } - } - - na[0] = DEAD_MARKER; - na[1] = DEAD_MARKER; - na[2] = DEAD_MARKER; -} - -struct uu_avl_node_compare_info { - uu_compare_fn_t *ac_compare; - void *ac_private; - void *ac_right; - void *ac_found; -}; - -static int -uu_avl_node_compare(const void *l, const void *r) -{ - struct uu_avl_node_compare_info *info = - (struct uu_avl_node_compare_info *)l; - - int res = info->ac_compare(r, info->ac_right, info->ac_private); - - if (res == 0) { - if (info->ac_found == NULL) - info->ac_found = (void *)r; - return (-1); - } - if (res < 0) - return (1); - return (-1); -} - -uu_avl_t * 
-uu_avl_create(uu_avl_pool_t *pp, void *parent, uint32_t flags) -{ - uu_avl_t *ap, *next, *prev; - - if (flags & ~UU_AVL_DEBUG) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - ap = uu_zalloc(sizeof (*ap)); - if (ap == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - ap->ua_pool = pp; - ap->ua_parent_enc = UU_PTR_ENCODE(parent); - ap->ua_debug = pp->uap_debug || (flags & UU_AVL_DEBUG); - ap->ua_index = (pp->uap_last_index = INDEX_NEXT(pp->uap_last_index)); - - avl_create(&ap->ua_tree, &uu_avl_node_compare, pp->uap_objsize, - pp->uap_nodeoffset); - - ap->ua_null_walk.uaw_next = &ap->ua_null_walk; - ap->ua_null_walk.uaw_prev = &ap->ua_null_walk; - - (void) pthread_mutex_lock(&pp->uap_lock); - next = &pp->uap_null_avl; - prev = UU_PTR_DECODE(next->ua_prev_enc); - ap->ua_next_enc = UU_PTR_ENCODE(next); - ap->ua_prev_enc = UU_PTR_ENCODE(prev); - next->ua_prev_enc = UU_PTR_ENCODE(ap); - prev->ua_next_enc = UU_PTR_ENCODE(ap); - (void) pthread_mutex_unlock(&pp->uap_lock); - - return (ap); -} - -void -uu_avl_destroy(uu_avl_t *ap) -{ - uu_avl_pool_t *pp = ap->ua_pool; - - if (ap->ua_debug) { - if (avl_numnodes(&ap->ua_tree) != 0) { - uu_panic("uu_avl_destroy(%p): tree not empty\n", - (void *)ap); - } - if (ap->ua_null_walk.uaw_next != &ap->ua_null_walk || - ap->ua_null_walk.uaw_prev != &ap->ua_null_walk) { - uu_panic("uu_avl_destroy(%p): outstanding walkers\n", - (void *)ap); - } - } - (void) pthread_mutex_lock(&pp->uap_lock); - UU_AVL_PTR(ap->ua_next_enc)->ua_prev_enc = ap->ua_prev_enc; - UU_AVL_PTR(ap->ua_prev_enc)->ua_next_enc = ap->ua_next_enc; - (void) pthread_mutex_unlock(&pp->uap_lock); - ap->ua_prev_enc = UU_PTR_ENCODE(NULL); - ap->ua_next_enc = UU_PTR_ENCODE(NULL); - - ap->ua_pool = NULL; - avl_destroy(&ap->ua_tree); - - uu_free(ap); -} - -size_t -uu_avl_numnodes(uu_avl_t *ap) -{ - return (avl_numnodes(&ap->ua_tree)); -} - -void * -uu_avl_first(uu_avl_t *ap) -{ - return (avl_first(&ap->ua_tree)); -} - -void * 
-uu_avl_last(uu_avl_t *ap) -{ - return (avl_last(&ap->ua_tree)); -} - -void * -uu_avl_next(uu_avl_t *ap, void *node) -{ - return (AVL_NEXT(&ap->ua_tree, node)); -} - -void * -uu_avl_prev(uu_avl_t *ap, void *node) -{ - return (AVL_PREV(&ap->ua_tree, node)); -} - -static void -_avl_walk_init(uu_avl_walk_t *wp, uu_avl_t *ap, uint32_t flags) -{ - uu_avl_walk_t *next, *prev; - - int robust = (flags & UU_WALK_ROBUST); - int direction = (flags & UU_WALK_REVERSE)? -1 : 1; - - (void) memset(wp, 0, sizeof (*wp)); - wp->uaw_avl = ap; - wp->uaw_robust = robust; - wp->uaw_dir = direction; - - if (direction > 0) - wp->uaw_next_result = avl_first(&ap->ua_tree); - else - wp->uaw_next_result = avl_last(&ap->ua_tree); - - if (ap->ua_debug || robust) { - wp->uaw_next = next = &ap->ua_null_walk; - wp->uaw_prev = prev = next->uaw_prev; - next->uaw_prev = wp; - prev->uaw_next = wp; - } -} - -static void * -_avl_walk_advance(uu_avl_walk_t *wp, uu_avl_t *ap) -{ - void *np = wp->uaw_next_result; - - avl_tree_t *t = &ap->ua_tree; - - if (np == NULL) - return (NULL); - - wp->uaw_next_result = (wp->uaw_dir > 0)? 
AVL_NEXT(t, np) : - AVL_PREV(t, np); - - return (np); -} - -static void -_avl_walk_fini(uu_avl_walk_t *wp) -{ - if (wp->uaw_next != NULL) { - wp->uaw_next->uaw_prev = wp->uaw_prev; - wp->uaw_prev->uaw_next = wp->uaw_next; - wp->uaw_next = NULL; - wp->uaw_prev = NULL; - } - wp->uaw_avl = NULL; - wp->uaw_next_result = NULL; -} - -uu_avl_walk_t * -uu_avl_walk_start(uu_avl_t *ap, uint32_t flags) -{ - uu_avl_walk_t *wp; - - if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - wp = uu_zalloc(sizeof (*wp)); - if (wp == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - _avl_walk_init(wp, ap, flags); - return (wp); -} - -void * -uu_avl_walk_next(uu_avl_walk_t *wp) -{ - return (_avl_walk_advance(wp, wp->uaw_avl)); -} - -void -uu_avl_walk_end(uu_avl_walk_t *wp) -{ - _avl_walk_fini(wp); - uu_free(wp); -} - -int -uu_avl_walk(uu_avl_t *ap, uu_walk_fn_t *func, void *private, uint32_t flags) -{ - void *e; - uu_avl_walk_t my_walk; - - int status = UU_WALK_NEXT; - - if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (-1); - } - - _avl_walk_init(&my_walk, ap, flags); - while (status == UU_WALK_NEXT && - (e = _avl_walk_advance(&my_walk, ap)) != NULL) - status = (*func)(e, private); - _avl_walk_fini(&my_walk); - - if (status >= 0) - return (0); - uu_set_error(UU_ERROR_CALLBACK_FAILED); - return (-1); -} - -void -uu_avl_remove(uu_avl_t *ap, void *elem) -{ - uu_avl_walk_t *wp; - uu_avl_pool_t *pp = ap->ua_pool; - uintptr_t *na = NODE_ARRAY(pp, elem); - - if (ap->ua_debug) { - /* - * invalidate outstanding uu_avl_index_ts. - */ - ap->ua_index = INDEX_NEXT(ap->ua_index); - } - - /* - * Robust walkers most be advanced, if we are removing the node - * they are currently using. In debug mode, non-robust walkers - * are also on the walker list. 
- */ - for (wp = ap->ua_null_walk.uaw_next; wp != &ap->ua_null_walk; - wp = wp->uaw_next) { - if (wp->uaw_robust) { - if (elem == wp->uaw_next_result) - (void) _avl_walk_advance(wp, ap); - } else if (wp->uaw_next_result != NULL) { - uu_panic("uu_avl_remove(%p, %p): active non-robust " - "walker\n", (void *)ap, elem); - } - } - - avl_remove(&ap->ua_tree, elem); - - na[0] = POOL_TO_MARKER(pp); - na[1] = 0; -} - -void * -uu_avl_teardown(uu_avl_t *ap, void **cookie) -{ - void *elem = avl_destroy_nodes(&ap->ua_tree, cookie); - - if (elem != NULL) { - uu_avl_pool_t *pp = ap->ua_pool; - uintptr_t *na = NODE_ARRAY(pp, elem); - - na[0] = POOL_TO_MARKER(pp); - na[1] = 0; - } - return (elem); -} - -void * -uu_avl_find(uu_avl_t *ap, void *elem, void *private, uu_avl_index_t *out) -{ - struct uu_avl_node_compare_info info; - void *result; - - info.ac_compare = ap->ua_pool->uap_cmp; - info.ac_private = private; - info.ac_right = elem; - info.ac_found = NULL; - - result = avl_find(&ap->ua_tree, &info, out); - if (out != NULL) - *out = INDEX_ENCODE(ap, *out); - - if (ap->ua_debug && result != NULL) - uu_panic("uu_avl_find: internal error: avl_find succeeded\n"); - - return (info.ac_found); -} - -void -uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx) -{ - if (ap->ua_debug) { - uu_avl_pool_t *pp = ap->ua_pool; - uintptr_t *na = NODE_ARRAY(pp, elem); - - if (na[1] != 0) - uu_panic("uu_avl_insert(%p, %p, %p): node already " - "in tree, or corrupt\n", - (void *)ap, elem, (void *)idx); - if (na[0] == 0) - uu_panic("uu_avl_insert(%p, %p, %p): node not " - "initialized\n", - (void *)ap, elem, (void *)idx); - if (na[0] != POOL_TO_MARKER(pp)) - uu_panic("uu_avl_insert(%p, %p, %p): node from " - "other pool, or corrupt\n", - (void *)ap, elem, (void *)idx); - - if (!INDEX_VALID(ap, idx)) - uu_panic("uu_avl_insert(%p, %p, %p): %s\n", - (void *)ap, elem, (void *)idx, - INDEX_CHECK(idx)? "outdated index" : - "invalid index"); - - /* - * invalidate outstanding uu_avl_index_ts. 
- */ - ap->ua_index = INDEX_NEXT(ap->ua_index); - } - avl_insert(&ap->ua_tree, elem, INDEX_DECODE(idx)); -} - -void * -uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx) -{ - if (ap->ua_debug && !INDEX_VALID(ap, idx)) - uu_panic("uu_avl_nearest_next(%p, %p): %s\n", - (void *)ap, (void *)idx, INDEX_CHECK(idx)? - "outdated index" : "invalid index"); - return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER)); -} - -void * -uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx) -{ - if (ap->ua_debug && !INDEX_VALID(ap, idx)) - uu_panic("uu_avl_nearest_prev(%p, %p): %s\n", - (void *)ap, (void *)idx, INDEX_CHECK(idx)? - "outdated index" : "invalid index"); - return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE)); -} - -/* - * called from uu_lockup() and uu_release(), as part of our fork1()-safety. - */ -void -uu_avl_lockup(void) -{ - uu_avl_pool_t *pp; - - (void) pthread_mutex_lock(&uu_apool_list_lock); - for (pp = uu_null_apool.uap_next; pp != &uu_null_apool; - pp = pp->uap_next) - (void) pthread_mutex_lock(&pp->uap_lock); -} - -void -uu_avl_release(void) -{ - uu_avl_pool_t *pp; - - for (pp = uu_null_apool.uap_next; pp != &uu_null_apool; - pp = pp->uap_next) - (void) pthread_mutex_unlock(&pp->uap_lock); - (void) pthread_mutex_unlock(&uu_apool_list_lock); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c deleted file mode 100644 index 528c3e7f6d25..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <errno.h> -#include <libintl.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define FACILITY_FMT "%s (%s): " - -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - -static const char * -strseverity(uu_dprintf_severity_t severity) -{ - switch (severity) { - case UU_DPRINTF_SILENT: - return (dgettext(TEXT_DOMAIN, "silent")); - case UU_DPRINTF_FATAL: - return (dgettext(TEXT_DOMAIN, "FATAL")); - case UU_DPRINTF_WARNING: - return (dgettext(TEXT_DOMAIN, "WARNING")); - case UU_DPRINTF_NOTICE: - return (dgettext(TEXT_DOMAIN, "note")); - case UU_DPRINTF_INFO: - return (dgettext(TEXT_DOMAIN, "info")); - case UU_DPRINTF_DEBUG: - return (dgettext(TEXT_DOMAIN, "debug")); - default: - return (dgettext(TEXT_DOMAIN, "unspecified")); - } -} - -uu_dprintf_t * -uu_dprintf_create(const char *name, uu_dprintf_severity_t severity, - uint_t flags) -{ - uu_dprintf_t *D; - - if (uu_check_name(name, UU_NAME_DOMAIN) == -1) { - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (NULL); - } - - if ((D = uu_zalloc(sizeof (uu_dprintf_t))) == NULL) - return (NULL); - - if (name != NULL) { - D->uud_name = strdup(name); - if (D->uud_name == NULL) { - uu_free(D); - return (NULL); - } - } else { - D->uud_name = NULL; - } - - D->uud_severity = severity; - D->uud_flags = flags; 
- - return (D); -} - -/*PRINTFLIKE3*/ -void -uu_dprintf(uu_dprintf_t *D, uu_dprintf_severity_t severity, - const char *format, ...) -{ - va_list alist; - - /* XXX Assert that severity is not UU_DPRINTF_SILENT. */ - - if (severity > D->uud_severity) - return; - - (void) fprintf(stderr, FACILITY_FMT, D->uud_name, - strseverity(severity)); - - va_start(alist, format); - (void) vfprintf(stderr, format, alist); - va_end(alist); -} - -void -uu_dprintf_destroy(uu_dprintf_t *D) -{ - if (D->uud_name) - free(D->uud_name); - - uu_free(D); -} - -const char * -uu_dprintf_getname(uu_dprintf_t *D) -{ - return (D->uud_name); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c deleted file mode 100644 index 9a643845f8c2..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <string.h> - -/* - * We require names of the form: - * [provider,]identifier[/[provider,]identifier]... - * - * Where provider is either a stock symbol (SUNW) or a java-style reversed - * domain name (com.sun). - * - * Both providers and identifiers must start with a letter, and may - * only contain alphanumerics, dashes, and underlines. Providers - * may also contain periods. - * - * Note that we do _not_ use the macros in <ctype.h>, since they are affected - * by the current locale settings. - */ - -#define IS_ALPHA(c) \ - (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) - -#define IS_DIGIT(c) \ - ((c) >= '0' && (c) <= '9') - -static int -is_valid_ident(const char *s, const char *e, int allowdot) -{ - char c; - - if (s >= e) - return (0); /* name is empty */ - - c = *s++; - if (!IS_ALPHA(c)) - return (0); /* does not start with letter */ - - while (s < e && (c = *s++) != 0) { - if (IS_ALPHA(c) || IS_DIGIT(c) || c == '-' || c == '_' || - (allowdot && c == '.')) - continue; - return (0); /* invalid character */ - } - return (1); -} - -static int -is_valid_component(const char *b, const char *e, uint_t flags) -{ - char *sp; - - if (flags & UU_NAME_DOMAIN) { - sp = strchr(b, ','); - if (sp != NULL && sp < e) { - if (!is_valid_ident(b, sp, 1)) - return (0); - b = sp + 1; - } - } - - return (is_valid_ident(b, e, 0)); -} - -int -uu_check_name(const char *name, uint_t flags) -{ - const char *end = name + strlen(name); - const char *p; - - if (flags & ~(UU_NAME_DOMAIN | UU_NAME_PATH)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (-1); - } - - if (!(flags & UU_NAME_PATH)) { - if (!is_valid_component(name, end, flags)) - goto bad; - return (0); - } - - while ((p = strchr(name, '/')) != NULL) { - if (!is_valid_component(name, p - 1, flags)) - goto bad; - name = p + 1; - } - if (!is_valid_component(name, end, flags)) - goto bad; - - return (0); - -bad: - 
uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (-1); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c deleted file mode 100644 index 35c7ba800103..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/time.h> - -#define ELEM_TO_NODE(lp, e) \ - ((uu_list_node_impl_t *)((uintptr_t)(e) + (lp)->ul_offset)) - -#define NODE_TO_ELEM(lp, n) \ - ((void *)((uintptr_t)(n) - (lp)->ul_offset)) - -/* - * uu_list_index_ts define a location for insertion. They are simply a - * pointer to the object after the insertion point. We store a mark - * in the low-bits of the index, to help prevent mistakes. - * - * When debugging, the index mark changes on every insert and delete, to - * catch stale references. 
- */ -#define INDEX_MAX (sizeof (uintptr_t) - 1) -#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 1 : ((m) + 1) & INDEX_MAX) - -#define INDEX_TO_NODE(i) ((uu_list_node_impl_t *)((i) & ~INDEX_MAX)) -#define NODE_TO_INDEX(p, n) (((uintptr_t)(n) & ~INDEX_MAX) | (p)->ul_index) -#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ul_index) -#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0) - -#define POOL_TO_MARKER(pp) ((void *)((uintptr_t)(pp) | 1)) - -static uu_list_pool_t uu_null_lpool = { &uu_null_lpool, &uu_null_lpool }; -static pthread_mutex_t uu_lpool_list_lock = PTHREAD_MUTEX_INITIALIZER; - -uu_list_pool_t * -uu_list_pool_create(const char *name, size_t objsize, - size_t nodeoffset, uu_compare_fn_t *compare_func, uint32_t flags) -{ - uu_list_pool_t *pp, *next, *prev; - - if (name == NULL || - uu_check_name(name, UU_NAME_DOMAIN) == -1 || - nodeoffset + sizeof (uu_list_node_t) > objsize) { - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (NULL); - } - - if (flags & ~UU_LIST_POOL_DEBUG) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - pp = uu_zalloc(sizeof (uu_list_pool_t)); - if (pp == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - (void) strlcpy(pp->ulp_name, name, sizeof (pp->ulp_name)); - pp->ulp_nodeoffset = nodeoffset; - pp->ulp_objsize = objsize; - pp->ulp_cmp = compare_func; - if (flags & UU_LIST_POOL_DEBUG) - pp->ulp_debug = 1; - pp->ulp_last_index = 0; - - (void) pthread_mutex_init(&pp->ulp_lock, NULL); - - pp->ulp_null_list.ul_next_enc = UU_PTR_ENCODE(&pp->ulp_null_list); - pp->ulp_null_list.ul_prev_enc = UU_PTR_ENCODE(&pp->ulp_null_list); - - (void) pthread_mutex_lock(&uu_lpool_list_lock); - pp->ulp_next = next = &uu_null_lpool; - pp->ulp_prev = prev = next->ulp_prev; - next->ulp_prev = pp; - prev->ulp_next = pp; - (void) pthread_mutex_unlock(&uu_lpool_list_lock); - - return (pp); -} - -void -uu_list_pool_destroy(uu_list_pool_t *pp) -{ - if (pp->ulp_debug) { - if (pp->ulp_null_list.ul_next_enc != - 
UU_PTR_ENCODE(&pp->ulp_null_list) || - pp->ulp_null_list.ul_prev_enc != - UU_PTR_ENCODE(&pp->ulp_null_list)) { - uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has " - "outstanding lists, or is corrupt.\n", - (int)sizeof (pp->ulp_name), pp->ulp_name, - (void *)pp); - } - } - (void) pthread_mutex_lock(&uu_lpool_list_lock); - pp->ulp_next->ulp_prev = pp->ulp_prev; - pp->ulp_prev->ulp_next = pp->ulp_next; - (void) pthread_mutex_unlock(&uu_lpool_list_lock); - pp->ulp_prev = NULL; - pp->ulp_next = NULL; - uu_free(pp); -} - -void -uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp) -{ - uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg; - - if (pp->ulp_debug) { - uintptr_t offset = (uintptr_t)np - (uintptr_t)base; - if (offset + sizeof (*np) > pp->ulp_objsize) { - uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): " - "offset %ld doesn't fit in object (size %ld)\n", - base, (void *)np, (void *)pp, pp->ulp_name, - (long)offset, (long)pp->ulp_objsize); - } - if (offset != pp->ulp_nodeoffset) { - uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): " - "offset %ld doesn't match pool's offset (%ld)\n", - base, (void *)np, (void *)pp, pp->ulp_name, - (long)offset, (long)pp->ulp_objsize); - } - } - np->uln_next = POOL_TO_MARKER(pp); - np->uln_prev = NULL; -} - -void -uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp) -{ - uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg; - - if (pp->ulp_debug) { - if (np->uln_next == NULL && - np->uln_prev == NULL) { - uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): " - "node already finied\n", - base, (void *)np_arg, (void *)pp, pp->ulp_name); - } - if (np->uln_next != POOL_TO_MARKER(pp) || - np->uln_prev != NULL) { - uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): " - "node corrupt or on list\n", - base, (void *)np_arg, (void *)pp, pp->ulp_name); - } - } - np->uln_next = NULL; - np->uln_prev = NULL; -} - -uu_list_t * -uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t 
flags) -{ - uu_list_t *lp, *next, *prev; - - if (flags & ~(UU_LIST_DEBUG | UU_LIST_SORTED)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - if ((flags & UU_LIST_SORTED) && pp->ulp_cmp == NULL) { - if (pp->ulp_debug) - uu_panic("uu_list_create(%p, ...): requested " - "UU_LIST_SORTED, but pool has no comparison func\n", - (void *)pp); - uu_set_error(UU_ERROR_NOT_SUPPORTED); - return (NULL); - } - - lp = uu_zalloc(sizeof (*lp)); - if (lp == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - lp->ul_pool = pp; - lp->ul_parent_enc = UU_PTR_ENCODE(parent); - lp->ul_offset = pp->ulp_nodeoffset; - lp->ul_debug = pp->ulp_debug || (flags & UU_LIST_DEBUG); - lp->ul_sorted = (flags & UU_LIST_SORTED); - lp->ul_numnodes = 0; - lp->ul_index = (pp->ulp_last_index = INDEX_NEXT(pp->ulp_last_index)); - - lp->ul_null_node.uln_next = &lp->ul_null_node; - lp->ul_null_node.uln_prev = &lp->ul_null_node; - - lp->ul_null_walk.ulw_next = &lp->ul_null_walk; - lp->ul_null_walk.ulw_prev = &lp->ul_null_walk; - - (void) pthread_mutex_lock(&pp->ulp_lock); - next = &pp->ulp_null_list; - prev = UU_PTR_DECODE(next->ul_prev_enc); - lp->ul_next_enc = UU_PTR_ENCODE(next); - lp->ul_prev_enc = UU_PTR_ENCODE(prev); - next->ul_prev_enc = UU_PTR_ENCODE(lp); - prev->ul_next_enc = UU_PTR_ENCODE(lp); - (void) pthread_mutex_unlock(&pp->ulp_lock); - - return (lp); -} - -void -uu_list_destroy(uu_list_t *lp) -{ - uu_list_pool_t *pp = lp->ul_pool; - - if (lp->ul_debug) { - if (lp->ul_null_node.uln_next != &lp->ul_null_node || - lp->ul_null_node.uln_prev != &lp->ul_null_node) { - uu_panic("uu_list_destroy(%p): list not empty\n", - (void *)lp); - } - if (lp->ul_numnodes != 0) { - uu_panic("uu_list_destroy(%p): numnodes is nonzero, " - "but list is empty\n", (void *)lp); - } - if (lp->ul_null_walk.ulw_next != &lp->ul_null_walk || - lp->ul_null_walk.ulw_prev != &lp->ul_null_walk) { - uu_panic("uu_list_destroy(%p): outstanding walkers\n", - (void *)lp); - } - } - - (void) 
pthread_mutex_lock(&pp->ulp_lock); - UU_LIST_PTR(lp->ul_next_enc)->ul_prev_enc = lp->ul_prev_enc; - UU_LIST_PTR(lp->ul_prev_enc)->ul_next_enc = lp->ul_next_enc; - (void) pthread_mutex_unlock(&pp->ulp_lock); - lp->ul_prev_enc = UU_PTR_ENCODE(NULL); - lp->ul_next_enc = UU_PTR_ENCODE(NULL); - lp->ul_pool = NULL; - uu_free(lp); -} - -static void -list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev, - uu_list_node_impl_t *next) -{ - if (lp->ul_debug) { - if (next->uln_prev != prev || prev->uln_next != next) - uu_panic("insert(%p): internal error: %p and %p not " - "neighbors\n", (void *)lp, (void *)next, - (void *)prev); - - if (np->uln_next != POOL_TO_MARKER(lp->ul_pool) || - np->uln_prev != NULL) { - uu_panic("insert(%p): elem %p node %p corrupt, " - "not initialized, or already in a list.\n", - (void *)lp, NODE_TO_ELEM(lp, np), (void *)np); - } - /* - * invalidate outstanding uu_list_index_ts. - */ - lp->ul_index = INDEX_NEXT(lp->ul_index); - } - np->uln_next = next; - np->uln_prev = prev; - next->uln_prev = np; - prev->uln_next = np; - - lp->ul_numnodes++; -} - -void -uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx) -{ - uu_list_node_impl_t *np; - - np = INDEX_TO_NODE(idx); - if (np == NULL) - np = &lp->ul_null_node; - - if (lp->ul_debug) { - if (!INDEX_VALID(lp, idx)) - uu_panic("uu_list_insert(%p, %p, %p): %s\n", - (void *)lp, elem, (void *)idx, - INDEX_CHECK(idx)? 
"outdated index" : - "invalid index"); - if (np->uln_prev == NULL) - uu_panic("uu_list_insert(%p, %p, %p): out-of-date " - "index\n", (void *)lp, elem, (void *)idx); - } - - list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np); -} - -void * -uu_list_find(uu_list_t *lp, void *elem, void *private, uu_list_index_t *out) -{ - int sorted = lp->ul_sorted; - uu_compare_fn_t *func = lp->ul_pool->ulp_cmp; - uu_list_node_impl_t *np; - - if (func == NULL) { - if (out != NULL) - *out = 0; - uu_set_error(UU_ERROR_NOT_SUPPORTED); - return (NULL); - } - for (np = lp->ul_null_node.uln_next; np != &lp->ul_null_node; - np = np->uln_next) { - void *ep = NODE_TO_ELEM(lp, np); - int cmp = func(ep, elem, private); - if (cmp == 0) { - if (out != NULL) - *out = NODE_TO_INDEX(lp, np); - return (ep); - } - if (sorted && cmp > 0) { - if (out != NULL) - *out = NODE_TO_INDEX(lp, np); - return (NULL); - } - } - if (out != NULL) - *out = NODE_TO_INDEX(lp, 0); - return (NULL); -} - -void * -uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx) -{ - uu_list_node_impl_t *np = INDEX_TO_NODE(idx); - - if (np == NULL) - np = &lp->ul_null_node; - - if (lp->ul_debug) { - if (!INDEX_VALID(lp, idx)) - uu_panic("uu_list_nearest_next(%p, %p): %s\n", - (void *)lp, (void *)idx, - INDEX_CHECK(idx)? "outdated index" : - "invalid index"); - if (np->uln_prev == NULL) - uu_panic("uu_list_nearest_next(%p, %p): out-of-date " - "index\n", (void *)lp, (void *)idx); - } - - if (np == &lp->ul_null_node) - return (NULL); - else - return (NODE_TO_ELEM(lp, np)); -} - -void * -uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx) -{ - uu_list_node_impl_t *np = INDEX_TO_NODE(idx); - - if (np == NULL) - np = &lp->ul_null_node; - - if (lp->ul_debug) { - if (!INDEX_VALID(lp, idx)) - uu_panic("uu_list_nearest_prev(%p, %p): %s\n", - (void *)lp, (void *)idx, INDEX_CHECK(idx)? 
- "outdated index" : "invalid index"); - if (np->uln_prev == NULL) - uu_panic("uu_list_nearest_prev(%p, %p): out-of-date " - "index\n", (void *)lp, (void *)idx); - } - - if ((np = np->uln_prev) == &lp->ul_null_node) - return (NULL); - else - return (NODE_TO_ELEM(lp, np)); -} - -static void -list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags) -{ - uu_list_walk_t *next, *prev; - - int robust = (flags & UU_WALK_ROBUST); - int direction = (flags & UU_WALK_REVERSE)? -1 : 1; - - (void) memset(wp, 0, sizeof (*wp)); - wp->ulw_list = lp; - wp->ulw_robust = robust; - wp->ulw_dir = direction; - if (direction > 0) - wp->ulw_next_result = lp->ul_null_node.uln_next; - else - wp->ulw_next_result = lp->ul_null_node.uln_prev; - - if (lp->ul_debug || robust) { - /* - * Add this walker to the list's list of walkers so - * uu_list_remove() can advance us if somebody tries to - * remove ulw_next_result. - */ - wp->ulw_next = next = &lp->ul_null_walk; - wp->ulw_prev = prev = next->ulw_prev; - next->ulw_prev = wp; - prev->ulw_next = wp; - } -} - -static uu_list_node_impl_t * -list_walk_advance(uu_list_walk_t *wp, uu_list_t *lp) -{ - uu_list_node_impl_t *np = wp->ulw_next_result; - uu_list_node_impl_t *next; - - if (np == &lp->ul_null_node) - return (NULL); - - next = (wp->ulw_dir > 0)? np->uln_next : np->uln_prev; - - wp->ulw_next_result = next; - return (np); -} - -static void -list_walk_fini(uu_list_walk_t *wp) -{ - /* GLXXX debugging? 
*/ - if (wp->ulw_next != NULL) { - wp->ulw_next->ulw_prev = wp->ulw_prev; - wp->ulw_prev->ulw_next = wp->ulw_next; - wp->ulw_next = NULL; - wp->ulw_prev = NULL; - } - wp->ulw_list = NULL; - wp->ulw_next_result = NULL; -} - -uu_list_walk_t * -uu_list_walk_start(uu_list_t *lp, uint32_t flags) -{ - uu_list_walk_t *wp; - - if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (NULL); - } - - wp = uu_zalloc(sizeof (*wp)); - if (wp == NULL) { - uu_set_error(UU_ERROR_NO_MEMORY); - return (NULL); - } - - list_walk_init(wp, lp, flags); - return (wp); -} - -void * -uu_list_walk_next(uu_list_walk_t *wp) -{ - uu_list_t *lp = wp->ulw_list; - uu_list_node_impl_t *np = list_walk_advance(wp, lp); - - if (np == NULL) - return (NULL); - - return (NODE_TO_ELEM(lp, np)); -} - -void -uu_list_walk_end(uu_list_walk_t *wp) -{ - list_walk_fini(wp); - uu_free(wp); -} - -int -uu_list_walk(uu_list_t *lp, uu_walk_fn_t *func, void *private, uint32_t flags) -{ - uu_list_node_impl_t *np; - - int status = UU_WALK_NEXT; - - int robust = (flags & UU_WALK_ROBUST); - int reverse = (flags & UU_WALK_REVERSE); - - if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { - uu_set_error(UU_ERROR_UNKNOWN_FLAG); - return (-1); - } - - if (lp->ul_debug || robust) { - uu_list_walk_t my_walk; - void *e; - - list_walk_init(&my_walk, lp, flags); - while (status == UU_WALK_NEXT && - (e = uu_list_walk_next(&my_walk)) != NULL) - status = (*func)(e, private); - list_walk_fini(&my_walk); - } else { - if (!reverse) { - for (np = lp->ul_null_node.uln_next; - status == UU_WALK_NEXT && np != &lp->ul_null_node; - np = np->uln_next) { - status = (*func)(NODE_TO_ELEM(lp, np), private); - } - } else { - for (np = lp->ul_null_node.uln_prev; - status == UU_WALK_NEXT && np != &lp->ul_null_node; - np = np->uln_prev) { - status = (*func)(NODE_TO_ELEM(lp, np), private); - } - } - } - if (status >= 0) - return (0); - uu_set_error(UU_ERROR_CALLBACK_FAILED); - return (-1); -} - -void 
-uu_list_remove(uu_list_t *lp, void *elem) -{ - uu_list_node_impl_t *np = ELEM_TO_NODE(lp, elem); - uu_list_walk_t *wp; - - if (lp->ul_debug) { - if (np->uln_prev == NULL) - uu_panic("uu_list_remove(%p, %p): elem not on list\n", - (void *)lp, elem); - /* - * invalidate outstanding uu_list_index_ts. - */ - lp->ul_index = INDEX_NEXT(lp->ul_index); - } - - /* - * robust walkers must be advanced. In debug mode, non-robust - * walkers are also on the list. If there are any, it's an error. - */ - for (wp = lp->ul_null_walk.ulw_next; wp != &lp->ul_null_walk; - wp = wp->ulw_next) { - if (wp->ulw_robust) { - if (np == wp->ulw_next_result) - (void) list_walk_advance(wp, lp); - } else if (wp->ulw_next_result != NULL) { - uu_panic("uu_list_remove(%p, %p): active non-robust " - "walker\n", (void *)lp, elem); - } - } - - np->uln_next->uln_prev = np->uln_prev; - np->uln_prev->uln_next = np->uln_next; - - lp->ul_numnodes--; - - np->uln_next = POOL_TO_MARKER(lp->ul_pool); - np->uln_prev = NULL; -} - -void * -uu_list_teardown(uu_list_t *lp, void **cookie) -{ - void *ep; - - /* - * XXX: disable list modification until list is empty - */ - if (lp->ul_debug && *cookie != NULL) - uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n", - (void *)lp, (void *)cookie); - - ep = uu_list_first(lp); - if (ep) - uu_list_remove(lp, ep); - return (ep); -} - -int -uu_list_insert_before(uu_list_t *lp, void *target, void *elem) -{ - uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target); - - if (target == NULL) - np = &lp->ul_null_node; - - if (lp->ul_debug) { - if (np->uln_prev == NULL) - uu_panic("uu_list_insert_before(%p, %p, %p): %p is " - "not currently on a list\n", - (void *)lp, target, elem, target); - } - if (lp->ul_sorted) { - if (lp->ul_debug) - uu_panic("uu_list_insert_before(%p, ...): list is " - "UU_LIST_SORTED\n", (void *)lp); - uu_set_error(UU_ERROR_NOT_SUPPORTED); - return (-1); - } - - list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np); - return (0); -} - -int 
-uu_list_insert_after(uu_list_t *lp, void *target, void *elem) -{ - uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target); - - if (target == NULL) - np = &lp->ul_null_node; - - if (lp->ul_debug) { - if (np->uln_prev == NULL) - uu_panic("uu_list_insert_after(%p, %p, %p): %p is " - "not currently on a list\n", - (void *)lp, target, elem, target); - } - if (lp->ul_sorted) { - if (lp->ul_debug) - uu_panic("uu_list_insert_after(%p, ...): list is " - "UU_LIST_SORTED\n", (void *)lp); - uu_set_error(UU_ERROR_NOT_SUPPORTED); - return (-1); - } - - list_insert(lp, ELEM_TO_NODE(lp, elem), np, np->uln_next); - return (0); -} - -size_t -uu_list_numnodes(uu_list_t *lp) -{ - return (lp->ul_numnodes); -} - -void * -uu_list_first(uu_list_t *lp) -{ - uu_list_node_impl_t *n = lp->ul_null_node.uln_next; - if (n == &lp->ul_null_node) - return (NULL); - return (NODE_TO_ELEM(lp, n)); -} - -void * -uu_list_last(uu_list_t *lp) -{ - uu_list_node_impl_t *n = lp->ul_null_node.uln_prev; - if (n == &lp->ul_null_node) - return (NULL); - return (NODE_TO_ELEM(lp, n)); -} - -void * -uu_list_next(uu_list_t *lp, void *elem) -{ - uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem); - - n = n->uln_next; - if (n == &lp->ul_null_node) - return (NULL); - return (NODE_TO_ELEM(lp, n)); -} - -void * -uu_list_prev(uu_list_t *lp, void *elem) -{ - uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem); - - n = n->uln_prev; - if (n == &lp->ul_null_node) - return (NULL); - return (NODE_TO_ELEM(lp, n)); -} - -/* - * called from uu_lockup() and uu_release(), as part of our fork1()-safety. 
- */ -void -uu_list_lockup(void) -{ - uu_list_pool_t *pp; - - (void) pthread_mutex_lock(&uu_lpool_list_lock); - for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool; - pp = pp->ulp_next) - (void) pthread_mutex_lock(&pp->ulp_lock); -} - -void -uu_list_release(void) -{ - uu_list_pool_t *pp; - - for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool; - pp = pp->ulp_next) - (void) pthread_mutex_unlock(&pp->ulp_lock); - (void) pthread_mutex_unlock(&uu_lpool_list_lock); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c deleted file mode 100644 index b673834e4dcf..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
- */ - -#include "libuutil_common.h" - -#define HAVE_ASSFAIL 1 - -#include <assert.h> -#include <errno.h> -#include <libintl.h> -#include <pthread.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/debug.h> -#include <thread.h> -#include <unistd.h> -#include <ctype.h> - -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - -/* - * All of the old code under !defined(PTHREAD_ONCE_KEY_NP) - * is here to enable the building of a native version of - * libuutil.so when the build machine has not yet been upgraded - * to a version of libc that provides pthread_key_create_once_np(). - * It should all be deleted when solaris_nevada ships. - * The code is not MT-safe in a relaxed memory model. - */ - -#if defined(PTHREAD_ONCE_KEY_NP) -static pthread_key_t uu_error_key = PTHREAD_ONCE_KEY_NP; -#else /* PTHREAD_ONCE_KEY_NP */ -static pthread_key_t uu_error_key = 0; -static pthread_mutex_t uu_key_lock = PTHREAD_MUTEX_INITIALIZER; -#endif /* PTHREAD_ONCE_KEY_NP */ - -static int uu_error_key_setup = 0; - -static pthread_mutex_t uu_panic_lock = PTHREAD_MUTEX_INITIALIZER; -/* LINTED static unused */ -static const char *uu_panic_format; -/* LINTED static unused */ -static va_list uu_panic_args; -static pthread_t uu_panic_thread; - -static uint32_t _uu_main_error; - -void -uu_set_error(uint_t code) -{ - -#if defined(PTHREAD_ONCE_KEY_NP) - if (pthread_key_create_once_np(&uu_error_key, NULL) != 0) - uu_error_key_setup = -1; - else - uu_error_key_setup = 1; -#else /* PTHREAD_ONCE_KEY_NP */ - if (uu_error_key_setup == 0) { - (void) pthread_mutex_lock(&uu_key_lock); - if (uu_error_key_setup == 0) { - if (pthread_key_create(&uu_error_key, NULL) != 0) - uu_error_key_setup = -1; - else - uu_error_key_setup = 1; - } - (void) pthread_mutex_unlock(&uu_key_lock); - } -#endif /* PTHREAD_ONCE_KEY_NP */ - if (uu_error_key_setup > 0) - (void) pthread_setspecific(uu_error_key, - (void *)(uintptr_t)code); -} - -uint32_t -uu_error(void) -{ 
- - if (uu_error_key_setup < 0) /* can't happen? */ - return (UU_ERROR_UNKNOWN); - - /* - * Because UU_ERROR_NONE == 0, if uu_set_error() was - * never called, then this will return UU_ERROR_NONE: - */ - return ((uint32_t)(uintptr_t)pthread_getspecific(uu_error_key)); -} - -const char * -uu_strerror(uint32_t code) -{ - const char *str; - - switch (code) { - case UU_ERROR_NONE: - str = dgettext(TEXT_DOMAIN, "No error"); - break; - - case UU_ERROR_INVALID_ARGUMENT: - str = dgettext(TEXT_DOMAIN, "Invalid argument"); - break; - - case UU_ERROR_UNKNOWN_FLAG: - str = dgettext(TEXT_DOMAIN, "Unknown flag passed"); - break; - - case UU_ERROR_NO_MEMORY: - str = dgettext(TEXT_DOMAIN, "Out of memory"); - break; - - case UU_ERROR_CALLBACK_FAILED: - str = dgettext(TEXT_DOMAIN, "Callback-initiated failure"); - break; - - case UU_ERROR_NOT_SUPPORTED: - str = dgettext(TEXT_DOMAIN, "Operation not supported"); - break; - - case UU_ERROR_EMPTY: - str = dgettext(TEXT_DOMAIN, "No value provided"); - break; - - case UU_ERROR_UNDERFLOW: - str = dgettext(TEXT_DOMAIN, "Value too small"); - break; - - case UU_ERROR_OVERFLOW: - str = dgettext(TEXT_DOMAIN, "Value too large"); - break; - - case UU_ERROR_INVALID_CHAR: - str = dgettext(TEXT_DOMAIN, - "Value contains unexpected character"); - break; - - case UU_ERROR_INVALID_DIGIT: - str = dgettext(TEXT_DOMAIN, - "Value contains digit not in base"); - break; - - case UU_ERROR_SYSTEM: - str = dgettext(TEXT_DOMAIN, "Underlying system error"); - break; - - case UU_ERROR_UNKNOWN: - str = dgettext(TEXT_DOMAIN, "Error status not known"); - break; - - default: - errno = ESRCH; - str = NULL; - break; - } - return (str); -} - -void -uu_panic(const char *format, ...) 
-{ - va_list args; - - va_start(args, format); - - (void) pthread_mutex_lock(&uu_panic_lock); - if (uu_panic_thread == 0) { - uu_panic_thread = pthread_self(); - uu_panic_format = format; - va_copy(uu_panic_args, args); - } - (void) pthread_mutex_unlock(&uu_panic_lock); - - (void) vfprintf(stderr, format, args); - - if (uu_panic_thread == pthread_self()) - abort(); - else - for (;;) - (void) pause(); -} - -int -assfail(const char *astring, const char *file, int line) -{ - __assert(astring, file, line); - /*NOTREACHED*/ - return (0); -} - -static void -uu_lockup(void) -{ - (void) pthread_mutex_lock(&uu_panic_lock); -#if !defined(PTHREAD_ONCE_KEY_NP) - (void) pthread_mutex_lock(&uu_key_lock); -#endif - uu_avl_lockup(); - uu_list_lockup(); -} - -static void -uu_release(void) -{ - (void) pthread_mutex_unlock(&uu_panic_lock); -#if !defined(PTHREAD_ONCE_KEY_NP) - (void) pthread_mutex_unlock(&uu_key_lock); -#endif - uu_avl_release(); - uu_list_release(); -} - -static void -uu_release_child(void) -{ - uu_panic_format = NULL; - uu_panic_thread = 0; - - uu_release(); -} - -#pragma init(uu_init) -static void -uu_init(void) -{ - (void) pthread_atfork(uu_lockup, uu_release, uu_release_child); -} - -/* - * Dump a block of memory in hex+ascii, for debugging - */ -void -uu_dump(FILE *out, const char *prefix, const void *buf, size_t len) -{ - const unsigned char *p = buf; - int i; - - for (i = 0; i < len; i += 16) { - int j; - - (void) fprintf(out, "%s", prefix); - for (j = 0; j < 16 && i + j < len; j++) { - (void) fprintf(out, "%2.2x ", p[i + j]); - } - for (; j < 16; j++) { - (void) fprintf(out, " "); - } - for (j = 0; j < 16 && i + j < len; j++) { - (void) fprintf(out, "%c", - isprint(p[i + j]) ? 
p[i + j] : '.'); - } - (void) fprintf(out, "\n"); - } -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c deleted file mode 100644 index 7256662e38f6..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <sys/time.h> - -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <stdio.h> -#include <unistd.h> - -#ifdef _LP64 -#define TMPPATHFMT "%s/uu%ld" -#else /* _LP64 */ -#define TMPPATHFMT "%s/uu%lld" -#endif /* _LP64 */ - -/*ARGSUSED*/ -int -uu_open_tmp(const char *dir, uint_t uflags) -{ - int f; - char *fname = uu_zalloc(PATH_MAX); - - if (fname == NULL) - return (-1); - - for (;;) { - (void) snprintf(fname, PATH_MAX, "%s/uu%lld", dir, gethrtime()); - - f = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600); - - if (f >= 0 || errno != EEXIST) - break; - } - - if (f >= 0) - (void) unlink(fname); - - uu_free(fname); - - return (f); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c deleted file mode 100644 index 20626ace6b2f..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <libintl.h> -#include <limits.h> -#include <string.h> -#include <stdlib.h> -#include <stdarg.h> -#include <stdio.h> -#include <errno.h> -#include <wchar.h> -#include <unistd.h> - -static const char PNAME_FMT[] = "%s: "; -static const char ERRNO_FMT[] = ": %s\n"; - -static const char *pname; - -static void -uu_die_internal(int status, const char *format, va_list alist) __NORETURN; - -int uu_exit_ok_value = EXIT_SUCCESS; -int uu_exit_fatal_value = EXIT_FAILURE; -int uu_exit_usage_value = 2; - -int * -uu_exit_ok(void) -{ - return (&uu_exit_ok_value); -} - -int * -uu_exit_fatal(void) -{ - return (&uu_exit_fatal_value); -} - -int * -uu_exit_usage(void) -{ - return (&uu_exit_usage_value); -} - -void -uu_alt_exit(int profile) -{ - switch (profile) { - case UU_PROFILE_DEFAULT: - uu_exit_ok_value = EXIT_SUCCESS; - uu_exit_fatal_value = EXIT_FAILURE; - uu_exit_usage_value = 2; - break; - case UU_PROFILE_LAUNCHER: - uu_exit_ok_value = EXIT_SUCCESS; - uu_exit_fatal_value = 124; - uu_exit_usage_value = 125; - break; - } -} - -static void -uu_warn_internal(int err, const char *format, va_list alist) -{ - if (pname != NULL) - (void) fprintf(stderr, PNAME_FMT, pname); - - (void) vfprintf(stderr, format, alist); - - if (strrchr(format, '\n') == NULL) - (void) fprintf(stderr, ERRNO_FMT, strerror(err)); -} - -void -uu_vwarn(const char *format, va_list alist) -{ - uu_warn_internal(errno, format, alist); -} - -/*PRINTFLIKE1*/ -void -uu_warn(const char *format, ...) 
-{ - va_list alist; - va_start(alist, format); - uu_warn_internal(errno, format, alist); - va_end(alist); -} - -static void -uu_die_internal(int status, const char *format, va_list alist) -{ - uu_warn_internal(errno, format, alist); -#ifdef DEBUG - { - char *cp; - - if (!issetugid()) { - cp = getenv("UU_DIE_ABORTS"); - if (cp != NULL && *cp != '\0') - abort(); - } - } -#endif - exit(status); -} - -void -uu_vdie(const char *format, va_list alist) -{ - uu_die_internal(UU_EXIT_FATAL, format, alist); -} - -/*PRINTFLIKE1*/ -void -uu_die(const char *format, ...) -{ - va_list alist; - va_start(alist, format); - uu_die_internal(UU_EXIT_FATAL, format, alist); - va_end(alist); -} - -void -uu_vxdie(int status, const char *format, va_list alist) -{ - uu_die_internal(status, format, alist); -} - -/*PRINTFLIKE2*/ -void -uu_xdie(int status, const char *format, ...) -{ - va_list alist; - va_start(alist, format); - uu_die_internal(status, format, alist); - va_end(alist); -} - -const char * -uu_setpname(char *arg0) -{ - /* - * Having a NULL argv[0], while uncommon, is possible. It - * makes more sense to handle this event in uu_setpname rather - * than in each of its consumers. - */ - if (arg0 == NULL) { - pname = "unknown_command"; - return (pname); - } - - /* - * Guard against '/' at end of command invocation. 
- */ - for (;;) { - char *p = strrchr(arg0, '/'); - if (p == NULL) { - pname = arg0; - break; - } else { - if (*(p + 1) == '\0') { - *p = '\0'; - continue; - } - - pname = p + 1; - break; - } - } - - return (pname); -} - -const char * -uu_getpname(void) -{ - return (pname); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c deleted file mode 100644 index 66afba05e849..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 
- */ - -/* - * String helper functions - */ - -#include <string.h> -#include <sys/types.h> -#include <stdio.h> -#include <malloc.h> -#include <ctype.h> -#include "libuutil.h" - -/* Return true if strings are equal */ -boolean_t -uu_streq(const char *a, const char *b) -{ - return (strcmp(a, b) == 0); -} - -/* Return true if strings are equal, case-insensitively */ -boolean_t -uu_strcaseeq(const char *a, const char *b) -{ - return (strcasecmp(a, b) == 0); -} - -/* Return true if string a Begins With string b */ -boolean_t -uu_strbw(const char *a, const char *b) -{ - return (strncmp(a, b, strlen(b)) == 0); -} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c deleted file mode 100644 index 8fd1148365cb..000000000000 --- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libuutil_common.h" - -#include <limits.h> -#include <ctype.h> - -#define MAX_BASE 36 - -#define IS_DIGIT(x) ((x) >= '0' && (x) <= '9') - -#define CTOI(x) (((x) >= '0' && (x) <= '9') ? (x) - '0' : \ - ((x) >= 'a' && (x) <= 'z') ? (x) + 10 - 'a' : (x) + 10 - 'A') - -static int -strtoint(const char *s_arg, uint64_t *out, uint32_t base, int sign) -{ - const unsigned char *s = (const unsigned char *)s_arg; - - uint64_t val = 0; - uint64_t multmax; - - unsigned c, i; - - int neg = 0; - - int bad_digit = 0; - int bad_char = 0; - int overflow = 0; - - if (s == NULL || base == 1 || base > MAX_BASE) { - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (-1); - } - - while ((c = *s) != 0 && isspace(c)) - s++; - - switch (c) { - case '-': - if (!sign) - overflow = 1; /* becomes underflow below */ - neg = 1; - /*FALLTHRU*/ - case '+': - c = *++s; - break; - default: - break; - } - - if (c == '\0') { - uu_set_error(UU_ERROR_EMPTY); - return (-1); - } - - if (base == 0) { - if (c != '0') - base = 10; - else if (s[1] == 'x' || s[1] == 'X') - base = 16; - else - base = 8; - } - - if (base == 16 && c == '0' && (s[1] == 'x' || s[1] == 'X')) - c = *(s += 2); - - if ((val = CTOI(c)) >= base) { - if (IS_DIGIT(c)) - bad_digit = 1; - else - bad_char = 1; - val = 0; - } - - multmax = (uint64_t)UINT64_MAX / (uint64_t)base; - - for (c = *++s; c != '\0'; c = *++s) { - if ((i = CTOI(c)) >= base) { - if (isspace(c)) - break; - if (IS_DIGIT(c)) - bad_digit = 1; - else - bad_char = 1; - i = 0; - } - - if (val > multmax) - overflow = 1; - - val *= base; - if ((uint64_t)UINT64_MAX - val < (uint64_t)i) - overflow = 1; - - val += i; - } - - while ((c = *s) != 0) { - if (!isspace(c)) - bad_char = 1; - s++; - } - - if (sign) { - if (neg) { - if (val > -(uint64_t)INT64_MIN) - overflow = 1; - } else { - if (val > INT64_MAX) - overflow = 1; - } - } - - if (neg) - val = -val; - - if (bad_char | bad_digit | overflow) { - if (bad_char) - 
uu_set_error(UU_ERROR_INVALID_CHAR); - else if (bad_digit) - uu_set_error(UU_ERROR_INVALID_DIGIT); - else if (overflow) { - if (neg) - uu_set_error(UU_ERROR_UNDERFLOW); - else - uu_set_error(UU_ERROR_OVERFLOW); - } - return (-1); - } - - *out = val; - return (0); -} - -int -uu_strtoint(const char *s, void *v, size_t sz, int base, - int64_t min, int64_t max) -{ - uint64_t val_u; - int64_t val; - - if (min > max) - goto bad_argument; - - switch (sz) { - case 1: - if (max > INT8_MAX || min < INT8_MIN) - goto bad_argument; - break; - case 2: - if (max > INT16_MAX || min < INT16_MIN) - goto bad_argument; - break; - case 4: - if (max > INT32_MAX || min < INT32_MIN) - goto bad_argument; - break; - case 8: - if (max > INT64_MAX || min < INT64_MIN) - goto bad_argument; - break; - default: - goto bad_argument; - } - - if (min == 0 && max == 0) { - min = -(1ULL << (8 * sz - 1)); - max = (1ULL << (8 * sz - 1)) - 1; - } - - if (strtoint(s, &val_u, base, 1) == -1) - return (-1); - - val = (int64_t)val_u; - - if (val < min) { - uu_set_error(UU_ERROR_UNDERFLOW); - return (-1); - } else if (val > max) { - uu_set_error(UU_ERROR_OVERFLOW); - return (-1); - } - - switch (sz) { - case 1: - *(int8_t *)v = val; - return (0); - case 2: - *(int16_t *)v = val; - return (0); - case 4: - *(int32_t *)v = val; - return (0); - case 8: - *(int64_t *)v = val; - return (0); - default: - break; /* fall through to bad_argument */ - } - -bad_argument: - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (-1); -} - -int -uu_strtouint(const char *s, void *v, size_t sz, int base, - uint64_t min, uint64_t max) -{ - uint64_t val; - - if (min > max) - goto bad_argument; - - switch (sz) { - case 1: - if (max > UINT8_MAX) - goto bad_argument; - break; - case 2: - if (max > UINT16_MAX) - goto bad_argument; - break; - case 4: - if (max > UINT32_MAX) - goto bad_argument; - break; - case 8: - if (max > UINT64_MAX) - goto bad_argument; - break; - default: - goto bad_argument; - } - - if (min == 0 && max == 0) { - 
/* we have to be careful, since << can overflow */ - max = (1ULL << (8 * sz - 1)) * 2 - 1; - } - - if (strtoint(s, &val, base, 0) == -1) - return (-1); - - if (val < min) { - uu_set_error(UU_ERROR_UNDERFLOW); - return (-1); - } else if (val > max) { - uu_set_error(UU_ERROR_OVERFLOW); - return (-1); - } - - switch (sz) { - case 1: - *(uint8_t *)v = val; - return (0); - case 2: - *(uint16_t *)v = val; - return (0); - case 4: - *(uint32_t *)v = val; - return (0); - case 8: - *(uint64_t *)v = val; - return (0); - default: - break; /* shouldn't happen, fall through */ - } - -bad_argument: - uu_set_error(UU_ERROR_INVALID_ARGUMENT); - return (-1); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h deleted file mode 100644 index 1899e318d53e..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ /dev/null @@ -1,894 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. 
- * Copyright 2019 Joyent, Inc. - * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Nexenta Systems, Inc. - * Copyright (c) 2019 Datto Inc. - */ - -#ifndef _LIBZFS_H -#define _LIBZFS_H - -#include <assert.h> -#include <libnvpair.h> -#include <sys/mnttab.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/varargs.h> -#include <sys/fs/zfs.h> -#include <sys/avl.h> -#include <sys/zfs_ioctl.h> -#include <libzfs_core.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Miscellaneous ZFS constants - */ -#define ZFS_MAXPROPLEN MAXPATHLEN -#define ZPOOL_MAXPROPLEN MAXPATHLEN - -/* - * libzfs errors - */ -typedef enum zfs_error { - EZFS_SUCCESS = 0, /* no error -- success */ - EZFS_NOMEM = 2000, /* out of memory */ - EZFS_BADPROP, /* invalid property value */ - EZFS_PROPREADONLY, /* cannot set readonly property */ - EZFS_PROPTYPE, /* property does not apply to dataset type */ - EZFS_PROPNONINHERIT, /* property is not inheritable */ - EZFS_PROPSPACE, /* bad quota or reservation */ - EZFS_BADTYPE, /* dataset is not of appropriate type */ - EZFS_BUSY, /* pool or dataset is busy */ - EZFS_EXISTS, /* pool or dataset already exists */ - EZFS_NOENT, /* no such pool or dataset */ - EZFS_BADSTREAM, /* bad backup stream */ - EZFS_DSREADONLY, /* dataset is readonly */ - EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ - EZFS_INVALIDNAME, /* invalid dataset name */ - EZFS_BADRESTORE, /* unable to restore to destination */ - EZFS_BADBACKUP, /* backup failed */ - EZFS_BADTARGET, /* bad attach/detach/replace target */ - EZFS_NODEVICE, /* no such device in pool */ - EZFS_BADDEV, /* invalid device to add */ - EZFS_NOREPLICAS, /* no valid replicas */ - EZFS_RESILVERING, /* currently resilvering */ - EZFS_BADVERSION, /* unsupported version */ - EZFS_POOLUNAVAIL, /* pool is currently unavailable */ - 
EZFS_DEVOVERFLOW, /* too many devices in one vdev */ - EZFS_BADPATH, /* must be an absolute path */ - EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ - EZFS_ZONED, /* used improperly in local zone */ - EZFS_MOUNTFAILED, /* failed to mount dataset */ - EZFS_UMOUNTFAILED, /* failed to unmount dataset */ - EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ - EZFS_SHARENFSFAILED, /* share(1M) failed */ - EZFS_PERM, /* permission denied */ - EZFS_NOSPC, /* out of space */ - EZFS_FAULT, /* bad address */ - EZFS_IO, /* I/O error */ - EZFS_INTR, /* signal received */ - EZFS_ISSPARE, /* device is a hot spare */ - EZFS_INVALCONFIG, /* invalid vdev configuration */ - EZFS_RECURSIVE, /* recursive dependency */ - EZFS_NOHISTORY, /* no history object */ - EZFS_POOLPROPS, /* couldn't retrieve pool props */ - EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ - EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ - EZFS_NAMETOOLONG, /* dataset name is too long */ - EZFS_OPENFAILED, /* open of device failed */ - EZFS_NOCAP, /* couldn't get capacity */ - EZFS_LABELFAILED, /* write of label failed */ - EZFS_BADWHO, /* invalid permission who */ - EZFS_BADPERM, /* invalid permission */ - EZFS_BADPERMSET, /* invalid permission set name */ - EZFS_NODELEGATION, /* delegated administration is disabled */ - EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ - EZFS_SHARESMBFAILED, /* failed to share over smb */ - EZFS_BADCACHE, /* bad cache file */ - EZFS_ISL2CACHE, /* device is for the level 2 ARC */ - EZFS_VDEVNOTSUP, /* unsupported vdev type */ - EZFS_NOTSUP, /* ops not supported on this dataset */ - EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ - EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ - EZFS_REFTAG_RELE, /* snapshot release: tag not found */ - EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ - EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ - EZFS_PIPEFAILED, /* pipe create failed */ - 
EZFS_THREADCREATEFAILED, /* thread create failed */ - EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ - EZFS_SCRUBBING, /* currently scrubbing */ - EZFS_NO_SCRUB, /* no active scrub */ - EZFS_DIFF, /* general failure of zfs diff */ - EZFS_DIFFDATA, /* bad zfs diff data */ - EZFS_POOLREADONLY, /* pool is in read-only mode */ - EZFS_SCRUB_PAUSED, /* scrub currently paused */ - EZFS_ACTIVE_POOL, /* pool is imported on a different system */ - EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ - EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ - EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ - EZFS_NO_CHECKPOINT, /* pool has no checkpoint */ - EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */ - EZFS_VDEV_TOO_BIG, /* a device is too big to be used */ - EZFS_TOOMANY, /* argument list too long */ - EZFS_INITIALIZING, /* currently initializing */ - EZFS_NO_INITIALIZE, /* no active initialize */ - EZFS_WRONG_PARENT, /* invalid parent dataset (e.g ZVOL) */ - EZFS_IOC_NOTSUPPORTED, /* operation not supported by zfs module */ - EZFS_UNKNOWN -} zfs_error_t; - -/* - * UEFI boot support parameters. When creating whole disk boot pool, - * zpool create should allow to create EFI System partition for UEFI boot - * program. In case of BIOS, the EFI System partition is not used - * even if it does exist. - */ -typedef enum zpool_boot_label { - ZPOOL_NO_BOOT_LABEL = 0, - ZPOOL_CREATE_BOOT_LABEL, - ZPOOL_COPY_BOOT_LABEL -} zpool_boot_label_t; - -/* - * The following data structures are all part - * of the zfs_allow_t data structure which is - * used for printing 'allow' permissions. - * It is a linked list of zfs_allow_t's which - * then contain avl tree's for user/group/sets/... - * and each one of the entries in those trees have - * avl tree's for the permissions they belong to and - * whether they are local,descendent or local+descendent - * permissions. 
The AVL trees are used primarily for - * sorting purposes, but also so that we can quickly find - * a given user and or permission. - */ -typedef struct zfs_perm_node { - avl_node_t z_node; - char z_pname[MAXPATHLEN]; -} zfs_perm_node_t; - -typedef struct zfs_allow_node { - avl_node_t z_node; - char z_key[MAXPATHLEN]; /* name, such as joe */ - avl_tree_t z_localdescend; /* local+descendent perms */ - avl_tree_t z_local; /* local permissions */ - avl_tree_t z_descend; /* descendent permissions */ -} zfs_allow_node_t; - -typedef struct zfs_allow { - struct zfs_allow *z_next; - char z_setpoint[MAXPATHLEN]; - avl_tree_t z_sets; - avl_tree_t z_crperms; - avl_tree_t z_user; - avl_tree_t z_group; - avl_tree_t z_everyone; -} zfs_allow_t; - -/* - * Basic handle types - */ -typedef struct zfs_handle zfs_handle_t; -typedef struct zpool_handle zpool_handle_t; -typedef struct libzfs_handle libzfs_handle_t; - -/* - * Library initialization - */ -extern libzfs_handle_t *libzfs_init(void); -extern void libzfs_fini(libzfs_handle_t *); - -extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); -extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); - -extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); - -extern void zfs_save_arguments(int argc, char **, char *, int); -extern int zpool_log_history(libzfs_handle_t *, const char *); - -extern int libzfs_errno(libzfs_handle_t *); -extern const char *libzfs_error_action(libzfs_handle_t *); -extern const char *libzfs_error_description(libzfs_handle_t *); -extern int zfs_standard_error(libzfs_handle_t *, int, const char *); -extern void libzfs_mnttab_init(libzfs_handle_t *); -extern void libzfs_mnttab_fini(libzfs_handle_t *); -extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); -extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, - struct mnttab *); -extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, - const char *, const char *); -extern void libzfs_mnttab_remove(libzfs_handle_t *, 
const char *); - -/* - * Basic handle functions - */ -extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); -extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); -extern void zpool_close(zpool_handle_t *); -extern const char *zpool_get_name(zpool_handle_t *); -extern int zpool_get_state(zpool_handle_t *); -extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); -extern const char *zpool_pool_state_to_name(pool_state_t); -extern void zpool_free_handles(libzfs_handle_t *); -extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); - -/* - * Iterate over all active pools in the system. - */ -typedef int (*zpool_iter_f)(zpool_handle_t *, void *); -extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); -extern boolean_t zpool_skip_pool(const char *); - -/* - * Functions to create and destroy pools - */ -extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, - nvlist_t *, nvlist_t *); -extern int zpool_destroy(zpool_handle_t *, const char *); -extern int zpool_add(zpool_handle_t *, nvlist_t *); - -typedef struct splitflags { - /* do not split, but return the config that would be split off */ - int dryrun : 1; - - /* after splitting, import the pool */ - int import : 1; - int name_flags; -} splitflags_t; - -/* - * Functions to manipulate pool and vdev state - */ -extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); -extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, - nvlist_t *); -extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); -extern int zpool_reguid(zpool_handle_t *); -extern int zpool_reopen(zpool_handle_t *); - -extern int zpool_sync_one(zpool_handle_t *, void *); - -extern int zpool_vdev_online(zpool_handle_t *, const char *, int, - vdev_state_t *); -extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); -extern int zpool_vdev_attach(zpool_handle_t *, const char *, - const char *, nvlist_t *, 
int); -extern int zpool_vdev_detach(zpool_handle_t *, const char *); -extern int zpool_vdev_remove(zpool_handle_t *, const char *); -extern int zpool_vdev_remove_cancel(zpool_handle_t *); -extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); -extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, - splitflags_t); - -extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); -extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); -extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); - -extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, - boolean_t *, boolean_t *); -extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, - boolean_t *, boolean_t *, boolean_t *); -extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *, - zpool_boot_label_t, uint64_t, int *); - -/* - * Functions to manage pool properties - */ -extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); -extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, - size_t proplen, zprop_source_t *, boolean_t); -extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, - zprop_source_t *); - -extern const char *zpool_prop_to_name(zpool_prop_t); -extern const char *zpool_prop_values(zpool_prop_t); - -/* - * Pool health statistics. - */ -typedef enum { - /* - * The following correspond to faults as defined in the (fault.fs.zfs.*) - * event namespace. Each is associated with a corresponding message ID. - * This must be kept in sync with the zfs_msgid_table in - * lib/libzfs/libzfs_status.c. 
- */ - ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ - ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ - ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ - ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ - ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ - ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ - ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ - ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ - ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ - ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ - ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ - ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */ - ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ - ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ - ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ - ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ - ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ - - /* - * If the pool has unsupported features but can still be opened in - * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the - * pool has unsupported features but cannot be opened at all, its - * status is ZPOOL_STATUS_UNSUP_FEAT_READ. - */ - ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ - ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ - - /* - * These faults have no corresponding message ID. At the time we are - * checking the status, the original reason for the FMA fault (I/O or - * checksum errors) has been lost. - */ - ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ - ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ - - /* - * The following are not faults per se, but still an error possibly - * requiring administrative attention. 
There is no corresponding - * message ID. - */ - ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ - ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ - ZPOOL_STATUS_RESILVERING, /* device being resilvered */ - ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ - ZPOOL_STATUS_REMOVED_DEV, /* removed device */ - ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ - - /* - * Finally, the following indicates a healthy pool. - */ - ZPOOL_STATUS_OK -} zpool_status_t; - -extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); -extern zpool_status_t zpool_import_status(nvlist_t *, char **); -extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); - -/* - * Statistics and configuration functions. - */ -extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); -extern nvlist_t *zpool_get_features(zpool_handle_t *); -extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); -extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); -extern boolean_t zpool_is_bootable(zpool_handle_t *); - -/* - * Import and export functions - */ -extern int zpool_export(zpool_handle_t *, boolean_t, const char *); -extern int zpool_export_force(zpool_handle_t *, const char *); -extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, - char *altroot); -extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, - nvlist_t *, int); -extern void zpool_print_unsup_feat(nvlist_t *config); - -/* - * Search for pools to import - */ - -typedef struct importargs { - char **path; /* a list of paths to search */ - int paths; /* number of paths to search */ - char *poolname; /* name of a pool to find */ - uint64_t guid; /* guid of a pool to find */ - char *cachefile; /* cachefile to use for import */ - int can_be_active : 1; /* can the pool be active? */ - int unique : 1; /* does 'poolname' already exist? 
*/ - int exists : 1; /* set on return if pool already exists */ - nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ -} importargs_t; - -extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); -extern int zpool_tryimport(libzfs_handle_t *hdl, char *target, - nvlist_t **configp, importargs_t *args); - -/* legacy pool search routines */ -extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); -extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, - char *, uint64_t); - -/* - * Miscellaneous pool functions - */ -struct zfs_cmd; - -extern const char *zfs_history_event_names[]; - -typedef enum { - VDEV_NAME_PATH = 1 << 0, - VDEV_NAME_GUID = 1 << 1, - VDEV_NAME_FOLLOW_LINKS = 1 << 2, - VDEV_NAME_TYPE_ID = 1 << 3, -} vdev_name_t; - -extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, - int name_flags); -extern int zpool_upgrade(zpool_handle_t *, uint64_t); -extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *, - boolean_t *); -extern int zpool_history_unpack(char *, uint64_t, uint64_t *, - nvlist_t ***, uint_t *); -extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, - size_t len); -extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *); -extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); -extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, - nvlist_t *); -extern int zpool_checkpoint(zpool_handle_t *); -extern int zpool_discard_checkpoint(zpool_handle_t *); - -/* - * Basic handle manipulations. These functions do not create or destroy the - * underlying datasets, only the references to them. 
- */ -extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); -extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); -extern void zfs_close(zfs_handle_t *); -extern zfs_type_t zfs_get_type(const zfs_handle_t *); -extern const char *zfs_get_name(const zfs_handle_t *); -extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); -extern const char *zfs_get_pool_name(const zfs_handle_t *); - -/* - * Property management functions. Some functions are shared with the kernel, - * and are found in sys/fs/zfs.h. - */ - -/* - * zfs dataset property management - */ -extern const char *zfs_prop_default_string(zfs_prop_t); -extern uint64_t zfs_prop_default_numeric(zfs_prop_t); -extern const char *zfs_prop_column_name(zfs_prop_t); -extern boolean_t zfs_prop_align_right(zfs_prop_t); - -extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, - nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); - -extern const char *zfs_prop_to_name(zfs_prop_t); -extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); -extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *); -extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, - zprop_source_t *, char *, size_t, boolean_t); -extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, - boolean_t); -extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, - zprop_source_t *, char *, size_t); -extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, - uint64_t *propvalue); -extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, - char *propbuf, int proplen, boolean_t literal); -extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, - uint64_t *propvalue); -extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, - char *propbuf, int proplen, boolean_t literal); -extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, - char *buf, size_t len); 
-extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); -extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); -extern const char *zfs_prop_values(zfs_prop_t); -extern int zfs_prop_is_string(zfs_prop_t prop); -extern nvlist_t *zfs_get_user_props(zfs_handle_t *); -extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); -extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); - - -typedef struct zprop_list { - int pl_prop; - char *pl_user_prop; - struct zprop_list *pl_next; - boolean_t pl_all; - size_t pl_width; - size_t pl_recvd_width; - boolean_t pl_fixed; -} zprop_list_t; - -extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, - boolean_t); -extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); - -#define ZFS_MOUNTPOINT_NONE "none" -#define ZFS_MOUNTPOINT_LEGACY "legacy" - -#define ZFS_FEATURE_DISABLED "disabled" -#define ZFS_FEATURE_ENABLED "enabled" -#define ZFS_FEATURE_ACTIVE "active" - -#define ZFS_UNSUPPORTED_INACTIVE "inactive" -#define ZFS_UNSUPPORTED_READONLY "readonly" - -/* - * zpool property management - */ -extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); -extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, - size_t); -extern const char *zpool_prop_default_string(zpool_prop_t); -extern uint64_t zpool_prop_default_numeric(zpool_prop_t); -extern const char *zpool_prop_column_name(zpool_prop_t); -extern boolean_t zpool_prop_align_right(zpool_prop_t); - -/* - * Functions shared by zfs and zpool property management. 
- */ -extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, - boolean_t ordered, zfs_type_t type); -extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, - zfs_type_t); -extern void zprop_free_list(zprop_list_t *); - -#define ZFS_GET_NCOLS 5 - -typedef enum { - GET_COL_NONE, - GET_COL_NAME, - GET_COL_PROPERTY, - GET_COL_VALUE, - GET_COL_RECVD, - GET_COL_SOURCE -} zfs_get_column_t; - -/* - * Functions for printing zfs or zpool properties - */ -typedef struct zprop_get_cbdata { - int cb_sources; - zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; - int cb_colwidths[ZFS_GET_NCOLS + 1]; - boolean_t cb_scripted; - boolean_t cb_literal; - boolean_t cb_first; - zprop_list_t *cb_proplist; - zfs_type_t cb_type; -} zprop_get_cbdata_t; - -void zprop_print_one_property(const char *, zprop_get_cbdata_t *, - const char *, const char *, zprop_source_t, const char *, - const char *); - -/* - * Iterator functions. - */ -typedef int (*zfs_iter_f)(zfs_handle_t *, void *); -extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); -extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); -extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); -extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); -extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *, - uint64_t, uint64_t); -extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *, - uint64_t, uint64_t); -extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); -extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); - -typedef struct get_all_cb { - zfs_handle_t **cb_handles; - size_t cb_alloc; - size_t cb_used; -} get_all_cb_t; - -void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, - zfs_iter_f, void*, boolean_t); - -void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); - -/* - * Functions to create and destroy datasets. 
- */ -extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, - nvlist_t *); -extern int zfs_create_ancestors(libzfs_handle_t *, const char *); -extern int zfs_destroy(zfs_handle_t *, boolean_t); -extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); -extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); -extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); -extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); -extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, - nvlist_t *props); -extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); - -typedef struct renameflags { - /* recursive rename */ - int recurse : 1; - - /* don't unmount file systems */ - int nounmount : 1; - - /* force unmount file systems */ - int forceunmount : 1; -} renameflags_t; - -extern int zfs_rename(zfs_handle_t *, const char *, const char *, - renameflags_t flags); - -typedef struct sendflags { - /* print informational messages (ie, -v was specified) */ - boolean_t verbose; - - /* recursive send (ie, -R) */ - boolean_t replicate; - - /* for incrementals, do all intermediate snapshots */ - boolean_t doall; - - /* if dataset is a clone, do incremental from its origin */ - boolean_t fromorigin; - - /* do deduplication */ - boolean_t dedup; - - /* send properties (ie, -p) */ - boolean_t props; - - /* do not send (no-op, ie. -n) */ - boolean_t dryrun; - - /* parsable verbose output (ie. -P) */ - boolean_t parsable; - - /* show progress (ie. -v) */ - boolean_t progress; - - /* large blocks (>128K) are permitted */ - boolean_t largeblock; - - /* WRITE_EMBEDDED records of type DATA are permitted */ - boolean_t embed_data; - - /* compressed WRITE records are permitted */ - boolean_t compress; - - /* show progress as process title(ie. 
-V) */ - boolean_t progressastitle; -} sendflags_t; - -typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); - -extern int zfs_send(zfs_handle_t *, const char *, const char *, - sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); -extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags); -extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, - const char *); -extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, - const char *token); - -extern int zfs_promote(zfs_handle_t *); -extern int zfs_hold(zfs_handle_t *, const char *, const char *, - boolean_t, int); -extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); -extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); -extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); -extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); - -typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, - uid_t rid, uint64_t space); - -extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, - zfs_userspace_cb_t, void *); - -extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); -extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); - -typedef struct recvflags { - /* print informational messages (ie, -v was specified) */ - boolean_t verbose; - - /* the destination is a prefix, not the exact fs (ie, -d) */ - boolean_t isprefix; - - /* - * Only the tail of the sent snapshot path is appended to the - * destination to determine the received snapshot name (ie, -e). 
- */ - boolean_t istail; - - /* do not actually do the recv, just check if it would work (ie, -n) */ - boolean_t dryrun; - - /* rollback/destroy filesystems as necessary (eg, -F) */ - boolean_t force; - - /* set "canmount=off" on all modified filesystems */ - boolean_t canmountoff; - - /* - * Mark the file systems as "resumable" and do not destroy them if the - * receive is interrupted - */ - boolean_t resumable; - - /* byteswap flag is used internally; callers need not specify */ - boolean_t byteswap; - - /* do not mount file systems as they are extracted (private) */ - boolean_t nomount; - - /* force unmount while recv snapshot (private) */ - boolean_t forceunmount; -} recvflags_t; - -extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, - recvflags_t *, int, avl_tree_t *); - -typedef enum diff_flags { - ZFS_DIFF_PARSEABLE = 0x1, - ZFS_DIFF_TIMESTAMP = 0x2, - ZFS_DIFF_CLASSIFY = 0x4 -} diff_flags_t; - -extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, - int); - -/* - * Miscellaneous functions. - */ -extern const char *zfs_type_to_name(zfs_type_t); -extern void zfs_refresh_properties(zfs_handle_t *); -extern int zfs_name_valid(const char *, zfs_type_t); -extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); -extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, - zfs_type_t); -extern int zfs_spa_version(zfs_handle_t *, int *); -extern boolean_t zfs_bookmark_exists(const char *path); -extern ulong_t get_system_hostid(void); - -/* - * Mount support functions. - */ -extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); -extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); -extern int zfs_mount(zfs_handle_t *, const char *, int); -extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *); -extern int zfs_unmount(zfs_handle_t *, const char *, int); -extern int zfs_unmountall(zfs_handle_t *, int); - -/* - * Share support functions. 
- */ -extern boolean_t zfs_is_shared(zfs_handle_t *); -extern int zfs_share(zfs_handle_t *); -extern int zfs_unshare(zfs_handle_t *); - -/* - * Protocol-specific share support functions. - */ -extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); -extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); -extern int zfs_share_nfs(zfs_handle_t *); -extern int zfs_share_smb(zfs_handle_t *); -extern int zfs_shareall(zfs_handle_t *); -extern int zfs_unshare_nfs(zfs_handle_t *, const char *); -extern int zfs_unshare_smb(zfs_handle_t *, const char *); -extern int zfs_unshareall_nfs(zfs_handle_t *); -extern int zfs_unshareall_smb(zfs_handle_t *); -extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); -extern int zfs_unshareall(zfs_handle_t *); -extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, - void *, void *, int, zfs_share_op_t); - -/* - * FreeBSD-specific jail support function. - */ -extern int zfs_jail(zfs_handle_t *, int, int); - -/* - * When dealing with nvlists, verify() is extremely useful - */ -#ifndef verify -#ifdef NDEBUG -#define verify(EX) ((void)(EX)) -#else -#define verify(EX) assert(EX) -#endif -#endif - -/* - * Utility function to convert a number to a human-readable form. - */ -extern void zfs_nicenum(uint64_t, char *, size_t); -extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); - -/* - * Given a device or file, determine if it is part of a pool. - */ -extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, - boolean_t *); - -/* - * Label manipulation. - */ -extern int zpool_read_label(int, nvlist_t **); -extern int zpool_read_all_labels(int, nvlist_t **); -extern int zpool_clear_label(int); -extern int zpool_set_bootenv(zpool_handle_t *, const char *); -extern int zpool_get_bootenv(zpool_handle_t *, char *, size_t, off_t); - -/* is this zvol valid for use as a dump device? 
*/ -extern int zvol_check_dump_config(char *); - -/* - * Management interfaces for SMB ACL files - */ - -int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); -int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); -int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); -int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); - -/* - * Enable and disable datasets within a pool by mounting/unmounting and - * sharing/unsharing them. - */ -extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); -extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); - -/* - * Mappings between vdev and FRU. - */ -extern void libzfs_fru_refresh(libzfs_handle_t *); -extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); -extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); -extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, - const char *); -extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); -extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); - -#ifndef illumos -extern int zmount(const char *, const char *, int, char *, char *, int, char *, - int); -#endif -extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *); - -/* Allow consumers to initialize libshare externally for optimal performance */ -extern int zfs_init_libshare_arg(libzfs_handle_t *, int, void *); -/* - * For most consumers, zfs_init_libshare_arg is sufficient on its own, and - * zfs_uninit_libshare is unnecessary. zfs_uninit_libshare should only be called - * if the caller has already initialized libshare for one set of zfs handles, - * and wishes to share or unshare filesystems outside of that set. In that case, - * the caller should uninitialize libshare, and then re-initialize it with the - * new handles being shared or unshared. 
- */ -extern void zfs_uninit_libshare(libzfs_handle_t *); -#ifdef __cplusplus -} -#endif - -#endif /* _LIBZFS_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c deleted file mode 100644 index 7bbb68328f29..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c +++ /dev/null @@ -1,736 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Portions Copyright 2007 Ramprakash Jelari - * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. - * All rights reserved. - * Copyright (c) 2014, 2016 by Delphix. All rights reserved. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - */ - -#include <libintl.h> -#include <libuutil.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <zone.h> - -#include <libzfs.h> - -#include "libzfs_impl.h" - -/* - * Structure to keep track of dataset state. 
Before changing the 'sharenfs' or - * 'mountpoint' property, we record whether the filesystem was previously - * mounted/shared. This prior state dictates whether we remount/reshare the - * dataset after the property has been changed. - * - * The interface consists of the following sequence of functions: - * - * changelist_gather() - * changelist_prefix() - * < change property > - * changelist_postfix() - * changelist_free() - * - * Other interfaces: - * - * changelist_remove() - remove a node from a gathered list - * changelist_rename() - renames all datasets appropriately when doing a rename - * changelist_unshare() - unshares all the nodes in a given changelist - * changelist_haszonedchild() - check if there is any child exported to - * a local zone - */ -typedef struct prop_changenode { - zfs_handle_t *cn_handle; - int cn_shared; - int cn_mounted; - int cn_zoned; - boolean_t cn_needpost; /* is postfix() needed? */ - uu_list_node_t cn_listnode; -} prop_changenode_t; - -struct prop_changelist { - zfs_prop_t cl_prop; - zfs_prop_t cl_realprop; - zfs_prop_t cl_shareprop; /* used with sharenfs/sharesmb */ - uu_list_pool_t *cl_pool; - uu_list_t *cl_list; - boolean_t cl_waslegacy; - boolean_t cl_allchildren; - boolean_t cl_alldependents; - int cl_mflags; /* Mount flags */ - int cl_gflags; /* Gather request flags */ - boolean_t cl_haszonedchild; - boolean_t cl_sorted; -}; - -/* - * If the property is 'mountpoint', go through and unmount filesystems as - * necessary. We don't do the same for 'sharenfs', because we can just re-share - * with different options without interrupting service. We do handle 'sharesmb' - * since there may be old resource names that need to be removed. 
- */ -int -changelist_prefix(prop_changelist_t *clp) -{ - prop_changenode_t *cn; - int ret = 0; - - if (clp->cl_prop != ZFS_PROP_MOUNTPOINT && - clp->cl_prop != ZFS_PROP_SHARESMB) - return (0); - - for (cn = uu_list_first(clp->cl_list); cn != NULL; - cn = uu_list_next(clp->cl_list, cn)) { - - /* if a previous loop failed, set the remaining to false */ - if (ret == -1) { - cn->cn_needpost = B_FALSE; - continue; - } - - /* - * If we are in the global zone, but this dataset is exported - * to a local zone, do nothing. - */ - if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned) - continue; - - if (!ZFS_IS_VOLUME(cn->cn_handle)) { - /* - * Do the property specific processing. - */ - switch (clp->cl_prop) { - case ZFS_PROP_MOUNTPOINT: - if (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT) - break; - if (zfs_unmount(cn->cn_handle, NULL, - clp->cl_mflags) != 0) { - ret = -1; - cn->cn_needpost = B_FALSE; - } - break; - case ZFS_PROP_SHARESMB: - (void) zfs_unshare_smb(cn->cn_handle, NULL); - break; - - default: - break; - } - } - } - - if (ret == -1) - (void) changelist_postfix(clp); - - return (ret); -} - -/* - * If the property is 'mountpoint' or 'sharenfs', go through and remount and/or - * reshare the filesystems as necessary. In changelist_gather() we recorded - * whether the filesystem was previously shared or mounted. The action we take - * depends on the previous state, and whether the value was previously 'legacy'. - * For non-legacy properties, we only remount/reshare the filesystem if it was - * previously mounted/shared. Otherwise, we always remount/reshare the - * filesystem. - */ -int -changelist_postfix(prop_changelist_t *clp) -{ - prop_changenode_t *cn; - char shareopts[ZFS_MAXPROPLEN]; - int errors = 0; - libzfs_handle_t *hdl; -#ifdef illumos - size_t num_datasets = 0, i; - zfs_handle_t **zhandle_arr; - sa_init_selective_arg_t sharearg; -#endif - - /* - * If we're changing the mountpoint, attempt to destroy the underlying - * mountpoint. 
All other datasets will have inherited from this dataset - * (in which case their mountpoints exist in the filesystem in the new - * location), or have explicit mountpoints set (in which case they won't - * be in the changelist). - */ - if ((cn = uu_list_last(clp->cl_list)) == NULL) - return (0); - - if (clp->cl_prop == ZFS_PROP_MOUNTPOINT && - !(clp->cl_gflags & CL_GATHER_DONT_UNMOUNT)) { - remove_mountpoint(cn->cn_handle); - } - - /* - * It is possible that the changelist_prefix() used libshare - * to unshare some entries. Since libshare caches data, an - * attempt to reshare during postfix can fail unless libshare - * is uninitialized here so that it will reinitialize later. - */ - if (cn->cn_handle != NULL) { - hdl = cn->cn_handle->zfs_hdl; - assert(hdl != NULL); - zfs_uninit_libshare(hdl); - -#ifdef illumos - /* - * For efficiencies sake, we initialize libshare for only a few - * shares (the ones affected here). Future initializations in - * this process should just use the cached initialization. - */ - for (cn = uu_list_last(clp->cl_list); cn != NULL; - cn = uu_list_prev(clp->cl_list, cn)) { - num_datasets++; - } - - zhandle_arr = zfs_alloc(hdl, - num_datasets * sizeof (zfs_handle_t *)); - for (i = 0, cn = uu_list_last(clp->cl_list); cn != NULL; - cn = uu_list_prev(clp->cl_list, cn)) { - zhandle_arr[i++] = cn->cn_handle; - zfs_refresh_properties(cn->cn_handle); - } - assert(i == num_datasets); - sharearg.zhandle_arr = zhandle_arr; - sharearg.zhandle_len = num_datasets; - errors = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE, - &sharearg); - free(zhandle_arr); -#endif - } - /* - * We walk the datasets in reverse, because we want to mount any parent - * datasets before mounting the children. We walk all datasets even if - * there are errors. 
- */ - for (cn = uu_list_last(clp->cl_list); cn != NULL; - cn = uu_list_prev(clp->cl_list, cn)) { - - boolean_t sharenfs; - boolean_t sharesmb; - boolean_t mounted; - - /* - * If we are in the global zone, but this dataset is exported - * to a local zone, do nothing. - */ - if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned) - continue; - - /* Only do post-processing if it's required */ - if (!cn->cn_needpost) - continue; - cn->cn_needpost = B_FALSE; - -#ifndef illumos - zfs_refresh_properties(cn->cn_handle); -#endif - - if (ZFS_IS_VOLUME(cn->cn_handle)) - continue; - - /* - * Remount if previously mounted or mountpoint was legacy, - * or sharenfs or sharesmb property is set. - */ - sharenfs = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS, - shareopts, sizeof (shareopts), NULL, NULL, 0, - B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); - - sharesmb = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARESMB, - shareopts, sizeof (shareopts), NULL, NULL, 0, - B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); - - mounted = (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT) || - zfs_is_mounted(cn->cn_handle, NULL); - - if (!mounted && (cn->cn_mounted || - ((sharenfs || sharesmb || clp->cl_waslegacy) && - (zfs_prop_get_int(cn->cn_handle, - ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) { - - if (zfs_mount(cn->cn_handle, NULL, 0) != 0) - errors++; - else - mounted = TRUE; - } - - /* - * If the file system is mounted we always re-share even - * if the filesystem is currently shared, so that we can - * adopt any new options. - */ - if (sharenfs && mounted) - errors += zfs_share_nfs(cn->cn_handle); - else if (cn->cn_shared || clp->cl_waslegacy) - errors += zfs_unshare_nfs(cn->cn_handle, NULL); - if (sharesmb && mounted) - errors += zfs_share_smb(cn->cn_handle); - else if (cn->cn_shared || clp->cl_waslegacy) - errors += zfs_unshare_smb(cn->cn_handle, NULL); - } - - return (errors ? -1 : 0); -} - -/* - * Is this "dataset" a child of "parent"? 
- */ -boolean_t -isa_child_of(const char *dataset, const char *parent) -{ - int len; - - len = strlen(parent); - - if (strncmp(dataset, parent, len) == 0 && - (dataset[len] == '@' || dataset[len] == '/' || - dataset[len] == '\0')) - return (B_TRUE); - else - return (B_FALSE); - -} - -/* - * If we rename a filesystem, child filesystem handles are no longer valid - * since we identify each dataset by its name in the ZFS namespace. As a - * result, we have to go through and fix up all the names appropriately. We - * could do this automatically if libzfs kept track of all open handles, but - * this is a lot less work. - */ -void -changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) -{ - prop_changenode_t *cn; - char newname[ZFS_MAX_DATASET_NAME_LEN]; - - for (cn = uu_list_first(clp->cl_list); cn != NULL; - cn = uu_list_next(clp->cl_list, cn)) { - /* - * Do not rename a clone that's not in the source hierarchy. - */ - if (!isa_child_of(cn->cn_handle->zfs_name, src)) - continue; - - /* - * Destroy the previous mountpoint if needed. - */ - remove_mountpoint(cn->cn_handle); - - (void) strlcpy(newname, dst, sizeof (newname)); - (void) strcat(newname, cn->cn_handle->zfs_name + strlen(src)); - - (void) strlcpy(cn->cn_handle->zfs_name, newname, - sizeof (cn->cn_handle->zfs_name)); - } -} - -/* - * Given a gathered changelist for the 'sharenfs' or 'sharesmb' property, - * unshare all the datasets in the list. - */ -int -changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) -{ - prop_changenode_t *cn; - int ret = 0; - - if (clp->cl_prop != ZFS_PROP_SHARENFS && - clp->cl_prop != ZFS_PROP_SHARESMB) - return (0); - - for (cn = uu_list_first(clp->cl_list); cn != NULL; - cn = uu_list_next(clp->cl_list, cn)) { - if (zfs_unshare_proto(cn->cn_handle, NULL, proto) != 0) - ret = -1; - } - - return (ret); -} - -/* - * Check if there is any child exported to a local zone in a given changelist. 
- * This information has already been recorded while gathering the changelist - * via changelist_gather(). - */ -int -changelist_haszonedchild(prop_changelist_t *clp) -{ - return (clp->cl_haszonedchild); -} - -/* - * Remove a node from a gathered list. - */ -void -changelist_remove(prop_changelist_t *clp, const char *name) -{ - prop_changenode_t *cn; - - for (cn = uu_list_first(clp->cl_list); cn != NULL; - cn = uu_list_next(clp->cl_list, cn)) { - - if (strcmp(cn->cn_handle->zfs_name, name) == 0) { - uu_list_remove(clp->cl_list, cn); - zfs_close(cn->cn_handle); - free(cn); - return; - } - } -} - -/* - * Release any memory associated with a changelist. - */ -void -changelist_free(prop_changelist_t *clp) -{ - prop_changenode_t *cn; - void *cookie; - - if (clp->cl_list) { - cookie = NULL; - while ((cn = uu_list_teardown(clp->cl_list, &cookie)) != NULL) { - zfs_close(cn->cn_handle); - free(cn); - } - - uu_list_destroy(clp->cl_list); - } - if (clp->cl_pool) - uu_list_pool_destroy(clp->cl_pool); - - free(clp); -} - -static int -change_one(zfs_handle_t *zhp, void *data) -{ - prop_changelist_t *clp = data; - char property[ZFS_MAXPROPLEN]; - char where[64]; - prop_changenode_t *cn; - zprop_source_t sourcetype; - zprop_source_t share_sourcetype; - - /* - * We only want to unmount/unshare those filesystems that may inherit - * from the target filesystem. If we find any filesystem with a - * locally set mountpoint, we ignore any children since changing the - * property will not affect them. If this is a rename, we iterate - * over all children regardless, since we need them unmounted in - * order to do the rename. Also, if this is a volume and we're doing - * a rename, then always add it to the changelist. 
- */ - - if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) && - zfs_prop_get(zhp, clp->cl_prop, property, - sizeof (property), &sourcetype, where, sizeof (where), - B_FALSE) != 0) { - zfs_close(zhp); - return (0); - } - - /* - * If we are "watching" sharenfs or sharesmb - * then check out the companion property which is tracked - * in cl_shareprop - */ - if (clp->cl_shareprop != ZPROP_INVAL && - zfs_prop_get(zhp, clp->cl_shareprop, property, - sizeof (property), &share_sourcetype, where, sizeof (where), - B_FALSE) != 0) { - zfs_close(zhp); - return (0); - } - - if (clp->cl_alldependents || clp->cl_allchildren || - sourcetype == ZPROP_SRC_DEFAULT || - sourcetype == ZPROP_SRC_INHERITED || - (clp->cl_shareprop != ZPROP_INVAL && - (share_sourcetype == ZPROP_SRC_DEFAULT || - share_sourcetype == ZPROP_SRC_INHERITED))) { - if ((cn = zfs_alloc(zfs_get_handle(zhp), - sizeof (prop_changenode_t))) == NULL) { - zfs_close(zhp); - return (-1); - } - - cn->cn_handle = zhp; - cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) || - zfs_is_mounted(zhp, NULL); - cn->cn_shared = zfs_is_shared(zhp); - cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - cn->cn_needpost = B_TRUE; - - /* Indicate if any child is exported to a local zone. */ - if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned) - clp->cl_haszonedchild = B_TRUE; - - uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool); - - if (clp->cl_sorted) { - uu_list_index_t idx; - - (void) uu_list_find(clp->cl_list, cn, NULL, - &idx); - uu_list_insert(clp->cl_list, cn, idx); - } else { - /* - * Add this child to beginning of the list. Children - * below this one in the hierarchy will get added above - * this one in the list. This produces a list in - * reverse dataset name order. - * This is necessary when the original mountpoint - * is legacy or none. 
- */ - verify(uu_list_insert_before(clp->cl_list, - uu_list_first(clp->cl_list), cn) == 0); - } - - if (!clp->cl_alldependents) - return (zfs_iter_children(zhp, change_one, data)); - } else { - zfs_close(zhp); - } - - return (0); -} - -/*ARGSUSED*/ -static int -compare_mountpoints(const void *a, const void *b, void *unused) -{ - const prop_changenode_t *ca = a; - const prop_changenode_t *cb = b; - - char mounta[MAXPATHLEN]; - char mountb[MAXPATHLEN]; - - boolean_t hasmounta, hasmountb; - - /* - * When unsharing or unmounting filesystems, we need to do it in - * mountpoint order. This allows the user to have a mountpoint - * hierarchy that is different from the dataset hierarchy, and still - * allow it to be changed. However, if either dataset doesn't have a - * mountpoint (because it is a volume or a snapshot), we place it at the - * end of the list, because it doesn't affect our change at all. - */ - hasmounta = (zfs_prop_get(ca->cn_handle, ZFS_PROP_MOUNTPOINT, mounta, - sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); - hasmountb = (zfs_prop_get(cb->cn_handle, ZFS_PROP_MOUNTPOINT, mountb, - sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); - - if (!hasmounta && hasmountb) - return (-1); - else if (hasmounta && !hasmountb) - return (1); - else if (!hasmounta && !hasmountb) - return (0); - else - return (strcmp(mountb, mounta)); -} - -/* - * Given a ZFS handle and a property, construct a complete list of datasets - * that need to be modified as part of this process. For anything but the - * 'mountpoint' and 'sharenfs' properties, this just returns an empty list. - * Otherwise, we iterate over all children and look for any datasets that - * inherit the property. For each such dataset, we add it to the list and - * mark whether it was shared beforehand. 
- */ -prop_changelist_t * -changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags, - int mnt_flags) -{ - prop_changelist_t *clp; - prop_changenode_t *cn; - zfs_handle_t *temp; - char property[ZFS_MAXPROPLEN]; - uu_compare_fn_t *compare = NULL; - boolean_t legacy = B_FALSE; - - if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL) - return (NULL); - - /* - * For mountpoint-related tasks, we want to sort everything by - * mountpoint, so that we mount and unmount them in the appropriate - * order, regardless of their position in the hierarchy. - */ - if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED || - prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS || - prop == ZFS_PROP_SHARESMB) { - - if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, - property, sizeof (property), - NULL, NULL, 0, B_FALSE) == 0 && - (strcmp(property, "legacy") == 0 || - strcmp(property, "none") == 0)) { - - legacy = B_TRUE; - } - if (!legacy) { - compare = compare_mountpoints; - clp->cl_sorted = B_TRUE; - } - } - - clp->cl_pool = uu_list_pool_create("changelist_pool", - sizeof (prop_changenode_t), - offsetof(prop_changenode_t, cn_listnode), - compare, 0); - if (clp->cl_pool == NULL) { - assert(uu_error() == UU_ERROR_NO_MEMORY); - (void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error"); - changelist_free(clp); - return (NULL); - } - - clp->cl_list = uu_list_create(clp->cl_pool, NULL, - clp->cl_sorted ? UU_LIST_SORTED : 0); - clp->cl_gflags = gather_flags; - clp->cl_mflags = mnt_flags; - - if (clp->cl_list == NULL) { - assert(uu_error() == UU_ERROR_NO_MEMORY); - (void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error"); - changelist_free(clp); - return (NULL); - } - - /* - * If this is a rename or the 'zoned' property, we pretend we're - * changing the mountpoint and flag it so we can catch all children in - * change_one(). - * - * Flag cl_alldependents to catch all children plus the dependents - * (clones) that are not in the hierarchy. 
- */ - if (prop == ZFS_PROP_NAME) { - clp->cl_prop = ZFS_PROP_MOUNTPOINT; - clp->cl_alldependents = B_TRUE; - } else if (prop == ZFS_PROP_ZONED) { - clp->cl_prop = ZFS_PROP_MOUNTPOINT; - clp->cl_allchildren = B_TRUE; - } else if (prop == ZFS_PROP_CANMOUNT) { - clp->cl_prop = ZFS_PROP_MOUNTPOINT; - } else if (prop == ZFS_PROP_VOLSIZE) { - clp->cl_prop = ZFS_PROP_MOUNTPOINT; - } else { - clp->cl_prop = prop; - } - clp->cl_realprop = prop; - - if (clp->cl_prop != ZFS_PROP_MOUNTPOINT && - clp->cl_prop != ZFS_PROP_SHARENFS && - clp->cl_prop != ZFS_PROP_SHARESMB) - return (clp); - - /* - * If watching SHARENFS or SHARESMB then - * also watch its companion property. - */ - if (clp->cl_prop == ZFS_PROP_SHARENFS) - clp->cl_shareprop = ZFS_PROP_SHARESMB; - else if (clp->cl_prop == ZFS_PROP_SHARESMB) - clp->cl_shareprop = ZFS_PROP_SHARENFS; - - if (clp->cl_alldependents) { - if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) { - changelist_free(clp); - return (NULL); - } - } else if (zfs_iter_children(zhp, change_one, clp) != 0) { - changelist_free(clp); - return (NULL); - } - - /* - * We have to re-open ourselves because we auto-close all the handles - * and can't tell the difference. - */ - if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp), - ZFS_TYPE_DATASET)) == NULL) { - changelist_free(clp); - return (NULL); - } - - /* - * Always add ourself to the list. We add ourselves to the end so that - * we're the last to be unmounted. 
- */ - if ((cn = zfs_alloc(zhp->zfs_hdl, - sizeof (prop_changenode_t))) == NULL) { - zfs_close(temp); - changelist_free(clp); - return (NULL); - } - - cn->cn_handle = temp; - cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) || - zfs_is_mounted(temp, NULL); - cn->cn_shared = zfs_is_shared(temp); - cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - cn->cn_needpost = B_TRUE; - - uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool); - if (clp->cl_sorted) { - uu_list_index_t idx; - (void) uu_list_find(clp->cl_list, cn, NULL, &idx); - uu_list_insert(clp->cl_list, cn, idx); - } else { - /* - * Add the target dataset to the end of the list. - * The list is not really unsorted. The list will be - * in reverse dataset name order. This is necessary - * when the original mountpoint is legacy or none. - */ - verify(uu_list_insert_after(clp->cl_list, - uu_list_last(clp->cl_list), cn) == 0); - } - - /* - * If the mountpoint property was previously 'legacy', or 'none', - * record it as the behavior of changelist_postfix() will be different. - */ - if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) && legacy) { - /* - * do not automatically mount ex-legacy datasets if - * we specifically set canmount to noauto - */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) != - ZFS_CANMOUNT_NOAUTO) - clp->cl_waslegacy = B_TRUE; - } - - return (clp); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c deleted file mode 100644 index 7545331b40b4..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * CDDL HEADER SART - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - */ - -#include "libzfs_compat.h" - -int zfs_ioctl_version = ZFS_IOCVER_UNDEF; -static int zfs_spa_version = -1; - -/* - * Get zfs_ioctl_version - */ -int -get_zfs_ioctl_version(void) -{ - size_t ver_size; - int ver = ZFS_IOCVER_NONE; - - ver_size = sizeof(ver); - sysctlbyname("vfs.zfs.version.ioctl", &ver, &ver_size, NULL, 0); - - return (ver); -} - -/* - * Get the SPA version - */ -static int -get_zfs_spa_version(void) -{ - size_t ver_size; - int ver = 0; - - ver_size = sizeof(ver); - sysctlbyname("vfs.zfs.version.spa", &ver, &ver_size, NULL, 0); - - return (ver); -} - -/* - * This is FreeBSD version of ioctl, because Solaris' ioctl() updates - * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an - * error is returned zc_nvlist_dst_size won't be updated. 
- */ -int -zcmd_ioctl(int fd, int request, zfs_cmd_t *zc) -{ - size_t oldsize; - int ret, cflag = ZFS_CMD_COMPAT_NONE; - - if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) - zfs_ioctl_version = get_zfs_ioctl_version(); - - if (zfs_ioctl_version >= ZFS_IOCVER_DEADMAN) { - switch (zfs_ioctl_version) { - case ZFS_IOCVER_INLANES: - cflag = ZFS_CMD_COMPAT_INLANES; - break; - case ZFS_IOCVER_RESUME: - cflag = ZFS_CMD_COMPAT_RESUME; - break; - case ZFS_IOCVER_EDBP: - cflag = ZFS_CMD_COMPAT_EDBP; - break; - case ZFS_IOCVER_ZCMD: - cflag = ZFS_CMD_COMPAT_ZCMD; - break; - case ZFS_IOCVER_LZC: - cflag = ZFS_CMD_COMPAT_LZC; - break; - case ZFS_IOCVER_DEADMAN: - cflag = ZFS_CMD_COMPAT_DEADMAN; - break; - } - } else { - /* - * If vfs.zfs.version.ioctl is not defined, assume we have v28 - * compatible binaries and use vfs.zfs.version.spa to test for v15 - */ - cflag = ZFS_CMD_COMPAT_V28; - - if (zfs_spa_version < 0) - zfs_spa_version = get_zfs_spa_version(); - - if (zfs_spa_version == SPA_VERSION_15 || - zfs_spa_version == SPA_VERSION_14 || - zfs_spa_version == SPA_VERSION_13) - cflag = ZFS_CMD_COMPAT_V15; - } - - oldsize = zc->zc_nvlist_dst_size; - ret = zcmd_ioctl_compat(fd, request, zc, cflag); - - if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) { - ret = -1; - errno = ENOMEM; - } - - return (ret); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h deleted file mode 100644 index 37616683330a..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * CDDL HEADER SART - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - */ - -#ifndef _LIBZFS_COMPAT_H -#define _LIBZFS_COMPAT_H - -#include <zfs_ioctl_compat.h> - -#ifdef __cplusplus -extern "C" { -#endif - -int get_zfs_ioctl_version(void); -int zcmd_ioctl(int fd, int request, zfs_cmd_t *zc); - -#define ioctl(fd, ioc, zc) zcmd_ioctl((fd), (ioc), (zc)) - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBZFS_COMPAT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c deleted file mode 100644 index b33d86432dc5..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2015 by Syneto S.R.L. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. - */ - -/* - * The pool configuration repository is stored in /etc/zfs/zpool.cache as a - * single packed nvlist. While it would be nice to just read in this - * file from userland, this wouldn't work from a local zone. So we have to have - * a zpool ioctl to return the complete configuration for all pools. In the - * global zone, this will be identical to reading the file and unpacking it in - * userland. - */ - -#include <errno.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <libintl.h> -#include <libuutil.h> - -#include "libzfs_impl.h" - -typedef struct config_node { - char *cn_name; - nvlist_t *cn_config; - uu_avl_node_t cn_avl; -} config_node_t; - -/* ARGSUSED */ -static int -config_node_compare(const void *a, const void *b, void *unused) -{ - int ret; - - const config_node_t *ca = (config_node_t *)a; - const config_node_t *cb = (config_node_t *)b; - - ret = strcmp(ca->cn_name, cb->cn_name); - - if (ret < 0) - return (-1); - else if (ret > 0) - return (1); - else - return (0); -} - -void -namespace_clear(libzfs_handle_t *hdl) -{ - if (hdl->libzfs_ns_avl) { - config_node_t *cn; - void *cookie = NULL; - - while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, - &cookie)) != NULL) { - nvlist_free(cn->cn_config); - free(cn->cn_name); - free(cn); - } - - uu_avl_destroy(hdl->libzfs_ns_avl); - hdl->libzfs_ns_avl = NULL; - } - - if (hdl->libzfs_ns_avlpool) { - 
uu_avl_pool_destroy(hdl->libzfs_ns_avlpool); - hdl->libzfs_ns_avlpool = NULL; - } -} - -/* - * Loads the pool namespace, or re-loads it if the cache has changed. - */ -static int -namespace_reload(libzfs_handle_t *hdl) -{ - nvlist_t *config; - config_node_t *cn; - nvpair_t *elem; - zfs_cmd_t zc = { 0 }; - void *cookie; - - if (hdl->libzfs_ns_gen == 0) { - /* - * This is the first time we've accessed the configuration - * cache. Initialize the AVL tree and then fall through to the - * common code. - */ - if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool", - sizeof (config_node_t), - offsetof(config_node_t, cn_avl), - config_node_compare, UU_DEFAULT)) == NULL) - return (no_memory(hdl)); - - if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool, - NULL, UU_DEFAULT)) == NULL) - return (no_memory(hdl)); - } - - if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) - return (-1); - - for (;;) { - zc.zc_cookie = hdl->libzfs_ns_gen; - if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) { - switch (errno) { - case EEXIST: - /* - * The namespace hasn't changed. - */ - zcmd_free_nvlists(&zc); - return (0); - - case ENOMEM: - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - break; - - default: - zcmd_free_nvlists(&zc); - return (zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, "failed to read " - "pool configuration"))); - } - } else { - hdl->libzfs_ns_gen = zc.zc_cookie; - break; - } - } - - if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - - zcmd_free_nvlists(&zc); - - /* - * Clear out any existing configuration information. 
- */ - cookie = NULL; - while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) { - nvlist_free(cn->cn_config); - free(cn->cn_name); - free(cn); - } - - elem = NULL; - while ((elem = nvlist_next_nvpair(config, elem)) != NULL) { - nvlist_t *child; - uu_avl_index_t where; - - if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) { - nvlist_free(config); - return (-1); - } - - if ((cn->cn_name = zfs_strdup(hdl, - nvpair_name(elem))) == NULL) { - free(cn); - nvlist_free(config); - return (-1); - } - - verify(nvpair_value_nvlist(elem, &child) == 0); - if (nvlist_dup(child, &cn->cn_config, 0) != 0) { - free(cn->cn_name); - free(cn); - nvlist_free(config); - return (no_memory(hdl)); - } - verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where) - == NULL); - - uu_avl_insert(hdl->libzfs_ns_avl, cn, where); - } - - nvlist_free(config); - return (0); -} - -/* - * Retrieve the configuration for the given pool. The configuration is a nvlist - * describing the vdevs, as well as the statistics associated with each one. - */ -nvlist_t * -zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig) -{ - if (oldconfig) - *oldconfig = zhp->zpool_old_config; - return (zhp->zpool_config); -} - -/* - * Retrieves a list of enabled features and their refcounts and caches it in - * the pool handle. - */ -nvlist_t * -zpool_get_features(zpool_handle_t *zhp) -{ - nvlist_t *config, *features; - - config = zpool_get_config(zhp, NULL); - - if (config == NULL || !nvlist_exists(config, - ZPOOL_CONFIG_FEATURE_STATS)) { - int error; - boolean_t missing = B_FALSE; - - error = zpool_refresh_stats(zhp, &missing); - - if (error != 0 || missing) - return (NULL); - - config = zpool_get_config(zhp, NULL); - } - - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, - &features) != 0) - return (NULL); - - return (features); -} - -/* - * Refresh the vdev statistics associated with the given pool. 
This is used in - * iostat to show configuration changes and determine the delta from the last - * time the function was called. This function can fail, in case the pool has - * been destroyed. - */ -int -zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) -{ - zfs_cmd_t zc = { 0 }; - int error; - nvlist_t *config; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - *missing = B_FALSE; - (void) strcpy(zc.zc_name, zhp->zpool_name); - - if (zhp->zpool_config_size == 0) - zhp->zpool_config_size = 1 << 16; - - if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0) - return (-1); - - for (;;) { - if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS, - &zc) == 0) { - /* - * The real error is returned in the zc_cookie field. - */ - error = zc.zc_cookie; - break; - } - - if (errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - } else { - zcmd_free_nvlists(&zc); - if (errno == ENOENT || errno == EINVAL) - *missing = B_TRUE; - zhp->zpool_state = POOL_STATE_UNAVAIL; - return (0); - } - } - - if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - - zcmd_free_nvlists(&zc); - - zhp->zpool_config_size = zc.zc_nvlist_dst_size; - - if (zhp->zpool_config != NULL) { - uint64_t oldtxg, newtxg; - - verify(nvlist_lookup_uint64(zhp->zpool_config, - ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0); - - nvlist_free(zhp->zpool_old_config); - - if (oldtxg != newtxg) { - nvlist_free(zhp->zpool_config); - zhp->zpool_old_config = NULL; - } else { - zhp->zpool_old_config = zhp->zpool_config; - } - } - - zhp->zpool_config = config; - if (error) - zhp->zpool_state = POOL_STATE_UNAVAIL; - else - zhp->zpool_state = POOL_STATE_ACTIVE; - - return (0); -} - -/* - * The following environment variables are undocumented - * and should be used for testing purposes only: - * - * __ZFS_POOL_EXCLUDE - don't iterate over the 
pools it lists - * __ZFS_POOL_RESTRICT - iterate only over the pools it lists - * - * This function returns B_TRUE if the pool should be skipped - * during iteration. - */ -boolean_t -zpool_skip_pool(const char *poolname) -{ - static boolean_t initialized = B_FALSE; - static const char *exclude = NULL; - static const char *restricted = NULL; - - const char *cur, *end; - int len; - int namelen = strlen(poolname); - - if (!initialized) { - initialized = B_TRUE; - exclude = getenv("__ZFS_POOL_EXCLUDE"); - restricted = getenv("__ZFS_POOL_RESTRICT"); - } - - if (exclude != NULL) { - cur = exclude; - do { - end = strchr(cur, ' '); - len = (NULL == end) ? strlen(cur) : (end - cur); - if (len == namelen && 0 == strncmp(cur, poolname, len)) - return (B_TRUE); - cur += (len + 1); - } while (NULL != end); - } - - if (NULL == restricted) - return (B_FALSE); - - cur = restricted; - do { - end = strchr(cur, ' '); - len = (NULL == end) ? strlen(cur) : (end - cur); - - if (len == namelen && 0 == strncmp(cur, poolname, len)) { - return (B_FALSE); - } - - cur += (len + 1); - } while (NULL != end); - - return (B_TRUE); -} - -/* - * Iterate over all pools in the system. - */ -int -zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) -{ - config_node_t *cn; - zpool_handle_t *zhp; - int ret; - - /* - * If someone makes a recursive call to zpool_iter(), we want to avoid - * refreshing the namespace because that will invalidate the parent - * context. We allow recursive calls, but simply re-use the same - * namespace AVL tree. 
- */ - if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0) - return (-1); - - hdl->libzfs_pool_iter++; - for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; - cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { - - if (zpool_skip_pool(cn->cn_name)) - continue; - - if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) { - hdl->libzfs_pool_iter--; - return (-1); - } - - if (zhp == NULL) - continue; - - if ((ret = func(zhp, data)) != 0) { - hdl->libzfs_pool_iter--; - return (ret); - } - } - hdl->libzfs_pool_iter--; - - return (0); -} - -/* - * Iterate over root datasets, calling the given function for each. The zfs - * handle passed each time must be explicitly closed by the callback. - */ -int -zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data) -{ - config_node_t *cn; - zfs_handle_t *zhp; - int ret; - - if (namespace_reload(hdl) != 0) - return (-1); - - for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; - cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { - - if (zpool_skip_pool(cn->cn_name)) - continue; - - if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL) - continue; - - if ((ret = func(zhp, data)) != 0) - return (ret); - } - - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c deleted file mode 100644 index 7075d060c78d..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c +++ /dev/null @@ -1,5284 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. - * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright (c) 2013 Martin Matuska. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2017 Nexenta Systems, Inc. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - * Copyright 2017-2018 RackTop Systems. - * Copyright (c) 2019 Datto Inc. - */ - -#include <ctype.h> -#include <errno.h> -#include <libintl.h> -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <stddef.h> -#include <zone.h> -#include <fcntl.h> -#include <sys/mntent.h> -#include <sys/mount.h> -#include <priv.h> -#include <pwd.h> -#include <grp.h> -#include <stddef.h> -#ifdef illumos -#include <idmap.h> -#endif - -#include <sys/dnode.h> -#include <sys/spa.h> -#include <sys/zap.h> -#include <sys/misc.h> -#include <libzfs.h> - -#include "zfs_namecheck.h" -#include "zfs_prop.h" -#include "libzfs_impl.h" -#include "zfs_deleg.h" - -static int userquota_propname_decode(const char *propname, boolean_t zoned, - zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); - -/* - * Given a single type (not a mask of types), return the type in a human - * readable form. 
- */ -const char * -zfs_type_to_name(zfs_type_t type) -{ - switch (type) { - case ZFS_TYPE_FILESYSTEM: - return (dgettext(TEXT_DOMAIN, "filesystem")); - case ZFS_TYPE_SNAPSHOT: - return (dgettext(TEXT_DOMAIN, "snapshot")); - case ZFS_TYPE_VOLUME: - return (dgettext(TEXT_DOMAIN, "volume")); - case ZFS_TYPE_POOL: - return (dgettext(TEXT_DOMAIN, "pool")); - case ZFS_TYPE_BOOKMARK: - return (dgettext(TEXT_DOMAIN, "bookmark")); - default: - assert(!"unhandled zfs_type_t"); - } - - return (NULL); -} - -/* - * Validate a ZFS path. This is used even before trying to open the dataset, to - * provide a more meaningful error message. We call zfs_error_aux() to - * explain exactly why the name was not valid. - */ -int -zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, - boolean_t modifying) -{ - namecheck_err_t why; - char what; - - if (entity_namecheck(path, &why, &what) != 0) { - if (hdl != NULL) { - switch (why) { - case NAME_ERR_TOOLONG: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "name is too long")); - break; - - case NAME_ERR_LEADING_SLASH: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "leading slash in name")); - break; - - case NAME_ERR_EMPTY_COMPONENT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "empty component in name")); - break; - - case NAME_ERR_TRAILING_SLASH: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "trailing slash in name")); - break; - - case NAME_ERR_INVALCHAR: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "invalid character " - "'%c' in name"), what); - break; - - case NAME_ERR_MULTIPLE_DELIMITERS: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "multiple '@' and/or '#' delimiters in " - "name")); - break; - - case NAME_ERR_NOLETTER: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool doesn't begin with a letter")); - break; - - case NAME_ERR_RESERVED: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "name is reserved")); - break; - - case NAME_ERR_DISKLIKE: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "reserved disk name")); - break; - - 
default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "(%d) not defined"), why); - break; - } - } - - return (0); - } - - if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { - if (hdl != NULL) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "snapshot delimiter '@' is not expected here")); - return (0); - } - - if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { - if (hdl != NULL) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '@' delimiter in snapshot name")); - return (0); - } - - if (!(type & ZFS_TYPE_BOOKMARK) && strchr(path, '#') != NULL) { - if (hdl != NULL) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "bookmark delimiter '#' is not expected here")); - return (0); - } - - if (type == ZFS_TYPE_BOOKMARK && strchr(path, '#') == NULL) { - if (hdl != NULL) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '#' delimiter in bookmark name")); - return (0); - } - - if (modifying && strchr(path, '%') != NULL) { - if (hdl != NULL) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid character %c in name"), '%'); - return (0); - } - - return (-1); -} - -int -zfs_name_valid(const char *name, zfs_type_t type) -{ - if (type == ZFS_TYPE_POOL) - return (zpool_name_valid(NULL, B_FALSE, name)); - return (zfs_validate_name(NULL, name, type, B_FALSE)); -} - -/* - * This function takes the raw DSL properties, and filters out the user-defined - * properties into a separate nvlist. 
- */ -static nvlist_t * -process_user_props(zfs_handle_t *zhp, nvlist_t *props) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - nvpair_t *elem; - nvlist_t *propval; - nvlist_t *nvl; - - if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { - (void) no_memory(hdl); - return (NULL); - } - - elem = NULL; - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - if (!zfs_prop_user(nvpair_name(elem))) - continue; - - verify(nvpair_value_nvlist(elem, &propval) == 0); - if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { - nvlist_free(nvl); - (void) no_memory(hdl); - return (NULL); - } - } - - return (nvl); -} - -static zpool_handle_t * -zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - zpool_handle_t *zph; - - if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { - if (hdl->libzfs_pool_handles != NULL) - zph->zpool_next = hdl->libzfs_pool_handles; - hdl->libzfs_pool_handles = zph; - } - return (zph); -} - -static zpool_handle_t * -zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - zpool_handle_t *zph = hdl->libzfs_pool_handles; - - while ((zph != NULL) && - (strncmp(pool_name, zpool_get_name(zph), len) != 0)) - zph = zph->zpool_next; - return (zph); -} - -/* - * Returns a handle to the pool that contains the provided dataset. - * If a handle to that pool already exists then that handle is returned. - * Otherwise, a new handle is created and added to the list of handles. 
- */ -static zpool_handle_t * -zpool_handle(zfs_handle_t *zhp) -{ - char *pool_name; - int len; - zpool_handle_t *zph; - - len = strcspn(zhp->zfs_name, "/@#") + 1; - pool_name = zfs_alloc(zhp->zfs_hdl, len); - (void) strlcpy(pool_name, zhp->zfs_name, len); - - zph = zpool_find_handle(zhp, pool_name, len); - if (zph == NULL) - zph = zpool_add_handle(zhp, pool_name); - - free(pool_name); - return (zph); -} - -void -zpool_free_handles(libzfs_handle_t *hdl) -{ - zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; - - while (zph != NULL) { - next = zph->zpool_next; - zpool_close(zph); - zph = next; - } - hdl->libzfs_pool_handles = NULL; -} - -/* - * Utility function to gather stats (objset and zpl) for the given object. - */ -static int -get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - - (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); - - while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) { - if (errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, zc) != 0) { - return (-1); - } - } else { - return (-1); - } - } - return (0); -} - -/* - * Utility function to get the received properties of the given object. 
- */ -static int -get_recvd_props_ioctl(zfs_handle_t *zhp) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - nvlist_t *recvdprops; - zfs_cmd_t zc = { 0 }; - int err; - - if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) - return (-1); - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { - if (errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - return (-1); - } - } else { - zcmd_free_nvlists(&zc); - return (-1); - } - } - - err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); - zcmd_free_nvlists(&zc); - if (err != 0) - return (-1); - - nvlist_free(zhp->zfs_recvd_props); - zhp->zfs_recvd_props = recvdprops; - - return (0); -} - -static int -put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) -{ - nvlist_t *allprops, *userprops; - - zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ - - if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { - return (-1); - } - - /* - * XXX Why do we store the user props separately, in addition to - * storing them in zfs_props? - */ - if ((userprops = process_user_props(zhp, allprops)) == NULL) { - nvlist_free(allprops); - return (-1); - } - - nvlist_free(zhp->zfs_props); - nvlist_free(zhp->zfs_user_props); - - zhp->zfs_props = allprops; - zhp->zfs_user_props = userprops; - - return (0); -} - -static int -get_stats(zfs_handle_t *zhp) -{ - int rc = 0; - zfs_cmd_t zc = { 0 }; - - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - if (get_stats_ioctl(zhp, &zc) != 0) - rc = -1; - else if (put_stats_zhdl(zhp, &zc) != 0) - rc = -1; - zcmd_free_nvlists(&zc); - return (rc); -} - -/* - * Refresh the properties currently stored in the handle. - */ -void -zfs_refresh_properties(zfs_handle_t *zhp) -{ - (void) get_stats(zhp); -} - -/* - * Makes a handle from the given dataset name. Used by zfs_open() and - * zfs_iter_* to create child handles on the fly. 
- */ -static int -make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) -{ - if (put_stats_zhdl(zhp, zc) != 0) - return (-1); - - /* - * We've managed to open the dataset and gather statistics. Determine - * the high-level type. - */ - if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) - zhp->zfs_head_type = ZFS_TYPE_VOLUME; - else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) - zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; - else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) - return (-1); - else - abort(); - - if (zhp->zfs_dmustats.dds_is_snapshot) - zhp->zfs_type = ZFS_TYPE_SNAPSHOT; - else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) - zhp->zfs_type = ZFS_TYPE_VOLUME; - else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) - zhp->zfs_type = ZFS_TYPE_FILESYSTEM; - else - abort(); /* we should never see any other types */ - - if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) - return (-1); - - return (0); -} - -zfs_handle_t * -make_dataset_handle(libzfs_handle_t *hdl, const char *path) -{ - zfs_cmd_t zc = { 0 }; - - zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); - - if (zhp == NULL) - return (NULL); - - zhp->zfs_hdl = hdl; - (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); - if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) { - free(zhp); - return (NULL); - } - if (get_stats_ioctl(zhp, &zc) == -1) { - zcmd_free_nvlists(&zc); - free(zhp); - return (NULL); - } - if (make_dataset_handle_common(zhp, &zc) == -1) { - free(zhp); - zhp = NULL; - } - zcmd_free_nvlists(&zc); - return (zhp); -} - -zfs_handle_t * -make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) -{ - zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); - - if (zhp == NULL) - return (NULL); - - zhp->zfs_hdl = hdl; - (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); - if (make_dataset_handle_common(zhp, zc) == -1) { - free(zhp); - return (NULL); - } - return (zhp); -} - -zfs_handle_t * -make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) -{ - 
zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); - - if (zhp == NULL) - return (NULL); - - zhp->zfs_hdl = pzhp->zfs_hdl; - (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); - zhp->zfs_head_type = pzhp->zfs_type; - zhp->zfs_type = ZFS_TYPE_SNAPSHOT; - zhp->zpool_hdl = zpool_handle(zhp); - return (zhp); -} - -zfs_handle_t * -zfs_handle_dup(zfs_handle_t *zhp_orig) -{ - zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); - - if (zhp == NULL) - return (NULL); - - zhp->zfs_hdl = zhp_orig->zfs_hdl; - zhp->zpool_hdl = zhp_orig->zpool_hdl; - (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, - sizeof (zhp->zfs_name)); - zhp->zfs_type = zhp_orig->zfs_type; - zhp->zfs_head_type = zhp_orig->zfs_head_type; - zhp->zfs_dmustats = zhp_orig->zfs_dmustats; - if (zhp_orig->zfs_props != NULL) { - if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { - (void) no_memory(zhp->zfs_hdl); - zfs_close(zhp); - return (NULL); - } - } - if (zhp_orig->zfs_user_props != NULL) { - if (nvlist_dup(zhp_orig->zfs_user_props, - &zhp->zfs_user_props, 0) != 0) { - (void) no_memory(zhp->zfs_hdl); - zfs_close(zhp); - return (NULL); - } - } - if (zhp_orig->zfs_recvd_props != NULL) { - if (nvlist_dup(zhp_orig->zfs_recvd_props, - &zhp->zfs_recvd_props, 0)) { - (void) no_memory(zhp->zfs_hdl); - zfs_close(zhp); - return (NULL); - } - } - zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; - if (zhp_orig->zfs_mntopts != NULL) { - zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, - zhp_orig->zfs_mntopts); - } - zhp->zfs_props_table = zhp_orig->zfs_props_table; - return (zhp); -} - -boolean_t -zfs_bookmark_exists(const char *path) -{ - nvlist_t *bmarks; - nvlist_t *props; - char fsname[ZFS_MAX_DATASET_NAME_LEN]; - char *bmark_name; - char *pound; - int err; - boolean_t rv; - - - (void) strlcpy(fsname, path, sizeof (fsname)); - pound = strchr(fsname, '#'); - if (pound == NULL) - return (B_FALSE); - - *pound = '\0'; - bmark_name = pound + 1; - props = fnvlist_alloc(); - err = 
lzc_get_bookmarks(fsname, props, &bmarks); - nvlist_free(props); - if (err != 0) { - nvlist_free(bmarks); - return (B_FALSE); - } - - rv = nvlist_exists(bmarks, bmark_name); - nvlist_free(bmarks); - return (rv); -} - -zfs_handle_t * -make_bookmark_handle(zfs_handle_t *parent, const char *path, - nvlist_t *bmark_props) -{ - zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); - - if (zhp == NULL) - return (NULL); - - /* Fill in the name. */ - zhp->zfs_hdl = parent->zfs_hdl; - (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); - - /* Set the property lists. */ - if (nvlist_dup(bmark_props, &zhp->zfs_props, 0) != 0) { - free(zhp); - return (NULL); - } - - /* Set the types. */ - zhp->zfs_head_type = parent->zfs_head_type; - zhp->zfs_type = ZFS_TYPE_BOOKMARK; - - if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) { - nvlist_free(zhp->zfs_props); - free(zhp); - return (NULL); - } - - return (zhp); -} - -struct zfs_open_bookmarks_cb_data { - const char *path; - zfs_handle_t *zhp; -}; - -static int -zfs_open_bookmarks_cb(zfs_handle_t *zhp, void *data) -{ - struct zfs_open_bookmarks_cb_data *dp = data; - - /* - * Is it the one we are looking for? - */ - if (strcmp(dp->path, zfs_get_name(zhp)) == 0) { - /* - * We found it. Save it and let the caller know we are done. - */ - dp->zhp = zhp; - return (EEXIST); - } - - /* - * Not found. Close the handle and ask for another one. - */ - zfs_close(zhp); - return (0); -} - -/* - * Opens the given snapshot, bookmark, filesystem, or volume. The 'types' - * argument is a mask of acceptable types. The function will print an - * appropriate error message and return NULL if it can't be opened. - */ -zfs_handle_t * -zfs_open(libzfs_handle_t *hdl, const char *path, int types) -{ - zfs_handle_t *zhp; - char errbuf[1024]; - char *bookp; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); - - /* - * Validate the name before we even try to open it. 
- */ - if (!zfs_validate_name(hdl, path, types, B_FALSE)) { - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - return (NULL); - } - - /* - * Bookmarks needs to be handled separately. - */ - bookp = strchr(path, '#'); - if (bookp == NULL) { - /* - * Try to get stats for the dataset, which will tell us if it - * exists. - */ - errno = 0; - if ((zhp = make_dataset_handle(hdl, path)) == NULL) { - (void) zfs_standard_error(hdl, errno, errbuf); - return (NULL); - } - } else { - char dsname[ZFS_MAX_DATASET_NAME_LEN]; - zfs_handle_t *pzhp; - struct zfs_open_bookmarks_cb_data cb_data = {path, NULL}; - - /* - * We need to cut out '#' and everything after '#' - * to get the parent dataset name only. - */ - assert(bookp - path < sizeof (dsname)); - (void) strncpy(dsname, path, bookp - path); - dsname[bookp - path] = '\0'; - - /* - * Create handle for the parent dataset. - */ - errno = 0; - if ((pzhp = make_dataset_handle(hdl, dsname)) == NULL) { - (void) zfs_standard_error(hdl, errno, errbuf); - return (NULL); - } - - /* - * Iterate bookmarks to find the right one. - */ - errno = 0; - if ((zfs_iter_bookmarks(pzhp, zfs_open_bookmarks_cb, - &cb_data) == 0) && (cb_data.zhp == NULL)) { - (void) zfs_error(hdl, EZFS_NOENT, errbuf); - zfs_close(pzhp); - return (NULL); - } - if (cb_data.zhp == NULL) { - (void) zfs_standard_error(hdl, errno, errbuf); - zfs_close(pzhp); - return (NULL); - } - zhp = cb_data.zhp; - - /* - * Cleanup. 
- */ - zfs_close(pzhp); - } - - if (zhp == NULL) { - char *at = strchr(path, '@'); - - if (at != NULL) - *at = '\0'; - errno = 0; - if ((zhp = make_dataset_handle(hdl, path)) == NULL) { - (void) zfs_standard_error(hdl, errno, errbuf); - return (NULL); - } - if (at != NULL) - *at = '@'; - (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); - zhp->zfs_type = ZFS_TYPE_SNAPSHOT; - } - - if (!(types & zhp->zfs_type)) { - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - zfs_close(zhp); - return (NULL); - } - - return (zhp); -} - -/* - * Release a ZFS handle. Nothing to do but free the associated memory. - */ -void -zfs_close(zfs_handle_t *zhp) -{ - if (zhp->zfs_mntopts) - free(zhp->zfs_mntopts); - nvlist_free(zhp->zfs_props); - nvlist_free(zhp->zfs_user_props); - nvlist_free(zhp->zfs_recvd_props); - free(zhp); -} - -typedef struct mnttab_node { - struct mnttab mtn_mt; - avl_node_t mtn_node; -} mnttab_node_t; - -static int -libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) -{ - const mnttab_node_t *mtn1 = (const mnttab_node_t *)arg1; - const mnttab_node_t *mtn2 = (const mnttab_node_t *)arg2; - int rv; - - rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); - - return (AVL_ISIGN(rv)); -} - -void -libzfs_mnttab_init(libzfs_handle_t *hdl) -{ - pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL); - assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); - avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, - sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); -} - -void -libzfs_mnttab_update(libzfs_handle_t *hdl) -{ - struct mnttab entry; - - rewind(hdl->libzfs_mnttab); - while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { - mnttab_node_t *mtn; - - if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) - continue; - mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); - mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); - mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); - mtn->mtn_mt.mnt_fstype = 
zfs_strdup(hdl, entry.mnt_fstype); - mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); - avl_add(&hdl->libzfs_mnttab_cache, mtn); - } -} - -void -libzfs_mnttab_fini(libzfs_handle_t *hdl) -{ - void *cookie = NULL; - mnttab_node_t *mtn; - - while ((mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie)) - != NULL) { - free(mtn->mtn_mt.mnt_special); - free(mtn->mtn_mt.mnt_mountp); - free(mtn->mtn_mt.mnt_fstype); - free(mtn->mtn_mt.mnt_mntopts); - free(mtn); - } - avl_destroy(&hdl->libzfs_mnttab_cache); - (void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock); -} - -void -libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) -{ - hdl->libzfs_mnttab_enable = enable; -} - -int -libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, - struct mnttab *entry) -{ - mnttab_node_t find; - mnttab_node_t *mtn; - int ret = ENOENT; - - if (!hdl->libzfs_mnttab_enable) { - struct mnttab srch = { 0 }; - - if (avl_numnodes(&hdl->libzfs_mnttab_cache)) - libzfs_mnttab_fini(hdl); - rewind(hdl->libzfs_mnttab); - srch.mnt_special = (char *)fsname; - srch.mnt_fstype = MNTTYPE_ZFS; - if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0) - return (0); - else - return (ENOENT); - } - - pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); - if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) - libzfs_mnttab_update(hdl); - - find.mtn_mt.mnt_special = (char *)fsname; - mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); - if (mtn) { - *entry = mtn->mtn_mt; - ret = 0; - } - pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); - return (ret); -} - -void -libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, - const char *mountp, const char *mntopts) -{ - mnttab_node_t *mtn; - - pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); - if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) { - mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); - mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); - mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); - 
mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); - mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); - avl_add(&hdl->libzfs_mnttab_cache, mtn); - } - pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); -} - -void -libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) -{ - mnttab_node_t find; - mnttab_node_t *ret; - - pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); - find.mtn_mt.mnt_special = (char *)fsname; - if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) - != NULL) { - avl_remove(&hdl->libzfs_mnttab_cache, ret); - free(ret->mtn_mt.mnt_special); - free(ret->mtn_mt.mnt_mountp); - free(ret->mtn_mt.mnt_fstype); - free(ret->mtn_mt.mnt_mntopts); - free(ret); - } - pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); -} - -int -zfs_spa_version(zfs_handle_t *zhp, int *spa_version) -{ - zpool_handle_t *zpool_handle = zhp->zpool_hdl; - - if (zpool_handle == NULL) - return (-1); - - *spa_version = zpool_get_prop_int(zpool_handle, - ZPOOL_PROP_VERSION, NULL); - return (0); -} - -/* - * The choice of reservation property depends on the SPA version. - */ -static int -zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) -{ - int spa_version; - - if (zfs_spa_version(zhp, &spa_version) < 0) - return (-1); - - if (spa_version >= SPA_VERSION_REFRESERVATION) - *resv_prop = ZFS_PROP_REFRESERVATION; - else - *resv_prop = ZFS_PROP_RESERVATION; - - return (0); -} - -/* - * Given an nvlist of properties to set, validates that they are correct, and - * parses any numeric properties (index, boolean, etc) if they are specified as - * strings. 
- */ -nvlist_t * -zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, - uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, - const char *errbuf) -{ - nvpair_t *elem; - uint64_t intval; - char *strval; - zfs_prop_t prop; - nvlist_t *ret; - int chosen_normal = -1; - int chosen_utf = -1; - - if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { - (void) no_memory(hdl); - return (NULL); - } - - /* - * Make sure this property is valid and applies to this type. - */ - - elem = NULL; - while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { - const char *propname = nvpair_name(elem); - - prop = zfs_name_to_prop(propname); - if (prop == ZPROP_INVAL && zfs_prop_user(propname)) { - /* - * This is a user property: make sure it's a - * string, and that it's less than ZAP_MAXNAMELEN. - */ - if (nvpair_type(elem) != DATA_TYPE_STRING) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a string"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property name '%s' is too long"), - propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - (void) nvpair_value_string(elem, &strval); - if (nvlist_add_string(ret, propname, strval) != 0) { - (void) no_memory(hdl); - goto error; - } - continue; - } - - /* - * Currently, only user properties can be modified on - * snapshots. 
- */ - if (type == ZFS_TYPE_SNAPSHOT) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "this property can not be modified for snapshots")); - (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); - goto error; - } - - if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) { - zfs_userquota_prop_t uqtype; - char newpropname[128]; - char domain[128]; - uint64_t rid; - uint64_t valary[3]; - - if (userquota_propname_decode(propname, zoned, - &uqtype, domain, sizeof (domain), &rid) != 0) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, - "'%s' has an invalid user/group name"), - propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (uqtype != ZFS_PROP_USERQUOTA && - uqtype != ZFS_PROP_GROUPQUOTA) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "'%s' is readonly"), - propname); - (void) zfs_error(hdl, EZFS_PROPREADONLY, - errbuf); - goto error; - } - - if (nvpair_type(elem) == DATA_TYPE_STRING) { - (void) nvpair_value_string(elem, &strval); - if (strcmp(strval, "none") == 0) { - intval = 0; - } else if (zfs_nicestrtonum(hdl, - strval, &intval) != 0) { - (void) zfs_error(hdl, - EZFS_BADPROP, errbuf); - goto error; - } - } else if (nvpair_type(elem) == - DATA_TYPE_UINT64) { - (void) nvpair_value_uint64(elem, &intval); - if (intval == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "use 'none' to disable " - "userquota/groupquota")); - goto error; - } - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a number"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - /* - * Encode the prop name as - * userquota@<hex-rid>-domain, to make it easy - * for the kernel to decode. 
- */ - (void) snprintf(newpropname, sizeof (newpropname), - "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], - (longlong_t)rid, domain); - valary[0] = uqtype; - valary[1] = rid; - valary[2] = intval; - if (nvlist_add_uint64_array(ret, newpropname, - valary, 3) != 0) { - (void) no_memory(hdl); - goto error; - } - continue; - } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is readonly"), - propname); - (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); - goto error; - } - - if (prop == ZPROP_INVAL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid property '%s'"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (!zfs_prop_valid_for_type(prop, type)) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "'%s' does not " - "apply to datasets of this type"), propname); - (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); - goto error; - } - - if (zfs_prop_readonly(prop) && - (!zfs_prop_setonce(prop) || zhp != NULL)) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "'%s' is readonly"), - propname); - (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); - goto error; - } - - if (zprop_parse_value(hdl, elem, prop, type, ret, - &strval, &intval, errbuf) != 0) - goto error; - - /* - * Perform some additional checks for specific properties. 
- */ - switch (prop) { - case ZFS_PROP_VERSION: - { - int version; - - if (zhp == NULL) - break; - version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - if (intval < version) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Can not downgrade; already at version %u"), - version); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - } - - case ZFS_PROP_VOLBLOCKSIZE: - case ZFS_PROP_RECORDSIZE: - { - int maxbs = SPA_MAXBLOCKSIZE; - if (zpool_hdl != NULL) { - maxbs = zpool_get_prop_int(zpool_hdl, - ZPOOL_PROP_MAXBLOCKSIZE, NULL); - } - /* - * Volumes are limited to a volblocksize of 128KB, - * because they typically service workloads with - * small random writes, which incur a large performance - * penalty with large blocks. - */ - if (prop == ZFS_PROP_VOLBLOCKSIZE) - maxbs = SPA_OLD_MAXBLOCKSIZE; - /* - * The value must be a power of two between - * SPA_MINBLOCKSIZE and maxbs. - */ - if (intval < SPA_MINBLOCKSIZE || - intval > maxbs || !ISP2(intval)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be power of 2 from 512B " - "to %uKB"), propname, maxbs >> 10); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - } - - case ZFS_PROP_SPECIAL_SMALL_BLOCKS: - if (zpool_hdl != NULL) { - char state[64] = ""; - - /* - * Issue a warning but do not fail so that - * tests for setable properties succeed. 
- */ - if (zpool_prop_get_feature(zpool_hdl, - "feature@allocation_classes", state, - sizeof (state)) != 0 || - strcmp(state, ZFS_FEATURE_ACTIVE) != 0) { - (void) fprintf(stderr, gettext( - "%s: property requires a special " - "device in the pool\n"), propname); - } - } - if (intval != 0 && - (intval < SPA_MINBLOCKSIZE || - intval > SPA_OLD_MAXBLOCKSIZE || !ISP2(intval))) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid '%s=%d' property: must be zero or " - "a power of 2 from 512B to 128K"), propname, - intval); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - case ZFS_PROP_MLSLABEL: - { -#ifdef illumos - /* - * Verify the mlslabel string and convert to - * internal hex label string. - */ - - m_label_t *new_sl; - char *hex = NULL; /* internal label string */ - - /* Default value is already OK. */ - if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) - break; - - /* Verify the label can be converted to binary form */ - if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || - (str_to_label(strval, &new_sl, MAC_LABEL, - L_NO_CORRECTION, NULL) == -1)) { - goto badlabel; - } - - /* Now translate to hex internal label string */ - if (label_to_str(new_sl, &hex, M_INTERNAL, - DEF_NAMES) != 0) { - if (hex) - free(hex); - goto badlabel; - } - m_label_free(new_sl); - - /* If string is already in internal form, we're done. */ - if (strcmp(strval, hex) == 0) { - free(hex); - break; - } - - /* Replace the label string with the internal form. 
*/ - (void) nvlist_remove(ret, zfs_prop_to_name(prop), - DATA_TYPE_STRING); - verify(nvlist_add_string(ret, zfs_prop_to_name(prop), - hex) == 0); - free(hex); - - break; - -badlabel: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid mlslabel '%s'"), strval); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - m_label_free(new_sl); /* OK if null */ -#else /* !illumos */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "mlslabel is not supported on FreeBSD")); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); -#endif /* illumos */ - goto error; - - } - - case ZFS_PROP_MOUNTPOINT: - { - namecheck_err_t why; - - if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || - strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) - break; - - if (mountpoint_namecheck(strval, &why)) { - switch (why) { - case NAME_ERR_LEADING_SLASH: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, - "'%s' must be an absolute path, " - "'none', or 'legacy'"), propname); - break; - case NAME_ERR_TOOLONG: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, - "component of '%s' is too long"), - propname); - break; - - default: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, - "(%d) not defined"), - why); - break; - } - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - } - - /*FALLTHRU*/ - - case ZFS_PROP_SHARESMB: - case ZFS_PROP_SHARENFS: - /* - * For the mountpoint and sharenfs or sharesmb - * properties, check if it can be set in a - * global/non-global zone based on - * the zoned property value: - * - * global zone non-global zone - * -------------------------------------------------- - * zoned=on mountpoint (no) mountpoint (yes) - * sharenfs (no) sharenfs (no) - * sharesmb (no) sharesmb (no) - * - * zoned=off mountpoint (yes) N/A - * sharenfs (yes) - * sharesmb (yes) - */ - if (zoned) { - if (getzoneid() == GLOBAL_ZONEID) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set on " - "dataset in a non-global zone"), - propname); - (void) zfs_error(hdl, EZFS_ZONED, - errbuf); - goto error; - } else if (prop 
== ZFS_PROP_SHARENFS || - prop == ZFS_PROP_SHARESMB) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set in " - "a non-global zone"), propname); - (void) zfs_error(hdl, EZFS_ZONED, - errbuf); - goto error; - } - } else if (getzoneid() != GLOBAL_ZONEID) { - /* - * If zoned property is 'off', this must be in - * a global zone. If not, something is wrong. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set while dataset " - "'zoned' property is set"), propname); - (void) zfs_error(hdl, EZFS_ZONED, errbuf); - goto error; - } - - /* - * At this point, it is legitimate to set the - * property. Now we want to make sure that the - * property value is valid if it is sharenfs. - */ - if ((prop == ZFS_PROP_SHARENFS || - prop == ZFS_PROP_SHARESMB) && - strcmp(strval, "on") != 0 && - strcmp(strval, "off") != 0) { - zfs_share_proto_t proto; - - if (prop == ZFS_PROP_SHARESMB) - proto = PROTO_SMB; - else - proto = PROTO_NFS; - - /* - * Must be an valid sharing protocol - * option string so init the libshare - * in order to enable the parser and - * then parse the options. We use the - * control API since we don't care about - * the current configuration and don't - * want the overhead of loading it - * until we actually do something. - */ - - if (zfs_init_libshare(hdl, - SA_INIT_CONTROL_API) != SA_OK) { - /* - * An error occurred so we can't do - * anything - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set: problem " - "in share initialization"), - propname); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - - if (zfs_parse_options(strval, proto) != SA_OK) { - /* - * There was an error in parsing so - * deal with it by issuing an error - * message and leaving after - * uninitializing the the libshare - * interface. 
- */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set to invalid " - "options"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - zfs_uninit_libshare(hdl); - goto error; - } - zfs_uninit_libshare(hdl); - } - - break; - - case ZFS_PROP_UTF8ONLY: - chosen_utf = (int)intval; - break; - - case ZFS_PROP_NORMALIZE: - chosen_normal = (int)intval; - break; - - default: - break; - } - - /* - * For changes to existing volumes, we have some additional - * checks to enforce. - */ - if (type == ZFS_TYPE_VOLUME && zhp != NULL) { - uint64_t volsize = zfs_prop_get_int(zhp, - ZFS_PROP_VOLSIZE); - uint64_t blocksize = zfs_prop_get_int(zhp, - ZFS_PROP_VOLBLOCKSIZE); - char buf[64]; - - switch (prop) { - case ZFS_PROP_RESERVATION: - if (intval > volsize) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is greater than current " - "volume size"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - break; - - case ZFS_PROP_REFRESERVATION: - if (intval > volsize && intval != UINT64_MAX) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is greater than current " - "volume size"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - break; - - case ZFS_PROP_VOLSIZE: - if (intval % blocksize != 0) { - zfs_nicenum(blocksize, buf, - sizeof (buf)); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a multiple of " - "volume block size (%s)"), - propname, buf); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - - if (intval == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be zero"), - propname); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - break; - - default: - break; - } - } - } - - /* - * If normalization was chosen, but no UTF8 choice was made, - * enforce rejection of non-UTF8 names. - * - * If normalization was chosen, but rejecting non-UTF8 names - * was explicitly not chosen, it is an error. 
- */ - if (chosen_normal > 0 && chosen_utf < 0) { - if (nvlist_add_uint64(ret, - zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { - (void) no_memory(hdl); - goto error; - } - } else if (chosen_normal > 0 && chosen_utf == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be set 'on' if normalization chosen"), - zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - return (ret); - -error: - nvlist_free(ret); - return (NULL); -} - -int -zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) -{ - uint64_t old_volsize; - uint64_t new_volsize; - uint64_t old_reservation; - uint64_t new_reservation; - zfs_prop_t resv_prop; - nvlist_t *props; - - /* - * If this is an existing volume, and someone is setting the volsize, - * make sure that it matches the reservation, or add it if necessary. - */ - old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); - if (zfs_which_resv_prop(zhp, &resv_prop) < 0) - return (-1); - old_reservation = zfs_prop_get_int(zhp, resv_prop); - - props = fnvlist_alloc(); - fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); - - if ((zvol_volsize_to_reservation(old_volsize, props) != - old_reservation) || nvlist_exists(nvl, - zfs_prop_to_name(resv_prop))) { - fnvlist_free(props); - return (0); - } - if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &new_volsize) != 0) { - fnvlist_free(props); - return (-1); - } - new_reservation = zvol_volsize_to_reservation(new_volsize, props); - fnvlist_free(props); - - if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), - new_reservation) != 0) { - (void) no_memory(zhp->zfs_hdl); - return (-1); - } - return (1); -} - -/* - * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after - * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinal value. - * Return codes must match zfs_add_synthetic_resv(). 
- */ -static int -zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl) -{ - uint64_t volsize; - uint64_t resvsize; - zfs_prop_t prop; - nvlist_t *props; - - if (!ZFS_IS_VOLUME(zhp)) { - return (0); - } - - if (zfs_which_resv_prop(zhp, &prop) != 0) { - return (-1); - } - - if (prop != ZFS_PROP_REFRESERVATION) { - return (0); - } - - if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(prop), &resvsize) != 0) { - /* No value being set, so it can't be "auto" */ - return (0); - } - if (resvsize != UINT64_MAX) { - /* Being set to a value other than "auto" */ - return (0); - } - - props = fnvlist_alloc(); - - fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); - - if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); - } - - resvsize = zvol_volsize_to_reservation(volsize, props); - fnvlist_free(props); - - (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop)); - if (nvlist_add_uint64(nvl, zfs_prop_to_name(prop), resvsize) != 0) { - (void) no_memory(zhp->zfs_hdl); - return (-1); - } - return (1); -} - -void -zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, - char *errbuf) -{ - switch (err) { - - case ENOSPC: - /* - * For quotas and reservations, ENOSPC indicates - * something different; setting a quota or reservation - * doesn't use any disk space. 
- */ - switch (prop) { - case ZFS_PROP_QUOTA: - case ZFS_PROP_REFQUOTA: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "size is less than current used or " - "reserved space")); - (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); - break; - - case ZFS_PROP_RESERVATION: - case ZFS_PROP_REFRESERVATION: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "size is greater than available space")); - (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); - break; - - default: - (void) zfs_standard_error(hdl, err, errbuf); - break; - } - break; - - case EBUSY: - (void) zfs_standard_error(hdl, EBUSY, errbuf); - break; - - case EROFS: - (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); - break; - - case E2BIG: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property value too long")); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - break; - - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool and or dataset must be upgraded to set this " - "property or value")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - - case ERANGE: - case EDOM: - if (prop == ZFS_PROP_COMPRESSION || - prop == ZFS_PROP_RECORDSIZE) { - (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property setting is not allowed on " - "bootable datasets")); - (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); - } else if (prop == ZFS_PROP_CHECKSUM || - prop == ZFS_PROP_DEDUP) { - (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property setting is not allowed on " - "root pools")); - (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); - } else { - (void) zfs_standard_error(hdl, err, errbuf); - } - break; - - case EINVAL: - if (prop == ZPROP_INVAL) { - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - } else { - (void) zfs_standard_error(hdl, err, errbuf); - } - break; - - case EOVERFLOW: - /* - * This platform can't address a volume this big. 
- */ -#ifdef _ILP32 - if (prop == ZFS_PROP_VOLSIZE) { - (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); - break; - } -#endif - /* FALLTHROUGH */ - default: - (void) zfs_standard_error(hdl, err, errbuf); - } -} - -/* - * Given a property name and value, set the property for the given dataset. - */ -int -zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) -{ - int ret = -1; - char errbuf[1024]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - nvlist_t *nvl = NULL; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), - zhp->zfs_name); - - if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || - nvlist_add_string(nvl, propname, propval) != 0) { - (void) no_memory(hdl); - goto error; - } - - ret = zfs_prop_set_list(zhp, nvl); - -error: - nvlist_free(nvl); - return (ret); -} - - - -/* - * Given an nvlist of property names and values, set the properties for the - * given dataset. - */ -int -zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) -{ - zfs_cmd_t zc = { 0 }; - int ret = -1; - prop_changelist_t **cls = NULL; - int cl_idx; - char errbuf[1024]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - nvlist_t *nvl; - int nvl_len; - int added_resv = 0; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), - zhp->zfs_name); - - if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, - zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, - errbuf)) == NULL) - goto error; - - /* - * We have to check for any extra properties which need to be added - * before computing the length of the nvlist. 
- */ - for (nvpair_t *elem = nvlist_next_nvpair(nvl, NULL); - elem != NULL; - elem = nvlist_next_nvpair(nvl, elem)) { - if (zfs_name_to_prop(nvpair_name(elem)) == ZFS_PROP_VOLSIZE && - (added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) { - goto error; - } - } - - if (added_resv != 1 && - (added_resv = zfs_fix_auto_resv(zhp, nvl)) == -1) { - goto error; - } - - /* - * Check how many properties we're setting and allocate an array to - * store changelist pointers for postfix(). - */ - nvl_len = 0; - for (nvpair_t *elem = nvlist_next_nvpair(nvl, NULL); - elem != NULL; - elem = nvlist_next_nvpair(nvl, elem)) - nvl_len++; - if ((cls = calloc(nvl_len, sizeof (prop_changelist_t *))) == NULL) - goto error; - - cl_idx = 0; - for (nvpair_t *elem = nvlist_next_nvpair(nvl, NULL); - elem != NULL; - elem = nvlist_next_nvpair(nvl, elem)) { - - zfs_prop_t prop = zfs_name_to_prop(nvpair_name(elem)); - - assert(cl_idx < nvl_len); - /* - * We don't want to unmount & remount the dataset when changing - * its canmount property to 'on' or 'noauto'. We only use - * the changelist logic to unmount when setting canmount=off. - */ - if (prop != ZFS_PROP_CANMOUNT || - (fnvpair_value_uint64(elem) == ZFS_CANMOUNT_OFF && - zfs_is_mounted(zhp, NULL))) { - cls[cl_idx] = changelist_gather(zhp, prop, 0, 0); - if (cls[cl_idx] == NULL) - goto error; - } - - if (prop == ZFS_PROP_MOUNTPOINT && - changelist_haszonedchild(cls[cl_idx])) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "child dataset with inherited mountpoint is used " - "in a non-global zone")); - ret = zfs_error(hdl, EZFS_ZONED, errbuf); - goto error; - } - - /* We don't support those properties on FreeBSD. 
*/ - switch (prop) { - case ZFS_PROP_DEVICES: - case ZFS_PROP_ISCSIOPTIONS: - case ZFS_PROP_XATTR: - case ZFS_PROP_VSCAN: - case ZFS_PROP_NBMAND: - case ZFS_PROP_MLSLABEL: - (void) snprintf(errbuf, sizeof (errbuf), - "property '%s' not supported on FreeBSD", - nvpair_name(elem)); - ret = zfs_error(hdl, EZFS_PERM, errbuf); - goto error; - } - - if (cls[cl_idx] != NULL && - (ret = changelist_prefix(cls[cl_idx])) != 0) - goto error; - - cl_idx++; - } - assert(cl_idx == nvl_len); - - /* - * Execute the corresponding ioctl() to set this list of properties. - */ - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - if ((ret = zcmd_write_src_nvlist(hdl, &zc, nvl)) != 0 || - (ret = zcmd_alloc_dst_nvlist(hdl, &zc, 0)) != 0) - goto error; - - ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); - - if (ret != 0) { - if (zc.zc_nvlist_dst_filled == B_FALSE) { - (void) zfs_standard_error(hdl, errno, errbuf); - goto error; - } - - /* Get the list of unset properties back and report them. */ - nvlist_t *errorprops = NULL; - if (zcmd_read_dst_nvlist(hdl, &zc, &errorprops) != 0) - goto error; - for (nvpair_t *elem = nvlist_next_nvpair(errorprops, NULL); - elem != NULL; - elem = nvlist_next_nvpair(errorprops, elem)) { - zfs_prop_t prop = zfs_name_to_prop(nvpair_name(elem)); - zfs_setprop_error(hdl, prop, errno, errbuf); - } - nvlist_free(errorprops); - - if (added_resv && errno == ENOSPC) { - /* clean up the volsize property we tried to set */ - uint64_t old_volsize = zfs_prop_get_int(zhp, - ZFS_PROP_VOLSIZE); - nvlist_free(nvl); - nvl = NULL; - zcmd_free_nvlists(&zc); - - if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) - goto error; - if (nvlist_add_uint64(nvl, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - old_volsize) != 0) - goto error; - if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) - goto error; - (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); - } - } else { - for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { - if (cls[cl_idx] != NULL) { - int clp_err = 
changelist_postfix(cls[cl_idx]); - if (clp_err != 0) - ret = clp_err; - } - } - - /* - * Refresh the statistics so the new property value - * is reflected. - */ - if (ret == 0) - (void) get_stats(zhp); - } - -error: - nvlist_free(nvl); - zcmd_free_nvlists(&zc); - if (cls != NULL) { - for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { - if (cls[cl_idx] != NULL) - changelist_free(cls[cl_idx]); - } - free(cls); - } - return (ret); -} - -/* - * Given a property, inherit the value from the parent dataset, or if received - * is TRUE, revert to the received value, if any. - */ -int -zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) -{ - zfs_cmd_t zc = { 0 }; - int ret; - prop_changelist_t *cl; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - zfs_prop_t prop; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s'"), propname, zhp->zfs_name); - - zc.zc_cookie = received; - if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { - /* - * For user properties, the amount of work we have to do is very - * small, so just do it here. - */ - if (!zfs_prop_user(propname)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid property")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); - } - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); - - if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) - return (zfs_standard_error(hdl, errno, errbuf)); - - return (0); - } - - /* - * Verify that this property is inheritable. 
- */ - if (zfs_prop_readonly(prop)) - return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); - - if (!zfs_prop_inheritable(prop) && !received) - return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); - - /* - * Check to see if the value applies to this type - */ - if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) - return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); - - /* - * Normalize the name, to get rid of shorthand abbreviations. - */ - propname = zfs_prop_to_name(prop); - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); - - if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && - zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset is used in a non-global zone")); - return (zfs_error(hdl, EZFS_ZONED, errbuf)); - } - - /* - * Determine datasets which will be affected by this change, if any. - */ - if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) - return (-1); - - if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "child dataset with inherited mountpoint is used " - "in a non-global zone")); - ret = zfs_error(hdl, EZFS_ZONED, errbuf); - goto error; - } - - if ((ret = changelist_prefix(cl)) != 0) - goto error; - - if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { - return (zfs_standard_error(hdl, errno, errbuf)); - } else { - - if ((ret = changelist_postfix(cl)) != 0) - goto error; - - /* - * Refresh the statistics so the new property is reflected. - */ - (void) get_stats(zhp); - } - -error: - changelist_free(cl); - return (ret); -} - -/* - * True DSL properties are stored in an nvlist. The following two functions - * extract them appropriately. 
- */ -static uint64_t -getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) -{ - nvlist_t *nv; - uint64_t value; - - *source = NULL; - if (nvlist_lookup_nvlist(zhp->zfs_props, - zfs_prop_to_name(prop), &nv) == 0) { - verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); - (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); - } else { - verify(!zhp->zfs_props_table || - zhp->zfs_props_table[prop] == B_TRUE); - value = zfs_prop_default_numeric(prop); - *source = ""; - } - - return (value); -} - -static const char * -getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) -{ - nvlist_t *nv; - const char *value; - - *source = NULL; - if (nvlist_lookup_nvlist(zhp->zfs_props, - zfs_prop_to_name(prop), &nv) == 0) { - value = fnvlist_lookup_string(nv, ZPROP_VALUE); - (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); - } else { - verify(!zhp->zfs_props_table || - zhp->zfs_props_table[prop] == B_TRUE); - value = zfs_prop_default_string(prop); - *source = ""; - } - - return (value); -} - -static boolean_t -zfs_is_recvd_props_mode(zfs_handle_t *zhp) -{ - return (zhp->zfs_props == zhp->zfs_recvd_props); -} - -static void -zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) -{ - *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; - zhp->zfs_props = zhp->zfs_recvd_props; -} - -static void -zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) -{ - zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; - *cookie = 0; -} - -/* - * Internal function for getting a numeric property. Both zfs_prop_get() and - * zfs_prop_get_int() are built using this interface. - * - * Certain properties can be overridden using 'mount -o'. In this case, scan - * the contents of the /etc/mnttab entry, searching for the appropriate options. - * If they differ from the on-disk values, report the current values and mark - * the source "temporary". 
- */ -static int -get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, - char **source, uint64_t *val) -{ - zfs_cmd_t zc = { 0 }; - nvlist_t *zplprops = NULL; - struct mnttab mnt; - char *mntopt_on = NULL; - char *mntopt_off = NULL; - boolean_t received = zfs_is_recvd_props_mode(zhp); - - *source = NULL; - - switch (prop) { - case ZFS_PROP_ATIME: - mntopt_on = MNTOPT_ATIME; - mntopt_off = MNTOPT_NOATIME; - break; - - case ZFS_PROP_DEVICES: - mntopt_on = MNTOPT_DEVICES; - mntopt_off = MNTOPT_NODEVICES; - break; - - case ZFS_PROP_EXEC: - mntopt_on = MNTOPT_EXEC; - mntopt_off = MNTOPT_NOEXEC; - break; - - case ZFS_PROP_READONLY: - mntopt_on = MNTOPT_RO; - mntopt_off = MNTOPT_RW; - break; - - case ZFS_PROP_SETUID: - mntopt_on = MNTOPT_SETUID; - mntopt_off = MNTOPT_NOSETUID; - break; - - case ZFS_PROP_XATTR: - mntopt_on = MNTOPT_XATTR; - mntopt_off = MNTOPT_NOXATTR; - break; - - case ZFS_PROP_NBMAND: - mntopt_on = MNTOPT_NBMAND; - mntopt_off = MNTOPT_NONBMAND; - break; - - default: - break; - } - - /* - * Because looking up the mount options is potentially expensive - * (iterating over all of /etc/mnttab), we defer its calculation until - * we're looking up a property which requires its presence. 
- */ - if (!zhp->zfs_mntcheck && - (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { - libzfs_handle_t *hdl = zhp->zfs_hdl; - struct mnttab entry; - - if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) { - zhp->zfs_mntopts = zfs_strdup(hdl, - entry.mnt_mntopts); - if (zhp->zfs_mntopts == NULL) - return (-1); - } - - zhp->zfs_mntcheck = B_TRUE; - } - - if (zhp->zfs_mntopts == NULL) - mnt.mnt_mntopts = ""; - else - mnt.mnt_mntopts = zhp->zfs_mntopts; - - switch (prop) { - case ZFS_PROP_ATIME: - case ZFS_PROP_DEVICES: - case ZFS_PROP_EXEC: - case ZFS_PROP_READONLY: - case ZFS_PROP_SETUID: - case ZFS_PROP_XATTR: - case ZFS_PROP_NBMAND: - *val = getprop_uint64(zhp, prop, source); - - if (received) - break; - - if (hasmntopt(&mnt, mntopt_on) && !*val) { - *val = B_TRUE; - if (src) - *src = ZPROP_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, mntopt_off) && *val) { - *val = B_FALSE; - if (src) - *src = ZPROP_SRC_TEMPORARY; - } - break; - - case ZFS_PROP_CANMOUNT: - case ZFS_PROP_VOLSIZE: - case ZFS_PROP_QUOTA: - case ZFS_PROP_REFQUOTA: - case ZFS_PROP_RESERVATION: - case ZFS_PROP_REFRESERVATION: - case ZFS_PROP_FILESYSTEM_LIMIT: - case ZFS_PROP_SNAPSHOT_LIMIT: - case ZFS_PROP_FILESYSTEM_COUNT: - case ZFS_PROP_SNAPSHOT_COUNT: - *val = getprop_uint64(zhp, prop, source); - - if (*source == NULL) { - /* not default, must be local */ - *source = zhp->zfs_name; - } - break; - - case ZFS_PROP_MOUNTED: - *val = (zhp->zfs_mntopts != NULL); - break; - - case ZFS_PROP_NUMCLONES: - *val = zhp->zfs_dmustats.dds_num_clones; - break; - - case ZFS_PROP_VERSION: - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - case ZFS_PROP_CASE: - if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) || - zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { - zcmd_free_nvlists(&zc); - return (-1); - } - if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, 
&zplprops) != 0 || - nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), - val) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - nvlist_free(zplprops); - zcmd_free_nvlists(&zc); - break; - - case ZFS_PROP_INCONSISTENT: - *val = zhp->zfs_dmustats.dds_inconsistent; - break; - - default: - switch (zfs_prop_get_type(prop)) { - case PROP_TYPE_NUMBER: - case PROP_TYPE_INDEX: - *val = getprop_uint64(zhp, prop, source); - /* - * If we tried to use a default value for a - * readonly property, it means that it was not - * present. Note this only applies to "truly" - * readonly properties, not set-once properties - * like volblocksize. - */ - if (zfs_prop_readonly(prop) && - !zfs_prop_setonce(prop) && - *source != NULL && (*source)[0] == '\0') { - *source = NULL; - return (-1); - } - break; - - case PROP_TYPE_STRING: - default: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "cannot get non-numeric property")); - return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, - dgettext(TEXT_DOMAIN, "internal error"))); - } - } - - return (0); -} - -/* - * Calculate the source type, given the raw source string. 
- */ -static void -get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, - char *statbuf, size_t statlen) -{ - if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY) - return; - - if (source == NULL) { - *srctype = ZPROP_SRC_NONE; - } else if (source[0] == '\0') { - *srctype = ZPROP_SRC_DEFAULT; - } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { - *srctype = ZPROP_SRC_RECEIVED; - } else { - if (strcmp(source, zhp->zfs_name) == 0) { - *srctype = ZPROP_SRC_LOCAL; - } else { - (void) strlcpy(statbuf, source, statlen); - *srctype = ZPROP_SRC_INHERITED; - } - } - -} - -int -zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, - size_t proplen, boolean_t literal) -{ - zfs_prop_t prop; - int err = 0; - - if (zhp->zfs_recvd_props == NULL) - if (get_recvd_props_ioctl(zhp) != 0) - return (-1); - - prop = zfs_name_to_prop(propname); - - if (prop != ZPROP_INVAL) { - uint64_t cookie; - if (!nvlist_exists(zhp->zfs_recvd_props, propname)) - return (-1); - zfs_set_recvd_props_mode(zhp, &cookie); - err = zfs_prop_get(zhp, prop, propbuf, proplen, - NULL, NULL, 0, literal); - zfs_unset_recvd_props_mode(zhp, &cookie); - } else { - nvlist_t *propval; - char *recvdval; - if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, - propname, &propval) != 0) - return (-1); - verify(nvlist_lookup_string(propval, ZPROP_VALUE, - &recvdval) == 0); - (void) strlcpy(propbuf, recvdval, proplen); - } - - return (err == 0 ? 
0 : -1); -} - -static int -get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) -{ - nvlist_t *value; - nvpair_t *pair; - - value = zfs_get_clones_nvl(zhp); - if (value == NULL) - return (-1); - - propbuf[0] = '\0'; - for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; - pair = nvlist_next_nvpair(value, pair)) { - if (propbuf[0] != '\0') - (void) strlcat(propbuf, ",", proplen); - (void) strlcat(propbuf, nvpair_name(pair), proplen); - } - - return (0); -} - -struct get_clones_arg { - uint64_t numclones; - nvlist_t *value; - const char *origin; - char buf[ZFS_MAX_DATASET_NAME_LEN]; -}; - -int -get_clones_cb(zfs_handle_t *zhp, void *arg) -{ - struct get_clones_arg *gca = arg; - - if (gca->numclones == 0) { - zfs_close(zhp); - return (0); - } - - if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), - NULL, NULL, 0, B_TRUE) != 0) - goto out; - if (strcmp(gca->buf, gca->origin) == 0) { - fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); - gca->numclones--; - } - -out: - (void) zfs_iter_children(zhp, get_clones_cb, gca); - zfs_close(zhp); - return (0); -} - -nvlist_t * -zfs_get_clones_nvl(zfs_handle_t *zhp) -{ - nvlist_t *nv, *value; - - if (nvlist_lookup_nvlist(zhp->zfs_props, - zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { - struct get_clones_arg gca; - - /* - * if this is a snapshot, then the kernel wasn't able - * to get the clones. Do it by slowly iterating. 
- */ - if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) - return (NULL); - if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) - return (NULL); - if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { - nvlist_free(nv); - return (NULL); - } - - gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); - gca.value = value; - gca.origin = zhp->zfs_name; - - if (gca.numclones != 0) { - zfs_handle_t *root; - char pool[ZFS_MAX_DATASET_NAME_LEN]; - char *cp = pool; - - /* get the pool name */ - (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); - (void) strsep(&cp, "/@"); - root = zfs_open(zhp->zfs_hdl, pool, - ZFS_TYPE_FILESYSTEM); - - (void) get_clones_cb(root, &gca); - } - - if (gca.numclones != 0 || - nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || - nvlist_add_nvlist(zhp->zfs_props, - zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { - nvlist_free(nv); - nvlist_free(value); - return (NULL); - } - nvlist_free(nv); - nvlist_free(value); - verify(0 == nvlist_lookup_nvlist(zhp->zfs_props, - zfs_prop_to_name(ZFS_PROP_CLONES), &nv)); - } - - verify(nvlist_lookup_nvlist(nv, ZPROP_VALUE, &value) == 0); - - return (value); -} - -/* - * Accepts a property and value and checks that the value - * matches the one found by the channel program. If they are - * not equal, print both of them. 
- */ -void -zcp_check(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t intval, - const char *strval) -{ - if (!zhp->zfs_hdl->libzfs_prop_debug) - return; - int error; - char *poolname = zhp->zpool_hdl->zpool_name; - const char *program = - "args = ...\n" - "ds = args['dataset']\n" - "prop = args['property']\n" - "value, setpoint = zfs.get_prop(ds, prop)\n" - "return {value=value, setpoint=setpoint}\n"; - nvlist_t *outnvl; - nvlist_t *retnvl; - nvlist_t *argnvl = fnvlist_alloc(); - - fnvlist_add_string(argnvl, "dataset", zhp->zfs_name); - fnvlist_add_string(argnvl, "property", zfs_prop_to_name(prop)); - - error = lzc_channel_program_nosync(poolname, program, - 10 * 1000 * 1000, 10 * 1024 * 1024, argnvl, &outnvl); - - if (error == 0) { - retnvl = fnvlist_lookup_nvlist(outnvl, "return"); - if (zfs_prop_get_type(prop) == PROP_TYPE_NUMBER) { - int64_t ans; - error = nvlist_lookup_int64(retnvl, "value", &ans); - if (error != 0) { - (void) fprintf(stderr, "zcp check error: %u\n", - error); - return; - } - if (ans != intval) { - (void) fprintf(stderr, - "%s: zfs found %lld, but zcp found %lld\n", - zfs_prop_to_name(prop), - (longlong_t)intval, (longlong_t)ans); - } - } else { - char *str_ans; - error = nvlist_lookup_string(retnvl, "value", &str_ans); - if (error != 0) { - (void) fprintf(stderr, "zcp check error: %u\n", - error); - return; - } - if (strcmp(strval, str_ans) != 0) { - (void) fprintf(stderr, - "%s: zfs found %s, but zcp found %s\n", - zfs_prop_to_name(prop), - strval, str_ans); - } - } - } else { - (void) fprintf(stderr, - "zcp check failed, channel program error: %u\n", error); - } - nvlist_free(argnvl); - nvlist_free(outnvl); -} - -/* - * Retrieve a property from the given object. If 'literal' is specified, then - * numbers are left as exact values. Otherwise, numbers are converted to a - * human-readable form. - * - * Returns 0 on success, or -1 on error. 
- */ -int -zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, - zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) -{ - char *source = NULL; - uint64_t val; - const char *str; - const char *strval; - boolean_t received = zfs_is_recvd_props_mode(zhp); - - /* - * Check to see if this property applies to our object - */ - if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) - return (-1); - - if (received && zfs_prop_readonly(prop)) - return (-1); - - if (src) - *src = ZPROP_SRC_NONE; - - switch (prop) { - case ZFS_PROP_CREATION: - /* - * 'creation' is a time_t stored in the statistics. We convert - * this into a string unless 'literal' is specified. - */ - { - val = getprop_uint64(zhp, prop, &source); - time_t time = (time_t)val; - struct tm t; - - if (literal || - localtime_r(&time, &t) == NULL || - strftime(propbuf, proplen, "%a %b %e %k:%M %Y", - &t) == 0) - (void) snprintf(propbuf, proplen, "%llu", val); - } - zcp_check(zhp, prop, val, NULL); - break; - - case ZFS_PROP_MOUNTPOINT: - /* - * Getting the precise mountpoint can be tricky. - * - * - for 'none' or 'legacy', return those values. - * - for inherited mountpoints, we want to take everything - * after our ancestor and append it to the inherited value. - * - * If the pool has an alternate root, we want to prepend that - * root to any values we return. - */ - - str = getprop_string(zhp, prop, &source); - - if (str[0] == '/') { - char buf[MAXPATHLEN]; - char *root = buf; - const char *relpath; - - /* - * If we inherit the mountpoint, even from a dataset - * with a received value, the source will be the path of - * the dataset we inherit from. If source is - * ZPROP_SOURCE_VAL_RECVD, the received value is not - * inherited. 
- */ - if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { - relpath = ""; - } else { - relpath = zhp->zfs_name + strlen(source); - if (relpath[0] == '/') - relpath++; - } - - if ((zpool_get_prop(zhp->zpool_hdl, - ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL, - B_FALSE)) || (strcmp(root, "-") == 0)) - root[0] = '\0'; - /* - * Special case an alternate root of '/'. This will - * avoid having multiple leading slashes in the - * mountpoint path. - */ - if (strcmp(root, "/") == 0) - root++; - - /* - * If the mountpoint is '/' then skip over this - * if we are obtaining either an alternate root or - * an inherited mountpoint. - */ - if (str[1] == '\0' && (root[0] != '\0' || - relpath[0] != '\0')) - str++; - - if (relpath[0] == '\0') - (void) snprintf(propbuf, proplen, "%s%s", - root, str); - else - (void) snprintf(propbuf, proplen, "%s%s%s%s", - root, str, relpath[0] == '@' ? "" : "/", - relpath); - } else { - /* 'legacy' or 'none' */ - (void) strlcpy(propbuf, str, proplen); - } - zcp_check(zhp, prop, NULL, propbuf); - break; - - case ZFS_PROP_ORIGIN: - str = getprop_string(zhp, prop, &source); - if (str == NULL) - return (-1); - (void) strlcpy(propbuf, str, proplen); - zcp_check(zhp, prop, NULL, str); - break; - - case ZFS_PROP_CLONES: - if (get_clones_string(zhp, propbuf, proplen) != 0) - return (-1); - break; - - case ZFS_PROP_QUOTA: - case ZFS_PROP_REFQUOTA: - case ZFS_PROP_RESERVATION: - case ZFS_PROP_REFRESERVATION: - - if (get_numeric_property(zhp, prop, src, &source, &val) != 0) - return (-1); - /* - * If quota or reservation is 0, we translate this into 'none' - * (unless literal is set), and indicate that it's the default - * value. Otherwise, we print the number nicely and indicate - * that its set locally. 
- */ - if (val == 0) { - if (literal) - (void) strlcpy(propbuf, "0", proplen); - else - (void) strlcpy(propbuf, "none", proplen); - } else { - if (literal) - (void) snprintf(propbuf, proplen, "%llu", - (u_longlong_t)val); - else - zfs_nicenum(val, propbuf, proplen); - } - zcp_check(zhp, prop, val, NULL); - break; - - case ZFS_PROP_FILESYSTEM_LIMIT: - case ZFS_PROP_SNAPSHOT_LIMIT: - case ZFS_PROP_FILESYSTEM_COUNT: - case ZFS_PROP_SNAPSHOT_COUNT: - - if (get_numeric_property(zhp, prop, src, &source, &val) != 0) - return (-1); - - /* - * If limit is UINT64_MAX, we translate this into 'none' (unless - * literal is set), and indicate that it's the default value. - * Otherwise, we print the number nicely and indicate that it's - * set locally. - */ - if (literal) { - (void) snprintf(propbuf, proplen, "%llu", - (u_longlong_t)val); - } else if (val == UINT64_MAX) { - (void) strlcpy(propbuf, "none", proplen); - } else { - zfs_nicenum(val, propbuf, proplen); - } - - zcp_check(zhp, prop, val, NULL); - break; - - case ZFS_PROP_REFRATIO: - case ZFS_PROP_COMPRESSRATIO: - if (get_numeric_property(zhp, prop, src, &source, &val) != 0) - return (-1); - (void) snprintf(propbuf, proplen, "%llu.%02llux", - (u_longlong_t)(val / 100), - (u_longlong_t)(val % 100)); - zcp_check(zhp, prop, val, NULL); - break; - - case ZFS_PROP_TYPE: - switch (zhp->zfs_type) { - case ZFS_TYPE_FILESYSTEM: - str = "filesystem"; - break; - case ZFS_TYPE_VOLUME: - str = "volume"; - break; - case ZFS_TYPE_SNAPSHOT: - str = "snapshot"; - break; - case ZFS_TYPE_BOOKMARK: - str = "bookmark"; - break; - default: - abort(); - } - (void) snprintf(propbuf, proplen, "%s", str); - zcp_check(zhp, prop, NULL, propbuf); - break; - - case ZFS_PROP_MOUNTED: - /* - * The 'mounted' property is a pseudo-property that described - * whether the filesystem is currently mounted. Even though - * it's a boolean value, the typical values of "on" and "off" - * don't make sense, so we translate to "yes" and "no". 
- */ - if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, - src, &source, &val) != 0) - return (-1); - if (val) - (void) strlcpy(propbuf, "yes", proplen); - else - (void) strlcpy(propbuf, "no", proplen); - break; - - case ZFS_PROP_NAME: - /* - * The 'name' property is a pseudo-property derived from the - * dataset name. It is presented as a real property to simplify - * consumers. - */ - (void) strlcpy(propbuf, zhp->zfs_name, proplen); - zcp_check(zhp, prop, NULL, propbuf); - break; - - case ZFS_PROP_MLSLABEL: - { -#ifdef illumos - m_label_t *new_sl = NULL; - char *ascii = NULL; /* human readable label */ - - (void) strlcpy(propbuf, - getprop_string(zhp, prop, &source), proplen); - - if (literal || (strcasecmp(propbuf, - ZFS_MLSLABEL_DEFAULT) == 0)) - break; - - /* - * Try to translate the internal hex string to - * human-readable output. If there are any - * problems just use the hex string. - */ - - if (str_to_label(propbuf, &new_sl, MAC_LABEL, - L_NO_CORRECTION, NULL) == -1) { - m_label_free(new_sl); - break; - } - - if (label_to_str(new_sl, &ascii, M_LABEL, - DEF_NAMES) != 0) { - if (ascii) - free(ascii); - m_label_free(new_sl); - break; - } - m_label_free(new_sl); - - (void) strlcpy(propbuf, ascii, proplen); - free(ascii); -#else /* !illumos */ - propbuf[0] = '\0'; -#endif /* illumos */ - } - break; - - case ZFS_PROP_GUID: - case ZFS_PROP_CREATETXG: - /* - * GUIDs are stored as numbers, but they are identifiers. - * We don't want them to be pretty printed, because pretty - * printing mangles the ID into a truncated and useless value. 
- */ - if (get_numeric_property(zhp, prop, src, &source, &val) != 0) - return (-1); - (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); - zcp_check(zhp, prop, val, NULL); - break; - - default: - switch (zfs_prop_get_type(prop)) { - case PROP_TYPE_NUMBER: - if (get_numeric_property(zhp, prop, src, - &source, &val) != 0) { - return (-1); - } - - if (literal) { - (void) snprintf(propbuf, proplen, "%llu", - (u_longlong_t)val); - } else { - zfs_nicenum(val, propbuf, proplen); - } - zcp_check(zhp, prop, val, NULL); - break; - - case PROP_TYPE_STRING: - str = getprop_string(zhp, prop, &source); - if (str == NULL) - return (-1); - - (void) strlcpy(propbuf, str, proplen); - zcp_check(zhp, prop, NULL, str); - break; - - case PROP_TYPE_INDEX: - if (get_numeric_property(zhp, prop, src, - &source, &val) != 0) - return (-1); - if (zfs_prop_index_to_string(prop, val, &strval) != 0) - return (-1); - - (void) strlcpy(propbuf, strval, proplen); - zcp_check(zhp, prop, NULL, strval); - break; - - default: - abort(); - } - } - - get_source(zhp, src, source, statbuf, statlen); - - return (0); -} - -/* - * Utility function to get the given numeric property. Does no validation that - * the given property is the appropriate type; should only be used with - * hard-coded property types. - */ -uint64_t -zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) -{ - char *source; - uint64_t val; - - (void) get_numeric_property(zhp, prop, NULL, &source, &val); - - return (val); -} - -int -zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) -{ - char buf[64]; - - (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); - return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); -} - -/* - * Similar to zfs_prop_get(), but returns the value as an integer. 
- */ -int -zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, - zprop_source_t *src, char *statbuf, size_t statlen) -{ - char *source; - - /* - * Check to see if this property applies to our object - */ - if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { - return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, - dgettext(TEXT_DOMAIN, "cannot get property '%s'"), - zfs_prop_to_name(prop))); - } - - if (src) - *src = ZPROP_SRC_NONE; - - if (get_numeric_property(zhp, prop, src, &source, value) != 0) - return (-1); - - get_source(zhp, src, source, statbuf, statlen); - - return (0); -} - -static int -idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, - char **domainp, idmap_rid_t *ridp) -{ -#ifdef illumos - idmap_get_handle_t *get_hdl = NULL; - idmap_stat status; - int err = EINVAL; - - if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) - goto out; - - if (isuser) { - err = idmap_get_sidbyuid(get_hdl, id, - IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); - } else { - err = idmap_get_sidbygid(get_hdl, id, - IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); - } - if (err == IDMAP_SUCCESS && - idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && - status == IDMAP_SUCCESS) - err = 0; - else - err = EINVAL; -out: - if (get_hdl) - idmap_get_destroy(get_hdl); - return (err); -#else /* !illumos */ - assert(!"invalid code path"); - return (EINVAL); // silence compiler warning -#endif /* illumos */ -} - -/* - * convert the propname into parameters needed by kernel - * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829 - * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 - */ -static int -userquota_propname_decode(const char *propname, boolean_t zoned, - zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) -{ - zfs_userquota_prop_t type; - char *cp, *end; - char *numericsid = NULL; - boolean_t isuser; - - domain[0] = '\0'; - *ridp = 0; - /* Figure out the property type ({user|group}{quota|space}) */ - for (type = 
0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { - if (strncmp(propname, zfs_userquota_prop_prefixes[type], - strlen(zfs_userquota_prop_prefixes[type])) == 0) - break; - } - if (type == ZFS_NUM_USERQUOTA_PROPS) - return (EINVAL); - *typep = type; - - isuser = (type == ZFS_PROP_USERQUOTA || - type == ZFS_PROP_USERUSED); - - cp = strchr(propname, '@') + 1; - - if (strchr(cp, '@')) { -#ifdef illumos - /* - * It's a SID name (eg "user@domain") that needs to be - * turned into S-1-domainID-RID. - */ - int flag = 0; - idmap_stat stat, map_stat; - uid_t pid; - idmap_rid_t rid; - idmap_get_handle_t *gh = NULL; - - stat = idmap_get_create(&gh); - if (stat != IDMAP_SUCCESS) { - idmap_get_destroy(gh); - return (ENOMEM); - } - if (zoned && getzoneid() == GLOBAL_ZONEID) - return (ENOENT); - if (isuser) { - stat = idmap_getuidbywinname(cp, NULL, flag, &pid); - if (stat < 0) - return (ENOENT); - stat = idmap_get_sidbyuid(gh, pid, flag, &numericsid, - &rid, &map_stat); - } else { - stat = idmap_getgidbywinname(cp, NULL, flag, &pid); - if (stat < 0) - return (ENOENT); - stat = idmap_get_sidbygid(gh, pid, flag, &numericsid, - &rid, &map_stat); - } - if (stat < 0) { - idmap_get_destroy(gh); - return (ENOENT); - } - stat = idmap_get_mappings(gh); - idmap_get_destroy(gh); - - if (stat < 0) { - return (ENOENT); - } - if (numericsid == NULL) - return (ENOENT); - cp = numericsid; - *ridp = rid; - /* will be further decoded below */ -#else /* !illumos */ - return (ENOENT); -#endif /* illumos */ - } - - if (strncmp(cp, "S-1-", 4) == 0) { - /* It's a numeric SID (eg "S-1-234-567-89") */ - (void) strlcpy(domain, cp, domainlen); - errno = 0; - if (*ridp == 0) { - cp = strrchr(domain, '-'); - *cp = '\0'; - cp++; - *ridp = strtoull(cp, &end, 10); - } else { - end = ""; - } - if (numericsid) { - free(numericsid); - numericsid = NULL; - } - if (errno != 0 || *end != '\0') - return (EINVAL); - } else if (!isdigit(*cp)) { - /* - * It's a user/group name (eg "user") that needs to be - * turned into a 
uid/gid - */ - if (zoned && getzoneid() == GLOBAL_ZONEID) - return (ENOENT); - if (isuser) { - struct passwd *pw; - pw = getpwnam(cp); - if (pw == NULL) - return (ENOENT); - *ridp = pw->pw_uid; - } else { - struct group *gr; - gr = getgrnam(cp); - if (gr == NULL) - return (ENOENT); - *ridp = gr->gr_gid; - } - } else { - /* It's a user/group ID (eg "12345"). */ - uid_t id = strtoul(cp, &end, 10); - idmap_rid_t rid; - char *mapdomain; - - if (*end != '\0') - return (EINVAL); - if (id > MAXUID) { - /* It's an ephemeral ID. */ - if (idmap_id_to_numeric_domain_rid(id, isuser, - &mapdomain, &rid) != 0) - return (ENOENT); - (void) strlcpy(domain, mapdomain, domainlen); - *ridp = rid; - } else { - *ridp = id; - } - } - - ASSERT3P(numericsid, ==, NULL); - return (0); -} - -static int -zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, - uint64_t *propvalue, zfs_userquota_prop_t *typep) -{ - int err; - zfs_cmd_t zc = { 0 }; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - err = userquota_propname_decode(propname, - zfs_prop_get_int(zhp, ZFS_PROP_ZONED), - typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); - zc.zc_objset_type = *typep; - if (err) - return (err); - - err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc); - if (err) - return (err); - - *propvalue = zc.zc_cookie; - return (0); -} - -int -zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, - uint64_t *propvalue) -{ - zfs_userquota_prop_t type; - - return (zfs_prop_get_userquota_common(zhp, propname, propvalue, - &type)); -} - -int -zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, - char *propbuf, int proplen, boolean_t literal) -{ - int err; - uint64_t propvalue; - zfs_userquota_prop_t type; - - err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, - &type); - - if (err) - return (err); - - if (literal) { - (void) snprintf(propbuf, proplen, "%llu", propvalue); - } else if (propvalue == 0 && - (type == 
ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) { - (void) strlcpy(propbuf, "none", proplen); - } else { - zfs_nicenum(propvalue, propbuf, proplen); - } - return (0); -} - -int -zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, - uint64_t *propvalue) -{ - int err; - zfs_cmd_t zc = { 0 }; - const char *snapname; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - snapname = strchr(propname, '@') + 1; - if (strchr(snapname, '@')) { - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - } else { - /* snapname is the short name, append it to zhp's fsname */ - char *cp; - - (void) strlcpy(zc.zc_value, zhp->zfs_name, - sizeof (zc.zc_value)); - cp = strchr(zc.zc_value, '@'); - if (cp != NULL) - *cp = '\0'; - (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); - (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); - } - - err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); - if (err) - return (err); - - *propvalue = zc.zc_cookie; - return (0); -} - -int -zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, - char *propbuf, int proplen, boolean_t literal) -{ - int err; - uint64_t propvalue; - - err = zfs_prop_get_written_int(zhp, propname, &propvalue); - - if (err) - return (err); - - if (literal) { - (void) snprintf(propbuf, proplen, "%llu", propvalue); - } else { - zfs_nicenum(propvalue, propbuf, proplen); - } - return (0); -} - -/* - * Returns the name of the given zfs handle. - */ -const char * -zfs_get_name(const zfs_handle_t *zhp) -{ - return (zhp->zfs_name); -} - -/* - * Returns the name of the parent pool for the given zfs handle. - */ -const char * -zfs_get_pool_name(const zfs_handle_t *zhp) -{ - return (zhp->zpool_hdl->zpool_name); -} - -/* - * Returns the type of the given zfs handle. - */ -zfs_type_t -zfs_get_type(const zfs_handle_t *zhp) -{ - return (zhp->zfs_type); -} - -/* - * Is one dataset name a child dataset of another? 
- * - * Needs to handle these cases: - * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" - * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" - * Descendant? No. No. No. Yes. - */ -static boolean_t -is_descendant(const char *ds1, const char *ds2) -{ - size_t d1len = strlen(ds1); - - /* ds2 can't be a descendant if it's smaller */ - if (strlen(ds2) < d1len) - return (B_FALSE); - - /* otherwise, compare strings and verify that there's a '/' char */ - return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); -} - -/* - * Given a complete name, return just the portion that refers to the parent. - * Will return -1 if there is no parent (path is just the name of the - * pool). - */ -static int -parent_name(const char *path, char *buf, size_t buflen) -{ - char *slashp; - - (void) strlcpy(buf, path, buflen); - - if ((slashp = strrchr(buf, '/')) == NULL) - return (-1); - *slashp = '\0'; - - return (0); -} - -/* - * If accept_ancestor is false, then check to make sure that the given path has - * a parent, and that it exists. If accept_ancestor is true, then find the - * closest existing ancestor for the given path. In prefixlen return the - * length of already existing prefix of the given path. We also fetch the - * 'zoned' property, which is used to validate property settings when creating - * new datasets. 
- */ -static int -check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, - boolean_t accept_ancestor, int *prefixlen) -{ - zfs_cmd_t zc = { 0 }; - char parent[ZFS_MAX_DATASET_NAME_LEN]; - char *slash; - zfs_handle_t *zhp; - char errbuf[1024]; - uint64_t is_zoned; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); - - /* get parent, and check to see if this is just a pool */ - if (parent_name(path, parent, sizeof (parent)) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing dataset name")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - /* check to see if the pool exists */ - if ((slash = strchr(parent, '/')) == NULL) - slash = parent + strlen(parent); - (void) strncpy(zc.zc_name, parent, slash - parent); - zc.zc_name[slash - parent] = '\0'; - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 && - errno == ENOENT) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "no such pool '%s'"), zc.zc_name); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - - /* check to see if the parent dataset exists */ - while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { - if (errno == ENOENT && accept_ancestor) { - /* - * Go deeper to find an ancestor, give up on top level. 
- */ - if (parent_name(parent, parent, sizeof (parent)) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "no such pool '%s'"), zc.zc_name); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - } else if (errno == ENOENT) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "parent does not exist")); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } else - return (zfs_standard_error(hdl, errno, errbuf)); - } - - is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - if (zoned != NULL) - *zoned = is_zoned; - - /* we are in a non-global zone, but parent is in the global zone */ - if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { - (void) zfs_standard_error(hdl, EPERM, errbuf); - zfs_close(zhp); - return (-1); - } - - /* make sure parent is a filesystem */ - if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "parent is not a filesystem")); - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - zfs_close(zhp); - return (-1); - } - - zfs_close(zhp); - if (prefixlen != NULL) - *prefixlen = strlen(parent); - return (0); -} - -/* - * Finds whether the dataset of the given type(s) exists. - */ -boolean_t -zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) -{ - zfs_handle_t *zhp; - - if (!zfs_validate_name(hdl, path, types, B_FALSE)) - return (B_FALSE); - - /* - * Try to get stats for the dataset, which will tell us if it exists. - */ - if ((zhp = make_dataset_handle(hdl, path)) != NULL) { - int ds_type = zhp->zfs_type; - - zfs_close(zhp); - if (types & ds_type) - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * Given a path to 'target', create all the ancestors between - * the prefixlen portion of the path, and the target itself. - * Fail if the initial prefixlen-ancestor does not already exist. 
- */ -int -create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) -{ - zfs_handle_t *h; - char *cp; - const char *opname; - - /* make sure prefix exists */ - cp = target + prefixlen; - if (*cp != '/') { - assert(strchr(cp, '/') == NULL); - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - } else { - *cp = '\0'; - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - *cp = '/'; - } - if (h == NULL) - return (-1); - zfs_close(h); - - /* - * Attempt to create, mount, and share any ancestor filesystems, - * up to the prefixlen-long one. - */ - for (cp = target + prefixlen + 1; - (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) { - - *cp = '\0'; - - h = make_dataset_handle(hdl, target); - if (h) { - /* it already exists, nothing to do here */ - zfs_close(h); - continue; - } - - if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, - NULL) != 0) { - opname = dgettext(TEXT_DOMAIN, "create"); - goto ancestorerr; - } - - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - if (h == NULL) { - opname = dgettext(TEXT_DOMAIN, "open"); - goto ancestorerr; - } - - if (zfs_mount(h, NULL, 0) != 0) { - opname = dgettext(TEXT_DOMAIN, "mount"); - goto ancestorerr; - } - - if (zfs_share(h) != 0) { - opname = dgettext(TEXT_DOMAIN, "share"); - goto ancestorerr; - } - - zfs_close(h); - } - - return (0); - -ancestorerr: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "failed to %s ancestor '%s'"), opname, target); - return (-1); -} - -/* - * Creates non-existing ancestors of the given path. - */ -int -zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) -{ - int prefix; - char *path_copy; - char errbuf[1024]; - int rc = 0; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create '%s'"), path); - - /* - * Check that we are not passing the nesting limit - * before we start creating any ancestors. 
- */ - if (dataset_nestcheck(path) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "maximum name nesting depth exceeded")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) - return (-1); - - if ((path_copy = strdup(path)) != NULL) { - rc = create_parents(hdl, path_copy, prefix); - free(path_copy); - } - if (path_copy == NULL || rc != 0) - return (-1); - - return (0); -} - -/* - * Create a new filesystem or volume. - */ -int -zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, - nvlist_t *props) -{ - int ret; - uint64_t size = 0; - uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); - char errbuf[1024]; - uint64_t zoned; - enum lzc_dataset_type ost; - zpool_handle_t *zpool_handle; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create '%s'"), path); - - /* validate the path, taking care to note the extended error message */ - if (!zfs_validate_name(hdl, path, type, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - if (dataset_nestcheck(path) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "maximum name nesting depth exceeded")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - /* validate parents exist */ - if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) - return (-1); - - /* - * The failure modes when creating a dataset of a different type over - * one that already exists is a little strange. In particular, if you - * try to create a dataset on top of an existing dataset, the ioctl() - * will return ENOENT, not EEXIST. To prevent this from happening, we - * first try to see if the dataset exists. 
- */ - if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset already exists")); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - - if (type == ZFS_TYPE_VOLUME) - ost = LZC_DATSET_TYPE_ZVOL; - else - ost = LZC_DATSET_TYPE_ZFS; - - /* open zpool handle for prop validation */ - char pool_path[ZFS_MAX_DATASET_NAME_LEN]; - (void) strlcpy(pool_path, path, sizeof (pool_path)); - - /* truncate pool_path at first slash */ - char *p = strchr(pool_path, '/'); - if (p != NULL) - *p = '\0'; - - if ((zpool_handle = zpool_open(hdl, pool_path)) == NULL) - return (-1); - - if (props && (props = zfs_valid_proplist(hdl, type, props, - zoned, NULL, zpool_handle, errbuf)) == 0) { - zpool_close(zpool_handle); - return (-1); - } - zpool_close(zpool_handle); - - if (type == ZFS_TYPE_VOLUME) { - /* - * If we are creating a volume, the size and block size must - * satisfy a few restraints. First, the blocksize must be a - * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the - * volsize must be a multiple of the block size, and cannot be - * zero. 
- */ - if (props == NULL || nvlist_lookup_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { - nvlist_free(props); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing volume size")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); - } - - if ((ret = nvlist_lookup_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &blocksize)) != 0) { - if (ret == ENOENT) { - blocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); - } else { - nvlist_free(props); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing volume block size")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); - } - } - - if (size == 0) { - nvlist_free(props); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "volume size cannot be zero")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); - } - - if (size % blocksize != 0) { - nvlist_free(props); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "volume size must be a multiple of volume block " - "size")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); - } - } - - /* create the dataset */ - ret = lzc_create(path, ost, props); - nvlist_free(props); - - /* check for failure */ - if (ret != 0) { - char parent[ZFS_MAX_DATASET_NAME_LEN]; - (void) parent_name(path, parent, sizeof (parent)); - - switch (errno) { - case ENOENT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "no such parent '%s'"), parent); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded to set this " - "property or value")); - return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); - case ERANGE: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid property value(s) specified")); - return (zfs_error(hdl, EZFS_BADPROP, errbuf)); -#ifdef _ILP32 - case EOVERFLOW: - /* - * This platform can't address a volume this big. 
- */ - if (type == ZFS_TYPE_VOLUME) - return (zfs_error(hdl, EZFS_VOLTOOBIG, - errbuf)); -#endif - /* FALLTHROUGH */ - default: - return (zfs_standard_error(hdl, errno, errbuf)); - } - } - - return (0); -} - -/* - * Destroys the given dataset. The caller must make sure that the filesystem - * isn't mounted, and that there are no active dependents. If the file system - * does not exist this function does nothing. - */ -int -zfs_destroy(zfs_handle_t *zhp, boolean_t defer) -{ - int error; - - if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT && defer) - return (EINVAL); - - if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) { - nvlist_t *nv = fnvlist_alloc(); - fnvlist_add_boolean(nv, zhp->zfs_name); - error = lzc_destroy_bookmarks(nv, NULL); - fnvlist_free(nv); - if (error != 0) { - return (zfs_standard_error_fmt(zhp->zfs_hdl, error, - dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), - zhp->zfs_name)); - } - return (0); - } - - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { - nvlist_t *nv = fnvlist_alloc(); - fnvlist_add_boolean(nv, zhp->zfs_name); - error = lzc_destroy_snaps(nv, defer, NULL); - fnvlist_free(nv); - } else { - error = lzc_destroy(zhp->zfs_name); - } - - if (error != 0 && error != ENOENT) { - return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, - dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), - zhp->zfs_name)); - } - - remove_mountpoint(zhp); - - return (0); -} - -struct destroydata { - nvlist_t *nvl; - const char *snapname; -}; - -static int -zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) -{ - struct destroydata *dd = arg; - char name[ZFS_MAX_DATASET_NAME_LEN]; - int rv = 0; - - (void) snprintf(name, sizeof (name), - "%s@%s", zhp->zfs_name, dd->snapname); - - if (lzc_exists(name)) - verify(nvlist_add_boolean(dd->nvl, name) == 0); - - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); - zfs_close(zhp); - return (rv); -} - -/* - * Destroys all snapshots with the given name in zhp & descendants. 
- */ -int -zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) -{ - int ret; - struct destroydata dd = { 0 }; - - dd.snapname = snapname; - verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); - (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - - if (nvlist_empty(dd.nvl)) { - ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, - dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), - zhp->zfs_name, snapname); - } else { - ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer); - } - nvlist_free(dd.nvl); - return (ret); -} - -/* - * Destroys all the snapshots named in the nvlist. - */ -int -zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) -{ - int ret; - nvlist_t *errlist = NULL; - - ret = lzc_destroy_snaps(snaps, defer, &errlist); - - if (ret == 0) { - nvlist_free(errlist); - return (0); - } - - if (nvlist_empty(errlist)) { - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); - - ret = zfs_standard_error(hdl, ret, errbuf); - } - for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL); - pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), - nvpair_name(pair)); - - switch (fnvpair_value_int32(pair)) { - case EEXIST: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "snapshot is cloned")); - ret = zfs_error(hdl, EZFS_EXISTS, errbuf); - break; - default: - ret = zfs_standard_error(hdl, errno, errbuf); - break; - } - } - - nvlist_free(errlist); - return (ret); -} - -/* - * Clones the given dataset. The target must be of the same type as the source. 
- */ -int -zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) -{ - char parent[ZFS_MAX_DATASET_NAME_LEN]; - int ret; - char errbuf[1024]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - uint64_t zoned; - - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create '%s'"), target); - - /* validate the target/clone name */ - if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - /* validate parents exist */ - if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0) - return (-1); - - (void) parent_name(target, parent, sizeof (parent)); - - /* do the clone */ - - if (props) { - zfs_type_t type; - - if (ZFS_IS_VOLUME(zhp)) { - type = ZFS_TYPE_VOLUME; - } else { - type = ZFS_TYPE_FILESYSTEM; - } - if ((props = zfs_valid_proplist(hdl, type, props, zoned, - zhp, zhp->zpool_hdl, errbuf)) == NULL) - return (-1); - if (zfs_fix_auto_resv(zhp, props) == -1) { - nvlist_free(props); - return (-1); - } - } - - ret = lzc_clone(target, zhp->zfs_name, props); - nvlist_free(props); - - if (ret != 0) { - switch (errno) { - - case ENOENT: - /* - * The parent doesn't exist. We should have caught this - * above, but there may a race condition that has since - * destroyed the parent. - * - * At this point, we don't know whether it's the source - * that doesn't exist anymore, or whether the target - * dataset doesn't exist. - */ - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "no such parent '%s'"), parent); - return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); - - case EXDEV: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "source and target pools differ")); - return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, - errbuf)); - - default: - return (zfs_standard_error(zhp->zfs_hdl, errno, - errbuf)); - } - } - - return (ret); -} - -/* - * Promotes the given clone fs to be the clone parent. 
- */ -int -zfs_promote(zfs_handle_t *zhp) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - int ret; - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot promote '%s'"), zhp->zfs_name); - - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "snapshots can not be promoted")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - } - - if (zhp->zfs_dmustats.dds_origin[0] == '\0') { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "not a cloned filesystem")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - } - - if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - ret = lzc_promote(zhp->zfs_name, snapname, sizeof (snapname)); - - if (ret != 0) { - switch (ret) { - case EEXIST: - /* There is a conflicting snapshot name. */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "conflicting snapshot '%s' from parent '%s'"), - snapname, zhp->zfs_dmustats.dds_origin); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - - default: - return (zfs_standard_error(hdl, ret, errbuf)); - } - } - return (ret); -} - -typedef struct snapdata { - nvlist_t *sd_nvl; - const char *sd_snapname; -} snapdata_t; - -static int -zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) -{ - snapdata_t *sd = arg; - char name[ZFS_MAX_DATASET_NAME_LEN]; - int rv = 0; - - if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) == 0) { - (void) snprintf(name, sizeof (name), - "%s@%s", zfs_get_name(zhp), sd->sd_snapname); - - fnvlist_add_boolean(sd->sd_nvl, name); - - rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); - } - zfs_close(zhp); - - return (rv); -} - -int -zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs) -{ - int err; - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot remap dataset '%s'"), fs); - - err = lzc_remap(fs); - - if (err != 0) { - switch (err) { - case ENOTSUP: - 
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - default: - (void) zfs_standard_error(hdl, err, errbuf); - break; - } - } - - return (err); -} - -/* - * Creates snapshots. The keys in the snaps nvlist are the snapshots to be - * created. - */ -int -zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) -{ - int ret; - char errbuf[1024]; - nvpair_t *elem; - nvlist_t *errors; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create snapshots ")); - - elem = NULL; - while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { - const char *snapname = nvpair_name(elem); - - /* validate the target name */ - if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, - B_TRUE)) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot create snapshot '%s'"), snapname); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - } - - /* - * get pool handle for prop validation. assumes all snaps are in the - * same pool, as does lzc_snapshot (below). 
- */ - char pool[ZFS_MAX_DATASET_NAME_LEN]; - elem = nvlist_next_nvpair(snaps, NULL); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/@")] = '\0'; - zpool_handle_t *zpool_hdl = zpool_open(hdl, pool); - - if (props != NULL && - (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) { - zpool_close(zpool_hdl); - return (-1); - } - zpool_close(zpool_hdl); - - ret = lzc_snapshot(snaps, props, &errors); - - if (ret != 0) { - boolean_t printed = B_FALSE; - for (elem = nvlist_next_nvpair(errors, NULL); - elem != NULL; - elem = nvlist_next_nvpair(errors, elem)) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot create snapshot '%s'"), nvpair_name(elem)); - (void) zfs_standard_error(hdl, - fnvpair_value_int32(elem), errbuf); - printed = B_TRUE; - } - if (!printed) { - switch (ret) { - case EXDEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "multiple snapshots of same " - "fs not allowed")); - (void) zfs_error(hdl, EZFS_EXISTS, errbuf); - - break; - default: - (void) zfs_standard_error(hdl, ret, errbuf); - } - } - } - - nvlist_free(props); - nvlist_free(errors); - return (ret); -} - -int -zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, - nvlist_t *props) -{ - int ret; - snapdata_t sd = { 0 }; - char fsname[ZFS_MAX_DATASET_NAME_LEN]; - char *cp; - zfs_handle_t *zhp; - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot snapshot %s"), path); - - if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - (void) strlcpy(fsname, path, sizeof (fsname)); - cp = strchr(fsname, '@'); - *cp = '\0'; - sd.sd_snapname = cp + 1; - - if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) == NULL) { - return (-1); - } - - verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); - if (recursive) { - (void) 
zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); - } else { - fnvlist_add_boolean(sd.sd_nvl, path); - } - - ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); - nvlist_free(sd.sd_nvl); - zfs_close(zhp); - return (ret); -} - -/* - * Destroy any more recent snapshots. We invoke this callback on any dependents - * of the snapshot first. If the 'cb_dependent' member is non-zero, then this - * is a dependent and we should just destroy it without checking the transaction - * group. - */ -typedef struct rollback_data { - const char *cb_target; /* the snapshot */ - uint64_t cb_create; /* creation time reference */ - boolean_t cb_error; - boolean_t cb_force; -} rollback_data_t; - -static int -rollback_destroy_dependent(zfs_handle_t *zhp, void *data) -{ - rollback_data_t *cbp = data; - prop_changelist_t *clp; - - /* We must destroy this clone; first unmount it */ - clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - cbp->cb_force ? MS_FORCE: 0); - if (clp == NULL || changelist_prefix(clp) != 0) { - cbp->cb_error = B_TRUE; - zfs_close(zhp); - return (0); - } - if (zfs_destroy(zhp, B_FALSE) != 0) - cbp->cb_error = B_TRUE; - else - changelist_remove(clp, zhp->zfs_name); - (void) changelist_postfix(clp); - changelist_free(clp); - - zfs_close(zhp); - return (0); -} - -static int -rollback_destroy(zfs_handle_t *zhp, void *data) -{ - rollback_data_t *cbp = data; - - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, - rollback_destroy_dependent, cbp); - - cbp->cb_error |= zfs_destroy(zhp, B_FALSE); - } - - zfs_close(zhp); - return (0); -} - -/* - * Given a dataset, rollback to a specific snapshot, discarding any - * data changes since then and making it the active dataset. - * - * Any snapshots and bookmarks more recent than the target are - * destroyed, along with their dependents (i.e. clones). 
- */ -int -zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) -{ - rollback_data_t cb = { 0 }; - int err; - boolean_t restore_resv = 0; - uint64_t min_txg = 0, old_volsize = 0, new_volsize; - zfs_prop_t resv_prop; - - assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || - zhp->zfs_type == ZFS_TYPE_VOLUME); - - /* - * Destroy all recent snapshots and their dependents. - */ - cb.cb_force = force; - cb.cb_target = snap->zfs_name; - cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); - - if (cb.cb_create > 0) - min_txg = cb.cb_create; - - (void) zfs_iter_snapshots(zhp, B_FALSE, rollback_destroy, &cb, - min_txg, 0); - - (void) zfs_iter_bookmarks(zhp, rollback_destroy, &cb); - - if (cb.cb_error) - return (-1); - - /* - * Now that we have verified that the snapshot is the latest, - * rollback to the given snapshot. - */ - - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - if (zfs_which_resv_prop(zhp, &resv_prop) < 0) - return (-1); - old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); - restore_resv = - (old_volsize == zfs_prop_get_int(zhp, resv_prop)); - } - - /* - * Pass both the filesystem and the wanted snapshot names, - * we would get an error back if the snapshot is destroyed or - * a new snapshot is created before this request is processed. 
- */ - err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name); - if (err != 0) { - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), - zhp->zfs_name); - switch (err) { - case EEXIST: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "there is a snapshot or bookmark more recent " - "than '%s'"), snap->zfs_name); - (void) zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf); - break; - case ESRCH: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "'%s' is not found among snapshots of '%s'"), - snap->zfs_name, zhp->zfs_name); - (void) zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf); - break; - case EINVAL: - (void) zfs_error(zhp->zfs_hdl, EZFS_BADTYPE, errbuf); - break; - default: - (void) zfs_standard_error(zhp->zfs_hdl, err, errbuf); - } - return (err); - } - - /* - * For volumes, if the pre-rollback volsize matched the pre- - * rollback reservation and the volsize has changed then set - * the reservation property to the post-rollback volsize. - * Make a new handle since the rollback closed the dataset. - */ - if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && - (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { - if (restore_resv) { - new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); - if (old_volsize != new_volsize) - err = zfs_prop_set_int(zhp, resv_prop, - new_volsize); - } - zfs_close(zhp); - } - return (err); -} - -/* - * Renames the given dataset. 
- */ -int -zfs_rename(zfs_handle_t *zhp, const char *source, const char *target, - renameflags_t flags) -{ - int ret = 0; - zfs_cmd_t zc = { 0 }; - char *delim; - prop_changelist_t *cl = NULL; - zfs_handle_t *zhrp = NULL; - char *parentname = NULL; - char parent[ZFS_MAX_DATASET_NAME_LEN]; - char property[ZFS_MAXPROPLEN]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - - /* if we have the same exact name, just return success */ - if (strcmp(zhp->zfs_name, target) == 0) - return (0); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot rename to '%s'"), target); - - if (source != NULL) { - /* - * This is recursive snapshots rename, put snapshot name - * (that might not exist) into zfs_name. - */ - assert(flags.recurse); - - (void) strlcat(zhp->zfs_name, "@", sizeof(zhp->zfs_name)); - (void) strlcat(zhp->zfs_name, source, sizeof(zhp->zfs_name)); - zhp->zfs_type = ZFS_TYPE_SNAPSHOT; - } - - /* make sure source name is valid */ - if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - /* - * Make sure the target name is valid - */ - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT || - zhp->zfs_type == ZFS_TYPE_BOOKMARK) { - const char sep = zhp->zfs_type == ZFS_TYPE_SNAPSHOT ? '@' : '#'; - - if ((strchr(target, sep) == NULL) || *target == sep) { - /* - * Snapshot target name is abbreviated, - * reconstruct full dataset name - */ - (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); - delim = strchr(parent, sep); - if (strchr(target, sep) == NULL) - *(++delim) = '\0'; - else - *delim = '\0'; - (void) strlcat(parent, target, sizeof (parent)); - target = parent; - } else { - /* - * Make sure we're renaming within the same dataset. 
- */ - delim = strchr(target, sep); - if (strncmp(zhp->zfs_name, target, delim - target) - != 0 || zhp->zfs_name[delim - target] != sep) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "%s must be part of same dataset"), - zhp->zfs_type == ZFS_TYPE_SNAPSHOT ? - "snapshots" : "bookmarks"); - return (zfs_error(hdl, EZFS_CROSSTARGET, - errbuf)); - } - } - - if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } else { - if (flags.recurse) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "recursive rename must be a snapshot")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - } - - if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - /* validate parents */ - if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) - return (-1); - - /* make sure we're in the same pool */ - verify((delim = strchr(target, '/')) != NULL); - if (strncmp(zhp->zfs_name, target, delim - target) != 0 || - zhp->zfs_name[delim - target] != '/') { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "datasets must be within same pool")); - return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); - } - - /* new name cannot be a child of the current dataset name */ - if (is_descendant(zhp->zfs_name, target)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "New dataset name cannot be a descendant of " - "current dataset name")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - } - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); - - if (getzoneid() == GLOBAL_ZONEID && - zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset is used in a non-global zone")); - return (zfs_error(hdl, EZFS_ZONED, errbuf)); - } - - /* - * Avoid unmounting file systems with mountpoint property set to - * 'legacy' or 'none' even if -u option is not given. 
- */ - if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && - !flags.recurse && !flags.nounmount && - zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, property, - sizeof (property), NULL, NULL, 0, B_FALSE) == 0 && - (strcmp(property, "legacy") == 0 || - strcmp(property, "none") == 0)) { - flags.nounmount = B_TRUE; - } - if (flags.recurse) { - parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); - if (parentname == NULL) { - ret = -1; - goto error; - } - delim = strchr(parentname, '@'); - *delim = '\0'; - zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); - if (zhrp == NULL) { - ret = -1; - goto error; - } - } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT && - zhp->zfs_type != ZFS_TYPE_BOOKMARK) { - if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, - flags.nounmount ? CL_GATHER_DONT_UNMOUNT : 0, - flags.forceunmount ? MS_FORCE : 0)) == NULL) { - return (-1); - } - - if (changelist_haszonedchild(cl)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "child dataset with inherited mountpoint is used " - "in a non-global zone")); - (void) zfs_error(hdl, EZFS_ZONED, errbuf); - ret = -1; - goto error; - } - - if ((ret = changelist_prefix(cl)) != 0) - goto error; - } - - if (ZFS_IS_VOLUME(zhp)) - zc.zc_objset_type = DMU_OST_ZVOL; - else - zc.zc_objset_type = DMU_OST_ZFS; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); - - zc.zc_cookie = flags.recurse ? 
1 : 0; - if (flags.nounmount) - zc.zc_cookie |= 2; - - if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { - /* - * if it was recursive, the one that actually failed will - * be in zc.zc_name - */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot rename '%s'"), zc.zc_name); - - if (flags.recurse && errno == EEXIST) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "a child dataset already has a snapshot " - "with the new name")); - (void) zfs_error(hdl, EZFS_EXISTS, errbuf); - } else if (errno == EINVAL) { - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - } else { - (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); - } - - /* - * On failure, we still want to remount any filesystems that - * were previously mounted, so we don't alter the system state. - */ - if (cl != NULL) - (void) changelist_postfix(cl); - } else { - if (cl != NULL) { - changelist_rename(cl, zfs_get_name(zhp), target); - ret = changelist_postfix(cl); - } - } - -error: - if (parentname != NULL) { - free(parentname); - } - if (zhrp != NULL) { - zfs_close(zhrp); - } - if (cl != NULL) { - changelist_free(cl); - } - return (ret); -} - -nvlist_t * -zfs_get_user_props(zfs_handle_t *zhp) -{ - return (zhp->zfs_user_props); -} - -nvlist_t * -zfs_get_recvd_props(zfs_handle_t *zhp) -{ - if (zhp->zfs_recvd_props == NULL) - if (get_recvd_props_ioctl(zhp) != 0) - return (NULL); - return (zhp->zfs_recvd_props); -} - -/* - * This function is used by 'zfs list' to determine the exact set of columns to - * display, and their maximum widths. This does two main things: - * - * - If this is a list of all properties, then expand the list to include - * all native properties, and set a flag so that for each dataset we look - * for new unique user properties and add them to the list. - * - * - For non fixed-width properties, keep track of the maximum width seen - * so that we can size the column appropriately. 
If the user has - * requested received property values, we also need to compute the width - * of the RECEIVED column. - */ -int -zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received, - boolean_t literal) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - zprop_list_t *entry; - zprop_list_t **last, **start; - nvlist_t *userprops, *propval; - nvpair_t *elem; - char *strval; - char buf[ZFS_MAXPROPLEN]; - - if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) - return (-1); - - userprops = zfs_get_user_props(zhp); - - entry = *plp; - if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { - /* - * Go through and add any user properties as necessary. We - * start by incrementing our list pointer to the first - * non-native property. - */ - start = plp; - while (*start != NULL) { - if ((*start)->pl_prop == ZPROP_INVAL) - break; - start = &(*start)->pl_next; - } - - elem = NULL; - while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { - /* - * See if we've already found this property in our list. 
- */ - for (last = start; *last != NULL; - last = &(*last)->pl_next) { - if (strcmp((*last)->pl_user_prop, - nvpair_name(elem)) == 0) - break; - } - - if (*last == NULL) { - if ((entry = zfs_alloc(hdl, - sizeof (zprop_list_t))) == NULL || - ((entry->pl_user_prop = zfs_strdup(hdl, - nvpair_name(elem)))) == NULL) { - free(entry); - return (-1); - } - - entry->pl_prop = ZPROP_INVAL; - entry->pl_width = strlen(nvpair_name(elem)); - entry->pl_all = B_TRUE; - *last = entry; - } - } - } - - /* - * Now go through and check the width of any non-fixed columns - */ - for (entry = *plp; entry != NULL; entry = entry->pl_next) { - if (entry->pl_fixed && !literal) - continue; - - if (entry->pl_prop != ZPROP_INVAL) { - if (zfs_prop_get(zhp, entry->pl_prop, - buf, sizeof (buf), NULL, NULL, 0, literal) == 0) { - if (strlen(buf) > entry->pl_width) - entry->pl_width = strlen(buf); - } - if (received && zfs_prop_get_recvd(zhp, - zfs_prop_to_name(entry->pl_prop), - buf, sizeof (buf), literal) == 0) - if (strlen(buf) > entry->pl_recvd_width) - entry->pl_recvd_width = strlen(buf); - } else { - if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, - &propval) == 0) { - verify(nvlist_lookup_string(propval, - ZPROP_VALUE, &strval) == 0); - if (strlen(strval) > entry->pl_width) - entry->pl_width = strlen(strval); - } - if (received && zfs_prop_get_recvd(zhp, - entry->pl_user_prop, - buf, sizeof (buf), literal) == 0) - if (strlen(buf) > entry->pl_recvd_width) - entry->pl_recvd_width = strlen(buf); - } - } - - return (0); -} - -int -zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, - char *resource, void *export, void *sharetab, - int sharemax, zfs_share_op_t operation) -{ - zfs_cmd_t zc = { 0 }; - int error; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); - if (resource) - (void) strlcpy(zc.zc_string, resource, sizeof (zc.zc_string)); - zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab; - 
zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export; - zc.zc_share.z_sharetype = operation; - zc.zc_share.z_sharemax = sharemax; - error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc); - return (error); -} - -void -zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) -{ - nvpair_t *curr; - - /* - * Keep a reference to the props-table against which we prune the - * properties. - */ - zhp->zfs_props_table = props; - - curr = nvlist_next_nvpair(zhp->zfs_props, NULL); - - while (curr) { - zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); - nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr); - - /* - * User properties will result in ZPROP_INVAL, and since we - * only know how to prune standard ZFS properties, we always - * leave these in the list. This can also happen if we - * encounter an unknown DSL property (when running older - * software, for example). - */ - if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE) - (void) nvlist_remove(zhp->zfs_props, - nvpair_name(curr), nvpair_type(curr)); - curr = next; - } -} - -#ifdef illumos -static int -zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, - zfs_smb_acl_op_t cmd, char *resource1, char *resource2) -{ - zfs_cmd_t zc = { 0 }; - nvlist_t *nvlist = NULL; - int error; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); - zc.zc_cookie = (uint64_t)cmd; - - if (cmd == ZFS_SMB_ACL_RENAME) { - if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { - (void) no_memory(hdl); - return (0); - } - } - - switch (cmd) { - case ZFS_SMB_ACL_ADD: - case ZFS_SMB_ACL_REMOVE: - (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); - break; - case ZFS_SMB_ACL_RENAME: - if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, - resource1) != 0) { - (void) no_memory(hdl); - return (-1); - } - if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, - resource2) != 0) { - (void) no_memory(hdl); - return (-1); - } - if 
(zcmd_write_src_nvlist(hdl, &zc, nvlist) != 0) { - nvlist_free(nvlist); - return (-1); - } - break; - case ZFS_SMB_ACL_PURGE: - break; - default: - return (-1); - } - error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); - nvlist_free(nvlist); - return (error); -} - -int -zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, - char *path, char *resource) -{ - return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, - resource, NULL)); -} - -int -zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, - char *path, char *resource) -{ - return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, - resource, NULL)); -} - -int -zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) -{ - return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, - NULL, NULL)); -} - -int -zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, - char *oldname, char *newname) -{ - return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, - oldname, newname)); -} -#endif /* illumos */ - -int -zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, - zfs_userspace_cb_t func, void *arg) -{ - zfs_cmd_t zc = { 0 }; - zfs_useracct_t buf[100]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - int ret; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - zc.zc_objset_type = type; - zc.zc_nvlist_dst = (uintptr_t)buf; - - for (;;) { - zfs_useracct_t *zua = buf; - - zc.zc_nvlist_dst_size = sizeof (buf); - if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot get used/quota for %s"), zc.zc_name); - return (zfs_standard_error_fmt(hdl, errno, errbuf)); - } - if (zc.zc_nvlist_dst_size == 0) - break; - - while (zc.zc_nvlist_dst_size > 0) { - if ((ret = func(arg, zua->zu_domain, zua->zu_rid, - zua->zu_space)) != 0) - return (ret); - zua++; - zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); - } - } - - return (0); -} - -struct holdarg { - nvlist_t 
*nvl; - const char *snapname; - const char *tag; - boolean_t recursive; - int error; -}; - -static int -zfs_hold_one(zfs_handle_t *zhp, void *arg) -{ - struct holdarg *ha = arg; - char name[ZFS_MAX_DATASET_NAME_LEN]; - int rv = 0; - - (void) snprintf(name, sizeof (name), - "%s@%s", zhp->zfs_name, ha->snapname); - - if (lzc_exists(name)) - fnvlist_add_string(ha->nvl, name, ha->tag); - - if (ha->recursive) - rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); - zfs_close(zhp); - return (rv); -} - -int -zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, int cleanup_fd) -{ - int ret; - struct holdarg ha; - - ha.nvl = fnvlist_alloc(); - ha.snapname = snapname; - ha.tag = tag; - ha.recursive = recursive; - (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - - if (nvlist_empty(ha.nvl)) { - char errbuf[1024]; - - fnvlist_free(ha.nvl); - ret = ENOENT; - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s@%s'"), - zhp->zfs_name, snapname); - (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); - return (ret); - } - - ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); - fnvlist_free(ha.nvl); - - return (ret); -} - -int -zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) -{ - int ret; - nvlist_t *errors; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - nvpair_t *elem; - - errors = NULL; - ret = lzc_hold(holds, cleanup_fd, &errors); - - if (ret == 0) { - /* There may be errors even in the success case. 
*/ - fnvlist_free(errors); - return (0); - } - - if (nvlist_empty(errors)) { - /* no hold-specific errors */ - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot hold")); - switch (ret) { - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - default: - (void) zfs_standard_error(hdl, ret, errbuf); - } - } - - for (elem = nvlist_next_nvpair(errors, NULL); - elem != NULL; - elem = nvlist_next_nvpair(errors, elem)) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s'"), nvpair_name(elem)); - switch (fnvpair_value_int32(elem)) { - case E2BIG: - /* - * Temporary tags wind up having the ds object id - * prepended. So even if we passed the length check - * above, it's still possible for the tag to wind - * up being slightly too long. - */ - (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - case EEXIST: - (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); - break; - default: - (void) zfs_standard_error(hdl, - fnvpair_value_int32(elem), errbuf); - } - } - - fnvlist_free(errors); - return (ret); -} - -static int -zfs_release_one(zfs_handle_t *zhp, void *arg) -{ - struct holdarg *ha = arg; - char name[ZFS_MAX_DATASET_NAME_LEN]; - int rv = 0; - nvlist_t *existing_holds; - - (void) snprintf(name, sizeof (name), - "%s@%s", zhp->zfs_name, ha->snapname); - - if (lzc_get_holds(name, &existing_holds) != 0) { - ha->error = ENOENT; - } else if (!nvlist_exists(existing_holds, ha->tag)) { - ha->error = ESRCH; - } else { - nvlist_t *torelease = fnvlist_alloc(); - fnvlist_add_boolean(torelease, ha->tag); - fnvlist_add_nvlist(ha->nvl, name, torelease); - fnvlist_free(torelease); - } - - if (ha->recursive) - rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); - zfs_close(zhp); - return 
(rv); -} - -int -zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive) -{ - int ret; - struct holdarg ha; - nvlist_t *errors = NULL; - nvpair_t *elem; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - - ha.nvl = fnvlist_alloc(); - ha.snapname = snapname; - ha.tag = tag; - ha.recursive = recursive; - ha.error = 0; - (void) zfs_release_one(zfs_handle_dup(zhp), &ha); - - if (nvlist_empty(ha.nvl)) { - fnvlist_free(ha.nvl); - ret = ha.error; - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot release hold from snapshot '%s@%s'"), - zhp->zfs_name, snapname); - if (ret == ESRCH) { - (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); - } else { - (void) zfs_standard_error(hdl, ret, errbuf); - } - return (ret); - } - - ret = lzc_release(ha.nvl, &errors); - fnvlist_free(ha.nvl); - - if (ret == 0) { - /* There may be errors even in the success case. */ - fnvlist_free(errors); - return (0); - } - - if (nvlist_empty(errors)) { - /* no hold-specific errors */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot release")); - switch (errno) { - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - default: - (void) zfs_standard_error_fmt(hdl, errno, errbuf); - } - } - - for (elem = nvlist_next_nvpair(errors, NULL); - elem != NULL; - elem = nvlist_next_nvpair(errors, elem)) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot release hold from snapshot '%s'"), - nvpair_name(elem)); - switch (fnvpair_value_int32(elem)) { - case ESRCH: - (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - default: - (void) zfs_standard_error_fmt(hdl, - fnvpair_value_int32(elem), errbuf); - } - } - - fnvlist_free(errors); - return (ret); -} - -int -zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) -{ - zfs_cmd_t 
zc = { 0 }; - libzfs_handle_t *hdl = zhp->zfs_hdl; - int nvsz = 2048; - void *nvbuf; - int err = 0; - char errbuf[1024]; - - assert(zhp->zfs_type == ZFS_TYPE_VOLUME || - zhp->zfs_type == ZFS_TYPE_FILESYSTEM); - -tryagain: - - nvbuf = malloc(nvsz); - if (nvbuf == NULL) { - err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); - goto out; - } - - zc.zc_nvlist_dst_size = nvsz; - zc.zc_nvlist_dst = (uintptr_t)nvbuf; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), - zc.zc_name); - switch (errno) { - case ENOMEM: - free(nvbuf); - nvsz = zc.zc_nvlist_dst_size; - goto tryagain; - - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - err = zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EINVAL: - err = zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - case ENOENT: - err = zfs_error(hdl, EZFS_NOENT, errbuf); - break; - default: - err = zfs_standard_error_fmt(hdl, errno, errbuf); - break; - } - } else { - /* success */ - int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); - if (rc) { - (void) snprintf(errbuf, sizeof (errbuf), dgettext( - TEXT_DOMAIN, "cannot get permissions on '%s'"), - zc.zc_name); - err = zfs_standard_error_fmt(hdl, rc, errbuf); - } - } - - free(nvbuf); -out: - return (err); -} - -int -zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char *nvbuf; - char errbuf[1024]; - size_t nvsz; - int err; - - assert(zhp->zfs_type == ZFS_TYPE_VOLUME || - zhp->zfs_type == ZFS_TYPE_FILESYSTEM); - - err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); - assert(err == 0); - - nvbuf = malloc(nvsz); - - err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); - assert(err == 0); - - zc.zc_nvlist_src_size = nvsz; - zc.zc_nvlist_src = (uintptr_t)nvbuf; - 
zc.zc_perm_action = un; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), - zc.zc_name); - switch (errno) { - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - err = zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EINVAL: - err = zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - case ENOENT: - err = zfs_error(hdl, EZFS_NOENT, errbuf); - break; - default: - err = zfs_standard_error_fmt(hdl, errno, errbuf); - break; - } - } - - free(nvbuf); - - return (err); -} - -int -zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) -{ - int err; - char errbuf[1024]; - - err = lzc_get_holds(zhp->zfs_name, nvl); - - if (err != 0) { - libzfs_handle_t *hdl = zhp->zfs_hdl; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), - zhp->zfs_name); - switch (err) { - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - err = zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EINVAL: - err = zfs_error(hdl, EZFS_BADTYPE, errbuf); - break; - case ENOENT: - err = zfs_error(hdl, EZFS_NOENT, errbuf); - break; - default: - err = zfs_standard_error_fmt(hdl, errno, errbuf); - break; - } - } - - return (err); -} - -/* - * Convert the zvol's volume size to an appropriate reservation. - * Note: If this routine is updated, it is necessary to update the ZFS test - * suite's shell version in reservation.kshlib. 
- */ -uint64_t -zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) -{ - uint64_t numdb; - uint64_t nblocks, volblocksize; - int ncopies; - char *strval; - - if (nvlist_lookup_string(props, - zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) - ncopies = atoi(strval); - else - ncopies = 1; - if (nvlist_lookup_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize) != 0) - volblocksize = ZVOL_DEFAULT_BLOCKSIZE; - nblocks = volsize/volblocksize; - /* start with metadnode L0-L6 */ - numdb = 7; - /* calculate number of indirects */ - while (nblocks > 1) { - nblocks += DNODES_PER_LEVEL - 1; - nblocks /= DNODES_PER_LEVEL; - numdb += nblocks; - } - numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); - volsize *= ncopies; - /* - * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't - * compressed, but in practice they compress down to about - * 1100 bytes - */ - numdb *= 1ULL << DN_MAX_INDBLKSHIFT; - volsize += numdb; - return (volsize); -} - -/* - * Attach/detach the given filesystem to/from the given jail. - */ -int -zfs_jail(zfs_handle_t *zhp, int jailid, int attach) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_cmd_t zc = { 0 }; - char errbuf[1024]; - unsigned long cmd; - int ret; - - if (attach) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name); - } else { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot unjail '%s'"), zhp->zfs_name); - } - - switch (zhp->zfs_type) { - case ZFS_TYPE_VOLUME: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "volumes can not be jailed")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - case ZFS_TYPE_SNAPSHOT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "snapshots can not be jailed")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - } - assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM); - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - zc.zc_objset_type = DMU_OST_ZFS; - zc.zc_jailid = jailid; - - cmd = attach ? 
ZFS_IOC_JAIL : ZFS_IOC_UNJAIL; - if ((ret = ioctl(hdl->libzfs_fd, cmd, &zc)) != 0) - zfs_standard_error(hdl, errno, errbuf); - - return (ret); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c deleted file mode 100644 index db132190154c..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c +++ /dev/null @@ -1,834 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2015, 2018 by Delphix. All rights reserved. - * Copyright 2016 Joyent, Inc. 
- * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - */ - -/* - * zfs diff support - */ -#include <ctype.h> -#include <errno.h> -#include <libintl.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <stddef.h> -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <pthread.h> -#include <sys/zfs_ioctl.h> -#include <libzfs.h> -#include "libzfs_impl.h" - -#define ZDIFF_SNAPDIR "/.zfs/snapshot/" -#define ZDIFF_SHARESDIR "/.zfs/shares/" -#define ZDIFF_PREFIX "zfs-diff-%d" - -#define ZDIFF_ADDED '+' -#define ZDIFF_MODIFIED 'M' -#define ZDIFF_REMOVED '-' -#define ZDIFF_RENAMED 'R' - -typedef struct differ_info { - zfs_handle_t *zhp; - char *fromsnap; - char *frommnt; - char *tosnap; - char *tomnt; - char *ds; - char *dsmnt; - char *tmpsnap; - char errbuf[1024]; - boolean_t isclone; - boolean_t scripted; - boolean_t classify; - boolean_t timestamped; - uint64_t shares; - int zerr; - int cleanupfd; - int outputfd; - int datafd; -} differ_info_t; - -/* - * Given a {dsname, object id}, get the object path - */ -static int -get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, - char *pn, int maxlen, zfs_stat_t *sb) -{ - zfs_cmd_t zc = { 0 }; - int error; - - (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); - zc.zc_obj = obj; - - errno = 0; - error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc); - di->zerr = errno; - - /* we can get stats even if we failed to get a path */ - (void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t)); - if (error == 0) { - ASSERT(di->zerr == 0); - (void) strlcpy(pn, zc.zc_value, maxlen); - return (0); - } - - if (di->zerr == ESTALE) { - (void) snprintf(pn, maxlen, "(on_delete_queue)"); - return (0); - } else if (di->zerr == EPERM) { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "The sys_config privilege or diff delegated permission " - "is needed\nto discover path names")); - return (-1); - } else { - (void) 
snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "Unable to determine path or stats for " - "object %jd in %s"), (uintmax_t)obj, dsname); - return (-1); - } -} - -/* - * stream_bytes - * - * Prints a file name out a character at a time. If the character is - * not in the range of what we consider "printable" ASCII, display it - * as an escaped 3-digit octal value. ASCII values less than a space - * are all control characters and we declare the upper end as the - * DELete character. This also is the last 7-bit ASCII character. - * We choose to treat all 8-bit ASCII as not printable for this - * application. - */ -static void -stream_bytes(FILE *fp, const char *string) -{ - char c; - - while ((c = *string++) != '\0') { - if (c > ' ' && c != '\\' && c < '\177') { - (void) fprintf(fp, "%c", c); - } else { - (void) fprintf(fp, "\\%03o", (uint8_t)c); - } - } -} - -static void -print_what(FILE *fp, mode_t what) -{ - char symbol; - - switch (what & S_IFMT) { - case S_IFBLK: - symbol = 'B'; - break; - case S_IFCHR: - symbol = 'C'; - break; - case S_IFDIR: - symbol = '/'; - break; -#ifdef S_IFDOOR - case S_IFDOOR: - symbol = '>'; - break; -#endif - case S_IFIFO: - symbol = '|'; - break; - case S_IFLNK: - symbol = '@'; - break; -#ifdef S_IFPORT - case S_IFPORT: - symbol = 'P'; - break; -#endif - case S_IFSOCK: - symbol = '='; - break; - case S_IFREG: - symbol = 'F'; - break; - default: - symbol = '?'; - break; - } - (void) fprintf(fp, "%c", symbol); -} - -static void -print_cmn(FILE *fp, differ_info_t *di, const char *file) -{ - stream_bytes(fp, di->dsmnt); - stream_bytes(fp, file); -} - -static void -print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, - zfs_stat_t *isb) -{ - if (di->timestamped) - (void) fprintf(fp, "%10lld.%09lld\t", - (longlong_t)isb->zs_ctime[0], - (longlong_t)isb->zs_ctime[1]); - (void) fprintf(fp, "%c\t", ZDIFF_RENAMED); - if (di->classify) { - print_what(fp, isb->zs_mode); - (void) fprintf(fp, "\t"); - } - 
print_cmn(fp, di, old); - if (di->scripted) - (void) fprintf(fp, "\t"); - else - (void) fprintf(fp, " -> "); - print_cmn(fp, di, new); - (void) fprintf(fp, "\n"); -} - -static void -print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, - zfs_stat_t *isb) -{ - if (di->timestamped) - (void) fprintf(fp, "%10lld.%09lld\t", - (longlong_t)isb->zs_ctime[0], - (longlong_t)isb->zs_ctime[1]); - (void) fprintf(fp, "%c\t", ZDIFF_MODIFIED); - if (di->classify) { - print_what(fp, isb->zs_mode); - (void) fprintf(fp, "\t"); - } - print_cmn(fp, di, file); - (void) fprintf(fp, "\t(%+d)", delta); - (void) fprintf(fp, "\n"); -} - -static void -print_file(FILE *fp, differ_info_t *di, char type, const char *file, - zfs_stat_t *isb) -{ - if (di->timestamped) - (void) fprintf(fp, "%10lld.%09lld\t", - (longlong_t)isb->zs_ctime[0], - (longlong_t)isb->zs_ctime[1]); - (void) fprintf(fp, "%c\t", type); - if (di->classify) { - print_what(fp, isb->zs_mode); - (void) fprintf(fp, "\t"); - } - print_cmn(fp, di, file); - (void) fprintf(fp, "\n"); -} - -static int -write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj) -{ - struct zfs_stat fsb, tsb; - mode_t fmode, tmode; - char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN]; - int fobjerr, tobjerr; - int change; - - if (dobj == di->shares) - return (0); - - /* - * Check the from and to snapshots for info on the object. If - * we get ENOENT, then the object just didn't exist in that - * snapshot. If we get ENOTSUP, then we tried to get - * info on a non-ZPL object, which we don't care about anyway. 
- */ - fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname, - MAXPATHLEN, &fsb); - if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) - return (-1); - - tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname, - MAXPATHLEN, &tsb); - if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) - return (-1); - - /* - * Unallocated object sharing the same meta dnode block - */ - if (fobjerr && tobjerr) { - ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP); - di->zerr = 0; - return (0); - } - - di->zerr = 0; /* negate get_stats_for_obj() from side that failed */ - fmode = fsb.zs_mode & S_IFMT; - tmode = tsb.zs_mode & S_IFMT; - if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 || - tsb.zs_links == 0) - change = 0; - else - change = tsb.zs_links - fsb.zs_links; - - if (fobjerr) { - if (change) { - print_link_change(fp, di, change, tobjname, &tsb); - return (0); - } - print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); - return (0); - } else if (tobjerr) { - if (change) { - print_link_change(fp, di, change, fobjname, &fsb); - return (0); - } - print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); - return (0); - } - - if (fmode != tmode && fsb.zs_gen == tsb.zs_gen) - tsb.zs_gen++; /* Force a generational difference */ - - /* Simple modification or no change */ - if (fsb.zs_gen == tsb.zs_gen) { - /* No apparent changes. Could we assert !this? */ - if (fsb.zs_ctime[0] == tsb.zs_ctime[0] && - fsb.zs_ctime[1] == tsb.zs_ctime[1]) - return (0); - if (change) { - print_link_change(fp, di, change, - change > 0 ? 
fobjname : tobjname, &tsb); - } else if (strcmp(fobjname, tobjname) == 0) { - print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb); - } else { - print_rename(fp, di, fobjname, tobjname, &tsb); - } - return (0); - } else { - /* file re-created or object re-used */ - print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); - print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); - return (0); - } -} - -static int -write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) -{ - uint64_t o; - int err; - - for (o = dr->ddr_first; o <= dr->ddr_last; o++) { - if ((err = write_inuse_diffs_one(fp, di, o)) != 0) - return (err); - } - return (0); -} - -static int -describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, - int maxlen) -{ - struct zfs_stat sb; - - if (get_stats_for_obj(di, di->fromsnap, object, namebuf, - maxlen, &sb) != 0) { - return (-1); - } - /* Don't print if in the delete queue on from side */ - if (di->zerr == ESTALE) { - di->zerr = 0; - return (0); - } - - print_file(fp, di, ZDIFF_REMOVED, namebuf, &sb); - return (0); -} - -static int -write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *lhdl = di->zhp->zfs_hdl; - char fobjname[MAXPATHLEN]; - - (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); - zc.zc_obj = dr->ddr_first - 1; - - ASSERT(di->zerr == 0); - - while (zc.zc_obj < dr->ddr_last) { - int err; - - err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc); - if (err == 0) { - if (zc.zc_obj == di->shares) { - zc.zc_obj++; - continue; - } - if (zc.zc_obj > dr->ddr_last) { - break; - } - err = describe_free(fp, di, zc.zc_obj, fobjname, - MAXPATHLEN); - if (err) - break; - } else if (errno == ESRCH) { - break; - } else { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "next allocated object (> %jd) find failure"), - (uintmax_t)zc.zc_obj); - di->zerr = errno; - break; - } - } - if (di->zerr) - return (-1); - return (0); -} - -static void * 
-differ(void *arg) -{ - differ_info_t *di = arg; - dmu_diff_record_t dr; - FILE *ofp; - int err = 0; - - if ((ofp = fdopen(di->outputfd, "w")) == NULL) { - di->zerr = errno; - (void) strerror_r(errno, di->errbuf, sizeof (di->errbuf)); - (void) close(di->datafd); - return ((void *)-1); - } - - for (;;) { - char *cp = (char *)&dr; - int len = sizeof (dr); - int rv; - - do { - rv = read(di->datafd, cp, len); - cp += rv; - len -= rv; - } while (len > 0 && rv > 0); - - if (rv < 0 || (rv == 0 && len != sizeof (dr))) { - di->zerr = EPIPE; - break; - } else if (rv == 0) { - /* end of file at a natural breaking point */ - break; - } - - switch (dr.ddr_type) { - case DDR_FREE: - err = write_free_diffs(ofp, di, &dr); - break; - case DDR_INUSE: - err = write_inuse_diffs(ofp, di, &dr); - break; - default: - di->zerr = EPIPE; - break; - } - - if (err || di->zerr) - break; - } - - (void) fclose(ofp); - (void) close(di->datafd); - if (err) - return ((void *)-1); - if (di->zerr) { - ASSERT(di->zerr == EPIPE); - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "Internal error: bad data from diff IOCTL")); - return ((void *)-1); - } - return ((void *)0); -} - -static int -find_shares_object(differ_info_t *di) -{ - char fullpath[MAXPATHLEN]; - struct stat64 sb = { 0 }; - - (void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN); - (void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN); - - if (stat64(fullpath, &sb) != 0) { -#ifdef illumos - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath); - return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf)); -#else - return (0); -#endif - } - - di->shares = (uint64_t)sb.st_ino; - return (0); -} - -static int -make_temp_snapshot(differ_info_t *di) -{ - libzfs_handle_t *hdl = di->zhp->zfs_hdl; - zfs_cmd_t zc = { 0 }; - - (void) snprintf(zc.zc_value, sizeof (zc.zc_value), - ZDIFF_PREFIX, getpid()); - (void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name)); - zc.zc_cleanup_fd 
= di->cleanupfd; - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) { - int err = errno; - if (err == EPERM) { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, "The diff delegated " - "permission is needed in order\nto create a " - "just-in-time snapshot for diffing\n")); - return (zfs_error(hdl, EZFS_DIFF, di->errbuf)); - } else { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, "Cannot create just-in-time " - "snapshot of '%s'"), zc.zc_name); - return (zfs_standard_error(hdl, err, di->errbuf)); - } - } - - di->tmpsnap = zfs_strdup(hdl, zc.zc_value); - di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap); - return (0); -} - -static void -teardown_differ_info(differ_info_t *di) -{ - free(di->ds); - free(di->dsmnt); - free(di->fromsnap); - free(di->frommnt); - free(di->tosnap); - free(di->tmpsnap); - free(di->tomnt); - (void) close(di->cleanupfd); -} - -static int -get_snapshot_names(differ_info_t *di, const char *fromsnap, - const char *tosnap) -{ - libzfs_handle_t *hdl = di->zhp->zfs_hdl; - char *atptrf = NULL; - char *atptrt = NULL; - int fdslen, fsnlen; - int tdslen, tsnlen; - - /* - * Can accept - * dataset@snap1 - * dataset@snap1 dataset@snap2 - * dataset@snap1 @snap2 - * dataset@snap1 dataset - * @snap1 dataset@snap2 - */ - if (tosnap == NULL) { - /* only a from snapshot given, must be valid */ - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "Badly formed snapshot name %s"), fromsnap); - - if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT, - B_FALSE)) { - return (zfs_error(hdl, EZFS_INVALIDNAME, - di->errbuf)); - } - - atptrf = strchr(fromsnap, '@'); - ASSERT(atptrf != NULL); - fdslen = atptrf - fromsnap; - - di->fromsnap = zfs_strdup(hdl, fromsnap); - di->ds = zfs_strdup(hdl, fromsnap); - di->ds[fdslen] = '\0'; - - /* the to snap will be a just-in-time snap of the head */ - return (make_temp_snapshot(di)); - } - - (void) snprintf(di->errbuf, sizeof 
(di->errbuf), - dgettext(TEXT_DOMAIN, - "Unable to determine which snapshots to compare")); - - atptrf = strchr(fromsnap, '@'); - atptrt = strchr(tosnap, '@'); - fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap); - tdslen = atptrt ? atptrt - tosnap : strlen(tosnap); - fsnlen = strlen(fromsnap) - fdslen; /* includes @ sign */ - tsnlen = strlen(tosnap) - tdslen; /* includes @ sign */ - - if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0) || - (fsnlen == 0 && tsnlen == 0)) { - return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); - } else if ((fdslen > 0 && tdslen > 0) && - ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) { - /* - * not the same dataset name, might be okay if - * tosnap is a clone of a fromsnap descendant. - */ - char origin[ZFS_MAX_DATASET_NAME_LEN]; - zprop_source_t src; - zfs_handle_t *zhp; - - di->ds = zfs_alloc(di->zhp->zfs_hdl, tdslen + 1); - (void) strncpy(di->ds, tosnap, tdslen); - di->ds[tdslen] = '\0'; - - zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM); - while (zhp != NULL) { - if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, - sizeof (origin), &src, NULL, 0, B_FALSE) != 0) { - (void) zfs_close(zhp); - zhp = NULL; - break; - } - if (strncmp(origin, fromsnap, fsnlen) == 0) - break; - - (void) zfs_close(zhp); - zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM); - } - - if (zhp == NULL) { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "Not an earlier snapshot from the same fs")); - return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); - } else { - (void) zfs_close(zhp); - } - - di->isclone = B_TRUE; - di->fromsnap = zfs_strdup(hdl, fromsnap); - if (tsnlen) { - di->tosnap = zfs_strdup(hdl, tosnap); - } else { - return (make_temp_snapshot(di)); - } - } else { - int dslen = fdslen ? fdslen : tdslen; - - di->ds = zfs_alloc(hdl, dslen + 1); - (void) strncpy(di->ds, fdslen ? 
fromsnap : tosnap, dslen); - di->ds[dslen] = '\0'; - - di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf); - if (tsnlen) { - di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt); - } else { - return (make_temp_snapshot(di)); - } - } - return (0); -} - -static int -get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt) -{ - boolean_t mounted; - - mounted = is_mounted(di->zhp->zfs_hdl, dsnm, mntpt); - if (mounted == B_FALSE) { - (void) snprintf(di->errbuf, sizeof (di->errbuf), - dgettext(TEXT_DOMAIN, - "Cannot diff an unmounted snapshot")); - return (zfs_error(di->zhp->zfs_hdl, EZFS_BADTYPE, di->errbuf)); - } - - /* Avoid a double slash at the beginning of root-mounted datasets */ - if (**mntpt == '/' && *(*mntpt + 1) == '\0') - **mntpt = '\0'; - return (0); -} - -static int -get_mountpoints(differ_info_t *di) -{ - char *strptr; - char *frommntpt; - - /* - * first get the mountpoint for the parent dataset - */ - if (get_mountpoint(di, di->ds, &di->dsmnt) != 0) - return (-1); - - strptr = strchr(di->tosnap, '@'); - ASSERT3P(strptr, !=, NULL); - di->tomnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", di->dsmnt, - ZDIFF_SNAPDIR, ++strptr); - - strptr = strchr(di->fromsnap, '@'); - ASSERT3P(strptr, !=, NULL); - - frommntpt = di->dsmnt; - if (di->isclone) { - char *mntpt; - int err; - - *strptr = '\0'; - err = get_mountpoint(di, di->fromsnap, &mntpt); - *strptr = '@'; - if (err != 0) - return (-1); - frommntpt = mntpt; - } - - di->frommnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", frommntpt, - ZDIFF_SNAPDIR, ++strptr); - - if (di->isclone) - free(frommntpt); - - return (0); -} - -static int -setup_differ_info(zfs_handle_t *zhp, const char *fromsnap, - const char *tosnap, differ_info_t *di) -{ - di->zhp = zhp; - - di->cleanupfd = open(ZFS_DEV, O_RDWR|O_EXCL); - VERIFY(di->cleanupfd >= 0); - - if (get_snapshot_names(di, fromsnap, tosnap) != 0) - return (-1); - - if (get_mountpoints(di) != 0) - return (-1); - - if (find_shares_object(di) != 0) - return (-1); - - 
return (0); -} - -int -zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, - const char *tosnap, int flags) -{ - zfs_cmd_t zc = { 0 }; - char errbuf[1024]; - differ_info_t di = { 0 }; - pthread_t tid; - int pipefd[2]; - int iocerr; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "zfs diff failed")); - - if (setup_differ_info(zhp, fromsnap, tosnap, &di)) { - teardown_differ_info(&di); - return (-1); - } - - if (pipe(pipefd)) { - zfs_error_aux(zhp->zfs_hdl, strerror(errno)); - teardown_differ_info(&di); - return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); - } - - di.scripted = (flags & ZFS_DIFF_PARSEABLE); - di.classify = (flags & ZFS_DIFF_CLASSIFY); - di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); - - di.outputfd = outfd; - di.datafd = pipefd[0]; - - if (pthread_create(&tid, NULL, differ, &di)) { - zfs_error_aux(zhp->zfs_hdl, strerror(errno)); - (void) close(pipefd[0]); - (void) close(pipefd[1]); - teardown_differ_info(&di); - return (zfs_error(zhp->zfs_hdl, - EZFS_THREADCREATEFAILED, errbuf)); - } - - /* do the ioctl() */ - (void) strlcpy(zc.zc_value, di.fromsnap, strlen(di.fromsnap) + 1); - (void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1); - zc.zc_cookie = pipefd[1]; - - iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc); - if (iocerr != 0) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "Unable to obtain diffs")); - if (errno == EPERM) { - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "\n The sys_mount privilege or diff delegated " - "permission is needed\n to execute the " - "diff ioctl")); - } else if (errno == EXDEV) { - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "\n Not an earlier snapshot from the same fs")); - } else if (errno != EPIPE || di.zerr == 0) { - zfs_error_aux(zhp->zfs_hdl, strerror(errno)); - } - (void) close(pipefd[1]); - (void) pthread_cancel(tid); - (void) pthread_join(tid, NULL); - teardown_differ_info(&di); - if (di.zerr != 0 && di.zerr != 
EPIPE) { - zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); - return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); - } else { - return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf)); - } - } - - (void) close(pipefd[1]); - (void) pthread_join(tid, NULL); - - if (di.zerr != 0) { - zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); - return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); - } - teardown_differ_info(&di); - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c deleted file mode 100644 index 474470c416ea..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <dlfcn.h> -#include <errno.h> -#include <libintl.h> -#include <link.h> -#include <pthread.h> -#include <strings.h> -#include <unistd.h> - -#include <libzfs.h> - -#include <fm/libtopo.h> -#include <sys/fm/protocol.h> -#include <sys/systeminfo.h> - -#include "libzfs_impl.h" - -/* - * This file is responsible for determining the relationship between I/O - * devices paths and physical locations. In the world of MPxIO and external - * enclosures, the device path is not synonymous with the physical location. - * If you remove a drive and insert it into a different slot, it will end up - * with the same path under MPxIO. If you recable storage enclosures, the - * device paths may change. All of this makes it difficult to implement the - * 'autoreplace' property, which is supposed to automatically manage disk - * replacement based on physical slot. - * - * In order to work around these limitations, we have a per-vdev FRU property - * that is the libtopo path (minus disk-specific authority information) to the - * physical location of the device on the system. This is an optional - * property, and is only needed when using the 'autoreplace' property or when - * generating FMA faults against vdevs. - */ - -/* - * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case - * it is not present. We only need this once per library instance, so it is - * not part of the libzfs handle. 
- */ -static void *_topo_dlhandle; -static topo_hdl_t *(*_topo_open)(int, const char *, int *); -static void (*_topo_close)(topo_hdl_t *); -static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *); -static void (*_topo_snap_release)(topo_hdl_t *); -static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *, - topo_walk_cb_t, void *, int *); -static int (*_topo_walk_step)(topo_walk_t *, int); -static void (*_topo_walk_fini)(topo_walk_t *); -static void (*_topo_hdl_strfree)(topo_hdl_t *, char *); -static char *(*_topo_node_name)(tnode_t *); -static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *, - char **, int *); -static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *); -static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *); -static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *, - const char *); - -#define ZFS_FRU_HASH_SIZE 257 - -static size_t -fru_strhash(const char *key) -{ - ulong_t g, h = 0; - const char *p; - - for (p = key; *p != '\0'; p++) { - h = (h << 4) + *p; - - if ((g = (h & 0xf0000000)) != 0) { - h ^= (g >> 24); - h ^= g; - } - } - - return (h % ZFS_FRU_HASH_SIZE); -} - -static int -libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg) -{ - libzfs_handle_t *hdl = arg; - nvlist_t *fru; - char *devpath, *frustr; - int err; - libzfs_fru_t *frup; - size_t idx; - - /* - * If this is the chassis node, and we don't yet have the system - * chassis ID, then fill in this value now. - */ - if (hdl->libzfs_chassis_id[0] == '\0' && - strcmp(_topo_node_name(tn), "chassis") == 0) { - if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY, - FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) - (void) strlcpy(hdl->libzfs_chassis_id, devpath, - sizeof (hdl->libzfs_chassis_id)); - } - - /* - * Skip non-disk nodes. - */ - if (strcmp(_topo_node_name(tn), "disk") != 0) - return (TOPO_WALK_NEXT); - - /* - * Get the devfs path and FRU. 
- */ - if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0) - return (TOPO_WALK_NEXT); - - if (libzfs_fru_lookup(hdl, devpath) != NULL) { - _topo_hdl_strfree(thp, devpath); - return (TOPO_WALK_NEXT); - } - - if (_topo_node_fru(tn, &fru, NULL, &err) != 0) { - _topo_hdl_strfree(thp, devpath); - return (TOPO_WALK_NEXT); - } - - /* - * Convert the FRU into a string. - */ - if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) { - nvlist_free(fru); - _topo_hdl_strfree(thp, devpath); - return (TOPO_WALK_NEXT); - } - - nvlist_free(fru); - - /* - * Finally, we have a FRU string and device path. Add it to the hash. - */ - if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) { - _topo_hdl_strfree(thp, devpath); - _topo_hdl_strfree(thp, frustr); - return (TOPO_WALK_NEXT); - } - - if ((frup->zf_device = strdup(devpath)) == NULL || - (frup->zf_fru = strdup(frustr)) == NULL) { - free(frup->zf_device); - free(frup); - _topo_hdl_strfree(thp, devpath); - _topo_hdl_strfree(thp, frustr); - return (TOPO_WALK_NEXT); - } - - _topo_hdl_strfree(thp, devpath); - _topo_hdl_strfree(thp, frustr); - - idx = fru_strhash(frup->zf_device); - frup->zf_chain = hdl->libzfs_fru_hash[idx]; - hdl->libzfs_fru_hash[idx] = frup; - frup->zf_next = hdl->libzfs_fru_list; - hdl->libzfs_fru_list = frup; - - return (TOPO_WALK_NEXT); -} - -/* - * Called during initialization to setup the dynamic libtopo connection. 
- */ -#pragma init(libzfs_init_fru) -static void -libzfs_init_fru(void) -{ - char path[MAXPATHLEN]; - char isa[257]; - -#if defined(_LP64) - if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0) - isa[0] = '\0'; -#else - isa[0] = '\0'; -#endif - (void) snprintf(path, sizeof (path), - "/usr/lib/fm/%s/libtopo.so", isa); - - if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL) - return; - - _topo_open = (topo_hdl_t *(*)()) - dlsym(_topo_dlhandle, "topo_open"); - _topo_close = (void (*)()) - dlsym(_topo_dlhandle, "topo_close"); - _topo_snap_hold = (char *(*)()) - dlsym(_topo_dlhandle, "topo_snap_hold"); - _topo_snap_release = (void (*)()) - dlsym(_topo_dlhandle, "topo_snap_release"); - _topo_walk_init = (topo_walk_t *(*)()) - dlsym(_topo_dlhandle, "topo_walk_init"); - _topo_walk_step = (int (*)()) - dlsym(_topo_dlhandle, "topo_walk_step"); - _topo_walk_fini = (void (*)()) - dlsym(_topo_dlhandle, "topo_walk_fini"); - _topo_hdl_strfree = (void (*)()) - dlsym(_topo_dlhandle, "topo_hdl_strfree"); - _topo_node_name = (char *(*)()) - dlsym(_topo_dlhandle, "topo_node_name"); - _topo_prop_get_string = (int (*)()) - dlsym(_topo_dlhandle, "topo_prop_get_string"); - _topo_node_fru = (int (*)()) - dlsym(_topo_dlhandle, "topo_node_fru"); - _topo_fmri_nvl2str = (int (*)()) - dlsym(_topo_dlhandle, "topo_fmri_nvl2str"); - _topo_fmri_strcmp_noauth = (int (*)()) - dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth"); - - if (_topo_open == NULL || _topo_close == NULL || - _topo_snap_hold == NULL || _topo_snap_release == NULL || - _topo_walk_init == NULL || _topo_walk_step == NULL || - _topo_walk_fini == NULL || _topo_hdl_strfree == NULL || - _topo_node_name == NULL || _topo_prop_get_string == NULL || - _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL || - _topo_fmri_strcmp_noauth == NULL) { - (void) dlclose(_topo_dlhandle); - _topo_dlhandle = NULL; - } -} - -/* - * Refresh the mappings from device path -> FMRI. 
We do this by walking the - * hc topology looking for disk nodes, and recording the io/devfs-path and FRU. - * Note that we strip out the disk-specific authority information (serial, - * part, revision, etc) so that we are left with only the identifying - * characteristics of the slot (hc path and chassis-id). - */ -void -libzfs_fru_refresh(libzfs_handle_t *hdl) -{ - int err; - char *uuid; - topo_hdl_t *thp; - topo_walk_t *twp; - - if (_topo_dlhandle == NULL) - return; - - /* - * Clear the FRU hash and initialize our basic structures. - */ - libzfs_fru_clear(hdl, B_FALSE); - - if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION, - NULL, &err)) == NULL) - return; - - thp = hdl->libzfs_topo_hdl; - - if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL) - return; - - _topo_hdl_strfree(thp, uuid); - - if (hdl->libzfs_fru_hash == NULL && - (hdl->libzfs_fru_hash = - calloc(ZFS_FRU_HASH_SIZE, sizeof (void *))) == NULL) - return; - - /* - * We now have a topo snapshot, so iterate over the hc topology looking - * for disks to add to the hash. - */ - twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC, - libzfs_fru_gather, hdl, &err); - if (twp != NULL) { - (void) _topo_walk_step(twp, TOPO_WALK_CHILD); - _topo_walk_fini(twp); - } -} - -/* - * Given a devfs path, return the FRU for the device, if known. This will - * automatically call libzfs_fru_refresh() if it hasn't already been called by - * the consumer. The string returned is valid until the next call to - * libzfs_fru_refresh(). - */ -const char * -libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath) -{ - size_t idx = fru_strhash(devpath); - libzfs_fru_t *frup; - - if (hdl->libzfs_fru_hash == NULL) - libzfs_fru_refresh(hdl); - - if (hdl->libzfs_fru_hash == NULL) - return (NULL); - - for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; - frup = frup->zf_chain) { - if (strcmp(devpath, frup->zf_device) == 0) - return (frup->zf_fru); - } - - return (NULL); -} - -/* - * Given a fru path, return the device path. 
This will automatically call - * libzfs_fru_refresh() if it hasn't already been called by the consumer. The - * string returned is valid until the next call to libzfs_fru_refresh(). - */ -const char * -libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) -{ - libzfs_fru_t *frup; - size_t idx; - - if (hdl->libzfs_fru_hash == NULL) - libzfs_fru_refresh(hdl); - - if (hdl->libzfs_fru_hash == NULL) - return (NULL); - - for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) { - for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; - frup = frup->zf_next) { - if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, - fru, frup->zf_fru)) - return (frup->zf_device); - } - } - - return (NULL); -} - -/* - * Change the stored FRU for the given vdev. - */ -int -zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) -{ - zfs_cmd_t zc = { 0 }; - - (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); - zc.zc_guid = vdev_guid; - - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0) - return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, - dgettext(TEXT_DOMAIN, "cannot set FRU"))); - - return (0); -} - -/* - * Compare to two FRUs, ignoring any authority information. - */ -boolean_t -libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b) -{ - if (hdl->libzfs_fru_hash == NULL) - libzfs_fru_refresh(hdl); - - if (hdl->libzfs_fru_hash == NULL) - return (strcmp(a, b) == 0); - - return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b)); -} - -/* - * This special function checks to see whether the FRU indicates it's supposed - * to be in the system chassis, but the chassis-id doesn't match. This can - * happen in a clustered case, where both head nodes have the same logical - * disk, but opening the device on the other head node is meaningless. 
- */ -boolean_t -libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru) -{ - const char *chassisid; - size_t len; - - if (hdl->libzfs_fru_hash == NULL) - libzfs_fru_refresh(hdl); - - if (hdl->libzfs_chassis_id[0] == '\0') - return (B_FALSE); - - if (strstr(fru, "/chassis=0/") == NULL) - return (B_FALSE); - - if ((chassisid = strstr(fru, ":chassis-id=")) == NULL) - return (B_FALSE); - - chassisid += 12; - len = strlen(hdl->libzfs_chassis_id); - if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 && - (chassisid[len] == '/' || chassisid[len] == ':')) - return (B_FALSE); - - return (B_TRUE); -} - -/* - * Clear memory associated with the FRU hash. - */ -void -libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) -{ - libzfs_fru_t *frup; - - while ((frup = hdl->libzfs_fru_list) != NULL) { - hdl->libzfs_fru_list = frup->zf_next; - free(frup->zf_device); - free(frup->zf_fru); - free(frup); - } - - hdl->libzfs_fru_list = NULL; - - if (hdl->libzfs_topo_hdl != NULL) { - _topo_snap_release(hdl->libzfs_topo_hdl); - _topo_close(hdl->libzfs_topo_hdl); - hdl->libzfs_topo_hdl = NULL; - } - - if (final) { - free(hdl->libzfs_fru_hash); - } else if (hdl->libzfs_fru_hash != NULL) { - bzero(hdl->libzfs_fru_hash, - ZFS_FRU_HASH_SIZE * sizeof (void *)); - } -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h deleted file mode 100644 index a0338afadb8f..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * CDDL HEADER SART - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. - * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - */ - -#ifndef _LIBZFS_IMPL_H -#define _LIBZFS_IMPL_H - -#include <sys/fs/zfs.h> -#include <sys/spa.h> -#include <sys/nvpair.h> -#include <sys/dmu.h> -#include <sys/zfs_ioctl.h> - -#include <libshare.h> -#include <libuutil.h> -#include <libzfs.h> -#include <libzfs_core.h> -#include <libzfs_compat.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef VERIFY -#undef VERIFY -#endif -#define VERIFY verify - -typedef struct libzfs_fru { - char *zf_device; - char *zf_fru; - struct libzfs_fru *zf_chain; - struct libzfs_fru *zf_next; -} libzfs_fru_t; - -struct libzfs_handle { - int libzfs_error; - int libzfs_fd; - FILE *libzfs_mnttab; - FILE *libzfs_sharetab; - zpool_handle_t *libzfs_pool_handles; - uu_avl_pool_t *libzfs_ns_avlpool; - uu_avl_t *libzfs_ns_avl; - uint64_t libzfs_ns_gen; - int libzfs_desc_active; - char libzfs_action[1024]; - char libzfs_desc[1024]; - int libzfs_printerr; - int libzfs_storeerr; /* stuff error messages into buffer */ - void *libzfs_sharehdl; /* libshare handle */ - boolean_t libzfs_mnttab_enable; - /* - * We need a lock to handle the case where parallel mount - * threads are populating the mnttab cache simultaneously. 
The - * lock only protects the integrity of the avl tree, and does - * not protect the contents of the mnttab entries themselves. - */ - pthread_mutex_t libzfs_mnttab_cache_lock; - avl_tree_t libzfs_mnttab_cache; - int libzfs_pool_iter; - libzfs_fru_t **libzfs_fru_hash; - libzfs_fru_t *libzfs_fru_list; - char libzfs_chassis_id[256]; - boolean_t libzfs_prop_debug; -}; - -struct zfs_handle { - libzfs_handle_t *zfs_hdl; - zpool_handle_t *zpool_hdl; - char zfs_name[ZFS_MAX_DATASET_NAME_LEN]; - zfs_type_t zfs_type; /* type including snapshot */ - zfs_type_t zfs_head_type; /* type excluding snapshot */ - dmu_objset_stats_t zfs_dmustats; - nvlist_t *zfs_props; - nvlist_t *zfs_user_props; - nvlist_t *zfs_recvd_props; - boolean_t zfs_mntcheck; - char *zfs_mntopts; - uint8_t *zfs_props_table; -}; - -/* - * This is different from checking zfs_type, because it will also catch - * snapshots of volumes. - */ -#define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) - -struct zpool_handle { - libzfs_handle_t *zpool_hdl; - zpool_handle_t *zpool_next; - char zpool_name[ZFS_MAX_DATASET_NAME_LEN]; - int zpool_state; - size_t zpool_config_size; - nvlist_t *zpool_config; - nvlist_t *zpool_old_config; - nvlist_t *zpool_props; - diskaddr_t zpool_start_block; -}; - -typedef enum { - PROTO_NFS = 0, - PROTO_SMB = 1, - PROTO_END = 2 -} zfs_share_proto_t; - -/* - * The following can be used as a bitmask and any new values - * added must preserve that capability. 
- */ -typedef enum { - SHARED_NOT_SHARED = 0x0, - SHARED_NFS = 0x2, - SHARED_SMB = 0x4 -} zfs_share_type_t; - -#define CONFIG_BUF_MINSIZE 262144 - -int zfs_error(libzfs_handle_t *, int, const char *); -int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); -void zfs_error_aux(libzfs_handle_t *, const char *, ...); -void *zfs_alloc(libzfs_handle_t *, size_t); -void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t); -char *zfs_asprintf(libzfs_handle_t *, const char *, ...); -char *zfs_strdup(libzfs_handle_t *, const char *); -int no_memory(libzfs_handle_t *); - -int zfs_standard_error(libzfs_handle_t *, int, const char *); -int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); -int zpool_standard_error(libzfs_handle_t *, int, const char *); -int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); - -int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, - size_t *); -zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); -zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *); - -int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, - nvlist_t *, char **, uint64_t *, const char *); -int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, - zfs_type_t type); - -/* - * Use this changelist_gather() flag to force attempting mounts - * on each change node regardless of whether or not it is currently - * mounted. - */ -#define CL_GATHER_MOUNT_ALWAYS 0x01 -/* - * Use this changelist_gather() flag to prevent unmounting of file systems. 
- */ -#define CL_GATHER_DONT_UNMOUNT 0x02 - -typedef struct prop_changelist prop_changelist_t; - -int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t); -int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); -int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); -int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *); -int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **); -void zcmd_free_nvlists(zfs_cmd_t *); - -int changelist_prefix(prop_changelist_t *); -int changelist_postfix(prop_changelist_t *); -void changelist_rename(prop_changelist_t *, const char *, const char *); -void changelist_remove(prop_changelist_t *, const char *); -void changelist_free(prop_changelist_t *); -prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int); -int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); -int changelist_haszonedchild(prop_changelist_t *); - -void remove_mountpoint(zfs_handle_t *); -int create_parents(libzfs_handle_t *, char *, int); -boolean_t isa_child_of(const char *dataset, const char *parent); - -zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); -zfs_handle_t *make_bookmark_handle(zfs_handle_t *, const char *, - nvlist_t *props); - -int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); - -boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); - -int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, - boolean_t modifying); - -void namespace_clear(libzfs_handle_t *); - -/* - * libshare (sharemgr) interfaces used internally. 
- */ - -extern int zfs_init_libshare(libzfs_handle_t *, int); -extern int zfs_parse_options(char *, zfs_share_proto_t); - -extern int zfs_unshare_proto(zfs_handle_t *, - const char *, zfs_share_proto_t *); - -extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBZFS_IMPL_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c deleted file mode 100644 index 87c8dd14898b..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c +++ /dev/null @@ -1,1929 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright 2015 RackTop Systems. - * Copyright 2016 Nexenta Systems, Inc. - */ - -/* - * Pool import support functions. - * - * To import a pool, we rely on reading the configuration information from the - * ZFS label of each device. 
If we successfully read the label, then we - * organize the configuration information in the following hierarchy: - * - * pool guid -> toplevel vdev guid -> label txg - * - * Duplicate entries matching this same tuple will be discarded. Once we have - * examined every device, we pick the best label txg config for each toplevel - * vdev. We then arrange these toplevel vdevs into a complete pool config, and - * update any paths that have changed. Finally, we attempt to import the pool - * using our derived config, and record the results. - */ - -#include <aio.h> -#include <ctype.h> -#include <devid.h> -#include <dirent.h> -#include <errno.h> -#include <libintl.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include <thread_pool.h> -#include <libgeom.h> - -#include <sys/vdev_impl.h> - -#include "libzfs.h" -#include "libzfs_impl.h" - -/* - * Intermediate structures used to gather configuration information. - */ -typedef struct config_entry { - uint64_t ce_txg; - nvlist_t *ce_config; - struct config_entry *ce_next; -} config_entry_t; - -typedef struct vdev_entry { - uint64_t ve_guid; - config_entry_t *ve_configs; - struct vdev_entry *ve_next; -} vdev_entry_t; - -typedef struct pool_entry { - uint64_t pe_guid; - vdev_entry_t *pe_vdevs; - struct pool_entry *pe_next; -} pool_entry_t; - -typedef struct name_entry { - char *ne_name; - uint64_t ne_guid; - struct name_entry *ne_next; -} name_entry_t; - -typedef struct pool_list { - pool_entry_t *pools; - name_entry_t *names; -} pool_list_t; - -static char * -get_devid(const char *path) -{ -#ifdef have_devid - int fd; - ddi_devid_t devid; - char *minor, *ret; - - if ((fd = open(path, O_RDONLY)) < 0) - return (NULL); - - minor = NULL; - ret = NULL; - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0) - ret = devid_str_encode(devid, minor); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - (void) 
close(fd); - - return (ret); -#else - return (NULL); -#endif -} - - -/* - * Go through and fix up any path and/or devid information for the given vdev - * configuration. - */ -static int -fix_paths(nvlist_t *nv, name_entry_t *names) -{ - nvlist_t **child; - uint_t c, children; - uint64_t guid; - name_entry_t *ne, *best; - char *path, *devid; - int matched; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if (fix_paths(child[c], names) != 0) - return (-1); - return (0); - } - - /* - * This is a leaf (file or disk) vdev. In either case, go through - * the name list and see if we find a matching guid. If so, replace - * the path and see if we can calculate a new devid. - * - * There may be multiple names associated with a particular guid, in - * which case we have overlapping slices or multiple paths to the same - * disk. If this is the case, then we want to pick the path that is - * the most similar to the original, where "most similar" is the number - * of matching characters starting from the end of the path. This will - * preserve slice numbers even if the disks have been reorganized, and - * will also catch preferred disk names if multiple paths exist. - */ - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) - path = NULL; - - matched = 0; - best = NULL; - for (ne = names; ne != NULL; ne = ne->ne_next) { - if (ne->ne_guid == guid) { - const char *src, *dst; - int count; - - if (path == NULL) { - best = ne; - break; - } - - src = ne->ne_name + strlen(ne->ne_name) - 1; - dst = path + strlen(path) - 1; - for (count = 0; src >= ne->ne_name && dst >= path; - src--, dst--, count++) - if (*src != *dst) - break; - - /* - * At this point, 'count' is the number of characters - * matched from the end. 
- */ - if (count > matched || best == NULL) { - best = ne; - matched = count; - } - } - } - - if (best == NULL) - return (0); - - if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) - return (-1); - - if ((devid = get_devid(best->ne_name)) == NULL) { - (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); - } else { - if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) { - devid_str_free(devid); - return (-1); - } - devid_str_free(devid); - } - - return (0); -} - -/* - * Add the given configuration to the list of known devices. - */ -static int -add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, - nvlist_t *config) -{ - uint64_t pool_guid, vdev_guid, top_guid, txg, state; - pool_entry_t *pe; - vdev_entry_t *ve; - config_entry_t *ce; - name_entry_t *ne; - - /* - * If this is a hot spare not currently in use or level 2 cache - * device, add it to the list of names to translate, but don't do - * anything else. - */ - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &state) == 0 && - (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && - nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { - if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) - return (-1); - - if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { - free(ne); - return (-1); - } - - ne->ne_guid = vdev_guid; - ne->ne_next = pl->names; - pl->names = ne; - - return (0); - } - - /* - * If we have a valid config but cannot read any of these fields, then - * it means we have a half-initialized label. In vdev_label_init() - * we write a label with txg == 0 so that we can identify the device - * in case the user refers to the same disk later on. If we fail to - * create the pool, we'll be left with a label in this state - * which should not be considered part of a valid pool. 
- */ - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pool_guid) != 0 || - nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, - &vdev_guid) != 0 || - nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, - &top_guid) != 0 || - nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, - &txg) != 0 || txg == 0) { - return (0); - } - - /* - * First, see if we know about this pool. If not, then add it to the - * list of known pools. - */ - for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { - if (pe->pe_guid == pool_guid) - break; - } - - if (pe == NULL) { - if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { - return (-1); - } - pe->pe_guid = pool_guid; - pe->pe_next = pl->pools; - pl->pools = pe; - } - - /* - * Second, see if we know about this toplevel vdev. Add it if its - * missing. - */ - for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { - if (ve->ve_guid == top_guid) - break; - } - - if (ve == NULL) { - if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { - return (-1); - } - ve->ve_guid = top_guid; - ve->ve_next = pe->pe_vdevs; - pe->pe_vdevs = ve; - } - - /* - * Third, see if we have a config with a matching transaction group. If - * so, then we do nothing. Otherwise, add it to the list of known - * configs. - */ - for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { - if (ce->ce_txg == txg) - break; - } - - if (ce == NULL) { - if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { - return (-1); - } - ce->ce_txg = txg; - ce->ce_config = fnvlist_dup(config); - ce->ce_next = ve->ve_configs; - ve->ve_configs = ce; - } - - /* - * At this point we've successfully added our config to the list of - * known configs. The last thing to do is add the vdev guid -> path - * mappings so that we can fix up the configuration as necessary before - * doing the import. 
- */ - if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) - return (-1); - - if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { - free(ne); - return (-1); - } - - ne->ne_guid = vdev_guid; - ne->ne_next = pl->names; - pl->names = ne; - - return (0); -} - -/* - * Returns true if the named pool matches the given GUID. - */ -static int -pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid, - boolean_t *isactive) -{ - zpool_handle_t *zhp; - uint64_t theguid; - - if (zpool_open_silent(hdl, name, &zhp) != 0) - return (-1); - - if (zhp == NULL) { - *isactive = B_FALSE; - return (0); - } - - verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, - &theguid) == 0); - - zpool_close(zhp); - - *isactive = (theguid == guid); - return (0); -} - -static nvlist_t * -refresh_config(libzfs_handle_t *hdl, nvlist_t *config) -{ - nvlist_t *nvl; - zfs_cmd_t zc = { 0 }; - int err, dstbuf_size; - - if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) - return (NULL); - - dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4); - - if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) { - zcmd_free_nvlists(&zc); - return (NULL); - } - - while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, - &zc)) != 0 && errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return (NULL); - } - } - - if (err) { - zcmd_free_nvlists(&zc); - return (NULL); - } - - if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) { - zcmd_free_nvlists(&zc); - return (NULL); - } - - zcmd_free_nvlists(&zc); - return (nvl); -} - -/* - * Determine if the vdev id is a hole in the namespace. - */ -boolean_t -vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) -{ - for (int c = 0; c < holes; c++) { - - /* Top-level is a hole */ - if (hole_array[c] == id) - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * Convert our list of pools into the definitive set of configurations. 
We - * start by picking the best config for each toplevel vdev. Once that's done, - * we assemble the toplevel vdevs into a full config for the pool. We make a - * pass to fix up any incorrect paths, and then add it to the main list to - * return to the user. - */ -static nvlist_t * -get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, - nvlist_t *policy) -{ - pool_entry_t *pe; - vdev_entry_t *ve; - config_entry_t *ce; - nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; - nvlist_t **spares, **l2cache; - uint_t i, nspares, nl2cache; - boolean_t config_seen; - uint64_t best_txg; - char *name, *hostname = NULL; - uint64_t guid; - uint_t children = 0; - nvlist_t **child = NULL; - uint_t holes; - uint64_t *hole_array, max_id; - uint_t c; - boolean_t isactive; - uint64_t hostid; - nvlist_t *nvl; - boolean_t found_one = B_FALSE; - boolean_t valid_top_config = B_FALSE; - - if (nvlist_alloc(&ret, 0, 0) != 0) - goto nomem; - - for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { - uint64_t id, max_txg = 0; - - if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) - goto nomem; - config_seen = B_FALSE; - - /* - * Iterate over all toplevel vdevs. Grab the pool configuration - * from the first one we find, and then go through the rest and - * add them as necessary to the 'vdevs' member of the config. - */ - for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { - - /* - * Determine the best configuration for this vdev by - * selecting the config with the latest transaction - * group. - */ - best_txg = 0; - for (ce = ve->ve_configs; ce != NULL; - ce = ce->ce_next) { - - if (ce->ce_txg > best_txg) { - tmp = ce->ce_config; - best_txg = ce->ce_txg; - } - } - - /* - * We rely on the fact that the max txg for the - * pool will contain the most up-to-date information - * about the valid top-levels in the vdev namespace. 
- */ - if (best_txg > max_txg) { - (void) nvlist_remove(config, - ZPOOL_CONFIG_VDEV_CHILDREN, - DATA_TYPE_UINT64); - (void) nvlist_remove(config, - ZPOOL_CONFIG_HOLE_ARRAY, - DATA_TYPE_UINT64_ARRAY); - - max_txg = best_txg; - hole_array = NULL; - holes = 0; - max_id = 0; - valid_top_config = B_FALSE; - - if (nvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { - verify(nvlist_add_uint64(config, - ZPOOL_CONFIG_VDEV_CHILDREN, - max_id) == 0); - valid_top_config = B_TRUE; - } - - if (nvlist_lookup_uint64_array(tmp, - ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, - &holes) == 0) { - verify(nvlist_add_uint64_array(config, - ZPOOL_CONFIG_HOLE_ARRAY, - hole_array, holes) == 0); - } - } - - if (!config_seen) { - /* - * Copy the relevant pieces of data to the pool - * configuration: - * - * version - * pool guid - * name - * comment (if available) - * pool state - * hostid (if available) - * hostname (if available) - */ - uint64_t state, version; - char *comment = NULL; - - version = fnvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_VERSION); - fnvlist_add_uint64(config, - ZPOOL_CONFIG_VERSION, version); - guid = fnvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_POOL_GUID); - fnvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_GUID, guid); - name = fnvlist_lookup_string(tmp, - ZPOOL_CONFIG_POOL_NAME); - fnvlist_add_string(config, - ZPOOL_CONFIG_POOL_NAME, name); - - if (nvlist_lookup_string(tmp, - ZPOOL_CONFIG_COMMENT, &comment) == 0) - fnvlist_add_string(config, - ZPOOL_CONFIG_COMMENT, comment); - - state = fnvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_POOL_STATE); - fnvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_STATE, state); - - hostid = 0; - if (nvlist_lookup_uint64(tmp, - ZPOOL_CONFIG_HOSTID, &hostid) == 0) { - fnvlist_add_uint64(config, - ZPOOL_CONFIG_HOSTID, hostid); - hostname = fnvlist_lookup_string(tmp, - ZPOOL_CONFIG_HOSTNAME); - fnvlist_add_string(config, - ZPOOL_CONFIG_HOSTNAME, hostname); - } - - config_seen = B_TRUE; - } - - /* - * Add this top-level vdev to the child 
array. - */ - verify(nvlist_lookup_nvlist(tmp, - ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); - verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, - &id) == 0); - - if (id >= children) { - nvlist_t **newchild; - - newchild = zfs_alloc(hdl, (id + 1) * - sizeof (nvlist_t *)); - if (newchild == NULL) - goto nomem; - - for (c = 0; c < children; c++) - newchild[c] = child[c]; - - free(child); - child = newchild; - children = id + 1; - } - if (nvlist_dup(nvtop, &child[id], 0) != 0) - goto nomem; - - } - - /* - * If we have information about all the top-levels then - * clean up the nvlist which we've constructed. This - * means removing any extraneous devices that are - * beyond the valid range or adding devices to the end - * of our array which appear to be missing. - */ - if (valid_top_config) { - if (max_id < children) { - for (c = max_id; c < children; c++) - nvlist_free(child[c]); - children = max_id; - } else if (max_id > children) { - nvlist_t **newchild; - - newchild = zfs_alloc(hdl, (max_id) * - sizeof (nvlist_t *)); - if (newchild == NULL) - goto nomem; - - for (c = 0; c < children; c++) - newchild[c] = child[c]; - - free(child); - child = newchild; - children = max_id; - } - } - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &guid) == 0); - - /* - * The vdev namespace may contain holes as a result of - * device removal. We must add them back into the vdev - * tree before we process any missing devices. - */ - if (holes > 0) { - ASSERT(valid_top_config); - - for (c = 0; c < children; c++) { - nvlist_t *holey; - - if (child[c] != NULL || - !vdev_is_hole(hole_array, holes, c)) - continue; - - if (nvlist_alloc(&holey, NV_UNIQUE_NAME, - 0) != 0) - goto nomem; - - /* - * Holes in the namespace are treated as - * "hole" top-level vdevs and have a - * special flag set on them. 
- */ - if (nvlist_add_string(holey, - ZPOOL_CONFIG_TYPE, - VDEV_TYPE_HOLE) != 0 || - nvlist_add_uint64(holey, - ZPOOL_CONFIG_ID, c) != 0 || - nvlist_add_uint64(holey, - ZPOOL_CONFIG_GUID, 0ULL) != 0) { - nvlist_free(holey); - goto nomem; - } - child[c] = holey; - } - } - - /* - * Look for any missing top-level vdevs. If this is the case, - * create a faked up 'missing' vdev as a placeholder. We cannot - * simply compress the child array, because the kernel performs - * certain checks to make sure the vdev IDs match their location - * in the configuration. - */ - for (c = 0; c < children; c++) { - if (child[c] == NULL) { - nvlist_t *missing; - if (nvlist_alloc(&missing, NV_UNIQUE_NAME, - 0) != 0) - goto nomem; - if (nvlist_add_string(missing, - ZPOOL_CONFIG_TYPE, - VDEV_TYPE_MISSING) != 0 || - nvlist_add_uint64(missing, - ZPOOL_CONFIG_ID, c) != 0 || - nvlist_add_uint64(missing, - ZPOOL_CONFIG_GUID, 0ULL) != 0) { - nvlist_free(missing); - goto nomem; - } - child[c] = missing; - } - } - - /* - * Put all of this pool's top-level vdevs into a root vdev. - */ - if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) - goto nomem; - if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) != 0 || - nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || - nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || - nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - child, children) != 0) { - nvlist_free(nvroot); - goto nomem; - } - - for (c = 0; c < children; c++) - nvlist_free(child[c]); - free(child); - children = 0; - child = NULL; - - /* - * Go through and fix up any paths and/or devids based on our - * known list of vdev GUID -> path mappings. - */ - if (fix_paths(nvroot, pl->names) != 0) { - nvlist_free(nvroot); - goto nomem; - } - - /* - * Add the root vdev to this pool's configuration. 
- */ - if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - nvroot) != 0) { - nvlist_free(nvroot); - goto nomem; - } - nvlist_free(nvroot); - - /* - * zdb uses this path to report on active pools that were - * imported or created using -R. - */ - if (active_ok) - goto add_pool; - - /* - * Determine if this pool is currently active, in which case we - * can't actually import it. - */ - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &guid) == 0); - - if (pool_active(hdl, name, guid, &isactive) != 0) - goto error; - - if (isactive) { - nvlist_free(config); - config = NULL; - continue; - } - - if (policy != NULL) { - if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY, - policy) != 0) - goto nomem; - } - - if ((nvl = refresh_config(hdl, config)) == NULL) { - nvlist_free(config); - config = NULL; - continue; - } - - nvlist_free(config); - config = nvl; - - /* - * Go through and update the paths for spares, now that we have - * them. - */ - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - for (i = 0; i < nspares; i++) { - if (fix_paths(spares[i], pl->names) != 0) - goto nomem; - } - } - - /* - * Update the paths for l2cache devices. - */ - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - for (i = 0; i < nl2cache; i++) { - if (fix_paths(l2cache[i], pl->names) != 0) - goto nomem; - } - } - - /* - * Restore the original information read from the actual label. 
- */ - (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, - DATA_TYPE_UINT64); - (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, - DATA_TYPE_STRING); - if (hostid != 0) { - verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, - hostid) == 0); - verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, - hostname) == 0); - } - -add_pool: - /* - * Add this pool to the list of configs. - */ - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); - if (nvlist_add_nvlist(ret, name, config) != 0) - goto nomem; - - found_one = B_TRUE; - nvlist_free(config); - config = NULL; - } - - if (!found_one) { - nvlist_free(ret); - ret = NULL; - } - - return (ret); - -nomem: - (void) no_memory(hdl); -error: - nvlist_free(config); - nvlist_free(ret); - for (c = 0; c < children; c++) - nvlist_free(child[c]); - free(child); - - return (NULL); -} - -/* - * Return the offset of the given label. - */ -static uint64_t -label_offset(uint64_t size, int l) -{ - ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); - return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? - 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); -} - -/* - * Given a file descriptor, read the label information and return an nvlist - * describing the configuration, if there is one. 
- * Return 0 on success, or -1 on failure - */ -int -zpool_read_label(int fd, nvlist_t **config) -{ - struct stat64 statbuf; - int l; - vdev_label_t *label; - uint64_t state, txg, size; - - *config = NULL; - - if (fstat64(fd, &statbuf) == -1) - return (-1); - size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); - - if ((label = malloc(sizeof (vdev_label_t))) == NULL) - return (-1); - - for (l = 0; l < VDEV_LABELS; l++) { - if (pread64(fd, label, sizeof (vdev_label_t), - label_offset(size, l)) != sizeof (vdev_label_t)) - continue; - - if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, - sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) - continue; - - if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, - &state) != 0 || state > POOL_STATE_L2CACHE) { - nvlist_free(*config); - continue; - } - - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && - (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, - &txg) != 0 || txg == 0)) { - nvlist_free(*config); - continue; - } - - free(label); - return (0); - } - - free(label); - *config = NULL; - errno = ENOENT; - return (-1); -} - -/* - * Given a file descriptor, read the label information and return an nvlist - * describing the configuration, if there is one. - * returns the number of valid labels found - * If a label is found, returns it via config. The caller is responsible for - * freeing it. 
- */ -int -zpool_read_all_labels(int fd, nvlist_t **config) -{ - struct stat64 statbuf; - struct aiocb aiocbs[VDEV_LABELS]; - struct aiocb *aiocbps[VDEV_LABELS]; - int l; - vdev_phys_t *labels; - uint64_t state, txg, size; - int nlabels = 0; - - *config = NULL; - - if (fstat64(fd, &statbuf) == -1) - return (0); - size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); - - if ((labels = calloc(VDEV_LABELS, sizeof (vdev_phys_t))) == NULL) - return (0); - - memset(aiocbs, 0, sizeof(aiocbs)); - for (l = 0; l < VDEV_LABELS; l++) { - aiocbs[l].aio_fildes = fd; - aiocbs[l].aio_offset = label_offset(size, l) + VDEV_SKIP_SIZE; - aiocbs[l].aio_buf = &labels[l]; - aiocbs[l].aio_nbytes = sizeof(vdev_phys_t); - aiocbs[l].aio_lio_opcode = LIO_READ; - aiocbps[l] = &aiocbs[l]; - } - - if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) { - if (errno == EAGAIN || errno == EINTR || errno == EIO) { - for (l = 0; l < VDEV_LABELS; l++) { - errno = 0; - int r = aio_error(&aiocbs[l]); - if (r != EINVAL) - (void)aio_return(&aiocbs[l]); - } - } - free(labels); - return (0); - } - - for (l = 0; l < VDEV_LABELS; l++) { - nvlist_t *temp = NULL; - - if (aio_return(&aiocbs[l]) != sizeof(vdev_phys_t)) - continue; - - if (nvlist_unpack(labels[l].vp_nvlist, - sizeof (labels[l].vp_nvlist), &temp, 0) != 0) - continue; - - if (nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_STATE, - &state) != 0 || state > POOL_STATE_L2CACHE) { - nvlist_free(temp); - temp = NULL; - continue; - } - - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && - (nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_TXG, - &txg) != 0 || txg == 0)) { - nvlist_free(temp); - temp = NULL; - continue; - } - if (temp) - *config = temp; - - nlabels++; - } - - free(labels); - return (nlabels); -} - -typedef struct rdsk_node { - char *rn_name; - int rn_dfd; - libzfs_handle_t *rn_hdl; - nvlist_t *rn_config; - avl_tree_t *rn_avl; - avl_node_t rn_node; - boolean_t rn_nozpool; -} rdsk_node_t; - -static int 
-slice_cache_compare(const void *arg1, const void *arg2) -{ - const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; - const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; - char *nm1slice, *nm2slice; - int rv; - - /* - * slices zero and two are the most likely to provide results, - * so put those first - */ - nm1slice = strstr(nm1, "s0"); - nm2slice = strstr(nm2, "s0"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - nm1slice = strstr(nm1, "s2"); - nm2slice = strstr(nm2, "s2"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - - rv = strcmp(nm1, nm2); - if (rv == 0) - return (0); - return (rv > 0 ? 1 : -1); -} - -#ifdef illumos -static void -check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, - diskaddr_t size, uint_t blksz) -{ - rdsk_node_t tmpnode; - rdsk_node_t *node; - char sname[MAXNAMELEN]; - - tmpnode.rn_name = &sname[0]; - (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", - diskname, partno); - /* - * protect against division by zero for disk labels that - * contain a bogus sector size - */ - if (blksz == 0) - blksz = DEV_BSIZE; - /* too small to contain a zpool? 
*/ - if ((size < (SPA_MINDEVSIZE / blksz)) && - (node = avl_find(r, &tmpnode, NULL))) - node->rn_nozpool = B_TRUE; -} -#endif /* illumos */ - -static void -nozpool_all_slices(avl_tree_t *r, const char *sname) -{ -#ifdef illumos - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if (((ptr = strrchr(diskname, 's')) == NULL) && - ((ptr = strrchr(diskname, 'p')) == NULL)) - return; - ptr[0] = 's'; - ptr[1] = '\0'; - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, 0, 1); - ptr[0] = 'p'; - for (i = 0; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); -#endif /* illumos */ -} - -#ifdef illumos -static void -check_slices(avl_tree_t *r, int fd, const char *sname) -{ - struct extvtoc vtoc; - struct dk_gpt *gpt; - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) - return; - ptr[1] = '\0'; - - if (read_extvtoc(fd, &vtoc) >= 0) { - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - vtoc.v_part[i].p_size, vtoc.v_sectorsz); - } else if (efi_alloc_and_read(fd, &gpt) >= 0) { - /* - * on x86 we'll still have leftover links that point - * to slices s[9-15], so use NDKMAP instead - */ - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - gpt->efi_parts[i].p_size, gpt->efi_lbasize); - /* nodes p[1-4] are never used with EFI labels */ - ptr[0] = 'p'; - for (i = 1; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); - efi_free(gpt); - } -} -#endif /* illumos */ - -static void -zpool_open_func(void *arg) -{ - rdsk_node_t *rn = arg; - struct stat64 statbuf; - nvlist_t *config; - int fd; - - if (rn->rn_nozpool) - return; - if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { - /* symlink to a device that's no longer there */ - if (errno == ENOENT) - nozpool_all_slices(rn->rn_avl, rn->rn_name); - return; - } - /* - * Ignore failed stats. 
We only want regular - * files, character devs and block devs. - */ - if (fstat64(fd, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && - !S_ISCHR(statbuf.st_mode) && - !S_ISBLK(statbuf.st_mode))) { - (void) close(fd); - return; - } - /* this file is too small to hold a zpool */ -#ifdef illumos - if (S_ISREG(statbuf.st_mode) && - statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } else if (!S_ISREG(statbuf.st_mode)) { - /* - * Try to read the disk label first so we don't have to - * open a bunch of minor nodes that can't have a zpool. - */ - check_slices(rn->rn_avl, fd, rn->rn_name); - } -#else /* !illumos */ - if (statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } -#endif /* illumos */ - - if ((zpool_read_label(fd, &config)) != 0 && errno == ENOMEM) { - (void) close(fd); - (void) no_memory(rn->rn_hdl); - return; - } - (void) close(fd); - - rn->rn_config = config; -} - -/* - * Given a file descriptor, clear (zero) the label information. - */ -int -zpool_clear_label(int fd) -{ - struct stat64 statbuf; - int l; - vdev_label_t *label; - uint64_t size; - - if (fstat64(fd, &statbuf) == -1) - return (0); - size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); - - if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) - return (-1); - - for (l = 0; l < VDEV_LABELS; l++) { - if (pwrite64(fd, label, sizeof (vdev_label_t), - label_offset(size, l)) != sizeof (vdev_label_t)) { - free(label); - return (-1); - } - } - - free(label); - return (0); -} - -/* - * Given a list of directories to search, find all pools stored on disk. This - * includes partial pools which are not available to import. If no args are - * given (argc is 0), then the default directory (/dev/dsk) is searched. - * poolname or guid (but not both) are provided by the caller when trying - * to import a specific pool. 
- */ -static nvlist_t * -zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) -{ - int i, dirs = iarg->paths; - struct dirent64 *dp; - char path[MAXPATHLEN]; - char *end, **dir = iarg->path; - size_t pathleft; - nvlist_t *ret = NULL; - static char *default_dir = "/dev"; - pool_list_t pools = { 0 }; - pool_entry_t *pe, *penext; - vdev_entry_t *ve, *venext; - config_entry_t *ce, *cenext; - name_entry_t *ne, *nenext; - avl_tree_t slice_cache; - rdsk_node_t *slice; - void *cookie; - boolean_t skip_zvols = B_FALSE; - int value; - size_t size = sizeof(value); - - if (dirs == 0) { - dirs = 1; - dir = &default_dir; - } - - if (sysctlbyname("vfs.zfs.vol.recursive", &value, &size, NULL, 0) == 0 - && value == 0) { - skip_zvols = B_TRUE; - } - - /* - * Go through and read the label configuration information from every - * possible device, organizing the information according to pool GUID - * and toplevel GUID. - */ - for (i = 0; i < dirs; i++) { - tpool_t *t; - char rdsk[MAXPATHLEN]; - int dfd; - boolean_t config_failed = B_FALSE; - DIR *dirp; - - /* use realpath to normalize the path */ - if (realpath(dir[i], path) == 0) { - (void) zfs_error_fmt(hdl, EZFS_BADPATH, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); - goto error; - } - end = &path[strlen(path)]; - *end++ = '/'; - *end = 0; - pathleft = &path[sizeof (path)] - end; - -#ifdef illumos - /* - * Using raw devices instead of block devices when we're - * reading the labels skips a bunch of slow operations during - * close(2) processing, so we replace /dev/dsk with /dev/rdsk. 
- */ - if (strcmp(path, ZFS_DISK_ROOTD) == 0) - (void) strlcpy(rdsk, ZFS_RDISK_ROOTD, sizeof (rdsk)); - else -#endif - (void) strlcpy(rdsk, path, sizeof (rdsk)); - - if ((dfd = open64(rdsk, O_RDONLY)) < 0 || - (dirp = fdopendir(dfd)) == NULL) { - if (dfd >= 0) - (void) close(dfd); - zfs_error_aux(hdl, strerror(errno)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), - rdsk); - goto error; - } - - avl_create(&slice_cache, slice_cache_compare, - sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); - - if (strcmp(rdsk, "/dev/") == 0) { - struct gmesh mesh; - struct gclass *mp; - struct ggeom *gp; - struct gprovider *pp; - - errno = geom_gettree(&mesh); - if (errno != 0) { - zfs_error_aux(hdl, strerror(errno)); - (void) zfs_error_fmt(hdl, EZFS_BADPATH, - dgettext(TEXT_DOMAIN, "cannot get GEOM tree")); - goto error; - } - - LIST_FOREACH(mp, &mesh.lg_class, lg_class) { - if (skip_zvols && - strcmp(mp->lg_name, "ZFS::ZVOL") == 0) { - continue; - } - LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { - LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, pp->lg_name); - slice->rn_avl = &slice_cache; - slice->rn_dfd = dfd; - slice->rn_hdl = hdl; - slice->rn_nozpool = B_FALSE; - avl_add(&slice_cache, slice); - } - } - } - - geom_deletetree(&mesh); - goto skipdir; - } - - /* - * This is not MT-safe, but we have no MT consumers of libzfs - */ - while ((dp = readdir64(dirp)) != NULL) { - const char *name = dp->d_name; - if (name[0] == '.' && - (name[1] == 0 || (name[1] == '.' 
&& name[2] == 0))) - continue; - - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, name); - slice->rn_avl = &slice_cache; - slice->rn_dfd = dfd; - slice->rn_hdl = hdl; - slice->rn_nozpool = B_FALSE; - avl_add(&slice_cache, slice); - } -skipdir: - /* - * create a thread pool to do all of this in parallel; - * rn_nozpool is not protected, so this is racy in that - * multiple tasks could decide that the same slice can - * not hold a zpool, which is benign. Also choose - * double the number of processors; we hold a lot of - * locks in the kernel, so going beyond this doesn't - * buy us much. - */ - t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), - 0, NULL); - for (slice = avl_first(&slice_cache); slice; - (slice = avl_walk(&slice_cache, slice, - AVL_AFTER))) - (void) tpool_dispatch(t, zpool_open_func, slice); - tpool_wait(t); - tpool_destroy(t); - - cookie = NULL; - while ((slice = avl_destroy_nodes(&slice_cache, - &cookie)) != NULL) { - if (slice->rn_config != NULL && !config_failed) { - nvlist_t *config = slice->rn_config; - boolean_t matched = B_TRUE; - - if (iarg->poolname != NULL) { - char *pname; - - matched = nvlist_lookup_string(config, - ZPOOL_CONFIG_POOL_NAME, - &pname) == 0 && - strcmp(iarg->poolname, pname) == 0; - } else if (iarg->guid != 0) { - uint64_t this_guid; - - matched = nvlist_lookup_uint64(config, - ZPOOL_CONFIG_POOL_GUID, - &this_guid) == 0 && - iarg->guid == this_guid; - } - if (matched) { - /* - * use the non-raw path for the config - */ - (void) strlcpy(end, slice->rn_name, - pathleft); - if (add_config(hdl, &pools, path, - config) != 0) - config_failed = B_TRUE; - } - nvlist_free(config); - } - free(slice->rn_name); - free(slice); - } - avl_destroy(&slice_cache); - - (void) closedir(dirp); - - if (config_failed) - goto error; - } - - ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy); - -error: - for (pe = pools.pools; pe != NULL; pe = penext) { - penext = pe->pe_next; - for (ve = 
pe->pe_vdevs; ve != NULL; ve = venext) { - venext = ve->ve_next; - for (ce = ve->ve_configs; ce != NULL; ce = cenext) { - cenext = ce->ce_next; - nvlist_free(ce->ce_config); - free(ce); - } - free(ve); - } - free(pe); - } - - for (ne = pools.names; ne != NULL; ne = nenext) { - nenext = ne->ne_next; - free(ne->ne_name); - free(ne); - } - - return (ret); -} - -nvlist_t * -zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) -{ - importargs_t iarg = { 0 }; - - iarg.paths = argc; - iarg.path = argv; - - return (zpool_find_import_impl(hdl, &iarg)); -} - -/* - * Given a cache file, return the contents as a list of importable pools. - * poolname or guid (but not both) are provided by the caller when trying - * to import a specific pool. - */ -nvlist_t * -zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, - char *poolname, uint64_t guid) -{ - char *buf; - int fd; - struct stat64 statbuf; - nvlist_t *raw, *src, *dst; - nvlist_t *pools; - nvpair_t *elem; - char *name; - uint64_t this_guid; - boolean_t active; - - verify(poolname == NULL || guid == 0); - - if ((fd = open(cachefile, O_RDONLY)) < 0) { - zfs_error_aux(hdl, "%s", strerror(errno)); - (void) zfs_error(hdl, EZFS_BADCACHE, - dgettext(TEXT_DOMAIN, "failed to open cache file")); - return (NULL); - } - - if (fstat64(fd, &statbuf) != 0) { - zfs_error_aux(hdl, "%s", strerror(errno)); - (void) close(fd); - (void) zfs_error(hdl, EZFS_BADCACHE, - dgettext(TEXT_DOMAIN, "failed to get size of cache file")); - return (NULL); - } - - if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { - (void) close(fd); - return (NULL); - } - - if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { - (void) close(fd); - free(buf); - (void) zfs_error(hdl, EZFS_BADCACHE, - dgettext(TEXT_DOMAIN, - "failed to read cache file contents")); - return (NULL); - } - - (void) close(fd); - - if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { - free(buf); - (void) zfs_error(hdl, EZFS_BADCACHE, - dgettext(TEXT_DOMAIN, 
- "invalid or corrupt cache file contents")); - return (NULL); - } - - free(buf); - - /* - * Go through and get the current state of the pools and refresh their - * state. - */ - if (nvlist_alloc(&pools, 0, 0) != 0) { - (void) no_memory(hdl); - nvlist_free(raw); - return (NULL); - } - - elem = NULL; - while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { - src = fnvpair_value_nvlist(elem); - - name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME); - if (poolname != NULL && strcmp(poolname, name) != 0) - continue; - - this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID); - if (guid != 0 && guid != this_guid) - continue; - - if (pool_active(hdl, name, this_guid, &active) != 0) { - nvlist_free(raw); - nvlist_free(pools); - return (NULL); - } - - if (active) - continue; - - if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE, - cachefile) != 0) { - (void) no_memory(hdl); - nvlist_free(raw); - nvlist_free(pools); - return (NULL); - } - - if ((dst = refresh_config(hdl, src)) == NULL) { - nvlist_free(raw); - nvlist_free(pools); - return (NULL); - } - - if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { - (void) no_memory(hdl); - nvlist_free(dst); - nvlist_free(raw); - nvlist_free(pools); - return (NULL); - } - nvlist_free(dst); - } - - nvlist_free(raw); - return (pools); -} - -static int -name_or_guid_exists(zpool_handle_t *zhp, void *data) -{ - importargs_t *import = data; - int found = 0; - - if (import->poolname != NULL) { - char *pool_name; - - verify(nvlist_lookup_string(zhp->zpool_config, - ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); - if (strcmp(pool_name, import->poolname) == 0) - found = 1; - } else { - uint64_t pool_guid; - - verify(nvlist_lookup_uint64(zhp->zpool_config, - ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); - if (pool_guid == import->guid) - found = 1; - } - - zpool_close(zhp); - return (found); -} - -nvlist_t * -zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) -{ - nvlist_t *pools = NULL; - - 
verify(import->poolname == NULL || import->guid == 0); - - if (import->unique) - import->exists = zpool_iter(hdl, name_or_guid_exists, import); - - if (import->cachefile != NULL) - pools = zpool_find_import_cached(hdl, import->cachefile, - import->poolname, import->guid); - else - pools = zpool_find_import_impl(hdl, import); - - return (pools); -} - -static boolean_t -pool_match(nvlist_t *cfg, char *tgt) -{ - uint64_t v, guid = strtoull(tgt, NULL, 0); - char *s; - - if (guid != 0) { - if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) - return (v == guid); - } else { - if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) - return (strcmp(s, tgt) == 0); - } - return (B_FALSE); -} - -int -zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp, - importargs_t *args) -{ - nvlist_t *pools; - nvlist_t *match = NULL; - nvlist_t *config = NULL; - char *sepp = NULL; - int count = 0; - char *targetdup = strdup(target); - - *configp = NULL; - - if ((sepp = strpbrk(targetdup, "/@")) != NULL) { - *sepp = '\0'; - } - - pools = zpool_search_import(hdl, args); - - if (pools != NULL) { - nvpair_t *elem = NULL; - while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - VERIFY0(nvpair_value_nvlist(elem, &config)); - if (pool_match(config, targetdup)) { - count++; - if (match != NULL) { - /* multiple matches found */ - continue; - } else { - match = config; - } - } - } - } - - if (count == 0) { - free(targetdup); - return (ENOENT); - } - - if (count > 1) { - free(targetdup); - return (EINVAL); - } - - *configp = match; - free(targetdup); - - return (0); -} - -boolean_t -find_guid(nvlist_t *nv, uint64_t guid) -{ - uint64_t tmp; - nvlist_t **child; - uint_t c, children; - - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); - if (tmp == guid) - return (B_TRUE); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_guid(child[c], guid)) - return 
(B_TRUE); - } - - return (B_FALSE); -} - -typedef struct aux_cbdata { - const char *cb_type; - uint64_t cb_guid; - zpool_handle_t *cb_zhp; -} aux_cbdata_t; - -static int -find_aux(zpool_handle_t *zhp, void *data) -{ - aux_cbdata_t *cbp = data; - nvlist_t **list; - uint_t i, count; - uint64_t guid; - nvlist_t *nvroot; - - verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type, - &list, &count) == 0) { - for (i = 0; i < count; i++) { - verify(nvlist_lookup_uint64(list[i], - ZPOOL_CONFIG_GUID, &guid) == 0); - if (guid == cbp->cb_guid) { - cbp->cb_zhp = zhp; - return (1); - } - } - } - - zpool_close(zhp); - return (0); -} - -/* - * Determines if the pool is in use. If so, it returns true and the state of - * the pool as well as the name of the pool. Both strings are allocated and - * must be freed by the caller. - */ -int -zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, - boolean_t *inuse) -{ - nvlist_t *config; - char *name; - boolean_t ret; - uint64_t guid, vdev_guid; - zpool_handle_t *zhp; - nvlist_t *pool_config; - uint64_t stateval, isspare; - aux_cbdata_t cb = { 0 }; - boolean_t isactive; - - *inuse = B_FALSE; - - if (zpool_read_label(fd, &config) != 0 && errno == ENOMEM) { - (void) no_memory(hdl); - return (-1); - } - - if (config == NULL) - return (0); - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &stateval) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, - &vdev_guid) == 0); - - if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) { - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &guid) == 0); - } - - switch (stateval) { - case POOL_STATE_EXPORTED: - /* - * A pool with an exported state may in fact be imported - * read-only, so check the in-core state to see if it's - * active and imported 
read-only. If it is, set - * its state to active. - */ - if (pool_active(hdl, name, guid, &isactive) == 0 && isactive && - (zhp = zpool_open_canfail(hdl, name)) != NULL) { - if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL)) - stateval = POOL_STATE_ACTIVE; - - /* - * All we needed the zpool handle for is the - * readonly prop check. - */ - zpool_close(zhp); - } - - ret = B_TRUE; - break; - - case POOL_STATE_ACTIVE: - /* - * For an active pool, we have to determine if it's really part - * of a currently active pool (in which case the pool will exist - * and the guid will be the same), or whether it's part of an - * active pool that was disconnected without being explicitly - * exported. - */ - if (pool_active(hdl, name, guid, &isactive) != 0) { - nvlist_free(config); - return (-1); - } - - if (isactive) { - /* - * Because the device may have been removed while - * offlined, we only report it as active if the vdev is - * still present in the config. Otherwise, pretend like - * it's not in use. - */ - if ((zhp = zpool_open_canfail(hdl, name)) != NULL && - (pool_config = zpool_get_config(zhp, NULL)) - != NULL) { - nvlist_t *nvroot; - - verify(nvlist_lookup_nvlist(pool_config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - ret = find_guid(nvroot, vdev_guid); - } else { - ret = B_FALSE; - } - - /* - * If this is an active spare within another pool, we - * treat it like an unused hot spare. This allows the - * user to create a pool with a hot spare that currently - * in use within another pool. Since we return B_TRUE, - * libdiskmgt will continue to prevent generic consumers - * from using the device. - */ - if (ret && nvlist_lookup_uint64(config, - ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) - stateval = POOL_STATE_SPARE; - - if (zhp != NULL) - zpool_close(zhp); - } else { - stateval = POOL_STATE_POTENTIALLY_ACTIVE; - ret = B_TRUE; - } - break; - - case POOL_STATE_SPARE: - /* - * For a hot spare, it can be either definitively in use, or - * potentially active. 
To determine if it's in use, we iterate - * over all pools in the system and search for one with a spare - * with a matching guid. - * - * Due to the shared nature of spares, we don't actually report - * the potentially active case as in use. This means the user - * can freely create pools on the hot spares of exported pools, - * but to do otherwise makes the resulting code complicated, and - * we end up having to deal with this case anyway. - */ - cb.cb_zhp = NULL; - cb.cb_guid = vdev_guid; - cb.cb_type = ZPOOL_CONFIG_SPARES; - if (zpool_iter(hdl, find_aux, &cb) == 1) { - name = (char *)zpool_get_name(cb.cb_zhp); - ret = B_TRUE; - } else { - ret = B_FALSE; - } - break; - - case POOL_STATE_L2CACHE: - - /* - * Check if any pool is currently using this l2cache device. - */ - cb.cb_zhp = NULL; - cb.cb_guid = vdev_guid; - cb.cb_type = ZPOOL_CONFIG_L2CACHE; - if (zpool_iter(hdl, find_aux, &cb) == 1) { - name = (char *)zpool_get_name(cb.cb_zhp); - ret = B_TRUE; - } else { - ret = B_FALSE; - } - break; - - default: - ret = B_FALSE; - } - - - if (ret) { - if ((*namestr = zfs_strdup(hdl, name)) == NULL) { - if (cb.cb_zhp) - zpool_close(cb.cb_zhp); - nvlist_free(config); - return (-1); - } - *state = (pool_state_t)stateval; - } - - if (cb.cb_zhp) - zpool_close(cb.cb_zhp); - - nvlist_free(config); - *inuse = ret; - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c deleted file mode 100644 index 36138676e7db..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. - * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2019 Datto Inc. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <stddef.h> -#include <libintl.h> -#include <libzfs.h> - -#include "libzfs_impl.h" - -int -zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) -{ - nvlist_t *nvl = zfs_get_clones_nvl(zhp); - nvpair_t *pair; - - if (nvl == NULL) - return (0); - - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { - zfs_handle_t *clone = zfs_open(zhp->zfs_hdl, nvpair_name(pair), - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (clone != NULL) { - int err = func(clone, data); - if (err != 0) - return (err); - } - } - return (0); -} - -static int -zfs_do_list_ioctl(zfs_handle_t *zhp, unsigned long arg, zfs_cmd_t *zc) -{ - int rc; - uint64_t orig_cookie; - - orig_cookie = zc->zc_cookie; -top: - (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); - rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); - - if (rc == -1) { - switch (errno) { - case ENOMEM: - /* expand nvlist memory and try again */ - if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { - zcmd_free_nvlists(zc); - return (-1); - } - zc->zc_cookie = orig_cookie; - goto top; - /* - 
* An errno value of ESRCH indicates normal completion. - * If ENOENT is returned, then the underlying dataset - * has been removed since we obtained the handle. - */ - case ESRCH: - case ENOENT: - rc = 1; - break; - default: - rc = zfs_standard_error(zhp->zfs_hdl, errno, - dgettext(TEXT_DOMAIN, - "cannot iterate filesystems")); - break; - } - } - return (rc); -} - -/* - * Iterate over all child filesystems - */ -int -zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) -{ - zfs_cmd_t zc = { 0 }; - zfs_handle_t *nzhp; - int ret; - - if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) - return (0); - - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, - &zc)) == 0) { - /* - * Silently ignore errors, as the only plausible explanation is - * that the pool has since been removed. - */ - if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, - &zc)) == NULL) { - continue; - } - - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - return (ret); - } - } - zcmd_free_nvlists(&zc); - return ((ret < 0) ? 
ret : 0); -} - -/* - * Iterate over all snapshots - */ -int -zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, - void *data, uint64_t min_txg, uint64_t max_txg) -{ - zfs_cmd_t zc = { 0 }; - zfs_handle_t *nzhp; - int ret; - nvlist_t *range_nvl = NULL; - - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT || - zhp->zfs_type == ZFS_TYPE_BOOKMARK) - return (0); - - zc.zc_simple = simple; - - if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) - return (-1); - - if (min_txg != 0) { - range_nvl = fnvlist_alloc(); - fnvlist_add_uint64(range_nvl, SNAP_ITER_MIN_TXG, min_txg); - } - if (max_txg != 0) { - if (range_nvl == NULL) - range_nvl = fnvlist_alloc(); - fnvlist_add_uint64(range_nvl, SNAP_ITER_MAX_TXG, max_txg); - } - - if (range_nvl != NULL && - zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, range_nvl) != 0) { - zcmd_free_nvlists(&zc); - fnvlist_free(range_nvl); - return (-1); - } - - while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, - &zc)) == 0) { - - if (simple) - nzhp = make_dataset_simple_handle_zc(zhp, &zc); - else - nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); - if (nzhp == NULL) - continue; - - if ((ret = func(nzhp, data)) != 0) { - zcmd_free_nvlists(&zc); - fnvlist_free(range_nvl); - return (ret); - } - } - zcmd_free_nvlists(&zc); - fnvlist_free(range_nvl); - return ((ret < 0) ? ret : 0); -} - -/* - * Iterate over all bookmarks - */ -int -zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) -{ - zfs_handle_t *nzhp; - nvlist_t *props = NULL; - nvlist_t *bmarks = NULL; - int err; - - if ((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | ZFS_TYPE_BOOKMARK)) != 0) - return (0); - - /* Setup the requested properties nvlist. 
*/ - props = fnvlist_alloc(); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID)); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG)); - fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION)); - - if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0) - goto out; - - for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL); - pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - char *bmark_name; - nvlist_t *bmark_props; - - bmark_name = nvpair_name(pair); - bmark_props = fnvpair_value_nvlist(pair); - - (void) snprintf(name, sizeof (name), "%s#%s", zhp->zfs_name, - bmark_name); - - nzhp = make_bookmark_handle(zhp, name, bmark_props); - if (nzhp == NULL) - continue; - - if ((err = func(nzhp, data)) != 0) - goto out; - } - -out: - fnvlist_free(props); - fnvlist_free(bmarks); - - return (err); -} - -/* - * Routines for dealing with the sorted snapshot functionality - */ -typedef struct zfs_node { - zfs_handle_t *zn_handle; - avl_node_t zn_avlnode; -} zfs_node_t; - -static int -zfs_sort_snaps(zfs_handle_t *zhp, void *data) -{ - avl_tree_t *avl = data; - zfs_node_t *node; - zfs_node_t search; - - search.zn_handle = zhp; - node = avl_find(avl, &search, NULL); - if (node) { - /* - * If this snapshot was renamed while we were creating the - * AVL tree, it's possible that we already inserted it under - * its old name. Remove the old handle before adding the new - * one. - */ - zfs_close(node->zn_handle); - avl_remove(avl, node); - free(node); - } - - node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); - node->zn_handle = zhp; - avl_add(avl, node); - - return (0); -} - -static int -zfs_snapshot_compare(const void *larg, const void *rarg) -{ - zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; - zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; - uint64_t lcreate, rcreate; - - /* - * Sort them according to creation time. 
We use the hidden - * CREATETXG property to get an absolute ordering of snapshots. - */ - lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); - rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); - - return (AVL_CMP(lcreate, rcreate)); -} - -int -zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data, - uint64_t min_txg, uint64_t max_txg) -{ - int ret = 0; - zfs_node_t *node; - avl_tree_t avl; - void *cookie = NULL; - - avl_create(&avl, zfs_snapshot_compare, - sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); - - ret = zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl, min_txg, - max_txg); - - for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) - ret |= callback(node->zn_handle, data); - - while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) - free(node); - - avl_destroy(&avl); - - return (ret); -} - -typedef struct { - char *ssa_first; - char *ssa_last; - boolean_t ssa_seenfirst; - boolean_t ssa_seenlast; - zfs_iter_f ssa_func; - void *ssa_arg; -} snapspec_arg_t; - -static int -snapspec_cb(zfs_handle_t *zhp, void *arg) -{ - snapspec_arg_t *ssa = arg; - const char *shortsnapname; - int err = 0; - - if (ssa->ssa_seenlast) - return (0); - - shortsnapname = strchr(zfs_get_name(zhp), '@') + 1; - if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) - ssa->ssa_seenfirst = B_TRUE; - if (strcmp(shortsnapname, ssa->ssa_last) == 0) - ssa->ssa_seenlast = B_TRUE; - - if (ssa->ssa_seenfirst) { - err = ssa->ssa_func(zhp, ssa->ssa_arg); - } else { - zfs_close(zhp); - } - - return (err); -} - -/* - * spec is a string like "A,B%C,D" - * - * <snaps>, where <snaps> can be: - * <snap> (single snapshot) - * <snap>%<snap> (range of snapshots, inclusive) - * %<snap> (range of snapshots, starting with earliest) - * <snap>% (range of snapshots, ending with last) - * % (all snapshots) - * <snaps>[,...] 
(comma separated list of the above) - * - * If a snapshot can not be opened, continue trying to open the others, but - * return ENOENT at the end. - */ -int -zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, - zfs_iter_f func, void *arg) -{ - char *buf, *comma_separated, *cp; - int err = 0; - int ret = 0; - - buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig); - cp = buf; - - while ((comma_separated = strsep(&cp, ",")) != NULL) { - char *pct = strchr(comma_separated, '%'); - if (pct != NULL) { - snapspec_arg_t ssa = { 0 }; - ssa.ssa_func = func; - ssa.ssa_arg = arg; - - if (pct == comma_separated) - ssa.ssa_seenfirst = B_TRUE; - else - ssa.ssa_first = comma_separated; - *pct = '\0'; - ssa.ssa_last = pct + 1; - - /* - * If there is a lastname specified, make sure it - * exists. - */ - if (ssa.ssa_last[0] != '\0') { - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - (void) snprintf(snapname, sizeof (snapname), - "%s@%s", zfs_get_name(fs_zhp), - ssa.ssa_last); - if (!zfs_dataset_exists(fs_zhp->zfs_hdl, - snapname, ZFS_TYPE_SNAPSHOT)) { - ret = ENOENT; - continue; - } - } - - err = zfs_iter_snapshots_sorted(fs_zhp, - snapspec_cb, &ssa, 0, 0); - if (ret == 0) - ret = err; - if (ret == 0 && (!ssa.ssa_seenfirst || - (ssa.ssa_last[0] != '\0' && !ssa.ssa_seenlast))) { - ret = ENOENT; - } - } else { - char snapname[ZFS_MAX_DATASET_NAME_LEN]; - zfs_handle_t *snap_zhp; - (void) snprintf(snapname, sizeof (snapname), "%s@%s", - zfs_get_name(fs_zhp), comma_separated); - snap_zhp = make_dataset_handle(fs_zhp->zfs_hdl, - snapname); - if (snap_zhp == NULL) { - ret = ENOENT; - continue; - } - err = func(snap_zhp, arg); - if (ret == 0) - ret = err; - } - } - - free(buf); - return (ret); -} - -/* - * Iterate over all children, snapshots and filesystems - * Process snapshots before filesystems because they are nearer the input - * handle: this is extremely important when used with zfs_iter_f functions - * looking for data, following the logic that we would like to find it as soon - * 
and as close as possible. - */ -int -zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) -{ - int ret; - - if ((ret = zfs_iter_snapshots(zhp, B_FALSE, func, data, 0, 0)) != 0) - return (ret); - - return (zfs_iter_filesystems(zhp, func, data)); -} - - -typedef struct iter_stack_frame { - struct iter_stack_frame *next; - zfs_handle_t *zhp; -} iter_stack_frame_t; - -typedef struct iter_dependents_arg { - boolean_t first; - boolean_t allowrecursion; - iter_stack_frame_t *stack; - zfs_iter_f func; - void *data; -} iter_dependents_arg_t; - -static int -iter_dependents_cb(zfs_handle_t *zhp, void *arg) -{ - iter_dependents_arg_t *ida = arg; - int err = 0; - boolean_t first = ida->first; - ida->first = B_FALSE; - - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { - err = zfs_iter_clones(zhp, iter_dependents_cb, ida); - } else if (zhp->zfs_type != ZFS_TYPE_BOOKMARK) { - iter_stack_frame_t isf; - iter_stack_frame_t *f; - - /* - * check if there is a cycle by seeing if this fs is already - * on the stack. 
- */ - for (f = ida->stack; f != NULL; f = f->next) { - if (f->zhp->zfs_dmustats.dds_guid == - zhp->zfs_dmustats.dds_guid) { - if (ida->allowrecursion) { - zfs_close(zhp); - return (0); - } else { - zfs_error_aux(zhp->zfs_hdl, - dgettext(TEXT_DOMAIN, - "recursive dependency at '%s'"), - zfs_get_name(zhp)); - err = zfs_error(zhp->zfs_hdl, - EZFS_RECURSIVE, - dgettext(TEXT_DOMAIN, - "cannot determine dependent " - "datasets")); - zfs_close(zhp); - return (err); - } - } - } - - isf.zhp = zhp; - isf.next = ida->stack; - ida->stack = &isf; - err = zfs_iter_filesystems(zhp, iter_dependents_cb, ida); - if (err == 0) { - err = zfs_iter_snapshots(zhp, B_FALSE, - iter_dependents_cb, ida, 0, 0); - } - ida->stack = isf.next; - } - - if (!first && err == 0) - err = ida->func(zhp, ida->data); - else - zfs_close(zhp); - - return (err); -} - -int -zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, - zfs_iter_f func, void *data) -{ - iter_dependents_arg_t ida; - ida.allowrecursion = allowrecursion; - ida.stack = NULL; - ida.func = func; - ida.data = data; - ida.first = B_TRUE; - return (iter_dependents_cb(zfs_handle_dup(zhp), &ida)); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c deleted file mode 100644 index 9d4948cc7173..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c +++ /dev/null @@ -1,1734 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2016 by Delphix. All rights reserved. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - * Copyright 2017 Joyent, Inc. - * Copyright 2017 RackTop Systems. - * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. - */ - -/* - * Routines to manage ZFS mounts. We separate all the nasty routines that have - * to deal with the OS. The following functions are the main entry points -- - * they are used by mount and unmount and when changing a filesystem's - * mountpoint. 
- * - * zfs_is_mounted() - * zfs_mount() - * zfs_unmount() - * zfs_unmountall() - * - * This file also contains the functions used to manage sharing filesystems via - * NFS and iSCSI: - * - * zfs_is_shared() - * zfs_share() - * zfs_unshare() - * - * zfs_is_shared_nfs() - * zfs_is_shared_smb() - * zfs_share_proto() - * zfs_shareall(); - * zfs_unshare_nfs() - * zfs_unshare_smb() - * zfs_unshareall_nfs() - * zfs_unshareall_smb() - * zfs_unshareall() - * zfs_unshareall_bypath() - * - * The following functions are available for pool consumers, and will - * mount/unmount and share/unshare all datasets within pool: - * - * zpool_enable_datasets() - * zpool_disable_datasets() - */ - -#include <dirent.h> -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <libgen.h> -#include <libintl.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <zone.h> -#include <sys/mntent.h> -#include <sys/mount.h> -#include <sys/stat.h> -#include <sys/statvfs.h> - -#include <libzfs.h> - -#include "libzfs_impl.h" -#include <thread_pool.h> - -#include <libshare.h> -#define MAXISALEN 257 /* based on sysinfo(2) man page */ - -static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ - -static void zfs_mount_task(void *); -static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); -zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, - zfs_share_proto_t); - -/* - * The share protocols table must be in the same order as the zfs_share_proto_t - * enum in libzfs_impl.h - */ -typedef struct { - zfs_prop_t p_prop; - char *p_name; - int p_share_err; - int p_unshare_err; -} proto_table_t; - -proto_table_t proto_table[PROTO_END] = { - {ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED}, - {ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED}, -}; - -zfs_share_proto_t nfs_only[] = { - PROTO_NFS, - PROTO_END -}; - -zfs_share_proto_t smb_only[] = { - PROTO_SMB, - PROTO_END -}; 
-zfs_share_proto_t share_all_proto[] = { - PROTO_NFS, - PROTO_SMB, - PROTO_END -}; - -/* - * Search the sharetab for the given mountpoint and protocol, returning - * a zfs_share_type_t value. - */ -static zfs_share_type_t -is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto) -{ - char buf[MAXPATHLEN], *tab; - char *ptr; - - if (hdl->libzfs_sharetab == NULL) - return (SHARED_NOT_SHARED); - - (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET); - - while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) { - - /* the mountpoint is the first entry on each line */ - if ((tab = strchr(buf, '\t')) == NULL) - continue; - - *tab = '\0'; - if (strcmp(buf, mountpoint) == 0) { -#ifdef illumos - /* - * the protocol field is the third field - * skip over second field - */ - ptr = ++tab; - if ((tab = strchr(ptr, '\t')) == NULL) - continue; - ptr = ++tab; - if ((tab = strchr(ptr, '\t')) == NULL) - continue; - *tab = '\0'; - if (strcmp(ptr, - proto_table[proto].p_name) == 0) { - switch (proto) { - case PROTO_NFS: - return (SHARED_NFS); - case PROTO_SMB: - return (SHARED_SMB); - default: - return (0); - } - } -#else - if (proto == PROTO_NFS) - return (SHARED_NFS); -#endif - } - } - - return (SHARED_NOT_SHARED); -} - -#ifdef illumos -static boolean_t -dir_is_empty_stat(const char *dirname) -{ - struct stat st; - - /* - * We only want to return false if the given path is a non empty - * directory, all other errors are handled elsewhere. - */ - if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) { - return (B_TRUE); - } - - /* - * An empty directory will still have two entries in it, one - * entry for each of "." and "..". 
- */ - if (st.st_size > 2) { - return (B_FALSE); - } - - return (B_TRUE); -} - -static boolean_t -dir_is_empty_readdir(const char *dirname) -{ - DIR *dirp; - struct dirent64 *dp; - int dirfd; - - if ((dirfd = openat(AT_FDCWD, dirname, - O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) { - return (B_TRUE); - } - - if ((dirp = fdopendir(dirfd)) == NULL) { - (void) close(dirfd); - return (B_TRUE); - } - - while ((dp = readdir64(dirp)) != NULL) { - - if (strcmp(dp->d_name, ".") == 0 || - strcmp(dp->d_name, "..") == 0) - continue; - - (void) closedir(dirp); - return (B_FALSE); - } - - (void) closedir(dirp); - return (B_TRUE); -} - -/* - * Returns true if the specified directory is empty. If we can't open the - * directory at all, return true so that the mount can fail with a more - * informative error message. - */ -static boolean_t -dir_is_empty(const char *dirname) -{ - struct statvfs64 st; - - /* - * If the statvfs call fails or the filesystem is not a ZFS - * filesystem, fall back to the slow path which uses readdir. - */ - if ((statvfs64(dirname, &st) != 0) || - (strcmp(st.f_basetype, "zfs") != 0)) { - return (dir_is_empty_readdir(dirname)); - } - - /* - * At this point, we know the provided path is on a ZFS - * filesystem, so we can use stat instead of readdir to - * determine if the directory is empty or not. We try to avoid - * using readdir because that requires opening "dirname"; this - * open file descriptor can potentially end up in a child - * process if there's a concurrent fork, thus preventing the - * zfs_mount() from otherwise succeeding (the open file - * descriptor inherited by the child process will cause the - * parent's mount to fail with EBUSY). The performance - * implications of replacing the open, read, and close with a - * single stat is nice; but is not the main motivation for the - * added complexity. - */ - return (dir_is_empty_stat(dirname)); -} -#endif - -/* - * Checks to see if the mount is active. 
If the filesystem is mounted, we fill - * in 'where' with the current mountpoint, and return 1. Otherwise, we return - * 0. - */ -boolean_t -is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) -{ - struct mnttab entry; - - if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0) - return (B_FALSE); - - if (where != NULL) - *where = zfs_strdup(zfs_hdl, entry.mnt_mountp); - - return (B_TRUE); -} - -boolean_t -zfs_is_mounted(zfs_handle_t *zhp, char **where) -{ - return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where)); -} - -static boolean_t -zfs_is_mountable_internal(zfs_handle_t *zhp, const char *mountpoint) -{ - - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && - getzoneid() == GLOBAL_ZONEID) - return (B_FALSE); - - return (B_TRUE); -} - -/* - * Returns true if the given dataset is mountable, false otherwise. Returns the - * mountpoint in 'buf'. - */ -static boolean_t -zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, - zprop_source_t *source) -{ - char sourceloc[MAXNAMELEN]; - zprop_source_t sourcetype; - - if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type)) - return (B_FALSE); - - verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen, - &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0); - - if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 || - strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0) - return (B_FALSE); - - if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF) - return (B_FALSE); - - if (!zfs_is_mountable_internal(zhp, buf)) - return (B_FALSE); - - if (source) - *source = sourcetype; - - return (B_TRUE); -} - -/* - * Mount the given filesystem. 
- */ -int -zfs_mount(zfs_handle_t *zhp, const char *options, int flags) -{ - char mountpoint[ZFS_MAXPROPLEN]; - - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) - return (0); - - return (zfs_mount_at(zhp, options, flags, mountpoint)); -} - -int -zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags, - const char *mountpoint) -{ - struct stat buf; - char mntopts[MNT_LINE_MAX]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - - if (options == NULL) - mntopts[0] = '\0'; - else - (void) strlcpy(mntopts, options, sizeof (mntopts)); - - /* - * If the pool is imported read-only then all mounts must be read-only - */ - if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL)) - flags |= MS_RDONLY; - - if (!zfs_is_mountable_internal(zhp, mountpoint)) - return (B_FALSE); - - /* Create the directory if it doesn't already exist */ - if (lstat(mountpoint, &buf) != 0) { - if (mkdirp(mountpoint, 0755) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "failed to create mountpoint")); - return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, - dgettext(TEXT_DOMAIN, "cannot mount '%s'"), - mountpoint)); - } - } - -#ifdef illumos /* FreeBSD: overlay mounts are not checked. */ - /* - * Determine if the mountpoint is empty. If so, refuse to perform the - * mount. We don't perform this check if MS_OVERLAY is specified, which - * would defeat the point. We also avoid this check if 'remount' is - * specified. - */ - if ((flags & MS_OVERLAY) == 0 && - strstr(mntopts, MNTOPT_REMOUNT) == NULL && - !dir_is_empty(mountpoint)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "directory is not empty")); - return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, - dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint)); - } -#endif - - /* perform the mount */ - if (zmount(zfs_get_name(zhp), mountpoint, flags, - MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { - /* - * Generic errors are nasty, but there are just way too many - * from mount(), and they're well-understood. 
We pick a few - * common ones to improve upon. - */ - if (errno == EBUSY) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "mountpoint or dataset is busy")); - } else if (errno == EPERM) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Insufficient privileges")); - } else if (errno == ENOTSUP) { - char buf[256]; - int spa_version; - - VERIFY(zfs_spa_version(zhp, &spa_version) == 0); - (void) snprintf(buf, sizeof (buf), - dgettext(TEXT_DOMAIN, "Can't mount a version %lld " - "file system on a version %d pool. Pool must be" - " upgraded to mount this file system."), - (u_longlong_t)zfs_prop_get_int(zhp, - ZFS_PROP_VERSION), spa_version); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf)); - } else { - zfs_error_aux(hdl, strerror(errno)); - } - return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, - dgettext(TEXT_DOMAIN, "cannot mount '%s'"), - zhp->zfs_name)); - } - - /* add the mounted entry into our cache */ - libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, - mntopts); - return (0); -} - -/* - * Unmount a single filesystem. - */ -static int -unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags) -{ - if (umount2(mountpoint, flags) != 0) { - zfs_error_aux(hdl, strerror(errno)); - return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED, - dgettext(TEXT_DOMAIN, "cannot unmount '%s'"), - mountpoint)); - } - - return (0); -} - -/* - * Unmount the given filesystem. - */ -int -zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - struct mnttab entry; - char *mntpt = NULL; - - /* check to see if we need to unmount the filesystem */ - if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && - libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) { - /* - * mountpoint may have come from a call to - * getmnt/getmntany if it isn't NULL. If it is NULL, - * we know it comes from libzfs_mnttab_find which can - * then get freed later. We strdup it to play it safe. 
- */ - if (mountpoint == NULL) - mntpt = zfs_strdup(hdl, entry.mnt_mountp); - else - mntpt = zfs_strdup(hdl, mountpoint); - - /* - * Unshare and unmount the filesystem - */ - if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) - return (-1); - - if (unmount_one(hdl, mntpt, flags) != 0) { - free(mntpt); - (void) zfs_shareall(zhp); - return (-1); - } - libzfs_mnttab_remove(hdl, zhp->zfs_name); - free(mntpt); - } - - return (0); -} - -/* - * Unmount this filesystem and any children inheriting the mountpoint property. - * To do this, just act like we're changing the mountpoint property, but don't - * remount the filesystems afterwards. - */ -int -zfs_unmountall(zfs_handle_t *zhp, int flags) -{ - prop_changelist_t *clp; - int ret; - - clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags); - if (clp == NULL) - return (-1); - - ret = changelist_prefix(clp); - changelist_free(clp); - - return (ret); -} - -boolean_t -zfs_is_shared(zfs_handle_t *zhp) -{ - zfs_share_type_t rc = 0; - zfs_share_proto_t *curr_proto; - - if (ZFS_IS_VOLUME(zhp)) - return (B_FALSE); - - for (curr_proto = share_all_proto; *curr_proto != PROTO_END; - curr_proto++) - rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto); - - return (rc ? B_TRUE : B_FALSE); -} - -int -zfs_share(zfs_handle_t *zhp) -{ - assert(!ZFS_IS_VOLUME(zhp)); - return (zfs_share_proto(zhp, share_all_proto)); -} - -int -zfs_unshare(zfs_handle_t *zhp) -{ - assert(!ZFS_IS_VOLUME(zhp)); - return (zfs_unshareall(zhp)); -} - -/* - * Check to see if the filesystem is currently shared. 
- */ -zfs_share_type_t -zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto) -{ - char *mountpoint; - zfs_share_type_t rc; - - if (!zfs_is_mounted(zhp, &mountpoint)) - return (SHARED_NOT_SHARED); - - if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto)) - != SHARED_NOT_SHARED) { - if (where != NULL) - *where = mountpoint; - else - free(mountpoint); - return (rc); - } else { - free(mountpoint); - return (SHARED_NOT_SHARED); - } -} - -boolean_t -zfs_is_shared_nfs(zfs_handle_t *zhp, char **where) -{ - return (zfs_is_shared_proto(zhp, where, - PROTO_NFS) != SHARED_NOT_SHARED); -} - -boolean_t -zfs_is_shared_smb(zfs_handle_t *zhp, char **where) -{ - return (zfs_is_shared_proto(zhp, where, - PROTO_SMB) != SHARED_NOT_SHARED); -} - -/* - * Make sure things will work if libshare isn't installed by using - * wrapper functions that check to see that the pointers to functions - * initialized in _zfs_init_libshare() are actually present. - */ - -#ifdef illumos -static sa_handle_t (*_sa_init)(int); -static sa_handle_t (*_sa_init_arg)(int, void *); -static void (*_sa_fini)(sa_handle_t); -static sa_share_t (*_sa_find_share)(sa_handle_t, char *); -static int (*_sa_enable_share)(sa_share_t, char *); -static int (*_sa_disable_share)(sa_share_t, char *); -static char *(*_sa_errorstr)(int); -static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *); -static boolean_t (*_sa_needs_refresh)(sa_handle_t *); -static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t); -static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t, - char *, char *, zprop_source_t, char *, char *, char *); -static void (*_sa_update_sharetab_ts)(sa_handle_t); -#endif - -/* - * _zfs_init_libshare() - * - * Find the libshare.so.1 entry points that we use here and save the - * values to be used later. This is triggered by the runtime loader. - * Make sure the correct ISA version is loaded. 
- */ - -#pragma init(_zfs_init_libshare) -static void -_zfs_init_libshare(void) -{ -#ifdef illumos - void *libshare; - char path[MAXPATHLEN]; - char isa[MAXISALEN]; - -#if defined(_LP64) - if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1) - isa[0] = '\0'; -#else - isa[0] = '\0'; -#endif - (void) snprintf(path, MAXPATHLEN, - "/usr/lib/%s/libshare.so.1", isa); - - if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) { - _sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init"); - _sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare, - "sa_init_arg"); - _sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini"); - _sa_find_share = (sa_share_t (*)(sa_handle_t, char *)) - dlsym(libshare, "sa_find_share"); - _sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare, - "sa_enable_share"); - _sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare, - "sa_disable_share"); - _sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr"); - _sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *)) - dlsym(libshare, "sa_parse_legacy_options"); - _sa_needs_refresh = (boolean_t (*)(sa_handle_t *)) - dlsym(libshare, "sa_needs_refresh"); - _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t)) - dlsym(libshare, "sa_get_zfs_handle"); - _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t, - sa_share_t, char *, char *, zprop_source_t, char *, - char *, char *))dlsym(libshare, "sa_zfs_process_share"); - _sa_update_sharetab_ts = (void (*)(sa_handle_t)) - dlsym(libshare, "sa_update_sharetab_ts"); - if (_sa_init == NULL || _sa_init_arg == NULL || - _sa_fini == NULL || _sa_find_share == NULL || - _sa_enable_share == NULL || _sa_disable_share == NULL || - _sa_errorstr == NULL || _sa_parse_legacy_options == NULL || - _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL || - _sa_zfs_process_share == NULL || - _sa_update_sharetab_ts == NULL) { - _sa_init = NULL; - _sa_init_arg = NULL; - _sa_fini = NULL; - _sa_disable_share = NULL; - 
_sa_enable_share = NULL; - _sa_errorstr = NULL; - _sa_parse_legacy_options = NULL; - (void) dlclose(libshare); - _sa_needs_refresh = NULL; - _sa_get_zfs_handle = NULL; - _sa_zfs_process_share = NULL; - _sa_update_sharetab_ts = NULL; - } - } -#endif -} - -/* - * zfs_init_libshare(zhandle, service) - * - * Initialize the libshare API if it hasn't already been initialized. - * In all cases it returns 0 if it succeeded and an error if not. The - * service value is which part(s) of the API to initialize and is a - * direct map to the libshare sa_init(service) interface. - */ -static int -zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg) -{ -#ifdef illumos - /* - * libshare is either not installed or we're in a branded zone. The - * rest of the wrapper functions around the libshare calls already - * handle NULL function pointers, but we don't want the callers of - * zfs_init_libshare() to fail prematurely if libshare is not available. - */ - if (_sa_init == NULL) - return (SA_OK); - - /* - * Attempt to refresh libshare. This is necessary if there was a cache - * miss for a new ZFS dataset that was just created, or if state of the - * sharetab file has changed since libshare was last initialized. We - * want to make sure so check timestamps to see if a different process - * has updated any of the configuration. If there was some non-ZFS - * change, we need to re-initialize the internal cache. 
- */ - if (_sa_needs_refresh != NULL && - _sa_needs_refresh(zhandle->libzfs_sharehdl)) { - zfs_uninit_libshare(zhandle); - zhandle->libzfs_sharehdl = _sa_init_arg(service, arg); - } - - if (zhandle && zhandle->libzfs_sharehdl == NULL) - zhandle->libzfs_sharehdl = _sa_init_arg(service, arg); - - if (zhandle->libzfs_sharehdl == NULL) - return (SA_NO_MEMORY); -#endif - - return (SA_OK); -} -int -zfs_init_libshare(libzfs_handle_t *zhandle, int service) -{ - return (zfs_init_libshare_impl(zhandle, service, NULL)); -} - -int -zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg) -{ - return (zfs_init_libshare_impl(zhandle, service, arg)); -} - - -/* - * zfs_uninit_libshare(zhandle) - * - * Uninitialize the libshare API if it hasn't already been - * uninitialized. It is OK to call multiple times. - */ -void -zfs_uninit_libshare(libzfs_handle_t *zhandle) -{ - if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) { -#ifdef illumos - if (_sa_fini != NULL) - _sa_fini(zhandle->libzfs_sharehdl); -#endif - zhandle->libzfs_sharehdl = NULL; - } -} - -/* - * zfs_parse_options(options, proto) - * - * Call the legacy parse interface to get the protocol specific - * options using the NULL arg to indicate that this is a "parse" only. - */ -int -zfs_parse_options(char *options, zfs_share_proto_t proto) -{ -#ifdef illumos - if (_sa_parse_legacy_options != NULL) { - return (_sa_parse_legacy_options(NULL, options, - proto_table[proto].p_name)); - } - return (SA_CONFIG_ERR); -#else - return (SA_OK); -#endif -} - -#ifdef illumos -/* - * zfs_sa_find_share(handle, path) - * - * wrapper around sa_find_share to find a share path in the - * configuration. - */ -static sa_share_t -zfs_sa_find_share(sa_handle_t handle, char *path) -{ - if (_sa_find_share != NULL) - return (_sa_find_share(handle, path)); - return (NULL); -} - -/* - * zfs_sa_enable_share(share, proto) - * - * Wrapper for sa_enable_share which enables a share for a specified - * protocol. 
- */ -static int -zfs_sa_enable_share(sa_share_t share, char *proto) -{ - if (_sa_enable_share != NULL) - return (_sa_enable_share(share, proto)); - return (SA_CONFIG_ERR); -} - -/* - * zfs_sa_disable_share(share, proto) - * - * Wrapper for sa_enable_share which disables a share for a specified - * protocol. - */ -static int -zfs_sa_disable_share(sa_share_t share, char *proto) -{ - if (_sa_disable_share != NULL) - return (_sa_disable_share(share, proto)); - return (SA_CONFIG_ERR); -} -#endif /* illumos */ - -/* - * Share the given filesystem according to the options in the specified - * protocol specific properties (sharenfs, sharesmb). We rely - * on "libshare" to the dirty work for us. - */ -static int -zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) -{ - char mountpoint[ZFS_MAXPROPLEN]; - char shareopts[ZFS_MAXPROPLEN]; - char sourcestr[ZFS_MAXPROPLEN]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_share_proto_t *curr_proto; - zprop_source_t sourcetype; - int error, ret; - - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) - return (0); - - for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { - /* - * Return success if there are no share options. - */ - if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop, - shareopts, sizeof (shareopts), &sourcetype, sourcestr, - ZFS_MAXPROPLEN, B_FALSE) != 0 || - strcmp(shareopts, "off") == 0) - continue; -#ifdef illumos - ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE, - zhp); - if (ret != SA_OK) { - (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED, - dgettext(TEXT_DOMAIN, "cannot share '%s': %s"), - zfs_get_name(zhp), _sa_errorstr != NULL ? - _sa_errorstr(ret) : ""); - return (-1); - } -#endif - - /* - * If the 'zoned' property is set, then zfs_is_mountable() - * will have already bailed out if we are in the global zone. - * But local zones cannot be NFS servers, so we ignore it for - * local zones as well. 
- */ - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) - continue; - -#ifdef illumos - share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); - if (share == NULL) { - /* - * This may be a new file system that was just - * created so isn't in the internal cache - * (second time through). Rather than - * reloading the entire configuration, we can - * assume ZFS has done the checking and it is - * safe to add this to the internal - * configuration. - */ - if (_sa_zfs_process_share(hdl->libzfs_sharehdl, - NULL, NULL, mountpoint, - proto_table[*curr_proto].p_name, sourcetype, - shareopts, sourcestr, zhp->zfs_name) != SA_OK) { - (void) zfs_error_fmt(hdl, - proto_table[*curr_proto].p_share_err, - dgettext(TEXT_DOMAIN, "cannot share '%s'"), - zfs_get_name(zhp)); - return (-1); - } - share = zfs_sa_find_share(hdl->libzfs_sharehdl, - mountpoint); - } - if (share != NULL) { - int err; - err = zfs_sa_enable_share(share, - proto_table[*curr_proto].p_name); - if (err != SA_OK) { - (void) zfs_error_fmt(hdl, - proto_table[*curr_proto].p_share_err, - dgettext(TEXT_DOMAIN, "cannot share '%s'"), - zfs_get_name(zhp)); - return (-1); - } - } else -#else - if (*curr_proto != PROTO_NFS) { - fprintf(stderr, "Unsupported share protocol: %d.\n", - *curr_proto); - continue; - } - - if (strcmp(shareopts, "on") == 0) - error = fsshare(ZFS_EXPORTS_PATH, mountpoint, ""); - else - error = fsshare(ZFS_EXPORTS_PATH, mountpoint, shareopts); - if (error != 0) -#endif - { - (void) zfs_error_fmt(hdl, - proto_table[*curr_proto].p_share_err, - dgettext(TEXT_DOMAIN, "cannot share '%s'"), - zfs_get_name(zhp)); - return (-1); - } - - } - return (0); -} - - -int -zfs_share_nfs(zfs_handle_t *zhp) -{ - return (zfs_share_proto(zhp, nfs_only)); -} - -int -zfs_share_smb(zfs_handle_t *zhp) -{ - return (zfs_share_proto(zhp, smb_only)); -} - -int -zfs_shareall(zfs_handle_t *zhp) -{ - return (zfs_share_proto(zhp, share_all_proto)); -} - -/* - * Unshare a filesystem by mountpoint. 
- */ -static int -unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint, - zfs_share_proto_t proto) -{ -#ifdef illumos - sa_share_t share; - int err; - char *mntpt; - - /* - * Mountpoint could get trashed if libshare calls getmntany - * which it does during API initialization, so strdup the - * value. - */ - mntpt = zfs_strdup(hdl, mountpoint); - - /* - * make sure libshare initialized, initialize everything because we - * don't know what other unsharing may happen later. Functions up the - * stack are allowed to initialize instead a subset of shares at the - * time the set is known. - */ - if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME, - (void *)name)) != SA_OK) { - free(mntpt); /* don't need the copy anymore */ - return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, - dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), - name, _sa_errorstr(err))); - } - - share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt); - free(mntpt); /* don't need the copy anymore */ - - if (share != NULL) { - err = zfs_sa_disable_share(share, proto_table[proto].p_name); - if (err != SA_OK) { - return (zfs_error_fmt(hdl, - proto_table[proto].p_unshare_err, - dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), - name, _sa_errorstr(err))); - } - } else { - return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, - dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"), - name)); - } -#else - char buf[MAXPATHLEN]; - FILE *fp; - int err; - - if (proto != PROTO_NFS) { - fprintf(stderr, "No SMB support in FreeBSD yet.\n"); - return (EOPNOTSUPP); - } - - err = fsunshare(ZFS_EXPORTS_PATH, mountpoint); - if (err != 0) { - zfs_error_aux(hdl, "%s", strerror(err)); - return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED, - dgettext(TEXT_DOMAIN, - "cannot unshare '%s'"), name)); - } -#endif - return (0); -} - -/* - * Unshare the given filesystem. 
- */ -int -zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint, - zfs_share_proto_t *proto) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - struct mnttab entry; - char *mntpt = NULL; - - /* check to see if need to unmount the filesystem */ - rewind(zhp->zfs_hdl->libzfs_mnttab); - if (mountpoint != NULL) - mountpoint = mntpt = zfs_strdup(hdl, mountpoint); - - if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && - libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) { - zfs_share_proto_t *curr_proto; - - if (mountpoint == NULL) - mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp); - - for (curr_proto = proto; *curr_proto != PROTO_END; - curr_proto++) { - - if (is_shared(hdl, mntpt, *curr_proto) && - unshare_one(hdl, zhp->zfs_name, - mntpt, *curr_proto) != 0) { - if (mntpt != NULL) - free(mntpt); - return (-1); - } - } - } - if (mntpt != NULL) - free(mntpt); - - return (0); -} - -int -zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint) -{ - return (zfs_unshare_proto(zhp, mountpoint, nfs_only)); -} - -int -zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint) -{ - return (zfs_unshare_proto(zhp, mountpoint, smb_only)); -} - -/* - * Same as zfs_unmountall(), but for NFS and SMB unshares. 
- */ -int -zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) -{ - prop_changelist_t *clp; - int ret; - - clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0); - if (clp == NULL) - return (-1); - - ret = changelist_unshare(clp, proto); - changelist_free(clp); - - return (ret); -} - -int -zfs_unshareall_nfs(zfs_handle_t *zhp) -{ - return (zfs_unshareall_proto(zhp, nfs_only)); -} - -int -zfs_unshareall_smb(zfs_handle_t *zhp) -{ - return (zfs_unshareall_proto(zhp, smb_only)); -} - -int -zfs_unshareall(zfs_handle_t *zhp) -{ - return (zfs_unshareall_proto(zhp, share_all_proto)); -} - -int -zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint) -{ - return (zfs_unshare_proto(zhp, mountpoint, share_all_proto)); -} - -/* - * Remove the mountpoint associated with the current dataset, if necessary. - * We only remove the underlying directory if: - * - * - The mountpoint is not 'none' or 'legacy' - * - The mountpoint is non-empty - * - The mountpoint is the default or inherited - * - The 'zoned' property is set, or we're in a local zone - * - * Any other directories we leave alone. - */ -void -remove_mountpoint(zfs_handle_t *zhp) -{ - char mountpoint[ZFS_MAXPROPLEN]; - zprop_source_t source; - - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), - &source)) - return; - - if (source == ZPROP_SRC_DEFAULT || - source == ZPROP_SRC_INHERITED) { - /* - * Try to remove the directory, silently ignoring any errors. - * The filesystem may have since been removed or moved around, - * and this error isn't really useful to the administrator in - * any way. - */ - (void) rmdir(mountpoint); - } -} - -/* - * Add the given zfs handle to the cb_handles array, dynamically reallocating - * the array if it is out of space - */ -void -libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) -{ - if (cbp->cb_alloc == cbp->cb_used) { - size_t newsz; - zfs_handle_t **newhandles; - - newsz = cbp->cb_alloc != 0 ? 
cbp->cb_alloc * 2 : 64; - newhandles = zfs_realloc(zhp->zfs_hdl, - cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *), - newsz * sizeof (zfs_handle_t *)); - cbp->cb_handles = newhandles; - cbp->cb_alloc = newsz; - } - cbp->cb_handles[cbp->cb_used++] = zhp; -} - -/* - * Recursive helper function used during file system enumeration - */ -static int -zfs_iter_cb(zfs_handle_t *zhp, void *data) -{ - get_all_cb_t *cbp = data; - - if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) { - zfs_close(zhp); - return (0); - } - - if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) { - zfs_close(zhp); - return (0); - } - - /* - * If this filesystem is inconsistent and has a receive resume - * token, we can not mount it. - */ - if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && - zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, - NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { - zfs_close(zhp); - return (0); - } - - libzfs_add_handle(cbp, zhp); - if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) { - zfs_close(zhp); - return (-1); - } - return (0); -} - -/* - * Sort comparator that compares two mountpoint paths. We sort these paths so - * that subdirectories immediately follow their parents. This means that we - * effectively treat the '/' character as the lowest value non-nul char. - * Since filesystems from non-global zones can have the same mountpoint - * as other filesystems, the comparator sorts global zone filesystems to - * the top of the list. This means that the global zone will traverse the - * filesystem list in the correct order and can stop when it sees the - * first zoned filesystem. In a non-global zone, only the delegated - * filesystems are seen. - * - * An example sorted list using this comparator would look like: - * - * /foo - * /foo/bar - * /foo/bar/baz - * /foo/baz - * /foo.bar - * /foo (NGZ1) - * /foo (NGZ2) - * - * The mount code depend on this ordering to deterministically iterate - * over filesystems in order to spawn parallel mount tasks. 
- */ -static int -mountpoint_cmp(const void *arga, const void *argb) -{ - zfs_handle_t *const *zap = arga; - zfs_handle_t *za = *zap; - zfs_handle_t *const *zbp = argb; - zfs_handle_t *zb = *zbp; - char mounta[MAXPATHLEN]; - char mountb[MAXPATHLEN]; - const char *a = mounta; - const char *b = mountb; - boolean_t gota, gotb; - uint64_t zoneda, zonedb; - - zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED); - zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED); - if (zoneda && !zonedb) - return (1); - if (!zoneda && zonedb) - return (-1); - gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM); - if (gota) - verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta, - sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); - gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM); - if (gotb) - verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb, - sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); - - if (gota && gotb) { - while (*a != '\0' && (*a == *b)) { - a++; - b++; - } - if (*a == *b) - return (0); - if (*a == '\0') - return (-1); - if (*b == '\0') - return (1); - if (*a == '/') - return (-1); - if (*b == '/') - return (1); - return (*a < *b ? -1 : *a > *b); - } - - if (gota) - return (-1); - if (gotb) - return (1); - - /* - * If neither filesystem has a mountpoint, revert to sorting by - * datset name. - */ - return (strcmp(zfs_get_name(za), zfs_get_name(zb))); -} - -/* - * Return true if path2 is a child of path1 or path2 equals path1 or - * path1 is "/" (path2 is always a child of "/"). 
- */ -static boolean_t -libzfs_path_contains(const char *path1, const char *path2) -{ - return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 || - (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/')); -} - - -static int -non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx) -{ - char parent[ZFS_MAXPROPLEN]; - char child[ZFS_MAXPROPLEN]; - int i; - - verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent, - sizeof (parent), NULL, NULL, 0, B_FALSE) == 0); - - for (i = idx + 1; i < num_handles; i++) { - verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child, - sizeof (child), NULL, NULL, 0, B_FALSE) == 0); - if (!libzfs_path_contains(parent, child)) - break; - } - return (i); -} - -typedef struct mnt_param { - libzfs_handle_t *mnt_hdl; - tpool_t *mnt_tp; - zfs_handle_t **mnt_zhps; /* filesystems to mount */ - size_t mnt_num_handles; - int mnt_idx; /* Index of selected entry to mount */ - zfs_iter_f mnt_func; - void *mnt_data; -} mnt_param_t; - -/* - * Allocate and populate the parameter struct for mount function, and - * schedule mounting of the entry selected by idx. - */ -static void -zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles, - size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp) -{ - mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t)); - - mnt_param->mnt_hdl = hdl; - mnt_param->mnt_tp = tp; - mnt_param->mnt_zhps = handles; - mnt_param->mnt_num_handles = num_handles; - mnt_param->mnt_idx = idx; - mnt_param->mnt_func = func; - mnt_param->mnt_data = data; - - (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param); -} - -/* - * This is the structure used to keep state of mounting or sharing operations - * during a call to zpool_enable_datasets(). - */ -typedef struct mount_state { - /* - * ms_mntstatus is set to -1 if any mount fails. While multiple threads - * could update this variable concurrently, no synchronization is - * needed as it's only ever set to -1. 
- */ - int ms_mntstatus; - int ms_mntflags; - const char *ms_mntopts; -} mount_state_t; - -static int -zfs_mount_one(zfs_handle_t *zhp, void *arg) -{ - mount_state_t *ms = arg; - int ret = 0; - - if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0) - ret = ms->ms_mntstatus = -1; - return (ret); -} - -static int -zfs_share_one(zfs_handle_t *zhp, void *arg) -{ - mount_state_t *ms = arg; - int ret = 0; - - if (zfs_share(zhp) != 0) - ret = ms->ms_mntstatus = -1; - return (ret); -} - -/* - * Thread pool function to mount one file system. On completion, it finds and - * schedules its children to be mounted. This depends on the sorting done in - * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries - * each descending from the previous) will have no parallelism since we always - * have to wait for the parent to finish mounting before we can schedule - * its children. - */ -static void -zfs_mount_task(void *arg) -{ - mnt_param_t *mp = arg; - int idx = mp->mnt_idx; - zfs_handle_t **handles = mp->mnt_zhps; - size_t num_handles = mp->mnt_num_handles; - char mountpoint[ZFS_MAXPROPLEN]; - - verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); - - if (mp->mnt_func(handles[idx], mp->mnt_data) != 0) - return; - - /* - * We dispatch tasks to mount filesystems with mountpoints underneath - * this one. We do this by dispatching the next filesystem with a - * descendant mountpoint of the one we just mounted, then skip all of - * its descendants, dispatch the next descendant mountpoint, and so on. - * The non_descendant_idx() function skips over filesystems that are - * descendants of the filesystem we just dispatched. 
- */ - for (int i = idx + 1; i < num_handles; - i = non_descendant_idx(handles, num_handles, i)) { - char child[ZFS_MAXPROPLEN]; - verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, - child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0); - - if (!libzfs_path_contains(mountpoint, child)) - break; /* not a descendant, return */ - zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i, - mp->mnt_func, mp->mnt_data, mp->mnt_tp); - } - free(mp); -} - -/* - * Issue the func callback for each ZFS handle contained in the handles - * array. This function is used to mount all datasets, and so this function - * guarantees that filesystems for parent mountpoints are called before their - * children. As such, before issuing any callbacks, we first sort the array - * of handles by mountpoint. - * - * Callbacks are issued in one of two ways: - * - * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT - * environment variable is set, then we issue callbacks sequentially. - * - * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT - * environment variable is not set, then we use a tpool to dispatch threads - * to mount filesystems in parallel. This function dispatches tasks to mount - * the filesystems at the top-level mountpoints, and these tasks in turn - * are responsible for recursively mounting filesystems in their children - * mountpoints. - */ -void -zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, - size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) -{ - zoneid_t zoneid = getzoneid(); - - /* - * The ZFS_SERIAL_MOUNT environment variable is an undocumented - * variable that can be used as a convenience to do a/b comparison - * of serial vs. parallel mounting. - */ - boolean_t serial_mount = !parallel || - (getenv("ZFS_SERIAL_MOUNT") != NULL); - - /* - * Sort the datasets by mountpoint. See mountpoint_cmp for details - * of how these are sorted. 
- */ - qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp); - - if (serial_mount) { - for (int i = 0; i < num_handles; i++) { - func(handles[i], data); - } - return; - } - - /* - * Issue the callback function for each dataset using a parallel - * algorithm that uses a thread pool to manage threads. - */ - tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); - - /* - * There may be multiple "top level" mountpoints outside of the pool's - * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of - * these. - */ - for (int i = 0; i < num_handles; - i = non_descendant_idx(handles, num_handles, i)) { - /* - * Since the mountpoints have been sorted so that the zoned - * filesystems are at the end, a zoned filesystem seen from - * the global zone means that we're done. - */ - if (zoneid == GLOBAL_ZONEID && - zfs_prop_get_int(handles[i], ZFS_PROP_ZONED)) - break; - zfs_dispatch_mount(hdl, handles, num_handles, i, func, data, - tp); - } - - tpool_wait(tp); /* wait for all scheduled mounts to complete */ - tpool_destroy(tp); -} - -/* - * Mount and share all datasets within the given pool. This assumes that no - * datasets within the pool are currently mounted. - */ -#pragma weak zpool_mount_datasets = zpool_enable_datasets -int -zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) -{ - get_all_cb_t cb = { 0 }; - mount_state_t ms = { 0 }; - zfs_handle_t *zfsp; - int ret = 0; - - if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, - ZFS_TYPE_DATASET)) == NULL) - goto out; - - /* - * Gather all non-snapshot datasets within the pool. Start by adding - * the root filesystem for this pool to the list, and then iterate - * over all child filesystems. 
- */ - libzfs_add_handle(&cb, zfsp); - if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0) - goto out; - - /* - * Mount all filesystems - */ - ms.ms_mntopts = mntopts; - ms.ms_mntflags = flags; - zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_mount_one, &ms, B_TRUE); - if (ms.ms_mntstatus != 0) - ret = ms.ms_mntstatus; - - /* - * Share all filesystems that need to be shared. This needs to be - * a separate pass because libshare is not mt-safe, and so we need - * to share serially. - */ - ms.ms_mntstatus = 0; - zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_share_one, &ms, B_FALSE); - if (ms.ms_mntstatus != 0) - ret = ms.ms_mntstatus; - -out: - for (int i = 0; i < cb.cb_used; i++) - zfs_close(cb.cb_handles[i]); - free(cb.cb_handles); - - return (ret); -} - -static int -mountpoint_compare(const void *a, const void *b) -{ - const char *mounta = *((char **)a); - const char *mountb = *((char **)b); - - return (strcmp(mountb, mounta)); -} - -/* alias for 2002/240 */ -#pragma weak zpool_unmount_datasets = zpool_disable_datasets -/* - * Unshare and unmount all datasets within the given pool. We don't want to - * rely on traversing the DSL to discover the filesystems within the pool, - * because this may be expensive (if not all of them are mounted), and can fail - * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and - * gather all the filesystems that are currently mounted. - */ -int -zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) -{ - int used, alloc; - struct mnttab entry; - size_t namelen; - char **mountpoints = NULL; - zfs_handle_t **datasets = NULL; - libzfs_handle_t *hdl = zhp->zpool_hdl; - int i; - int ret = -1; - int flags = (force ? 
MS_FORCE : 0); -#ifdef illumos - sa_init_selective_arg_t sharearg; -#endif - - namelen = strlen(zhp->zpool_name); - - rewind(hdl->libzfs_mnttab); - used = alloc = 0; - while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { - /* - * Ignore non-ZFS entries. - */ - if (entry.mnt_fstype == NULL || - strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) - continue; - - /* - * Ignore filesystems not within this pool. - */ - if (entry.mnt_mountp == NULL || - strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 || - (entry.mnt_special[namelen] != '/' && - entry.mnt_special[namelen] != '\0')) - continue; - - /* - * At this point we've found a filesystem within our pool. Add - * it to our growing list. - */ - if (used == alloc) { - if (alloc == 0) { - if ((mountpoints = zfs_alloc(hdl, - 8 * sizeof (void *))) == NULL) - goto out; - - if ((datasets = zfs_alloc(hdl, - 8 * sizeof (void *))) == NULL) - goto out; - - alloc = 8; - } else { - void *ptr; - - if ((ptr = zfs_realloc(hdl, mountpoints, - alloc * sizeof (void *), - alloc * 2 * sizeof (void *))) == NULL) - goto out; - mountpoints = ptr; - - if ((ptr = zfs_realloc(hdl, datasets, - alloc * sizeof (void *), - alloc * 2 * sizeof (void *))) == NULL) - goto out; - datasets = ptr; - - alloc *= 2; - } - } - - if ((mountpoints[used] = zfs_strdup(hdl, - entry.mnt_mountp)) == NULL) - goto out; - - /* - * This is allowed to fail, in case there is some I/O error. It - * is only used to determine if we need to remove the underlying - * mountpoint, so failure is not fatal. - */ - datasets[used] = make_dataset_handle(hdl, entry.mnt_special); - - used++; - } - - /* - * At this point, we have the entire list of filesystems, so sort it by - * mountpoint. 
- */ -#ifdef illumos - sharearg.zhandle_arr = datasets; - sharearg.zhandle_len = used; - ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE, - &sharearg); - if (ret != 0) - goto out; -#endif - qsort(mountpoints, used, sizeof (char *), mountpoint_compare); - - /* - * Walk through and first unshare everything. - */ - for (i = 0; i < used; i++) { - zfs_share_proto_t *curr_proto; - for (curr_proto = share_all_proto; *curr_proto != PROTO_END; - curr_proto++) { - if (is_shared(hdl, mountpoints[i], *curr_proto) && - unshare_one(hdl, mountpoints[i], - mountpoints[i], *curr_proto) != 0) - goto out; - } - } - - /* - * Now unmount everything, removing the underlying directories as - * appropriate. - */ - for (i = 0; i < used; i++) { - if (unmount_one(hdl, mountpoints[i], flags) != 0) - goto out; - } - - for (i = 0; i < used; i++) { - if (datasets[i]) - remove_mountpoint(datasets[i]); - } - - ret = 0; -out: - for (i = 0; i < used; i++) { - if (datasets[i]) - zfs_close(datasets[i]); - free(mountpoints[i]); - } - free(datasets); - free(mountpoints); - - return (ret); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c deleted file mode 100644 index 434f77e27da9..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ /dev/null @@ -1,4669 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - * Copyright (c) 2017 Datto Inc. - * Copyright (c) 2017, Intel Corporation. - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <ctype.h> -#include <errno.h> -#include <devid.h> -#include <fcntl.h> -#include <libintl.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <libgen.h> -#include <sys/zfs_ioctl.h> -#include <dlfcn.h> - -#include "zfs_namecheck.h" -#include "zfs_prop.h" -#include "libzfs_impl.h" -#include "zfs_comutil.h" -#include "zfeature_common.h" - -static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *); -static boolean_t zpool_vdev_is_interior(const char *name); - -#define BACKUP_SLICE "s2" - -typedef struct prop_flags { - int create:1; /* Validate property on creation */ - int import:1; /* Validate property on import */ -} prop_flags_t; - -/* - * ==================================================================== - * zpool property functions - * ==================================================================== - */ - -static int -zpool_get_all_props(zpool_handle_t *zhp) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - - if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) - return (-1); - - while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) { - if (errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return 
(-1); - } - } else { - zcmd_free_nvlists(&zc); - return (-1); - } - } - - if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - - zcmd_free_nvlists(&zc); - - return (0); -} - -static int -zpool_props_refresh(zpool_handle_t *zhp) -{ - nvlist_t *old_props; - - old_props = zhp->zpool_props; - - if (zpool_get_all_props(zhp) != 0) - return (-1); - - nvlist_free(old_props); - return (0); -} - -static char * -zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop, - zprop_source_t *src) -{ - nvlist_t *nv, *nvl; - uint64_t ival; - char *value; - zprop_source_t source; - - nvl = zhp->zpool_props; - if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { - verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0); - source = ival; - verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); - } else { - source = ZPROP_SRC_DEFAULT; - if ((value = (char *)zpool_prop_default_string(prop)) == NULL) - value = "-"; - } - - if (src) - *src = source; - - return (value); -} - -uint64_t -zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) -{ - nvlist_t *nv, *nvl; - uint64_t value; - zprop_source_t source; - - if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) { - /* - * zpool_get_all_props() has most likely failed because - * the pool is faulted, but if all we need is the top level - * vdev's guid then get it from the zhp config nvlist. 
- */ - if ((prop == ZPOOL_PROP_GUID) && - (nvlist_lookup_nvlist(zhp->zpool_config, - ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) && - (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) - == 0)) { - return (value); - } - return (zpool_prop_default_numeric(prop)); - } - - nvl = zhp->zpool_props; - if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { - verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0); - source = value; - verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); - } else { - source = ZPROP_SRC_DEFAULT; - value = zpool_prop_default_numeric(prop); - } - - if (src) - *src = source; - - return (value); -} - -/* - * Map VDEV STATE to printed strings. - */ -const char * -zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) -{ - switch (state) { - case VDEV_STATE_CLOSED: - case VDEV_STATE_OFFLINE: - return (gettext("OFFLINE")); - case VDEV_STATE_REMOVED: - return (gettext("REMOVED")); - case VDEV_STATE_CANT_OPEN: - if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) - return (gettext("FAULTED")); - else if (aux == VDEV_AUX_SPLIT_POOL) - return (gettext("SPLIT")); - else - return (gettext("UNAVAIL")); - case VDEV_STATE_FAULTED: - return (gettext("FAULTED")); - case VDEV_STATE_DEGRADED: - return (gettext("DEGRADED")); - case VDEV_STATE_HEALTHY: - return (gettext("ONLINE")); - - default: - break; - } - - return (gettext("UNKNOWN")); -} - -/* - * Map POOL STATE to printed strings. 
- */ -const char * -zpool_pool_state_to_name(pool_state_t state) -{ - switch (state) { - case POOL_STATE_ACTIVE: - return (gettext("ACTIVE")); - case POOL_STATE_EXPORTED: - return (gettext("EXPORTED")); - case POOL_STATE_DESTROYED: - return (gettext("DESTROYED")); - case POOL_STATE_SPARE: - return (gettext("SPARE")); - case POOL_STATE_L2CACHE: - return (gettext("L2CACHE")); - case POOL_STATE_UNINITIALIZED: - return (gettext("UNINITIALIZED")); - case POOL_STATE_UNAVAIL: - return (gettext("UNAVAIL")); - case POOL_STATE_POTENTIALLY_ACTIVE: - return (gettext("POTENTIALLY_ACTIVE")); - } - - return (gettext("UNKNOWN")); -} - -/* - * Get a zpool property value for 'prop' and return the value in - * a pre-allocated buffer. - */ -int -zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, - zprop_source_t *srctype, boolean_t literal) -{ - uint64_t intval; - const char *strval; - zprop_source_t src = ZPROP_SRC_NONE; - nvlist_t *nvroot; - vdev_stat_t *vs; - uint_t vsc; - - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - switch (prop) { - case ZPOOL_PROP_NAME: - (void) strlcpy(buf, zpool_get_name(zhp), len); - break; - - case ZPOOL_PROP_HEALTH: - (void) strlcpy(buf, - zpool_pool_state_to_name(POOL_STATE_UNAVAIL), len); - break; - - case ZPOOL_PROP_GUID: - intval = zpool_get_prop_int(zhp, prop, &src); - (void) snprintf(buf, len, "%llu", intval); - break; - - case ZPOOL_PROP_ALTROOT: - case ZPOOL_PROP_CACHEFILE: - case ZPOOL_PROP_COMMENT: - if (zhp->zpool_props != NULL || - zpool_get_all_props(zhp) == 0) { - (void) strlcpy(buf, - zpool_get_prop_string(zhp, prop, &src), - len); - break; - } - /* FALLTHROUGH */ - default: - (void) strlcpy(buf, "-", len); - break; - } - - if (srctype != NULL) - *srctype = src; - return (0); - } - - if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) && - prop != ZPOOL_PROP_NAME) - return (-1); - - switch (zpool_prop_get_type(prop)) { - case PROP_TYPE_STRING: - (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, 
&src), - len); - break; - - case PROP_TYPE_NUMBER: - intval = zpool_get_prop_int(zhp, prop, &src); - - switch (prop) { - case ZPOOL_PROP_SIZE: - case ZPOOL_PROP_ALLOCATED: - case ZPOOL_PROP_FREE: - case ZPOOL_PROP_FREEING: - case ZPOOL_PROP_LEAKED: - if (literal) { - (void) snprintf(buf, len, "%llu", - (u_longlong_t)intval); - } else { - (void) zfs_nicenum(intval, buf, len); - } - break; - case ZPOOL_PROP_BOOTSIZE: - case ZPOOL_PROP_EXPANDSZ: - case ZPOOL_PROP_CHECKPOINT: - if (intval == 0) { - (void) strlcpy(buf, "-", len); - } else if (literal) { - (void) snprintf(buf, len, "%llu", - (u_longlong_t)intval); - } else { - (void) zfs_nicenum(intval, buf, len); - } - break; - case ZPOOL_PROP_CAPACITY: - if (literal) { - (void) snprintf(buf, len, "%llu", - (u_longlong_t)intval); - } else { - (void) snprintf(buf, len, "%llu%%", - (u_longlong_t)intval); - } - break; - case ZPOOL_PROP_FRAGMENTATION: - if (intval == UINT64_MAX) { - (void) strlcpy(buf, "-", len); - } else { - (void) snprintf(buf, len, "%llu%%", - (u_longlong_t)intval); - } - break; - case ZPOOL_PROP_DEDUPRATIO: - (void) snprintf(buf, len, "%llu.%02llux", - (u_longlong_t)(intval / 100), - (u_longlong_t)(intval % 100)); - break; - case ZPOOL_PROP_HEALTH: - verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) - == 0); - - (void) strlcpy(buf, zpool_state_to_name(intval, - vs->vs_aux), len); - break; - case ZPOOL_PROP_VERSION: - if (intval >= SPA_VERSION_FEATURES) { - (void) snprintf(buf, len, "-"); - break; - } - /* FALLTHROUGH */ - default: - (void) snprintf(buf, len, "%llu", intval); - } - break; - - case PROP_TYPE_INDEX: - intval = zpool_get_prop_int(zhp, prop, &src); - if (zpool_prop_index_to_string(prop, intval, &strval) - != 0) - return (-1); - (void) strlcpy(buf, strval, len); - break; - - default: - abort(); - } - - if (srctype) - *srctype = src; - - return (0); -} 
- -/* - * Check if the bootfs name has the same pool name as it is set to. - * Assuming bootfs is a valid dataset name. - */ -static boolean_t -bootfs_name_valid(const char *pool, const char *bootfs) -{ - int len = strlen(pool); - - if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT)) - return (B_FALSE); - - if (strncmp(pool, bootfs, len) == 0 && - (bootfs[len] == '/' || bootfs[len] == '\0')) - return (B_TRUE); - - return (B_FALSE); -} - -boolean_t -zpool_is_bootable(zpool_handle_t *zhp) -{ - char bootfs[ZFS_MAX_DATASET_NAME_LEN]; - - return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, - sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-", - sizeof (bootfs)) != 0); -} - - -/* - * Given an nvlist of zpool properties to be set, validate that they are - * correct, and parse any numeric properties (index, boolean, etc) if they are - * specified as strings. - */ -static nvlist_t * -zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, - nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf) -{ - nvpair_t *elem; - nvlist_t *retprops; - zpool_prop_t prop; - char *strval; - uint64_t intval; - char *slash, *check; - struct stat64 statbuf; - zpool_handle_t *zhp; - - if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) { - (void) no_memory(hdl); - return (NULL); - } - - elem = NULL; - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - const char *propname = nvpair_name(elem); - - prop = zpool_name_to_prop(propname); - if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) { - int err; - char *fname = strchr(propname, '@') + 1; - - err = zfeature_lookup_name(fname, NULL); - if (err != 0) { - ASSERT3U(err, ==, ENOENT); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid feature '%s'"), fname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (nvpair_type(elem) != DATA_TYPE_STRING) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a string"), propname); - (void) zfs_error(hdl, 
EZFS_BADPROP, errbuf); - goto error; - } - - (void) nvpair_value_string(elem, &strval); - if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set to " - "'enabled'"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (nvlist_add_uint64(retprops, propname, 0) != 0) { - (void) no_memory(hdl); - goto error; - } - continue; - } - - /* - * Make sure this property is valid and applies to this type. - */ - if (prop == ZPOOL_PROP_INVAL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid property '%s'"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (zpool_prop_readonly(prop)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " - "is readonly"), propname); - (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); - goto error; - } - - if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops, - &strval, &intval, errbuf) != 0) - goto error; - - /* - * Perform additional checking for specific properties. 
- */ - switch (prop) { - case ZPOOL_PROP_VERSION: - if (intval < version || - !SPA_VERSION_IS_SUPPORTED(intval)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' number %d is invalid."), - propname, intval); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_BOOTSIZE: - if (!flags.create) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set during pool " - "creation"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_BOOTFS: - if (flags.create || flags.import) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' cannot be set at creation " - "or import time"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (version < SPA_VERSION_BOOTFS) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded to support " - "'%s' property"), propname); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - goto error; - } - - /* - * bootfs property value has to be a dataset name and - * the dataset has to be in the same pool as it sets to. 
- */ - if (strval[0] != '\0' && !bootfs_name_valid(poolname, - strval)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " - "is an invalid name"), strval); - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - goto error; - } - - if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not open pool '%s'"), poolname); - (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); - goto error; - } - zpool_close(zhp); - break; - - case ZPOOL_PROP_ALTROOT: - if (!flags.create && !flags.import) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set during pool " - "creation or import"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - - if (strval[0] != '/') { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "bad alternate root '%s'"), strval); - (void) zfs_error(hdl, EZFS_BADPATH, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_CACHEFILE: - if (strval[0] == '\0') - break; - - if (strcmp(strval, "none") == 0) - break; - - if (strval[0] != '/') { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' must be empty, an " - "absolute path, or 'none'"), propname); - (void) zfs_error(hdl, EZFS_BADPATH, errbuf); - goto error; - } - - slash = strrchr(strval, '/'); - - if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || - strcmp(slash, "/..") == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is not a valid file"), strval); - (void) zfs_error(hdl, EZFS_BADPATH, errbuf); - goto error; - } - - *slash = '\0'; - - if (strval[0] != '\0' && - (stat64(strval, &statbuf) != 0 || - !S_ISDIR(statbuf.st_mode))) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is not a valid directory"), - strval); - (void) zfs_error(hdl, EZFS_BADPATH, errbuf); - goto error; - } - - *slash = '/'; - break; - - case ZPOOL_PROP_COMMENT: - for (check = strval; *check != '\0'; check++) { - if (!isprint(*check)) { - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, - "comment may only have printable " - 
"characters")); - (void) zfs_error(hdl, EZFS_BADPROP, - errbuf); - goto error; - } - } - if (strlen(strval) > ZPROP_MAX_COMMENT) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "comment must not exceed %d characters"), - ZPROP_MAX_COMMENT); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_READONLY: - if (!flags.import) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set at " - "import time"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_TNAME: - if (!flags.create) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s' can only be set at " - "creation time"), propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - case ZPOOL_PROP_MULTIHOST: - if (get_system_hostid() == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "requires a non-zero system hostid")); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; - } - break; - - default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property '%s'(%d) not defined"), propname, prop); - break; - } - } - - return (retprops); -error: - nvlist_free(retprops); - return (NULL); -} - -/* - * Set zpool property : propname=propval. 
- */ -int -zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) -{ - zfs_cmd_t zc = { 0 }; - int ret = -1; - char errbuf[1024]; - nvlist_t *nvl = NULL; - nvlist_t *realprops; - uint64_t version; - prop_flags_t flags = { 0 }; - - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), - zhp->zpool_name); - - if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) - return (no_memory(zhp->zpool_hdl)); - - if (nvlist_add_string(nvl, propname, propval) != 0) { - nvlist_free(nvl); - return (no_memory(zhp->zpool_hdl)); - } - - version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); - if ((realprops = zpool_valid_proplist(zhp->zpool_hdl, - zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) { - nvlist_free(nvl); - return (-1); - } - - nvlist_free(nvl); - nvl = realprops; - - /* - * Execute the corresponding ioctl() to set this property. - */ - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - - if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) { - nvlist_free(nvl); - return (-1); - } - - ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc); - - zcmd_free_nvlists(&zc); - nvlist_free(nvl); - - if (ret) - (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf); - else - (void) zpool_props_refresh(zhp); - - return (ret); -} - -int -zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) -{ - libzfs_handle_t *hdl = zhp->zpool_hdl; - zprop_list_t *entry; - char buf[ZFS_MAXPROPLEN]; - nvlist_t *features = NULL; - zprop_list_t **last; - boolean_t firstexpand = (NULL == *plp); - - if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0) - return (-1); - - last = plp; - while (*last != NULL) - last = &(*last)->pl_next; - - if ((*plp)->pl_all) - features = zpool_get_features(zhp); - - if ((*plp)->pl_all && firstexpand) { - for (int i = 0; i < SPA_FEATURES; i++) { - zprop_list_t *entry = zfs_alloc(hdl, - sizeof (zprop_list_t)); - entry->pl_prop = ZPROP_INVAL; - 
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s", - spa_feature_table[i].fi_uname); - entry->pl_width = strlen(entry->pl_user_prop); - entry->pl_all = B_TRUE; - - *last = entry; - last = &entry->pl_next; - } - } - - /* add any unsupported features */ - for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL); - nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) { - char *propname; - boolean_t found; - zprop_list_t *entry; - - if (zfeature_is_supported(nvpair_name(nvp))) - continue; - - propname = zfs_asprintf(hdl, "unsupported@%s", - nvpair_name(nvp)); - - /* - * Before adding the property to the list make sure that no - * other pool already added the same property. - */ - found = B_FALSE; - entry = *plp; - while (entry != NULL) { - if (entry->pl_user_prop != NULL && - strcmp(propname, entry->pl_user_prop) == 0) { - found = B_TRUE; - break; - } - entry = entry->pl_next; - } - if (found) { - free(propname); - continue; - } - - entry = zfs_alloc(hdl, sizeof (zprop_list_t)); - entry->pl_prop = ZPROP_INVAL; - entry->pl_user_prop = propname; - entry->pl_width = strlen(entry->pl_user_prop); - entry->pl_all = B_TRUE; - - *last = entry; - last = &entry->pl_next; - } - - for (entry = *plp; entry != NULL; entry = entry->pl_next) { - - if (entry->pl_fixed) - continue; - - if (entry->pl_prop != ZPROP_INVAL && - zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf), - NULL, B_FALSE) == 0) { - if (strlen(buf) > entry->pl_width) - entry->pl_width = strlen(buf); - } - } - - return (0); -} - -/* - * Get the state for the given feature on the given ZFS pool. 
- */ -int -zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf, - size_t len) -{ - uint64_t refcount; - boolean_t found = B_FALSE; - nvlist_t *features = zpool_get_features(zhp); - boolean_t supported; - const char *feature = strchr(propname, '@') + 1; - - supported = zpool_prop_feature(propname); - ASSERT(supported || zpool_prop_unsupported(propname)); - - /* - * Convert from feature name to feature guid. This conversion is - * unecessary for unsupported@... properties because they already - * use guids. - */ - if (supported) { - int ret; - spa_feature_t fid; - - ret = zfeature_lookup_name(feature, &fid); - if (ret != 0) { - (void) strlcpy(buf, "-", len); - return (ENOTSUP); - } - feature = spa_feature_table[fid].fi_guid; - } - - if (nvlist_lookup_uint64(features, feature, &refcount) == 0) - found = B_TRUE; - - if (supported) { - if (!found) { - (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len); - } else { - if (refcount == 0) - (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len); - else - (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len); - } - } else { - if (found) { - if (refcount == 0) { - (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE); - } else { - (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY); - } - } else { - (void) strlcpy(buf, "-", len); - return (ENOTSUP); - } - } - - return (0); -} - -/* - * Don't start the slice at the default block of 34; many storage - * devices will use a stripe width of 128k, so start there instead. - */ -#define NEW_START_BLOCK 256 - -/* - * Validate the given pool name, optionally putting an extended error message in - * 'buf'. - */ -boolean_t -zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) -{ - namecheck_err_t why; - char what; - int ret; - - ret = pool_namecheck(pool, &why, &what); - - /* - * The rules for reserved pool names were extended at a later point. - * But we need to support users with existing pools that may now be - * invalid. 
So we only check for this expanded set of names during a - * create (or import), and only in userland. - */ - if (ret == 0 && !isopen && - (strncmp(pool, "mirror", 6) == 0 || - strncmp(pool, "raidz", 5) == 0 || - strncmp(pool, "spare", 5) == 0 || - strcmp(pool, "log") == 0)) { - if (hdl != NULL) - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "name is reserved")); - return (B_FALSE); - } - - - if (ret != 0) { - if (hdl != NULL) { - switch (why) { - case NAME_ERR_TOOLONG: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "name is too long")); - break; - - case NAME_ERR_INVALCHAR: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "invalid character " - "'%c' in pool name"), what); - break; - - case NAME_ERR_NOLETTER: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "name must begin with a letter")); - break; - - case NAME_ERR_RESERVED: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "name is reserved")); - break; - - case NAME_ERR_DISKLIKE: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool name is reserved")); - break; - - case NAME_ERR_LEADING_SLASH: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "leading slash in name")); - break; - - case NAME_ERR_EMPTY_COMPONENT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "empty component in name")); - break; - - case NAME_ERR_TRAILING_SLASH: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "trailing slash in name")); - break; - - case NAME_ERR_MULTIPLE_DELIMITERS: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "multiple '@' and/or '#' delimiters in " - "name")); - break; - - default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "(%d) not defined"), why); - break; - } - } - return (B_FALSE); - } - - return (B_TRUE); -} - -/* - * Open a handle to the given pool, even if the pool is currently in the FAULTED - * state. - */ -zpool_handle_t * -zpool_open_canfail(libzfs_handle_t *hdl, const char *pool) -{ - zpool_handle_t *zhp; - boolean_t missing; - - /* - * Make sure the pool name is valid. 
- */ - if (!zpool_name_valid(hdl, B_TRUE, pool)) { - (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), - pool); - return (NULL); - } - - if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) - return (NULL); - - zhp->zpool_hdl = hdl; - (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); - - if (zpool_refresh_stats(zhp, &missing) != 0) { - zpool_close(zhp); - return (NULL); - } - - if (missing) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool")); - (void) zfs_error_fmt(hdl, EZFS_NOENT, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); - zpool_close(zhp); - return (NULL); - } - - return (zhp); -} - -/* - * Like the above, but silent on error. Used when iterating over pools (because - * the configuration cache may be out of date). - */ -int -zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret) -{ - zpool_handle_t *zhp; - boolean_t missing; - - if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) - return (-1); - - zhp->zpool_hdl = hdl; - (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); - - if (zpool_refresh_stats(zhp, &missing) != 0) { - zpool_close(zhp); - return (-1); - } - - if (missing) { - zpool_close(zhp); - *ret = NULL; - return (0); - } - - *ret = zhp; - return (0); -} - -/* - * Similar to zpool_open_canfail(), but refuses to open pools in the faulted - * state. - */ -zpool_handle_t * -zpool_open(libzfs_handle_t *hdl, const char *pool) -{ - zpool_handle_t *zhp; - - if ((zhp = zpool_open_canfail(hdl, pool)) == NULL) - return (NULL); - - if (zhp->zpool_state == POOL_STATE_UNAVAIL) { - (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name); - zpool_close(zhp); - return (NULL); - } - - return (zhp); -} - -/* - * Close the handle. Simply frees the memory associated with the handle. 
- */ -void -zpool_close(zpool_handle_t *zhp) -{ - nvlist_free(zhp->zpool_config); - nvlist_free(zhp->zpool_old_config); - nvlist_free(zhp->zpool_props); - free(zhp); -} - -/* - * Return the name of the pool. - */ -const char * -zpool_get_name(zpool_handle_t *zhp) -{ - return (zhp->zpool_name); -} - - -/* - * Return the state of the pool (ACTIVE or UNAVAILABLE) - */ -int -zpool_get_state(zpool_handle_t *zhp) -{ - return (zhp->zpool_state); -} - -/* - * Check if vdev list contains a special vdev - */ -static boolean_t -zpool_has_special_vdev(nvlist_t *nvroot) -{ - nvlist_t **child; - uint_t children; - - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - for (uint_t c = 0; c < children; c++) { - char *bias; - - if (nvlist_lookup_string(child[c], - ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0 && - strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) { - return (B_TRUE); - } - } - } - return (B_FALSE); -} - -/* - * Create the named pool, using the provided vdev list. It is assumed - * that the consumer has already validated the contents of the nvlist, so we - * don't have to worry about error semantics. 
- */ -int -zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, - nvlist_t *props, nvlist_t *fsprops) -{ - zfs_cmd_t zc = { 0 }; - nvlist_t *zc_fsprops = NULL; - nvlist_t *zc_props = NULL; - char msg[1024]; - int ret = -1; - - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot create '%s'"), pool); - - if (!zpool_name_valid(hdl, B_FALSE, pool)) - return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); - - if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) - return (-1); - - if (props) { - prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE }; - - if ((zc_props = zpool_valid_proplist(hdl, pool, props, - SPA_VERSION_1, flags, msg)) == NULL) { - goto create_failed; - } - } - - if (fsprops) { - uint64_t zoned; - char *zonestr; - - zoned = ((nvlist_lookup_string(fsprops, - zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) && - strcmp(zonestr, "on") == 0); - - if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, - fsprops, zoned, NULL, NULL, msg)) == NULL) { - goto create_failed; - } - - if (nvlist_exists(zc_fsprops, - zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) && - !zpool_has_special_vdev(nvroot)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "%s property requires a special vdev"), - zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)); - (void) zfs_error(hdl, EZFS_BADPROP, msg); - goto create_failed; - } - - if (!zc_props && - (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { - goto create_failed; - } - if (nvlist_add_nvlist(zc_props, - ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { - goto create_failed; - } - } - - if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) - goto create_failed; - - (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); - - if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) { - - zcmd_free_nvlists(&zc); - nvlist_free(zc_props); - nvlist_free(zc_fsprops); - - switch (errno) { - case EBUSY: - /* - * This can happen if the user has specified the same - * device multiple times. 
We can't reliably detect this - * until we try to add it and see we already have a - * label. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more vdevs refer to the same device")); - return (zfs_error(hdl, EZFS_BADDEV, msg)); - - case ERANGE: - /* - * This happens if the record size is smaller or larger - * than the allowed size range, or not a power of 2. - * - * NOTE: although zfs_valid_proplist is called earlier, - * this case may have slipped through since the - * pool does not exist yet and it is therefore - * impossible to read properties e.g. max blocksize - * from the pool. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "record size invalid")); - return (zfs_error(hdl, EZFS_BADPROP, msg)); - - case EOVERFLOW: - /* - * This occurs when one of the devices is below - * SPA_MINDEVSIZE. Unfortunately, we can't detect which - * device was the problem device since there's no - * reliable way to determine device size from userland. - */ - { - char buf[64]; - - zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); - - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more devices is less than the " - "minimum size (%s)"), buf); - } - return (zfs_error(hdl, EZFS_BADDEV, msg)); - - case ENOSPC: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more devices is out of space")); - return (zfs_error(hdl, EZFS_BADDEV, msg)); - - case ENOTBLK: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cache device must be a disk or disk slice")); - return (zfs_error(hdl, EZFS_BADDEV, msg)); - - default: - return (zpool_standard_error(hdl, errno, msg)); - } - } - -create_failed: - zcmd_free_nvlists(&zc); - nvlist_free(zc_props); - nvlist_free(zc_fsprops); - return (ret); -} - -/* - * Destroy the given pool. It is up to the caller to ensure that there are no - * datasets left in the pool. 
- */ -int -zpool_destroy(zpool_handle_t *zhp, const char *log_str) -{ - zfs_cmd_t zc = { 0 }; - zfs_handle_t *zfp = NULL; - libzfs_handle_t *hdl = zhp->zpool_hdl; - char msg[1024]; - - if (zhp->zpool_state == POOL_STATE_ACTIVE && - (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) - return (-1); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_history = (uint64_t)(uintptr_t)log_str; - - if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot destroy '%s'"), zhp->zpool_name); - - if (errno == EROFS) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more devices is read only")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - } else { - (void) zpool_standard_error(hdl, errno, msg); - } - - if (zfp) - zfs_close(zfp); - return (-1); - } - - if (zfp) { - remove_mountpoint(zfp); - zfs_close(zfp); - } - - return (0); -} - -/* - * Create a checkpoint in the given pool. - */ -int -zpool_checkpoint(zpool_handle_t *zhp) -{ - libzfs_handle_t *hdl = zhp->zpool_hdl; - char msg[1024]; - int error; - - error = lzc_pool_checkpoint(zhp->zpool_name); - if (error != 0) { - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot checkpoint '%s'"), zhp->zpool_name); - (void) zpool_standard_error(hdl, error, msg); - return (-1); - } - - return (0); -} - -/* - * Discard the checkpoint from the given pool. - */ -int -zpool_discard_checkpoint(zpool_handle_t *zhp) -{ - libzfs_handle_t *hdl = zhp->zpool_hdl; - char msg[1024]; - int error; - - error = lzc_pool_checkpoint_discard(zhp->zpool_name); - if (error != 0) { - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot discard checkpoint in '%s'"), zhp->zpool_name); - (void) zpool_standard_error(hdl, error, msg); - return (-1); - } - - return (0); -} - -/* - * Add the given vdevs to the pool. 
The caller must have already performed the - * necessary verification to ensure that the vdev specification is well-formed. - */ -int -zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) -{ - zfs_cmd_t zc = { 0 }; - int ret; - libzfs_handle_t *hdl = zhp->zpool_hdl; - char msg[1024]; - nvlist_t **spares, **l2cache; - uint_t nspares, nl2cache; - - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot add to '%s'"), zhp->zpool_name); - - if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < - SPA_VERSION_SPARES && - nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, - &spares, &nspares) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " - "upgraded to add hot spares")); - return (zfs_error(hdl, EZFS_BADVERSION, msg)); - } - - if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < - SPA_VERSION_L2CACHE && - nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, - &l2cache, &nl2cache) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " - "upgraded to add cache devices")); - return (zfs_error(hdl, EZFS_BADVERSION, msg)); - } - - if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) - return (-1); - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { - switch (errno) { - case EBUSY: - /* - * This can happen if the user has specified the same - * device multiple times. We can't reliably detect this - * until we try to add it and see we already have a - * label. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more vdevs refer to the same device")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case EINVAL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid config; a pool with removing/removed " - "vdevs does not support adding raidz vdevs")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case EOVERFLOW: - /* - * This occurrs when one of the devices is below - * SPA_MINDEVSIZE. 
Unfortunately, we can't detect which - * device was the problem device since there's no - * reliable way to determine device size from userland. - */ - { - char buf[64]; - - zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); - - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "device is less than the minimum " - "size (%s)"), buf); - } - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded to add these vdevs")); - (void) zfs_error(hdl, EZFS_BADVERSION, msg); - break; - - case EDOM: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "root pool can not have multiple vdevs" - " or separate logs")); - (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg); - break; - - case ENOTBLK: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cache device must be a disk or disk slice")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - default: - (void) zpool_standard_error(hdl, errno, msg); - } - - ret = -1; - } else { - ret = 0; - } - - zcmd_free_nvlists(&zc); - - return (ret); -} - -/* - * Exports the pool from the system. The caller must ensure that there are no - * mounted datasets in the pool. 
- */ -static int -zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, - const char *log_str) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot export '%s'"), zhp->zpool_name); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = force; - zc.zc_guid = hardforce; - zc.zc_history = (uint64_t)(uintptr_t)log_str; - - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { - switch (errno) { - case EXDEV: - zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, - "use '-f' to override the following errors:\n" - "'%s' has an active shared spare which could be" - " used by other pools once '%s' is exported."), - zhp->zpool_name, zhp->zpool_name); - return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, - msg)); - default: - return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, - msg)); - } - } - - return (0); -} - -int -zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) -{ - return (zpool_export_common(zhp, force, B_FALSE, log_str)); -} - -int -zpool_export_force(zpool_handle_t *zhp, const char *log_str) -{ - return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); -} - -static void -zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, - nvlist_t *config) -{ - nvlist_t *nv = NULL; - uint64_t rewindto; - int64_t loss = -1; - struct tm t; - char timestr[128]; - - if (!hdl->libzfs_printerr || config == NULL) - return; - - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || - nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) { - return; - } - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) - return; - (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); - - if (localtime_r((time_t *)&rewindto, &t) != NULL && - strftime(timestr, 128, 0, &t) != 0) { - if (dryrun) { - (void) printf(dgettext(TEXT_DOMAIN, - "Would be able to return %s " - 
"to its state as of %s.\n"), - name, timestr); - } else { - (void) printf(dgettext(TEXT_DOMAIN, - "Pool %s returned to its state as of %s.\n"), - name, timestr); - } - if (loss > 120) { - (void) printf(dgettext(TEXT_DOMAIN, - "%s approximately %lld "), - dryrun ? "Would discard" : "Discarded", - (loss + 30) / 60); - (void) printf(dgettext(TEXT_DOMAIN, - "minutes of transactions.\n")); - } else if (loss > 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "%s approximately %lld "), - dryrun ? "Would discard" : "Discarded", loss); - (void) printf(dgettext(TEXT_DOMAIN, - "seconds of transactions.\n")); - } - } -} - -void -zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, - nvlist_t *config) -{ - nvlist_t *nv = NULL; - int64_t loss = -1; - uint64_t edata = UINT64_MAX; - uint64_t rewindto; - struct tm t; - char timestr[128]; - - if (!hdl->libzfs_printerr) - return; - - if (reason >= 0) - (void) printf(dgettext(TEXT_DOMAIN, "action: ")); - else - (void) printf(dgettext(TEXT_DOMAIN, "\t")); - - /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || - nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 || - nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) - goto no_info; - - (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS, - &edata); - - (void) printf(dgettext(TEXT_DOMAIN, - "Recovery is possible, but will result in some data loss.\n")); - - if (localtime_r((time_t *)&rewindto, &t) != NULL && - strftime(timestr, 128, 0, &t) != 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "\tReturning the pool to its state as of %s\n" - "\tshould correct the problem. 
"), - timestr); - } else { - (void) printf(dgettext(TEXT_DOMAIN, - "\tReverting the pool to an earlier state " - "should correct the problem.\n\t")); - } - - if (loss > 120) { - (void) printf(dgettext(TEXT_DOMAIN, - "Approximately %lld minutes of data\n" - "\tmust be discarded, irreversibly. "), (loss + 30) / 60); - } else if (loss > 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "Approximately %lld seconds of data\n" - "\tmust be discarded, irreversibly. "), loss); - } - if (edata != 0 && edata != UINT64_MAX) { - if (edata == 1) { - (void) printf(dgettext(TEXT_DOMAIN, - "After rewind, at least\n" - "\tone persistent user-data error will remain. ")); - } else { - (void) printf(dgettext(TEXT_DOMAIN, - "After rewind, several\n" - "\tpersistent user-data errors will remain. ")); - } - } - (void) printf(dgettext(TEXT_DOMAIN, - "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), - reason >= 0 ? "clear" : "import", name); - - (void) printf(dgettext(TEXT_DOMAIN, - "A scrub of the pool\n" - "\tis strongly recommended after recovery.\n")); - return; - -no_info: - (void) printf(dgettext(TEXT_DOMAIN, - "Destroy and re-create the pool from\n\ta backup source.\n")); -} - -/* - * zpool_import() is a contracted interface. Should be kept the same - * if possible. - * - * Applications should use zpool_import_props() to import a pool with - * new properties value to be set. 
- */ -int -zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, - char *altroot) -{ - nvlist_t *props = NULL; - int ret; - - if (altroot != NULL) { - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { - return (zfs_error_fmt(hdl, EZFS_NOMEM, - dgettext(TEXT_DOMAIN, "cannot import '%s'"), - newname)); - } - - if (nvlist_add_string(props, - zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 || - nvlist_add_string(props, - zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) { - nvlist_free(props); - return (zfs_error_fmt(hdl, EZFS_NOMEM, - dgettext(TEXT_DOMAIN, "cannot import '%s'"), - newname)); - } - } - - ret = zpool_import_props(hdl, config, newname, props, - ZFS_IMPORT_NORMAL); - nvlist_free(props); - return (ret); -} - -static void -print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv, - int indent) -{ - nvlist_t **child; - uint_t c, children; - char *vname; - uint64_t is_log = 0; - - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, - &is_log); - - if (name != NULL) - (void) printf("\t%*s%s%s\n", indent, "", name, - is_log ? 
" [log]" : ""); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return; - - for (c = 0; c < children; c++) { - vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID); - print_vdev_tree(hdl, vname, child[c], indent + 2); - free(vname); - } -} - -void -zpool_print_unsup_feat(nvlist_t *config) -{ - nvlist_t *nvinfo, *unsup_feat; - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == - 0); - verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT, - &unsup_feat) == 0); - - for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL; - nvp = nvlist_next_nvpair(unsup_feat, nvp)) { - char *desc; - - verify(nvpair_type(nvp) == DATA_TYPE_STRING); - verify(nvpair_value_string(nvp, &desc) == 0); - - if (strlen(desc) > 0) - (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc); - else - (void) printf("\t%s\n", nvpair_name(nvp)); - } -} - -/* - * Import the given pool using the known configuration and a list of - * properties to be set. The configuration should have come from - * zpool_find_import(). The 'newname' parameters control whether the pool - * is imported with a different name. 
- */ -int -zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, - nvlist_t *props, int flags) -{ - zfs_cmd_t zc = { 0 }; - zpool_load_policy_t policy; - nvlist_t *nv = NULL; - nvlist_t *nvinfo = NULL; - nvlist_t *missing = NULL; - char *thename; - char *origname; - int ret; - int error = 0; - char errbuf[1024]; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &origname) == 0); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot import pool '%s'"), origname); - - if (newname != NULL) { - if (!zpool_name_valid(hdl, B_FALSE, newname)) - return (zfs_error_fmt(hdl, EZFS_INVALIDNAME, - dgettext(TEXT_DOMAIN, "cannot import '%s'"), - newname)); - thename = (char *)newname; - } else { - thename = origname; - } - - if (props != NULL) { - uint64_t version; - prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &version) == 0); - - if ((props = zpool_valid_proplist(hdl, origname, - props, version, flags, errbuf)) == NULL) - return (-1); - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); - } - nvlist_free(props); - } - - (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name)); - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &zc.zc_guid) == 0); - - if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - - zc.zc_cookie = flags; - while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 && - errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - } - if (ret != 0) - error = errno; - - (void) zcmd_read_dst_nvlist(hdl, &zc, &nv); - - zcmd_free_nvlists(&zc); - - zpool_get_load_policy(config, &policy); - - if (error) { - char desc[1024]; - char aux[256]; - - /* - * 
Dry-run failed, but we print out what success - * looks like if we found a best txg - */ - if (policy.zlp_rewind & ZPOOL_TRY_REWIND) { - zpool_rewind_exclaim(hdl, newname ? origname : thename, - B_TRUE, nv); - nvlist_free(nv); - return (-1); - } - - if (newname == NULL) - (void) snprintf(desc, sizeof (desc), - dgettext(TEXT_DOMAIN, "cannot import '%s'"), - thename); - else - (void) snprintf(desc, sizeof (desc), - dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"), - origname, thename); - - switch (error) { - case ENOTSUP: - if (nv != NULL && nvlist_lookup_nvlist(nv, - ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && - nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) { - (void) printf(dgettext(TEXT_DOMAIN, "This " - "pool uses the following feature(s) not " - "supported by this system:\n")); - zpool_print_unsup_feat(nv); - if (nvlist_exists(nvinfo, - ZPOOL_CONFIG_CAN_RDONLY)) { - (void) printf(dgettext(TEXT_DOMAIN, - "All unsupported features are only " - "required for writing to the pool." - "\nThe pool can be imported using " - "'-o readonly=on'.\n")); - } - } - /* - * Unsupported version. 
- */ - (void) zfs_error(hdl, EZFS_BADVERSION, desc); - break; - - case EREMOTEIO: - if (nv != NULL && nvlist_lookup_nvlist(nv, - ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) { - char *hostname = "<unknown>"; - uint64_t hostid = 0; - mmp_state_t mmp_state; - - mmp_state = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_STATE); - - if (nvlist_exists(nvinfo, - ZPOOL_CONFIG_MMP_HOSTNAME)) - hostname = fnvlist_lookup_string(nvinfo, - ZPOOL_CONFIG_MMP_HOSTNAME); - - if (nvlist_exists(nvinfo, - ZPOOL_CONFIG_MMP_HOSTID)) - hostid = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_HOSTID); - - if (mmp_state == MMP_STATE_ACTIVE) { - (void) snprintf(aux, sizeof (aux), - dgettext(TEXT_DOMAIN, "pool is imp" - "orted on host '%s' (hostid=%lx).\n" - "Export the pool on the other " - "system, then run 'zpool import'."), - hostname, (unsigned long) hostid); - } else if (mmp_state == MMP_STATE_NO_HOSTID) { - (void) snprintf(aux, sizeof (aux), - dgettext(TEXT_DOMAIN, "pool has " - "the multihost property on and " - "the\nsystem's hostid is not " - "set.\n")); - } - - (void) zfs_error_aux(hdl, aux); - } - (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc); - break; - - case EINVAL: - (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); - break; - - case EROFS: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more devices is read only")); - (void) zfs_error(hdl, EZFS_BADDEV, desc); - break; - - case ENXIO: - if (nv && nvlist_lookup_nvlist(nv, - ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && - nvlist_lookup_nvlist(nvinfo, - ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "The devices below are missing or " - "corrupted, use '-m' to import the pool " - "anyway:\n")); - print_vdev_tree(hdl, NULL, missing, 2); - (void) printf("\n"); - } - (void) zpool_standard_error(hdl, error, desc); - break; - - case EEXIST: - (void) zpool_standard_error(hdl, error, desc); - break; - case ENAMETOOLONG: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "new name of at least one dataset is 
longer than " - "the maximum allowable length")); - (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc); - break; - default: - (void) zpool_standard_error(hdl, error, desc); - zpool_explain_recover(hdl, - newname ? origname : thename, -error, nv); - break; - } - - nvlist_free(nv); - ret = -1; - } else { - zpool_handle_t *zhp; - - /* - * This should never fail, but play it safe anyway. - */ - if (zpool_open_silent(hdl, thename, &zhp) != 0) - ret = -1; - else if (zhp != NULL) - zpool_close(zhp); - if (policy.zlp_rewind & - (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { - zpool_rewind_exclaim(hdl, newname ? origname : thename, - ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv); - } - nvlist_free(nv); - return (0); - } - - return (ret); -} - -/* - * Scan the pool. - */ -int -zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - int err; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = func; - zc.zc_flags = cmd; - - if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0) - return (0); - - err = errno; - - /* ECANCELED on a scrub means we resumed a paused scrub */ - if (err == ECANCELED && func == POOL_SCAN_SCRUB && - cmd == POOL_SCRUB_NORMAL) - return (0); - - if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL) - return (0); - - if (func == POOL_SCAN_SCRUB) { - if (cmd == POOL_SCRUB_PAUSE) { - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot pause scrubbing %s"), zc.zc_name); - } else { - assert(cmd == POOL_SCRUB_NORMAL); - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot scrub %s"), zc.zc_name); - } - } else if (func == POOL_SCAN_NONE) { - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), - zc.zc_name); - } else { - assert(!"unexpected result"); - } - - if (err == EBUSY) { - nvlist_t *nvroot; - pool_scan_stat_t *ps = NULL; - uint_t psc; - - 
verify(nvlist_lookup_nvlist(zhp->zpool_config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - (void) nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); - if (ps && ps->pss_func == POOL_SCAN_SCRUB) { - if (cmd == POOL_SCRUB_PAUSE) - return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg)); - else - return (zfs_error(hdl, EZFS_SCRUBBING, msg)); - } else { - return (zfs_error(hdl, EZFS_RESILVERING, msg)); - } - } else if (err == ENOENT) { - return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); - } else { - return (zpool_standard_error(hdl, err, msg)); - } -} - -static int -xlate_init_err(int err) -{ - switch (err) { - case ENODEV: - return (EZFS_NODEVICE); - case EINVAL: - case EROFS: - return (EZFS_BADDEV); - case EBUSY: - return (EZFS_INITIALIZING); - case ESRCH: - return (EZFS_NO_INITIALIZE); - } - return (err); -} - -/* - * Begin, suspend, or cancel the initialization (initializing of all free - * blocks) for the given vdevs in the given pool. - */ -int -zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, - nvlist_t *vds) -{ - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - nvlist_t *errlist; - - /* translate vdev names to guids */ - nvlist_t *vdev_guids = fnvlist_alloc(); - nvlist_t *guids_to_paths = fnvlist_alloc(); - boolean_t spare, cache; - nvlist_t *tgt; - nvpair_t *elem; - - for (elem = nvlist_next_nvpair(vds, NULL); elem != NULL; - elem = nvlist_next_nvpair(vds, elem)) { - char *vd_path = nvpair_name(elem); - tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache, NULL); - - if ((tgt == NULL) || cache || spare) { - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot initialize '%s'"), - vd_path); - int err = (tgt == NULL) ? EZFS_NODEVICE : - (spare ? 
EZFS_ISSPARE : EZFS_ISL2CACHE); - fnvlist_free(vdev_guids); - fnvlist_free(guids_to_paths); - return (zfs_error(hdl, err, msg)); - } - - uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); - fnvlist_add_uint64(vdev_guids, vd_path, guid); - - (void) snprintf(msg, sizeof (msg), "%llu", guid); - fnvlist_add_string(guids_to_paths, msg, vd_path); - } - - int err = lzc_initialize(zhp->zpool_name, cmd_type, vdev_guids, - &errlist); - fnvlist_free(vdev_guids); - - if (err == 0) { - fnvlist_free(guids_to_paths); - return (0); - } - - nvlist_t *vd_errlist = NULL; - if (errlist != NULL) { - vd_errlist = fnvlist_lookup_nvlist(errlist, - ZPOOL_INITIALIZE_VDEVS); - } - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "operation failed")); - - for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL; - elem = nvlist_next_nvpair(vd_errlist, elem)) { - int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem)); - char *path = fnvlist_lookup_string(guids_to_paths, - nvpair_name(elem)); - (void) zfs_error_fmt(hdl, vd_error, "cannot initialize '%s'", - path); - } - - fnvlist_free(guids_to_paths); - if (vd_errlist != NULL) - return (-1); - - return (zpool_standard_error(hdl, err, msg)); -} - -#ifdef illumos -/* - * This provides a very minimal check whether a given string is likely a - * c#t#d# style string. Users of this are expected to do their own - * verification of the s# part. - */ -#define CTD_CHECK(str) (str && str[0] == 'c' && isdigit(str[1])) - -/* - * More elaborate version for ones which may start with "/dev/dsk/" - * and the like. - */ -static int -ctd_check_path(char *str) -{ - /* - * If it starts with a slash, check the last component. - */ - if (str && str[0] == '/') { - char *tmp = strrchr(str, '/'); - - /* - * If it ends in "/old", check the second-to-last - * component of the string instead. 
- */ - if (tmp != str && strcmp(tmp, "/old") == 0) { - for (tmp--; *tmp != '/'; tmp--) - ; - } - str = tmp + 1; - } - return (CTD_CHECK(str)); -} -#endif - -/* - * Find a vdev that matches the search criteria specified. We use the - * the nvpair name to determine how we should look for the device. - * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL - * spare; but FALSE if its an INUSE spare. - */ -static nvlist_t * -vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, - boolean_t *l2cache, boolean_t *log) -{ - uint_t c, children; - nvlist_t **child; - nvlist_t *ret; - uint64_t is_log; - char *srchkey; - nvpair_t *pair = nvlist_next_nvpair(search, NULL); - - /* Nothing to look for */ - if (search == NULL || pair == NULL) - return (NULL); - - /* Obtain the key we will use to search */ - srchkey = nvpair_name(pair); - - switch (nvpair_type(pair)) { - case DATA_TYPE_UINT64: - if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { - uint64_t srchval, theguid; - - verify(nvpair_value_uint64(pair, &srchval) == 0); - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, - &theguid) == 0); - if (theguid == srchval) - return (nv); - } - break; - - case DATA_TYPE_STRING: { - char *srchval, *val; - - verify(nvpair_value_string(pair, &srchval) == 0); - if (nvlist_lookup_string(nv, srchkey, &val) != 0) - break; - - /* - * Search for the requested value. Special cases: - * - * - ZPOOL_CONFIG_PATH for whole disk entries. To support - * UEFI boot, these end in "s0" or "s0/old" or "s1" or - * "s1/old". The "s0" or "s1" part is hidden from the user, - * but included in the string, so this matches around it. - * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). - * - * Otherwise, all other searches are simple string compares. 
- */ -#ifdef illumos - if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && - ctd_check_path(val)) { - uint64_t wholedisk = 0; - - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk); - if (wholedisk) { - int slen = strlen(srchval); - int vlen = strlen(val); - - if (slen != vlen - 2) - break; - - /* - * make_leaf_vdev() should only set - * wholedisk for ZPOOL_CONFIG_PATHs which - * will include "/dev/dsk/", giving plenty of - * room for the indices used next. - */ - ASSERT(vlen >= 6); - - /* - * strings identical except trailing "s0" - */ - if ((strcmp(&val[vlen - 2], "s0") == 0 || - strcmp(&val[vlen - 2], "s1") == 0) && - strncmp(srchval, val, slen) == 0) - return (nv); - - /* - * strings identical except trailing "s0/old" - */ - if ((strcmp(&val[vlen - 6], "s0/old") == 0 || - strcmp(&val[vlen - 6], "s1/old") == 0) && - strcmp(&srchval[slen - 4], "/old") == 0 && - strncmp(srchval, val, slen - 4) == 0) - return (nv); - - break; - } - } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { -#else - if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { -#endif - char *type, *idx, *end, *p; - uint64_t id, vdev_id; - - /* - * Determine our vdev type, keeping in mind - * that the srchval is composed of a type and - * vdev id pair (i.e. mirror-4). - */ - if ((type = strdup(srchval)) == NULL) - return (NULL); - - if ((p = strrchr(type, '-')) == NULL) { - free(type); - break; - } - idx = p + 1; - *p = '\0'; - - /* - * If the types don't match then keep looking. - */ - if (strncmp(val, type, strlen(val)) != 0) { - free(type); - break; - } - - verify(zpool_vdev_is_interior(type)); - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, - &id) == 0); - - errno = 0; - vdev_id = strtoull(idx, &end, 10); - - free(type); - if (errno != 0) - return (NULL); - - /* - * Now verify that we have the correct vdev id. 
- */ - if (vdev_id == id) - return (nv); - } - - /* - * Common case - */ - if (strcmp(srchval, val) == 0) - return (nv); - break; - } - - default: - break; - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return (NULL); - - for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { - /* - * The 'is_log' value is only set for the toplevel - * vdev, not the leaf vdevs. So we always lookup the - * log device from the root of the vdev tree (where - * 'log' is non-NULL). - */ - if (log != NULL && - nvlist_lookup_uint64(child[c], - ZPOOL_CONFIG_IS_LOG, &is_log) == 0 && - is_log) { - *log = B_TRUE; - } - return (ret); - } - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { - *avail_spare = B_TRUE; - return (ret); - } - } - } - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, - avail_spare, l2cache, NULL)) != NULL) { - *l2cache = B_TRUE; - return (ret); - } - } - } - - return (NULL); -} - -/* - * Given a physical path (minus the "/devices" prefix), find the - * associated vdev. 
- */ -nvlist_t * -zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, - boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) -{ - nvlist_t *search, *nvroot, *ret; - - verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); - verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); - - verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - *avail_spare = B_FALSE; - *l2cache = B_FALSE; - if (log != NULL) - *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); - nvlist_free(search); - - return (ret); -} - -/* - * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). - */ -static boolean_t -zpool_vdev_is_interior(const char *name) -{ - if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || - strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 || - strncmp(name, - VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 || - strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) - return (B_TRUE); - return (B_FALSE); -} - -nvlist_t * -zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, - boolean_t *l2cache, boolean_t *log) -{ - char buf[MAXPATHLEN]; - char *end; - nvlist_t *nvroot, *search, *ret; - uint64_t guid; - - verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - guid = strtoull(path, &end, 10); - if (guid != 0 && *end == '\0') { - verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); - } else if (zpool_vdev_is_interior(path)) { - verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); - } else if (path[0] != '/') { - (void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path); - verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); - } else { - verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); - } - - verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - *avail_spare = B_FALSE; - 
*l2cache = B_FALSE; - if (log != NULL) - *log = B_FALSE; - ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); - nvlist_free(search); - - return (ret); -} - -static int -vdev_is_online(nvlist_t *nv) -{ - uint64_t ival; - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || - nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || - nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) - return (0); - - return (1); -} - -/* - * Helper function for zpool_get_physpaths(). - */ -static int -vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size, - size_t *bytes_written) -{ - size_t bytes_left, pos, rsz; - char *tmppath; - const char *format; - - if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH, - &tmppath) != 0) - return (EZFS_NODEVICE); - - pos = *bytes_written; - bytes_left = physpath_size - pos; - format = (pos == 0) ? "%s" : " %s"; - - rsz = snprintf(physpath + pos, bytes_left, format, tmppath); - *bytes_written += rsz; - - if (rsz >= bytes_left) { - /* if physpath was not copied properly, clear it */ - if (bytes_left != 0) { - physpath[pos] = 0; - } - return (EZFS_NOSPC); - } - return (0); -} - -static int -vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, - size_t *rsz, boolean_t is_spare) -{ - char *type; - int ret; - - if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) - return (EZFS_INVALCONFIG); - - if (strcmp(type, VDEV_TYPE_DISK) == 0) { - /* - * An active spare device has ZPOOL_CONFIG_IS_SPARE set. - * For a spare vdev, we only want to boot from the active - * spare device. 
- */ - if (is_spare) { - uint64_t spare = 0; - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, - &spare); - if (!spare) - return (EZFS_INVALCONFIG); - } - - if (vdev_is_online(nv)) { - if ((ret = vdev_get_one_physpath(nv, physpath, - phypath_size, rsz)) != 0) - return (ret); - } - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || - strcmp(type, VDEV_TYPE_RAIDZ) == 0 || - strcmp(type, VDEV_TYPE_REPLACING) == 0 || - (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { - nvlist_t **child; - uint_t count; - int i, ret; - - if (nvlist_lookup_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) - return (EZFS_INVALCONFIG); - - for (i = 0; i < count; i++) { - ret = vdev_get_physpaths(child[i], physpath, - phypath_size, rsz, is_spare); - if (ret == EZFS_NOSPC) - return (ret); - } - } - - return (EZFS_POOL_INVALARG); -} - -/* - * Get phys_path for a root pool config. - * Return 0 on success; non-zero on failure. - */ -static int -zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size) -{ - size_t rsz; - nvlist_t *vdev_root; - nvlist_t **child; - uint_t count; - char *type; - - rsz = 0; - - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &vdev_root) != 0) - return (EZFS_INVALCONFIG); - - if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 || - nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, - &child, &count) != 0) - return (EZFS_INVALCONFIG); - - /* - * root pool can only have a single top-level vdev. - */ - if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1) - return (EZFS_POOL_INVALARG); - - (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz, - B_FALSE); - - /* No online devices */ - if (rsz == 0) - return (EZFS_NODEVICE); - - return (0); -} - -/* - * Get phys_path for a root pool - * Return 0 on success; non-zero on failure. 
- */ -int -zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) -{ - return (zpool_get_config_physpath(zhp->zpool_config, physpath, - phypath_size)); -} - -/* - * If the device has being dynamically expanded then we need to relabel - * the disk to use the new unallocated space. - */ -static int -zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) -{ -#ifdef illumos - char path[MAXPATHLEN]; - char errbuf[1024]; - int fd, error; - int (*_efi_use_whole_disk)(int); - - if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT, - "efi_use_whole_disk")) == NULL) - return (-1); - - (void) snprintf(path, sizeof (path), "%s/%s", ZFS_RDISK_ROOT, name); - - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to open device"), name); - return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); - } - - /* - * It's possible that we might encounter an error if the device - * does not have any unallocated space left. If so, we simply - * ignore that error and continue on. - */ - error = _efi_use_whole_disk(fd); - (void) close(fd); - if (error && error != VT_ENOSPC) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to read disk capacity"), name); - return (zfs_error(hdl, EZFS_NOCAP, errbuf)); - } -#endif /* illumos */ - return (0); -} - -/* - * Bring the specified vdev online. The 'flags' parameter is a set of the - * ZFS_ONLINE_* flags. 
- */ -int -zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, - vdev_state_t *newstate) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - char *pathname; - nvlist_t *tgt; - boolean_t avail_spare, l2cache, islog; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - if (flags & ZFS_ONLINE_EXPAND) { - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot expand %s"), path); - } else { - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot online %s"), path); - } - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - &islog)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, msg)); - - if ((flags & ZFS_ONLINE_EXPAND || - zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) && - nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) { - uint64_t wholedisk = 0; - - (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk); - - /* - * XXX - L2ARC 1.0 devices can't support expansion. - */ - if (l2cache) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot expand cache devices")); - return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg)); - } - - if (wholedisk) { - pathname += strlen(ZFS_DISK_ROOT) + 1; - (void) zpool_relabel_disk(hdl, pathname); - } - } - - zc.zc_cookie = VDEV_STATE_ONLINE; - zc.zc_obj = flags; - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { - if (errno == EINVAL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " - "from this pool into a new one. 
Use '%s' " - "instead"), "zpool detach"); - return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg)); - } - return (zpool_standard_error(hdl, errno, msg)); - } - - *newstate = zc.zc_cookie; - return (0); -} - -/* - * Take the specified vdev offline - */ -int -zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - nvlist_t *tgt; - boolean_t avail_spare, l2cache; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot offline %s"), path); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - NULL)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, msg)); - - zc.zc_cookie = VDEV_STATE_OFFLINE; - zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0; - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) - return (0); - - switch (errno) { - case EBUSY: - - /* - * There are no other replicas of this device. - */ - return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); - - case EEXIST: - /* - * The log device has unplayed logs - */ - return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg)); - - default: - return (zpool_standard_error(hdl, errno, msg)); - } -} - -/* - * Mark the given vdev faulted. 
- */ -int -zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_FAULTED; - zc.zc_obj = aux; - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) - return (0); - - switch (errno) { - case EBUSY: - - /* - * There are no other replicas of this device. - */ - return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); - - default: - return (zpool_standard_error(hdl, errno, msg)); - } - -} - -/* - * Mark the given vdev degraded. - */ -int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_DEGRADED; - zc.zc_obj = aux; - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) - return (0); - - return (zpool_standard_error(hdl, errno, msg)); -} - -/* - * Returns TRUE if the given nvlist is a vdev that was originally swapped in as - * a hot spare. 
- */ -static boolean_t -is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which) -{ - nvlist_t **child; - uint_t c, children; - char *type; - - if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, - &type) == 0); - - if (strcmp(type, VDEV_TYPE_SPARE) == 0 && - children == 2 && child[which] == tgt) - return (B_TRUE); - - for (c = 0; c < children; c++) - if (is_replacing_spare(child[c], tgt, which)) - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * Attach new_disk (fully described by nvroot) to old_disk. - * If 'replacing' is specified, the new disk will replace the old one. - */ -int -zpool_vdev_attach(zpool_handle_t *zhp, - const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - int ret; - nvlist_t *tgt; - boolean_t avail_spare, l2cache, islog; - uint64_t val; - char *newname; - nvlist_t **child; - uint_t children; - nvlist_t *config_root; - libzfs_handle_t *hdl = zhp->zpool_hdl; - boolean_t rootpool = zpool_is_bootable(zhp); - - if (replacing) - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot replace %s with %s"), old_disk, new_disk); - else - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot attach %s to %s"), new_disk, old_disk); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache, - &islog)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, msg)); - - if (l2cache) - return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - zc.zc_cookie = replacing; - - if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0 || children != 1) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "new device must be a single disk")); 
- return (zfs_error(hdl, EZFS_INVALCONFIG, msg)); - } - - verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), - ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); - - if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL) - return (-1); - - /* - * If the target is a hot spare that has been swapped in, we can only - * replace it with another hot spare. - */ - if (replacing && - nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 && - (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache, - NULL) == NULL || !avail_spare) && - is_replacing_spare(config_root, tgt, 1)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "can only be replaced by another hot spare")); - free(newname); - return (zfs_error(hdl, EZFS_BADTARGET, msg)); - } - - free(newname); - - if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) - return (-1); - - ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc); - - zcmd_free_nvlists(&zc); - - if (ret == 0) { - if (rootpool) { - /* - * XXX need a better way to prevent user from - * booting up a half-baked vdev. - */ - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make " - "sure to wait until resilver is done " - "before rebooting.\n")); - (void) fprintf(stderr, "\n"); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "If " - "you boot from pool '%s', you may need to update\n" - "boot code on newly attached disk '%s'.\n\n" - "Assuming you use GPT partitioning and 'da0' is " - "your new boot disk\n" - "you may use the following command:\n\n" - "\tgpart bootcode -b /boot/pmbr -p " - "/boot/gptzfsboot -i 1 da0\n\n"), - zhp->zpool_name, new_disk); - } - return (0); - } - - switch (errno) { - case ENOTSUP: - /* - * Can't attach to or replace this type of vdev. 
- */ - if (replacing) { - uint64_t version = zpool_get_prop_int(zhp, - ZPOOL_PROP_VERSION, NULL); - - if (islog) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot replace a log with a spare")); - else if (version >= SPA_VERSION_MULTI_REPLACE) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "already in replacing/spare config; wait " - "for completion or use 'zpool detach'")); - else - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot replace a replacing device")); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "can only attach to mirrors and top-level " - "disks")); - } - (void) zfs_error(hdl, EZFS_BADTARGET, msg); - break; - - case EINVAL: - /* - * The new device must be a single disk. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "new device must be a single disk")); - (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); - break; - - case EBUSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, " - "or device removal is in progress"), - new_disk); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case EOVERFLOW: - /* - * The new device is too small. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "device is too small")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case EDOM: - /* - * The new device has a different alignment requirement. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "devices have different sector alignment")); - (void) zfs_error(hdl, EZFS_BADDEV, msg); - break; - - case ENAMETOOLONG: - /* - * The resulting top-level vdev spec won't fit in the label. - */ - (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg); - break; - - default: - (void) zpool_standard_error(hdl, errno, msg); - } - - return (-1); -} - -/* - * Detach the specified device. 
- */ -int -zpool_vdev_detach(zpool_handle_t *zhp, const char *path) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - nvlist_t *tgt; - boolean_t avail_spare, l2cache; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot detach %s"), path); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - NULL)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, msg)); - - if (l2cache) - return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0) - return (0); - - switch (errno) { - - case ENOTSUP: - /* - * Can't detach from this type of vdev. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only " - "applicable to mirror and replacing vdevs")); - (void) zfs_error(hdl, EZFS_BADTARGET, msg); - break; - - case EBUSY: - /* - * There are no other replicas of this device. - */ - (void) zfs_error(hdl, EZFS_NOREPLICAS, msg); - break; - - default: - (void) zpool_standard_error(hdl, errno, msg); - } - - return (-1); -} - -/* - * Find a mirror vdev in the source nvlist. - * - * The mchild array contains a list of disks in one of the top-level mirrors - * of the source pool. The schild array contains a list of disks that the - * user specified on the command line. We loop over the mchild array to - * see if any entry in the schild array matches. - * - * If a disk in the mchild array is found in the schild array, we return - * the index of that entry. Otherwise we return -1. 
- */ -static int -find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren, - nvlist_t **schild, uint_t schildren) -{ - uint_t mc; - - for (mc = 0; mc < mchildren; mc++) { - uint_t sc; - char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp, - mchild[mc], 0); - - for (sc = 0; sc < schildren; sc++) { - char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp, - schild[sc], 0); - boolean_t result = (strcmp(mpath, spath) == 0); - - free(spath); - if (result) { - free(mpath); - return (mc); - } - } - - free(mpath); - } - - return (-1); -} - -/* - * Split a mirror pool. If newroot points to null, then a new nvlist - * is generated and it is the responsibility of the caller to free it. - */ -int -zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, - nvlist_t *props, splitflags_t flags) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; - nvlist_t **varray = NULL, *zc_props = NULL; - uint_t c, children, newchildren, lastlog = 0, vcount, found = 0; - libzfs_handle_t *hdl = zhp->zpool_hdl; - uint64_t vers; - boolean_t freelist = B_FALSE, memory_err = B_TRUE; - int retval = 0; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name); - - if (!zpool_name_valid(hdl, B_FALSE, newname)) - return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); - - if ((config = zpool_get_config(zhp, NULL)) == NULL) { - (void) fprintf(stderr, gettext("Internal error: unable to " - "retrieve pool configuration\n")); - return (-1); - } - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) - == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0); - - if (props) { - prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; - if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name, - props, vers, flags, msg)) == NULL) - return (-1); - } - - if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, - &children) != 0) { - 
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Source pool is missing vdev tree")); - nvlist_free(zc_props); - return (-1); - } - - varray = zfs_alloc(hdl, children * sizeof (nvlist_t *)); - vcount = 0; - - if (*newroot == NULL || - nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, - &newchild, &newchildren) != 0) - newchildren = 0; - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE, is_hole = B_FALSE; - char *type; - nvlist_t **mchild, *vdev; - uint_t mchildren; - int entry; - - /* - * Unlike cache & spares, slogs are stored in the - * ZPOOL_CONFIG_CHILDREN array. We filter them out here. - */ - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, - &is_hole); - if (is_log || is_hole) { - /* - * Create a hole vdev and put it in the config. - */ - if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0) - goto out; - if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_HOLE) != 0) - goto out; - if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE, - 1) != 0) - goto out; - if (lastlog == 0) - lastlog = vcount; - varray[vcount++] = vdev; - continue; - } - lastlog = 0; - verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) - == 0); - if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Source pool must be composed only of mirrors\n")); - retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); - goto out; - } - - verify(nvlist_lookup_nvlist_array(child[c], - ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); - - /* find or add an entry for this top-level vdev */ - if (newchildren > 0 && - (entry = find_vdev_entry(zhp, mchild, mchildren, - newchild, newchildren)) >= 0) { - /* We found a disk that the user specified. */ - vdev = mchild[entry]; - ++found; - } else { - /* User didn't specify a disk for this vdev. 
*/ - vdev = mchild[mchildren - 1]; - } - - if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) - goto out; - } - - /* did we find every disk the user specified? */ - if (found != newchildren) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must " - "include at most one disk from each mirror")); - retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); - goto out; - } - - /* Prepare the nvlist for populating. */ - if (*newroot == NULL) { - if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0) - goto out; - freelist = B_TRUE; - if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) != 0) - goto out; - } else { - verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0); - } - - /* Add all the children we found */ - if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray, - lastlog == 0 ? vcount : lastlog) != 0) - goto out; - - /* - * If we're just doing a dry run, exit now with success. - */ - if (flags.dryrun) { - memory_err = B_FALSE; - freelist = B_FALSE; - goto out; - } - - /* now build up the config list & call the ioctl */ - if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0) - goto out; - - if (nvlist_add_nvlist(newconfig, - ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 || - nvlist_add_string(newconfig, - ZPOOL_CONFIG_POOL_NAME, newname) != 0 || - nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0) - goto out; - - /* - * The new pool is automatically part of the namespace unless we - * explicitly export it. 
- */ - if (!flags.import) - zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT; - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string)); - if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0) - goto out; - if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) - goto out; - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) { - retval = zpool_standard_error(hdl, errno, msg); - goto out; - } - - freelist = B_FALSE; - memory_err = B_FALSE; - -out: - if (varray != NULL) { - int v; - - for (v = 0; v < vcount; v++) - nvlist_free(varray[v]); - free(varray); - } - zcmd_free_nvlists(&zc); - nvlist_free(zc_props); - nvlist_free(newconfig); - if (freelist) { - nvlist_free(*newroot); - *newroot = NULL; - } - - if (retval != 0) - return (retval); - - if (memory_err) - return (no_memory(hdl)); - - return (0); -} - -/* - * Remove the given device. - */ -int -zpool_vdev_remove(zpool_handle_t *zhp, const char *path) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - nvlist_t *tgt; - boolean_t avail_spare, l2cache, islog; - libzfs_handle_t *hdl = zhp->zpool_hdl; - uint64_t version; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot remove %s"), path); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - &islog)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); - if (islog && version < SPA_VERSION_HOLES) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded to support log removal")); - return (zfs_error(hdl, EZFS_BADVERSION, msg)); - } - - zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) - return (0); - - switch (errno) { - - case EINVAL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid config; all top-level vdevs must " - "have the same sector 
size and not be raidz.")); - (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); - break; - - case EBUSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Pool busy; removal may already be in progress")); - (void) zfs_error(hdl, EZFS_BUSY, msg); - break; - - default: - (void) zpool_standard_error(hdl, errno, msg); - } - return (-1); -} - -int -zpool_vdev_remove_cancel(zpool_handle_t *zhp) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot cancel removal")); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = 1; - - if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) - return (0); - - return (zpool_standard_error(hdl, errno, msg)); -} - -int -zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path, - uint64_t *sizep) -{ - char msg[1024]; - nvlist_t *tgt; - boolean_t avail_spare, l2cache, islog; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"), - path); - - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - &islog)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - if (avail_spare || l2cache || islog) { - *sizep = 0; - return (0); - } - - if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "indirect size not available")); - return (zfs_error(hdl, EINVAL, msg)); - } - return (0); -} - -/* - * Clear the errors for the pool, or the particular device if specified. 
- */ -int -zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - nvlist_t *tgt; - zpool_load_policy_t policy; - boolean_t avail_spare, l2cache; - libzfs_handle_t *hdl = zhp->zpool_hdl; - nvlist_t *nvi = NULL; - int error; - - if (path) - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), - path); - else - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), - zhp->zpool_name); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (path) { - if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, - &l2cache, NULL)) == NULL) - return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - /* - * Don't allow error clearing for hot spares. Do allow - * error clearing for l2cache devices. - */ - if (avail_spare) - return (zfs_error(hdl, EZFS_ISSPARE, msg)); - - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, - &zc.zc_guid) == 0); - } - - zpool_get_load_policy(rewindnvl, &policy); - zc.zc_cookie = policy.zlp_rewind; - - if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0) - return (-1); - - if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) - return (-1); - - while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 && - errno == ENOMEM) { - if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { - zcmd_free_nvlists(&zc); - return (-1); - } - } - - if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) && - errno != EPERM && errno != EACCES)) { - if (policy.zlp_rewind & - (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { - (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); - zpool_rewind_exclaim(hdl, zc.zc_name, - ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), - nvi); - nvlist_free(nvi); - } - zcmd_free_nvlists(&zc); - return (0); - } - - zcmd_free_nvlists(&zc); - return (zpool_standard_error(hdl, errno, msg)); -} - -/* - * Similar to zpool_clear(), but takes a GUID (used by fmd). 
- */ -int -zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"), - guid); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_guid = guid; - zc.zc_cookie = ZPOOL_NO_REWIND; - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0) - return (0); - - return (zpool_standard_error(hdl, errno, msg)); -} - -/* - * Change the GUID for a pool. - */ -int -zpool_reguid(zpool_handle_t *zhp) -{ - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - zfs_cmd_t zc = { 0 }; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0) - return (0); - - return (zpool_standard_error(hdl, errno, msg)); -} - -/* - * Reopen the pool. - */ -int -zpool_reopen(zpool_handle_t *zhp) -{ - zfs_cmd_t zc = { 0 }; - char msg[1024]; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), - zhp->zpool_name); - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0) - return (0); - return (zpool_standard_error(hdl, errno, msg)); -} - -/* call into libzfs_core to execute the sync IOCTL per pool */ -int -zpool_sync_one(zpool_handle_t *zhp, void *data) -{ - int ret; - libzfs_handle_t *hdl = zpool_get_handle(zhp); - const char *pool_name = zpool_get_name(zhp); - boolean_t *force = data; - nvlist_t *innvl = fnvlist_alloc(); - - fnvlist_add_boolean_value(innvl, "force", *force); - if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) { - nvlist_free(innvl); - return (zpool_standard_error_fmt(hdl, ret, - dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name)); - } - nvlist_free(innvl); - - return (0); -} - 
-/* - * Convert from a devid string to a path. - */ -static char * -devid_to_path(char *devid_str) -{ - ddi_devid_t devid; - char *minor; - char *path; - devid_nmlist_t *list = NULL; - int ret; - - if (devid_str_decode(devid_str, &devid, &minor) != 0) - return (NULL); - - ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list); - - devid_str_free(minor); - devid_free(devid); - - if (ret != 0) - return (NULL); - - /* - * In a case the strdup() fails, we will just return NULL below. - */ - path = strdup(list[0].devname); - - devid_free_nmlist(list); - - return (path); -} - -/* - * Convert from a path to a devid string. - */ -static char * -path_to_devid(const char *path) -{ -#ifdef have_devid - int fd; - ddi_devid_t devid; - char *minor, *ret; - - if ((fd = open(path, O_RDONLY)) < 0) - return (NULL); - - minor = NULL; - ret = NULL; - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0) - ret = devid_str_encode(devid, minor); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - (void) close(fd); - - return (ret); -#else - return (NULL); -#endif -} - -/* - * Issue the necessary ioctl() to update the stored path value for the vdev. We - * ignore any failure here, since a common case is for an unprivileged user to - * type 'zpool status', and we'll display the correct information anyway. - */ -static void -set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) -{ - zfs_cmd_t zc = { 0 }; - - (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value)); - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, - &zc.zc_guid) == 0); - - (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc); -} - -/* - * Given a vdev, return the name to display in iostat. If the vdev has a path, - * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type. 
- * We also check if this is a whole disk, in which case we strip off the - * trailing 's0' slice name. - * - * This routine is also responsible for identifying when disks have been - * reconfigured in a new location. The kernel will have opened the device by - * devid, but the path will still refer to the old location. To catch this, we - * first do a path -> devid translation (which is fast for the common case). If - * the devid matches, we're done. If not, we do a reverse devid -> path - * translation and issue the appropriate ioctl() to update the path of the vdev. - * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any - * of these checks. - */ -char * -zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, - int name_flags) -{ - char *path, *devid, *env; - uint64_t value; - char buf[64]; - vdev_stat_t *vs; - uint_t vsc; - int have_stats; - int have_path; - - env = getenv("ZPOOL_VDEV_NAME_PATH"); - if (env && (strtoul(env, NULL, 0) > 0 || - !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) - name_flags |= VDEV_NAME_PATH; - - env = getenv("ZPOOL_VDEV_NAME_GUID"); - if (env && (strtoul(env, NULL, 0) > 0 || - !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) - name_flags |= VDEV_NAME_GUID; - - env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS"); - if (env && (strtoul(env, NULL, 0) > 0 || - !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) - name_flags |= VDEV_NAME_FOLLOW_LINKS; - - have_stats = nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0; - have_path = nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0; - - /* - * If the device is not currently present, assume it will not - * come back at the same device path. Display the device by GUID. 
- */ - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 || - (name_flags & VDEV_NAME_GUID) != 0 || - have_path && have_stats && vs->vs_state <= VDEV_STATE_CANT_OPEN) { - nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value); - (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value); - path = buf; - } else if (have_path) { - - /* - * If the device is dead (faulted, offline, etc) then don't - * bother opening it. Otherwise we may be forcing the user to - * open a misbehaving device, which can have undesirable - * effects. - */ - if ((have_stats == 0 || - vs->vs_state >= VDEV_STATE_DEGRADED) && - zhp != NULL && - nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) { - /* - * Determine if the current path is correct. - */ - char *newdevid = path_to_devid(path); - - if (newdevid == NULL || - strcmp(devid, newdevid) != 0) { - char *newpath; - - if ((newpath = devid_to_path(devid)) != NULL) { - /* - * Update the path appropriately. - */ - set_path(zhp, nv, newpath); - if (nvlist_add_string(nv, - ZPOOL_CONFIG_PATH, newpath) == 0) - verify(nvlist_lookup_string(nv, - ZPOOL_CONFIG_PATH, - &path) == 0); - free(newpath); - } - } - - if (newdevid) - devid_str_free(newdevid); - } - -#ifdef illumos - if (name_flags & VDEV_NAME_FOLLOW_LINKS) { - char *rp = realpath(path, NULL); - if (rp) { - strlcpy(buf, rp, sizeof (buf)); - path = buf; - free(rp); - } - } - - if (strncmp(path, ZFS_DISK_ROOTD, strlen(ZFS_DISK_ROOTD)) == 0) - path += strlen(ZFS_DISK_ROOTD); - - /* - * Remove the partition from the path it this is a whole disk. - */ - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) - == 0 && value && !(name_flags & VDEV_NAME_PATH)) { - int pathlen = strlen(path); - char *tmp = zfs_strdup(hdl, path); - - /* - * If it starts with c#, and ends with "s0" or "s1", - * chop the slice off, or if it ends with "s0/old" or - * "s1/old", remove the slice from the middle. 
- */ - if (CTD_CHECK(tmp)) { - if (strcmp(&tmp[pathlen - 2], "s0") == 0 || - strcmp(&tmp[pathlen - 2], "s1") == 0) { - tmp[pathlen - 2] = '\0'; - } else if (pathlen > 6 && - (strcmp(&tmp[pathlen - 6], "s0/old") == 0 || - strcmp(&tmp[pathlen - 6], "s1/old") == 0)) { - (void) strcpy(&tmp[pathlen - 6], - "/old"); - } - } - return (tmp); - } -#else /* !illumos */ - if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) - path += sizeof(_PATH_DEV) - 1; -#endif /* illumos */ - } else { - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); - - /* - * If it's a raidz device, we need to stick in the parity level. - */ - if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, - &value) == 0); - (void) snprintf(buf, sizeof (buf), "%s%llu", path, - (u_longlong_t)value); - path = buf; - } - - /* - * We identify each top-level vdev by using a <type-id> - * naming convention. - */ - if (name_flags & VDEV_NAME_TYPE_ID) { - uint64_t id; - - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, - &id) == 0); - (void) snprintf(buf, sizeof (buf), "%s-%llu", path, - (u_longlong_t)id); - path = buf; - } - } - - return (zfs_strdup(hdl, path)); -} - -static int -zbookmark_mem_compare(const void *a, const void *b) -{ - return (memcmp(a, b, sizeof (zbookmark_phys_t))); -} - -/* - * Retrieve the persistent error log, uniquify the members, and return to the - * caller. - */ -int -zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) -{ - zfs_cmd_t zc = { 0 }; - uint64_t count; - zbookmark_phys_t *zb = NULL; - int i; - - /* - * Retrieve the raw error list from the kernel. If the number of errors - * has increased, allocate more space and continue until we get the - * entire list. 
- */ - verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT, - &count) == 0); - if (count == 0) - return (0); - if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, - count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL) - return (-1); - zc.zc_nvlist_dst_size = count; - (void) strcpy(zc.zc_name, zhp->zpool_name); - for (;;) { - if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG, - &zc) != 0) { - free((void *)(uintptr_t)zc.zc_nvlist_dst); - if (errno == ENOMEM) { - void *dst; - - count = zc.zc_nvlist_dst_size; - dst = zfs_alloc(zhp->zpool_hdl, count * - sizeof (zbookmark_phys_t)); - if (dst == NULL) - return (-1); - zc.zc_nvlist_dst = (uintptr_t)dst; - } else { - return (-1); - } - } else { - break; - } - } - - /* - * Sort the resulting bookmarks. This is a little confusing due to the - * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last - * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks - * _not_ copied as part of the process. So we point the start of our - * array appropriate and decrement the total number of elements. - */ - zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) + - zc.zc_nvlist_dst_size; - count -= zc.zc_nvlist_dst_size; - - qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); - - verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); - - /* - * Fill in the nverrlistp with nvlist's of dataset and object numbers. 
- */ - for (i = 0; i < count; i++) { - nvlist_t *nv; - - /* ignoring zb_blkid and zb_level for now */ - if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && - zb[i-1].zb_object == zb[i].zb_object) - continue; - - if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) - goto nomem; - if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, - zb[i].zb_objset) != 0) { - nvlist_free(nv); - goto nomem; - } - if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, - zb[i].zb_object) != 0) { - nvlist_free(nv); - goto nomem; - } - if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) { - nvlist_free(nv); - goto nomem; - } - nvlist_free(nv); - } - - free((void *)(uintptr_t)zc.zc_nvlist_dst); - return (0); - -nomem: - free((void *)(uintptr_t)zc.zc_nvlist_dst); - return (no_memory(zhp->zpool_hdl)); -} - -/* - * Upgrade a ZFS pool to the latest on-disk version. - */ -int -zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) strcpy(zc.zc_name, zhp->zpool_name); - zc.zc_cookie = new_version; - - if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0) - return (zpool_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"), - zhp->zpool_name)); - return (0); -} - -void -zfs_save_arguments(int argc, char **argv, char *string, int len) -{ - (void) strlcpy(string, basename(argv[0]), len); - for (int i = 1; i < argc; i++) { - (void) strlcat(string, " ", len); - (void) strlcat(string, argv[i], len); - } -} - -int -zpool_log_history(libzfs_handle_t *hdl, const char *message) -{ - zfs_cmd_t zc = { 0 }; - nvlist_t *args; - int err; - - args = fnvlist_alloc(); - fnvlist_add_string(args, "message", message); - err = zcmd_write_src_nvlist(hdl, &zc, args); - if (err == 0) - err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); - nvlist_free(args); - zcmd_free_nvlists(&zc); - return (err); -} - -/* - * Perform ioctl to get some command history of a pool. - * - * 'buf' is the buffer to fill up to 'len' bytes. 
'off' is the - * logical offset of the history buffer to start reading from. - * - * Upon return, 'off' is the next logical offset to read from and - * 'len' is the actual amount of bytes read into 'buf'. - */ -static int -get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zpool_hdl; - - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - - zc.zc_history = (uint64_t)(uintptr_t)buf; - zc.zc_history_len = *len; - zc.zc_history_offset = *off; - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) { - switch (errno) { - case EPERM: - return (zfs_error_fmt(hdl, EZFS_PERM, - dgettext(TEXT_DOMAIN, - "cannot show history for pool '%s'"), - zhp->zpool_name)); - case ENOENT: - return (zfs_error_fmt(hdl, EZFS_NOHISTORY, - dgettext(TEXT_DOMAIN, "cannot get history for pool " - "'%s'"), zhp->zpool_name)); - case ENOTSUP: - return (zfs_error_fmt(hdl, EZFS_BADVERSION, - dgettext(TEXT_DOMAIN, "cannot get history for pool " - "'%s', pool must be upgraded"), zhp->zpool_name)); - default: - return (zpool_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, - "cannot get history for '%s'"), zhp->zpool_name)); - } - } - - *len = zc.zc_history_len; - *off = zc.zc_history_offset; - - return (0); -} - -/* - * Process the buffer of nvlists, unpacking and storing each nvlist record - * into 'records'. 'leftover' is set to the number of bytes that weren't - * processed as there wasn't a complete record. 
- */ -int -zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, - nvlist_t ***records, uint_t *numrecords) -{ - uint64_t reclen; - nvlist_t *nv; - int i; - - while (bytes_read > sizeof (reclen)) { - - /* get length of packed record (stored as little endian) */ - for (i = 0, reclen = 0; i < sizeof (reclen); i++) - reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i); - - if (bytes_read < sizeof (reclen) + reclen) - break; - - /* unpack record */ - if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0) - return (ENOMEM); - bytes_read -= sizeof (reclen) + reclen; - buf += sizeof (reclen) + reclen; - - /* add record to nvlist array */ - (*numrecords)++; - if (ISP2(*numrecords + 1)) { - *records = realloc(*records, - *numrecords * 2 * sizeof (nvlist_t *)); - } - (*records)[*numrecords - 1] = nv; - } - - *leftover = bytes_read; - return (0); -} - -/* from spa_history.c: spa_history_create_obj() */ -#define HIS_BUF_LEN_DEF (128 << 10) -#define HIS_BUF_LEN_MAX (1 << 30) - -/* - * Retrieve the command history of a pool. - */ -int -zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off, - boolean_t *eof) -{ - char *buf; - uint64_t buflen = HIS_BUF_LEN_DEF; - nvlist_t **records = NULL; - uint_t numrecords = 0; - int err, i; - uint64_t start = *off; - - buf = malloc(buflen); - if (buf == NULL) - return (ENOMEM); - /* process about 1MB at a time */ - while (*off - start < 1024 * 1024) { - uint64_t bytes_read = buflen; - uint64_t leftover; - - if ((err = get_history(zhp, buf, off, &bytes_read)) != 0) - break; - - /* if nothing else was read in, we're at EOF, just return */ - if (bytes_read == 0) { - *eof = B_TRUE; - break; - } - - if ((err = zpool_history_unpack(buf, bytes_read, - &leftover, &records, &numrecords)) != 0) - break; - *off -= leftover; - if (leftover == bytes_read) { - /* - * no progress made, because buffer is not big enough - * to hold this record; resize and retry. 
- */ - buflen *= 2; - free(buf); - buf = NULL; - if ((buflen >= HIS_BUF_LEN_MAX) || - ((buf = malloc(buflen)) == NULL)) { - err = ENOMEM; - break; - } - } - } - - free(buf); - - if (!err) { - verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD, - records, numrecords) == 0); - } - for (i = 0; i < numrecords; i++) - nvlist_free(records[i]); - free(records); - - return (err); -} - -void -zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, - char *pathname, size_t len) -{ - zfs_cmd_t zc = { 0 }; - boolean_t mounted = B_FALSE; - char *mntpnt = NULL; - char dsname[ZFS_MAX_DATASET_NAME_LEN]; - - if (dsobj == 0) { - /* special case for the MOS */ - (void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj); - return; - } - - /* get the dataset's name */ - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_obj = dsobj; - if (ioctl(zhp->zpool_hdl->libzfs_fd, - ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) { - /* just write out a path of two object numbers */ - (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>", - dsobj, obj); - return; - } - (void) strlcpy(dsname, zc.zc_value, sizeof (dsname)); - - /* find out if the dataset is mounted */ - mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt); - - /* get the corrupted object's path */ - (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); - zc.zc_obj = obj; - if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH, - &zc) == 0) { - if (mounted) { - (void) snprintf(pathname, len, "%s%s", mntpnt, - zc.zc_value); - } else { - (void) snprintf(pathname, len, "%s:%s", - dsname, zc.zc_value); - } - } else { - (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj); - } - free(mntpnt); -} - -int -zpool_set_bootenv(zpool_handle_t *zhp, const char *envmap) -{ - int error = lzc_set_bootenv(zhp->zpool_name, envmap); - if (error != 0) { - (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, - dgettext(TEXT_DOMAIN, - "error setting 
bootenv in pool '%s'"), zhp->zpool_name); - } - - return (error); -} - -int -zpool_get_bootenv(zpool_handle_t *zhp, char *outbuf, size_t size, off_t offset) -{ - nvlist_t *nvl; - int error = lzc_get_bootenv(zhp->zpool_name, &nvl);; - if (error != 0) { - (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, - dgettext(TEXT_DOMAIN, - "error getting bootenv in pool '%s'"), zhp->zpool_name); - return (-1); - } - char *envmap = fnvlist_lookup_string(nvl, "envmap"); - if (offset >= strlen(envmap)) { - fnvlist_free(nvl); - return (0); - } - - strlcpy(outbuf, envmap + offset, size); - int bytes = MIN(strlen(envmap + offset), size); - fnvlist_free(nvl); - return (bytes); -} - -#ifdef illumos -/* - * Read the EFI label from the config, if a label does not exist then - * pass back the error to the caller. If the caller has passed a non-NULL - * diskaddr argument then we set it to the starting address of the EFI - * partition. If the caller has passed a non-NULL boolean argument, then - * we set it to indicate if the disk does have efi system partition. 
- */ -static int -read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system) -{ - char *path; - int fd; - char diskname[MAXPATHLEN]; - boolean_t boot = B_FALSE; - int err = -1; - int slice; - - if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) - return (err); - - (void) snprintf(diskname, sizeof (diskname), "%s%s", ZFS_RDISK_ROOT, - strrchr(path, '/')); - if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) { - struct dk_gpt *vtoc; - - if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { - for (slice = 0; slice < vtoc->efi_nparts; slice++) { - if (vtoc->efi_parts[slice].p_tag == V_SYSTEM) - boot = B_TRUE; - if (vtoc->efi_parts[slice].p_tag == V_USR) - break; - } - if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR) - *sb = vtoc->efi_parts[slice].p_start; - if (system != NULL) - *system = boot; - efi_free(vtoc); - } - (void) close(fd); - } - return (err); -} - -/* - * determine where a partition starts on a disk in the current - * configuration - */ -static diskaddr_t -find_start_block(nvlist_t *config) -{ - nvlist_t **child; - uint_t c, children; - diskaddr_t sb = MAXOFFSET_T; - uint64_t wholedisk; - - if (nvlist_lookup_nvlist_array(config, - ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { - if (nvlist_lookup_uint64(config, - ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) { - return (MAXOFFSET_T); - } - if (read_efi_label(config, &sb, NULL) < 0) - sb = MAXOFFSET_T; - return (sb); - } - - for (c = 0; c < children; c++) { - sb = find_start_block(child[c]); - if (sb != MAXOFFSET_T) { - return (sb); - } - } - return (MAXOFFSET_T); -} -#endif /* illumos */ - -/* - * Label an individual disk. The name provided is the short name, - * stripped of any leading /dev path. 
- */ -int -zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name, - zpool_boot_label_t boot_type, uint64_t boot_size, int *slice) -{ -#ifdef illumos - char path[MAXPATHLEN]; - struct dk_gpt *vtoc; - int fd; - size_t resv = EFI_MIN_RESV_SIZE; - uint64_t slice_size; - diskaddr_t start_block; - char errbuf[1024]; - - /* prepare an error message just in case */ - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); - - if (zhp) { - nvlist_t *nvroot; - - verify(nvlist_lookup_nvlist(zhp->zpool_config, - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - - if (zhp->zpool_start_block == 0) - start_block = find_start_block(nvroot); - else - start_block = zhp->zpool_start_block; - zhp->zpool_start_block = start_block; - } else { - /* new pool */ - start_block = NEW_START_BLOCK; - } - - (void) snprintf(path, sizeof (path), "%s/%s%s", ZFS_RDISK_ROOT, name, - BACKUP_SLICE); - - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { - /* - * This shouldn't happen. We've long since verified that this - * is a valid device. - */ - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "unable to open device")); - return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); - } - - if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { - /* - * The only way this can fail is if we run out of memory, or we - * were unable to read the disk's capacity - */ - if (errno == ENOMEM) - (void) no_memory(hdl); - - (void) close(fd); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "unable to read disk capacity"), name); - - return (zfs_error(hdl, EZFS_NOCAP, errbuf)); - } - - /* - * Why we use V_USR: V_BACKUP confuses users, and is considered - * disposable by some EFI utilities (since EFI doesn't have a backup - * slice). V_UNASSIGNED is supposed to be used only for zero size - * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, - * etc. were all pretty specific. V_USR is as close to reality as we - * can get, in the absence of V_OTHER. 
- */ - /* first fix the partition start block */ - if (start_block == MAXOFFSET_T) - start_block = NEW_START_BLOCK; - - /* - * EFI System partition is using slice 0. - * ZFS is on slice 1 and slice 8 is reserved. - * We assume the GPT partition table without system - * partition has zfs p_start == NEW_START_BLOCK. - * If start_block != NEW_START_BLOCK, it means we have - * system partition. Correct solution would be to query/cache vtoc - * from existing vdev member. - */ - if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { - if (boot_size % vtoc->efi_lbasize != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "boot partition size must be a multiple of %d"), - vtoc->efi_lbasize); - (void) close(fd); - efi_free(vtoc); - return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); - } - /* - * System partition size checks. - * Note the 1MB is quite arbitrary value, since we - * are creating dedicated pool, it should be enough - * to hold fat + efi bootloader. May need to be - * adjusted if the bootloader size will grow. - */ - if (boot_size < 1024 * 1024) { - char buf[64]; - zfs_nicenum(boot_size, buf, sizeof (buf)); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Specified size %s for EFI System partition is too " - "small, the minimum size is 1MB."), buf); - (void) close(fd); - efi_free(vtoc); - return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); - } - /* 33MB is tested with mkfs -F pcfs */ - if (hdl->libzfs_printerr && - ((vtoc->efi_lbasize == 512 && - boot_size < 33 * 1024 * 1024) || - (vtoc->efi_lbasize == 4096 && - boot_size < 256 * 1024 * 1024))) { - char buf[64]; - zfs_nicenum(boot_size, buf, sizeof (buf)); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "Warning: EFI System partition size %s is " - "not allowing to create FAT32 file\nsystem, which " - "may result in unbootable system.\n"), buf); - } - /* Adjust zfs partition start by size of system partition. 
*/ - start_block += boot_size / vtoc->efi_lbasize; - } - - if (start_block == NEW_START_BLOCK) { - /* - * Use default layout. - * ZFS is on slice 0 and slice 8 is reserved. - */ - slice_size = vtoc->efi_last_u_lba + 1; - slice_size -= EFI_MIN_RESV_SIZE; - slice_size -= start_block; - if (slice != NULL) - *slice = 0; - - vtoc->efi_parts[0].p_start = start_block; - vtoc->efi_parts[0].p_size = slice_size; - - vtoc->efi_parts[0].p_tag = V_USR; - (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); - - vtoc->efi_parts[8].p_start = slice_size + start_block; - vtoc->efi_parts[8].p_size = resv; - vtoc->efi_parts[8].p_tag = V_RESERVED; - } else { - slice_size = start_block - NEW_START_BLOCK; - vtoc->efi_parts[0].p_start = NEW_START_BLOCK; - vtoc->efi_parts[0].p_size = slice_size; - vtoc->efi_parts[0].p_tag = V_SYSTEM; - (void) strcpy(vtoc->efi_parts[0].p_name, "loader"); - if (slice != NULL) - *slice = 1; - /* prepare slice 1 */ - slice_size = vtoc->efi_last_u_lba + 1 - slice_size; - slice_size -= resv; - slice_size -= NEW_START_BLOCK; - vtoc->efi_parts[1].p_start = start_block; - vtoc->efi_parts[1].p_size = slice_size; - vtoc->efi_parts[1].p_tag = V_USR; - (void) strcpy(vtoc->efi_parts[1].p_name, "zfs"); - - vtoc->efi_parts[8].p_start = slice_size + start_block; - vtoc->efi_parts[8].p_size = resv; - vtoc->efi_parts[8].p_tag = V_RESERVED; - } - - if (efi_write(fd, vtoc) != 0) { - /* - * Some block drivers (like pcata) may not support EFI - * GPT labels. Print out a helpful error message dir- - * ecting the user to manually label the disk and give - * a specific slice. 
- */ - (void) close(fd); - efi_free(vtoc); - - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "try using fdisk(1M) and then provide a specific slice")); - return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); - } - - (void) close(fd); - efi_free(vtoc); -#endif /* illumos */ - return (0); -} - -static boolean_t -supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) -{ - char *type; - nvlist_t **child; - uint_t children, c; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_FILE) == 0 || - strcmp(type, VDEV_TYPE_HOLE) == 0 || - strcmp(type, VDEV_TYPE_MISSING) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "vdev type '%s' is not supported"), type); - (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); - return (B_FALSE); - } - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if (!supported_dump_vdev_type(hdl, child[c], errbuf)) - return (B_FALSE); - } - } - return (B_TRUE); -} - -/* - * Check if this zvol is allowable for use as a dump device; zero if - * it is, > 0 if it isn't, < 0 if it isn't a zvol. - * - * Allowable storage configurations include mirrors, all raidz variants, and - * pools with log, cache, and spare devices. Pools which are backed by files or - * have missing/hole vdevs are not suitable. 
- */ -int -zvol_check_dump_config(char *arg) -{ - zpool_handle_t *zhp = NULL; - nvlist_t *config, *nvroot; - char *p, *volname; - nvlist_t **top; - uint_t toplevels; - libzfs_handle_t *hdl; - char errbuf[1024]; - char poolname[ZFS_MAX_DATASET_NAME_LEN]; - int pathlen = strlen(ZVOL_FULL_DEV_DIR); - int ret = 1; - - if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { - return (-1); - } - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "dump is not supported on device '%s'"), arg); - - if ((hdl = libzfs_init()) == NULL) - return (1); - libzfs_print_on_error(hdl, B_TRUE); - - volname = arg + pathlen; - - /* check the configuration of the pool */ - if ((p = strchr(volname, '/')) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "malformed dataset name")); - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - return (1); - } else if (p - volname >= ZFS_MAX_DATASET_NAME_LEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset name is too long")); - (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); - return (1); - } else { - (void) strncpy(poolname, volname, p - volname); - poolname[p - volname] = '\0'; - } - - if ((zhp = zpool_open(hdl, poolname)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not open pool '%s'"), poolname); - (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); - goto out; - } - config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not obtain vdev configuration for '%s'"), poolname); - (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); - goto out; - } - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - - if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { - goto out; - } - ret = 0; - -out: - if (zhp) - zpool_close(zhp); - libzfs_fini(hdl); - return (ret); -} - -int -zpool_nextboot(libzfs_handle_t *hdl, uint64_t pool_guid, uint64_t dev_guid, - const char *command) 
-{ - zfs_cmd_t zc = { 0 }; - nvlist_t *args; - char *packed; - size_t size; - int error; - - args = fnvlist_alloc(); - fnvlist_add_uint64(args, ZPOOL_CONFIG_POOL_GUID, pool_guid); - fnvlist_add_uint64(args, ZPOOL_CONFIG_GUID, dev_guid); - fnvlist_add_string(args, "command", command); - error = zcmd_write_src_nvlist(hdl, &zc, args); - if (error == 0) - error = ioctl(hdl->libzfs_fd, ZFS_IOC_NEXTBOOT, &zc); - zcmd_free_nvlists(&zc); - nvlist_free(args); - return (error); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c deleted file mode 100644 index 2e2e1020ad8a..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ /dev/null @@ -1,3924 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright 2015, OmniTI Computer Consulting, Inc. 
All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. - * Copyright (c) 2019 Datto Inc. - */ - -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <libintl.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <stddef.h> -#include <fcntl.h> -#include <sys/param.h> -#include <sys/mount.h> -#include <pthread.h> -#include <umem.h> -#include <time.h> - -#include <libzfs.h> -#include <libzfs_core.h> - -#include "zfs_namecheck.h" -#include "zfs_prop.h" -#include "zfs_fletcher.h" -#include "libzfs_impl.h" -#include <zlib.h> -#include <sha2.h> -#include <sys/zio_checksum.h> -#include <sys/ddt.h> - -#ifdef __FreeBSD__ -extern int zfs_ioctl_version; -#endif - -/* in libzfs_dataset.c */ -extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); -/* We need to use something for ENODATA. 
*/ -#define ENODATA EIDRM - -static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, - recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, - uint64_t *, const char *); -static int guid_to_name(libzfs_handle_t *, const char *, - uint64_t, boolean_t, char *); - -static const zio_cksum_t zero_cksum = { 0 }; - -typedef struct dedup_arg { - int inputfd; - int outputfd; - libzfs_handle_t *dedup_hdl; -} dedup_arg_t; - -typedef struct progress_arg { - zfs_handle_t *pa_zhp; - int pa_fd; - boolean_t pa_parsable; - boolean_t pa_astitle; - uint64_t pa_size; -} progress_arg_t; - -typedef struct dataref { - uint64_t ref_guid; - uint64_t ref_object; - uint64_t ref_offset; -} dataref_t; - -typedef struct dedup_entry { - struct dedup_entry *dde_next; - zio_cksum_t dde_chksum; - uint64_t dde_prop; - dataref_t dde_ref; -} dedup_entry_t; - -#define MAX_DDT_PHYSMEM_PERCENT 20 -#define SMALLEST_POSSIBLE_MAX_DDT_MB 128 - -typedef struct dedup_table { - dedup_entry_t **dedup_hash_array; - umem_cache_t *ddecache; - uint64_t max_ddt_size; /* max dedup table size in bytes */ - uint64_t cur_ddt_size; /* current dedup table size in bytes */ - uint64_t ddt_count; - int numhashbits; - boolean_t ddt_full; -} dedup_table_t; - -static int -high_order_bit(uint64_t n) -{ - int count; - - for (count = 0; n != 0; count++) - n >>= 1; - return (count); -} - -static size_t -ssread(void *buf, size_t len, FILE *stream) -{ - size_t outlen; - - if ((outlen = fread(buf, len, 1, stream)) == 0) - return (0); - - return (outlen); -} - -static void -ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, - zio_cksum_t *cs, uint64_t prop, dataref_t *dr) -{ - dedup_entry_t *dde; - - if (ddt->cur_ddt_size >= ddt->max_ddt_size) { - if (ddt->ddt_full == B_FALSE) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Dedup table full. 
Deduplication will continue " - "with existing table entries")); - ddt->ddt_full = B_TRUE; - } - return; - } - - if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) - != NULL) { - assert(*ddepp == NULL); - dde->dde_next = NULL; - dde->dde_chksum = *cs; - dde->dde_prop = prop; - dde->dde_ref = *dr; - *ddepp = dde; - ddt->cur_ddt_size += sizeof (dedup_entry_t); - ddt->ddt_count++; - } -} - -/* - * Using the specified dedup table, do a lookup for an entry with - * the checksum cs. If found, return the block's reference info - * in *dr. Otherwise, insert a new entry in the dedup table, using - * the reference information specified by *dr. - * - * return value: true - entry was found - * false - entry was not found - */ -static boolean_t -ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, - uint64_t prop, dataref_t *dr) -{ - uint32_t hashcode; - dedup_entry_t **ddepp; - - hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); - - for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; - ddepp = &((*ddepp)->dde_next)) { - if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && - (*ddepp)->dde_prop == prop) { - *dr = (*ddepp)->dde_ref; - return (B_TRUE); - } - } - ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); - return (B_FALSE); -} - -static int -dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, - zio_cksum_t *zc, int outfd) -{ - ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), - ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); - (void) fletcher_4_incremental_native(drr, - offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); - if (drr->drr_type != DRR_BEGIN) { - ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. 
- drr_checksum.drr_checksum)); - drr->drr_u.drr_checksum.drr_checksum = *zc; - } - (void) fletcher_4_incremental_native( - &drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc); - if (write(outfd, drr, sizeof (*drr)) == -1) - return (errno); - if (payload_len != 0) { - (void) fletcher_4_incremental_native(payload, payload_len, zc); - if (write(outfd, payload, payload_len) == -1) - return (errno); - } - return (0); -} - -/* - * This function is started in a separate thread when the dedup option - * has been requested. The main send thread determines the list of - * snapshots to be included in the send stream and makes the ioctl calls - * for each one. But instead of having the ioctl send the output to the - * the output fd specified by the caller of zfs_send()), the - * ioctl is told to direct the output to a pipe, which is read by the - * alternate thread running THIS function. This function does the - * dedup'ing by: - * 1. building a dedup table (the DDT) - * 2. doing checksums on each data block and inserting a record in the DDT - * 3. looking for matching checksums, and - * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever - * a duplicate block is found. - * The output of this function then goes to the output fd requested - * by the caller of zfs_send(). - */ -static void * -cksummer(void *arg) -{ - dedup_arg_t *dda = arg; - char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); - dmu_replay_record_t thedrr; - dmu_replay_record_t *drr = &thedrr; - FILE *ofp; - int outfd; - dedup_table_t ddt; - zio_cksum_t stream_cksum; - uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); - uint64_t numbuckets; - - ddt.max_ddt_size = - MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100, - SMALLEST_POSSIBLE_MAX_DDT_MB << 20); - - numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t)); - - /* - * numbuckets must be a power of 2. Increase number to - * a power of 2 if necessary. 
- */ - if (!ISP2(numbuckets)) - numbuckets = 1 << high_order_bit(numbuckets); - - ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); - ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, - NULL, NULL, NULL, NULL, NULL, 0); - ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); - ddt.numhashbits = high_order_bit(numbuckets) - 1; - ddt.ddt_full = B_FALSE; - - outfd = dda->outputfd; - ofp = fdopen(dda->inputfd, "r"); - while (ssread(drr, sizeof (*drr), ofp) != 0) { - - /* - * kernel filled in checksum, we are going to write same - * record, but need to regenerate checksum. - */ - if (drr->drr_type != DRR_BEGIN) { - bzero(&drr->drr_u.drr_checksum.drr_checksum, - sizeof (drr->drr_u.drr_checksum.drr_checksum)); - } - - switch (drr->drr_type) { - case DRR_BEGIN: - { - struct drr_begin *drrb = &drr->drr_u.drr_begin; - int fflags; - int sz = 0; - ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); - - ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); - - /* set the DEDUP feature flag for this stream */ - fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); - fflags |= (DMU_BACKUP_FEATURE_DEDUP | - DMU_BACKUP_FEATURE_DEDUPPROPS); - DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - - if (drr->drr_payloadlen != 0) { - sz = drr->drr_payloadlen; - - if (sz > SPA_MAXBLOCKSIZE) { - buf = zfs_realloc(dda->dedup_hdl, buf, - SPA_MAXBLOCKSIZE, sz); - } - (void) ssread(buf, sz, ofp); - if (ferror(stdin)) - perror("fread"); - } - if (dump_record(drr, buf, sz, &stream_cksum, - outfd) != 0) - goto out; - break; - } - - case DRR_END: - { - struct drr_end *drre = &drr->drr_u.drr_end; - /* use the recalculated checksum */ - drre->drr_checksum = stream_cksum; - if (dump_record(drr, NULL, 0, &stream_cksum, - outfd) != 0) - goto out; - break; - } - - case DRR_OBJECT: - { - struct drr_object *drro = &drr->drr_u.drr_object; - if (drro->drr_bonuslen > 0) { - (void) ssread(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - ofp); - } - if (dump_record(drr, buf, 
- P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - &stream_cksum, outfd) != 0) - goto out; - break; - } - - case DRR_SPILL: - { - struct drr_spill *drrs = &drr->drr_u.drr_spill; - (void) ssread(buf, drrs->drr_length, ofp); - if (dump_record(drr, buf, drrs->drr_length, - &stream_cksum, outfd) != 0) - goto out; - break; - } - - case DRR_FREEOBJECTS: - { - if (dump_record(drr, NULL, 0, &stream_cksum, - outfd) != 0) - goto out; - break; - } - - case DRR_WRITE: - { - struct drr_write *drrw = &drr->drr_u.drr_write; - dataref_t dataref; - uint64_t payload_size; - - payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); - (void) ssread(buf, payload_size, ofp); - - /* - * Use the existing checksum if it's dedup-capable, - * else calculate a SHA256 checksum for it. - */ - - if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, - zero_cksum) || - !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { - SHA256_CTX ctx; - zio_cksum_t tmpsha256; - - SHA256Init(&ctx); - SHA256Update(&ctx, buf, payload_size); - SHA256Final(&tmpsha256, &ctx); - drrw->drr_key.ddk_cksum.zc_word[0] = - BE_64(tmpsha256.zc_word[0]); - drrw->drr_key.ddk_cksum.zc_word[1] = - BE_64(tmpsha256.zc_word[1]); - drrw->drr_key.ddk_cksum.zc_word[2] = - BE_64(tmpsha256.zc_word[2]); - drrw->drr_key.ddk_cksum.zc_word[3] = - BE_64(tmpsha256.zc_word[3]); - drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; - drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; - } - - dataref.ref_guid = drrw->drr_toguid; - dataref.ref_object = drrw->drr_object; - dataref.ref_offset = drrw->drr_offset; - - if (ddt_update(dda->dedup_hdl, &ddt, - &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, - &dataref)) { - dmu_replay_record_t wbr_drr = {0}; - struct drr_write_byref *wbr_drrr = - &wbr_drr.drr_u.drr_write_byref; - - /* block already present in stream */ - wbr_drr.drr_type = DRR_WRITE_BYREF; - - wbr_drrr->drr_object = drrw->drr_object; - wbr_drrr->drr_offset = drrw->drr_offset; - wbr_drrr->drr_length = drrw->drr_logical_size; - wbr_drrr->drr_toguid = drrw->drr_toguid; - 
wbr_drrr->drr_refguid = dataref.ref_guid; - wbr_drrr->drr_refobject = - dataref.ref_object; - wbr_drrr->drr_refoffset = - dataref.ref_offset; - - wbr_drrr->drr_checksumtype = - drrw->drr_checksumtype; - wbr_drrr->drr_checksumflags = - drrw->drr_checksumtype; - wbr_drrr->drr_key.ddk_cksum = - drrw->drr_key.ddk_cksum; - wbr_drrr->drr_key.ddk_prop = - drrw->drr_key.ddk_prop; - - if (dump_record(&wbr_drr, NULL, 0, - &stream_cksum, outfd) != 0) - goto out; - } else { - /* block not previously seen */ - if (dump_record(drr, buf, payload_size, - &stream_cksum, outfd) != 0) - goto out; - } - break; - } - - case DRR_WRITE_EMBEDDED: - { - struct drr_write_embedded *drrwe = - &drr->drr_u.drr_write_embedded; - (void) ssread(buf, - P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); - if (dump_record(drr, buf, - P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), - &stream_cksum, outfd) != 0) - goto out; - break; - } - - case DRR_FREE: - { - if (dump_record(drr, NULL, 0, &stream_cksum, - outfd) != 0) - goto out; - break; - } - - default: - (void) fprintf(stderr, "INVALID record type 0x%x\n", - drr->drr_type); - /* should never happen, so assert */ - assert(B_FALSE); - } - } -out: - umem_cache_destroy(ddt.ddecache); - free(ddt.dedup_hash_array); - free(buf); - (void) fclose(ofp); - - return (NULL); -} - -/* - * Routines for dealing with the AVL tree of fs-nvlists - */ -typedef struct fsavl_node { - avl_node_t fn_node; - nvlist_t *fn_nvfs; - char *fn_snapname; - uint64_t fn_guid; -} fsavl_node_t; - -static int -fsavl_compare(const void *arg1, const void *arg2) -{ - const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1; - const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2; - - return (AVL_CMP(fn1->fn_guid, fn2->fn_guid)); -} - -/* - * Given the GUID of a snapshot, find its containing filesystem and - * (optionally) name. 
- */ -static nvlist_t * -fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) -{ - fsavl_node_t fn_find; - fsavl_node_t *fn; - - fn_find.fn_guid = snapguid; - - fn = avl_find(avl, &fn_find, NULL); - if (fn) { - if (snapname) - *snapname = fn->fn_snapname; - return (fn->fn_nvfs); - } - return (NULL); -} - -static void -fsavl_destroy(avl_tree_t *avl) -{ - fsavl_node_t *fn; - void *cookie; - - if (avl == NULL) - return; - - cookie = NULL; - while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) - free(fn); - avl_destroy(avl); - free(avl); -} - -/* - * Given an nvlist, produce an avl tree of snapshots, ordered by guid - */ -static avl_tree_t * -fsavl_create(nvlist_t *fss) -{ - avl_tree_t *fsavl; - nvpair_t *fselem = NULL; - - if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) - return (NULL); - - avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), - offsetof(fsavl_node_t, fn_node)); - - while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { - nvlist_t *nvfs, *snaps; - nvpair_t *snapelem = NULL; - - VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); - VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); - - while ((snapelem = - nvlist_next_nvpair(snaps, snapelem)) != NULL) { - fsavl_node_t *fn; - uint64_t guid; - - VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); - if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { - fsavl_destroy(fsavl); - return (NULL); - } - fn->fn_nvfs = nvfs; - fn->fn_snapname = nvpair_name(snapelem); - fn->fn_guid = guid; - - /* - * Note: if there are multiple snaps with the - * same GUID, we ignore all but one. - */ - if (avl_find(fsavl, fn, NULL) == NULL) - avl_add(fsavl, fn); - else - free(fn); - } - } - - return (fsavl); -} - -/* - * Routines for dealing with the giant nvlist of fs-nvlists, etc. 
- */ -typedef struct send_data { - /* - * assigned inside every recursive call, - * restored from *_save on return: - * - * guid of fromsnap snapshot in parent dataset - * txg of fromsnap snapshot in current dataset - * txg of tosnap snapshot in current dataset - */ - - uint64_t parent_fromsnap_guid; - uint64_t fromsnap_txg; - uint64_t tosnap_txg; - - /* the nvlists get accumulated during depth-first traversal */ - nvlist_t *parent_snaps; - nvlist_t *fss; - nvlist_t *snapprops; - - /* send-receive configuration, does not change during traversal */ - const char *fsname; - const char *fromsnap; - const char *tosnap; - boolean_t recursive; - boolean_t verbose; - boolean_t replicate; - - /* - * The header nvlist is of the following format: - * { - * "tosnap" -> string - * "fromsnap" -> string (if incremental) - * "fss" -> { - * id -> { - * - * "name" -> string (full name; for debugging) - * "parentfromsnap" -> number (guid of fromsnap in parent) - * - * "props" -> { name -> value (only if set here) } - * "snaps" -> { name (lastname) -> number (guid) } - * "snapprops" -> { name (lastname) -> { name -> value } } - * - * "origin" -> number (guid) (if clone) - * "sent" -> boolean (not on-disk) - * } - * } - * } - * - */ -} send_data_t; - -static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); - -static int -send_iterate_snap(zfs_handle_t *zhp, void *arg) -{ - send_data_t *sd = arg; - uint64_t guid = zhp->zfs_dmustats.dds_guid; - uint64_t txg = zhp->zfs_dmustats.dds_creation_txg; - char *snapname; - nvlist_t *nv; - - snapname = strrchr(zhp->zfs_name, '@')+1; - - if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) { - if (sd->verbose) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "skipping snapshot %s because it was created " - "after the destination snapshot (%s)\n"), - zhp->zfs_name, sd->tosnap); - } - zfs_close(zhp); - return (0); - } - - VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); - /* - * NB: if there is no fromsnap here (it's a newly 
created fs in - * an incremental replication), we will substitute the tosnap. - */ - if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || - (sd->parent_fromsnap_guid == 0 && sd->tosnap && - strcmp(snapname, sd->tosnap) == 0)) { - sd->parent_fromsnap_guid = guid; - } - - VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); - send_iterate_prop(zhp, nv); - VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); - nvlist_free(nv); - - zfs_close(zhp); - return (0); -} - -static void -send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) -{ - nvpair_t *elem = NULL; - - while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { - char *propname = nvpair_name(elem); - zfs_prop_t prop = zfs_name_to_prop(propname); - nvlist_t *propnv; - - if (!zfs_prop_user(propname)) { - /* - * Realistically, this should never happen. However, - * we want the ability to add DSL properties without - * needing to make incompatible version changes. We - * need to ignore unknown properties to allow older - * software to still send datasets containing these - * properties, with the unknown properties elided. - */ - if (prop == ZPROP_INVAL) - continue; - - if (zfs_prop_readonly(prop)) - continue; - } - - verify(nvpair_value_nvlist(elem, &propnv) == 0); - if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || - prop == ZFS_PROP_REFQUOTA || - prop == ZFS_PROP_REFRESERVATION) { - char *source; - uint64_t value; - verify(nvlist_lookup_uint64(propnv, - ZPROP_VALUE, &value) == 0); - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) - continue; - /* - * May have no source before SPA_VERSION_RECVD_PROPS, - * but is still modifiable. 
- */ - if (nvlist_lookup_string(propnv, - ZPROP_SOURCE, &source) == 0) { - if ((strcmp(source, zhp->zfs_name) != 0) && - (strcmp(source, - ZPROP_SOURCE_VAL_RECVD) != 0)) - continue; - } - } else { - char *source; - if (nvlist_lookup_string(propnv, - ZPROP_SOURCE, &source) != 0) - continue; - if ((strcmp(source, zhp->zfs_name) != 0) && - (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) - continue; - } - - if (zfs_prop_user(propname) || - zfs_prop_get_type(prop) == PROP_TYPE_STRING) { - char *value; - verify(nvlist_lookup_string(propnv, - ZPROP_VALUE, &value) == 0); - VERIFY(0 == nvlist_add_string(nv, propname, value)); - } else { - uint64_t value; - verify(nvlist_lookup_uint64(propnv, - ZPROP_VALUE, &value) == 0); - VERIFY(0 == nvlist_add_uint64(nv, propname, value)); - } - } -} - -/* - * returns snapshot creation txg - * and returns 0 if the snapshot does not exist - */ -static uint64_t -get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap) -{ - char name[ZFS_MAX_DATASET_NAME_LEN]; - uint64_t txg = 0; - - if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0') - return (txg); - - (void) snprintf(name, sizeof (name), "%s@%s", fs, snap); - if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) { - zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT); - if (zhp != NULL) { - txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG); - zfs_close(zhp); - } - } - - return (txg); -} - -/* - * recursively generate nvlists describing datasets. See comment - * for the data structure send_data_t above for description of contents - * of the nvlist. 
- */ -static int -send_iterate_fs(zfs_handle_t *zhp, void *arg) -{ - send_data_t *sd = arg; - nvlist_t *nvfs, *nv; - int rv = 0; - uint64_t min_txg = 0, max_txg = 0; - uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; - uint64_t fromsnap_txg_save = sd->fromsnap_txg; - uint64_t tosnap_txg_save = sd->tosnap_txg; - uint64_t txg = zhp->zfs_dmustats.dds_creation_txg; - uint64_t guid = zhp->zfs_dmustats.dds_guid; - uint64_t fromsnap_txg, tosnap_txg; - char guidstring[64]; - - fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap); - if (fromsnap_txg != 0) - sd->fromsnap_txg = fromsnap_txg; - - tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap); - if (tosnap_txg != 0) - sd->tosnap_txg = tosnap_txg; - - /* - * on the send side, if the current dataset does not have tosnap, - * perform two additional checks: - * - * - skip sending the current dataset if it was created later than - * the parent tosnap - * - return error if the current dataset was created earlier than - * the parent tosnap - */ - if (sd->tosnap != NULL && tosnap_txg == 0) { - if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) { - if (sd->verbose) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "skipping dataset %s: snapshot %s does " - "not exist\n"), zhp->zfs_name, sd->tosnap); - } - } else { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "cannot send %s@%s%s: snapshot %s@%s does not " - "exist\n"), sd->fsname, sd->tosnap, sd->recursive ? 
- dgettext(TEXT_DOMAIN, " recursively") : "", - zhp->zfs_name, sd->tosnap); - rv = -1; - } - goto out; - } - - nvfs = fnvlist_alloc(); - fnvlist_add_string(nvfs, "name", zhp->zfs_name); - fnvlist_add_uint64(nvfs, "parentfromsnap", - sd->parent_fromsnap_guid); - - if (zhp->zfs_dmustats.dds_origin[0]) { - zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, - zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); - if (origin == NULL) { - rv = -1; - goto out; - } - VERIFY(0 == nvlist_add_uint64(nvfs, "origin", - origin->zfs_dmustats.dds_guid)); - } - - /* iterate over props */ - VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); - send_iterate_prop(zhp, nv); - VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); - nvlist_free(nv); - - /* iterate over snaps, and set sd->parent_fromsnap_guid */ - if (!sd->replicate && fromsnap_txg != 0) - min_txg = fromsnap_txg; - if (!sd->replicate && tosnap_txg != 0) - max_txg = tosnap_txg; - sd->parent_fromsnap_guid = 0; - VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); - VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); - (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd, - min_txg, max_txg); - VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); - VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); - fnvlist_free(sd->parent_snaps); - fnvlist_free(sd->snapprops); - - /* add this fs to nvlist */ - (void) snprintf(guidstring, sizeof (guidstring), - "0x%llx", (longlong_t)guid); - VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); - nvlist_free(nvfs); - - /* iterate over children */ - if (sd->recursive) - rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); - -out: - sd->parent_fromsnap_guid = parent_fromsnap_guid_save; - sd->fromsnap_txg = fromsnap_txg_save; - sd->tosnap_txg = tosnap_txg_save; - - zfs_close(zhp); - return (rv); -} - -static int -gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, - const char *tosnap, boolean_t recursive, boolean_t 
verbose, - boolean_t replicate, nvlist_t **nvlp, avl_tree_t **avlp) -{ - zfs_handle_t *zhp; - int error; - uint64_t min_txg = 0, max_txg = 0; - send_data_t sd = { 0 }; - - zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return (EZFS_BADTYPE); - - VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); - sd.fsname = fsname; - sd.fromsnap = fromsnap; - sd.tosnap = tosnap; - sd.recursive = recursive; - sd.verbose = verbose; - sd.replicate = replicate; - - if ((error = send_iterate_fs(zhp, &sd)) != 0) { - nvlist_free(sd.fss); - if (avlp != NULL) - *avlp = NULL; - *nvlp = NULL; - return (error); - } - - if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { - nvlist_free(sd.fss); - *nvlp = NULL; - return (EZFS_NOMEM); - } - - *nvlp = sd.fss; - return (0); -} - -/* - * Routines specific to "zfs send" - */ -typedef struct send_dump_data { - /* these are all just the short snapname (the part after the @) */ - const char *fromsnap; - const char *tosnap; - char prevsnap[ZFS_MAX_DATASET_NAME_LEN]; - uint64_t prevsnap_obj; - boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; - boolean_t progressastitle; - boolean_t large_block, compress; - int outfd; - boolean_t err; - nvlist_t *fss; - nvlist_t *snapholds; - avl_tree_t *fsavl; - snapfilter_cb_t *filter_cb; - void *filter_cb_arg; - nvlist_t *debugnv; - char holdtag[ZFS_MAX_DATASET_NAME_LEN]; - int cleanup_fd; - uint64_t size; -} send_dump_data_t; - -static int -zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from, - enum lzc_send_flags flags, uint64_t *spacep) -{ - libzfs_handle_t *hdl = zhp->zfs_hdl; - int error; - - assert(snapname != NULL); - error = lzc_send_space(snapname, from, flags, spacep); - - if (error != 0) { - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "warning: cannot estimate space for '%s'"), snapname); - - switch (error) { - 
case EXDEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "not an earlier snapshot from the same fs")); - return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); - - case ENOENT: - if (zfs_dataset_exists(hdl, snapname, - ZFS_TYPE_SNAPSHOT)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "incremental source (%s) does not exist"), - snapname); - } - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - - case EDQUOT: - case EFBIG: - case EIO: - case ENOLINK: - case ENOSPC: - case ENXIO: - case EPIPE: - case ERANGE: - case EFAULT: - case EROFS: - case EINVAL: - zfs_error_aux(hdl, strerror(error)); - return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - - default: - return (zfs_standard_error(hdl, error, errbuf)); - } - } - - return (0); -} - -/* - * Dumps a backup of the given snapshot (incremental from fromsnap if it's not - * NULL) to the file descriptor specified by outfd. - */ -static int -dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, - boolean_t fromorigin, int outfd, enum lzc_send_flags flags, - nvlist_t *debugnv) -{ - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zfs_hdl; - nvlist_t *thisdbg; - - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - assert(fromsnap_obj == 0 || !fromorigin); - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - zc.zc_cookie = outfd; - zc.zc_obj = fromorigin; - zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); - zc.zc_fromobj = fromsnap_obj; - zc.zc_flags = flags; - - VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); - if (fromsnap && fromsnap[0] != '\0') { - VERIFY(0 == nvlist_add_string(thisdbg, - "fromsnap", fromsnap)); - } - - if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "warning: cannot send '%s'"), zhp->zfs_name); - - VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); - if (debugnv) { - VERIFY(0 == nvlist_add_nvlist(debugnv, - zhp->zfs_name, thisdbg)); - } - nvlist_free(thisdbg); - 
- switch (errno) { - case EXDEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "not an earlier snapshot from the same fs")); - return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); - - case ENOENT: - if (zfs_dataset_exists(hdl, zc.zc_name, - ZFS_TYPE_SNAPSHOT)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "incremental source (@%s) does not exist"), - zc.zc_value); - } - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - - case EDQUOT: - case EFBIG: - case EIO: - case ENOLINK: - case ENOSPC: -#ifdef illumos - case ENOSTR: -#endif - case ENXIO: - case EPIPE: - case ERANGE: - case EFAULT: - case EROFS: - zfs_error_aux(hdl, strerror(errno)); - return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - - default: - return (zfs_standard_error(hdl, errno, errbuf)); - } - } - - if (debugnv) - VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); - nvlist_free(thisdbg); - - return (0); -} - -static void -gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) -{ - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - - /* - * zfs_send() only sets snapholds for sends that need them, - * e.g. replication and doall. - */ - if (sdd->snapholds == NULL) - return; - - fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); -} - -static void * -send_progress_thread(void *arg) -{ - progress_arg_t *pa = arg; - zfs_cmd_t zc = { 0 }; - zfs_handle_t *zhp = pa->pa_zhp; - libzfs_handle_t *hdl = zhp->zfs_hdl; - unsigned long long bytes, total; - char buf[16]; - time_t t; - struct tm *tm; - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - - if (!pa->pa_parsable && !pa->pa_astitle) - (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); - - /* - * Print the progress from ZFS_IOC_SEND_PROGRESS every second. 
- */ - for (;;) { - (void) sleep(1); - - zc.zc_cookie = pa->pa_fd; - if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) - return ((void *)-1); - - (void) time(&t); - tm = localtime(&t); - bytes = zc.zc_cookie; - - if (pa->pa_astitle) { - int pct; - if (pa->pa_size > bytes) - pct = 100 * bytes / pa->pa_size; - else - pct = 100; - - setproctitle("sending %s (%d%%: %llu/%llu)", - zhp->zfs_name, pct, bytes, pa->pa_size); - } else if (pa->pa_parsable) { - (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", - tm->tm_hour, tm->tm_min, tm->tm_sec, - bytes, zhp->zfs_name); - } else { - zfs_nicenum(bytes, buf, sizeof (buf)); - (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", - tm->tm_hour, tm->tm_min, tm->tm_sec, - buf, zhp->zfs_name); - } - } -} - -static void -send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap, - uint64_t size, boolean_t parsable) -{ - if (parsable) { - if (fromsnap != NULL) { - (void) fprintf(fout, "incremental\t%s\t%s", - fromsnap, tosnap); - } else { - (void) fprintf(fout, "full\t%s", - tosnap); - } - } else { - if (fromsnap != NULL) { - if (strchr(fromsnap, '@') == NULL && - strchr(fromsnap, '#') == NULL) { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "send from @%s to %s"), - fromsnap, tosnap); - } else { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "send from %s to %s"), - fromsnap, tosnap); - } - } else { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "full send of %s"), - tosnap); - } - } - - if (parsable) { - (void) fprintf(fout, "\t%llu", - (longlong_t)size); - } else if (size != 0) { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - " estimated size is %s"), buf); - } - (void) fprintf(fout, "\n"); -} - -static int -dump_snapshot(zfs_handle_t *zhp, void *arg) -{ - send_dump_data_t *sdd = arg; - progress_arg_t pa = { 0 }; - pthread_t tid; - char *thissnap; - enum lzc_send_flags flags = 0; - int err; - boolean_t isfromsnap, istosnap, fromorigin; - boolean_t exclude = 
B_FALSE; - FILE *fout = sdd->std_out ? stdout : stderr; - - err = 0; - thissnap = strchr(zhp->zfs_name, '@') + 1; - isfromsnap = (sdd->fromsnap != NULL && - strcmp(sdd->fromsnap, thissnap) == 0); - - if (!sdd->seenfrom && isfromsnap) { - gather_holds(zhp, sdd); - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); - zfs_close(zhp); - return (0); - } - - if (sdd->seento || !sdd->seenfrom) { - zfs_close(zhp); - return (0); - } - - istosnap = (strcmp(sdd->tosnap, thissnap) == 0); - if (istosnap) - sdd->seento = B_TRUE; - - if (sdd->large_block) - flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (sdd->embed_data) - flags |= LZC_SEND_FLAG_EMBED_DATA; - if (sdd->compress) - flags |= LZC_SEND_FLAG_COMPRESS; - - if (!sdd->doall && !isfromsnap && !istosnap) { - if (sdd->replicate) { - char *snapname; - nvlist_t *snapprops; - /* - * Filter out all intermediate snapshots except origin - * snapshots needed to replicate clones. - */ - nvlist_t *nvfs = fsavl_find(sdd->fsavl, - zhp->zfs_dmustats.dds_guid, &snapname); - - VERIFY(0 == nvlist_lookup_nvlist(nvfs, - "snapprops", &snapprops)); - VERIFY(0 == nvlist_lookup_nvlist(snapprops, - thissnap, &snapprops)); - exclude = !nvlist_exists(snapprops, "is_clone_origin"); - } else { - exclude = B_TRUE; - } - } - - /* - * If a filter function exists, call it to determine whether - * this snapshot will be sent. - */ - if (exclude || (sdd->filter_cb != NULL && - sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { - /* - * This snapshot is filtered out. Don't send it, and don't - * set prevsnap_obj, so it will be as if this snapshot didn't - * exist, and the next accepted snapshot will be sent as - * an incremental from the last accepted one, or as the - * first (and full) snapshot in the case of a replication, - * non-incremental send. 
- */ - zfs_close(zhp); - return (0); - } - - gather_holds(zhp, sdd); - fromorigin = sdd->prevsnap[0] == '\0' && - (sdd->fromorigin || sdd->replicate); - - if (sdd->verbose || sdd->progress) { - uint64_t size = 0; - char fromds[ZFS_MAX_DATASET_NAME_LEN]; - - if (sdd->prevsnap[0] != '\0') { - (void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds)); - *(strchr(fromds, '@') + 1) = '\0'; - (void) strlcat(fromds, sdd->prevsnap, sizeof (fromds)); - } - if (zfs_send_space(zhp, zhp->zfs_name, - sdd->prevsnap[0] ? fromds : NULL, flags, &size) != 0) { - size = 0; /* cannot estimate send space */ - } else { - send_print_verbose(fout, zhp->zfs_name, - sdd->prevsnap[0] ? sdd->prevsnap : NULL, - size, sdd->parsable); - } - sdd->size += size; - } - - if (!sdd->dryrun) { - /* - * If progress reporting is requested, spawn a new thread to - * poll ZFS_IOC_SEND_PROGRESS at a regular interval. - */ - if (sdd->progress) { - pa.pa_zhp = zhp; - pa.pa_fd = sdd->outfd; - pa.pa_parsable = sdd->parsable; - pa.pa_size = sdd->size; - pa.pa_astitle = sdd->progressastitle; - - if ((err = pthread_create(&tid, NULL, - send_progress_thread, &pa)) != 0) { - zfs_close(zhp); - return (err); - } - } - - err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, - fromorigin, sdd->outfd, flags, sdd->debugnv); - - if (sdd->progress) { - (void) pthread_cancel(tid); - (void) pthread_join(tid, NULL); - } - } - - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); - zfs_close(zhp); - return (err); -} - -static int -dump_filesystem(zfs_handle_t *zhp, void *arg) -{ - int rv = 0; - uint64_t min_txg = 0, max_txg = 0; - send_dump_data_t *sdd = arg; - boolean_t missingfrom = B_FALSE; - zfs_cmd_t zc = { 0 }; - - (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", - zhp->zfs_name, sdd->tosnap); - if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "WARNING: could not send %s@%s: does not exist\n"), 
- zhp->zfs_name, sdd->tosnap); - sdd->err = B_TRUE; - return (0); - } - - if (sdd->replicate && sdd->fromsnap) { - /* - * If this fs does not have fromsnap, and we're doing - * recursive, we need to send a full stream from the - * beginning (or an incremental from the origin if this - * is a clone). If we're doing non-recursive, then let - * them get the error. - */ - (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", - zhp->zfs_name, sdd->fromsnap); - if (ioctl(zhp->zfs_hdl->libzfs_fd, - ZFS_IOC_OBJSET_STATS, &zc) != 0) { - missingfrom = B_TRUE; - } - } - - sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; - sdd->prevsnap_obj = 0; - if (sdd->fromsnap == NULL || missingfrom) - sdd->seenfrom = B_TRUE; - - if (!sdd->replicate && sdd->fromsnap != NULL) - min_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, - sdd->fromsnap); - if (!sdd->replicate && sdd->tosnap != NULL) - max_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, - sdd->tosnap); - - rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg, - min_txg, max_txg); - if (!sdd->seenfrom) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) does not exist\n"), - zhp->zfs_name, sdd->tosnap, - zhp->zfs_name, sdd->fromsnap); - sdd->err = B_TRUE; - } else if (!sdd->seento) { - if (sdd->fromsnap) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) " - "is not earlier than it\n"), - zhp->zfs_name, sdd->tosnap, - zhp->zfs_name, sdd->fromsnap); - } else { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "WARNING: " - "could not send %s@%s: does not exist\n"), - zhp->zfs_name, sdd->tosnap); - } - sdd->err = B_TRUE; - } - - return (rv); -} - -static int -dump_filesystems(zfs_handle_t *rzhp, void *arg) -{ - send_dump_data_t *sdd = arg; - nvpair_t *fspair; - boolean_t needagain, progress; - - if (!sdd->replicate) - return (dump_filesystem(rzhp, sdd)); - - /* Mark the clone origin snapshots. 
*/ - for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; - fspair = nvlist_next_nvpair(sdd->fss, fspair)) { - nvlist_t *nvfs; - uint64_t origin_guid = 0; - - VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); - (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); - if (origin_guid != 0) { - char *snapname; - nvlist_t *origin_nv = fsavl_find(sdd->fsavl, - origin_guid, &snapname); - if (origin_nv != NULL) { - nvlist_t *snapprops; - VERIFY(0 == nvlist_lookup_nvlist(origin_nv, - "snapprops", &snapprops)); - VERIFY(0 == nvlist_lookup_nvlist(snapprops, - snapname, &snapprops)); - VERIFY(0 == nvlist_add_boolean( - snapprops, "is_clone_origin")); - } - } - } -again: - needagain = progress = B_FALSE; - for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; - fspair = nvlist_next_nvpair(sdd->fss, fspair)) { - nvlist_t *fslist, *parent_nv; - char *fsname; - zfs_handle_t *zhp; - int err; - uint64_t origin_guid = 0; - uint64_t parent_guid = 0; - - VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); - if (nvlist_lookup_boolean(fslist, "sent") == 0) - continue; - - VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); - (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); - (void) nvlist_lookup_uint64(fslist, "parentfromsnap", - &parent_guid); - - if (parent_guid != 0) { - parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); - if (!nvlist_exists(parent_nv, "sent")) { - /* parent has not been sent; skip this one */ - needagain = B_TRUE; - continue; - } - } - - if (origin_guid != 0) { - nvlist_t *origin_nv = fsavl_find(sdd->fsavl, - origin_guid, NULL); - if (origin_nv != NULL && - !nvlist_exists(origin_nv, "sent")) { - /* - * origin has not been sent yet; - * skip this clone. 
- */ - needagain = B_TRUE; - continue; - } - } - - zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); - if (zhp == NULL) - return (-1); - err = dump_filesystem(zhp, sdd); - VERIFY(nvlist_add_boolean(fslist, "sent") == 0); - progress = B_TRUE; - zfs_close(zhp); - if (err) - return (err); - } - if (needagain) { - assert(progress); - goto again; - } - - /* clean out the sent flags in case we reuse this fss */ - for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; - fspair = nvlist_next_nvpair(sdd->fss, fspair)) { - nvlist_t *fslist; - - VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); - (void) nvlist_remove_all(fslist, "sent"); - } - - return (0); -} - -nvlist_t * -zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token) -{ - unsigned int version; - int nread; - unsigned long long checksum, packed_len; - - /* - * Decode token header, which is: - * <token version>-<checksum of payload>-<uncompressed payload length> - * Note that the only supported token version is 1. 
- */ - nread = sscanf(token, "%u-%llx-%llx-", - &version, &checksum, &packed_len); - if (nread != 3) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt (invalid format)")); - return (NULL); - } - - if (version != ZFS_SEND_RESUME_TOKEN_VERSION) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt (invalid version %u)"), - version); - return (NULL); - } - - /* convert hexadecimal representation to binary */ - token = strrchr(token, '-') + 1; - int len = strlen(token) / 2; - unsigned char *compressed = zfs_alloc(hdl, len); - for (int i = 0; i < len; i++) { - nread = sscanf(token + i * 2, "%2hhx", compressed + i); - if (nread != 1) { - free(compressed); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt " - "(payload is not hex-encoded)")); - return (NULL); - } - } - - /* verify checksum */ - zio_cksum_t cksum; - fletcher_4_native(compressed, len, NULL, &cksum); - if (cksum.zc_word[0] != checksum) { - free(compressed); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt (incorrect checksum)")); - return (NULL); - } - - /* uncompress */ - void *packed = zfs_alloc(hdl, packed_len); - uLongf packed_len_long = packed_len; - if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK || - packed_len_long != packed_len) { - free(packed); - free(compressed); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt (decompression failed)")); - return (NULL); - } - - /* unpack nvlist */ - nvlist_t *nv; - int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP); - free(packed); - free(compressed); - if (error != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt (nvlist_unpack failed)")); - return (NULL); - } - return (nv); -} - -int -zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, - const char *resume_token) -{ - char errbuf[1024]; - char *toname; - char *fromname = NULL; - uint64_t resumeobj, resumeoff, toguid, fromguid, bytes; - 
zfs_handle_t *zhp; - int error = 0; - char name[ZFS_MAX_DATASET_NAME_LEN]; - enum lzc_send_flags lzc_flags = 0; - uint64_t size = 0; - FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot resume send")); - - nvlist_t *resume_nvl = - zfs_send_resume_token_to_nvlist(hdl, resume_token); - if (resume_nvl == NULL) { - /* - * zfs_error_aux has already been set by - * zfs_send_resume_token_to_nvlist - */ - return (zfs_error(hdl, EZFS_FAULT, errbuf)); - } - if (flags->verbose) { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "resume token contents:\n")); - nvlist_print(fout, resume_nvl); - } - - if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 || - nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 || - nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 || - nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 || - nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "resume token is corrupt")); - return (zfs_error(hdl, EZFS_FAULT, errbuf)); - } - fromguid = 0; - (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid); - - if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok")) - lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (flags->embed_data || nvlist_exists(resume_nvl, "embedok")) - lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; - if (flags->compress || nvlist_exists(resume_nvl, "compressok")) - lzc_flags |= LZC_SEND_FLAG_COMPRESS; - - if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { - if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is no longer the same snapshot used in " - "the initial send"), toname); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' used in the initial send no longer exists"), - toname); - } - return (zfs_error(hdl, EZFS_BADPATH, errbuf)); - } - zhp = zfs_open(hdl, name, 
ZFS_TYPE_DATASET); - if (zhp == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "unable to access '%s'"), name); - return (zfs_error(hdl, EZFS_BADPATH, errbuf)); - } - - if (fromguid != 0) { - if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "incremental source %#llx no longer exists"), - (longlong_t)fromguid); - return (zfs_error(hdl, EZFS_BADPATH, errbuf)); - } - fromname = name; - } - - if (flags->progress || flags->verbose) { - error = lzc_send_space(zhp->zfs_name, fromname, - lzc_flags, &size); - if (error == 0) - size = MAX(0, (int64_t)(size - bytes)); - } - if (flags->verbose) { - send_print_verbose(fout, zhp->zfs_name, fromname, - size, flags->parsable); - } - - if (!flags->dryrun) { - progress_arg_t pa = { 0 }; - pthread_t tid; - /* - * If progress reporting is requested, spawn a new thread to - * poll ZFS_IOC_SEND_PROGRESS at a regular interval. - */ - if (flags->progress) { - pa.pa_zhp = zhp; - pa.pa_fd = outfd; - pa.pa_parsable = flags->parsable; - pa.pa_size = size; - pa.pa_astitle = flags->progressastitle; - - error = pthread_create(&tid, NULL, - send_progress_thread, &pa); - if (error != 0) { - zfs_close(zhp); - return (error); - } - } - - error = lzc_send_resume(zhp->zfs_name, fromname, outfd, - lzc_flags, resumeobj, resumeoff); - - if (flags->progress) { - (void) pthread_cancel(tid); - (void) pthread_join(tid, NULL); - } - - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "warning: cannot send '%s'"), zhp->zfs_name); - - zfs_close(zhp); - - switch (error) { - case 0: - return (0); - case EXDEV: - case ENOENT: - case EDQUOT: - case EFBIG: - case EIO: - case ENOLINK: - case ENOSPC: -#ifdef illumos - case ENOSTR: -#endif - case ENXIO: - case EPIPE: - case ERANGE: - case EFAULT: - case EROFS: - zfs_error_aux(hdl, strerror(errno)); - return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - - default: - return (zfs_standard_error(hdl, errno, errbuf)); - } - } - 
- - zfs_close(zhp); - - return (error); -} - -/* - * Generate a send stream for the dataset identified by the argument zhp. - * - * The content of the send stream is the snapshot identified by - * 'tosnap'. Incremental streams are requested in two ways: - * - from the snapshot identified by "fromsnap" (if non-null) or - * - from the origin of the dataset identified by zhp, which must - * be a clone. In this case, "fromsnap" is null and "fromorigin" - * is TRUE. - * - * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and - * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) - * if "replicate" is set. If "doall" is set, dump all the intermediate - * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" - * case too. If "props" is set, send properties. - */ -int -zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, - void *cb_arg, nvlist_t **debugnvp) -{ - char errbuf[1024]; - send_dump_data_t sdd = { 0 }; - int err = 0; - nvlist_t *fss = NULL; - avl_tree_t *fsavl = NULL; - static uint64_t holdseq; - int spa_version; - pthread_t tid = 0; - int pipefd[2]; - dedup_arg_t dda = { 0 }; - int featureflags = 0; - FILE *fout; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot send '%s'"), zhp->zfs_name); - - if (fromsnap && fromsnap[0] == '\0') { - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "zero-length incremental source")); - return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); - } - - if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { - uint64_t version; - version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - if (version >= ZPL_VERSION_SA) { - featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; - } - } - - if (flags->dedup && !flags->dryrun) { - featureflags |= (DMU_BACKUP_FEATURE_DEDUP | - DMU_BACKUP_FEATURE_DEDUPPROPS); - if ((err = pipe(pipefd)) != 0) { - zfs_error_aux(zhp->zfs_hdl, strerror(errno)); - return 
(zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, - errbuf)); - } - dda.outputfd = outfd; - dda.inputfd = pipefd[1]; - dda.dedup_hdl = zhp->zfs_hdl; - if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) { - (void) close(pipefd[0]); - (void) close(pipefd[1]); - zfs_error_aux(zhp->zfs_hdl, strerror(errno)); - return (zfs_error(zhp->zfs_hdl, - EZFS_THREADCREATEFAILED, errbuf)); - } - } - - if (flags->replicate || flags->doall || flags->props) { - dmu_replay_record_t drr = { 0 }; - char *packbuf = NULL; - size_t buflen = 0; - zio_cksum_t zc = { 0 }; - - if (flags->replicate || flags->props) { - nvlist_t *hdrnv; - - VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); - if (fromsnap) { - VERIFY(0 == nvlist_add_string(hdrnv, - "fromsnap", fromsnap)); - } - VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); - if (!flags->replicate) { - VERIFY(0 == nvlist_add_boolean(hdrnv, - "not_recursive")); - } - - err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags->replicate, flags->verbose, - flags->replicate, &fss, &fsavl); - if (err) - goto err_out; - VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); - err = nvlist_pack(hdrnv, &packbuf, &buflen, - NV_ENCODE_XDR, 0); - if (debugnvp) - *debugnvp = hdrnv; - else - nvlist_free(hdrnv); - if (err) - goto stderr_out; - } - - if (!flags->dryrun) { - /* write first begin record */ - drr.drr_type = DRR_BEGIN; - drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. - drr_versioninfo, DMU_COMPOUNDSTREAM); - DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. 
- drr_versioninfo, featureflags); - (void) snprintf(drr.drr_u.drr_begin.drr_toname, - sizeof (drr.drr_u.drr_begin.drr_toname), - "%s@%s", zhp->zfs_name, tosnap); - drr.drr_payloadlen = buflen; - - err = dump_record(&drr, packbuf, buflen, &zc, outfd); - free(packbuf); - if (err != 0) - goto stderr_out; - - /* write end record */ - bzero(&drr, sizeof (drr)); - drr.drr_type = DRR_END; - drr.drr_u.drr_end.drr_checksum = zc; - err = write(outfd, &drr, sizeof (drr)); - if (err == -1) { - err = errno; - goto stderr_out; - } - - err = 0; - } - } - - /* dump each stream */ - sdd.fromsnap = fromsnap; - sdd.tosnap = tosnap; - if (tid != 0) - sdd.outfd = pipefd[0]; - else - sdd.outfd = outfd; - sdd.replicate = flags->replicate; - sdd.doall = flags->doall; - sdd.fromorigin = flags->fromorigin; - sdd.fss = fss; - sdd.fsavl = fsavl; - sdd.verbose = flags->verbose; - sdd.parsable = flags->parsable; - sdd.progress = flags->progress; - sdd.progressastitle = flags->progressastitle; - sdd.dryrun = flags->dryrun; - sdd.large_block = flags->largeblock; - sdd.embed_data = flags->embed_data; - sdd.compress = flags->compress; - sdd.filter_cb = filter_func; - sdd.filter_cb_arg = cb_arg; - if (debugnvp) - sdd.debugnv = *debugnvp; - if (sdd.verbose && sdd.dryrun) - sdd.std_out = B_TRUE; - fout = sdd.std_out ? stdout : stderr; - - /* - * Some flags require that we place user holds on the datasets that are - * being sent so they don't get destroyed during the send. We can skip - * this step if the pool is imported read-only since the datasets cannot - * be destroyed. 
- */ - if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp), - ZPOOL_PROP_READONLY, NULL) && - zfs_spa_version(zhp, &spa_version) == 0 && - spa_version >= SPA_VERSION_USERREFS && - (flags->doall || flags->replicate)) { - ++holdseq; - (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), - ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); - sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); - if (sdd.cleanup_fd < 0) { - err = errno; - goto stderr_out; - } - sdd.snapholds = fnvlist_alloc(); - } else { - sdd.cleanup_fd = -1; - sdd.snapholds = NULL; - } - if (flags->progress || flags->verbose || sdd.snapholds != NULL) { - /* - * Do a verbose no-op dry run to get all the verbose output - * or to gather snapshot hold's before generating any data, - * then do a non-verbose real run to generate the streams. - */ - sdd.dryrun = B_TRUE; - err = dump_filesystems(zhp, &sdd); - - if (err != 0) - goto stderr_out; - - if (flags->verbose) { - if (flags->parsable) { - (void) fprintf(fout, "size\t%llu\n", - (longlong_t)sdd.size); - } else { - char buf[16]; - zfs_nicenum(sdd.size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); - } - } - - /* Ensure no snaps found is treated as an error. */ - if (!sdd.seento) { - err = ENOENT; - goto err_out; - } - - /* Skip the second run if dryrun was requested. */ - if (flags->dryrun) - goto err_out; - - if (sdd.snapholds != NULL) { - err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); - if (err != 0) - goto stderr_out; - - fnvlist_free(sdd.snapholds); - sdd.snapholds = NULL; - } - - sdd.dryrun = B_FALSE; - sdd.verbose = B_FALSE; - } - - err = dump_filesystems(zhp, &sdd); - fsavl_destroy(fsavl); - nvlist_free(fss); - - /* Ensure no snaps found is treated as an error. 
*/ - if (err == 0 && !sdd.seento) - err = ENOENT; - - if (tid != 0) { - if (err != 0) - (void) pthread_cancel(tid); - (void) close(pipefd[0]); - (void) pthread_join(tid, NULL); - } - - if (sdd.cleanup_fd != -1) { - VERIFY(0 == close(sdd.cleanup_fd)); - sdd.cleanup_fd = -1; - } - - if (!flags->dryrun && (flags->replicate || flags->doall || - flags->props)) { - /* - * write final end record. NB: want to do this even if - * there was some error, because it might not be totally - * failed. - */ - dmu_replay_record_t drr = { 0 }; - drr.drr_type = DRR_END; - if (write(outfd, &drr, sizeof (drr)) == -1) { - return (zfs_standard_error(zhp->zfs_hdl, - errno, errbuf)); - } - } - - return (err || sdd.err); - -stderr_out: - err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); -err_out: - fsavl_destroy(fsavl); - nvlist_free(fss); - fnvlist_free(sdd.snapholds); - - if (sdd.cleanup_fd != -1) - VERIFY(0 == close(sdd.cleanup_fd)); - if (tid != 0) { - (void) pthread_cancel(tid); - (void) close(pipefd[0]); - (void) pthread_join(tid, NULL); - } - return (err); -} - -int -zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags) -{ - int err = 0; - libzfs_handle_t *hdl = zhp->zfs_hdl; - enum lzc_send_flags lzc_flags = 0; - FILE *fout = (flags.verbose && flags.dryrun) ? 
stdout : stderr; - char errbuf[1024]; - - if (flags.largeblock) - lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (flags.embed_data) - lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; - if (flags.compress) - lzc_flags |= LZC_SEND_FLAG_COMPRESS; - - if (flags.verbose) { - uint64_t size = 0; - err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size); - if (err == 0) { - send_print_verbose(fout, zhp->zfs_name, from, size, - flags.parsable); - if (flags.parsable) { - (void) fprintf(fout, "size\t%llu\n", - (longlong_t)size); - } else { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); - } - } else { - (void) fprintf(stderr, "Cannot estimate send size: " - "%s\n", strerror(errno)); - } - } - - if (flags.dryrun) - return (err); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "warning: cannot send '%s'"), zhp->zfs_name); - - err = lzc_send(zhp->zfs_name, from, fd, lzc_flags); - if (err != 0) { - switch (errno) { - case EXDEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "not an earlier snapshot from the same fs")); - return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); - - case ENOENT: - case ESRCH: - if (lzc_exists(zhp->zfs_name)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "incremental source (%s) does not exist"), - from); - } - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - - case EBUSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "target is busy; if a filesystem, " - "it must not be mounted")); - return (zfs_error(hdl, EZFS_BUSY, errbuf)); - - case EDQUOT: - case EFBIG: - case EIO: - case ENOLINK: - case ENOSPC: -#ifdef illumos - case ENOSTR: -#endif - case ENXIO: - case EPIPE: - case ERANGE: - case EFAULT: - case EROFS: - zfs_error_aux(hdl, strerror(errno)); - return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - - default: - return (zfs_standard_error(hdl, errno, errbuf)); - } - } - return (err != 0); -} - -/* - * Routines specific to "zfs recv" - */ - -static int 
-recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, - boolean_t byteswap, zio_cksum_t *zc) -{ - char *cp = buf; - int rv; - int len = ilen; - - assert(ilen <= SPA_MAXBLOCKSIZE); - - do { - rv = read(fd, cp, len); - cp += rv; - len -= rv; - } while (rv > 0); - - if (rv < 0 || len != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "failed to read from stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, - "cannot receive"))); - } - - if (zc) { - if (byteswap) - (void) fletcher_4_incremental_byteswap(buf, ilen, zc); - else - (void) fletcher_4_incremental_native(buf, ilen, zc); - } - return (0); -} - -static int -recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, - boolean_t byteswap, zio_cksum_t *zc) -{ - char *buf; - int err; - - buf = zfs_alloc(hdl, len); - if (buf == NULL) - return (ENOMEM); - - err = recv_read(hdl, fd, buf, len, byteswap, zc); - if (err != 0) { - free(buf); - return (err); - } - - err = nvlist_unpack(buf, len, nvp, 0); - free(buf); - if (err != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (malformed nvlist)")); - return (EINVAL); - } - return (0); -} - -static int -recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, - int baselen, char *newname, recvflags_t *flags) -{ - static int seq; - int err; - prop_changelist_t *clp; - zfs_handle_t *zhp; - - zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); - if (zhp == NULL) - return (-1); - clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags->force ? 
MS_FORCE : 0); - zfs_close(zhp); - if (clp == NULL) - return (-1); - err = changelist_prefix(clp); - if (err) - return (err); - - if (tryname) { - (void) strcpy(newname, tryname); - if (flags->verbose) { - (void) printf("attempting rename %s to %s\n", - name, newname); - } - err = lzc_rename(name, newname); - if (err == 0) - changelist_rename(clp, name, tryname); - } else { - err = ENOENT; - } - - if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { - seq++; - - (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN, - "%.*srecv-%u-%u", baselen, name, getpid(), seq); - if (flags->verbose) { - (void) printf("failed - trying rename %s to %s\n", - name, newname); - } - err = lzc_rename(name, newname); - if (err == 0) - changelist_rename(clp, name, newname); - if (err && flags->verbose) { - (void) printf("failed (%u) - " - "will try again on next pass\n", errno); - } - err = EAGAIN; - } else if (flags->verbose) { - if (err == 0) - (void) printf("success\n"); - else - (void) printf("failed (%u)\n", errno); - } - - (void) changelist_postfix(clp); - changelist_free(clp); - - return (err); -} - -static int -recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, - char *newname, recvflags_t *flags) -{ - int err = 0; - prop_changelist_t *clp; - zfs_handle_t *zhp; - boolean_t defer = B_FALSE; - int spa_version; - - zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); - if (zhp == NULL) - return (-1); - clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags->force ? 
MS_FORCE : 0); - if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && - zfs_spa_version(zhp, &spa_version) == 0 && - spa_version >= SPA_VERSION_USERREFS) - defer = B_TRUE; - zfs_close(zhp); - if (clp == NULL) - return (-1); - err = changelist_prefix(clp); - if (err) - return (err); - - if (flags->verbose) - (void) printf("attempting destroy %s\n", name); - if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { - nvlist_t *nv = fnvlist_alloc(); - fnvlist_add_boolean(nv, name); - err = lzc_destroy_snaps(nv, defer, NULL); - fnvlist_free(nv); - } else { - err = lzc_destroy(name); - } - if (err == 0) { - if (flags->verbose) - (void) printf("success\n"); - changelist_remove(clp, name); - } - - (void) changelist_postfix(clp); - changelist_free(clp); - - /* - * Deferred destroy might destroy the snapshot or only mark it to be - * destroyed later, and it returns success in either case. - */ - if (err != 0 || (defer && zfs_dataset_exists(hdl, name, - ZFS_TYPE_SNAPSHOT))) { - err = recv_rename(hdl, name, NULL, baselen, newname, flags); - } - - return (err); -} - -typedef struct guid_to_name_data { - uint64_t guid; - boolean_t bookmark_ok; - char *name; - char *skip; -} guid_to_name_data_t; - -static int -guid_to_name_cb(zfs_handle_t *zhp, void *arg) -{ - guid_to_name_data_t *gtnd = arg; - const char *slash; - int err; - - if (gtnd->skip != NULL && - (slash = strrchr(zhp->zfs_name, '/')) != NULL && - strcmp(slash + 1, gtnd->skip) == 0) { - zfs_close(zhp); - return (0); - } - - if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) { - (void) strcpy(gtnd->name, zhp->zfs_name); - zfs_close(zhp); - return (EEXIST); - } - - err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); - if (err != EEXIST && gtnd->bookmark_ok) - err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd); - zfs_close(zhp); - return (err); -} - -/* - * Attempt to find the local dataset associated with this guid. 
In the case of - * multiple matches, we attempt to find the "best" match by searching - * progressively larger portions of the hierarchy. This allows one to send a - * tree of datasets individually and guarantee that we will find the source - * guid within that hierarchy, even if there are multiple matches elsewhere. - */ -static int -guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, - boolean_t bookmark_ok, char *name) -{ - char pname[ZFS_MAX_DATASET_NAME_LEN]; - guid_to_name_data_t gtnd; - - gtnd.guid = guid; - gtnd.bookmark_ok = bookmark_ok; - gtnd.name = name; - gtnd.skip = NULL; - - /* - * Search progressively larger portions of the hierarchy, starting - * with the filesystem specified by 'parent'. This will - * select the "most local" version of the origin snapshot in the case - * that there are multiple matching snapshots in the system. - */ - (void) strlcpy(pname, parent, sizeof (pname)); - char *cp = strrchr(pname, '@'); - if (cp == NULL) - cp = strchr(pname, '\0'); - for (; cp != NULL; cp = strrchr(pname, '/')) { - /* Chop off the last component and open the parent */ - *cp = '\0'; - zfs_handle_t *zhp = make_dataset_handle(hdl, pname); - - if (zhp == NULL) - continue; - int err = guid_to_name_cb(zfs_handle_dup(zhp), >nd); - if (err != EEXIST) - err = zfs_iter_children(zhp, guid_to_name_cb, >nd); - if (err != EEXIST && bookmark_ok) - err = zfs_iter_bookmarks(zhp, guid_to_name_cb, >nd); - zfs_close(zhp); - if (err == EEXIST) - return (0); - - /* - * Remember the last portion of the dataset so we skip it next - * time through (as we've already searched that portion of the - * hierarchy). - */ - gtnd.skip = strrchr(pname, '/') + 1; - } - - return (ENOENT); -} - -/* - * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if - * guid1 is after guid2. 
- */ -static int -created_before(libzfs_handle_t *hdl, avl_tree_t *avl, - uint64_t guid1, uint64_t guid2) -{ - nvlist_t *nvfs; - char *fsname, *snapname; - char buf[ZFS_MAX_DATASET_NAME_LEN]; - int rv; - zfs_handle_t *guid1hdl, *guid2hdl; - uint64_t create1, create2; - - if (guid2 == 0) - return (0); - if (guid1 == 0) - return (1); - - nvfs = fsavl_find(avl, guid1, &snapname); - VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); - (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (guid1hdl == NULL) - return (-1); - - nvfs = fsavl_find(avl, guid2, &snapname); - VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); - (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); - guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); - if (guid2hdl == NULL) { - zfs_close(guid1hdl); - return (-1); - } - - create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); - create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); - - if (create1 < create2) - rv = -1; - else if (create1 > create2) - rv = +1; - else - rv = 0; - - zfs_close(guid1hdl); - zfs_close(guid2hdl); - - return (rv); -} - -static int -recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, - recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, - nvlist_t *renamed) -{ - nvlist_t *local_nv, *deleted = NULL; - avl_tree_t *local_avl; - nvpair_t *fselem, *nextfselem; - char *fromsnap; - char newname[ZFS_MAX_DATASET_NAME_LEN]; - char guidname[32]; - int error; - boolean_t needagain, progress, recursive; - char *s1, *s2; - - VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); - - recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == - ENOENT); - - if (flags->dryrun) - return (0); - -again: - needagain = progress = B_FALSE; - - VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0)); - - if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, - recursive, B_FALSE, B_FALSE, &local_nv, 
&local_avl)) != 0) - return (error); - - /* - * Process deletes and renames - */ - for (fselem = nvlist_next_nvpair(local_nv, NULL); - fselem; fselem = nextfselem) { - nvlist_t *nvfs, *snaps; - nvlist_t *stream_nvfs = NULL; - nvpair_t *snapelem, *nextsnapelem; - uint64_t fromguid = 0; - uint64_t originguid = 0; - uint64_t stream_originguid = 0; - uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; - char *fsname, *stream_fsname; - - nextfselem = nvlist_next_nvpair(local_nv, fselem); - - VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); - VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); - VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); - VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", - &parent_fromsnap_guid)); - (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); - - /* - * First find the stream's fs, so we can check for - * a different origin (due to "zfs promote") - */ - for (snapelem = nvlist_next_nvpair(snaps, NULL); - snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { - uint64_t thisguid; - - VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); - stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); - - if (stream_nvfs != NULL) - break; - } - - /* check for promote */ - (void) nvlist_lookup_uint64(stream_nvfs, "origin", - &stream_originguid); - if (stream_nvfs && originguid != stream_originguid) { - switch (created_before(hdl, local_avl, - stream_originguid, originguid)) { - case 1: { - /* promote it! 
*/ - zfs_cmd_t zc = { 0 }; - nvlist_t *origin_nvfs; - char *origin_fsname; - - if (flags->verbose) - (void) printf("promoting %s\n", fsname); - - origin_nvfs = fsavl_find(local_avl, originguid, - NULL); - VERIFY(0 == nvlist_lookup_string(origin_nvfs, - "name", &origin_fsname)); - (void) strlcpy(zc.zc_value, origin_fsname, - sizeof (zc.zc_value)); - (void) strlcpy(zc.zc_name, fsname, - sizeof (zc.zc_name)); - error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); - if (error == 0) - progress = B_TRUE; - break; - } - default: - break; - case -1: - fsavl_destroy(local_avl); - nvlist_free(local_nv); - return (-1); - } - /* - * We had/have the wrong origin, therefore our - * list of snapshots is wrong. Need to handle - * them on the next pass. - */ - needagain = B_TRUE; - continue; - } - - for (snapelem = nvlist_next_nvpair(snaps, NULL); - snapelem; snapelem = nextsnapelem) { - uint64_t thisguid; - char *stream_snapname; - nvlist_t *found, *props; - - nextsnapelem = nvlist_next_nvpair(snaps, snapelem); - - VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); - found = fsavl_find(stream_avl, thisguid, - &stream_snapname); - - /* check for delete */ - if (found == NULL) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - - if (!flags->force) - continue; - - (void) snprintf(name, sizeof (name), "%s@%s", - fsname, nvpair_name(snapelem)); - - error = recv_destroy(hdl, name, - strlen(fsname)+1, newname, flags); - if (error) - needagain = B_TRUE; - else - progress = B_TRUE; - sprintf(guidname, "%" PRIu64, thisguid); - nvlist_add_boolean(deleted, guidname); - continue; - } - - stream_nvfs = found; - - if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", - &props) && 0 == nvlist_lookup_nvlist(props, - stream_snapname, &props)) { - zfs_cmd_t zc = { 0 }; - - zc.zc_cookie = B_TRUE; /* received */ - (void) snprintf(zc.zc_name, sizeof (zc.zc_name), - "%s@%s", fsname, nvpair_name(snapelem)); - if (zcmd_write_src_nvlist(hdl, &zc, - props) == 0) { - (void) zfs_ioctl(hdl, - ZFS_IOC_SET_PROP, &zc); - 
zcmd_free_nvlists(&zc); - } - } - - /* check for different snapname */ - if (strcmp(nvpair_name(snapelem), - stream_snapname) != 0) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - char tryname[ZFS_MAX_DATASET_NAME_LEN]; - - (void) snprintf(name, sizeof (name), "%s@%s", - fsname, nvpair_name(snapelem)); - (void) snprintf(tryname, sizeof (name), "%s@%s", - fsname, stream_snapname); - - error = recv_rename(hdl, name, tryname, - strlen(fsname)+1, newname, flags); - if (error) - needagain = B_TRUE; - else - progress = B_TRUE; - } - - if (strcmp(stream_snapname, fromsnap) == 0) - fromguid = thisguid; - } - - /* check for delete */ - if (stream_nvfs == NULL) { - if (!flags->force) - continue; - - error = recv_destroy(hdl, fsname, strlen(tofs)+1, - newname, flags); - if (error) - needagain = B_TRUE; - else - progress = B_TRUE; - sprintf(guidname, "%" PRIu64, parent_fromsnap_guid); - nvlist_add_boolean(deleted, guidname); - continue; - } - - if (fromguid == 0) { - if (flags->verbose) { - (void) printf("local fs %s does not have " - "fromsnap (%s in stream); must have " - "been deleted locally; ignoring\n", - fsname, fromsnap); - } - continue; - } - - VERIFY(0 == nvlist_lookup_string(stream_nvfs, - "name", &stream_fsname)); - VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, - "parentfromsnap", &stream_parent_fromsnap_guid)); - - s1 = strrchr(fsname, '/'); - s2 = strrchr(stream_fsname, '/'); - - /* - * Check if we're going to rename based on parent guid change - * and the current parent guid was also deleted. If it was then - * rename will fail and is likely unneeded, so avoid this and - * force an early retry to determine the new - * parent_fromsnap_guid. - */ - if (stream_parent_fromsnap_guid != 0 && - parent_fromsnap_guid != 0 && - stream_parent_fromsnap_guid != parent_fromsnap_guid) { - sprintf(guidname, "%" PRIu64, parent_fromsnap_guid); - if (nvlist_exists(deleted, guidname)) { - progress = B_TRUE; - needagain = B_TRUE; - goto doagain; - } - } - - /* - * Check for rename. 
If the exact receive path is specified, it - * does not count as a rename, but we still need to check the - * datasets beneath it. - */ - if ((stream_parent_fromsnap_guid != 0 && - parent_fromsnap_guid != 0 && - stream_parent_fromsnap_guid != parent_fromsnap_guid) || - ((flags->isprefix || strcmp(tofs, fsname) != 0) && - (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { - nvlist_t *parent; - char tryname[ZFS_MAX_DATASET_NAME_LEN]; - - parent = fsavl_find(local_avl, - stream_parent_fromsnap_guid, NULL); - /* - * NB: parent might not be found if we used the - * tosnap for stream_parent_fromsnap_guid, - * because the parent is a newly-created fs; - * we'll be able to rename it after we recv the - * new fs. - */ - if (parent != NULL) { - char *pname; - - VERIFY(0 == nvlist_lookup_string(parent, "name", - &pname)); - (void) snprintf(tryname, sizeof (tryname), - "%s%s", pname, strrchr(stream_fsname, '/')); - } else { - tryname[0] = '\0'; - if (flags->verbose) { - (void) printf("local fs %s new parent " - "not found\n", fsname); - } - } - - newname[0] = '\0'; - - error = recv_rename(hdl, fsname, tryname, - strlen(tofs)+1, newname, flags); - - if (renamed != NULL && newname[0] != '\0') { - VERIFY(0 == nvlist_add_boolean(renamed, - newname)); - } - - if (error) - needagain = B_TRUE; - else - progress = B_TRUE; - } - } - -doagain: - fsavl_destroy(local_avl); - nvlist_free(local_nv); - nvlist_free(deleted); - - if (needagain && progress) { - /* do another pass to fix up temporary names */ - if (flags->verbose) - (void) printf("another pass:\n"); - goto again; - } - - return (needagain); -} - -static int -zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, - recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, - char **top_zfs, int cleanup_fd, uint64_t *action_handlep) -{ - nvlist_t *stream_nv = NULL; - avl_tree_t *stream_avl = NULL; - char *fromsnap = NULL; - char *sendsnap = NULL; - char *cp; - char tofs[ZFS_MAX_DATASET_NAME_LEN]; - char 
sendfs[ZFS_MAX_DATASET_NAME_LEN]; - char errbuf[1024]; - dmu_replay_record_t drre; - int error; - boolean_t anyerr = B_FALSE; - boolean_t softerr = B_FALSE; - boolean_t recursive; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive")); - - assert(drr->drr_type == DRR_BEGIN); - assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); - assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == - DMU_COMPOUNDSTREAM); - - /* - * Read in the nvlist from the stream. - */ - if (drr->drr_payloadlen != 0) { - error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, - &stream_nv, flags->byteswap, zc); - if (error) { - error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); - goto out; - } - } - - recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == - ENOENT); - - if (recursive && strchr(destname, '@')) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot specify snapshot name for multi-snapshot stream")); - error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); - goto out; - } - - /* - * Read in the end record and verify checksum. 
- */ - if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), - flags->byteswap, NULL))) - goto out; - if (flags->byteswap) { - drre.drr_type = BSWAP_32(drre.drr_type); - drre.drr_u.drr_end.drr_checksum.zc_word[0] = - BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); - drre.drr_u.drr_end.drr_checksum.zc_word[1] = - BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); - drre.drr_u.drr_end.drr_checksum.zc_word[2] = - BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); - drre.drr_u.drr_end.drr_checksum.zc_word[3] = - BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); - } - if (drre.drr_type != DRR_END) { - error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); - goto out; - } - if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "incorrect header checksum")); - error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); - goto out; - } - - (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); - - if (drr->drr_payloadlen != 0) { - nvlist_t *stream_fss; - - VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", - &stream_fss)); - if ((stream_avl = fsavl_create(stream_fss)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "couldn't allocate avl tree")); - error = zfs_error(hdl, EZFS_NOMEM, errbuf); - goto out; - } - - if (fromsnap != NULL && recursive) { - nvlist_t *renamed = NULL; - nvpair_t *pair = NULL; - - (void) strlcpy(tofs, destname, sizeof (tofs)); - if (flags->isprefix) { - struct drr_begin *drrb = &drr->drr_u.drr_begin; - int i; - - if (flags->istail) { - cp = strrchr(drrb->drr_toname, '/'); - if (cp == NULL) { - (void) strlcat(tofs, "/", - sizeof (tofs)); - i = 0; - } else { - i = (cp - drrb->drr_toname); - } - } else { - i = strcspn(drrb->drr_toname, "/@"); - } - /* zfs_receive_one() will create_parents() */ - (void) strlcat(tofs, &drrb->drr_toname[i], - sizeof (tofs)); - *strchr(tofs, '@') = '\0'; - } - - if (!flags->dryrun && !flags->nomount) { - VERIFY(0 == nvlist_alloc(&renamed, - 
NV_UNIQUE_NAME, 0)); - } - - softerr = recv_incremental_replication(hdl, tofs, flags, - stream_nv, stream_avl, renamed); - - /* Unmount renamed filesystems before receiving. */ - while ((pair = nvlist_next_nvpair(renamed, - pair)) != NULL) { - zfs_handle_t *zhp; - prop_changelist_t *clp = NULL; - - zhp = zfs_open(hdl, nvpair_name(pair), - ZFS_TYPE_FILESYSTEM); - if (zhp != NULL) { - clp = changelist_gather(zhp, - ZFS_PROP_MOUNTPOINT, 0, - flags->forceunmount ? MS_FORCE : 0); - zfs_close(zhp); - if (clp != NULL) { - softerr |= - changelist_prefix(clp); - changelist_free(clp); - } - } - } - - nvlist_free(renamed); - } - } - - /* - * Get the fs specified by the first path in the stream (the top level - * specified by 'zfs send') and pass it to each invocation of - * zfs_receive_one(). - */ - (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, - sizeof (sendfs)); - if ((cp = strchr(sendfs, '@')) != NULL) { - *cp = '\0'; - /* - * Find the "sendsnap", the final snapshot in a replication - * stream. zfs_receive_one() handles certain errors - * differently, depending on if the contained stream is the - * last one or not. - */ - sendsnap = (cp + 1); - } - - /* Finally, receive each contained stream */ - do { - /* - * we should figure out if it has a recoverable - * error, in which case do a recv_skip() and drive on. - * Note, if we fail due to already having this guid, - * zfs_receive_one() will take care of it (ie, - * recv_skip() and return 0). - */ - error = zfs_receive_impl(hdl, destname, NULL, flags, fd, - sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, - action_handlep, sendsnap); - if (error == ENODATA) { - error = 0; - break; - } - anyerr |= error; - } while (error == 0); - - if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) { - /* - * Now that we have the fs's they sent us, try the - * renames again. 
- */ - softerr = recv_incremental_replication(hdl, tofs, flags, - stream_nv, stream_avl, NULL); - } - -out: - fsavl_destroy(stream_avl); - nvlist_free(stream_nv); - if (softerr) - error = -2; - if (anyerr) - error = -1; - return (error); -} - -static void -trunc_prop_errs(int truncated) -{ - ASSERT(truncated != 0); - - if (truncated == 1) - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "1 more property could not be set\n")); - else - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "%d more properties could not be set\n"), truncated); -} - -static int -recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) -{ - dmu_replay_record_t *drr; - void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive:")); - - /* XXX would be great to use lseek if possible... */ - drr = buf; - - while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), - byteswap, NULL) == 0) { - if (byteswap) - drr->drr_type = BSWAP_32(drr->drr_type); - - switch (drr->drr_type) { - case DRR_BEGIN: - if (drr->drr_payloadlen != 0) { - (void) recv_read(hdl, fd, buf, - drr->drr_payloadlen, B_FALSE, NULL); - } - break; - - case DRR_END: - free(buf); - return (0); - - case DRR_OBJECT: - if (byteswap) { - drr->drr_u.drr_object.drr_bonuslen = - BSWAP_32(drr->drr_u.drr_object. 
- drr_bonuslen); - } - (void) recv_read(hdl, fd, buf, - P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), - B_FALSE, NULL); - break; - - case DRR_WRITE: - if (byteswap) { - drr->drr_u.drr_write.drr_logical_size = - BSWAP_64( - drr->drr_u.drr_write.drr_logical_size); - drr->drr_u.drr_write.drr_compressed_size = - BSWAP_64( - drr->drr_u.drr_write.drr_compressed_size); - } - uint64_t payload_size = - DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write); - (void) recv_read(hdl, fd, buf, - payload_size, B_FALSE, NULL); - break; - case DRR_SPILL: - if (byteswap) { - drr->drr_u.drr_spill.drr_length = - BSWAP_64(drr->drr_u.drr_spill.drr_length); - } - (void) recv_read(hdl, fd, buf, - drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); - break; - case DRR_WRITE_EMBEDDED: - if (byteswap) { - drr->drr_u.drr_write_embedded.drr_psize = - BSWAP_32(drr->drr_u.drr_write_embedded. - drr_psize); - } - (void) recv_read(hdl, fd, buf, - P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, - 8), B_FALSE, NULL); - break; - case DRR_WRITE_BYREF: - case DRR_FREEOBJECTS: - case DRR_FREE: - break; - - default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid record type")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - } - - free(buf); - return (-1); -} - -static void -recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap, - boolean_t resumable) -{ - char target_fs[ZFS_MAX_DATASET_NAME_LEN]; - - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "checksum mismatch or incomplete stream")); - - if (!resumable) - return; - (void) strlcpy(target_fs, target_snap, sizeof (target_fs)); - *strchr(target_fs, '@') = '\0'; - zfs_handle_t *zhp = zfs_open(hdl, target_fs, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) - return; - - char token_buf[ZFS_MAXPROPLEN]; - int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, - token_buf, sizeof (token_buf), - NULL, NULL, 0, B_TRUE); - if (error == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "checksum mismatch or incomplete 
stream.\n" - "Partially received snapshot is saved.\n" - "A resuming stream can be generated on the sending " - "system by running:\n" - " zfs send -t %s"), - token_buf); - } - zfs_close(zhp); -} - -/* - * Restores a backup of tosnap from the file descriptor specified by infd. - */ -static int -zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, - const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr, - dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, - avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, - uint64_t *action_handlep, const char *finalsnap) -{ - zfs_cmd_t zc = { 0 }; - time_t begin_time; - int ioctl_err, ioctl_errno, err; - char *cp; - struct drr_begin *drrb = &drr->drr_u.drr_begin; - char errbuf[1024]; - char prop_errbuf[1024]; - const char *chopprefix; - boolean_t newfs = B_FALSE; - boolean_t stream_wantsnewfs; - uint64_t parent_snapguid = 0; - prop_changelist_t *clp = NULL; - nvlist_t *snapprops_nvlist = NULL; - zprop_errflags_t prop_errflags; - boolean_t recursive; - char *snapname = NULL; - - begin_time = time(NULL); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive")); - - recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == - ENOENT); - - if (stream_avl != NULL) { - nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, - &snapname); - nvlist_t *props; - int ret; - - (void) nvlist_lookup_uint64(fs, "parentfromsnap", - &parent_snapguid); - err = nvlist_lookup_nvlist(fs, "props", &props); - if (err) - VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); - - if (flags->canmountoff) { - VERIFY(0 == nvlist_add_uint64(props, - zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); - } - ret = zcmd_write_src_nvlist(hdl, &zc, props); - if (err) - nvlist_free(props); - - if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { - VERIFY(0 == nvlist_lookup_nvlist(props, - snapname, &snapprops_nvlist)); - } - - if (ret != 0) - return (-1); - } - - cp = NULL; - 
- /* - * Determine how much of the snapshot name stored in the stream - * we are going to tack on to the name they specified on the - * command line, and how much we are going to chop off. - * - * If they specified a snapshot, chop the entire name stored in - * the stream. - */ - if (flags->istail) { - /* - * A filesystem was specified with -e. We want to tack on only - * the tail of the sent snapshot path. - */ - if (strchr(tosnap, '@')) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "argument - snapshot not allowed with -e")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - chopprefix = strrchr(sendfs, '/'); - - if (chopprefix == NULL) { - /* - * The tail is the poolname, so we need to - * prepend a path separator. - */ - int len = strlen(drrb->drr_toname); - cp = malloc(len + 2); - cp[0] = '/'; - (void) strcpy(&cp[1], drrb->drr_toname); - chopprefix = cp; - } else { - chopprefix = drrb->drr_toname + (chopprefix - sendfs); - } - } else if (flags->isprefix) { - /* - * A filesystem was specified with -d. We want to tack on - * everything but the first element of the sent snapshot path - * (all but the pool name). - */ - if (strchr(tosnap, '@')) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "argument - snapshot not allowed with -d")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - chopprefix = strchr(drrb->drr_toname, '/'); - if (chopprefix == NULL) - chopprefix = strchr(drrb->drr_toname, '@'); - } else if (strchr(tosnap, '@') == NULL) { - /* - * If a filesystem was specified without -d or -e, we want to - * tack on everything after the fs specified by 'zfs send'. - */ - chopprefix = drrb->drr_toname + strlen(sendfs); - } else { - /* A snapshot was specified as an exact path (no -d or -e). 
*/ - if (recursive) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot specify snapshot name for multi-snapshot " - "stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); - } - - ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); - ASSERT(chopprefix > drrb->drr_toname); - ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); - ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || - chopprefix[0] == '\0'); - - /* - * Determine name of destination snapshot, store in zc_value. - */ - (void) strcpy(zc.zc_value, tosnap); - (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); -#ifdef __FreeBSD__ - if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) - zfs_ioctl_version = get_zfs_ioctl_version(); - /* - * For forward compatibility hide tosnap in zc_value - */ - if (zfs_ioctl_version < ZFS_IOCVER_LZC) - (void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap); -#endif - free(cp); - if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { - zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - - /* - * Determine the name of the origin snapshot, store in zc_string. 
- */ - if (originsnap) { - (void) strncpy(zc.zc_string, originsnap, sizeof (zc.zc_string)); - if (flags->verbose) - (void) printf("using provided clone origin %s\n", - zc.zc_string); - } else if (drrb->drr_flags & DRR_FLAG_CLONE) { - if (guid_to_name(hdl, zc.zc_value, - drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "local origin for clone %s does not exist"), - zc.zc_value); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - if (flags->verbose) - (void) printf("found clone origin %s\n", zc.zc_string); - } - - boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_RESUMING; - stream_wantsnewfs = (drrb->drr_fromguid == 0 || - (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; - - if (stream_wantsnewfs) { - /* - * if the parent fs does not exist, look for it based on - * the parent snap GUID - */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive new filesystem stream")); - - (void) strcpy(zc.zc_name, zc.zc_value); - cp = strrchr(zc.zc_name, '/'); - if (cp) - *cp = '\0'; - if (cp && - !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - char suffix[ZFS_MAX_DATASET_NAME_LEN]; - (void) strcpy(suffix, strrchr(zc.zc_value, '/')); - if (guid_to_name(hdl, zc.zc_name, parent_snapguid, - B_FALSE, zc.zc_value) == 0) { - *strchr(zc.zc_value, '@') = '\0'; - (void) strcat(zc.zc_value, suffix); - } - } - } else { - /* - * If the fs does not exist, look for it based on the - * fromsnap GUID. 
- */ - if (resuming) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot receive resume stream")); - } else { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot receive incremental stream")); - } - - (void) strcpy(zc.zc_name, zc.zc_value); - *strchr(zc.zc_name, '@') = '\0'; - - /* - * If the exact receive path was specified and this is the - * topmost path in the stream, then if the fs does not exist we - * should look no further. - */ - if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + - strlen(sendfs)) != '\0' && *chopprefix != '@')) && - !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - char snap[ZFS_MAX_DATASET_NAME_LEN]; - (void) strcpy(snap, strchr(zc.zc_value, '@')); - if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, - B_FALSE, zc.zc_value) == 0) { - *strchr(zc.zc_value, '@') = '\0'; - (void) strcat(zc.zc_value, snap); - } - } - } - - (void) strcpy(zc.zc_name, zc.zc_value); - *strchr(zc.zc_name, '@') = '\0'; - - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - zfs_handle_t *zhp; - - /* - * Destination fs exists. It must be one of these cases: - * - an incremental send stream - * - the stream specifies a new fs (full stream or clone) - * and they want us to blow away the existing fs (and - * have therefore specified -F and removed any snapshots) - * - we are resuming a failed receive. - */ - if (stream_wantsnewfs) { - boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL; - if (!flags->force) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' exists\n" - "must specify -F to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, - &zc) == 0) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination has snapshots (eg. 
%s)\n" - "must destroy them to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - if (is_volume && strrchr(zc.zc_name, '/') == NULL) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' is the root dataset\n" - "cannot overwrite with a ZVOL"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - if (is_volume && - ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, - &zc) == 0) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination has children (eg. %s)\n" - "cannot overwrite with a ZVOL"), - zc.zc_name); - return (zfs_error(hdl, EZFS_WRONG_PARENT, - errbuf)); - } - } - - if ((zhp = zfs_open(hdl, zc.zc_name, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { - zcmd_free_nvlists(&zc); - return (-1); - } - - if (stream_wantsnewfs && - zhp->zfs_dmustats.dds_origin[0]) { - zcmd_free_nvlists(&zc); - zfs_close(zhp); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' is a clone\n" - "must destroy it to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - - if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && - (stream_wantsnewfs || resuming)) { - /* We can't do online recv in this case */ - clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, - flags->forceunmount ? MS_FORCE : 0); - if (clp == NULL) { - zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); - } - if (changelist_prefix(clp) != 0) { - changelist_free(clp); - zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); - } - } - - /* - * If we are resuming a newfs, set newfs here so that we will - * mount it if the recv succeeds this time. We can tell - * that it was a newfs on the first recv because the fs - * itself will be inconsistent (if the fs existed when we - * did the first recv, we would have received it into - * .../%recv). 
- */ - if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT)) - newfs = B_TRUE; - - zfs_close(zhp); - } else { - zfs_handle_t *zhp; - - /* - * Destination filesystem does not exist. Therefore we better - * be creating a new filesystem (either from a full backup, or - * a clone). It would therefore be invalid if the user - * specified only the pool name (i.e. if the destination name - * contained no slash character). - */ - if (!stream_wantsnewfs || - (cp = strrchr(zc.zc_name, '/')) == NULL) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' does not exist"), zc.zc_name); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - - /* - * Trim off the final dataset component so we perform the - * recvbackup ioctl to the filesystems's parent. - */ - *cp = '\0'; - - if (flags->isprefix && !flags->istail && !flags->dryrun && - create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { - zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); - } - - /* validate parent */ - zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_DATASET); - if (zhp == NULL) { - zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); - } - if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { - zcmd_free_nvlists(&zc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "parent '%s' is not a filesystem"), zc.zc_name); - zfs_close(zhp); - return (zfs_error(hdl, EZFS_WRONG_PARENT, errbuf)); - } - zfs_close(zhp); - - newfs = B_TRUE; - } - - zc.zc_begin_record = *drr_noswap; - zc.zc_cookie = infd; - zc.zc_guid = flags->force; - zc.zc_resumable = flags->resumable; - if (flags->verbose) { - (void) printf("%s %s stream of %s into %s\n", - flags->dryrun ? "would receive" : "receiving", - drrb->drr_fromguid ? 
"incremental" : "full", - drrb->drr_toname, zc.zc_value); - (void) fflush(stdout); - } - - if (flags->dryrun) { - zcmd_free_nvlists(&zc); - return (recv_skip(hdl, infd, flags->byteswap)); - } - - zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; - zc.zc_nvlist_dst_size = sizeof (prop_errbuf); - zc.zc_cleanup_fd = cleanup_fd; - zc.zc_action_handle = *action_handlep; - - err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); - ioctl_errno = errno; - prop_errflags = (zprop_errflags_t)zc.zc_obj; - - if (err == 0) { - nvlist_t *prop_errors; - VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, - zc.zc_nvlist_dst_size, &prop_errors, 0)); - - nvpair_t *prop_err = NULL; - - while ((prop_err = nvlist_next_nvpair(prop_errors, - prop_err)) != NULL) { - char tbuf[1024]; - zfs_prop_t prop; - int intval; - - prop = zfs_name_to_prop(nvpair_name(prop_err)); - (void) nvpair_value_int32(prop_err, &intval); - if (strcmp(nvpair_name(prop_err), - ZPROP_N_MORE_ERRORS) == 0) { - trunc_prop_errs(intval); - break; - } else if (snapname == NULL || finalsnap == NULL || - strcmp(finalsnap, snapname) == 0 || - strcmp(nvpair_name(prop_err), - zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) { - /* - * Skip the special case of, for example, - * "refquota", errors on intermediate - * snapshots leading up to a final one. - * That's why we have all of the checks above. - * - * See zfs_ioctl.c's extract_delay_props() for - * a list of props which can fail on - * intermediate snapshots, but shouldn't - * affect the overall receive. 
- */ - (void) snprintf(tbuf, sizeof (tbuf), - dgettext(TEXT_DOMAIN, - "cannot receive %s property on %s"), - nvpair_name(prop_err), zc.zc_name); - zfs_setprop_error(hdl, prop, intval, tbuf); - } - } - nvlist_free(prop_errors); - } - - zc.zc_nvlist_dst = 0; - zc.zc_nvlist_dst_size = 0; - zcmd_free_nvlists(&zc); - - if (err == 0 && snapprops_nvlist) { - zfs_cmd_t zc2 = { 0 }; - - (void) strcpy(zc2.zc_name, zc.zc_value); - zc2.zc_cookie = B_TRUE; /* received */ - if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { - (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); - zcmd_free_nvlists(&zc2); - } - } - - if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { - /* - * It may be that this snapshot already exists, - * in which case we want to consume & ignore it - * rather than failing. - */ - avl_tree_t *local_avl; - nvlist_t *local_nv, *fs; - cp = strchr(zc.zc_value, '@'); - - /* - * XXX Do this faster by just iterating over snaps in - * this fs. Also if zc_value does not exist, we will - * get a strange "does not exist" error message. 
- */ - *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, - B_FALSE, B_FALSE, &local_nv, &local_avl) == 0) { - *cp = '@'; - fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); - fsavl_destroy(local_avl); - nvlist_free(local_nv); - - if (fs != NULL) { - if (flags->verbose) { - (void) printf("snap %s already exists; " - "ignoring\n", zc.zc_value); - } - err = ioctl_err = recv_skip(hdl, infd, - flags->byteswap); - } - } - *cp = '@'; - } - - if (ioctl_err != 0) { - switch (ioctl_errno) { - case ENODEV: - cp = strchr(zc.zc_value, '@'); - *cp = '\0'; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "most recent snapshot of %s does not\n" - "match incremental source"), zc.zc_value); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); - *cp = '@'; - break; - case ETXTBSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination %s has been modified\n" - "since most recent snapshot"), zc.zc_name); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); - break; - case EEXIST: - cp = strchr(zc.zc_value, '@'); - if (newfs) { - /* it's the containing fs that exists */ - *cp = '\0'; - } - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination already exists")); - (void) zfs_error_fmt(hdl, EZFS_EXISTS, - dgettext(TEXT_DOMAIN, "cannot restore to %s"), - zc.zc_value); - *cp = '@'; - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); - break; - case ECKSUM: - recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable); - (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); - break; - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded to receive this stream.")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - case EDQUOT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination %s space quota exceeded"), zc.zc_name); - (void) zfs_error(hdl, EZFS_NOSPC, errbuf); - break; - default: - (void) zfs_standard_error(hdl, ioctl_errno, errbuf); - } - } - - /* - * Mount the target filesystem (if created). 
Also mount any - * children of the target filesystem if we did a replication - * receive (indicated by stream_avl being non-NULL). - */ - cp = strchr(zc.zc_value, '@'); - if (cp && (ioctl_err == 0 || !newfs)) { - zfs_handle_t *h; - - *cp = '\0'; - h = zfs_open(hdl, zc.zc_value, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (h != NULL) { - if (h->zfs_type == ZFS_TYPE_VOLUME) { - *cp = '@'; - } else if (newfs || stream_avl) { - /* - * Track the first/top of hierarchy fs, - * for mounting and sharing later. - */ - if (top_zfs && *top_zfs == NULL) - *top_zfs = zfs_strdup(hdl, zc.zc_value); - } - zfs_close(h); - } - *cp = '@'; - } - - if (clp) { - if (!flags->nomount) - err |= changelist_postfix(clp); - changelist_free(clp); - } - - if (prop_errflags & ZPROP_ERR_NOCLEAR) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " - "failed to clear unreceived properties on %s"), - zc.zc_name); - (void) fprintf(stderr, "\n"); - } - if (prop_errflags & ZPROP_ERR_NORESTORE) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " - "failed to restore original properties on %s"), - zc.zc_name); - (void) fprintf(stderr, "\n"); - } - - if (err || ioctl_err) - return (-1); - - *action_handlep = zc.zc_action_handle; - - if (flags->verbose) { - char buf1[64]; - char buf2[64]; - uint64_t bytes = zc.zc_cookie; - time_t delta = time(NULL) - begin_time; - if (delta == 0) - delta = 1; - zfs_nicenum(bytes, buf1, sizeof (buf1)); - zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); - - (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", - buf1, delta, buf2); - } - - return (0); -} - -static int -zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, - const char *originsnap, recvflags_t *flags, int infd, const char *sendfs, - nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, - uint64_t *action_handlep, const char *finalsnap) -{ - int err; - dmu_replay_record_t drr, drr_noswap; - struct drr_begin *drrb = &drr.drr_u.drr_begin; - char 
errbuf[1024]; - zio_cksum_t zcksum = { 0 }; - uint64_t featureflags; - int hdrtype; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive")); - - if (flags->isprefix && - !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " - "(%s) does not exist"), tosnap); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - if (originsnap && - !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs " - "(%s) does not exist"), originsnap); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); - } - - /* read in the BEGIN record */ - if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, - &zcksum))) - return (err); - - if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { - /* It's the double end record at the end of a package */ - return (ENODATA); - } - - /* the kernel needs the non-byteswapped begin record */ - drr_noswap = drr; - - flags->byteswap = B_FALSE; - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { - /* - * We computed the checksum in the wrong byteorder in - * recv_read() above; do it again correctly. 
- */ - bzero(&zcksum, sizeof (zio_cksum_t)); - (void) fletcher_4_incremental_byteswap(&drr, - sizeof (drr), &zcksum); - flags->byteswap = B_TRUE; - - drr.drr_type = BSWAP_32(drr.drr_type); - drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_flags = BSWAP_32(drrb->drr_flags); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); - } - - if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (bad magic number)")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); - hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); - - if (!DMU_STREAM_SUPPORTED(featureflags) || - (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "stream has unsupported feature, feature flags = %lx"), - featureflags); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - if (strchr(drrb->drr_toname, '@') == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (bad snapshot name)")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { - char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN]; - if (sendfs == NULL) { - /* - * We were not called from zfs_receive_package(). Get - * the fs specified by 'zfs send'. 
- */ - char *cp; - (void) strlcpy(nonpackage_sendfs, - drr.drr_u.drr_begin.drr_toname, - sizeof (nonpackage_sendfs)); - if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) - *cp = '\0'; - sendfs = nonpackage_sendfs; - VERIFY(finalsnap == NULL); - } - return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, - &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, - cleanup_fd, action_handlep, finalsnap)); - } else { - assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == - DMU_COMPOUNDSTREAM); - return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, - &zcksum, top_zfs, cleanup_fd, action_handlep)); - } -} - -/* - * Restores a backup of tosnap from the file descriptor specified by infd. - * Return 0 on total success, -2 if some things couldn't be - * destroyed/renamed/promoted, -1 if some things couldn't be received. - * (-1 will override -2, if -1 and the resumable flag was specified the - * transfer can be resumed if the sending side supports it). - */ -int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, - recvflags_t *flags, int infd, avl_tree_t *stream_avl) -{ - char *top_zfs = NULL; - int err; - int cleanup_fd; - uint64_t action_handle = 0; - char *originsnap = NULL; - - if (props) { - err = nvlist_lookup_string(props, "origin", &originsnap); - if (err && err != ENOENT) - return (err); - } - - cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); - VERIFY(cleanup_fd >= 0); - - err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, - stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL); - - VERIFY(0 == close(cleanup_fd)); - - if (err == 0 && !flags->nomount && top_zfs) { - zfs_handle_t *zhp; - prop_changelist_t *clp; - - zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); - if (zhp != NULL) { - clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, - CL_GATHER_MOUNT_ALWAYS, - flags->forceunmount ? 
MS_FORCE : 0); - zfs_close(zhp); - if (clp != NULL) { - /* mount and share received datasets */ - err = changelist_postfix(clp); - changelist_free(clp); - } - } - if (zhp == NULL || clp == NULL || err) - err = -1; - } - if (top_zfs) - free(top_zfs); - - return (err); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c deleted file mode 100644 index d32662022cf5..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c +++ /dev/null @@ -1,511 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - */ - -/* - * This file contains the functions which analyze the status of a pool. This - * include both the status of an active pool, as well as the status exported - * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of - * the pool. This status is independent (to a certain degree) from the state of - * the pool. 
A pool's state describes only whether or not it is capable of - * providing the necessary fault tolerance for data. The status describes the - * overall status of devices. A pool that is online can still have a device - * that is experiencing errors. - * - * Only a subset of the possible faults can be detected using 'zpool status', - * and not all possible errors correspond to a FMA message ID. The explanation - * is left up to the caller, depending on whether it is a live pool or an - * import. - */ - -#include <libzfs.h> -#include <string.h> -#include <unistd.h> -#include "libzfs_impl.h" -#include "zfeature_common.h" - -/* - * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines - * in libzfs.h. Note that there are some status results which go past the end - * of this table, and hence have no associated message ID. - */ -static char *zfs_msgid_table[] = { - "ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */ - "ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */ - "ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */ - "ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */ - "ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */ - "ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */ - "ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */ - "ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */ - "ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */ - "ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */ - "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */ - "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */ - "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */ - "ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */ - "ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */ - "ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */ - "ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */ - /* - * The following results have no message ID. 
- * ZPOOL_STATUS_UNSUP_FEAT_READ - * ZPOOL_STATUS_UNSUP_FEAT_WRITE - * ZPOOL_STATUS_FAULTED_DEV_R - * ZPOOL_STATUS_FAULTED_DEV_NR - * ZPOOL_STATUS_VERSION_OLDER - * ZPOOL_STATUS_FEAT_DISABLED - * ZPOOL_STATUS_RESILVERING - * ZPOOL_STATUS_OFFLINE_DEV - * ZPOOL_STATUS_REMOVED_DEV - * ZPOOL_STATUS_OK - */ -}; - -#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) - -/* ARGSUSED */ -static int -vdev_missing(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_OPEN_FAILED); -} - -/* ARGSUSED */ -static int -vdev_faulted(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_FAULTED); -} - -/* ARGSUSED */ -static int -vdev_errors(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_DEGRADED || - vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || - vs->vs_checksum_errors != 0); -} - -/* ARGSUSED */ -static int -vdev_broken(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_CANT_OPEN); -} - -/* ARGSUSED */ -static int -vdev_offlined(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_OFFLINE); -} - -/* ARGSUSED */ -static int -vdev_removed(vdev_stat_t *vs, uint_t vsc) -{ - return (vs->vs_state == VDEV_STATE_REMOVED); -} - -static int -vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) -{ - return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && - vs->vs_configured_ashift < vs->vs_physical_ashift); -} - -/* - * Detect if any leaf devices that have seen errors or could not be opened. - */ -static boolean_t -find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), - boolean_t ignore_replacing) -{ - nvlist_t **child; - vdev_stat_t *vs; - uint_t c, vsc, children; - - /* - * Ignore problems within a 'replacing' vdev, since we're presumably in - * the process of repairing any such errors, and don't want to call them - * out again. We'll pick up the fact that a resilver is happening - * later. 
- */ - if (ignore_replacing == B_TRUE) { - char *type; - - verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, - &type) == 0); - if (strcmp(type, VDEV_TYPE_REPLACING) == 0) - return (B_FALSE); - } - - if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) - return (B_TRUE); - } else { - verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - - if (func(vs, vsc) != 0) - return (B_TRUE); - } - - /* - * Check any L2 cache devs - */ - if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, - &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) - return (B_TRUE); - } - - return (B_FALSE); -} - -/* - * Active pool health status. - * - * To determine the status for a pool, we make several passes over the config, - * picking the most egregious error we find. In order of importance, we do the - * following: - * - * - Check for a complete and valid configuration - * - Look for any faulted or missing devices in a non-replicated config - * - Check for any data errors - * - Check for any faulted or missing devices in a replicated config - * - Look for any devices showing errors - * - Check for any resilvering devices - * - * There can obviously be multiple errors within a single pool, so this routine - * only picks the most damaging of all the current errors to report. 
- */ -static zpool_status_t -check_status(nvlist_t *config, boolean_t isimport) -{ - nvlist_t *nvroot; - vdev_stat_t *vs; - pool_scan_stat_t *ps = NULL; - uint_t vsc, psc; - uint64_t nerr; - uint64_t version; - uint64_t stateval; - uint64_t suspended; - uint64_t hostid = 0; - unsigned long system_hostid = get_system_hostid(); - - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, - &version) == 0); - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &vsc) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &stateval) == 0); - - /* - * Currently resilvering a vdev - */ - (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, - (uint64_t **)&ps, &psc); - if (ps != NULL && ps->pss_func == POOL_SCAN_RESILVER && - ps->pss_state == DSS_SCANNING) - return (ZPOOL_STATUS_RESILVERING); - - /* - * The multihost property is set and the pool may be active. - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_ACTIVE) { - mmp_state_t mmp_state; - nvlist_t *nvinfo; - - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - mmp_state = fnvlist_lookup_uint64(nvinfo, - ZPOOL_CONFIG_MMP_STATE); - - if (mmp_state == MMP_STATE_ACTIVE) - return (ZPOOL_STATUS_HOSTID_ACTIVE); - else if (mmp_state == MMP_STATE_NO_HOSTID) - return (ZPOOL_STATUS_HOSTID_REQUIRED); - else - return (ZPOOL_STATUS_HOSTID_MISMATCH); - } - - /* - * Pool last accessed by another system. - */ - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); - if (hostid != 0 && (unsigned long)hostid != system_hostid && - stateval == POOL_STATE_ACTIVE) - return (ZPOOL_STATUS_HOSTID_MISMATCH); - - /* - * Newer on-disk version. - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_VERSION_NEWER) - return (ZPOOL_STATUS_VERSION_NEWER); - - /* - * Unsupported feature(s). 
- */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_UNSUP_FEAT) { - nvlist_t *nvinfo; - - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, - &nvinfo) == 0); - if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) - return (ZPOOL_STATUS_UNSUP_FEAT_WRITE); - return (ZPOOL_STATUS_UNSUP_FEAT_READ); - } - - /* - * Check that the config is complete. - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) - return (ZPOOL_STATUS_BAD_GUID_SUM); - - /* - * Check whether the pool has suspended. - */ - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, - &suspended) == 0) { - uint64_t reason; - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON, - &reason) == 0 && reason == ZIO_SUSPEND_MMP) - return (ZPOOL_STATUS_IO_FAILURE_MMP); - - if (suspended == ZIO_FAILURE_MODE_CONTINUE) - return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); - return (ZPOOL_STATUS_IO_FAILURE_WAIT); - } - - /* - * Could not read a log. - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_BAD_LOG) { - return (ZPOOL_STATUS_BAD_LOG); - } - - /* - * Bad devices in non-replicated config. - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) - return (ZPOOL_STATUS_FAULTED_DEV_NR); - - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_missing, B_TRUE)) - return (ZPOOL_STATUS_MISSING_DEV_NR); - - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_broken, B_TRUE)) - return (ZPOOL_STATUS_CORRUPT_LABEL_NR); - - /* - * Corrupted pool metadata - */ - if (vs->vs_state == VDEV_STATE_CANT_OPEN && - vs->vs_aux == VDEV_AUX_CORRUPT_DATA) - return (ZPOOL_STATUS_CORRUPT_POOL); - - /* - * Persistent data errors. - */ - if (!isimport) { - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, - &nerr) == 0 && nerr != 0) - return (ZPOOL_STATUS_CORRUPT_DATA); - } - - /* - * Missing devices in a replicated config. 
- */ - if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) - return (ZPOOL_STATUS_FAULTED_DEV_R); - if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) - return (ZPOOL_STATUS_MISSING_DEV_R); - if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) - return (ZPOOL_STATUS_CORRUPT_LABEL_R); - - /* - * Devices with errors - */ - if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) - return (ZPOOL_STATUS_FAILING_DEV); - - /* - * Offlined devices - */ - if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) - return (ZPOOL_STATUS_OFFLINE_DEV); - - /* - * Removed device - */ - if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) - return (ZPOOL_STATUS_REMOVED_DEV); - - /* - * Suboptimal, but usable, ashift configuration. - */ - if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) - return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); - - /* - * Outdated, but usable, version - */ - if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) - return (ZPOOL_STATUS_VERSION_OLDER); - - /* - * Usable pool with disabled features - */ - if (version >= SPA_VERSION_FEATURES) { - int i; - nvlist_t *feat; - - if (isimport) { - feat = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_LOAD_INFO); - if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT)) - feat = fnvlist_lookup_nvlist(feat, - ZPOOL_CONFIG_ENABLED_FEAT); - } else { - feat = fnvlist_lookup_nvlist(config, - ZPOOL_CONFIG_FEATURE_STATS); - } - - for (i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *fi = &spa_feature_table[i]; - if (!nvlist_exists(feat, fi->fi_guid)) - return (ZPOOL_STATUS_FEAT_DISABLED); - } - } - - return (ZPOOL_STATUS_OK); -} - -zpool_status_t -zpool_get_status(zpool_handle_t *zhp, char **msgid) -{ - zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); - - if (ret >= NMSGID) - *msgid = NULL; - else - *msgid = zfs_msgid_table[ret]; - - return (ret); -} - -zpool_status_t -zpool_import_status(nvlist_t *config, char **msgid) -{ - zpool_status_t ret = check_status(config, B_TRUE); - - if (ret 
>= NMSGID) - *msgid = NULL; - else - *msgid = zfs_msgid_table[ret]; - - return (ret); -} - -static void -dump_ddt_stat(const ddt_stat_t *dds, int h) -{ - char refcnt[6]; - char blocks[6], lsize[6], psize[6], dsize[6]; - char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; - - if (dds == NULL || dds->dds_blocks == 0) - return; - - if (h == -1) - (void) strcpy(refcnt, "Total"); - else - zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); - - zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); - zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); - zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); - zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); - zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); - zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); - zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); - zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - refcnt, - blocks, lsize, psize, dsize, - ref_blocks, ref_lsize, ref_psize, ref_dsize); -} - -/* - * Print the DDT histogram and the column totals. 
- */ -void -zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) -{ - int h; - - (void) printf("\n"); - - (void) printf("bucket " - " allocated " - " referenced \n"); - (void) printf("______ " - "______________________________ " - "______________________________\n"); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - "refcnt", - "blocks", "LSIZE", "PSIZE", "DSIZE", - "blocks", "LSIZE", "PSIZE", "DSIZE"); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - "------", - "------", "-----", "-----", "-----", - "------", "-----", "-----", "-----"); - - for (h = 0; h < 64; h++) - dump_ddt_stat(&ddh->ddh_stat[h], h); - - dump_ddt_stat(dds_total, -1); - - (void) printf("\n"); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c deleted file mode 100644 index 4439bcbbee57..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c +++ /dev/null @@ -1,1661 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. - * Copyright (c) 2011, 2018 by Delphix. 
All rights reserved. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - * Copyright (c) 2017 Datto Inc. - */ - -/* - * Internal utility routines for the ZFS library. - */ - -#include <sys/param.h> -#include <sys/linker.h> -#include <sys/module.h> -#include <sys/stat.h> - -#include <errno.h> -#include <fcntl.h> -#include <libintl.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> -#include <unistd.h> -#include <ctype.h> -#include <math.h> -#include <sys/mnttab.h> -#include <sys/mntent.h> -#include <sys/types.h> -#include <libcmdutils.h> - -#include <libzfs.h> -#include <libzfs_core.h> - -#include "libzfs_impl.h" -#include "zfs_prop.h" -#include "zfs_comutil.h" -#include "zfeature_common.h" - - -int -libzfs_errno(libzfs_handle_t *hdl) -{ - return (hdl->libzfs_error); -} - -const char * -libzfs_error_action(libzfs_handle_t *hdl) -{ - return (hdl->libzfs_action); -} - -const char * -libzfs_error_description(libzfs_handle_t *hdl) -{ - if (hdl->libzfs_desc[0] != '\0') - return (hdl->libzfs_desc); - - switch (hdl->libzfs_error) { - case EZFS_NOMEM: - return (dgettext(TEXT_DOMAIN, "out of memory")); - case EZFS_BADPROP: - return (dgettext(TEXT_DOMAIN, "invalid property value")); - case EZFS_PROPREADONLY: - return (dgettext(TEXT_DOMAIN, "read-only property")); - case EZFS_PROPTYPE: - return (dgettext(TEXT_DOMAIN, "property doesn't apply to " - "datasets of this type")); - case EZFS_PROPNONINHERIT: - return (dgettext(TEXT_DOMAIN, "property cannot be inherited")); - case EZFS_PROPSPACE: - return (dgettext(TEXT_DOMAIN, "invalid quota or reservation")); - case EZFS_BADTYPE: - return (dgettext(TEXT_DOMAIN, "operation not applicable to " - "datasets of this type")); - case EZFS_BUSY: - return (dgettext(TEXT_DOMAIN, "pool or dataset is busy")); - case EZFS_EXISTS: - return (dgettext(TEXT_DOMAIN, "pool or dataset exists")); - case EZFS_NOENT: - return (dgettext(TEXT_DOMAIN, "no such pool or dataset")); - case EZFS_BADSTREAM: - return 
(dgettext(TEXT_DOMAIN, "invalid backup stream")); - case EZFS_DSREADONLY: - return (dgettext(TEXT_DOMAIN, "dataset is read-only")); - case EZFS_VOLTOOBIG: - return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " - "this system")); - case EZFS_INVALIDNAME: - return (dgettext(TEXT_DOMAIN, "invalid name")); - case EZFS_BADRESTORE: - return (dgettext(TEXT_DOMAIN, "unable to restore to " - "destination")); - case EZFS_BADBACKUP: - return (dgettext(TEXT_DOMAIN, "backup failed")); - case EZFS_BADTARGET: - return (dgettext(TEXT_DOMAIN, "invalid target vdev")); - case EZFS_NODEVICE: - return (dgettext(TEXT_DOMAIN, "no such device in pool")); - case EZFS_BADDEV: - return (dgettext(TEXT_DOMAIN, "invalid device")); - case EZFS_NOREPLICAS: - return (dgettext(TEXT_DOMAIN, "no valid replicas")); - case EZFS_RESILVERING: - return (dgettext(TEXT_DOMAIN, "currently resilvering")); - case EZFS_BADVERSION: - return (dgettext(TEXT_DOMAIN, "unsupported version or " - "feature")); - case EZFS_POOLUNAVAIL: - return (dgettext(TEXT_DOMAIN, "pool is unavailable")); - case EZFS_DEVOVERFLOW: - return (dgettext(TEXT_DOMAIN, "too many devices in one vdev")); - case EZFS_BADPATH: - return (dgettext(TEXT_DOMAIN, "must be an absolute path")); - case EZFS_CROSSTARGET: - return (dgettext(TEXT_DOMAIN, "operation crosses datasets or " - "pools")); - case EZFS_ZONED: - return (dgettext(TEXT_DOMAIN, "dataset in use by local zone")); - case EZFS_MOUNTFAILED: - return (dgettext(TEXT_DOMAIN, "mount failed")); - case EZFS_UMOUNTFAILED: - return (dgettext(TEXT_DOMAIN, "umount failed")); - case EZFS_UNSHARENFSFAILED: - return (dgettext(TEXT_DOMAIN, "unshare(1M) failed")); - case EZFS_SHARENFSFAILED: - return (dgettext(TEXT_DOMAIN, "share(1M) failed")); - case EZFS_UNSHARESMBFAILED: - return (dgettext(TEXT_DOMAIN, "smb remove share failed")); - case EZFS_SHARESMBFAILED: - return (dgettext(TEXT_DOMAIN, "smb add share failed")); - case EZFS_PERM: - return (dgettext(TEXT_DOMAIN, "permission denied")); - 
case EZFS_NOSPC: - return (dgettext(TEXT_DOMAIN, "out of space")); - case EZFS_FAULT: - return (dgettext(TEXT_DOMAIN, "bad address")); - case EZFS_IO: - return (dgettext(TEXT_DOMAIN, "I/O error")); - case EZFS_INTR: - return (dgettext(TEXT_DOMAIN, "signal received")); - case EZFS_ISSPARE: - return (dgettext(TEXT_DOMAIN, "device is reserved as a hot " - "spare")); - case EZFS_INVALCONFIG: - return (dgettext(TEXT_DOMAIN, "invalid vdev configuration")); - case EZFS_RECURSIVE: - return (dgettext(TEXT_DOMAIN, "recursive dataset dependency")); - case EZFS_NOHISTORY: - return (dgettext(TEXT_DOMAIN, "no history available")); - case EZFS_POOLPROPS: - return (dgettext(TEXT_DOMAIN, "failed to retrieve " - "pool properties")); - case EZFS_POOL_NOTSUP: - return (dgettext(TEXT_DOMAIN, "operation not supported " - "on this type of pool")); - case EZFS_POOL_INVALARG: - return (dgettext(TEXT_DOMAIN, "invalid argument for " - "this pool operation")); - case EZFS_NAMETOOLONG: - return (dgettext(TEXT_DOMAIN, "dataset name is too long")); - case EZFS_OPENFAILED: - return (dgettext(TEXT_DOMAIN, "open failed")); - case EZFS_NOCAP: - return (dgettext(TEXT_DOMAIN, - "disk capacity information could not be retrieved")); - case EZFS_LABELFAILED: - return (dgettext(TEXT_DOMAIN, "write of label failed")); - case EZFS_BADWHO: - return (dgettext(TEXT_DOMAIN, "invalid user/group")); - case EZFS_BADPERM: - return (dgettext(TEXT_DOMAIN, "invalid permission")); - case EZFS_BADPERMSET: - return (dgettext(TEXT_DOMAIN, "invalid permission set name")); - case EZFS_NODELEGATION: - return (dgettext(TEXT_DOMAIN, "delegated administration is " - "disabled on pool")); - case EZFS_BADCACHE: - return (dgettext(TEXT_DOMAIN, "invalid or missing cache file")); - case EZFS_ISL2CACHE: - return (dgettext(TEXT_DOMAIN, "device is in use as a cache")); - case EZFS_VDEVNOTSUP: - return (dgettext(TEXT_DOMAIN, "vdev specification is not " - "supported")); - case EZFS_NOTSUP: - return (dgettext(TEXT_DOMAIN, "operation not 
supported " - "on this dataset")); - case EZFS_IOC_NOTSUPPORTED: - return (dgettext(TEXT_DOMAIN, "operation not supported by " - "zfs kernel module")); - case EZFS_ACTIVE_SPARE: - return (dgettext(TEXT_DOMAIN, "pool has active shared spare " - "device")); - case EZFS_UNPLAYED_LOGS: - return (dgettext(TEXT_DOMAIN, "log device has unplayed intent " - "logs")); - case EZFS_REFTAG_RELE: - return (dgettext(TEXT_DOMAIN, "no such tag on this dataset")); - case EZFS_REFTAG_HOLD: - return (dgettext(TEXT_DOMAIN, "tag already exists on this " - "dataset")); - case EZFS_TAGTOOLONG: - return (dgettext(TEXT_DOMAIN, "tag too long")); - case EZFS_PIPEFAILED: - return (dgettext(TEXT_DOMAIN, "pipe create failed")); - case EZFS_THREADCREATEFAILED: - return (dgettext(TEXT_DOMAIN, "thread create failed")); - case EZFS_POSTSPLIT_ONLINE: - return (dgettext(TEXT_DOMAIN, "disk was split from this pool " - "into a new one")); - case EZFS_SCRUB_PAUSED: - return (dgettext(TEXT_DOMAIN, "scrub is paused; " - "use 'zpool scrub' to resume")); - case EZFS_SCRUBBING: - return (dgettext(TEXT_DOMAIN, "currently scrubbing; " - "use 'zpool scrub -s' to cancel current scrub")); - case EZFS_NO_SCRUB: - return (dgettext(TEXT_DOMAIN, "there is no active scrub")); - case EZFS_DIFF: - return (dgettext(TEXT_DOMAIN, "unable to generate diffs")); - case EZFS_DIFFDATA: - return (dgettext(TEXT_DOMAIN, "invalid diff data")); - case EZFS_POOLREADONLY: - return (dgettext(TEXT_DOMAIN, "pool is read-only")); - case EZFS_NO_PENDING: - return (dgettext(TEXT_DOMAIN, "operation is not " - "in progress")); - case EZFS_CHECKPOINT_EXISTS: - return (dgettext(TEXT_DOMAIN, "checkpoint exists")); - case EZFS_DISCARDING_CHECKPOINT: - return (dgettext(TEXT_DOMAIN, "currently discarding " - "checkpoint")); - case EZFS_NO_CHECKPOINT: - return (dgettext(TEXT_DOMAIN, "checkpoint does not exist")); - case EZFS_DEVRM_IN_PROGRESS: - return (dgettext(TEXT_DOMAIN, "device removal in progress")); - case EZFS_VDEV_TOO_BIG: - return 
(dgettext(TEXT_DOMAIN, "device exceeds supported size")); - case EZFS_ACTIVE_POOL: - return (dgettext(TEXT_DOMAIN, "pool is imported on a " - "different host")); - case EZFS_TOOMANY: - return (dgettext(TEXT_DOMAIN, "argument list too long")); - case EZFS_INITIALIZING: - return (dgettext(TEXT_DOMAIN, "currently initializing")); - case EZFS_NO_INITIALIZE: - return (dgettext(TEXT_DOMAIN, "there is no active " - "initialization")); - case EZFS_WRONG_PARENT: - return (dgettext(TEXT_DOMAIN, "invalid parent dataset")); - case EZFS_UNKNOWN: - return (dgettext(TEXT_DOMAIN, "unknown error")); - default: - assert(hdl->libzfs_error == 0); - return (dgettext(TEXT_DOMAIN, "no error")); - } -} - -/*PRINTFLIKE2*/ -void -zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - - (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), - fmt, ap); - hdl->libzfs_desc_active = 1; - - va_end(ap); -} - -static void -zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) -{ - (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action), - fmt, ap); - hdl->libzfs_error = error; - - if (hdl->libzfs_desc_active) - hdl->libzfs_desc_active = 0; - else - hdl->libzfs_desc[0] = '\0'; - - if (hdl->libzfs_printerr) { - if (error == EZFS_UNKNOWN) { - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal " - "error: %s\n"), libzfs_error_description(hdl)); - abort(); - } - - (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action, - libzfs_error_description(hdl)); - if (error == EZFS_NOMEM) - exit(1); - } -} - -int -zfs_error(libzfs_handle_t *hdl, int error, const char *msg) -{ - return (zfs_error_fmt(hdl, error, "%s", msg)); -} - -/*PRINTFLIKE3*/ -int -zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
-{ - va_list ap; - - va_start(ap, fmt); - - zfs_verror(hdl, error, fmt, ap); - - va_end(ap); - - return (-1); -} - -static int -zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, - va_list ap) -{ - switch (error) { - case EPERM: - case EACCES: - zfs_verror(hdl, EZFS_PERM, fmt, ap); - return (-1); - - case ECANCELED: - zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap); - return (-1); - - case EIO: - zfs_verror(hdl, EZFS_IO, fmt, ap); - return (-1); - - case EFAULT: - zfs_verror(hdl, EZFS_FAULT, fmt, ap); - return (-1); - - case EINTR: - zfs_verror(hdl, EZFS_INTR, fmt, ap); - return (-1); - } - - return (0); -} - -int -zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg) -{ - return (zfs_standard_error_fmt(hdl, error, "%s", msg)); -} - -/*PRINTFLIKE3*/ -int -zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - - if (zfs_common_error(hdl, error, fmt, ap) != 0) { - va_end(ap); - return (-1); - } - - switch (error) { - case ENXIO: - case ENODEV: - case EPIPE: - zfs_verror(hdl, EZFS_IO, fmt, ap); - break; - - case ENOENT: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset does not exist")); - zfs_verror(hdl, EZFS_NOENT, fmt, ap); - break; - - case ENOSPC: - case EDQUOT: - zfs_verror(hdl, EZFS_NOSPC, fmt, ap); - va_end(ap); - return (-1); - - case EEXIST: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset already exists")); - zfs_verror(hdl, EZFS_EXISTS, fmt, ap); - break; - - case EBUSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset is busy")); - zfs_verror(hdl, EZFS_BUSY, fmt, ap); - break; - case EROFS: - zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); - break; - case ENAMETOOLONG: - zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap); - break; - case ENOTSUP: - zfs_verror(hdl, EZFS_BADVERSION, fmt, ap); - break; - case EAGAIN: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool I/O is currently suspended")); - zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); - break; - case 
EREMOTEIO: - zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap); - break; - case ZFS_ERR_IOC_CMD_UNAVAIL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " - "module does not support this operation. A reboot may " - "be required to enable this operation.")); - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - case ZFS_ERR_IOC_ARG_UNAVAIL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " - "module does not support an option for this operation. " - "A reboot may be required to enable this option.")); - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - case ZFS_ERR_IOC_ARG_REQUIRED: - case ZFS_ERR_IOC_ARG_BADTYPE: - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - default: - zfs_error_aux(hdl, strerror(error)); - zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); - break; - } - - va_end(ap); - return (-1); -} - -int -zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg) -{ - return (zpool_standard_error_fmt(hdl, error, "%s", msg)); -} - -/*PRINTFLIKE3*/ -int -zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
-{ - va_list ap; - - va_start(ap, fmt); - - if (zfs_common_error(hdl, error, fmt, ap) != 0) { - va_end(ap); - return (-1); - } - - switch (error) { - case ENODEV: - zfs_verror(hdl, EZFS_NODEVICE, fmt, ap); - break; - - case ENOENT: - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "no such pool or dataset")); - zfs_verror(hdl, EZFS_NOENT, fmt, ap); - break; - - case EEXIST: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool already exists")); - zfs_verror(hdl, EZFS_EXISTS, fmt, ap); - break; - - case EBUSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy")); - zfs_verror(hdl, EZFS_BUSY, fmt, ap); - break; - - case ENXIO: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more devices is currently unavailable")); - zfs_verror(hdl, EZFS_BADDEV, fmt, ap); - break; - - case ENAMETOOLONG: - zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap); - break; - - case ENOTSUP: - zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap); - break; - - case EINVAL: - zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap); - break; - - case ENOSPC: - case EDQUOT: - zfs_verror(hdl, EZFS_NOSPC, fmt, ap); - va_end(ap); - return (-1); - - case EAGAIN: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool I/O is currently suspended")); - zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); - break; - - case EROFS: - zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); - break; - /* There is no pending operation to cancel */ - case ESRCH: - zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap); - break; - case EREMOTEIO: - zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap); - break; - case ZFS_ERR_CHECKPOINT_EXISTS: - zfs_verror(hdl, EZFS_CHECKPOINT_EXISTS, fmt, ap); - break; - case ZFS_ERR_DISCARDING_CHECKPOINT: - zfs_verror(hdl, EZFS_DISCARDING_CHECKPOINT, fmt, ap); - break; - case ZFS_ERR_NO_CHECKPOINT: - zfs_verror(hdl, EZFS_NO_CHECKPOINT, fmt, ap); - break; - case ZFS_ERR_DEVRM_IN_PROGRESS: - zfs_verror(hdl, EZFS_DEVRM_IN_PROGRESS, fmt, ap); - break; - case ZFS_ERR_VDEV_TOO_BIG: - zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap); - break; - case 
ZFS_ERR_WRONG_PARENT: - zfs_verror(hdl, EZFS_WRONG_PARENT, fmt, ap); - break; - case ZFS_ERR_IOC_CMD_UNAVAIL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " - "module does not support this operation. A reboot may " - "be required to enable this operation.")); - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - case ZFS_ERR_IOC_ARG_UNAVAIL: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " - "module does not support an option for this operation. " - "A reboot may be required to enable this option.")); - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - case ZFS_ERR_IOC_ARG_REQUIRED: - case ZFS_ERR_IOC_ARG_BADTYPE: - zfs_verror(hdl, EZFS_IOC_NOTSUPPORTED, fmt, ap); - break; - default: - zfs_error_aux(hdl, strerror(error)); - zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); - } - - va_end(ap); - return (-1); -} - -/* - * Display an out of memory error message and abort the current program. - */ -int -no_memory(libzfs_handle_t *hdl) -{ - return (zfs_error(hdl, EZFS_NOMEM, "internal error")); -} - -/* - * A safe form of malloc() which will die if the allocation fails. - */ -void * -zfs_alloc(libzfs_handle_t *hdl, size_t size) -{ - void *data; - - if ((data = calloc(1, size)) == NULL) - (void) no_memory(hdl); - - return (data); -} - -/* - * A safe form of asprintf() which will die if the allocation fails. - */ -/*PRINTFLIKE2*/ -char * -zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...) -{ - va_list ap; - char *ret; - int err; - - va_start(ap, fmt); - - err = vasprintf(&ret, fmt, ap); - - va_end(ap); - - if (err < 0) - (void) no_memory(hdl); - - return (ret); -} - -/* - * A safe form of realloc(), which also zeroes newly allocated space. 
- */ -void * -zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize) -{ - void *ret; - - if ((ret = realloc(ptr, newsize)) == NULL) { - (void) no_memory(hdl); - return (NULL); - } - - bzero((char *)ret + oldsize, (newsize - oldsize)); - return (ret); -} - -/* - * A safe form of strdup() which will die if the allocation fails. - */ -char * -zfs_strdup(libzfs_handle_t *hdl, const char *str) -{ - char *ret; - - if ((ret = strdup(str)) == NULL) - (void) no_memory(hdl); - - return (ret); -} - -/* - * Convert a number to an appropriately human-readable output. - */ -void -zfs_nicenum(uint64_t num, char *buf, size_t buflen) -{ - nicenum(num, buf, buflen); -} - -void -libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr) -{ - hdl->libzfs_printerr = printerr; -} - -static int -libzfs_load(void) -{ - int error; - - if (modfind("zfs") < 0) { - /* Not present in kernel, try loading it. */ - if (kldload("zfs") < 0 || modfind("zfs") < 0) { - if (errno != EEXIST) - return (-1); - } - } - return (0); -} - -libzfs_handle_t * -libzfs_init(void) -{ - libzfs_handle_t *hdl; - - if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) { - return (NULL); - } - - if (libzfs_load() < 0) { - free(hdl); - return (NULL); - } - - if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { - free(hdl); - return (NULL); - } - - if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) { - (void) close(hdl->libzfs_fd); - free(hdl); - return (NULL); - } - - hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r"); - - if (libzfs_core_init() != 0) { - (void) close(hdl->libzfs_fd); - (void) fclose(hdl->libzfs_mnttab); - (void) fclose(hdl->libzfs_sharetab); - free(hdl); - return (NULL); - } - - zfs_prop_init(); - zpool_prop_init(); - zpool_feature_init(); - libzfs_mnttab_init(hdl); - - if (getenv("ZFS_PROP_DEBUG") != NULL) { - hdl->libzfs_prop_debug = B_TRUE; - } - - return (hdl); -} - -void -libzfs_fini(libzfs_handle_t *hdl) -{ - (void) close(hdl->libzfs_fd); - if 
(hdl->libzfs_mnttab) - (void) fclose(hdl->libzfs_mnttab); - if (hdl->libzfs_sharetab) - (void) fclose(hdl->libzfs_sharetab); - zfs_uninit_libshare(hdl); - zpool_free_handles(hdl); -#ifdef illumos - libzfs_fru_clear(hdl, B_TRUE); -#endif - namespace_clear(hdl); - libzfs_mnttab_fini(hdl); - libzfs_core_fini(); - free(hdl); -} - -libzfs_handle_t * -zpool_get_handle(zpool_handle_t *zhp) -{ - return (zhp->zpool_hdl); -} - -libzfs_handle_t * -zfs_get_handle(zfs_handle_t *zhp) -{ - return (zhp->zfs_hdl); -} - -zpool_handle_t * -zfs_get_pool_handle(const zfs_handle_t *zhp) -{ - return (zhp->zpool_hdl); -} - -/* - * Given a name, determine whether or not it's a valid path - * (starts with '/' or "./"). If so, walk the mnttab trying - * to match the device number. If not, treat the path as an - * fs/vol/snap/bkmark name. - */ -zfs_handle_t * -zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype) -{ - struct stat64 statbuf; - struct extmnttab entry; - int ret; - - if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) { - /* - * It's not a valid path, assume it's a name of type 'argtype'. 
- */ - return (zfs_open(hdl, path, argtype)); - } - - if (stat64(path, &statbuf) != 0) { - (void) fprintf(stderr, "%s: %s\n", path, strerror(errno)); - return (NULL); - } - -#ifdef illumos - rewind(hdl->libzfs_mnttab); - while ((ret = getextmntent(hdl->libzfs_mnttab, &entry, 0)) == 0) { - if (makedevice(entry.mnt_major, entry.mnt_minor) == - statbuf.st_dev) { - break; - } - } -#else - { - struct statfs sfs; - - ret = statfs(path, &sfs); - if (ret == 0) - statfs2mnttab(&sfs, &entry); - else { - (void) fprintf(stderr, "%s: %s\n", path, - strerror(errno)); - } - } -#endif /* illumos */ - if (ret != 0) { - return (NULL); - } - - if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { - (void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"), - path); - return (NULL); - } - - return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM)); -} - -/* - * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from - * an ioctl(). - */ -int -zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) -{ - if (len == 0) - len = 16 * 1024; - zc->zc_nvlist_dst_size = len; - zc->zc_nvlist_dst = - (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); - if (zc->zc_nvlist_dst == 0) - return (-1); - - return (0); -} - -/* - * Called when an ioctl() which returns an nvlist fails with ENOMEM. This will - * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was - * filled in by the kernel to indicate the actual required size. - */ -int -zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc) -{ - free((void *)(uintptr_t)zc->zc_nvlist_dst); - zc->zc_nvlist_dst = - (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); - if (zc->zc_nvlist_dst == 0) - return (-1); - - return (0); -} - -/* - * Called to free the src and dst nvlists stored in the command structure. 
- */ -void -zcmd_free_nvlists(zfs_cmd_t *zc) -{ - free((void *)(uintptr_t)zc->zc_nvlist_conf); - free((void *)(uintptr_t)zc->zc_nvlist_src); - free((void *)(uintptr_t)zc->zc_nvlist_dst); - zc->zc_nvlist_conf = NULL; - zc->zc_nvlist_src = NULL; - zc->zc_nvlist_dst = NULL; -} - -static int -zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, - nvlist_t *nvl) -{ - char *packed; - size_t len; - - verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0); - - if ((packed = zfs_alloc(hdl, len)) == NULL) - return (-1); - - verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); - - *outnv = (uint64_t)(uintptr_t)packed; - *outlen = len; - - return (0); -} - -int -zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) -{ - return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_conf, - &zc->zc_nvlist_conf_size, nvl)); -} - -int -zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) -{ - return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_src, - &zc->zc_nvlist_src_size, nvl)); -} - -/* - * Unpacks an nvlist from the ZFS ioctl command structure. - */ -int -zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) -{ - if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst, - zc->zc_nvlist_dst_size, nvlp, 0) != 0) - return (no_memory(hdl)); - - return (0); -} - -int -zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) -{ - return (ioctl(hdl->libzfs_fd, request, zc)); -} - -/* - * ================================================================ - * API shared by zfs and zpool property management - * ================================================================ - */ - -static void -zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) -{ - zprop_list_t *pl = cbp->cb_proplist; - int i; - char *title; - size_t len; - - cbp->cb_first = B_FALSE; - if (cbp->cb_scripted) - return; - - /* - * Start with the length of the column headers. 
- */ - cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME")); - cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN, - "PROPERTY")); - cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN, - "VALUE")); - cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN, - "RECEIVED")); - cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN, - "SOURCE")); - - /* first property is always NAME */ - assert(cbp->cb_proplist->pl_prop == - ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME)); - - /* - * Go through and calculate the widths for each column. For the - * 'source' column, we kludge it up by taking the worst-case scenario of - * inheriting from the longest name. This is acceptable because in the - * majority of cases 'SOURCE' is the last column displayed, and we don't - * use the width anyway. Note that the 'VALUE' column can be oversized, - * if the name of the property is much longer than any values we find. - */ - for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { - /* - * 'PROPERTY' column - */ - if (pl->pl_prop != ZPROP_INVAL) { - const char *propname = (type == ZFS_TYPE_POOL) ? - zpool_prop_to_name(pl->pl_prop) : - zfs_prop_to_name(pl->pl_prop); - - len = strlen(propname); - if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) - cbp->cb_colwidths[GET_COL_PROPERTY] = len; - } else { - len = strlen(pl->pl_user_prop); - if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) - cbp->cb_colwidths[GET_COL_PROPERTY] = len; - } - - /* - * 'VALUE' column. The first property is always the 'name' - * property that was tacked on either by /sbin/zfs's - * zfs_do_get() or when calling zprop_expand_list(), so we - * ignore its width. If the user specified the name property - * to display, then it will be later in the list in any case. - */ - if (pl != cbp->cb_proplist && - pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE]) - cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width; - - /* 'RECEIVED' column. 
*/ - if (pl != cbp->cb_proplist && - pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD]) - cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width; - - /* - * 'NAME' and 'SOURCE' columns - */ - if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME : - ZFS_PROP_NAME) && - pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) { - cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width; - cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width + - strlen(dgettext(TEXT_DOMAIN, "inherited from")); - } - } - - /* - * Now go through and print the headers. - */ - for (i = 0; i < ZFS_GET_NCOLS; i++) { - switch (cbp->cb_columns[i]) { - case GET_COL_NAME: - title = dgettext(TEXT_DOMAIN, "NAME"); - break; - case GET_COL_PROPERTY: - title = dgettext(TEXT_DOMAIN, "PROPERTY"); - break; - case GET_COL_VALUE: - title = dgettext(TEXT_DOMAIN, "VALUE"); - break; - case GET_COL_RECVD: - title = dgettext(TEXT_DOMAIN, "RECEIVED"); - break; - case GET_COL_SOURCE: - title = dgettext(TEXT_DOMAIN, "SOURCE"); - break; - default: - title = NULL; - } - - if (title != NULL) { - if (i == (ZFS_GET_NCOLS - 1) || - cbp->cb_columns[i + 1] == GET_COL_NONE) - (void) printf("%s", title); - else - (void) printf("%-*s ", - cbp->cb_colwidths[cbp->cb_columns[i]], - title); - } - } - (void) printf("\n"); -} - -/* - * Display a single line of output, according to the settings in the callback - * structure. - */ -void -zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, - const char *propname, const char *value, zprop_source_t sourcetype, - const char *source, const char *recvd_value) -{ - int i; - const char *str = NULL; - char buf[128]; - - /* - * Ignore those source types that the user has chosen to ignore. 
- */ - if ((sourcetype & cbp->cb_sources) == 0) - return; - - if (cbp->cb_first) - zprop_print_headers(cbp, cbp->cb_type); - - for (i = 0; i < ZFS_GET_NCOLS; i++) { - switch (cbp->cb_columns[i]) { - case GET_COL_NAME: - str = name; - break; - - case GET_COL_PROPERTY: - str = propname; - break; - - case GET_COL_VALUE: - str = value; - break; - - case GET_COL_SOURCE: - switch (sourcetype) { - case ZPROP_SRC_NONE: - str = "-"; - break; - - case ZPROP_SRC_DEFAULT: - str = "default"; - break; - - case ZPROP_SRC_LOCAL: - str = "local"; - break; - - case ZPROP_SRC_TEMPORARY: - str = "temporary"; - break; - - case ZPROP_SRC_INHERITED: - (void) snprintf(buf, sizeof (buf), - "inherited from %s", source); - str = buf; - break; - case ZPROP_SRC_RECEIVED: - str = "received"; - break; - - default: - str = NULL; - assert(!"unhandled zprop_source_t"); - } - break; - - case GET_COL_RECVD: - str = (recvd_value == NULL ? "-" : recvd_value); - break; - - default: - continue; - } - - if (cbp->cb_columns[i + 1] == GET_COL_NONE) - (void) printf("%s", str); - else if (cbp->cb_scripted) - (void) printf("%s\t", str); - else - (void) printf("%-*s ", - cbp->cb_colwidths[cbp->cb_columns[i]], - str); - } - - (void) printf("\n"); -} - -/* - * Given a numeric suffix, convert the value into a number of bits that the - * resulting value must be shifted. - */ -static int -str2shift(libzfs_handle_t *hdl, const char *buf) -{ - const char *ends = "BKMGTPEZ"; - int i; - - if (buf[0] == '\0') - return (0); - for (i = 0; i < strlen(ends); i++) { - if (toupper(buf[0]) == ends[i]) - break; - } - if (i == strlen(ends)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid numeric suffix '%s'"), buf); - return (-1); - } - - /* - * We want to allow trailing 'b' characters for 'GB' or 'Mb'. But don't - * allow 'BB' - that's just weird. 
- */ - if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' && - toupper(buf[0]) != 'B')) - return (10*i); - - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid numeric suffix '%s'"), buf); - return (-1); -} - -/* - * Convert a string of the form '100G' into a real number. Used when setting - * properties or creating a volume. 'buf' is used to place an extended error - * message for the caller to use. - */ -int -zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) -{ - char *end; - int shift; - - *num = 0; - - /* Check to see if this looks like a number. */ - if ((value[0] < '0' || value[0] > '9') && value[0] != '.') { - if (hdl) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "bad numeric value '%s'"), value); - return (-1); - } - - /* Rely on strtoull() to process the numeric portion. */ - errno = 0; - *num = strtoull(value, &end, 10); - - /* - * Check for ERANGE, which indicates that the value is too large to fit - * in a 64-bit value. - */ - if (errno == ERANGE) { - if (hdl) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "numeric value is too large")); - return (-1); - } - - /* - * If we have a decimal value, then do the computation with floating - * point arithmetic. Otherwise, use standard arithmetic. 
- */ - if (*end == '.') { - double fval = strtod(value, &end); - - if ((shift = str2shift(hdl, end)) == -1) - return (-1); - - fval *= pow(2, shift); - - if (fval > UINT64_MAX) { - if (hdl) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "numeric value is too large")); - return (-1); - } - - *num = (uint64_t)fval; - } else { - if ((shift = str2shift(hdl, end)) == -1) - return (-1); - - /* Check for overflow */ - if (shift >= 64 || (*num << shift) >> shift != *num) { - if (hdl) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "numeric value is too large")); - return (-1); - } - - *num <<= shift; - } - - return (0); -} - -/* - * Given a propname=value nvpair to set, parse any numeric properties - * (index, boolean, etc) if they are specified as strings and add the - * resulting nvpair to the returned nvlist. - * - * At the DSL layer, all properties are either 64-bit numbers or strings. - * We want the user to be able to ignore this fact and specify properties - * as native values (numbers, for example) or as strings (to simplify - * command line utilities). This also handles converting index types - * (compression, checksum, etc) from strings to their on-disk index. - */ -int -zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, - zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp, - const char *errbuf) -{ - data_type_t datatype = nvpair_type(elem); - zprop_type_t proptype; - const char *propname; - char *value; - boolean_t isnone = B_FALSE; - boolean_t isauto = B_FALSE; - - if (type == ZFS_TYPE_POOL) { - proptype = zpool_prop_get_type(prop); - propname = zpool_prop_to_name(prop); - } else { - proptype = zfs_prop_get_type(prop); - propname = zfs_prop_to_name(prop); - } - - /* - * Convert any properties to the internal DSL value types. 
- */ - *svalp = NULL; - *ivalp = 0; - - switch (proptype) { - case PROP_TYPE_STRING: - if (datatype != DATA_TYPE_STRING) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a string"), nvpair_name(elem)); - goto error; - } - (void) nvpair_value_string(elem, svalp); - if (strlen(*svalp) >= ZFS_MAXPROPLEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' is too long"), nvpair_name(elem)); - goto error; - } - break; - - case PROP_TYPE_NUMBER: - if (datatype == DATA_TYPE_STRING) { - (void) nvpair_value_string(elem, &value); - if (strcmp(value, "none") == 0) { - isnone = B_TRUE; - } else if (strcmp(value, "auto") == 0) { - isauto = B_TRUE; - } else if (zfs_nicestrtonum(hdl, value, ivalp) != 0) { - goto error; - } - } else if (datatype == DATA_TYPE_UINT64) { - (void) nvpair_value_uint64(elem, ivalp); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a number"), nvpair_name(elem)); - goto error; - } - - /* - * Quota special: force 'none' and don't allow 0. - */ - if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone && - (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "use 'none' to disable quota/refquota")); - goto error; - } - - /* - * Special handling for "*_limit=none". In this case it's not - * 0 but UINT64_MAX. - */ - if ((type & ZFS_TYPE_DATASET) && isnone && - (prop == ZFS_PROP_FILESYSTEM_LIMIT || - prop == ZFS_PROP_SNAPSHOT_LIMIT)) { - *ivalp = UINT64_MAX; - } - - /* - * Special handling for setting 'refreservation' to 'auto'. Use - * UINT64_MAX to tell the caller to use zfs_fix_auto_resv(). - * 'auto' is only allowed on volumes. 
- */ - if (isauto) { - switch (prop) { - case ZFS_PROP_REFRESERVATION: - if ((type & ZFS_TYPE_VOLUME) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s=auto' only allowed on " - "volumes"), nvpair_name(elem)); - goto error; - } - *ivalp = UINT64_MAX; - break; - default: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'auto' is invalid value for '%s'"), - nvpair_name(elem)); - goto error; - } - } - - break; - - case PROP_TYPE_INDEX: - if (datatype != DATA_TYPE_STRING) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be a string"), nvpair_name(elem)); - goto error; - } - - (void) nvpair_value_string(elem, &value); - - if (zprop_string_to_index(prop, value, ivalp, type) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be one of '%s'"), propname, - zprop_values(prop, type)); - goto error; - } - break; - - default: - abort(); - } - - /* - * Add the result to our return set of properties. - */ - if (*svalp != NULL) { - if (nvlist_add_string(ret, propname, *svalp) != 0) { - (void) no_memory(hdl); - return (-1); - } - } else { - if (nvlist_add_uint64(ret, propname, *ivalp) != 0) { - (void) no_memory(hdl); - return (-1); - } - } - - return (0); -error: - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - return (-1); -} - -static int -addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp, - zfs_type_t type) -{ - int prop; - zprop_list_t *entry; - - prop = zprop_name_to_prop(propname, type); - - if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type)) - prop = ZPROP_INVAL; - - /* - * When no property table entry can be found, return failure if - * this is a pool property or if this isn't a user-defined - * dataset property, - */ - if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL && - !zpool_prop_feature(propname) && - !zpool_prop_unsupported(propname)) || - (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) && - !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - 
"invalid property '%s'"), propname); - return (zfs_error(hdl, EZFS_BADPROP, - dgettext(TEXT_DOMAIN, "bad property list"))); - } - - if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) - return (-1); - - entry->pl_prop = prop; - if (prop == ZPROP_INVAL) { - if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) == - NULL) { - free(entry); - return (-1); - } - entry->pl_width = strlen(propname); - } else { - entry->pl_width = zprop_width(prop, &entry->pl_fixed, - type); - } - - *listp = entry; - - return (0); -} - -/* - * Given a comma-separated list of properties, construct a property list - * containing both user-defined and native properties. This function will - * return a NULL list if 'all' is specified, which can later be expanded - * by zprop_expand_list(). - */ -int -zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp, - zfs_type_t type) -{ - *listp = NULL; - - /* - * If 'all' is specified, return a NULL list. - */ - if (strcmp(props, "all") == 0) - return (0); - - /* - * If no props were specified, return an error. - */ - if (props[0] == '\0') { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "no properties specified")); - return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, - "bad property list"))); - } - - /* - * It would be nice to use getsubopt() here, but the inclusion of column - * aliases makes this more effort than it's worth. - */ - while (*props != '\0') { - size_t len; - char *p; - char c; - - if ((p = strchr(props, ',')) == NULL) { - len = strlen(props); - p = props + len; - } else { - len = p - props; - } - - /* - * Check for empty options. - */ - if (len == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "empty property name")); - return (zfs_error(hdl, EZFS_BADPROP, - dgettext(TEXT_DOMAIN, "bad property list"))); - } - - /* - * Check all regular property names. 
- */ - c = props[len]; - props[len] = '\0'; - - if (strcmp(props, "space") == 0) { - static char *spaceprops[] = { - "name", "avail", "used", "usedbysnapshots", - "usedbydataset", "usedbyrefreservation", - "usedbychildren", NULL - }; - int i; - - for (i = 0; spaceprops[i]; i++) { - if (addlist(hdl, spaceprops[i], listp, type)) - return (-1); - listp = &(*listp)->pl_next; - } - } else { - if (addlist(hdl, props, listp, type)) - return (-1); - listp = &(*listp)->pl_next; - } - - props = p; - if (c == ',') - props++; - } - - return (0); -} - -void -zprop_free_list(zprop_list_t *pl) -{ - zprop_list_t *next; - - while (pl != NULL) { - next = pl->pl_next; - free(pl->pl_user_prop); - free(pl); - pl = next; - } -} - -typedef struct expand_data { - zprop_list_t **last; - libzfs_handle_t *hdl; - zfs_type_t type; -} expand_data_t; - -int -zprop_expand_list_cb(int prop, void *cb) -{ - zprop_list_t *entry; - expand_data_t *edp = cb; - - if ((entry = zfs_alloc(edp->hdl, sizeof (zprop_list_t))) == NULL) - return (ZPROP_INVAL); - - entry->pl_prop = prop; - entry->pl_width = zprop_width(prop, &entry->pl_fixed, edp->type); - entry->pl_all = B_TRUE; - - *(edp->last) = entry; - edp->last = &entry->pl_next; - - return (ZPROP_CONT); -} - -int -zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type) -{ - zprop_list_t *entry; - zprop_list_t **last; - expand_data_t exp; - - if (*plp == NULL) { - /* - * If this is the very first time we've been called for an 'all' - * specification, expand the list to include all native - * properties. - */ - last = plp; - - exp.last = last; - exp.hdl = hdl; - exp.type = type; - - if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE, - B_FALSE, type) == ZPROP_INVAL) - return (-1); - - /* - * Add 'name' to the beginning of the list, which is handled - * specially. - */ - if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) - return (-1); - - entry->pl_prop = (type == ZFS_TYPE_POOL) ? 
ZPOOL_PROP_NAME : - ZFS_PROP_NAME; - entry->pl_width = zprop_width(entry->pl_prop, - &entry->pl_fixed, type); - entry->pl_all = B_TRUE; - entry->pl_next = *plp; - *plp = entry; - } - return (0); -} - -int -zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, - zfs_type_t type) -{ - return (zprop_iter_common(func, cb, show_all, ordered, type)); -} - -ulong_t -get_system_hostid(void) -{ - char *env; - - /* - * Allow the hostid to be subverted for testing. - */ - env = getenv("ZFS_HOSTID"); - if (env) { - ulong_t hostid = strtoull(env, NULL, 16); - return (hostid & 0xFFFFFFFF); - } - - return (gethostid()); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c deleted file mode 100644 index 2a6b5cc5927c..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c +++ /dev/null @@ -1,1234 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. - * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright 2017 RackTop Systems. 
- * Copyright (c) 2017 Datto Inc. - */ - -/* - * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. - * It has the following characteristics: - * - * - Thread Safe. libzfs_core is accessible concurrently from multiple - * threads. This is accomplished primarily by avoiding global data - * (e.g. caching). Since it's thread-safe, there is no reason for a - * process to have multiple libzfs "instances". Therefore, we store - * our few pieces of data (e.g. the file descriptor) in global - * variables. The fd is reference-counted so that the libzfs_core - * library can be "initialized" multiple times (e.g. by different - * consumers within the same process). - * - * - Committed Interface. The libzfs_core interface will be committed, - * therefore consumers can compile against it and be confident that - * their code will continue to work on future releases of this code. - * Currently, the interface is Evolving (not Committed), but we intend - * to commit to it once it is more complete and we determine that it - * meets the needs of all consumers. - * - * - Programatic Error Handling. libzfs_core communicates errors with - * defined error numbers, and doesn't print anything to stdout/stderr. - * - * - Thin Layer. libzfs_core is a thin layer, marshaling arguments - * to/from the kernel ioctls. There is generally a 1:1 correspondence - * between libzfs_core functions and ioctls to /dev/zfs. - * - * - Clear Atomicity. Because libzfs_core functions are generally 1:1 - * with kernel ioctls, and kernel ioctls are general atomic, each - * libzfs_core function is atomic. For example, creating multiple - * snapshots with a single call to lzc_snapshot() is atomic -- it - * can't fail with only some of the requested snapshots created, even - * in the event of power loss or system crash. - * - * - Continued libzfs Support. Some higher-level operations (e.g. - * support for "zfs send -R") are too complicated to fit the scope of - * libzfs_core. 
This functionality will continue to live in libzfs. - * Where appropriate, libzfs will use the underlying atomic operations - * of libzfs_core. For example, libzfs may implement "zfs send -R | - * zfs receive" by using individual "send one snapshot", rename, - * destroy, and "receive one snapshot" operations in libzfs_core. - * /sbin/zfs and /zbin/zpool will link with both libzfs and - * libzfs_core. Other consumers should aim to use only libzfs_core, - * since that will be the supported, stable interface going forwards. - */ - -#define _IN_LIBZFS_CORE_ - -#include <libzfs_core.h> -#include <ctype.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#ifdef ZFS_DEBUG -#include <stdio.h> -#endif -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <sys/nvpair.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/zfs_ioctl.h> -#include "libzfs_core_compat.h" -#include "libzfs_compat.h" - -#ifdef __FreeBSD__ -extern int zfs_ioctl_version; -#endif - -static int g_fd = -1; -static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; -static int g_refcount; - -#ifdef ZFS_DEBUG -static zfs_ioc_t fail_ioc_cmd; -static zfs_errno_t fail_ioc_err; - -static void -libzfs_core_debug_ioc(void) -{ - /* - * To test running newer user space binaries with kernel's - * that don't yet support an ioctl or a new ioctl arg we - * provide an override to intentionally fail an ioctl. - * - * USAGE: - * The override variable, ZFS_IOC_TEST, is of the form "cmd:err" - * - * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a - * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029" - * - * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank" - * cannot checkpoint 'tank': the loaded zfs module does not support - * this operation. A reboot may be required to enable this operation. 
- */ - if (fail_ioc_cmd == 0) { - char *ioc_test = getenv("ZFS_IOC_TEST"); - unsigned int ioc_num = 0, ioc_err = 0; - - if (ioc_test != NULL && - sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 && - ioc_num < ZFS_IOC_LAST) { - fail_ioc_cmd = ioc_num; - fail_ioc_err = ioc_err; - } - } -} -#endif - -int -libzfs_core_init(void) -{ - (void) pthread_mutex_lock(&g_lock); - if (g_refcount == 0) { - g_fd = open("/dev/zfs", O_RDWR); - if (g_fd < 0) { - (void) pthread_mutex_unlock(&g_lock); - return (errno); - } - } - g_refcount++; - -#ifdef ZFS_DEBUG - libzfs_core_debug_ioc(); -#endif - (void) pthread_mutex_unlock(&g_lock); - - return (0); -} - -void -libzfs_core_fini(void) -{ - (void) pthread_mutex_lock(&g_lock); - ASSERT3S(g_refcount, >, 0); - - if (g_refcount > 0) - g_refcount--; - - if (g_refcount == 0 && g_fd != -1) { - (void) close(g_fd); - g_fd = -1; - } - (void) pthread_mutex_unlock(&g_lock); -} - -static int -lzc_ioctl(zfs_ioc_t ioc, const char *name, - nvlist_t *source, nvlist_t **resultp) -{ - zfs_cmd_t zc = { 0 }; - int error = 0; - char *packed = NULL; -#ifdef __FreeBSD__ - nvlist_t *oldsource; -#endif - size_t size = 0; - - ASSERT3S(g_refcount, >, 0); - VERIFY3S(g_fd, !=, -1); - -#ifdef ZFS_DEBUG - if (ioc == fail_ioc_cmd) - return (fail_ioc_err); -#endif - - if (name != NULL) - (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); - -#ifdef __FreeBSD__ - if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) - zfs_ioctl_version = get_zfs_ioctl_version(); - - if (zfs_ioctl_version < ZFS_IOCVER_LZC) { - oldsource = source; - error = lzc_compat_pre(&zc, &ioc, &source); - if (error) - return (error); - } -#endif - - if (source != NULL) { - packed = fnvlist_pack(source, &size); - zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; - zc.zc_nvlist_src_size = size; - } - - if (resultp != NULL) { - *resultp = NULL; - if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { - zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, - ZCP_ARG_MEMLIMIT); - } else { - zc.zc_nvlist_dst_size = MAX(size * 2, 
128 * 1024); - } - zc.zc_nvlist_dst = (uint64_t)(uintptr_t) - malloc(zc.zc_nvlist_dst_size); -#ifdef illumos - if (zc.zc_nvlist_dst == NULL) { -#else - if (zc.zc_nvlist_dst == 0) { -#endif - error = ENOMEM; - goto out; - } - } - - while (ioctl(g_fd, ioc, &zc) != 0) { - /* - * If ioctl exited with ENOMEM, we retry the ioctl after - * increasing the size of the destination nvlist. - * - * Channel programs that exit with ENOMEM ran over the - * lua memory sandbox; they should not be retried. - */ - if (errno == ENOMEM && resultp != NULL && - ioc != ZFS_IOC_CHANNEL_PROGRAM) { - free((void *)(uintptr_t)zc.zc_nvlist_dst); - zc.zc_nvlist_dst_size *= 2; - zc.zc_nvlist_dst = (uint64_t)(uintptr_t) - malloc(zc.zc_nvlist_dst_size); -#ifdef illumos - if (zc.zc_nvlist_dst == NULL) { -#else - if (zc.zc_nvlist_dst == 0) { -#endif - error = ENOMEM; - goto out; - } - } else { - error = errno; - break; - } - } - -#ifdef __FreeBSD__ - if (zfs_ioctl_version < ZFS_IOCVER_LZC) - lzc_compat_post(&zc, ioc); -#endif - if (zc.zc_nvlist_dst_filled) { - *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, - zc.zc_nvlist_dst_size); - } -#ifdef __FreeBSD__ - if (zfs_ioctl_version < ZFS_IOCVER_LZC) - lzc_compat_outnvl(&zc, ioc, resultp); -#endif -out: -#ifdef __FreeBSD__ - if (zfs_ioctl_version < ZFS_IOCVER_LZC) { - if (source != oldsource) - nvlist_free(source); - source = oldsource; - } -#endif - fnvlist_pack_free(packed, size); - free((void *)(uintptr_t)zc.zc_nvlist_dst); - return (error); -} - -int -lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) -{ - int error; - nvlist_t *args = fnvlist_alloc(); - fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); - if (props != NULL) - fnvlist_add_nvlist(args, "props", props); - error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); - nvlist_free(args); - return (error); -} - -int -lzc_clone(const char *fsname, const char *origin, - nvlist_t *props) -{ - int error; - nvlist_t *args = fnvlist_alloc(); - 
fnvlist_add_string(args, "origin", origin); - if (props != NULL) - fnvlist_add_nvlist(args, "props", props); - error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); - nvlist_free(args); - return (error); -} - -int -lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) -{ - /* - * The promote ioctl is still legacy, so we need to construct our - * own zfs_cmd_t rather than using lzc_ioctl(). - */ - zfs_cmd_t zc = { 0 }; - - ASSERT3S(g_refcount, >, 0); - VERIFY3S(g_fd, !=, -1); - - (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); - if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { - int error = errno; - if (error == EEXIST && snapnamebuf != NULL) - (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); - return (error); - } - return (0); -} - -int -lzc_remap(const char *fsname) -{ - int error; - nvlist_t *args = fnvlist_alloc(); - error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL); - nvlist_free(args); - return (error); -} - -int -lzc_rename(const char *source, const char *target) -{ - zfs_cmd_t zc = { 0 }; - int error; - - ASSERT3S(g_refcount, >, 0); - VERIFY3S(g_fd, !=, -1); - - (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); - error = ioctl(g_fd, ZFS_IOC_RENAME, &zc); - if (error != 0) - error = errno; - return (error); -} - -int -lzc_destroy(const char *fsname) -{ - int error; - - nvlist_t *args = fnvlist_alloc(); - error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); - nvlist_free(args); - return (error); -} - -/* - * Creates snapshots. - * - * The keys in the snaps nvlist are the snapshots to be created. - * They must all be in the same pool. - * - * The props nvlist is properties to set. Currently only user properties - * are supported. { user:prop_name -> string value } - * - * The returned results nvlist will have an entry for each snapshot that failed. - * The value will be the (int32) error code. 
- * - * The return value will be 0 if all snapshots were created, otherwise it will - * be the errno of a (unspecified) snapshot that failed. - */ -int -lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) -{ - nvpair_t *elem; - nvlist_t *args; - int error; - char pool[ZFS_MAX_DATASET_NAME_LEN]; - - *errlist = NULL; - - /* determine the pool name */ - elem = nvlist_next_nvpair(snaps, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/@")] = '\0'; - - args = fnvlist_alloc(); - fnvlist_add_nvlist(args, "snaps", snaps); - if (props != NULL) - fnvlist_add_nvlist(args, "props", props); - - error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); - nvlist_free(args); - - return (error); -} - -/* - * Destroys snapshots. - * - * The keys in the snaps nvlist are the snapshots to be destroyed. - * They must all be in the same pool. - * - * Snapshots that do not exist will be silently ignored. - * - * If 'defer' is not set, and a snapshot has user holds or clones, the - * destroy operation will fail and none of the snapshots will be - * destroyed. - * - * If 'defer' is set, and a snapshot has user holds or clones, it will be - * marked for deferred destruction, and will be destroyed when the last hold - * or clone is removed/destroyed. - * - * The return value will be 0 if all snapshots were destroyed (or marked for - * later destruction if 'defer' is set) or didn't exist to begin with. - * - * Otherwise the return value will be the errno of a (unspecified) snapshot - * that failed, no snapshots will be destroyed, and the errlist will have an - * entry for each snapshot that failed. The value in the errlist will be - * the (int32) error code. 
- */ -int -lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) -{ - nvpair_t *elem; - nvlist_t *args; - int error; - char pool[ZFS_MAX_DATASET_NAME_LEN]; - - /* determine the pool name */ - elem = nvlist_next_nvpair(snaps, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/@")] = '\0'; - - args = fnvlist_alloc(); - fnvlist_add_nvlist(args, "snaps", snaps); - if (defer) - fnvlist_add_boolean(args, "defer"); - - error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); - nvlist_free(args); - - return (error); -} - -int -lzc_snaprange_space(const char *firstsnap, const char *lastsnap, - uint64_t *usedp) -{ - nvlist_t *args; - nvlist_t *result; - int err; - char fs[ZFS_MAX_DATASET_NAME_LEN]; - char *atp; - - /* determine the fs name */ - (void) strlcpy(fs, firstsnap, sizeof (fs)); - atp = strchr(fs, '@'); - if (atp == NULL) - return (EINVAL); - *atp = '\0'; - - args = fnvlist_alloc(); - fnvlist_add_string(args, "firstsnap", firstsnap); - - err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); - nvlist_free(args); - if (err == 0) - *usedp = fnvlist_lookup_uint64(result, "used"); - fnvlist_free(result); - - return (err); -} - -boolean_t -lzc_exists(const char *dataset) -{ - /* - * The objset_stats ioctl is still legacy, so we need to construct our - * own zfs_cmd_t rather than using lzc_ioctl(). - */ - zfs_cmd_t zc = { 0 }; - - ASSERT3S(g_refcount, >, 0); - VERIFY3S(g_fd, !=, -1); - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); -} - -/* - * outnvl is unused. - * It was added to preserve the function signature in case it is - * needed in the future. - */ -/*ARGSUSED*/ -int -lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) -{ - return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); -} - -/* - * Create "user holds" on snapshots. 
If there is a hold on a snapshot, - * the snapshot can not be destroyed. (However, it can be marked for deletion - * by lzc_destroy_snaps(defer=B_TRUE).) - * - * The keys in the nvlist are snapshot names. - * The snapshots must all be in the same pool. - * The value is the name of the hold (string type). - * - * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). - * In this case, when the cleanup_fd is closed (including on process - * termination), the holds will be released. If the system is shut down - * uncleanly, the holds will be released when the pool is next opened - * or imported. - * - * Holds for snapshots which don't exist will be skipped and have an entry - * added to errlist, but will not cause an overall failure. - * - * The return value will be 0 if all holds, for snapshots that existed, - * were succesfully created. - * - * Otherwise the return value will be the errno of a (unspecified) hold that - * failed and no holds will be created. - * - * In all cases the errlist will have an entry for each hold that failed - * (name = snapshot), with its value being the error code (int32). - */ -int -lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) -{ - char pool[ZFS_MAX_DATASET_NAME_LEN]; - nvlist_t *args; - nvpair_t *elem; - int error; - - /* determine the pool name */ - elem = nvlist_next_nvpair(holds, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/@")] = '\0'; - - args = fnvlist_alloc(); - fnvlist_add_nvlist(args, "holds", holds); - if (cleanup_fd != -1) - fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); - - error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); - nvlist_free(args); - return (error); -} - -/* - * Release "user holds" on snapshots. 
If the snapshot has been marked for - * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have - * any clones, and all the user holds are removed, then the snapshot will be - * destroyed. - * - * The keys in the nvlist are snapshot names. - * The snapshots must all be in the same pool. - * The value is a nvlist whose keys are the holds to remove. - * - * Holds which failed to release because they didn't exist will have an entry - * added to errlist, but will not cause an overall failure. - * - * The return value will be 0 if the nvl holds was empty or all holds that - * existed, were successfully removed. - * - * Otherwise the return value will be the errno of a (unspecified) hold that - * failed to release and no holds will be released. - * - * In all cases the errlist will have an entry for each hold that failed to - * to release. - */ -int -lzc_release(nvlist_t *holds, nvlist_t **errlist) -{ - char pool[ZFS_MAX_DATASET_NAME_LEN]; - nvpair_t *elem; - - /* determine the pool name */ - elem = nvlist_next_nvpair(holds, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/@")] = '\0'; - - return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); -} - -/* - * Retrieve list of user holds on the specified snapshot. - * - * On success, *holdsp will be set to a nvlist which the caller must free. - * The keys are the names of the holds, and the value is the creation time - * of the hold (uint64) in seconds since the epoch. - */ -int -lzc_get_holds(const char *snapname, nvlist_t **holdsp) -{ - return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); -} - -/* - * Generate a zfs send stream for the specified snapshot and write it to - * the specified file descriptor. - * - * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") - * - * If "from" is NULL, a full (non-incremental) stream will be sent. 
- * If "from" is non-NULL, it must be the full name of a snapshot or - * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or - * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or - * bookmark must represent an earlier point in the history of "snapname"). - * It can be an earlier snapshot in the same filesystem or zvol as "snapname", - * or it can be the origin of "snapname"'s filesystem, or an earlier - * snapshot in the origin, etc. - * - * "fd" is the file descriptor to write the send stream to. - * - * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted - * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT - * records with drr_blksz > 128K. - * - * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted - * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, - * which the receiving system must support (as indicated by support - * for the "embedded_data" feature). - */ -int -lzc_send(const char *snapname, const char *from, int fd, - enum lzc_send_flags flags) -{ - return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); -} - -int -lzc_send_resume(const char *snapname, const char *from, int fd, - enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) -{ - nvlist_t *args; - int err; - - args = fnvlist_alloc(); - fnvlist_add_int32(args, "fd", fd); - if (from != NULL) - fnvlist_add_string(args, "fromsnap", from); - if (flags & LZC_SEND_FLAG_LARGE_BLOCK) - fnvlist_add_boolean(args, "largeblockok"); - if (flags & LZC_SEND_FLAG_EMBED_DATA) - fnvlist_add_boolean(args, "embedok"); - if (flags & LZC_SEND_FLAG_COMPRESS) - fnvlist_add_boolean(args, "compressok"); - if (resumeobj != 0 || resumeoff != 0) { - fnvlist_add_uint64(args, "resume_object", resumeobj); - fnvlist_add_uint64(args, "resume_offset", resumeoff); - } - err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); - nvlist_free(args); - return (err); -} - -/* - * "from" can be NULL, a 
snapshot, or a bookmark. - * - * If from is NULL, a full (non-incremental) stream will be estimated. This - * is calculated very efficiently. - * - * If from is a snapshot, lzc_send_space uses the deadlists attached to - * each snapshot to efficiently estimate the stream size. - * - * If from is a bookmark, the indirect blocks in the destination snapshot - * are traversed, looking for blocks with a birth time since the creation TXG of - * the snapshot this bookmark was created from. This will result in - * significantly more I/O and be less efficient than a send space estimation on - * an equivalent snapshot. - */ -int -lzc_send_space(const char *snapname, const char *from, - enum lzc_send_flags flags, uint64_t *spacep) -{ - nvlist_t *args; - nvlist_t *result; - int err; - - args = fnvlist_alloc(); - if (from != NULL) - fnvlist_add_string(args, "from", from); - if (flags & LZC_SEND_FLAG_LARGE_BLOCK) - fnvlist_add_boolean(args, "largeblockok"); - if (flags & LZC_SEND_FLAG_EMBED_DATA) - fnvlist_add_boolean(args, "embedok"); - if (flags & LZC_SEND_FLAG_COMPRESS) - fnvlist_add_boolean(args, "compressok"); - err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); - nvlist_free(args); - if (err == 0) - *spacep = fnvlist_lookup_uint64(result, "space"); - nvlist_free(result); - return (err); -} - -static int -recv_read(int fd, void *buf, int ilen) -{ - char *cp = buf; - int rv; - int len = ilen; - - do { - rv = read(fd, cp, len); - cp += rv; - len -= rv; - } while (rv > 0); - - if (rv < 0 || len != 0) - return (EIO); - - return (0); -} - -static int -recv_impl(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, boolean_t resumable, int fd, - const dmu_replay_record_t *begin_record) -{ - /* - * The receive ioctl is still legacy, so we need to construct our own - * zfs_cmd_t rather than using zfsc_ioctl(). 
- */ - zfs_cmd_t zc = { 0 }; - char *atp; - char *packed = NULL; - size_t size; - int error; - - ASSERT3S(g_refcount, >, 0); - VERIFY3S(g_fd, !=, -1); - - /* zc_name is name of containing filesystem */ - (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); - atp = strchr(zc.zc_name, '@'); - if (atp == NULL) - return (EINVAL); - *atp = '\0'; - - /* if the fs does not exist, try its parent. */ - if (!lzc_exists(zc.zc_name)) { - char *slashp = strrchr(zc.zc_name, '/'); - if (slashp == NULL) - return (ENOENT); - *slashp = '\0'; - - } - - /* zc_value is full name of the snapshot to create */ - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - - if (props != NULL) { - /* zc_nvlist_src is props to set */ - packed = fnvlist_pack(props, &size); - zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; - zc.zc_nvlist_src_size = size; - } - - /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ - if (origin != NULL) - (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); - - /* zc_begin_record is non-byteswapped BEGIN record */ - if (begin_record == NULL) { - error = recv_read(fd, &zc.zc_begin_record, - sizeof (zc.zc_begin_record)); - if (error != 0) - goto out; - } else { - zc.zc_begin_record = *begin_record; - } - - /* zc_cookie is fd to read from */ - zc.zc_cookie = fd; - - /* zc guid is force flag */ - zc.zc_guid = force; - - zc.zc_resumable = resumable; - - /* zc_cleanup_fd is unused */ - zc.zc_cleanup_fd = -1; - - error = ioctl(g_fd, ZFS_IOC_RECV, &zc); - if (error != 0) - error = errno; - -out: - if (packed != NULL) - fnvlist_pack_free(packed, size); - free((void*)(uintptr_t)zc.zc_nvlist_dst); - return (error); -} - -/* - * The simplest receive case: receive from the specified fd, creating the - * specified snapshot. Apply the specified properties as "received" properties - * (which can be overridden by locally-set properties). If the stream is a - * clone, its origin snapshot must be specified by 'origin'. 
The 'force' - * flag will cause the target filesystem to be rolled back or destroyed if - * necessary to receive. - * - * Return 0 on success or an errno on failure. - * - * Note: this interface does not work on dedup'd streams - * (those with DMU_BACKUP_FEATURE_DEDUP). - */ -int -lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) -{ - return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL)); -} - -/* - * Like lzc_receive, but if the receive fails due to premature stream - * termination, the intermediate state will be preserved on disk. In this - * case, ECKSUM will be returned. The receive may subsequently be resumed - * with a resuming send stream generated by lzc_send_resume(). - */ -int -lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) -{ - return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL)); -} - -/* - * Like lzc_receive, but allows the caller to read the begin record and then to - * pass it in. That could be useful if the caller wants to derive, for example, - * the snapname or the origin parameters based on the information contained in - * the begin record. - * The begin record must be in its original form as read from the stream, - * in other words, it should not be byteswapped. - * - * The 'resumable' parameter allows to obtain the same behavior as with - * lzc_receive_resumable. - */ -int -lzc_receive_with_header(const char *snapname, nvlist_t *props, - const char *origin, boolean_t force, boolean_t resumable, int fd, - const dmu_replay_record_t *begin_record) -{ - if (begin_record == NULL) - return (EINVAL); - return (recv_impl(snapname, props, origin, force, resumable, fd, - begin_record)); -} - -/* - * Roll back this filesystem or volume to its most recent snapshot. - * If snapnamebuf is not NULL, it will be filled in with the name - * of the most recent snapshot. 
- * Note that the latest snapshot may change if a new one is concurrently - * created or the current one is destroyed. lzc_rollback_to can be used - * to roll back to a specific latest snapshot. - * - * Return 0 on success or an errno on failure. - */ -int -lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) -{ - nvlist_t *args; - nvlist_t *result; - int err; - - args = fnvlist_alloc(); - err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); - nvlist_free(args); - if (err == 0 && snapnamebuf != NULL) { - const char *snapname = fnvlist_lookup_string(result, "target"); - (void) strlcpy(snapnamebuf, snapname, snapnamelen); - } - nvlist_free(result); - - return (err); -} - -/* - * Roll back this filesystem or volume to the specified snapshot, - * if possible. - * - * Return 0 on success or an errno on failure. - */ -int -lzc_rollback_to(const char *fsname, const char *snapname) -{ - nvlist_t *args; - nvlist_t *result; - int err; - - args = fnvlist_alloc(); - fnvlist_add_string(args, "target", snapname); - err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); - nvlist_free(args); - nvlist_free(result); - return (err); -} - -/* - * Creates bookmarks. - * - * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to - * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and - * snapshots must be in the same pool. - * - * The returned results nvlist will have an entry for each bookmark that failed. - * The value will be the (int32) error code. - * - * The return value will be 0 if all bookmarks were created, otherwise it will - * be the errno of a (undetermined) bookmarks that failed. 
- */ -int -lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) -{ - nvpair_t *elem; - int error; - char pool[ZFS_MAX_DATASET_NAME_LEN]; - - /* determine the pool name */ - elem = nvlist_next_nvpair(bookmarks, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/#")] = '\0'; - - error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); - - return (error); -} - -/* - * Retrieve bookmarks. - * - * Retrieve the list of bookmarks for the given file system. The props - * parameter is an nvlist of property names (with no values) that will be - * returned for each bookmark. - * - * The following are valid properties on bookmarks, all of which are numbers - * (represented as uint64 in the nvlist) - * - * "guid" - globally unique identifier of the snapshot it refers to - * "createtxg" - txg when the snapshot it refers to was created - * "creation" - timestamp when the snapshot it refers to was created - * - * The format of the returned nvlist as follows: - * <short name of bookmark> -> { - * <name of property> -> { - * "value" -> uint64 - * } - * } - */ -int -lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) -{ - return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); -} - -/* - * Destroys bookmarks. - * - * The keys in the bmarks nvlist are the bookmarks to be destroyed. - * They must all be in the same pool. Bookmarks are specified as - * <fs>#<bmark>. - * - * Bookmarks that do not exist will be silently ignored. - * - * The return value will be 0 if all bookmarks that existed were destroyed. - * - * Otherwise the return value will be the errno of a (undetermined) bookmark - * that failed, no bookmarks will be destroyed, and the errlist will have an - * entry for each bookmarks that failed. The value in the errlist will be - * the (int32) error code. 
- */ -int -lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) -{ - nvpair_t *elem; - int error; - char pool[ZFS_MAX_DATASET_NAME_LEN]; - - /* determine the pool name */ - elem = nvlist_next_nvpair(bmarks, NULL); - if (elem == NULL) - return (0); - (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); - pool[strcspn(pool, "/#")] = '\0'; - - error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); - - return (error); -} - -static int -lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, - uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) -{ - int error; - nvlist_t *args; - - args = fnvlist_alloc(); - fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); - fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); - fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); - fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit); - fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); - error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); - fnvlist_free(args); - - return (error); -} - -/* - * Executes a channel program. - * - * If this function returns 0 the channel program was successfully loaded and - * ran without failing. Note that individual commands the channel program ran - * may have failed and the channel program is responsible for reporting such - * errors through outnvl if they are important. - * - * This method may also return: - * - * EINVAL The program contains syntax errors, or an invalid memory or time - * limit was given. No part of the channel program was executed. - * If caused by syntax errors, 'outnvl' contains information about the - * errors. - * - * EDOM The program was executed, but encountered a runtime error, such as - * calling a function with incorrect arguments, invoking the error() - * function directly, failing an assert() command, etc. Some portion - * of the channel program may have executed and committed changes. 
- * Information about the failure can be found in 'outnvl'. - * - * ENOMEM The program fully executed, but the output buffer was not large - * enough to store the returned value. No output is returned through - * 'outnvl'. - * - * ENOSPC The program was terminated because it exceeded its memory usage - * limit. Some portion of the channel program may have executed and - * committed changes to disk. No output is returned through 'outnvl'. - * - * ETIMEDOUT The program was terminated because it exceeded its Lua instruction - * limit. Some portion of the channel program may have executed and - * committed changes to disk. No output is returned through 'outnvl'. - */ -int -lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, - uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) -{ - return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, - memlimit, argnvl, outnvl)); -} - -/* - * Creates a checkpoint for the specified pool. - * - * If this function returns 0 the pool was successfully checkpointed. - * - * This method may also return: - * - * ZFS_ERR_CHECKPOINT_EXISTS - * The pool already has a checkpoint. A pools can only have one - * checkpoint at most, at any given time. - * - * ZFS_ERR_DISCARDING_CHECKPOINT - * ZFS is in the middle of discarding a checkpoint for this pool. - * The pool can be checkpointed again once the discard is done. - * - * ZFS_DEVRM_IN_PROGRESS - * A vdev is currently being removed. The pool cannot be - * checkpointed until the device removal is done. - * - * ZFS_VDEV_TOO_BIG - * One or more top-level vdevs exceed the maximum vdev size - * supported for this feature. - */ -int -lzc_pool_checkpoint(const char *pool) -{ - int error; - - nvlist_t *result = NULL; - nvlist_t *args = fnvlist_alloc(); - - error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); - - fnvlist_free(args); - fnvlist_free(result); - - return (error); -} - -/* - * Discard the checkpoint from the specified pool. 
- * - * If this function returns 0 the checkpoint was successfully discarded. - * - * This method may also return: - * - * ZFS_ERR_NO_CHECKPOINT - * The pool does not have a checkpoint. - * - * ZFS_ERR_DISCARDING_CHECKPOINT - * ZFS is already in the middle of discarding the checkpoint. - */ -int -lzc_pool_checkpoint_discard(const char *pool) -{ - int error; - - nvlist_t *result = NULL; - nvlist_t *args = fnvlist_alloc(); - - error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); - - fnvlist_free(args); - fnvlist_free(result); - - return (error); -} - -/* - * Executes a read-only channel program. - * - * A read-only channel program works programmatically the same way as a - * normal channel program executed with lzc_channel_program(). The only - * difference is it runs exclusively in open-context and therefore can - * return faster. The downside to that, is that the program cannot change - * on-disk state by calling functions from the zfs.sync submodule. - * - * The return values of this function (and their meaning) are exactly the - * same as the ones described in lzc_channel_program(). - */ -int -lzc_channel_program_nosync(const char *pool, const char *program, - uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) -{ - return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, - memlimit, argnvl, outnvl)); -} - -/* - * Changes initializing state. - * - * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. - * The key is ignored. - * - * If there are errors related to vdev arguments, per-vdev errors are returned - * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where - * guid is stringified with PRIu64, and errno is one of the following as - * an int64_t: - * - ENODEV if the device was not found - * - EINVAL if the devices is not a leaf or is not concrete (e.g. 
missing) - * - EROFS if the device is not writeable - * - EBUSY start requested but the device is already being initialized - * - ESRCH cancel/suspend requested but device is not being initialized - * - * If the errlist is empty, then return value will be: - * - EINVAL if one or more arguments was invalid - * - Other spa_open failures - * - 0 if the operation succeeded - */ -int -lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, - nvlist_t *vdevs, nvlist_t **errlist) -{ - int error; - nvlist_t *args = fnvlist_alloc(); - fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); - fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); - - error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); - - fnvlist_free(args); - - return (error); -} - -/* - * Set the bootenv contents for the given pool. - */ -int -lzc_set_bootenv(const char *pool, const char *env) -{ - nvlist_t *args = fnvlist_alloc(); - fnvlist_add_string(args, "envmap", env); - int error = lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, args, NULL); - fnvlist_free(args); - return (error); -} - -/* - * Get the contents of the bootenv of the given pool. - */ -int -lzc_get_bootenv(const char *pool, nvlist_t **outnvl) -{ - return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h deleted file mode 100644 index 76c4fa1bf6b4..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. - * Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Copyright 2017 RackTop Systems. - * Copyright (c) 2017 Datto Inc. - */ - -#ifndef _LIBZFS_CORE_H -#define _LIBZFS_CORE_H - -#include <libnvpair.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/fs/zfs.h> - - -#ifdef __cplusplus -extern "C" { -#endif - -int libzfs_core_init(void); -void libzfs_core_fini(void); - -/* - * NB: this type should be kept binary compatible with dmu_objset_type_t. 
- */ -enum lzc_dataset_type { - LZC_DATSET_TYPE_ZFS = 2, - LZC_DATSET_TYPE_ZVOL -}; - -int lzc_remap(const char *fsname); -int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); -int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); -int lzc_clone(const char *, const char *, nvlist_t *); -int lzc_promote(const char *, char *, int); -int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); -int lzc_bookmark(nvlist_t *, nvlist_t **); -int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); -int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); -int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *, - nvlist_t **); - -int lzc_snaprange_space(const char *, const char *, uint64_t *); - -int lzc_hold(nvlist_t *, int, nvlist_t **); -int lzc_release(nvlist_t *, nvlist_t **); -int lzc_get_holds(const char *, nvlist_t **); - -enum lzc_send_flags { - LZC_SEND_FLAG_EMBED_DATA = 1 << 0, - LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, - LZC_SEND_FLAG_COMPRESS = 1 << 2 -}; - -int lzc_send(const char *, const char *, int, enum lzc_send_flags); -int lzc_send_resume(const char *, const char *, int, - enum lzc_send_flags, uint64_t, uint64_t); -int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); - -struct dmu_replay_record; - -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); -int lzc_receive_resumable(const char *, nvlist_t *, const char *, - boolean_t, int); -int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, int, const struct dmu_replay_record *); - -boolean_t lzc_exists(const char *); - -int lzc_rollback(const char *, char *, int); -int lzc_rollback_to(const char *, const char *); - -int lzc_sync(const char *, nvlist_t *, nvlist_t **); - -int lzc_rename(const char *, const char *); -int lzc_destroy(const char *); - -int lzc_channel_program(const char *, const char *, uint64_t, - uint64_t, nvlist_t *, nvlist_t **); -int lzc_channel_program_nosync(const char *, const 
char *, uint64_t, - uint64_t, nvlist_t *, nvlist_t **); - -int lzc_pool_checkpoint(const char *); -int lzc_pool_checkpoint_discard(const char *); - -int lzc_set_bootenv(const char *, const char *); -int lzc_get_bootenv(const char *, nvlist_t **); -#ifdef __cplusplus -} -#endif - -#endif /* _LIBZFS_CORE_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c deleted file mode 100644 index a3b872ee29da..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 
- */ - -#include <sys/zfs_ioctl.h> -#include <zfs_ioctl_compat.h> -#include "libzfs_core_compat.h" - -extern int zfs_ioctl_version; - -int -lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source) -{ - nvlist_t *nvl = NULL; - nvpair_t *pair, *hpair; - char *buf, *val; - zfs_ioc_t vecnum; - uint32_t type32; - int32_t cleanup_fd; - int error = 0; - int pos; - - if (zfs_ioctl_version >= ZFS_IOCVER_LZC) - return (0); - - vecnum = *ioc; - - switch (vecnum) { - case ZFS_IOC_CREATE: - type32 = fnvlist_lookup_int32(*source, "type"); - zc->zc_objset_type = (uint64_t)type32; - nvlist_lookup_nvlist(*source, "props", &nvl); - *source = nvl; - break; - case ZFS_IOC_CLONE: - buf = fnvlist_lookup_string(*source, "origin"); - strlcpy(zc->zc_value, buf, MAXPATHLEN); - nvlist_lookup_nvlist(*source, "props", &nvl); - *ioc = ZFS_IOC_CREATE; - *source = nvl; - break; - case ZFS_IOC_SNAPSHOT: - nvl = fnvlist_lookup_nvlist(*source, "snaps"); - pair = nvlist_next_nvpair(nvl, NULL); - if (pair != NULL) { - buf = nvpair_name(pair); - pos = strcspn(buf, "@"); - strlcpy(zc->zc_name, buf, pos + 1); - strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); - } else - error = EINVAL; - /* old kernel cannot create multiple snapshots */ - if (!error && nvlist_next_nvpair(nvl, pair) != NULL) - error = EOPNOTSUPP; - nvlist_free(nvl); - nvl = NULL; - nvlist_lookup_nvlist(*source, "props", &nvl); - *source = nvl; - break; - case ZFS_IOC_SPACE_SNAPS: - buf = fnvlist_lookup_string(*source, "firstsnap"); - strlcpy(zc->zc_value, buf, MAXPATHLEN); - break; - case ZFS_IOC_DESTROY_SNAPS: - nvl = fnvlist_lookup_nvlist(*source, "snaps"); - pair = nvlist_next_nvpair(nvl, NULL); - if (pair != NULL) { - buf = nvpair_name(pair); - pos = strcspn(buf, "@"); - strlcpy(zc->zc_name, buf, pos + 1); - } else - error = EINVAL; - /* old kernel cannot atomically destroy multiple snaps */ - if (!error && nvlist_next_nvpair(nvl, pair) != NULL) - error = EOPNOTSUPP; - *source = nvl; - break; - case ZFS_IOC_HOLD: - nvl = 
fnvlist_lookup_nvlist(*source, "holds"); - pair = nvlist_next_nvpair(nvl, NULL); - if (pair != NULL) { - buf = nvpair_name(pair); - pos = strcspn(buf, "@"); - strlcpy(zc->zc_name, buf, pos + 1); - strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); - if (nvpair_value_string(pair, &val) == 0) - strlcpy(zc->zc_string, val, MAXNAMELEN); - else - error = EINVAL; - } else - error = EINVAL; - /* old kernel cannot atomically create multiple holds */ - if (!error && nvlist_next_nvpair(nvl, pair) != NULL) - error = EOPNOTSUPP; - nvlist_free(nvl); - if (nvlist_lookup_int32(*source, "cleanup_fd", - &cleanup_fd) == 0) - zc->zc_cleanup_fd = cleanup_fd; - else - zc->zc_cleanup_fd = -1; - break; - case ZFS_IOC_RELEASE: - pair = nvlist_next_nvpair(*source, NULL); - if (pair != NULL) { - buf = nvpair_name(pair); - pos = strcspn(buf, "@"); - strlcpy(zc->zc_name, buf, pos + 1); - strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); - if (nvpair_value_nvlist(pair, &nvl) == 0) { - hpair = nvlist_next_nvpair(nvl, NULL); - if (hpair != NULL) - strlcpy(zc->zc_string, - nvpair_name(hpair), MAXNAMELEN); - else - error = EINVAL; - if (!error && nvlist_next_nvpair(nvl, - hpair) != NULL) - error = EOPNOTSUPP; - } else - error = EINVAL; - } else - error = EINVAL; - /* old kernel cannot atomically release multiple holds */ - if (!error && nvlist_next_nvpair(nvl, pair) != NULL) - error = EOPNOTSUPP; - break; - } - - return (error); -} - -void -lzc_compat_post(zfs_cmd_t *zc, const zfs_ioc_t ioc) -{ - if (zfs_ioctl_version >= ZFS_IOCVER_LZC) - return; - - switch (ioc) { - case ZFS_IOC_CREATE: - case ZFS_IOC_CLONE: - case ZFS_IOC_SNAPSHOT: - case ZFS_IOC_SPACE_SNAPS: - case ZFS_IOC_DESTROY_SNAPS: - zc->zc_nvlist_dst_filled = B_FALSE; - break; - } -} - -int -lzc_compat_outnvl(zfs_cmd_t *zc, const zfs_ioc_t ioc, nvlist_t **outnvl) -{ - nvlist_t *nvl; - - if (zfs_ioctl_version >= ZFS_IOCVER_LZC) - return (0); - - switch (ioc) { - case ZFS_IOC_SPACE_SNAPS: - nvl = fnvlist_alloc(); - fnvlist_add_uint64(nvl, 
"used", zc->zc_cookie); - fnvlist_add_uint64(nvl, "compressed", zc->zc_objset_type); - fnvlist_add_uint64(nvl, "uncompressed", zc->zc_perm_action); - *outnvl = nvl; - break; - } - - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h deleted file mode 100644 index 6527c4b2576f..000000000000 --- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>. All rights reserved. 
- */ - -#ifndef _LIBZFS_CORE_COMPAT_H -#define _LIBZFS_CORE_COMPAT_H - -#include <libnvpair.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/fs/zfs.h> -#include <sys/zfs_ioctl.h> - -#ifdef __cplusplus -extern "C" { -#endif - -int lzc_compat_pre(zfs_cmd_t *, zfs_ioc_t *, nvlist_t **); -void lzc_compat_post(zfs_cmd_t *, const zfs_ioc_t); -int lzc_compat_outnvl(zfs_cmd_t *, const zfs_ioc_t, nvlist_t **); - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBZFS_CORE_COMPAT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c deleted file mode 100644 index 9b54e419705b..000000000000 --- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c +++ /dev/null @@ -1,1238 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 by Delphix. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
- */ - -#include <assert.h> -#include <fcntl.h> -#include <poll.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <zlib.h> -#include <libgen.h> -#include <sys/assfail.h> -#include <sys/spa.h> -#include <sys/stat.h> -#include <sys/processor.h> -#include <sys/zfs_context.h> -#include <sys/rrwlock.h> -#include <sys/zmod.h> -#include <sys/utsname.h> -#include <sys/systeminfo.h> -#include <libzfs.h> - -/* - * Emulation of kernel services in userland. - */ - -#ifndef __FreeBSD__ -int aok; -#endif -uint64_t physmem; -vnode_t *rootdir = (vnode_t *)0xabcd1234; -char hw_serial[HW_HOSTID_LEN]; -#ifdef illumos -kmutex_t cpu_lock; -#endif - -/* If set, all blocks read will be copied to the specified directory. */ -char *vn_dumpdir = NULL; - -struct utsname utsname = { - "userland", "libzpool", "1", "1", "na" -}; - -/* this only exists to have its address taken */ -struct proc p0; - -/* - * ========================================================================= - * threads - * ========================================================================= - */ -/*ARGSUSED*/ -kthread_t * -zk_thread_create(void (*func)(), void *arg) -{ - thread_t tid; - - VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, - &tid) == 0); - - return ((void *)(uintptr_t)tid); -} - -/* - * ========================================================================= - * kstats - * ========================================================================= - */ -/*ARGSUSED*/ -kstat_t * -kstat_create(char *module, int instance, char *name, char *class, - uchar_t type, ulong_t ndata, uchar_t ks_flag) -{ - return (NULL); -} - -/*ARGSUSED*/ -void -kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type) -{} - -/*ARGSUSED*/ -void -kstat_install(kstat_t *ksp) -{} - -/*ARGSUSED*/ -void -kstat_delete(kstat_t *ksp) -{} - -/* - * ========================================================================= - * mutexes - * 
========================================================================= - */ -void -zmutex_init(kmutex_t *mp) -{ - mp->m_owner = NULL; - mp->initialized = B_TRUE; - (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); -} - -void -zmutex_destroy(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner == NULL); - (void) _mutex_destroy(&(mp)->m_lock); - mp->m_owner = (void *)-1UL; - mp->initialized = B_FALSE; -} - -int -zmutex_owned(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - - return (mp->m_owner == curthread); -} - -void -mutex_enter(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - ASSERT(mp->m_owner != curthread); - VERIFY(mutex_lock(&mp->m_lock) == 0); - ASSERT(mp->m_owner == NULL); - mp->m_owner = curthread; -} - -int -mutex_tryenter(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - ASSERT(mp->m_owner != (void *)-1UL); - if (0 == mutex_trylock(&mp->m_lock)) { - ASSERT(mp->m_owner == NULL); - mp->m_owner = curthread; - return (1); - } else { - return (0); - } -} - -void -mutex_exit(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - VERIFY(mutex_unlock(&mp->m_lock) == 0); -} - -void * -mutex_owner(kmutex_t *mp) -{ - ASSERT(mp->initialized == B_TRUE); - return (mp->m_owner); -} - -/* - * ========================================================================= - * rwlocks - * ========================================================================= - */ -/*ARGSUSED*/ -void -rw_init(krwlock_t *rwlp, char *name, int type, void *arg) -{ - rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); - rwlp->rw_owner = NULL; - rwlp->initialized = B_TRUE; - rwlp->rw_count = 0; -} - -void -rw_destroy(krwlock_t *rwlp) -{ - ASSERT(rwlp->rw_count == 0); - rwlock_destroy(&rwlp->rw_lock); - rwlp->rw_owner = (void *)-1UL; - rwlp->initialized = B_FALSE; -} - -void -rw_enter(krwlock_t *rwlp, krw_t rw) -{ - //ASSERT(!RW_LOCK_HELD(rwlp)); - 
ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - ASSERT(rwlp->rw_owner != curthread); - - if (rw == RW_READER) { - VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); - ASSERT(rwlp->rw_count >= 0); - atomic_add_int(&rwlp->rw_count, 1); - } else { - VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); - ASSERT(rwlp->rw_count == 0); - rwlp->rw_count = -1; - rwlp->rw_owner = curthread; - } -} - -void -rw_exit(krwlock_t *rwlp) -{ - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - - if (rwlp->rw_owner == curthread) { - /* Write locked. */ - ASSERT(rwlp->rw_count == -1); - rwlp->rw_count = 0; - rwlp->rw_owner = NULL; - } else { - /* Read locked. */ - ASSERT(rwlp->rw_count > 0); - atomic_add_int(&rwlp->rw_count, -1); - } - VERIFY(rw_unlock(&rwlp->rw_lock) == 0); -} - -int -rw_tryenter(krwlock_t *rwlp, krw_t rw) -{ - int rv; - - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - ASSERT(rwlp->rw_owner != curthread); - - if (rw == RW_READER) - rv = rw_tryrdlock(&rwlp->rw_lock); - else - rv = rw_trywrlock(&rwlp->rw_lock); - - if (rv == 0) { - ASSERT(rwlp->rw_owner == NULL); - if (rw == RW_READER) { - ASSERT(rwlp->rw_count >= 0); - atomic_add_int(&rwlp->rw_count, 1); - } else { - ASSERT(rwlp->rw_count == 0); - rwlp->rw_count = -1; - rwlp->rw_owner = curthread; - } - return (1); - } - - return (0); -} - -/*ARGSUSED*/ -int -rw_tryupgrade(krwlock_t *rwlp) -{ - ASSERT(rwlp->initialized == B_TRUE); - ASSERT(rwlp->rw_owner != (void *)-1UL); - - return (0); -} - -int -rw_lock_held(krwlock_t *rwlp) -{ - - return (rwlp->rw_count != 0); -} - -/* - * ========================================================================= - * condition variables - * ========================================================================= - */ -/*ARGSUSED*/ -void -cv_init(kcondvar_t *cv, char *name, int type, void *arg) -{ - VERIFY(cond_init(cv, name, NULL) == 0); -} - -void -cv_destroy(kcondvar_t *cv) -{ - VERIFY(cond_destroy(cv) == 
0); -} - -void -cv_wait(kcondvar_t *cv, kmutex_t *mp) -{ - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - int ret = cond_wait(cv, &mp->m_lock); - VERIFY(ret == 0 || ret == EINTR); - mp->m_owner = curthread; -} - -/* - * NB: this emulates FreeBSD cv_wait_sig(9), not the illumos one. - * Meanings of the return code are different. - * NB: this does not actually catch any signals. - */ -int -cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) -{ - cv_wait(cv, mp); - return (0); -} - -clock_t -cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) -{ - int error; - struct timespec ts; - struct timeval tv; - clock_t delta; - - abstime += ddi_get_lbolt(); -top: - delta = abstime - ddi_get_lbolt(); - if (delta <= 0) - return (-1); - - if (gettimeofday(&tv, NULL) != 0) - assert(!"gettimeofday() failed"); - - ts.tv_sec = tv.tv_sec + delta / hz; - ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); - ASSERT(ts.tv_nsec >= 0); - - if (ts.tv_nsec >= NANOSEC) { - ts.tv_sec++; - ts.tv_nsec -= NANOSEC; - } - - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); - mp->m_owner = curthread; - - if (error == EINTR) - goto top; - - if (error == ETIMEDOUT) - return (-1); - - ASSERT(error == 0); - - return (1); -} - -/*ARGSUSED*/ -clock_t -cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, - int flag) -{ - int error; - timespec_t ts; - hrtime_t delta; - - ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE); - -top: - delta = tim; - if (flag & CALLOUT_FLAG_ABSOLUTE) - delta -= gethrtime(); - - if (delta <= 0) - return (-1); - - clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += delta / NANOSEC; - ts.tv_nsec += delta % NANOSEC; - if (ts.tv_nsec >= NANOSEC) { - ts.tv_sec++; - ts.tv_nsec -= NANOSEC; - } - - ASSERT(mutex_owner(mp) == curthread); - mp->m_owner = NULL; - error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); - mp->m_owner = curthread; - - if (error == ETIMEDOUT) - return 
(-1); - - if (error == EINTR) - goto top; - - ASSERT(error == 0); - - return (1); -} - -void -cv_signal(kcondvar_t *cv) -{ - VERIFY(cond_signal(cv) == 0); -} - -void -cv_broadcast(kcondvar_t *cv) -{ - VERIFY(cond_broadcast(cv) == 0); -} - -/* - * ========================================================================= - * vnode operations - * ========================================================================= - */ -/* - * Note: for the xxxat() versions of these functions, we assume that the - * starting vp is always rootdir (which is true for spa_directory.c, the only - * ZFS consumer of these interfaces). We assert this is true, and then emulate - * them by adding '/' in front of the path. - */ - -/*ARGSUSED*/ -int -vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) -{ - int fd; - int dump_fd; - vnode_t *vp; - int old_umask; - char realpath[MAXPATHLEN]; - struct stat64 st; - - /* - * If we're accessing a real disk from userland, we need to use - * the character interface to avoid caching. This is particularly - * important if we're trying to look at a real in-kernel storage - * pool from userland, e.g. via zdb, because otherwise we won't - * see the changes occurring under the segmap cache. - * On the other hand, the stupid character device returns zero - * for its size. So -- gag -- we open the block device to get - * its size, and remember it for subsequent VOP_GETATTR(). 
- */ - if (strncmp(path, "/dev/", 5) == 0) { - char *dsk; - fd = open64(path, O_RDONLY); - if (fd == -1) - return (errno); - if (fstat64(fd, &st) == -1) { - close(fd); - return (errno); - } - close(fd); - (void) sprintf(realpath, "%s", path); - dsk = strstr(path, "/dsk/"); - if (dsk != NULL) - (void) sprintf(realpath + (dsk - path) + 1, "r%s", - dsk + 1); - } else { - (void) sprintf(realpath, "%s", path); - if (!(flags & FCREAT) && stat64(realpath, &st) == -1) - return (errno); - } - - if (flags & FCREAT) - old_umask = umask(0); - - /* - * The construct 'flags - FREAD' conveniently maps combinations of - * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. - */ - fd = open64(realpath, flags - FREAD, mode); - - if (flags & FCREAT) - (void) umask(old_umask); - - if (vn_dumpdir != NULL) { - char dumppath[MAXPATHLEN]; - (void) snprintf(dumppath, sizeof (dumppath), - "%s/%s", vn_dumpdir, basename(realpath)); - dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); - if (dump_fd == -1) - return (errno); - } else { - dump_fd = -1; - } - - if (fd == -1) - return (errno); - - if (fstat64(fd, &st) == -1) { - close(fd); - return (errno); - } - - (void) fcntl(fd, F_SETFD, FD_CLOEXEC); - - *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); - - vp->v_fd = fd; - vp->v_size = st.st_size; - vp->v_path = spa_strdup(path); - vp->v_dump_fd = dump_fd; - - return (0); -} - -/*ARGSUSED*/ -int -vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, - int x3, vnode_t *startvp, int fd) -{ - char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); - int ret; - - ASSERT(startvp == rootdir); - (void) sprintf(realpath, "/%s", path); - - /* fd ignored for now, need if want to simulate nbmand support */ - ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); - - umem_free(realpath, strlen(path) + 2); - - return (ret); -} - -/*ARGSUSED*/ -int -vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, - int x1, int x2, rlim64_t x3, void 
*x4, ssize_t *residp) -{ - ssize_t iolen, split; - - if (uio == UIO_READ) { - iolen = pread64(vp->v_fd, addr, len, offset); - if (vp->v_dump_fd != -1) { - int status = - pwrite64(vp->v_dump_fd, addr, iolen, offset); - ASSERT(status != -1); - } - } else { - /* - * To simulate partial disk writes, we split writes into two - * system calls so that the process can be killed in between. - */ - int sectors = len >> SPA_MINBLOCKSHIFT; - split = (sectors > 0 ? rand() % sectors : 0) << - SPA_MINBLOCKSHIFT; - iolen = pwrite64(vp->v_fd, addr, split, offset); - iolen += pwrite64(vp->v_fd, (char *)addr + split, - len - split, offset + split); - } - - if (iolen == -1) - return (errno); - if (residp) - *residp = len - iolen; - else if (iolen != len) - return (EIO); - return (0); -} - -void -vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td) -{ - close(vp->v_fd); - if (vp->v_dump_fd != -1) - close(vp->v_dump_fd); - spa_strfree(vp->v_path); - umem_free(vp, sizeof (vnode_t)); -} - -/* - * At a minimum we need to update the size since vdev_reopen() - * will no longer call vn_openat(). - */ -int -fop_getattr(vnode_t *vp, vattr_t *vap) -{ - struct stat64 st; - - if (fstat64(vp->v_fd, &st) == -1) { - close(vp->v_fd); - return (errno); - } - - vap->va_size = st.st_size; - return (0); -} - -#ifdef ZFS_DEBUG - -/* - * ========================================================================= - * Figure out which debugging statements to print - * ========================================================================= - */ - -static char *dprintf_string; -static int dprintf_print_all; - -int -dprintf_find_string(const char *string) -{ - char *tmp_str = dprintf_string; - int len = strlen(string); - - /* - * Find out if this is a string we want to print. 
- * String format: file1.c,function_name1,file2.c,file3.c - */ - - while (tmp_str != NULL) { - if (strncmp(tmp_str, string, len) == 0 && - (tmp_str[len] == ',' || tmp_str[len] == '\0')) - return (1); - tmp_str = strchr(tmp_str, ','); - if (tmp_str != NULL) - tmp_str++; /* Get rid of , */ - } - return (0); -} - -void -dprintf_setup(int *argc, char **argv) -{ - int i, j; - - /* - * Debugging can be specified two ways: by setting the - * environment variable ZFS_DEBUG, or by including a - * "debug=..." argument on the command line. The command - * line setting overrides the environment variable. - */ - - for (i = 1; i < *argc; i++) { - int len = strlen("debug="); - /* First look for a command line argument */ - if (strncmp("debug=", argv[i], len) == 0) { - dprintf_string = argv[i] + len; - /* Remove from args */ - for (j = i; j < *argc; j++) - argv[j] = argv[j+1]; - argv[j] = NULL; - (*argc)--; - } - } - - if (dprintf_string == NULL) { - /* Look for ZFS_DEBUG environment variable */ - dprintf_string = getenv("ZFS_DEBUG"); - } - - /* - * Are we just turning on all debugging? - */ - if (dprintf_find_string("on")) - dprintf_print_all = 1; - - if (dprintf_string != NULL) - zfs_flags |= ZFS_DEBUG_DPRINTF; -} - -int -sysctl_handle_64(SYSCTL_HANDLER_ARGS) -{ - return (0); -} - -/* - * ========================================================================= - * debug printfs - * ========================================================================= - */ -void -__dprintf(const char *file, const char *func, int line, const char *fmt, ...) -{ - const char *newfile; - va_list adx; - - /* - * Get rid of annoying "../common/" prefix to filename. 
- */ - newfile = strrchr(file, '/'); - if (newfile != NULL) { - newfile = newfile + 1; /* Get rid of leading / */ - } else { - newfile = file; - } - - if (dprintf_print_all || - dprintf_find_string(newfile) || - dprintf_find_string(func)) { - /* Print out just the function name if requested */ - flockfile(stdout); - if (dprintf_find_string("pid")) - (void) printf("%d ", getpid()); - if (dprintf_find_string("tid")) - (void) printf("%lu ", thr_self()); -#if 0 - if (dprintf_find_string("cpu")) - (void) printf("%u ", getcpuid()); -#endif - if (dprintf_find_string("time")) - (void) printf("%llu ", gethrtime()); - if (dprintf_find_string("long")) - (void) printf("%s, line %d: ", newfile, line); - (void) printf("%s: ", func); - va_start(adx, fmt); - (void) vprintf(fmt, adx); - va_end(adx); - funlockfile(stdout); - } -} - -#endif /* ZFS_DEBUG */ - -/* - * ========================================================================= - * cmn_err() and panic() - * ========================================================================= - */ -static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; -static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; - -void -vpanic(const char *fmt, va_list adx) -{ - char buf[512]; - (void) vsnprintf(buf, 512, fmt, adx); - assfail(buf, NULL, 0); - abort(); /* necessary to make vpanic meet noreturn requirements */ -} - -void -panic(const char *fmt, ...) -{ - va_list adx; - - va_start(adx, fmt); - vpanic(fmt, adx); - va_end(adx); -} - -void -vcmn_err(int ce, const char *fmt, va_list adx) -{ - if (ce == CE_PANIC) - vpanic(fmt, adx); - if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ - (void) fprintf(stderr, "%s", ce_prefix[ce]); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "%s", ce_suffix[ce]); - } -} - -/*PRINTFLIKE2*/ -void -cmn_err(int ce, const char *fmt, ...) 
-{ - va_list adx; - - va_start(adx, fmt); - vcmn_err(ce, fmt, adx); - va_end(adx); -} - -/* - * ========================================================================= - * kobj interfaces - * ========================================================================= - */ -struct _buf * -kobj_open_file(char *name) -{ - struct _buf *file; - vnode_t *vp; - - /* set vp as the _fd field of the file */ - if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, - -1) != 0) - return ((void *)-1UL); - - file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); - file->_fd = (intptr_t)vp; - return (file); -} - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid; - - vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid); - - return (size - resid); -} - -void -kobj_close_file(struct _buf *file) -{ - vn_close((vnode_t *)file->_fd, 0, NULL, NULL); - umem_free(file, sizeof (struct _buf)); -} - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - struct stat64 st; - vnode_t *vp = (vnode_t *)file->_fd; - - if (fstat64(vp->v_fd, &st) == -1) { - vn_close(vp, 0, NULL, NULL); - return (errno); - } - *size = st.st_size; - return (0); -} - -/* - * ========================================================================= - * misc routines - * ========================================================================= - */ - -void -delay(clock_t ticks) -{ - poll(0, 0, ticks * (1000 / hz)); -} - -#if 0 -/* - * Find highest one bit set. - * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 
- */ -int -highbit64(uint64_t i) -{ - int h = 1; - - if (i == 0) - return (0); - if (i & 0xffffffff00000000ULL) { - h += 32; i >>= 32; - } - if (i & 0xffff0000) { - h += 16; i >>= 16; - } - if (i & 0xff00) { - h += 8; i >>= 8; - } - if (i & 0xf0) { - h += 4; i >>= 4; - } - if (i & 0xc) { - h += 2; i >>= 2; - } - if (i & 0x2) { - h += 1; - } - return (h); -} -#endif - -static int random_fd = -1, urandom_fd = -1; - -static int -random_get_bytes_common(uint8_t *ptr, size_t len, int fd) -{ - size_t resid = len; - ssize_t bytes; - - ASSERT(fd != -1); - - while (resid != 0) { - bytes = read(fd, ptr, resid); - ASSERT3S(bytes, >=, 0); - ptr += bytes; - resid -= bytes; - } - - return (0); -} - -int -random_get_bytes(uint8_t *ptr, size_t len) -{ - return (random_get_bytes_common(ptr, len, random_fd)); -} - -int -random_get_pseudo_bytes(uint8_t *ptr, size_t len) -{ - return (random_get_bytes_common(ptr, len, urandom_fd)); -} - -int -ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) -{ - char *end; - - *result = strtoul(hw_serial, &end, base); - if (*result == 0) - return (errno); - return (0); -} - -int -ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) -{ - char *end; - - *result = strtoull(str, &end, base); - if (*result == 0) - return (errno); - return (0); -} - -#ifdef illumos -/* ARGSUSED */ -cyclic_id_t -cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when) -{ - return (1); -} - -/* ARGSUSED */ -void -cyclic_remove(cyclic_id_t id) -{ -} - -/* ARGSUSED */ -int -cyclic_reprogram(cyclic_id_t id, hrtime_t expiration) -{ - return (1); -} -#endif - -/* - * ========================================================================= - * kernel emulation setup & teardown - * ========================================================================= - */ -static int -umem_out_of_memory(void) -{ - char errmsg[] = "out of memory -- generating core dump\n"; - - write(fileno(stderr), errmsg, sizeof (errmsg)); - abort(); - return (0); 
-} - -void -kernel_init(int mode) -{ - extern uint_t rrw_tsd_key; - - umem_nofail_callback(umem_out_of_memory); - - physmem = sysconf(_SC_PHYS_PAGES); - - dprintf("physmem = %llu pages (%.2f GB)\n", physmem, - (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); - - (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", - (mode & FWRITE) ? get_system_hostid() : 0); - - VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); - VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); - - system_taskq_init(); - -#ifdef illumos - mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL); -#endif - - spa_init(mode); - - tsd_create(&rrw_tsd_key, rrw_tsd_destroy); -} - -void -kernel_fini(void) -{ - spa_fini(); - - system_taskq_fini(); - - close(random_fd); - close(urandom_fd); - - random_fd = -1; - urandom_fd = -1; -} - -/* ARGSUSED */ -uint32_t -zone_get_hostid(void *zonep) -{ - /* - * We're emulating the system's hostid in userland. - */ - return (strtoul(hw_serial, NULL, 10)); -} - -int -z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) -{ - int ret; - uLongf len = *dstlen; - - if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) - *dstlen = (size_t)len; - - return (ret); -} - -int -z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, - int level) -{ - int ret; - uLongf len = *dstlen; - - if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) - *dstlen = (size_t)len; - - return (ret); -} - -uid_t -crgetuid(cred_t *cr) -{ - return (0); -} - -uid_t -crgetruid(cred_t *cr) -{ - return (0); -} - -gid_t -crgetgid(cred_t *cr) -{ - return (0); -} - -int -crgetngroups(cred_t *cr) -{ - return (0); -} - -gid_t * -crgetgroups(cred_t *cr) -{ - return (NULL); -} - -int -zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) -{ - return (0); -} - -int -zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) -{ - return (0); -} - -int -zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) -{ - return 
(0); -} - -ksiddomain_t * -ksid_lookupdomain(const char *dom) -{ - ksiddomain_t *kd; - - kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); - kd->kd_name = spa_strdup(dom); - return (kd); -} - -void -ksiddomain_rele(ksiddomain_t *ksid) -{ - spa_strfree(ksid->kd_name); - umem_free(ksid, sizeof (ksiddomain_t)); -} - -/* - * Do not change the length of the returned string; it must be freed - * with strfree(). - */ -char * -kmem_asprintf(const char *fmt, ...) -{ - int size; - va_list adx; - char *buf; - - va_start(adx, fmt); - size = vsnprintf(NULL, 0, fmt, adx) + 1; - va_end(adx); - - buf = kmem_alloc(size, KM_SLEEP); - - va_start(adx, fmt); - size = vsnprintf(buf, size, fmt, adx); - va_end(adx); - - return (buf); -} - -/* ARGSUSED */ -int -zfs_onexit_fd_hold(int fd, minor_t *minorp) -{ - *minorp = 0; - return (0); -} - -/* ARGSUSED */ -void -zfs_onexit_fd_rele(int fd) -{ -} - -/* ARGSUSED */ -int -zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, - uint64_t *action_handle) -{ - return (0); -} - -/* ARGSUSED */ -int -zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) -{ - return (0); -} - -/* ARGSUSED */ -int -zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) -{ - return (0); -} - -#ifdef __FreeBSD__ -/* ARGSUSED */ -int -zvol_create_minors(const char *name) -{ - return (0); -} -#endif - -#ifdef illumos -void -bioinit(buf_t *bp) -{ - bzero(bp, sizeof (buf_t)); -} - -void -biodone(buf_t *bp) -{ - if (bp->b_iodone != NULL) { - (*(bp->b_iodone))(bp); - return; - } - ASSERT((bp->b_flags & B_DONE) == 0); - bp->b_flags |= B_DONE; -} - -void -bioerror(buf_t *bp, int error) -{ - ASSERT(bp != NULL); - ASSERT(error >= 0); - - if (error != 0) { - bp->b_flags |= B_ERROR; - } else { - bp->b_flags &= ~B_ERROR; - } - bp->b_error = error; -} - - -int -geterror(struct buf *bp) -{ - int error = 0; - - if (bp->b_flags & B_ERROR) { - error = bp->b_error; - if (!error) - error = EIO; - } - return (error); -} -#endif diff --git 
a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h deleted file mode 100644 index 6f1a17f27852..000000000000 --- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h +++ /dev/null @@ -1,838 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. - */ -/* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
- */ - -#ifndef _SYS_ZFS_CONTEXT_H -#define _SYS_ZFS_CONTEXT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define _SYS_MUTEX_H -#define _SYS_RWLOCK_H -#define _SYS_CONDVAR_H -#define _SYS_SYSTM_H -#define _SYS_T_LOCK_H -#define _SYS_VNODE_H -#define _SYS_VFS_H -#define _SYS_SUNDDI_H -#define _SYS_CALLB_H -#define _SYS_SCHED_H_ - -#include <solaris.h> -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <stdarg.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <strings.h> -#include <thread.h> -#include <assert.h> -#include <limits.h> -#include <dirent.h> -#include <time.h> -#include <math.h> -#include <umem.h> -#include <inttypes.h> -#include <fsshare.h> -#include <pthread.h> -#include <sched.h> -#include <setjmp.h> -#include <sys/debug.h> -#include <sys/note.h> -#include <sys/types.h> -#include <sys/cred.h> -#include <sys/atomic.h> -#include <sys/sysmacros.h> -#include <sys/bitmap.h> -#include <sys/resource.h> -#include <sys/byteorder.h> -#include <sys/list.h> -#include <sys/time.h> -#include <sys/uio.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/zfs_debug.h> -#include <sys/sdt.h> -#include <sys/kstat.h> -#include <sys/u8_textprep.h> -#include <sys/kernel.h> -#include <sys/disk.h> -#include <sys/sysevent.h> -#include <sys/sysevent/eventdefs.h> -#include <sys/sysevent/dev.h> -#include <machine/atomic.h> -#include <sys/debug.h> -#ifdef illumos -#include "zfs.h" -#endif - -#define ZFS_EXPORTS_PATH "/etc/zfs/exports" - -/* - * Debugging - */ - -/* - * Note that we are not using the debugging levels. - */ - -#define CE_CONT 0 /* continuation */ -#define CE_NOTE 1 /* notice */ -#define CE_WARN 2 /* warning */ -#define CE_PANIC 3 /* panic */ -#define CE_IGNORE 4 /* print nothing */ - -/* - * ZFS debugging - */ - -#define ZFS_LOG(...) 
do { } while (0) - -typedef u_longlong_t rlim64_t; -#define RLIM64_INFINITY ((rlim64_t)-3) - -#ifdef ZFS_DEBUG -extern void dprintf_setup(int *argc, char **argv); -#endif /* ZFS_DEBUG */ - -extern void cmn_err(int, const char *, ...); -extern void vcmn_err(int, const char *, __va_list); -extern void panic(const char *, ...) __NORETURN; -extern void vpanic(const char *, __va_list) __NORETURN; - -#define fm_panic panic - -extern int aok; - -/* - * DTrace SDT probes have different signatures in userland than they do in - * the kernel. If they're being used in kernel code, re-define them out of - * existence for their counterparts in libzpool. - * - * Here's an example of how to use the set-error probes in userland: - * zfs$target:::set-error /arg0 == EBUSY/ {stack();} - * - * Here's an example of how to use DTRACE_PROBE probes in userland: - * If there is a probe declared as follows: - * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn); - * Then you can use it as follows: - * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/ - * {printf("%u %p\n", arg1, arg2);} - */ - -#ifdef DTRACE_PROBE -#undef DTRACE_PROBE -#endif /* DTRACE_PROBE */ -#ifdef illumos -#define DTRACE_PROBE(a) \ - ZFS_PROBE0(#a) -#endif - -#ifdef DTRACE_PROBE1 -#undef DTRACE_PROBE1 -#endif /* DTRACE_PROBE1 */ -#ifdef illumos -#define DTRACE_PROBE1(a, b, c) \ - ZFS_PROBE1(#a, (unsigned long)c) -#endif - -#ifdef DTRACE_PROBE2 -#undef DTRACE_PROBE2 -#endif /* DTRACE_PROBE2 */ -#ifdef illumos -#define DTRACE_PROBE2(a, b, c, d, e) \ - ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e) -#endif - -#ifdef DTRACE_PROBE3 -#undef DTRACE_PROBE3 -#endif /* DTRACE_PROBE3 */ -#ifdef illumos -#define DTRACE_PROBE3(a, b, c, d, e, f, g) \ - ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g) -#endif - -#ifdef DTRACE_PROBE4 -#undef DTRACE_PROBE4 -#endif /* DTRACE_PROBE4 */ -#ifdef illumos -#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \ - ZFS_PROBE4(#a, (unsigned long)c, 
(unsigned long)e, (unsigned long)g, \ - (unsigned long)i) -#endif - -#ifdef illumos -/* - * We use the comma operator so that this macro can be used without much - * additional code. For example, "return (EINVAL);" becomes - * "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated - * twice, so it should not have side effects (e.g. something like: - * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice). - */ -#define SET_ERROR(err) (ZFS_SET_ERROR(err), err) -#else /* !illumos */ - -#define DTRACE_PROBE(a) ((void)0) -#define DTRACE_PROBE1(a, b, c) ((void)0) -#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) -#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) -#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) - -#define SET_ERROR(err) (err) -#endif /* !illumos */ - -/* - * Threads - */ -#define curthread ((void *)(uintptr_t)thr_self()) - -#define kpreempt(x) sched_yield() - -typedef struct kthread kthread_t; - -#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ - zk_thread_create(func, arg) -#define thread_exit() thr_exit(NULL) -#define thread_join(t) panic("libzpool cannot join threads") - -#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) - -/* in libzpool, p0 exists only to have its address taken */ -struct proc { - uintptr_t this_is_never_used_dont_dereference_it; -}; - -extern struct proc p0; -#define curproc (&p0) - -#define PS_NONE -1 - -extern kthread_t *zk_thread_create(void (*func)(void*), void *arg); - -#define issig(why) (FALSE) -#define ISSIG(thr, why) (FALSE) - -/* - * Mutexes - */ -typedef struct kmutex { - void *m_owner; - boolean_t initialized; - mutex_t m_lock; -} kmutex_t; - -#define MUTEX_DEFAULT USYNC_THREAD -#undef MUTEX_HELD -#undef MUTEX_NOT_HELD -#define MUTEX_HELD(m) ((m)->m_owner == curthread) -#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) -#define _mutex_held(m) pthread_mutex_isowned_np(m) - -/* - * Argh -- we have to get cheesy here because the kernel and userland - * have 
different signatures for the same routine. - */ -//extern int _mutex_init(mutex_t *mp, int type, void *arg); -//extern int _mutex_destroy(mutex_t *mp); -//extern int _mutex_owned(mutex_t *mp); - -#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) -#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) -#define mutex_owned(mp) zmutex_owned((kmutex_t *)(mp)) - -extern void zmutex_init(kmutex_t *mp); -extern void zmutex_destroy(kmutex_t *mp); -extern int zmutex_owned(kmutex_t *mp); -extern void mutex_enter(kmutex_t *mp); -extern void mutex_exit(kmutex_t *mp); -extern int mutex_tryenter(kmutex_t *mp); -extern void *mutex_owner(kmutex_t *mp); - -/* - * RW locks - */ -typedef struct krwlock { - int rw_count; - void *rw_owner; - boolean_t initialized; - rwlock_t rw_lock; -} krwlock_t; - -typedef int krw_t; - -#define RW_READER 0 -#define RW_WRITER 1 -#define RW_DEFAULT USYNC_THREAD - -#undef RW_READ_HELD -#define RW_READ_HELD(x) ((x)->rw_owner == NULL && (x)->rw_count > 0) - -#undef RW_WRITE_HELD -#define RW_WRITE_HELD(x) ((x)->rw_owner == curthread) -#define RW_LOCK_HELD(x) rw_lock_held(x) - -#undef RW_LOCK_HELD -#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) - -extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); -extern void rw_destroy(krwlock_t *rwlp); -extern void rw_enter(krwlock_t *rwlp, krw_t rw); -extern int rw_tryenter(krwlock_t *rwlp, krw_t rw); -extern int rw_tryupgrade(krwlock_t *rwlp); -extern void rw_exit(krwlock_t *rwlp); -extern int rw_lock_held(krwlock_t *rwlp); -#define rw_downgrade(rwlp) do { } while (0) - -extern uid_t crgetuid(cred_t *cr); -extern uid_t crgetruid(cred_t *cr); -extern gid_t crgetgid(cred_t *cr); -extern int crgetngroups(cred_t *cr); -extern gid_t *crgetgroups(cred_t *cr); - -/* - * Condition variables - */ -typedef cond_t kcondvar_t; - -#define CV_DEFAULT USYNC_THREAD -#define CALLOUT_FLAG_ABSOLUTE 0x2 - -extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); -extern void 
cv_destroy(kcondvar_t *cv); -extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); -extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); -extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); -#define cv_timedwait_sig(cvp, mp, t) cv_timedwait(cvp, mp, t) -extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, - hrtime_t res, int flag); -#define cv_timedwait_sig_hires(cvp, mp, t, r, f) \ - cv_timedwait_hires(cvp, mp, t, r, f) -extern void cv_signal(kcondvar_t *cv); -extern void cv_broadcast(kcondvar_t *cv); - -/* - * Thread-specific data - */ -#define tsd_get(k) pthread_getspecific(k) -#define tsd_set(k, v) pthread_setspecific(k, v) -#define tsd_create(kp, d) pthread_key_create(kp, d) -#define tsd_destroy(kp) /* nothing */ - -/* - * Kernel memory - */ -#define KM_SLEEP UMEM_NOFAIL -#define KM_PUSHPAGE KM_SLEEP -#define KM_NOSLEEP UMEM_DEFAULT -#define KM_NORMALPRI 0 /* not needed with UMEM_DEFAULT */ -#define KMC_NODEBUG UMC_NODEBUG -#define KMC_NOTOUCH 0 /* not needed for userland caches */ -#define KM_NODEBUG 0 -#define kmem_alloc(_s, _f) umem_alloc(_s, _f) -#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) -#define kmem_free(_b, _s) umem_free(_b, _s) -#define kmem_size() (physmem * PAGESIZE) -#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ - umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) -#define kmem_cache_destroy(_c) umem_cache_destroy(_c) -#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) -#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) -#define kmem_debugging() 0 -#define kmem_cache_reap_active() (B_FALSE) -#define kmem_cache_reap_soon(_c) /* nothing */ -#define kmem_cache_set_move(_c, _cb) /* nothing */ -#define POINTER_INVALIDATE(_pp) /* nothing */ -#define POINTER_IS_VALID(_p) 0 - -typedef umem_cache_t kmem_cache_t; - -typedef enum kmem_cbrc { - KMEM_CBRC_YES, - KMEM_CBRC_NO, - KMEM_CBRC_LATER, - KMEM_CBRC_DONT_NEED, - KMEM_CBRC_DONT_KNOW -} kmem_cbrc_t; - -/* - * Task queues 
- */ -typedef struct taskq taskq_t; -typedef uintptr_t taskqid_t; -typedef void (task_func_t)(void *); - -typedef struct taskq_ent { - struct taskq_ent *tqent_next; - struct taskq_ent *tqent_prev; - task_func_t *tqent_func; - void *tqent_arg; - uintptr_t tqent_flags; -} taskq_ent_t; - -#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */ - -#define TASKQ_PREPOPULATE 0x0001 -#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ -#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ -#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */ -#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */ - -#define TQ_SLEEP KM_SLEEP /* Can block for memory */ -#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ -#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ -#define TQ_FRONT 0x08 /* Queue in front */ - -#define TASKQID_INVALID ((taskqid_t)0) - -extern taskq_t *system_taskq; - -extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); -#define taskq_create_proc(a, b, c, d, e, p, f) \ - (taskq_create(a, b, c, d, e, f)) -#define taskq_create_sysdc(a, b, d, e, p, dc, f) \ - (taskq_create(a, b, maxclsyspri, d, e, f)) -extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); -extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, - taskq_ent_t *); -extern void taskq_destroy(taskq_t *); -extern void taskq_wait(taskq_t *); -extern void taskq_wait_id(taskq_t *, taskqid_t); -extern int taskq_member(taskq_t *, void *); -extern void system_taskq_init(void); -extern void system_taskq_fini(void); - -#define taskq_dispatch_safe(tq, func, arg, flags, task) \ - taskq_dispatch((tq), (func), (arg), (flags)) - -#define XVA_MAPSIZE 3 -#define XVA_MAGIC 0x78766174 - -/* - * vnodes - */ -typedef struct vnode { - uint64_t v_size; - int v_fd; - char *v_path; - int v_dump_fd; -} vnode_t; - -extern char *vn_dumpdir; -#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ - 
-typedef struct xoptattr { - timestruc_t xoa_createtime; /* Create time of file */ - uint8_t xoa_archive; - uint8_t xoa_system; - uint8_t xoa_readonly; - uint8_t xoa_hidden; - uint8_t xoa_nounlink; - uint8_t xoa_immutable; - uint8_t xoa_appendonly; - uint8_t xoa_nodump; - uint8_t xoa_settable; - uint8_t xoa_opaque; - uint8_t xoa_av_quarantined; - uint8_t xoa_av_modified; - uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; - uint8_t xoa_reparse; - uint8_t xoa_offline; - uint8_t xoa_sparse; -} xoptattr_t; - -typedef struct vattr { - uint_t va_mask; /* bit-mask of attributes */ - u_offset_t va_size; /* file size in bytes */ -} vattr_t; - - -typedef struct xvattr { - vattr_t xva_vattr; /* Embedded vattr structure */ - uint32_t xva_magic; /* Magic Number */ - uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ - uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ - uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ - uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ - xoptattr_t xva_xoptattrs; /* Optional attributes */ -} xvattr_t; - -typedef struct vsecattr { - uint_t vsa_mask; /* See below */ - int vsa_aclcnt; /* ACL entry count */ - void *vsa_aclentp; /* pointer to ACL entries */ - int vsa_dfaclcnt; /* default ACL entry count */ - void *vsa_dfaclentp; /* pointer to default ACL entries */ - size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ -} vsecattr_t; - -#define AT_TYPE 0x00001 -#define AT_MODE 0x00002 -#define AT_UID 0x00004 -#define AT_GID 0x00008 -#define AT_FSID 0x00010 -#define AT_NODEID 0x00020 -#define AT_NLINK 0x00040 -#define AT_SIZE 0x00080 -#define AT_ATIME 0x00100 -#define AT_MTIME 0x00200 -#define AT_CTIME 0x00400 -#define AT_RDEV 0x00800 -#define AT_BLKSIZE 0x01000 -#define AT_NBLOCKS 0x02000 -#define AT_SEQ 0x08000 -#define AT_XVATTR 0x10000 - -#define CRCREAT 0 - -extern int fop_getattr(vnode_t *vp, vattr_t *vap); - -#define VOP_CLOSE(vp, f, c, o, cr, ct) 0 -#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 -#define 
VOP_GETATTR(vp, vap, cr) fop_getattr((vp), (vap)); - -#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) - -#define VN_RELE(vp) vn_close(vp, 0, NULL, NULL) -#define VN_RELE_ASYNC(vp, taskq) vn_close(vp, 0, NULL, NULL) - -#define vn_lock(vp, type) -#define VOP_UNLOCK(vp) - -extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3); -extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3, vnode_t *vp, int fd); -extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, - offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); -extern void vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td); - -#define vn_remove(path, x1, x2) remove(path) -#define vn_rename(from, to, seg) rename((from), (to)) -#define vn_is_readonly(vp) B_FALSE - -extern vnode_t *rootdir; - -#include <sys/file.h> /* for FREAD, FWRITE, etc */ -#define FTRUNC O_TRUNC - -/* - * Random stuff - */ -#define ddi_get_lbolt() (gethrtime() >> 23) -#define ddi_get_lbolt64() (gethrtime() >> 23) -#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ - -extern void delay(clock_t ticks); - -#define SEC_TO_TICK(sec) ((sec) * hz) -#define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz)) - -#define gethrestime_sec() time(NULL) -#define gethrestime(t) \ - do {\ - (t)->tv_sec = gethrestime_sec();\ - (t)->tv_nsec = 0;\ - } while (0); - -#define max_ncpus 64 -#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN)) - -#define minclsyspri 60 -#define maxclsyspri 99 - -#define CPU_SEQID (thr_self() & (max_ncpus - 1)) - -#define kcred NULL -#define CRED() NULL - -#ifndef ptob -#define ptob(x) ((x) * PAGESIZE) -#endif - -extern uint64_t physmem; - -extern int highbit64(uint64_t i); -extern int random_get_bytes(uint8_t *ptr, size_t len); -extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); - -extern void kernel_init(int); -extern void kernel_fini(void); - -struct spa; -extern void nicenum(uint64_t num, 
char *buf, size_t); -extern void show_pool_stats(struct spa *); -extern int set_global_var(char *arg); - -typedef struct callb_cpr { - kmutex_t *cc_lockp; -} callb_cpr_t; - -#define CALLB_CPR_INIT(cp, lockp, func, name) { \ - (cp)->cc_lockp = lockp; \ -} - -#define CALLB_CPR_SAFE_BEGIN(cp) { \ - ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ -} - -#define CALLB_CPR_SAFE_END(cp, lockp) { \ - ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ -} - -#define CALLB_CPR_EXIT(cp) { \ - ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ - mutex_exit((cp)->cc_lockp); \ -} - -#define zone_dataset_visible(x, y) (1) -#define INGLOBALZONE(z) (1) -extern uint32_t zone_get_hostid(void *zonep); - -extern char *kmem_asprintf(const char *fmt, ...); -#define strfree(str) kmem_free((str), strlen(str) + 1) - -/* - * Hostname information - */ -extern struct utsname utsname; -extern char hw_serial[]; /* for userland-emulated hostid access */ -extern int ddi_strtoul(const char *str, char **nptr, int base, - unsigned long *result); - -extern int ddi_strtoull(const char *str, char **nptr, int base, - u_longlong_t *result); - -/* ZFS Boot Related stuff. 
*/ - -struct _buf { - intptr_t _fd; -}; - -struct bootstat { - uint64_t st_size; -}; - -typedef struct ace_object { - uid_t a_who; - uint32_t a_access_mask; - uint16_t a_flags; - uint16_t a_type; - uint8_t a_obj_type[16]; - uint8_t a_inherit_obj_type[16]; -} ace_object_t; - - -#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 -#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 -#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 -#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 - -extern struct _buf *kobj_open_file(char *name); -extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, - unsigned off); -extern void kobj_close_file(struct _buf *file); -extern int kobj_get_filesize(struct _buf *file, uint64_t *size); -extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); -extern int zfs_secpolicy_rename_perms(const char *from, const char *to, - cred_t *cr); -extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); -extern zoneid_t getzoneid(void); -/* Random compatibility stuff. */ -#define pwrite64(d, p, n, o) pwrite(d, p, n, o) -#define readdir64(d) readdir(d) -#define SIGPENDING(td) (0) -#define root_mount_wait() do { } while (0) -#define root_mounted() (1) - -#define noinline __attribute__((noinline)) -#define likely(x) __builtin_expect((x), 1) - -struct file { - void *dummy; -}; - -#define FCREAT O_CREAT -#define FOFFMAX 0x0 - -/* SID stuff */ -typedef struct ksiddomain { - uint_t kd_ref; - uint_t kd_len; - char *kd_name; -} ksiddomain_t; - -ksiddomain_t *ksid_lookupdomain(const char *); -void ksiddomain_rele(ksiddomain_t *); - -typedef uint32_t idmap_rid_t; - -#define DDI_SLEEP KM_SLEEP -#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) (0) - -#define SX_SYSINIT(name, lock, desc) - -#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ - intptr_t arg2, struct sysctl_req *req - -/* - * This describes the access space for a sysctl request. 
This is needed - * so that we can use the interface from the kernel or from user-space. - */ -struct sysctl_req { - struct thread *td; /* used for access checking */ - int lock; /* wiring state */ - void *oldptr; - size_t oldlen; - size_t oldidx; - int (*oldfunc)(struct sysctl_req *, const void *, size_t); - void *newptr; - size_t newlen; - size_t newidx; - int (*newfunc)(struct sysctl_req *, void *, size_t); - size_t validlen; - int flags; -}; - -SLIST_HEAD(sysctl_oid_list, sysctl_oid); - -/* - * This describes one "oid" in the MIB tree. Potentially more nodes can - * be hidden behind it, expanded by the handler. - */ -struct sysctl_oid { - struct sysctl_oid_list *oid_parent; - SLIST_ENTRY(sysctl_oid) oid_link; - int oid_number; - u_int oid_kind; - void *oid_arg1; - intptr_t oid_arg2; - const char *oid_name; - int (*oid_handler)(SYSCTL_HANDLER_ARGS); - const char *oid_fmt; - int oid_refcnt; - u_int oid_running; - const char *oid_descr; -}; - -#define SYSCTL_DECL(...) -#define SYSCTL_NODE(...) -#define SYSCTL_INT(...) -#define SYSCTL_UINT(...) -#define SYSCTL_ULONG(...) -#define SYSCTL_PROC(...) -#define SYSCTL_QUAD(...) -#define SYSCTL_UQUAD(...) -#ifdef TUNABLE_INT -#undef TUNABLE_INT -#undef TUNABLE_ULONG -#undef TUNABLE_QUAD -#endif -#define TUNABLE_INT(...) -#define TUNABLE_ULONG(...) -#define TUNABLE_QUAD(...) 
- -int sysctl_handle_64(SYSCTL_HANDLER_ARGS); - -/* Errors */ - -#ifndef ERESTART -#define ERESTART (-1) -#endif - -#ifdef illumos -/* - * Cyclic information - */ -extern kmutex_t cpu_lock; - -typedef uintptr_t cyclic_id_t; -typedef uint16_t cyc_level_t; -typedef void (*cyc_func_t)(void *); - -#define CY_LOW_LEVEL 0 -#define CY_INFINITY INT64_MAX -#define CYCLIC_NONE ((cyclic_id_t)0) - -typedef struct cyc_time { - hrtime_t cyt_when; - hrtime_t cyt_interval; -} cyc_time_t; - -typedef struct cyc_handler { - cyc_func_t cyh_func; - void *cyh_arg; - cyc_level_t cyh_level; -} cyc_handler_t; - -extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *); -extern void cyclic_remove(cyclic_id_t); -extern int cyclic_reprogram(cyclic_id_t, hrtime_t); -#endif /* illumos */ - -#ifdef illumos -/* - * Buf structure - */ -#define B_BUSY 0x0001 -#define B_DONE 0x0002 -#define B_ERROR 0x0004 -#define B_READ 0x0040 /* read when I/O occurs */ -#define B_WRITE 0x0100 /* non-read pseudo-flag */ - -typedef struct buf { - int b_flags; - size_t b_bcount; - union { - caddr_t b_addr; - } b_un; - - lldaddr_t _b_blkno; -#define b_lblkno _b_blkno._f - size_t b_resid; - size_t b_bufsize; - int (*b_iodone)(struct buf *); - int b_error; - void *b_private; -} buf_t; - -extern void bioinit(buf_t *); -extern void biodone(buf_t *); -extern void bioerror(buf_t *, int); -extern int geterror(buf_t *); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c deleted file mode 100644 index 595d766e93df..000000000000 --- a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. - * Copyright (c) 2014 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> - -int taskq_now; -taskq_t *system_taskq; - -#define TASKQ_ACTIVE 0x00010000 -#define TASKQ_NAMELEN 31 - -struct taskq { - char tq_name[TASKQ_NAMELEN + 1]; - kmutex_t tq_lock; - krwlock_t tq_threadlock; - kcondvar_t tq_dispatch_cv; - kcondvar_t tq_wait_cv; - thread_t *tq_threadlist; - int tq_flags; - int tq_active; - int tq_nthreads; - int tq_nalloc; - int tq_minalloc; - int tq_maxalloc; - kcondvar_t tq_maxalloc_cv; - int tq_maxalloc_wait; - taskq_ent_t *tq_freelist; - taskq_ent_t tq_task; -}; - -static taskq_ent_t * -task_alloc(taskq_t *tq, int tqflags) -{ - taskq_ent_t *t; - int rv; - -again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { - tq->tq_freelist = t->tqent_next; - } else { - if (tq->tq_nalloc >= tq->tq_maxalloc) { - if (!(tqflags & KM_SLEEP)) - return (NULL); - - /* - * We don't want to exceed tq_maxalloc, but we can't - * wait for other tasks to complete (and thus free up - * task structures) without risking deadlock with - * the caller. 
So, we just delay for one second - * to throttle the allocation rate. If we have tasks - * complete before one second timeout expires then - * taskq_ent_free will signal us and we will - * immediately retry the allocation. - */ - tq->tq_maxalloc_wait++; -#ifdef __FreeBSD__ - rv = cv_timedwait(&tq->tq_maxalloc_cv, - &tq->tq_lock, hz); -#else - rv = cv_timedwait(&tq->tq_maxalloc_cv, - &tq->tq_lock, ddi_get_lbolt() + hz); -#endif - tq->tq_maxalloc_wait--; - if (rv > 0) - goto again; /* signaled */ - } - mutex_exit(&tq->tq_lock); - - t = kmem_alloc(sizeof (taskq_ent_t), tqflags & KM_SLEEP); - - mutex_enter(&tq->tq_lock); - if (t != NULL) - tq->tq_nalloc++; - } - return (t); -} - -static void -task_free(taskq_t *tq, taskq_ent_t *t) -{ - if (tq->tq_nalloc <= tq->tq_minalloc) { - t->tqent_next = tq->tq_freelist; - tq->tq_freelist = t; - } else { - tq->tq_nalloc--; - mutex_exit(&tq->tq_lock); - kmem_free(t, sizeof (taskq_ent_t)); - mutex_enter(&tq->tq_lock); - } - - if (tq->tq_maxalloc_wait) - cv_signal(&tq->tq_maxalloc_cv); -} - -taskqid_t -taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) -{ - taskq_ent_t *t; - - if (taskq_now) { - func(arg); - return (1); - } - - mutex_enter(&tq->tq_lock); - ASSERT(tq->tq_flags & TASKQ_ACTIVE); - if ((t = task_alloc(tq, tqflags)) == NULL) { - mutex_exit(&tq->tq_lock); - return (0); - } - if (tqflags & TQ_FRONT) { - t->tqent_next = tq->tq_task.tqent_next; - t->tqent_prev = &tq->tq_task; - } else { - t->tqent_next = &tq->tq_task; - t->tqent_prev = tq->tq_task.tqent_prev; - } - t->tqent_next->tqent_prev = t; - t->tqent_prev->tqent_next = t; - t->tqent_func = func; - t->tqent_arg = arg; - t->tqent_flags = 0; - cv_signal(&tq->tq_dispatch_cv); - mutex_exit(&tq->tq_lock); - return (1); -} - -void -taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, - taskq_ent_t *t) -{ - ASSERT(func != NULL); - ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); - - /* - * Mark it as a prealloc'd task. 
This is important - * to ensure that we don't free it later. - */ - t->tqent_flags |= TQENT_FLAG_PREALLOC; - /* - * Enqueue the task to the underlying queue. - */ - mutex_enter(&tq->tq_lock); - - if (flags & TQ_FRONT) { - t->tqent_next = tq->tq_task.tqent_next; - t->tqent_prev = &tq->tq_task; - } else { - t->tqent_next = &tq->tq_task; - t->tqent_prev = tq->tq_task.tqent_prev; - } - t->tqent_next->tqent_prev = t; - t->tqent_prev->tqent_next = t; - t->tqent_func = func; - t->tqent_arg = arg; - cv_signal(&tq->tq_dispatch_cv); - mutex_exit(&tq->tq_lock); -} - -void -taskq_wait(taskq_t *tq) -{ - mutex_enter(&tq->tq_lock); - while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0) - cv_wait(&tq->tq_wait_cv, &tq->tq_lock); - mutex_exit(&tq->tq_lock); -} - -void -taskq_wait_id(taskq_t *tq, taskqid_t id) -{ - taskq_wait(tq); -} - -static void * -taskq_thread(void *arg) -{ - taskq_t *tq = arg; - taskq_ent_t *t; - boolean_t prealloc; - - mutex_enter(&tq->tq_lock); - while (tq->tq_flags & TASKQ_ACTIVE) { - if ((t = tq->tq_task.tqent_next) == &tq->tq_task) { - if (--tq->tq_active == 0) - cv_broadcast(&tq->tq_wait_cv); - cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); - tq->tq_active++; - continue; - } - t->tqent_prev->tqent_next = t->tqent_next; - t->tqent_next->tqent_prev = t->tqent_prev; - t->tqent_next = NULL; - t->tqent_prev = NULL; - prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC; - mutex_exit(&tq->tq_lock); - - rw_enter(&tq->tq_threadlock, RW_READER); - t->tqent_func(t->tqent_arg); - rw_exit(&tq->tq_threadlock); - - mutex_enter(&tq->tq_lock); - if (!prealloc) - task_free(tq, t); - } - tq->tq_nthreads--; - cv_broadcast(&tq->tq_wait_cv); - mutex_exit(&tq->tq_lock); - return (NULL); -} - -/*ARGSUSED*/ -taskq_t * -taskq_create(const char *name, int nthreads, pri_t pri, - int minalloc, int maxalloc, uint_t flags) -{ - taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); - int t; - - if (flags & TASKQ_THREADS_CPU_PCT) { - int pct; - ASSERT3S(nthreads, >=, 0); - 
ASSERT3S(nthreads, <=, 100); - pct = MIN(nthreads, 100); - pct = MAX(pct, 0); - - nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100; - nthreads = MAX(nthreads, 1); /* need at least 1 thread */ - } else { - ASSERT3S(nthreads, >=, 1); - } - - rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); - mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); - cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); - cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); - (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); - tq->tq_flags = flags | TASKQ_ACTIVE; - tq->tq_active = nthreads; - tq->tq_nthreads = nthreads; - tq->tq_minalloc = minalloc; - tq->tq_maxalloc = maxalloc; - tq->tq_task.tqent_next = &tq->tq_task; - tq->tq_task.tqent_prev = &tq->tq_task; - tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); - - if (flags & TASKQ_PREPOPULATE) { - mutex_enter(&tq->tq_lock); - while (minalloc-- > 0) - task_free(tq, task_alloc(tq, KM_SLEEP)); - mutex_exit(&tq->tq_lock); - } - - for (t = 0; t < nthreads; t++) - (void) thr_create(0, 0, taskq_thread, - tq, THR_BOUND, &tq->tq_threadlist[t]); - - return (tq); -} - -void -taskq_destroy(taskq_t *tq) -{ - int t; - int nthreads = tq->tq_nthreads; - - taskq_wait(tq); - - mutex_enter(&tq->tq_lock); - - tq->tq_flags &= ~TASKQ_ACTIVE; - cv_broadcast(&tq->tq_dispatch_cv); - - while (tq->tq_nthreads != 0) - cv_wait(&tq->tq_wait_cv, &tq->tq_lock); - - tq->tq_minalloc = 0; - while (tq->tq_nalloc != 0) { - ASSERT(tq->tq_freelist != NULL); - task_free(tq, task_alloc(tq, KM_SLEEP)); - } - - mutex_exit(&tq->tq_lock); - - for (t = 0; t < nthreads; t++) - (void) thr_join(tq->tq_threadlist[t], NULL, NULL); - - kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); - - rw_destroy(&tq->tq_threadlock); - mutex_destroy(&tq->tq_lock); - cv_destroy(&tq->tq_dispatch_cv); - cv_destroy(&tq->tq_wait_cv); - cv_destroy(&tq->tq_maxalloc_cv); - - kmem_free(tq, sizeof (taskq_t)); -} - 
-int -taskq_member(taskq_t *tq, void *t) -{ - int i; - - if (taskq_now) - return (1); - - for (i = 0; i < tq->tq_nthreads; i++) - if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) - return (1); - - return (0); -} - -void -system_taskq_init(void) -{ - system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, - TASKQ_DYNAMIC | TASKQ_PREPOPULATE); -} - -void -system_taskq_fini(void) -{ - taskq_destroy(system_taskq); - system_taskq = NULL; /* defensive */ -} diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/util.c b/cddl/contrib/opensolaris/lib/libzpool/common/util.c deleted file mode 100644 index d2ed31a46832..000000000000 --- a/cddl/contrib/opensolaris/lib/libzpool/common/util.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - * Copyright (c) 2017, Intel Corporation. 
- */ - -#include <assert.h> -#include <sys/zfs_context.h> -#include <sys/avl.h> -#include <string.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/spa.h> -#include <sys/fs/zfs.h> -#include <sys/refcount.h> -#include <dlfcn.h> - -/* - * Routines needed by more than one client of libzpool. - */ - -static void -show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) -{ - vdev_stat_t *vs; - vdev_stat_t v0 = { 0 }; - uint64_t sec; - uint64_t is_log = 0; - nvlist_t **child; - uint_t c, children; - char used[6], avail[6]; - char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; - - if (indent == 0 && desc != NULL) { - (void) printf(" " - " capacity operations bandwidth ---- errors ----\n"); - (void) printf("description " - "used avail read write read write read write cksum\n"); - } - - if (desc != NULL) { - char *suffix = "", *bias = NULL; - char bias_suffix[32]; - - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); - (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, - &bias); - if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) != 0) - vs = &v0; - - if (bias != NULL) { - (void) snprintf(bias_suffix, sizeof (bias_suffix), - " (%s)", bias); - suffix = bias_suffix; - } else if (is_log) { - suffix = " (log)"; - } - - sec = MAX(1, vs->vs_timestamp / NANOSEC); - - nicenum(vs->vs_alloc, used, sizeof (used)); - nicenum(vs->vs_space - vs->vs_alloc, avail, sizeof (avail)); - nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops, sizeof (rops)); - nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops, sizeof (wops)); - nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes, - sizeof (rbytes)); - nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes, - sizeof (wbytes)); - nicenum(vs->vs_read_errors, rerr, sizeof (rerr)); - nicenum(vs->vs_write_errors, werr, sizeof (werr)); - nicenum(vs->vs_checksum_errors, cerr, sizeof (cerr)); - - (void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", - 
indent, "", - desc, - (int)(indent+strlen(desc)-25-(vs->vs_space ? 0 : 12)), - suffix, - vs->vs_space ? 6 : 0, vs->vs_space ? used : "", - vs->vs_space ? 6 : 0, vs->vs_space ? avail : "", - rops, wops, rbytes, wbytes, rerr, werr, cerr); - } - - if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0) - return; - - for (c = 0; c < children; c++) { - nvlist_t *cnv = child[c]; - char *cname, *tname; - uint64_t np; - if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && - nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) - cname = "<unknown>"; - tname = calloc(1, strlen(cname) + 2); - (void) strcpy(tname, cname); - if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) - tname[strlen(tname)] = '0' + np; - show_vdev_stats(tname, ctype, cnv, indent + 2); - free(tname); - } -} - -void -show_pool_stats(spa_t *spa) -{ - nvlist_t *config, *nvroot; - char *name; - - VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); - - VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); - - show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); - show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); - show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0); - - nvlist_free(config); -} - -/* - * Sets given global variable in libzpool to given unsigned 32-bit value. - * arg: "<variable>=<value>" - */ -int -set_global_var(char *arg) -{ - void *zpoolhdl; - char *varname = arg, *varval; - u_longlong_t val; - -#ifndef _LITTLE_ENDIAN - /* - * On big endian systems changing a 64-bit variable would set the high - * 32 bits instead of the low 32 bits, which could cause unexpected - * results. 
- */ - fprintf(stderr, "Setting global variables is only supported on " - "little-endian systems\n", varname); - return (ENOTSUP); -#endif - if ((varval = strchr(arg, '=')) != NULL) { - *varval = '\0'; - varval++; - val = strtoull(varval, NULL, 0); - if (val > UINT32_MAX) { - fprintf(stderr, "Value for global variable '%s' must " - "be a 32-bit unsigned integer\n", varname); - return (EOVERFLOW); - } - } else { - return (EINVAL); - } - - zpoolhdl = dlopen("libzpool.so", RTLD_LAZY); - if (zpoolhdl != NULL) { - uint32_t *var; - var = dlsym(zpoolhdl, varname); - if (var == NULL) { - fprintf(stderr, "Global variable '%s' does not exist " - "in libzpool.so\n", varname); - return (EINVAL); - } - *var = (uint32_t)val; - - dlclose(zpoolhdl); - } else { - fprintf(stderr, "Failed to open libzpool.so to set global " - "variable\n"); - return (EIO); - } - - return (0); -} diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d b/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d deleted file mode 100644 index 1351733c807b..000000000000 --- a/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d +++ /dev/null @@ -1,36 +0,0 @@ -/* - * CDDL HEADER START - * - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2013 by Delphix. All rights reserved. 
- */ - -provider zfs { - probe probe0(char *probename); - probe probe1(char *probename, unsigned long arg1); - probe probe2(char *probename, unsigned long arg1, unsigned long arg2); - probe probe3(char *probename, unsigned long arg1, unsigned long arg2, - unsigned long arg3); - probe probe4(char *probename, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4); - - probe set__error(int err); -}; - -#pragma D attributes Evolving/Evolving/ISA provider zfs provider -#pragma D attributes Private/Private/Unknown provider zfs module -#pragma D attributes Private/Private/Unknown provider zfs function -#pragma D attributes Evolving/Evolving/ISA provider zfs name -#pragma D attributes Evolving/Evolving/ISA provider zfs args diff --git a/cddl/contrib/opensolaris/tools/ctf/cvt/util.c b/cddl/contrib/opensolaris/tools/ctf/cvt/util.c index fb76cbaeb422..0eda56dbf65a 100644 --- a/cddl/contrib/opensolaris/tools/ctf/cvt/util.c +++ b/cddl/contrib/opensolaris/tools/ctf/cvt/util.c @@ -29,6 +29,7 @@ * Utility functions */ +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> |