diff options
44 files changed, 2729 insertions, 1426 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index ea211bf1f349..d462d3f18e4d 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -57,6 +57,7 @@  #include <sys/arc.h>  #include <sys/ddt.h>  #include <sys/zfeature.h> +#include <zfs_comutil.h>  #undef ZFS_MAXNAMELEN  #undef verify  #include <libzfs.h> @@ -204,6 +205,27 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)  	nvlist_free(nv);  } +/* ARGSUSED */ +static void +dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) +{ +	spa_history_phys_t *shp = data; + +	if (shp == NULL) +		return; + +	(void) printf("\t\tpool_create_len = %llu\n", +	    (u_longlong_t)shp->sh_pool_create_len); +	(void) printf("\t\tphys_max_off = %llu\n", +	    (u_longlong_t)shp->sh_phys_max_off); +	(void) printf("\t\tbof = %llu\n", +	    (u_longlong_t)shp->sh_bof); +	(void) printf("\t\teof = %llu\n", +	    (u_longlong_t)shp->sh_eof); +	(void) printf("\t\trecords_lost = %llu\n", +	    (u_longlong_t)shp->sh_records_lost); +} +  static void  zdb_nicenum(uint64_t num, char *buf)  { @@ -853,21 +875,22 @@ dump_history(spa_t *spa)  	for (int i = 0; i < num; i++) {  		uint64_t time, txg, ievent;  		char *cmd, *intstr; +		boolean_t printed = B_FALSE;  		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,  		    &time) != 0) -			continue; +			goto next;  		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,  		    &cmd) != 0) {  			if (nvlist_lookup_uint64(events[i],  			    ZPOOL_HIST_INT_EVENT, &ievent) != 0) -				continue; +				goto next;  			verify(nvlist_lookup_uint64(events[i],  			    ZPOOL_HIST_TXG, &txg) == 0);  			verify(nvlist_lookup_string(events[i],  			    ZPOOL_HIST_INT_STR, &intstr) == 0); -			if (ievent >= LOG_END) -				continue; +			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) +				goto next;  			(void) snprintf(internalstr,  			    sizeof (internalstr), @@ -880,6 +903,14 @@ dump_history(spa_t *spa)  		(void) localtime_r(&tsec, &t);  		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);  		
(void) printf("%s %s\n", tbuf, cmd); +		printed = B_TRUE; + +next: +		if (dump_opt['h'] > 1) { +			if (!printed) +				(void) printf("unrecognized record:\n"); +			dump_nvlist(events[i], 2); +		}  	}  } @@ -1456,7 +1487,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {  	dump_zap,		/* other ZAP			*/  	dump_zap,		/* persistent error log		*/  	dump_uint8,		/* SPA history			*/ -	dump_uint64,		/* SPA history offsets		*/ +	dump_history_offsets,	/* SPA history offsets		*/  	dump_zap,		/* Pool properties		*/  	dump_zap,		/* DSL permissions		*/  	dump_acl,		/* ZFS ACL			*/ diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index b64905f11997..ef02ad240ef5 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -56,6 +56,7 @@  #include <time.h>  #include <libzfs.h> +#include <libzfs_core.h>  #include <zfs_prop.h>  #include <zfs_deleg.h>  #include <libuutil.h> @@ -70,6 +71,7 @@ libzfs_handle_t *g_zfs;  static FILE *mnttab_file;  static char history_str[HIS_MAX_RECORD_LEN]; +static boolean_t log_history = B_TRUE;  static int zfs_do_clone(int argc, char **argv);  static int zfs_do_create(int argc, char **argv); @@ -259,7 +261,7 @@ get_usage(zfs_help_t idx)  		return (gettext("\tshare <-a | filesystem>\n"));  	case HELP_SNAPSHOT:  		return (gettext("\tsnapshot [-r] [-o property=value] ... 
" -		    "<filesystem@snapname|volume@snapname>\n")); +		    "<filesystem@snapname|volume@snapname> ...\n"));  	case HELP_UNMOUNT:  		return (gettext("\tunmount [-f] "  		    "<-a | filesystem|mountpoint>\n")); @@ -888,9 +890,9 @@ typedef struct destroy_cbdata {  	nvlist_t	*cb_nvl;  	/* first snap in contiguous run */ -	zfs_handle_t	*cb_firstsnap; +	char		*cb_firstsnap;  	/* previous snap in contiguous run */ -	zfs_handle_t	*cb_prevsnap; +	char		*cb_prevsnap;  	int64_t		cb_snapused;  	char		*cb_snapspec;  } destroy_cbdata_t; @@ -1004,11 +1006,13 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg)  	if (nvlist_exists(cb->cb_nvl, name)) {  		if (cb->cb_firstsnap == NULL) -			cb->cb_firstsnap = zfs_handle_dup(zhp); +			cb->cb_firstsnap = strdup(name);  		if (cb->cb_prevsnap != NULL) -			zfs_close(cb->cb_prevsnap); +			free(cb->cb_prevsnap);  		/* this snap continues the current range */ -		cb->cb_prevsnap = zfs_handle_dup(zhp); +		cb->cb_prevsnap = strdup(name); +		if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) +			nomem();  		if (cb->cb_verbose) {  			if (cb->cb_parsable) {  				(void) printf("destroy\t%s\n", name); @@ -1023,12 +1027,12 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg)  	} else if (cb->cb_firstsnap != NULL) {  		/* end of this range */  		uint64_t used = 0; -		err = zfs_get_snapused_int(cb->cb_firstsnap, +		err = lzc_snaprange_space(cb->cb_firstsnap,  		    cb->cb_prevsnap, &used);  		cb->cb_snapused += used; -		zfs_close(cb->cb_firstsnap); +		free(cb->cb_firstsnap);  		cb->cb_firstsnap = NULL; -		zfs_close(cb->cb_prevsnap); +		free(cb->cb_prevsnap);  		cb->cb_prevsnap = NULL;  	}  	zfs_close(zhp); @@ -1045,13 +1049,13 @@ destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb)  	if (cb->cb_firstsnap != NULL) {  		uint64_t used = 0;  		if (err == 0) { -			err = zfs_get_snapused_int(cb->cb_firstsnap, +			err = lzc_snaprange_space(cb->cb_firstsnap,  			    cb->cb_prevsnap, &used);  		}  		cb->cb_snapused += used; -		
zfs_close(cb->cb_firstsnap); +		free(cb->cb_firstsnap);  		cb->cb_firstsnap = NULL; -		zfs_close(cb->cb_prevsnap); +		free(cb->cb_prevsnap);  		cb->cb_prevsnap = NULL;  	}  	return (err); @@ -1904,9 +1908,11 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)  			/*  			 * If they did "zfs upgrade -a", then we could  			 * be doing ioctls to different pools.  We need -			 * to log this history once to each pool. +			 * to log this history once to each pool, and bypass +			 * the normal history logging that happens in main().  			 */ -			verify(zpool_stage_history(g_zfs, history_str) == 0); +			(void) zpool_log_history(g_zfs, history_str); +			log_history = B_FALSE;  		}  		if (zfs_prop_set(zhp, "version", verstr) == 0)  			cb->cb_numupgraded++; @@ -3424,6 +3430,32 @@ zfs_do_set(int argc, char **argv)  	return (ret);  } +typedef struct snap_cbdata { +	nvlist_t *sd_nvl; +	boolean_t sd_recursive; +	const char *sd_snapname; +} snap_cbdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ +	snap_cbdata_t *sd = arg; +	char *name; +	int rv = 0; +	int error; + +	error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); +	if (error == -1) +		nomem(); +	fnvlist_add_boolean(sd->sd_nvl, name); +	free(name); + +	if (sd->sd_recursive) +		rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); +	zfs_close(zhp); +	return (rv); +} +  /*   * zfs snapshot [-r] [-o prop=value] ... 
<fs@snap>   * @@ -3433,13 +3465,16 @@ zfs_do_set(int argc, char **argv)  static int  zfs_do_snapshot(int argc, char **argv)  { -	boolean_t recursive = B_FALSE;  	int ret = 0;  	char c;  	nvlist_t *props; +	snap_cbdata_t sd = { 0 }; +	boolean_t multiple_snaps = B_FALSE;  	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)  		nomem(); +	if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) +		nomem();  	/* check options */  	while ((c = getopt(argc, argv, "ro:")) != -1) { @@ -3449,7 +3484,8 @@ zfs_do_snapshot(int argc, char **argv)  				return (1);  			break;  		case 'r': -			recursive = B_TRUE; +			sd.sd_recursive = B_TRUE; +			multiple_snaps = B_TRUE;  			break;  		case '?':  			(void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -3466,18 +3502,35 @@ zfs_do_snapshot(int argc, char **argv)  		(void) fprintf(stderr, gettext("missing snapshot argument\n"));  		goto usage;  	} -	if (argc > 1) { -		(void) fprintf(stderr, gettext("too many arguments\n")); -		goto usage; + +	if (argc > 1) +		multiple_snaps = B_TRUE; +	for (; argc > 0; argc--, argv++) { +		char *atp; +		zfs_handle_t *zhp; + +		atp = strchr(argv[0], '@'); +		if (atp == NULL) +			goto usage; +		*atp = '\0'; +		sd.sd_snapname = atp + 1; +		zhp = zfs_open(g_zfs, argv[0], +		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); +		if (zhp == NULL) +			goto usage; +		if (zfs_snapshot_cb(zhp, &sd) != 0) +			goto usage;  	} -	ret = zfs_snapshot(g_zfs, argv[0], recursive, props); +	ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); +	nvlist_free(sd.sd_nvl);  	nvlist_free(props); -	if (ret && recursive) +	if (ret != 0 && multiple_snaps)  		(void) fprintf(stderr, gettext("no snapshots were created\n"));  	return (ret != 0);  usage: +	nvlist_free(sd.sd_nvl);  	nvlist_free(props);  	usage(B_FALSE);  	return (-1); @@ -6479,8 +6532,7 @@ main(int argc, char **argv)  		return (1);  	} -	zpool_set_history_str("zfs", argc, argv, history_str); -	verify(zpool_stage_history(g_zfs, history_str) == 0); +	zfs_save_arguments(argc, argv, 
history_str, sizeof (history_str));  	libzfs_print_on_error(g_zfs, B_TRUE); @@ -6549,6 +6601,9 @@ main(int argc, char **argv)  	(void) fclose(mnttab_file); +	if (ret == 0 && log_history) +		(void) zpool_log_history(g_zfs, history_str); +  	libzfs_fini(g_zfs);  	/* diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index 2618cea32b41..f4be0b2a55fb 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -279,6 +279,9 @@ feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	zfeature_info_t *feature = arg2;  	spa_feature_enable(spa, feature, tx); +	spa_history_log_internal(spa, "zhack enable feature", tx, +	    "name=%s can_readonly=%u", +	    feature->fi_guid, feature->fi_can_readonly);  }  static void @@ -356,6 +359,8 @@ feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	zfeature_info_t *feature = arg2;  	spa_feature_incr(spa, feature, tx); +	spa_history_log_internal(spa, "zhack feature incr", tx, +	    "name=%s", feature->fi_guid);  }  static void @@ -365,6 +370,8 @@ feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	zfeature_info_t *feature = arg2;  	spa_feature_decr(spa, feature, tx); +	spa_history_log_internal(spa, "zhack feature decr", tx, +	    "name=%s", feature->fi_guid);  }  static void diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index bc300b3bd958..3ecdbec9684d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -185,9 +185,9 @@ static zpool_command_t command_table[] = {  #define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0])) -zpool_command_t *current_command; +static zpool_command_t *current_command;  static char history_str[HIS_MAX_RECORD_LEN]; - +static boolean_t log_history = B_TRUE;  static uint_t timestamp_fmt = NODATE;  static const char * @@ -935,7 +935,10 @@ zpool_do_destroy(int argc, char **argv)  		return (1);  	} -	ret = (zpool_destroy(zhp) != 0); +	/* The history must be logged as part of the export */ +	log_history = B_FALSE; + +	ret = (zpool_destroy(zhp, history_str) != 0); 
 	zpool_close(zhp); @@ -999,10 +1002,13 @@ zpool_do_export(int argc, char **argv)  			continue;  		} +		/* The history must be logged as part of the export */ +		log_history = B_FALSE; +  		if (hardforce) { -			if (zpool_export_force(zhp) != 0) +			if (zpool_export_force(zhp, history_str) != 0)  				ret = 1; -		} else if (zpool_export(zhp, force) != 0) { +		} else if (zpool_export(zhp, force, history_str) != 0) {  			ret = 1;  		} @@ -4269,6 +4275,14 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)  				(void) printf(gettext("Successfully upgraded "  				    "'%s'\n\n"), zpool_get_name(zhp));  			} +			/* +			 * If they did "zpool upgrade -a", then we could +			 * be doing ioctls to different pools.  We need +			 * to log this history once to each pool, and bypass +			 * the normal history logging that happens in main(). +			 */ +			(void) zpool_log_history(g_zfs, history_str); +			log_history = B_FALSE;  		}  	} else if (cbp->cb_newer && !SPA_VERSION_IS_SUPPORTED(version)) {  		assert(!cbp->cb_all); @@ -4491,8 +4505,8 @@ zpool_do_upgrade(int argc, char **argv)  typedef struct hist_cbdata {  	boolean_t first; -	int longfmt; -	int internal; +	boolean_t longfmt; +	boolean_t internal;  } hist_cbdata_t;  /* @@ -4504,21 +4518,8 @@ get_history_one(zpool_handle_t *zhp, void *data)  	nvlist_t *nvhis;  	nvlist_t **records;  	uint_t numrecords; -	char *cmdstr; -	char *pathstr; -	uint64_t dst_time; -	time_t tsec; -	struct tm t; -	char tbuf[30];  	int ret, i; -	uint64_t who; -	struct passwd *pwd; -	char *hostname; -	char *zonename; -	char internalstr[MAXPATHLEN];  	hist_cbdata_t *cb = (hist_cbdata_t *)data; -	uint64_t txg; -	uint64_t ievent;  	cb->first = B_FALSE; @@ -4530,64 +4531,94 @@ get_history_one(zpool_handle_t *zhp, void *data)  	verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,  	    &records, &numrecords) == 0);  	for (i = 0; i < numrecords; i++) { -		if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME, -		    &dst_time) != 0) -			continue; +		nvlist_t *rec = 
records[i]; +		char tbuf[30] = ""; -		/* is it an internal event or a standard event? */ -		if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD, -		    &cmdstr) != 0) { -			if (cb->internal == 0) -				continue; +		if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { +			time_t tsec; +			struct tm t; + +			tsec = fnvlist_lookup_uint64(records[i], +			    ZPOOL_HIST_TIME); +			(void) localtime_r(&tsec, &t); +			(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); +		} -			if (nvlist_lookup_uint64(records[i], -			    ZPOOL_HIST_INT_EVENT, &ievent) != 0) +		if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { +			(void) printf("%s %s", tbuf, +			    fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); +		} else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { +			int ievent = +			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); +			if (!cb->internal) +				continue; +			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { +				(void) printf("%s unrecognized record:\n", +				    tbuf); +				dump_nvlist(rec, 4); +				continue; +			} +			(void) printf("%s [internal %s txg:%lld] %s", tbuf, +			    zfs_history_event_names[ievent], +			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), +			    fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); +		} else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { +			if (!cb->internal)  				continue; -			verify(nvlist_lookup_uint64(records[i], -			    ZPOOL_HIST_TXG, &txg) == 0); -			verify(nvlist_lookup_string(records[i], -			    ZPOOL_HIST_INT_STR, &pathstr) == 0); -			if (ievent >= LOG_END) +			(void) printf("%s [txg:%lld] %s", tbuf, +			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), +			    fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); +			if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { +				(void) printf(" %s (%llu)", +				    fnvlist_lookup_string(rec, +				    ZPOOL_HIST_DSNAME), +				    fnvlist_lookup_uint64(rec, +				    ZPOOL_HIST_DSID)); +			} +			(void) printf(" %s", fnvlist_lookup_string(rec, +			    ZPOOL_HIST_INT_STR)); +		} else if (nvlist_exists(rec, 
ZPOOL_HIST_IOCTL)) { +			if (!cb->internal) +				continue; +			(void) printf("%s ioctl %s\n", tbuf, +			    fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); +			if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { +				(void) printf("    input:\n"); +				dump_nvlist(fnvlist_lookup_nvlist(rec, +				    ZPOOL_HIST_INPUT_NVL), 8); +			} +			if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { +				(void) printf("    output:\n"); +				dump_nvlist(fnvlist_lookup_nvlist(rec, +				    ZPOOL_HIST_OUTPUT_NVL), 8); +			} +		} else { +			if (!cb->internal)  				continue; -			(void) snprintf(internalstr, -			    sizeof (internalstr), -			    "[internal %s txg:%lld] %s", -			    zfs_history_event_names[ievent], txg, -			    pathstr); -			cmdstr = internalstr; +			(void) printf("%s unrecognized record:\n", tbuf); +			dump_nvlist(rec, 4);  		} -		tsec = dst_time; -		(void) localtime_r(&tsec, &t); -		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); -		(void) printf("%s %s", tbuf, cmdstr);  		if (!cb->longfmt) {  			(void) printf("\n");  			continue;  		}  		(void) printf(" ["); -		if (nvlist_lookup_uint64(records[i], -		    ZPOOL_HIST_WHO, &who) == 0) { -			pwd = getpwuid((uid_t)who); -			if (pwd) -				(void) printf("user %s on", -				    pwd->pw_name); -			else -				(void) printf("user %d on", -				    (int)who); -		} else { -			(void) printf(gettext("no info]\n")); -			continue; +		if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { +			uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); +			struct passwd *pwd = getpwuid(who); +			(void) printf("user %d ", (int)who); +			if (pwd != NULL) +				(void) printf("(%s) ", pwd->pw_name);  		} -		if (nvlist_lookup_string(records[i], -		    ZPOOL_HIST_HOST, &hostname) == 0) { -			(void) printf(" %s", hostname); +		if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { +			(void) printf("on %s", +			    fnvlist_lookup_string(rec, ZPOOL_HIST_HOST));  		} -		if (nvlist_lookup_string(records[i], -		    ZPOOL_HIST_ZONE, &zonename) == 0) { -			(void) printf(":%s", 
zonename); +		if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { +			(void) printf(":%s", +			    fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE));  		} -  		(void) printf("]");  		(void) printf("\n");  	} @@ -4602,8 +4633,6 @@ get_history_one(zpool_handle_t *zhp, void *data)   *   * Displays the history of commands that modified pools.   */ - -  int  zpool_do_history(int argc, char **argv)  { @@ -4616,10 +4645,10 @@ zpool_do_history(int argc, char **argv)  	while ((c = getopt(argc, argv, "li")) != -1) {  		switch (c) {  		case 'l': -			cbdata.longfmt = 1; +			cbdata.longfmt = B_TRUE;  			break;  		case 'i': -			cbdata.internal = 1; +			cbdata.internal = B_TRUE;  			break;  		case '?':  			(void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -4844,8 +4873,7 @@ main(int argc, char **argv)  	if (strcmp(cmdname, "-?") == 0)  		usage(B_TRUE); -	zpool_set_history_str("zpool", argc, argv, history_str); -	verify(zpool_stage_history(g_zfs, history_str) == 0); +	zfs_save_arguments(argc, argv, history_str, sizeof (history_str));  	/*  	 * Run the appropriate command. 
@@ -4872,6 +4900,9 @@ main(int argc, char **argv)  		usage(B_FALSE);  	} +	if (ret == 0 && log_history) +		(void) zpool_log_history(g_zfs, history_str); +  	libzfs_fini(g_zfs);  	/* diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index d9bcb0408a0b..63acd0c3aab8 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2252,7 +2252,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)  	 */  	nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);  	VERIFY3U(ENOENT, ==, -	    spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); +	    spa_create("ztest_bad_file", nvroot, NULL, NULL));  	nvlist_free(nvroot);  	/* @@ -2260,7 +2260,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)  	 */  	nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1);  	VERIFY3U(ENOENT, ==, -	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); +	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL));  	nvlist_free(nvroot);  	/* @@ -2269,7 +2269,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)  	 */  	(void) rw_rdlock(&ztest_name_lock);  	nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); -	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); +	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));  	nvlist_free(nvroot);  	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));  	VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -3056,8 +3056,7 @@ ztest_snapshot_create(char *osname, uint64_t id)  	(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,  	    (u_longlong_t)id); -	error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, -	    NULL, NULL, B_FALSE, B_FALSE, -1); +	error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1);  	if (error == ENOSPC) {  		ztest_record_enospc(FTAG);  		return (B_FALSE); @@ -3257,8 +3256,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)  	(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);  	(void) 
snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); -	error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, -	    NULL, NULL, B_FALSE, B_FALSE, -1); +	error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);  	if (error && error != EEXIST) {  		if (error == ENOSPC) {  			ztest_record_enospc(FTAG); @@ -3281,8 +3279,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)  		fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);  	} -	error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, -	    NULL, NULL, B_FALSE, B_FALSE, -1); +	error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1);  	if (error && error != EEXIST) {  		if (error == ENOSPC) {  			ztest_record_enospc(FTAG); @@ -3291,8 +3288,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)  		fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);  	} -	error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, -	    NULL, NULL, B_FALSE, B_FALSE, -1); +	error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1);  	if (error && error != EEXIST) {  		if (error == ENOSPC) {  			ztest_record_enospc(FTAG); @@ -4480,8 +4476,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)  	 * Create snapshot, clone it, mark snap for deferred destroy,  	 * destroy clone, verify snap was also destroyed.  	 */ -	error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, -	    FALSE, -1); +	error = dmu_objset_snapshot_one(osname, snapname);  	if (error) {  		if (error == ENOSPC) {  			ztest_record_enospc("dmu_objset_snapshot"); @@ -4523,8 +4518,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)  	 * destroy a held snapshot, mark for deferred destroy,  	 * release hold, verify snapshot was destroyed.  	 
*/ -	error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, -	    FALSE, -1); +	error = dmu_objset_snapshot_one(osname, snapname);  	if (error) {  		if (error == ENOSPC) {  			ztest_record_enospc("dmu_objset_snapshot"); @@ -5612,8 +5606,7 @@ ztest_init(ztest_shared_t *zs)  		    spa_feature_table[i].fi_uname);  		VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));  	} -	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, -	    NULL, NULL)); +	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));  	nvlist_free(nvroot);  	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c index ed9b67ea3bc9..7688113e36e1 100644 --- a/common/zfs/zfs_comutil.c +++ b/common/zfs/zfs_comutil.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  /* @@ -157,7 +158,11 @@ zfs_spa_version_map(int zpl_version)  	return (version);  } -const char *zfs_history_event_names[LOG_END] = { +/* + * This is the table of legacy internal event names; it should not be modified. + * The internal events are now stored in the history log as strings. + */ +const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {  	"invalid event",  	"pool create",  	"vdev add", diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h index 61327f9aa909..f89054388a4d 100644 --- a/common/zfs/zfs_comutil.h +++ b/common/zfs/zfs_comutil.h @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  #ifndef	_ZFS_COMUTIL_H @@ -37,7 +38,8 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);  extern int zfs_zpl_version_map(int spa_version);  extern int zfs_spa_version_map(int zpl_version); -extern const char *zfs_history_event_names[LOG_END]; +#define	ZFS_NUM_LEGACY_HISTORY_EVENTS 41 +extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];  #ifdef	__cplusplus  } diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c index 0bbf20d4f02c..03919f0e9132 100644 --- a/common/zfs/zprop_common.c +++ b/common/zfs/zprop_common.c @@ -22,6 +22,9 @@   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms.   */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */  /*   * Common routines used by zfs and zpool property management. @@ -129,7 +132,8 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type,      zprop_attr_t attr, int objset_types, const char *colname)  {  	zprop_register_impl(prop, name, type, 0, NULL, attr, -	    objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); +	    objset_types, NULL, colname, +	    type == PROP_TYPE_NUMBER, B_FALSE, NULL);  } diff --git a/lib/libzfs/common/libzfs.h b/lib/libzfs/common/libzfs.h index 4dc039c7739e..56ebf530daf5 100644 --- a/lib/libzfs/common/libzfs.h +++ b/lib/libzfs/common/libzfs.h @@ -54,7 +54,8 @@ extern "C" {  /*   * libzfs errors   */ -enum { +typedef enum zfs_error { +	EZFS_SUCCESS = 0,	/* no error -- success */  	EZFS_NOMEM = 2000,	/* out of memory */  	EZFS_BADPROP,		/* invalid property value */  	EZFS_PROPREADONLY,	/* cannot set readonly property */ @@ -126,7 +127,7 @@ enum {  	EZFS_DIFFDATA,		/* bad zfs diff data */  	EZFS_POOLREADONLY,	/* pool is in read-only mode */  	EZFS_UNKNOWN -}; +} zfs_error_t;  /*   * The following data structures are all part @@ -182,6 +183,9 @@ extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);  extern void libzfs_print_on_error(libzfs_handle_t *, 
boolean_t); +extern void zfs_save_arguments(int argc, char **, char *, int); +extern int zpool_log_history(libzfs_handle_t *, const char *); +  extern int libzfs_errno(libzfs_handle_t *);  extern const char *libzfs_error_action(libzfs_handle_t *);  extern const char *libzfs_error_description(libzfs_handle_t *); @@ -216,7 +220,7 @@ extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);   */  extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,      nvlist_t *, nvlist_t *); -extern int zpool_destroy(zpool_handle_t *); +extern int zpool_destroy(zpool_handle_t *, const char *);  extern int zpool_add(zpool_handle_t *, nvlist_t *);  typedef struct splitflags { @@ -338,8 +342,8 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);  /*   * Import and export functions   */ -extern int zpool_export(zpool_handle_t *, boolean_t); -extern int zpool_export_force(zpool_handle_t *); +extern int zpool_export(zpool_handle_t *, boolean_t, const char *); +extern int zpool_export_force(zpool_handle_t *, const char *);  extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,      char *altroot);  extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, @@ -373,7 +377,7 @@ extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,   */  struct zfs_cmd; -extern const char *zfs_history_event_names[LOG_END]; +extern const char *zfs_history_event_names[];  extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,      boolean_t verbose); @@ -381,9 +385,6 @@ extern int zpool_upgrade(zpool_handle_t *, uint64_t);  extern int zpool_get_history(zpool_handle_t *, nvlist_t **);  extern int zpool_history_unpack(char *, uint64_t, uint64_t *,      nvlist_t ***, uint_t *); -extern void zpool_set_history_str(const char *subcommand, int argc, -    char **argv, char *history_str); -extern int zpool_stage_history(libzfs_handle_t *, const char *);  extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, 
uint64_t, char *,      size_t len);  extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); @@ -436,8 +437,6 @@ extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,      char *propbuf, int proplen, boolean_t literal);  extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,      char *buf, size_t len); -extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, -    uint64_t *usedp);  extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);  extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);  extern const char *zfs_prop_values(zfs_prop_t); @@ -553,6 +552,8 @@ extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);  extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t);  extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);  extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); +extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, +    nvlist_t *props);  extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);  extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t); diff --git a/lib/libzfs/common/libzfs_dataset.c b/lib/libzfs/common/libzfs_dataset.c index c1767cb910c0..db7111c21c5f 100644 --- a/lib/libzfs/common/libzfs_dataset.c +++ b/lib/libzfs/common/libzfs_dataset.c @@ -1407,8 +1407,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)  	libzfs_handle_t *hdl = zhp->zfs_hdl;  	nvlist_t *nvl = NULL, *realprops;  	zfs_prop_t prop; -	boolean_t do_prefix; -	uint64_t idx; +	boolean_t do_prefix = B_TRUE;  	int added_resv;  	(void) snprintf(errbuf, sizeof (errbuf), @@ -1447,12 +1446,17 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)  	}  	/* -	 * If the dataset's canmount property is being set to noauto, -	 * then we want to prevent unmounting & remounting it. 
+	 * We don't want to unmount & remount the dataset when changing +	 * its canmount property to 'on' or 'noauto'.  We only use +	 * the changelist logic to unmount when setting canmount=off.  	 */ -	do_prefix = !((prop == ZFS_PROP_CANMOUNT) && -	    (zprop_string_to_index(prop, propval, &idx, -	    ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO)); +	if (prop == ZFS_PROP_CANMOUNT) { +		uint64_t idx; +		int err = zprop_string_to_index(prop, propval, &idx, +		    ZFS_TYPE_DATASET); +		if (err == 0 && idx != ZFS_CANMOUNT_OFF) +			do_prefix = B_FALSE; +	}  	if (do_prefix && (ret = changelist_prefix(cl)) != 0)  		goto error; @@ -2641,25 +2645,6 @@ zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,  	return (0);  } -int -zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, -    uint64_t *usedp) -{ -	int err; -	zfs_cmd_t zc = { 0 }; - -	(void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); -	(void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - -	err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); -	if (err) -		return (err); - -	*usedp = zc.zc_cookie; - -	return (0); -} -  /*   * Returns the name of the given zfs handle.   
*/ @@ -2860,7 +2845,6 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)  	 */  	for (cp = target + prefixlen + 1;  	    cp = strchr(cp, '/'); *cp = '/', cp++) { -		char *logstr;  		*cp = '\0'; @@ -2871,16 +2855,12 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)  			continue;  		} -		logstr = hdl->libzfs_log_str; -		hdl->libzfs_log_str = NULL;  		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,  		    NULL) != 0) { -			hdl->libzfs_log_str = logstr;  			opname = dgettext(TEXT_DOMAIN, "create");  			goto ancestorerr;  		} -		hdl->libzfs_log_str = logstr;  		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);  		if (h == NULL) {  			opname = dgettext(TEXT_DOMAIN, "open"); @@ -2938,12 +2918,12 @@ int  zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,      nvlist_t *props)  { -	zfs_cmd_t zc = { 0 };  	int ret;  	uint64_t size = 0;  	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);  	char errbuf[1024];  	uint64_t zoned; +	dmu_objset_type_t ost;  	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,  	    "cannot create '%s'"), path); @@ -2963,17 +2943,16 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,  	 * will return ENOENT, not EEXIST.  To prevent this from happening, we  	 * first try to see if the dataset exists.  	 
*/ -	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); -	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { +	if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) {  		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,  		    "dataset already exists"));  		return (zfs_error(hdl, EZFS_EXISTS, errbuf));  	}  	if (type == ZFS_TYPE_VOLUME) -		zc.zc_objset_type = DMU_OST_ZVOL; +		ost = DMU_OST_ZVOL;  	else -		zc.zc_objset_type = DMU_OST_ZFS; +		ost = DMU_OST_ZFS;  	if (props && (props = zfs_valid_proplist(hdl, type, props,  	    zoned, NULL, errbuf)) == 0) @@ -3025,14 +3004,9 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,  		}  	} -	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) -		return (-1); -	nvlist_free(props); -  	/* create the dataset */ -	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); - -	zcmd_free_nvlists(&zc); +	ret = lzc_create(path, ost, props); +	nvlist_free(props);  	/* check for failure */  	if (ret != 0) { @@ -3170,33 +3144,35 @@ int  zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)  {  	int ret; -	zfs_cmd_t zc = { 0 }; +	nvlist_t *errlist; -	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); -	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) -		return (-1); -	zc.zc_defer_destroy = defer; +	ret = lzc_destroy_snaps(snaps, defer, &errlist); -	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc);  	if (ret != 0) { -		char errbuf[1024]; - -		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, -		    "cannot destroy snapshots in %s"), zc.zc_name); - -		switch (errno) { -		case EEXIST: -			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, -			    "snapshot is cloned")); -			return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); +		for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL); +		    pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { +			char errbuf[1024]; +			(void) snprintf(errbuf, sizeof (errbuf), +			    dgettext(TEXT_DOMAIN, "cannot 
destroy snapshot %s"), +			    nvpair_name(pair)); -		default: -			return (zfs_standard_error(zhp->zfs_hdl, errno, -			    errbuf)); +			switch (fnvpair_value_int32(pair)) { +			case EEXIST: +				zfs_error_aux(zhp->zfs_hdl, +				    dgettext(TEXT_DOMAIN, +				    "snapshot is cloned")); +				ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, +				    errbuf); +				break; +			default: +				ret = zfs_standard_error(zhp->zfs_hdl, errno, +				    errbuf); +				break; +			}  		}  	} -	return (0); +	return (ret);  }  /* @@ -3205,12 +3181,10 @@ zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)  int  zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)  { -	zfs_cmd_t zc = { 0 };  	char parent[ZFS_MAXNAMELEN];  	int ret;  	char errbuf[1024];  	libzfs_handle_t *hdl = zhp->zfs_hdl; -	zfs_type_t type;  	uint64_t zoned;  	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); @@ -3229,32 +3203,21 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)  	(void) parent_name(target, parent, sizeof (parent));  	/* do the clone */ -	if (ZFS_IS_VOLUME(zhp)) { -		zc.zc_objset_type = DMU_OST_ZVOL; -		type = ZFS_TYPE_VOLUME; -	} else { -		zc.zc_objset_type = DMU_OST_ZFS; -		type = ZFS_TYPE_FILESYSTEM; -	}  	if (props) { +		zfs_type_t type; +		if (ZFS_IS_VOLUME(zhp)) { +			type = ZFS_TYPE_VOLUME; +		} else { +			type = ZFS_TYPE_FILESYSTEM; +		}  		if ((props = zfs_valid_proplist(hdl, type, props, zoned,  		    zhp, errbuf)) == NULL)  			return (-1); - -		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { -			nvlist_free(props); -			return (-1); -		} - -		nvlist_free(props);  	} -	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); -	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); -	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc); - -	zcmd_free_nvlists(&zc); +	ret = lzc_clone(target, zhp->zfs_name, props); +	nvlist_free(props);  	if (ret != 0) {  		switch (errno) { @@ -3339,74 +3302,134 @@ zfs_promote(zfs_handle_t *zhp)  	return 
(ret);  } +typedef struct snapdata { +	nvlist_t *sd_nvl; +	const char *sd_snapname; +} snapdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ +	snapdata_t *sd = arg; +	char name[ZFS_MAXNAMELEN]; +	int rv = 0; + +	(void) snprintf(name, sizeof (name), +	    "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + +	fnvlist_add_boolean(sd->sd_nvl, name); + +	rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); +	zfs_close(zhp); +	return (rv); +} +  /* - * Takes a snapshot of the given dataset. + * Creates snapshots.  The keys in the snaps nvlist are the snapshots to be + * created.   */  int -zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, -    nvlist_t *props) +zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)  { -	const char *delim; -	char parent[ZFS_MAXNAMELEN]; -	zfs_handle_t *zhp; -	zfs_cmd_t zc = { 0 };  	int ret;  	char errbuf[1024]; +	nvpair_t *elem; +	nvlist_t *errors;  	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, -	    "cannot snapshot '%s'"), path); +	    "cannot create snapshots ")); -	/* validate the target name */ -	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) -		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); +	elem = NULL; +	while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { +		const char *snapname = nvpair_name(elem); -	if (props) { -		if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, -		    props, B_FALSE, NULL, errbuf)) == NULL) -			return (-1); +		/* validate the target name */ +		if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, +		    B_TRUE)) { +			(void) snprintf(errbuf, sizeof (errbuf), +			    dgettext(TEXT_DOMAIN, +			    "cannot create snapshot '%s'"), snapname); +			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); +		} +	} -		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { -			nvlist_free(props); -			return (-1); +	if (props != NULL && +	    (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, +	    props, B_FALSE, 
NULL, errbuf)) == NULL) { +		return (-1); +	} + +	ret = lzc_snapshot(snaps, props, &errors); + +	if (ret != 0) { +		boolean_t printed = B_FALSE; +		for (elem = nvlist_next_nvpair(errors, NULL); +		    elem != NULL; +		    elem = nvlist_next_nvpair(errors, elem)) { +			(void) snprintf(errbuf, sizeof (errbuf), +			    dgettext(TEXT_DOMAIN, +			    "cannot create snapshot '%s'"), nvpair_name(elem)); +			(void) zfs_standard_error(hdl, +			    fnvpair_value_int32(elem), errbuf); +			printed = B_TRUE;  		} +		if (!printed) { +			switch (ret) { +			case EXDEV: +				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, +				    "multiple snapshots of same " +				    "fs not allowed")); +				(void) zfs_error(hdl, EZFS_EXISTS, errbuf); -		nvlist_free(props); +				break; +			default: +				(void) zfs_standard_error(hdl, ret, errbuf); +			} +		}  	} -	/* make sure the parent exists and is of the appropriate type */ -	delim = strchr(path, '@'); -	(void) strncpy(parent, path, delim - path); -	parent[delim - path] = '\0'; +	nvlist_free(props); +	nvlist_free(errors); +	return (ret); +} + +int +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, +    nvlist_t *props) +{ +	int ret; +	snapdata_t sd = { 0 }; +	char fsname[ZFS_MAXNAMELEN]; +	char *cp; +	zfs_handle_t *zhp; +	char errbuf[1024]; + +	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, +	    "cannot snapshot %s"), path); + +	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) +		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); -	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | +	(void) strlcpy(fsname, path, sizeof (fsname)); +	cp = strchr(fsname, '@'); +	*cp = '\0'; +	sd.sd_snapname = cp + 1; + +	if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM |  	    ZFS_TYPE_VOLUME)) == NULL) { -		zcmd_free_nvlists(&zc);  		return (-1);  	} -	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); -	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value)); -	if (ZFS_IS_VOLUME(zhp)) -		
zc.zc_objset_type = DMU_OST_ZVOL; -	else -		zc.zc_objset_type = DMU_OST_ZFS; -	zc.zc_cookie = recursive; -	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc); - -	zcmd_free_nvlists(&zc); - -	/* -	 * if it was recursive, the one that actually failed will be in -	 * zc.zc_name. -	 */ -	if (ret != 0) { -		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, -		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); -		(void) zfs_standard_error(hdl, errno, errbuf); +	verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); +	if (recursive) { +		(void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); +	} else { +		fnvlist_add_boolean(sd.sd_nvl, path);  	} +	ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); +	nvlist_free(sd.sd_nvl);  	zfs_close(zhp); -  	return (ret);  } @@ -3434,17 +3457,13 @@ rollback_destroy(zfs_handle_t *zhp, void *data)  		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&  		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >  		    cbp->cb_create) { -			char *logstr;  			cbp->cb_dependent = B_TRUE;  			cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,  			    rollback_destroy, cbp);  			cbp->cb_dependent = B_FALSE; -			logstr = zhp->zfs_hdl->libzfs_log_str; -			zhp->zfs_hdl->libzfs_log_str = NULL;  			cbp->cb_error |= zfs_destroy(zhp, B_FALSE); -			zhp->zfs_hdl->libzfs_log_str = logstr;  		}  	} else {  		/* We must destroy this clone; first unmount it */ diff --git a/lib/libzfs/common/libzfs_impl.h b/lib/libzfs/common/libzfs_impl.h index b1eae47ed204..576b2af5d2c2 100644 --- a/lib/libzfs/common/libzfs_impl.h +++ b/lib/libzfs/common/libzfs_impl.h @@ -21,11 +21,11 @@  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/ -#ifndef	_LIBFS_IMPL_H -#define	_LIBFS_IMPL_H +#ifndef	_LIBZFS_IMPL_H +#define	_LIBZFS_IMPL_H  #include <sys/dmu.h>  #include <sys/fs/zfs.h> @@ -36,6 +36,7 @@  #include <libuutil.h>  #include <libzfs.h>  #include <libshare.h> +#include <libzfs_core.h>  #include <fm/libtopo.h> @@ -67,7 +68,6 @@ struct libzfs_handle {  	int libzfs_desc_active;  	char libzfs_action[1024];  	char libzfs_desc[1024]; -	char *libzfs_log_str;  	int libzfs_printerr;  	int libzfs_storeerr; /* stuff error messages into buffer */  	void *libzfs_sharehdl; /* libshare handle */ @@ -213,4 +213,4 @@ extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);  }  #endif -#endif	/* _LIBFS_IMPL_H */ +#endif	/* _LIBZFS_IMPL_H */ diff --git a/lib/libzfs/common/libzfs_iter.c b/lib/libzfs/common/libzfs_iter.c index 212383d0e6fb..be5767f542d6 100644 --- a/lib/libzfs/common/libzfs_iter.c +++ b/lib/libzfs/common/libzfs_iter.c @@ -22,7 +22,7 @@  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.   * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  #include <stdio.h> @@ -301,12 +301,11 @@ int  zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig,      zfs_iter_f func, void *arg)  { -	char buf[ZFS_MAXNAMELEN]; -	char *comma_separated, *cp; +	char *buf, *comma_separated, *cp;  	int err = 0;  	int ret = 0; -	(void) strlcpy(buf, spec_orig, sizeof (buf)); +	buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig);  	cp = buf;  	while ((comma_separated = strsep(&cp, ",")) != NULL) { @@ -364,6 +363,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig,  		}  	} +	free(buf);  	return (ret);  } diff --git a/lib/libzfs/common/libzfs_pool.c b/lib/libzfs/common/libzfs_pool.c index df89a2b445f6..1c6fb371e3bc 100644 --- a/lib/libzfs/common/libzfs_pool.c +++ b/lib/libzfs/common/libzfs_pool.c @@ -34,6 +34,7 @@  #include <stdlib.h>  #include <strings.h>  #include <unistd.h> +#include <libgen.h>  #include <sys/efi_partition.h>  #include <sys/vtoc.h>  #include <sys/zfs_ioctl.h> @@ -1205,7 +1206,7 @@ create_failed:   * datasets left in the pool.   */  int -zpool_destroy(zpool_handle_t *zhp) +zpool_destroy(zpool_handle_t *zhp, const char *log_str)  {  	zfs_cmd_t zc = { 0 };  	zfs_handle_t *zfp = NULL; @@ -1217,6 +1218,7 @@ zpool_destroy(zpool_handle_t *zhp)  		return (-1);  	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); +	zc.zc_history = (uint64_t)(uintptr_t)log_str;  	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {  		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1371,8 +1373,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)   * Exports the pool from the system.  The caller must ensure that there are no   * mounted datasets in the pool.   
*/ -int -zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) +static int +zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, +    const char *log_str)  {  	zfs_cmd_t zc = { 0 };  	char msg[1024]; @@ -1383,6 +1386,7 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)  	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));  	zc.zc_cookie = force;  	zc.zc_guid = hardforce; +	zc.zc_history = (uint64_t)(uintptr_t)log_str;  	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {  		switch (errno) { @@ -1404,15 +1408,15 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)  }  int -zpool_export(zpool_handle_t *zhp, boolean_t force) +zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)  { -	return (zpool_export_common(zhp, force, B_FALSE)); +	return (zpool_export_common(zhp, force, B_FALSE, log_str));  }  int -zpool_export_force(zpool_handle_t *zhp) +zpool_export_force(zpool_handle_t *zhp, const char *log_str)  { -	return (zpool_export_common(zhp, B_TRUE, B_TRUE)); +	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));  }  static void @@ -3574,40 +3578,30 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)  }  void -zpool_set_history_str(const char *subcommand, int argc, char **argv, -    char *history_str) +zfs_save_arguments(int argc, char **argv, char *string, int len)  { -	int i; - -	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN); -	for (i = 1; i < argc; i++) { -		if (strlen(history_str) + 1 + strlen(argv[i]) > -		    HIS_MAX_RECORD_LEN) -			break; -		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN); -		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN); +	(void) strlcpy(string, basename(argv[0]), len); +	for (int i = 1; i < argc; i++) { +		(void) strlcat(string, " ", len); +		(void) strlcat(string, argv[i], len);  	}  } -/* - * Stage command history for logging. 
- */  int -zpool_stage_history(libzfs_handle_t *hdl, const char *history_str) +zpool_log_history(libzfs_handle_t *hdl, const char *message)  { -	if (history_str == NULL) -		return (EINVAL); - -	if (strlen(history_str) > HIS_MAX_RECORD_LEN) -		return (EINVAL); - -	if (hdl->libzfs_log_str != NULL) -		free(hdl->libzfs_log_str); - -	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL) -		return (no_memory(hdl)); - -	return (0); +	zfs_cmd_t zc = { 0 }; +	nvlist_t *args; +	int err; + +	args = fnvlist_alloc(); +	fnvlist_add_string(args, "message", message); +	err = zcmd_write_src_nvlist(hdl, &zc, args); +	if (err == 0) +		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); +	nvlist_free(args); +	zcmd_free_nvlists(&zc); +	return (err);  }  /* diff --git a/lib/libzfs/common/libzfs_util.c b/lib/libzfs/common/libzfs_util.c index 41db2fdd81fa..41e25e9100a0 100644 --- a/lib/libzfs/common/libzfs_util.c +++ b/lib/libzfs/common/libzfs_util.c @@ -43,6 +43,7 @@  #include <sys/types.h>  #include <libzfs.h> +#include <libzfs_core.h>  #include "libzfs_impl.h"  #include "zfs_prop.h" @@ -630,6 +631,14 @@ libzfs_init(void)  	hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r"); +	if (libzfs_core_init() != 0) { +		(void) close(hdl->libzfs_fd); +		(void) fclose(hdl->libzfs_mnttab); +		(void) fclose(hdl->libzfs_sharetab); +		free(hdl); +		return (NULL); +	} +  	zfs_prop_init();  	zpool_prop_init();  	zpool_feature_init(); @@ -647,12 +656,11 @@ libzfs_fini(libzfs_handle_t *hdl)  	if (hdl->libzfs_sharetab)  		(void) fclose(hdl->libzfs_sharetab);  	zfs_uninit_libshare(hdl); -	if (hdl->libzfs_log_str) -		(void) free(hdl->libzfs_log_str);  	zpool_free_handles(hdl);  	libzfs_fru_clear(hdl, B_TRUE);  	namespace_clear(hdl);  	libzfs_mnttab_fini(hdl); +	libzfs_core_fini();  	free(hdl);  } @@ -814,17 +822,7 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)  int  zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)  { -	int error; - -	zc->zc_history = 
(uint64_t)(uintptr_t)hdl->libzfs_log_str; -	error = ioctl(hdl->libzfs_fd, request, zc); -	if (hdl->libzfs_log_str) { -		free(hdl->libzfs_log_str); -		hdl->libzfs_log_str = NULL; -	} -	zc->zc_history = 0; - -	return (error); +	return (ioctl(hdl->libzfs_fd, request, zc));  }  /* diff --git a/lib/libzfs_core/common/libzfs_core.c b/lib/libzfs_core/common/libzfs_core.c new file mode 100644 index 000000000000..73afd50b8de2 --- /dev/null +++ b/lib/libzfs_core/common/libzfs_core.c @@ -0,0 +1,477 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. + * It has the following characteristics: + * + *  - Thread Safe.  libzfs_core is accessible concurrently from multiple + *  threads.  This is accomplished primarily by avoiding global data + *  (e.g. caching).  Since it's thread-safe, there is no reason for a + *  process to have multiple libzfs "instances".  Therefore, we store + *  our few pieces of data (e.g. the file descriptor) in global + *  variables.  
The fd is reference-counted so that the libzfs_core + *  library can be "initialized" multiple times (e.g. by different + *  consumers within the same process). + * + *  - Committed Interface.  The libzfs_core interface will be committed, + *  therefore consumers can compile against it and be confident that + *  their code will continue to work on future releases of this code. + *  Currently, the interface is Evolving (not Committed), but we intend + *  to commit to it once it is more complete and we determine that it + *  meets the needs of all consumers. + * + *  - Programatic Error Handling.  libzfs_core communicates errors with + *  defined error numbers, and doesn't print anything to stdout/stderr. + * + *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments + *  to/from the kernel ioctls.  There is generally a 1:1 correspondence + *  between libzfs_core functions and ioctls to /dev/zfs. + * + *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1 + *  with kernel ioctls, and kernel ioctls are general atomic, each + *  libzfs_core function is atomic.  For example, creating multiple + *  snapshots with a single call to lzc_snapshot() is atomic -- it + *  can't fail with only some of the requested snapshots created, even + *  in the event of power loss or system crash. + * + *  - Continued libzfs Support.  Some higher-level operations (e.g. + *  support for "zfs send -R") are too complicated to fit the scope of + *  libzfs_core.  This functionality will continue to live in libzfs. + *  Where appropriate, libzfs will use the underlying atomic operations + *  of libzfs_core.  For example, libzfs may implement "zfs send -R | + *  zfs receive" by using individual "send one snapshot", rename, + *  destroy, and "receive one snapshot" operations in libzfs_core. + *  /sbin/zfs and /zbin/zpool will link with both libzfs and + *  libzfs_core.  
Other consumers should aim to use only libzfs_core, + *  since that will be the supported, stable interface going forwards. + */ + +#include <libzfs_core.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/nvpair.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/zfs_ioctl.h> + +static int g_fd; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static int g_refcount; + +int +libzfs_core_init(void) +{ +	(void) pthread_mutex_lock(&g_lock); +	if (g_refcount == 0) { +		g_fd = open("/dev/zfs", O_RDWR); +		if (g_fd < 0) { +			(void) pthread_mutex_unlock(&g_lock); +			return (errno); +		} +	} +	g_refcount++; +	(void) pthread_mutex_unlock(&g_lock); +	return (0); +} + +void +libzfs_core_fini(void) +{ +	(void) pthread_mutex_lock(&g_lock); +	ASSERT3S(g_refcount, >, 0); +	g_refcount--; +	if (g_refcount == 0) +		(void) close(g_fd); +	(void) pthread_mutex_unlock(&g_lock); +} + +static int +lzc_ioctl(zfs_ioc_t ioc, const char *name, +    nvlist_t *source, nvlist_t **resultp) +{ +	zfs_cmd_t zc = { 0 }; +	int error = 0; +	char *packed; +	size_t size; + +	ASSERT3S(g_refcount, >, 0); + +	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + +	packed = fnvlist_pack(source, &size); +	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; +	zc.zc_nvlist_src_size = size; + +	if (resultp != NULL) { +		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); +		zc.zc_nvlist_dst = (uint64_t)(uintptr_t) +		    malloc(zc.zc_nvlist_dst_size); +		if (zc.zc_nvlist_dst == NULL) { +			error = ENOMEM; +			goto out; +		} +	} + +	while (ioctl(g_fd, ioc, &zc) != 0) { +		if (errno == ENOMEM && resultp != NULL) { +			free((void *)(uintptr_t)zc.zc_nvlist_dst); +			zc.zc_nvlist_dst_size *= 2; +			zc.zc_nvlist_dst = (uint64_t)(uintptr_t) +			    malloc(zc.zc_nvlist_dst_size); +			if (zc.zc_nvlist_dst == NULL) { +				error = ENOMEM; +				goto out; +			} +		} 
else { +			error = errno; +			break; +		} +	} +	if (zc.zc_nvlist_dst_filled) { +		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, +		    zc.zc_nvlist_dst_size); +	} else if (resultp != NULL) { +		*resultp = NULL; +	} + +out: +	fnvlist_pack_free(packed, size); +	free((void *)(uintptr_t)zc.zc_nvlist_dst); +	return (error); +} + +int +lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) +{ +	int error; +	nvlist_t *args = fnvlist_alloc(); +	fnvlist_add_int32(args, "type", type); +	if (props != NULL) +		fnvlist_add_nvlist(args, "props", props); +	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); +	nvlist_free(args); +	return (error); +} + +int +lzc_clone(const char *fsname, const char *origin, +    nvlist_t *props) +{ +	int error; +	nvlist_t *args = fnvlist_alloc(); +	fnvlist_add_string(args, "origin", origin); +	if (props != NULL) +		fnvlist_add_nvlist(args, "props", props); +	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); +	nvlist_free(args); +	return (error); +} + +/* + * Creates snapshots. + * + * The keys in the snaps nvlist are the snapshots to be created. + * They must all be in the same pool. + * + * The props nvlist is properties to set.  Currently only user properties + * are supported.  { user:prop_name -> string value } + * + * The returned results nvlist will have an entry for each snapshot that failed. + * The value will be the (int32) error code. + * + * The return value will be 0 if all snapshots were created, otherwise it will + * be the errno of a (undetermined) snapshot that failed. 
+ */ +int +lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) +{ +	nvpair_t *elem; +	nvlist_t *args; +	int error; +	char pool[MAXNAMELEN]; + +	*errlist = NULL; + +	/* determine the pool name */ +	elem = nvlist_next_nvpair(snaps, NULL); +	if (elem == NULL) +		return (0); +	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); +	pool[strcspn(pool, "/@")] = '\0'; + +	args = fnvlist_alloc(); +	fnvlist_add_nvlist(args, "snaps", snaps); +	if (props != NULL) +		fnvlist_add_nvlist(args, "props", props); + +	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); +	nvlist_free(args); + +	return (error); +} + +/* + * Destroys snapshots. + * + * The keys in the snaps nvlist are the snapshots to be destroyed. + * They must all be in the same pool. + * + * Snapshots that do not exist will be silently ignored. + * + * If 'defer' is not set, and a snapshot has user holds or clones, the + * destroy operation will fail and none of the snapshots will be + * destroyed. + * + * If 'defer' is set, and a snapshot has user holds or clones, it will be + * marked for deferred destruction, and will be destroyed when the last hold + * or clone is removed/destroyed. + * + * The return value will be 0 if all snapshots were destroyed (or marked for + * later destruction if 'defer' is set) or didn't exist to begin with. + * + * Otherwise the return value will be the errno of a (undetermined) snapshot + * that failed, no snapshots will be destroyed, and the errlist will have an + * entry for each snapshot that failed.  The value in the errlist will be + * the (int32) error code. 
+ */ +int +lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) +{ +	nvpair_t *elem; +	nvlist_t *args; +	int error; +	char pool[MAXNAMELEN]; + +	/* determine the pool name */ +	elem = nvlist_next_nvpair(snaps, NULL); +	if (elem == NULL) +		return (0); +	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); +	pool[strcspn(pool, "/@")] = '\0'; + +	args = fnvlist_alloc(); +	fnvlist_add_nvlist(args, "snaps", snaps); +	if (defer) +		fnvlist_add_boolean(args, "defer"); + +	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); +	nvlist_free(args); + +	return (error); + +} + +int +lzc_snaprange_space(const char *firstsnap, const char *lastsnap, +    uint64_t *usedp) +{ +	nvlist_t *args; +	nvlist_t *result; +	int err; +	char fs[MAXNAMELEN]; +	char *atp; + +	/* determine the fs name */ +	(void) strlcpy(fs, firstsnap, sizeof (fs)); +	atp = strchr(fs, '@'); +	if (atp == NULL) +		return (EINVAL); +	*atp = '\0'; + +	args = fnvlist_alloc(); +	fnvlist_add_string(args, "firstsnap", firstsnap); + +	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); +	nvlist_free(args); +	if (err == 0) +		*usedp = fnvlist_lookup_uint64(result, "used"); +	fnvlist_free(result); + +	return (err); +} + +boolean_t +lzc_exists(const char *dataset) +{ +	/* +	 * The objset_stats ioctl is still legacy, so we need to construct our +	 * own zfs_cmd_t rather than using zfsc_ioctl(). +	 */ +	zfs_cmd_t zc = { 0 }; + +	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); +	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be sent. 
+ */ +int +lzc_send(const char *snapname, const char *fromsnap, int fd) +{ +	nvlist_t *args; +	int err; + +	args = fnvlist_alloc(); +	fnvlist_add_int32(args, "fd", fd); +	if (fromsnap != NULL) +		fnvlist_add_string(args, "fromsnap", fromsnap); +	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); +	nvlist_free(args); +	return (err); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be estimated. + */ +int +lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +{ +	nvlist_t *args; +	nvlist_t *result; +	int err; + +	args = fnvlist_alloc(); +	if (fromsnap != NULL) +		fnvlist_add_string(args, "fromsnap", fromsnap); +	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); +	nvlist_free(args); +	if (err == 0) +		*spacep = fnvlist_lookup_uint64(result, "space"); +	nvlist_free(result); +	return (err); +} + +static int +recv_read(int fd, void *buf, int ilen) +{ +	char *cp = buf; +	int rv; +	int len = ilen; + +	do { +		rv = read(fd, cp, len); +		cp += rv; +		len -= rv; +	} while (rv > 0); + +	if (rv < 0 || len != 0) +		return (EIO); + +	return (0); +} + +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot.  Apply the specified properties a "received" properties + * (which can be overridden by locally-set properties).  If the stream is a + * clone, its origin snapshot must be specified by 'origin'.  The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). + */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, +    boolean_t force, int fd) +{ +	/* +	 * The receive ioctl is still legacy, so we need to construct our own +	 * zfs_cmd_t rather than using zfsc_ioctl(). 
+	 */ +	zfs_cmd_t zc = { 0 }; +	char *atp; +	char *packed = NULL; +	size_t size; +	dmu_replay_record_t drr; +	int error; + +	ASSERT3S(g_refcount, >, 0); + +	/* zc_name is name of containing filesystem */ +	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); +	atp = strchr(zc.zc_name, '@'); +	if (atp == NULL) +		return (EINVAL); +	*atp = '\0'; + +	/* if the fs does not exist, try its parent. */ +	if (!lzc_exists(zc.zc_name)) { +		char *slashp = strrchr(zc.zc_name, '/'); +		if (slashp == NULL) +			return (ENOENT); +		*slashp = '\0'; + +	} + +	/* zc_value is full name of the snapshot to create */ +	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + +	if (props != NULL) { +		/* zc_nvlist_src is props to set */ +		packed = fnvlist_pack(props, &size); +		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; +		zc.zc_nvlist_src_size = size; +	} + +	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ +	if (origin != NULL) +		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + +	/* zc_begin_record is non-byteswapped BEGIN record */ +	error = recv_read(fd, &drr, sizeof (drr)); +	if (error != 0) +		goto out; +	zc.zc_begin_record = drr.drr_u.drr_begin; + +	/* zc_cookie is fd to read from */ +	zc.zc_cookie = fd; + +	/* zc guid is force flag */ +	zc.zc_guid = force; + +	/* zc_cleanup_fd is unused */ +	zc.zc_cleanup_fd = -1; + +	error = ioctl(g_fd, ZFS_IOC_RECV, &zc); +	if (error != 0) +		error = errno; + +out: +	if (packed != NULL) +		fnvlist_pack_free(packed, size); +	free((void*)(uintptr_t)zc.zc_nvlist_dst); +	return (error); +} diff --git a/lib/libzfs_core/common/libzfs_core.h b/lib/libzfs_core/common/libzfs_core.h new file mode 100644 index 000000000000..9edc884a14d1 --- /dev/null +++ b/lib/libzfs_core/common/libzfs_core.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#ifndef	_LIBZFS_CORE_H +#define	_LIBZFS_CORE_H + +#include <libnvpair.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/fs/zfs.h> + +#ifdef	__cplusplus +extern "C" { +#endif + +int libzfs_core_init(void); +void libzfs_core_fini(void); + +int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist); +int lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props); +int lzc_clone(const char *fsname, const char *origin, nvlist_t *props); +int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist); + +int lzc_snaprange_space(const char *firstsnap, const char *lastsnap, +    uint64_t *usedp); + +int lzc_send(const char *snapname, const char *fromsnap, int fd); +int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, +    boolean_t force, int fd); +int lzc_send_space(const char *snapname, const char *fromsnap, +    uint64_t *result); + +boolean_t lzc_exists(const char *dataset); + + +#ifdef	__cplusplus +} +#endif + +#endif	/* _LIBZFS_CORE_H */ diff --git a/lib/libzpool/common/kernel.c b/lib/libzpool/common/kernel.c index 8e1e7f7e649e..04d530727f5c 100644 --- a/lib/libzpool/common/kernel.c +++ b/lib/libzpool/common/kernel.c @@ -871,6 +871,12 @@ crgetuid(cred_t 
*cr)  	return (0);  } +uid_t +crgetruid(cred_t *cr) +{ +	return (0); +} +  gid_t  crgetgid(cred_t *cr)  { diff --git a/lib/libzpool/common/sys/zfs_context.h b/lib/libzpool/common/sys/zfs_context.h index 1f5e758721b6..39af927f7105 100644 --- a/lib/libzpool/common/sys/zfs_context.h +++ b/lib/libzpool/common/sys/zfs_context.h @@ -286,6 +286,7 @@ extern void rw_exit(krwlock_t *rwlp);  #define	rw_downgrade(rwlp) do { } while (0)  extern uid_t crgetuid(cred_t *cr); +extern uid_t crgetruid(cred_t *cr);  extern gid_t crgetgid(cred_t *cr);  extern int crgetngroups(cred_t *cr);  extern gid_t *crgetgroups(cred_t *cr); diff --git a/man/man1m/zfs.1m b/man/man1m/zfs.1m index e713566ba2ff..32b0cb282ef9 100644 --- a/man/man1m/zfs.1m +++ b/man/man1m/zfs.1m @@ -39,7 +39,7 @@ zfs \- configures ZFS file systems  .LP  .nf  \fBzfs\fR \fBsnapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... -      \fIfilesystem@snapname\fR|\fIvolume@snapname\fR +      \fIfilesystem@snapname\fR|\fIvolume@snapname\fR...  .fi  .LP @@ -1837,13 +1837,14 @@ behavior for mounted file systems in use.  .ne 2  .na  \fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... -\fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR +\fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR...  .ad  .sp .6  .RS 4n -Creates a snapshot with the given name. All previous modifications by -successful system calls to the file system are part of the snapshot. See the -"Snapshots" section for details. +Creates snapshots with the given names. All previous modifications by +successful system calls to the file system are part of the snapshots. +Snapshots are taken atomically, so that all snapshots correspond to the same +moment in time. See the "Snapshots" section for details.  .sp  .ne 2  .na @@ -1851,9 +1852,7 @@ successful system calls to the file system are part of the snapshot. See the  .ad  .sp .6  .RS 4n -Recursively create snapshots of all descendent datasets. 
Snapshots are taken -atomically, so that all recursive snapshots correspond to the same moment in -time. +Recursively create snapshots of all descendent datasets  .RE  .sp diff --git a/uts/common/fs/zfs/dmu_objset.c b/uts/common/fs/zfs/dmu_objset.c index 7caebd979f02..09c4ecf4dd58 100644 --- a/uts/common/fs/zfs/dmu_objset.c +++ b/uts/common/fs/zfs/dmu_objset.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  /* Portions Copyright 2010 Robert Milkowski */ @@ -699,30 +700,33 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	spa_t *spa = dd->dd_pool->dp_spa;  	struct oscarg *oa = arg2;  	uint64_t obj; +	dsl_dataset_t *ds; +	blkptr_t *bp;  	ASSERT(dmu_tx_is_syncing(tx));  	obj = dsl_dataset_create_sync(dd, oa->lastname,  	    oa->clone_origin, oa->flags, oa->cr, tx); -	if (oa->clone_origin == NULL) { -		dsl_pool_t *dp = dd->dd_pool; -		dsl_dataset_t *ds; -		blkptr_t *bp; -		objset_t *os; - -		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); -		bp = dsl_dataset_get_blkptr(ds); -		ASSERT(BP_IS_HOLE(bp)); - -		os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); +	VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); +	bp = dsl_dataset_get_blkptr(ds); +	if (BP_IS_HOLE(bp)) { +		objset_t *os = +		    dmu_objset_create_impl(spa, ds, bp, oa->type, tx);  		if (oa->userfunc)  			oa->userfunc(os, oa->userarg, oa->cr, tx); -		dsl_dataset_rele(ds, FTAG);  	} -	spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); +	if (oa->clone_origin == NULL) { +		spa_history_log_internal_ds(ds, "create", tx, ""); +	} else { +		char namebuf[MAXNAMELEN]; +		dsl_dataset_name(oa->clone_origin, namebuf); +		spa_history_log_internal_ds(ds, "clone", tx, +		    "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); +	} +	dsl_dataset_rele(ds, FTAG);  }  int @@ -799,34 +803,40 @@ dmu_objset_destroy(const char *name, 
boolean_t defer)  	return (error);  } -struct snaparg { -	dsl_sync_task_group_t *dstg; -	char *snapname; -	char *htag; -	char failed[MAXPATHLEN]; -	boolean_t recursive; -	boolean_t needsuspend; -	boolean_t temporary; -	nvlist_t *props; -	struct dsl_ds_holdarg *ha;	/* only needed in the temporary case */ -	dsl_dataset_t *newds; -}; +typedef struct snapallarg { +	dsl_sync_task_group_t *saa_dstg; +	boolean_t saa_needsuspend; +	nvlist_t *saa_props; + +	/* the following are used only if 'temporary' is set: */ +	boolean_t saa_temporary; +	const char *saa_htag; +	struct dsl_ds_holdarg *saa_ha; +	dsl_dataset_t *saa_newds; +} snapallarg_t; + +typedef struct snaponearg { +	const char *soa_longname; /* long snap name */ +	const char *soa_snapname; /* short snap name */ +	snapallarg_t *soa_saa; +} snaponearg_t;  static int  snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)  {  	objset_t *os = arg1; -	struct snaparg *sn = arg2; +	snaponearg_t *soa = arg2; +	snapallarg_t *saa = soa->soa_saa;  	int error;  	/* The props have already been checked by zfs_check_userprops(). */  	error = dsl_dataset_snapshot_check(os->os_dsl_dataset, -	    sn->snapname, tx); +	    soa->soa_snapname, tx);  	if (error)  		return (error); -	if (sn->temporary) { +	if (saa->saa_temporary) {  		/*  		 * Ideally we would just call  		 * dsl_dataset_user_hold_check() and @@ -844,12 +854,13 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)  		 * Not checking number of tags because the tag will be  		 * unique, as it will be the only tag.  		 
*/ -		if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) +		if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)  			return (E2BIG); -		sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); -		sn->ha->temphold = B_TRUE; -		sn->ha->htag = sn->htag; +		saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), +		    KM_SLEEP); +		saa->saa_ha->temphold = B_TRUE; +		saa->saa_ha->htag = saa->saa_htag;  	}  	return (error);  } @@ -859,24 +870,25 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)  {  	objset_t *os = arg1;  	dsl_dataset_t *ds = os->os_dsl_dataset; -	struct snaparg *sn = arg2; +	snaponearg_t *soa = arg2; +	snapallarg_t *saa = soa->soa_saa; -	dsl_dataset_snapshot_sync(ds, sn->snapname, tx); +	dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); -	if (sn->props) { +	if (saa->saa_props != NULL) {  		dsl_props_arg_t pa; -		pa.pa_props = sn->props; +		pa.pa_props = saa->saa_props;  		pa.pa_source = ZPROP_SRC_LOCAL;  		dsl_props_set_sync(ds->ds_prev, &pa, tx);  	} -	if (sn->temporary) { +	if (saa->saa_temporary) {  		struct dsl_ds_destroyarg da; -		dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); -		kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); -		sn->ha = NULL; -		sn->newds = ds->ds_prev; +		dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx); +		kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); +		saa->saa_ha = NULL; +		saa->saa_newds = ds->ds_prev;  		da.ds = ds->ds_prev;  		da.defer = B_TRUE; @@ -885,131 +897,180 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)  }  static int -dmu_objset_snapshot_one(const char *name, void *arg) +snapshot_one_impl(const char *snapname, void *arg)  { -	struct snaparg *sn = arg; +	char fsname[MAXPATHLEN]; +	snapallarg_t *saa = arg; +	snaponearg_t *soa;  	objset_t *os;  	int err; -	char *cp; - -	/* -	 * If the objset starts with a '%', then ignore it unless it was -	 * explicitly named (ie, not recursive).  
These hidden datasets -	 * are always inconsistent, and by not opening them here, we can -	 * avoid a race with dsl_dir_destroy_check(). -	 */ -	cp = strrchr(name, '/'); -	if (cp && cp[1] == '%' && sn->recursive) -		return (0); -	(void) strcpy(sn->failed, name); - -	/* -	 * Check permissions if we are doing a recursive snapshot.  The -	 * permission checks for the starting dataset have already been -	 * performed in zfs_secpolicy_snapshot() -	 */ -	if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) -		return (err); +	(void) strlcpy(fsname, snapname, sizeof (fsname)); +	strchr(fsname, '@')[0] = '\0'; -	err = dmu_objset_hold(name, sn, &os); +	err = dmu_objset_hold(fsname, saa, &os);  	if (err != 0)  		return (err);  	/*  	 * If the objset is in an inconsistent state (eg, in the process -	 * of being destroyed), don't snapshot it.  As with %hidden -	 * datasets, we return EBUSY if this name was explicitly -	 * requested (ie, not recursive), and otherwise ignore it. +	 * of being destroyed), don't snapshot it.  	 */  	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { -		dmu_objset_rele(os, sn); -		return (sn->recursive ? 0 : EBUSY); +		dmu_objset_rele(os, saa); +		return (EBUSY);  	} -	if (sn->needsuspend) { +	if (saa->saa_needsuspend) {  		err = zil_suspend(dmu_objset_zil(os));  		if (err) { -			dmu_objset_rele(os, sn); +			dmu_objset_rele(os, saa);  			return (err);  		}  	} -	dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, -	    os, sn, 3); + +	soa = kmem_zalloc(sizeof (*soa), KM_SLEEP); +	soa->soa_saa = saa; +	soa->soa_longname = snapname; +	soa->soa_snapname = strchr(snapname, '@') + 1; + +	dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, +	    os, soa, 3);  	return (0);  } +/* + * The snapshots must all be in the same pool. 
+ */  int -dmu_objset_snapshot(char *fsname, char *snapname, char *tag, -    nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)  {  	dsl_sync_task_t *dst; -	struct snaparg sn; +	snapallarg_t saa = { 0 };  	spa_t *spa; -	minor_t minor; +	int rv = 0;  	int err; +	nvpair_t *pair; -	(void) strcpy(sn.failed, fsname); +	pair = nvlist_next_nvpair(snaps, NULL); +	if (pair == NULL) +		return (0); -	err = spa_open(fsname, &spa, FTAG); +	err = spa_open(nvpair_name(pair), &spa, FTAG);  	if (err)  		return (err); - -	if (temporary) { -		if (cleanup_fd < 0) { -			spa_close(spa, FTAG); -			return (EINVAL); +	saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); +	saa.saa_props = props; +	saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + +	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; +	    pair = nvlist_next_nvpair(snaps, pair)) { +		err = snapshot_one_impl(nvpair_name(pair), &saa); +		if (err != 0) { +			if (errors != NULL) { +				fnvlist_add_int32(errors, +				    nvpair_name(pair), err); +			} +			rv = err;  		} -		if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { -			spa_close(spa, FTAG); -			return (err); +	} + +	/* +	 * If any call to snapshot_one_impl() failed, don't execute the +	 * sync task.  The error handling code below will clean up the +	 * snaponearg_t from any successful calls to +	 * snapshot_one_impl(). 
+	 */ +	if (rv == 0) +		err = dsl_sync_task_group_wait(saa.saa_dstg); +	if (err != 0) +		rv = err; + +	for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; +	    dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { +		objset_t *os = dst->dst_arg1; +		snaponearg_t *soa = dst->dst_arg2; +		if (dst->dst_err != 0) { +			if (errors != NULL) { +				fnvlist_add_int32(errors, +				    soa->soa_longname, dst->dst_err); +			} +			rv = dst->dst_err;  		} + +		if (saa.saa_needsuspend) +			zil_resume(dmu_objset_zil(os)); +		dmu_objset_rele(os, &saa); +		kmem_free(soa, sizeof (*soa));  	} -	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); -	sn.snapname = snapname; -	sn.htag = tag; -	sn.props = props; -	sn.recursive = recursive; -	sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); -	sn.temporary = temporary; -	sn.ha = NULL; -	sn.newds = NULL; - -	if (recursive) { -		err = dmu_objset_find(fsname, -		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); -	} else { -		err = dmu_objset_snapshot_one(fsname, &sn); +	dsl_sync_task_group_destroy(saa.saa_dstg); +	spa_close(spa, FTAG); +	return (rv); +} + +int +dmu_objset_snapshot_one(const char *fsname, const char *snapname) +{ +	int err; +	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); +	nvlist_t *snaps = fnvlist_alloc(); + +	fnvlist_add_boolean(snaps, longsnap); +	err = dmu_objset_snapshot(snaps, NULL, NULL); +	fnvlist_free(snaps); +	strfree(longsnap); +	return (err); +} + +int +dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) +{ +	dsl_sync_task_t *dst; +	snapallarg_t saa = { 0 }; +	spa_t *spa; +	minor_t minor; +	int err; + +	err = spa_open(snapname, &spa, FTAG); +	if (err) +		return (err); +	saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); +	saa.saa_htag = tag; +	saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); +	saa.saa_temporary = B_TRUE; + +	if (cleanup_fd < 0) { +		spa_close(spa, FTAG); +		return (EINVAL); +	} +	if ((err = 
zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { +		spa_close(spa, FTAG); +		return (err);  	} +	err = snapshot_one_impl(snapname, &saa); +  	if (err == 0) -		err = dsl_sync_task_group_wait(sn.dstg); +		err = dsl_sync_task_group_wait(saa.saa_dstg); -	for (dst = list_head(&sn.dstg->dstg_tasks); dst; -	    dst = list_next(&sn.dstg->dstg_tasks, dst)) { +	for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; +	    dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {  		objset_t *os = dst->dst_arg1; -		dsl_dataset_t *ds = os->os_dsl_dataset; -		if (dst->dst_err) { -			dsl_dataset_name(ds, sn.failed); -		} else if (temporary) { -			dsl_register_onexit_hold_cleanup(sn.newds, tag, minor); -		} -		if (sn.needsuspend) +		dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); +		if (saa.saa_needsuspend)  			zil_resume(dmu_objset_zil(os)); -		dmu_objset_rele(os, &sn); +		dmu_objset_rele(os, &saa);  	} -	if (err) -		(void) strcpy(fsname, sn.failed); -	if (temporary) -		zfs_onexit_fd_rele(cleanup_fd); -	dsl_sync_task_group_destroy(sn.dstg); +	zfs_onexit_fd_rele(cleanup_fd); +	dsl_sync_task_group_destroy(saa.saa_dstg);  	spa_close(spa, FTAG);  	return (err);  } +  static void  dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)  { diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c index 96fb0c02be95..5a2c6e2ce759 100644 --- a/uts/common/fs/zfs/dmu_send.c +++ b/uts/common/fs/zfs/dmu_send.c @@ -387,9 +387,48 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,  	return (err);  } +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'.  Or 'earlier' could be the origin of + * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's + * filesystem.  Or 'earlier' could be the origin's origin. 
+ */ +static boolean_t +is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ +	dsl_pool_t *dp = later->ds_dir->dd_pool; +	int error; +	boolean_t ret; +	dsl_dataset_t *origin; + +	if (earlier->ds_phys->ds_creation_txg >= +	    later->ds_phys->ds_creation_txg) +		return (B_FALSE); + +	if (later->ds_dir == earlier->ds_dir) +		return (B_TRUE); +	if (!dsl_dir_is_clone(later->ds_dir)) +		return (B_FALSE); + +	rw_enter(&dp->dp_config_rwlock, RW_READER); +	if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { +		rw_exit(&dp->dp_config_rwlock); +		return (B_TRUE); +	} +	error = dsl_dataset_hold_obj(dp, +	    later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); +	rw_exit(&dp->dp_config_rwlock); +	if (error != 0) +		return (B_FALSE); +	ret = is_before(origin, earlier); +	dsl_dataset_rele(origin, FTAG); +	return (ret); +} +  int -dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, -    int outfd, vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, +    offset_t *off)  {  	dsl_dataset_t *ds = tosnap->os_dsl_dataset;  	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -402,30 +441,13 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,  	if (ds->ds_phys->ds_next_snap_obj == 0)  		return (EINVAL); -	/* fromsnap must be an earlier snapshot from the same fs as tosnap */ -	if (fromds && (ds->ds_dir != fromds->ds_dir || -	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) +	/* +	 * fromsnap must be an earlier snapshot from the same fs as tosnap, +	 * or the origin's fs. 
+	 */ +	if (fromds != NULL && !is_before(ds, fromds))  		return (EXDEV); -	if (fromorigin) { -		dsl_pool_t *dp = ds->ds_dir->dd_pool; - -		if (fromsnap) -			return (EINVAL); - -		if (dsl_dir_is_clone(ds->ds_dir)) { -			rw_enter(&dp->dp_config_rwlock, RW_READER); -			err = dsl_dataset_hold_obj(dp, -			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); -			rw_exit(&dp->dp_config_rwlock); -			if (err) -				return (err); -		} else { -			fromorigin = B_FALSE; -		} -	} - -  	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);  	drr->drr_type = DRR_BEGIN;  	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -450,7 +472,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,  	drr->drr_u.drr_begin.drr_creation_time =  	    ds->ds_phys->ds_creation_time;  	drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; -	if (fromorigin) +	if (fromds != NULL && ds->ds_dir != fromds->ds_dir)  		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;  	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;  	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) @@ -462,8 +484,6 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,  	if (fromds)  		fromtxg = fromds->ds_phys->ds_creation_txg; -	if (fromorigin) -		dsl_dataset_rele(fromds, FTAG);  	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -521,8 +541,7 @@ out:  }  int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, -    uint64_t *sizep) +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)  {  	dsl_dataset_t *ds = tosnap->os_dsl_dataset;  	dsl_dataset_t *fromds = fromsnap ? 
fromsnap->os_dsl_dataset : NULL; @@ -534,27 +553,13 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,  	if (ds->ds_phys->ds_next_snap_obj == 0)  		return (EINVAL); -	/* fromsnap must be an earlier snapshot from the same fs as tosnap */ -	if (fromds && (ds->ds_dir != fromds->ds_dir || -	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) +	/* +	 * fromsnap must be an earlier snapshot from the same fs as tosnap, +	 * or the origin's fs. +	 */ +	if (fromds != NULL && !is_before(ds, fromds))  		return (EXDEV); -	if (fromorigin) { -		if (fromsnap) -			return (EINVAL); - -		if (dsl_dir_is_clone(ds->ds_dir)) { -			rw_enter(&dp->dp_config_rwlock, RW_READER); -			err = dsl_dataset_hold_obj(dp, -			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); -			rw_exit(&dp->dp_config_rwlock); -			if (err) -				return (err); -		} else { -			fromorigin = B_FALSE; -		} -	} -  	/* Get uncompressed size estimate of changed data. */  	if (fromds == NULL) {  		size = ds->ds_phys->ds_uncompressed_bytes; @@ -562,8 +567,6 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,  		uint64_t used, comp;  		err = dsl_dataset_space_written(fromds, ds,  		    &used, &comp, &size); -		if (fromorigin) -			dsl_dataset_rele(fromds, FTAG);  		if (err)  			return (err);  	} @@ -662,8 +665,7 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)  		    rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);  	} -	spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, -	    dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); +	spa_history_log_internal_ds(rbsa->ds, "receive new", tx, "");  }  /* ARGSUSED */ @@ -764,8 +766,7 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	rbsa->ds = cds; -	spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, -	    dp->dp_spa, tx, "dataset = %lld", dsobj); +	spa_history_log_internal_ds(cds, "receive over existing", tx, "");  }  static boolean_t @@ -1573,6 +1574,7 @@ recv_end_sync(void *arg1, void 
*arg2, dmu_tx_t *tx)  	dmu_buf_will_dirty(ds->ds_dbuf, tx);  	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; +	spa_history_log_internal_ds(ds, "finished receiving", tx, "");  }  static int diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c index 3dd5f2c573d7..723d62b48542 100644 --- a/uts/common/fs/zfs/dmu_tx.c +++ b/uts/common/fs/zfs/dmu_tx.c @@ -48,7 +48,7 @@ dmu_tx_create_dd(dsl_dir_t *dd)  {  	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);  	tx->tx_dir = dd; -	if (dd) +	if (dd != NULL)  		tx->tx_pool = dd->dd_pool;  	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),  	    offsetof(dmu_tx_hold_t, txh_node)); diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c index ccfa71c672a2..555797e77efe 100644 --- a/uts/common/fs/zfs/dsl_dataset.c +++ b/uts/common/fs/zfs/dsl_dataset.c @@ -914,7 +914,8 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,   * The snapshots must all be in the same pool.   */  int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, +    nvlist_t *errlist)  {  	int err;  	dsl_sync_task_t *dst; @@ -949,7 +950,7 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)  		} else if (err == ENOENT) {  			err = 0;  		} else { -			(void) strcpy(failed, nvpair_name(pair)); +			fnvlist_add_int32(errlist, nvpair_name(pair), err);  			break;  		}  	} @@ -963,10 +964,12 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)  		dsl_dataset_t *ds = dsda->ds;  		/* -		 * Return the file system name that triggered the error +		 * Return the snapshots that triggered the error.  		 
*/ -		if (dst->dst_err) { -			dsl_dataset_name(ds, failed); +		if (dst->dst_err != 0) { +			char name[ZFS_MAXNAMELEN]; +			dsl_dataset_name(ds, name); +			fnvlist_add_int32(errlist, name, dst->dst_err);  		}  		ASSERT3P(dsda->rm_origin, ==, NULL);  		dsl_dataset_disown(ds, dstg); @@ -1045,7 +1048,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)  	dsl_dir_t *dd;  	uint64_t obj;  	struct dsl_ds_destroyarg dsda = { 0 }; -	dsl_dataset_t dummy_ds = { 0 };  	dsda.ds = ds; @@ -1065,8 +1067,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)  	}  	dd = ds->ds_dir; -	dummy_ds.ds_dir = dd; -	dummy_ds.ds_object = ds->ds_object;  	/*  	 * Check for errors and mark this ds as inconsistent, in @@ -1153,7 +1153,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)  		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,  		    dsl_dataset_destroy_sync, &dsda, tag, 0);  		dsl_sync_task_create(dstg, dsl_dir_destroy_check, -		    dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); +		    dsl_dir_destroy_sync, dd, FTAG, 0);  		err = dsl_sync_task_group_wait(dstg);  		dsl_sync_task_group_destroy(dstg); @@ -1328,14 +1328,12 @@ static void  dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)  {  	dsl_dataset_t *ds = arg1; -	dsl_pool_t *dp = ds->ds_dir->dd_pool;  	/* Mark it as inconsistent on-disk, in case we crash */  	dmu_buf_will_dirty(ds->ds_dbuf, tx);  	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; -	spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, -	    "dataset = %llu", ds->ds_object); +	spa_history_log_internal_ds(ds, "destroy begin", tx, "");  }  static int @@ -1660,9 +1658,13 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)  		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);  		dmu_buf_will_dirty(ds->ds_dbuf, tx);  		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; +		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");  		return;  	} +	/* We need to log before 
removing it from the namespace. */ +	spa_history_log_internal_ds(ds, "destroy", tx, ""); +  	/* signal any waiters that this dataset is going away */  	mutex_enter(&ds->ds_lock);  	ds->ds_owner = dsl_reaper; @@ -1957,8 +1959,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)  		dsl_dataset_rele(ds_prev, FTAG);  	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); -	spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, -	    "dataset = %llu", ds->ds_object);  	if (ds->ds_phys->ds_next_clones_obj != 0) {  		uint64_t count; @@ -2006,7 +2006,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)  		return (ENOSPC);  	/* -	 * Propogate any reserved space for this snapshot to other +	 * Propagate any reserved space for this snapshot to other  	 * snapshot checks in this sync group.  	 */  	if (asize > 0) @@ -2016,10 +2016,9 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)  }  int -dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, +    dmu_tx_t *tx)  { -	dsl_dataset_t *ds = arg1; -	const char *snapname = arg2;  	int err;  	uint64_t value; @@ -2031,7 +2030,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)  		return (EAGAIN);  	/* -	 * Check for conflicting name snapshot name. +	 * Check for conflicting snapshot name.  	 
*/  	err = dsl_dataset_snap_lookup(ds, snapname, &value);  	if (err == 0) @@ -2055,10 +2054,9 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)  }  void -dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, +    dmu_tx_t *tx)  { -	dsl_dataset_t *ds = arg1; -	const char *snapname = arg2;  	dsl_pool_t *dp = ds->ds_dir->dd_pool;  	dmu_buf_t *dbuf;  	dsl_dataset_phys_t *dsphys; @@ -2164,8 +2162,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	dsl_dir_snap_cmtime_update(ds->ds_dir); -	spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, -	    "dataset = %llu", dsobj); +	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");  }  void @@ -2252,7 +2249,20 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)  {  	uint64_t refd, avail, uobjs, aobjs, ratio; -	dsl_dir_stats(ds->ds_dir, nv); +	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : +	    (ds->ds_phys->ds_uncompressed_bytes * 100 / +	    ds->ds_phys->ds_compressed_bytes); + +	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + +	if (dsl_dataset_is_snapshot(ds)) { +		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); +		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, +		    ds->ds_phys->ds_unique_bytes); +		get_clones_stat(ds, nv); +	} else { +		dsl_dir_stats(ds->ds_dir, nv); +	}  	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);  	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2297,22 +2307,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)  		}  	} -	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : -	    (ds->ds_phys->ds_uncompressed_bytes * 100 / -	    ds->ds_phys->ds_compressed_bytes); -	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); - -	if (ds->ds_phys->ds_next_snap_obj) { -		/* -		 * This is a snapshot; override the dd's space used with -		 * our unique space and compression ratio. 
-		 */ -		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, -		    ds->ds_phys->ds_unique_bytes); -		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - -		get_clones_stat(ds, nv); -	}  }  void @@ -2321,27 +2315,25 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)  	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;  	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;  	stat->dds_guid = ds->ds_phys->ds_guid; -	if (ds->ds_phys->ds_next_snap_obj) { +	stat->dds_origin[0] = '\0'; +	if (dsl_dataset_is_snapshot(ds)) {  		stat->dds_is_snapshot = B_TRUE;  		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;  	} else {  		stat->dds_is_snapshot = B_FALSE;  		stat->dds_num_clones = 0; -	} -	/* clone origin is really a dsl_dir thing... */ -	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); -	if (dsl_dir_is_clone(ds->ds_dir)) { -		dsl_dataset_t *ods; +		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); +		if (dsl_dir_is_clone(ds->ds_dir)) { +			dsl_dataset_t *ods; -		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, -		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); -		dsl_dataset_name(ods, stat->dds_origin); -		dsl_dataset_drop_ref(ods, FTAG); -	} else { -		stat->dds_origin[0] = '\0'; +			VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, +			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); +			dsl_dataset_name(ods, stat->dds_origin); +			dsl_dataset_drop_ref(ods, FTAG); +		} +		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);  	} -	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);  }  uint64_t @@ -2458,8 +2450,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);  	ASSERT3U(err, ==, 0); -	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, -	    "dataset = %llu", ds->ds_object); +	spa_history_log_internal_ds(ds, "rename", tx, +	    "-> @%s", newsnapname);  	dsl_dataset_rele(hds, FTAG);  } @@ 
-2939,8 +2931,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	origin_ds->ds_phys->ds_unique_bytes = pa->unique;  	/* log history record */ -	spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, -	    "dataset = %llu", hds->ds_object); +	spa_history_log_internal_ds(hds, "promote", tx, "");  	dsl_dir_close(odd, FTAG);  } @@ -3298,6 +3289,9 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	    csa->ohds->ds_phys->ds_deadlist_obj);  	dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); + +	spa_history_log_internal_ds(csa->cds, "clone swap", tx, +	    "parent=%s", csa->ohds->ds_dir->dd_myname);  }  /* @@ -3454,9 +3448,8 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)  		dmu_buf_will_dirty(ds->ds_dbuf, tx);  		ds->ds_quota = effective_value; -		spa_history_log_internal(LOG_DS_REFQUOTA, -		    ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", -		    (longlong_t)ds->ds_quota, ds->ds_object); +		spa_history_log_internal_ds(ds, "set refquota", tx, +		    "refquota=%lld", (longlong_t)ds->ds_quota);  	}  } @@ -3561,9 +3554,8 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);  	mutex_exit(&ds->ds_dir->dd_lock); -	spa_history_log_internal(LOG_DS_REFRESERV, -	    ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu", -	    (longlong_t)effective_value, ds->ds_object); +	spa_history_log_internal_ds(ds, "set refreservation", tx, +	    "refreservation=%lld", (longlong_t)effective_value);  }  int @@ -3629,7 +3621,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)  {  	dsl_dataset_t *ds = arg1;  	struct dsl_ds_holdarg *ha = arg2; -	char *htag = ha->htag; +	const char *htag = ha->htag;  	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;  	int error = 0; @@ -3663,7 +3655,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)  {  	dsl_dataset_t *ds = arg1;  	struct 
dsl_ds_holdarg *ha = arg2; -	char *htag = ha->htag; +	const char *htag = ha->htag;  	dsl_pool_t *dp = ds->ds_dir->dd_pool;  	objset_t *mos = dp->dp_meta_objset;  	uint64_t now = gethrestime_sec(); @@ -3691,9 +3683,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)  		    htag, &now, tx));  	} -	spa_history_log_internal(LOG_DS_USER_HOLD, -	    dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, -	    (int)ha->temphold, ds->ds_object); +	spa_history_log_internal_ds(ds, "hold", tx, +	    "tag = %s temp = %d holds now = %llu", +	    htag, (int)ha->temphold, ds->ds_userrefs);  }  static int @@ -3900,7 +3892,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)  	dsl_pool_t *dp = ds->ds_dir->dd_pool;  	objset_t *mos = dp->dp_meta_objset;  	uint64_t zapobj; -	uint64_t dsobj = ds->ds_object;  	uint64_t refs;  	int error; @@ -3923,9 +3914,8 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)  		dsl_dataset_destroy_sync(&dsda, tag, tx);  	} -	spa_history_log_internal(LOG_DS_USER_RELEASE, -	    dp->dp_spa, tx, "<%s> %lld dataset = %llu", -	    ra->htag, (longlong_t)refs, dsobj); +	spa_history_log_internal_ds(ds, "release", tx, +	    "tag = %s refs now = %lld", ra->htag, (longlong_t)refs);  }  static int diff --git a/uts/common/fs/zfs/dsl_deleg.c b/uts/common/fs/zfs/dsl_deleg.c index c13ddd4aa9ee..ba620bd6fbed 100644 --- a/uts/common/fs/zfs/dsl_deleg.c +++ b/uts/common/fs/zfs/dsl_deleg.c @@ -181,10 +181,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)  			VERIFY(zap_update(mos, jumpobj,  			    perm, 8, 1, &n, tx) == 0); -			spa_history_log_internal(LOG_DS_PERM_UPDATE, -			    dd->dd_pool->dp_spa, tx, -			    "%s %s dataset = %llu", whokey, perm, -			    dd->dd_phys->dd_head_dataset_obj); +			spa_history_log_internal_dd(dd, "permission update", tx, +			    "%s %s", whokey, perm);  		}  	}  } @@ -213,10 +211,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)  				(void) zap_remove(mos, zapobj, 
whokey, tx);  				VERIFY(0 == zap_destroy(mos, jumpobj, tx));  			} -			spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, -			    dd->dd_pool->dp_spa, tx, -			    "%s dataset = %llu", whokey, -			    dd->dd_phys->dd_head_dataset_obj); +			spa_history_log_internal_dd(dd, "permission who remove", +			    tx, "%s", whokey);  			continue;  		} @@ -234,10 +230,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)  				VERIFY(0 == zap_destroy(mos,  				    jumpobj, tx));  			} -			spa_history_log_internal(LOG_DS_PERM_REMOVE, -			    dd->dd_pool->dp_spa, tx, -			    "%s %s dataset = %llu", whokey, perm, -			    dd->dd_phys->dd_head_dataset_obj); +			spa_history_log_internal_dd(dd, "permission remove", tx, +			    "%s %s", whokey, perm);  		}  	}  } @@ -524,12 +518,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl,  }  /* - * Check if user has requested permission.  If descendent is set, must have - * descendent perms. + * Check if user has requested permission.   */  int -dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, -    cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)  {  	dsl_dir_t *dd;  	dsl_pool_t *dp; @@ -550,7 +542,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,  	    SPA_VERSION_DELEGATED_PERMS)  		return (EPERM); -	if (dsl_dataset_is_snapshot(ds) || descendent) { +	if (dsl_dataset_is_snapshot(ds)) {  		/*  		 * Snapshots are treated as descendents only,  		 * local permissions do not apply. 
@@ -643,7 +635,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)  	if (error)  		return (error); -	error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); +	error = dsl_deleg_access_impl(ds, perm, cr);  	dsl_dataset_rele(ds, FTAG);  	return (error); diff --git a/uts/common/fs/zfs/dsl_dir.c b/uts/common/fs/zfs/dsl_dir.c index 1cd49c8274e8..74c1050fabf0 100644 --- a/uts/common/fs/zfs/dsl_dir.c +++ b/uts/common/fs/zfs/dsl_dir.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  #include <sys/dmu.h> @@ -39,8 +40,8 @@  #include "zfs_namecheck.h"  static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); - +static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, +    uint64_t value, dmu_tx_t *tx);  /* ARGSUSED */  static void @@ -447,8 +448,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,  int  dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)  { -	dsl_dataset_t *ds = arg1; -	dsl_dir_t *dd = ds->ds_dir; +	dsl_dir_t *dd = arg1;  	dsl_pool_t *dp = dd->dd_pool;  	objset_t *mos = dp->dp_meta_objset;  	int err; @@ -477,24 +477,19 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)  void  dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)  { -	dsl_dataset_t *ds = arg1; -	dsl_dir_t *dd = ds->ds_dir; +	dsl_dir_t *dd = arg1;  	objset_t *mos = dd->dd_pool->dp_meta_objset; -	dsl_prop_setarg_t psa; -	uint64_t value = 0;  	uint64_t obj;  	dd_used_t t;  	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));  	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); -	/* Remove our reservation. 
*/ -	dsl_prop_setarg_init_uint64(&psa, "reservation", -	    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), -	    &value); -	psa.psa_effective_value = 0;	/* predict default value */ - -	dsl_dir_set_reservation_sync(ds, &psa, tx); +	/* +	 * Remove our reservation. The impl() routine avoids setting the +	 * actual property, which would require the (already destroyed) ds. +	 */ +	dsl_dir_set_reservation_sync_impl(dd, 0, tx);  	ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);  	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); @@ -1060,9 +1055,8 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	dd->dd_phys->dd_quota = effective_value;  	mutex_exit(&dd->dd_lock); -	spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa, -	    tx, "%lld dataset = %llu ", -	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); +	spa_history_log_internal_dd(dd, "set quota", tx, +	    "quota=%lld", (longlong_t)effective_value);  }  int @@ -1149,25 +1143,17 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)  }  static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)  { -	dsl_dataset_t *ds = arg1; -	dsl_dir_t *dd = ds->ds_dir; -	dsl_prop_setarg_t *psa = arg2; -	uint64_t effective_value = psa->psa_effective_value;  	uint64_t used;  	int64_t delta; -	dsl_prop_set_sync(ds, psa, tx); -	DSL_PROP_CHECK_PREDICTION(dd, psa); -  	dmu_buf_will_dirty(dd->dd_dbuf, tx);  	mutex_enter(&dd->dd_lock);  	used = dd->dd_phys->dd_used_bytes; -	delta = MAX(used, effective_value) - -	    MAX(used, dd->dd_phys->dd_reserved); -	dd->dd_phys->dd_reserved = effective_value; +	delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); +	dd->dd_phys->dd_reserved = value;  	if (dd->dd_parent != NULL) {  		/* Roll up this additional usage into our ancestors */ @@ -1175,10 +1161,24 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)  		    delta, 
0, 0, tx);  	}  	mutex_exit(&dd->dd_lock); +} + + +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ +	dsl_dataset_t *ds = arg1; +	dsl_dir_t *dd = ds->ds_dir; +	dsl_prop_setarg_t *psa = arg2; +	uint64_t value = psa->psa_effective_value; + +	dsl_prop_set_sync(ds, psa, tx); +	DSL_PROP_CHECK_PREDICTION(dd, psa); -	spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa, -	    tx, "%lld dataset = %llu", -	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); +	dsl_dir_set_reservation_sync_impl(dd, value, tx); + +	spa_history_log_internal_dd(dd, "set reservation", tx, +	    "reservation=%lld", (longlong_t)value);  }  int @@ -1299,9 +1299,15 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	dsl_pool_t *dp = dd->dd_pool;  	objset_t *mos = dp->dp_meta_objset;  	int err; +	char namebuf[MAXNAMELEN];  	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); +	/* Log this before we change the name. */ +	dsl_dir_name(ra->newparent, namebuf); +	spa_history_log_internal_dd(dd, "rename", tx, +	    "-> %s/%s", namebuf, ra->mynewname); +  	if (ra->newparent != dd->dd_parent) {  		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,  		    -dd->dd_phys->dd_used_bytes, @@ -1341,8 +1347,6 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	    dd->dd_myname, 8, 1, &dd->dd_object, tx);  	ASSERT3U(err, ==, 0); -	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, -	    tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);  }  int diff --git a/uts/common/fs/zfs/dsl_prop.c b/uts/common/fs/zfs/dsl_prop.c index aa66b32e7938..5bbe14ff691d 100644 --- a/uts/common/fs/zfs/dsl_prop.c +++ b/uts/common/fs/zfs/dsl_prop.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  #include <sys/zfs_context.h> @@ -702,11 +703,9 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)  		}  	} -	spa_history_log_internal((source == ZPROP_SRC_NONE || -	    source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : -	    LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, -	    "%s=%s dataset = %llu", propname, -	    (valstr == NULL ? "" : valstr), ds->ds_object); +	spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || +	    source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx, +	    "%s=%s", propname, (valstr == NULL ? "" : valstr));  	if (tbuf != NULL)  		kmem_free(tbuf, ZAP_MAXVALUELEN); @@ -755,24 +754,6 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	}  } -void -dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, -    dmu_tx_t *tx) -{ -	objset_t *mos = dd->dd_pool->dp_meta_objset; -	uint64_t zapobj = dd->dd_phys->dd_props_zapobj; - -	ASSERT(dmu_tx_is_syncing(tx)); - -	VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); - -	dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); - -	spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, -	    "%s=%llu dataset = %llu", name, (u_longlong_t)val, -	    dd->dd_phys->dd_head_dataset_obj); -} -  int  dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,      int intsz, int numints, const void *buf) diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c index 328f4d085cc0..8f08f04a0655 100644 --- a/uts/common/fs/zfs/dsl_scan.c +++ b/uts/common/fs/zfs/dsl_scan.c @@ -228,7 +228,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	dsl_scan_sync_state(scn, tx); -	spa_history_log_internal(LOG_POOL_SCAN, spa, tx, +	spa_history_log_internal(spa, "scan setup", tx,  	    "func=%u mintxg=%llu maxtxg=%llu",  	    *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);  } @@ -277,7 +277,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)  	else  		
scn->scn_phys.scn_state = DSS_CANCELED; -	spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, +	spa_history_log_internal(spa, "scan done", tx,  	    "complete=%u", complete);  	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/uts/common/fs/zfs/dsl_synctask.c b/uts/common/fs/zfs/dsl_synctask.c index b0818ce274d4..312423e943b5 100644 --- a/uts/common/fs/zfs/dsl_synctask.c +++ b/uts/common/fs/zfs/dsl_synctask.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  #include <sys/dmu.h> @@ -85,17 +86,17 @@ top:  	/* Do a preliminary error check. */  	dstg->dstg_err = 0; +#ifdef ZFS_DEBUG +	/* +	 * Only check half the time, otherwise, the sync-context +	 * check will almost never fail. +	 */ +	if (spa_get_random(2) == 0) +		goto skip; +#endif  	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);  	for (dst = list_head(&dstg->dstg_tasks); dst;  	    dst = list_next(&dstg->dstg_tasks, dst)) { -#ifdef ZFS_DEBUG -		/* -		 * Only check half the time, otherwise, the sync-context -		 * check will almost never fail. -		 */ -		if (spa_get_random(2) == 0) -			continue; -#endif  		dst->dst_err =  		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);  		if (dst->dst_err) @@ -107,6 +108,7 @@ top:  		dmu_tx_commit(tx);  		return (dstg->dstg_err);  	} +skip:  	/*  	 * We don't generally have many sync tasks, so pay the price of diff --git a/uts/common/fs/zfs/rrwlock.c b/uts/common/fs/zfs/rrwlock.c index 4cef53f95132..7f9290bd44c1 100644 --- a/uts/common/fs/zfs/rrwlock.c +++ b/uts/common/fs/zfs/rrwlock.c @@ -22,6 +22,9 @@   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms.   */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */  #include <sys/refcount.h>  #include <sys/rrwlock.h> @@ -262,3 +265,13 @@ rrw_held(rrwlock_t *rrl, krw_t rw)  	return (held);  } + +void +rrw_tsd_destroy(void *arg) +{ +	rrw_node_t *rn = arg; +	if (rn != NULL) { +		panic("thread %p terminating with rrw lock %p held", +		    (void *)curthread, (void *)rn->rn_rrl); +	} +} diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c index c5f11ed5c1c0..828d5e266643 100644 --- a/uts/common/fs/zfs/spa.c +++ b/uts/common/fs/zfs/spa.c @@ -2545,6 +2545,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,  			spa_async_request(spa, SPA_ASYNC_RESILVER);  		/* +		 * Log the fact that we booted up (so that we can detect if +		 * we rebooted in the middle of an operation). +		 */ +		spa_history_log_version(spa, "open"); + +		/*  		 * Delete any inconsistent datasets.  		 */  		(void) dmu_objset_find(spa_name(spa), @@ -3220,7 +3226,7 @@ spa_l2cache_drop(spa_t *spa)   */  int  spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, -    const char *history_str, nvlist_t *zplprops) +    nvlist_t *zplprops)  {  	spa_t *spa;  	char *altroot = NULL; @@ -3439,9 +3445,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,  	spa_config_sync(spa, B_FALSE, B_TRUE); -	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) -		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); -	spa_history_log_version(spa, LOG_POOL_CREATE); +	spa_history_log_version(spa, "create");  	spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3641,7 +3645,6 @@ spa_import_rootpool(char *devpath, char *devid)  	}  	error = 0; -	spa_history_log_version(spa, LOG_POOL_IMPORT);  out:  	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);  	vdev_free(rvd); @@ -3703,7 +3706,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)  		spa_config_sync(spa, B_FALSE, B_TRUE);  		mutex_exit(&spa_namespace_lock); -		spa_history_log_version(spa, LOG_POOL_IMPORT); +		
spa_history_log_version(spa, "import");  		return (0);  	} @@ -3834,7 +3837,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)  	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);  	mutex_exit(&spa_namespace_lock); -	spa_history_log_version(spa, LOG_POOL_IMPORT); +	spa_history_log_version(spa, "import");  	return (0);  } @@ -4372,7 +4375,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)  	 */  	(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); -	spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, +	spa_history_log_internal(spa, "vdev attach", NULL,  	    "%s vdev=%s %s vdev=%s",  	    replacing && newvd_isspare ? "spare in" :  	    replacing ? "replace" : "attach", newvdpath, @@ -4589,7 +4592,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)  	error = spa_vdev_exit(spa, vd, txg, 0); -	spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, +	spa_history_log_internal(spa, "detach", NULL,  	    "vdev=%s", vdpath);  	spa_strfree(vdpath); @@ -4858,9 +4861,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,  		if (vml[c] != NULL) {  			vdev_split(vml[c]);  			if (error == 0) -				spa_history_log_internal(LOG_POOL_VDEV_DETACH, -				    spa, tx, "vdev=%s", -				    vml[c]->vdev_path); +				spa_history_log_internal(spa, "detach", tx, +				    "vdev=%s", vml[c]->vdev_path);  			vdev_free(vml[c]);  		}  	} @@ -4875,8 +4877,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,  		zio_handle_panic_injection(spa, FTAG, 3);  	/* split is complete; log a history record */ -	spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, -	    "split new pool %s from pool %s", newname, spa_name(spa)); +	spa_history_log_internal(newspa, "split", NULL, +	    "from pool %s", spa_name(spa));  	kmem_free(vml, children * sizeof (vdev_t *)); @@ -5462,8 +5464,7 @@ spa_async_thread(spa_t *spa)  		 * then log an internal history event.  		 
*/  		if (new_space != old_space) { -			spa_history_log_internal(LOG_POOL_VDEV_ONLINE, -			    spa, NULL, +			spa_history_log_internal(spa, "vdev online", NULL,  			    "pool '%s' size: %llu(+%llu)",  			    spa_name(spa), new_space, new_space - old_space);  		} @@ -5699,6 +5700,7 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)  	spa->spa_uberblock.ub_version = version;  	vdev_config_dirty(spa->spa_root_vdev); +	spa_history_log_internal(spa, "set", tx, "version=%lld", version);  }  /* @@ -5733,6 +5735,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)  			VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));  			spa_feature_enable(spa, feature, tx); +			spa_history_log_internal(spa, "set", tx, +			    "%s=enabled", nvpair_name(elem));  			break;  		case ZPOOL_PROP_VERSION: @@ -5772,6 +5776,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)  			 */  			if (tx->tx_txg != TXG_INITIAL)  				vdev_config_dirty(spa->spa_root_vdev); +			spa_history_log_internal(spa, "set", tx, +			    "%s=%s", nvpair_name(elem), strval);  			break;  		default:  			/* @@ -5794,7 +5800,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)  				VERIFY(zap_update(mos,  				    spa->spa_pool_props_object, propname,  				    1, strlen(strval) + 1, strval, tx) == 0); - +				spa_history_log_internal(spa, "set", tx, +				    "%s=%s", nvpair_name(elem), strval);  			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {  				VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -5806,6 +5813,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)  				VERIFY(zap_update(mos,  				    spa->spa_pool_props_object, propname,  				    8, 1, &intval, tx) == 0); +				spa_history_log_internal(spa, "set", tx, +				    "%s=%lld", nvpair_name(elem), intval);  			} else {  				ASSERT(0); /* not allowed */  			} @@ -5834,13 +5843,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)  			}  		} -		/* log internal history if this is not a zpool create */ -		if (spa_version(spa) >= 
SPA_VERSION_ZPOOL_HISTORY && -		    tx->tx_txg != TXG_INITIAL) { -			spa_history_log_internal(LOG_POOL_PROPSET, -			    spa, tx, "%s %lld %s", -			    nvpair_name(elem), intval, spa_name(spa)); -		}  	}  	mutex_exit(&spa->spa_props_lock); diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c index 6df8411349c9..f2c32f548b41 100644 --- a/uts/common/fs/zfs/spa_history.c +++ b/uts/common/fs/zfs/spa_history.c @@ -21,7 +21,7 @@  /*   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  #include <sys/spa.h> @@ -30,9 +30,12 @@  #include <sys/dsl_synctask.h>  #include <sys/dmu_tx.h>  #include <sys/dmu_objset.h> +#include <sys/dsl_dataset.h> +#include <sys/dsl_dir.h>  #include <sys/utsname.h>  #include <sys/cmn_err.h>  #include <sys/sunddi.h> +#include <sys/cred.h>  #include "zfs_comutil.h"  #ifdef _KERNEL  #include <sys/zone.h> @@ -176,12 +179,14 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,  }  static char * -spa_history_zone() +spa_history_zone(void)  {  #ifdef _KERNEL +	if (INGLOBALZONE(curproc)) +		return (NULL);  	return (curproc->p_zone->zone_name);  #else -	return ("global"); +	return (NULL);  #endif  } @@ -193,14 +198,12 @@ static void  spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)  {  	spa_t		*spa = arg1; -	history_arg_t	*hap = arg2; -	const char	*history_str = hap->ha_history_str; +	nvlist_t	*nvl = arg2;  	objset_t	*mos = spa->spa_meta_objset;  	dmu_buf_t	*dbp;  	spa_history_phys_t *shpp;  	size_t		reclen;  	uint64_t	le_len; -	nvlist_t	*nvrecord;  	char		*record_packed = NULL;  	int		ret; @@ -230,46 +233,35 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	}  #endif -	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0); -	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, -	    gethrestime_sec()) == 0); -	
VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); -	if (hap->ha_zone != NULL) -		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, -		    hap->ha_zone) == 0); +	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());  #ifdef _KERNEL -	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, -	    utsname.nodename) == 0); +	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);  #endif -	if (hap->ha_log_type == LOG_CMD_POOL_CREATE || -	    hap->ha_log_type == LOG_CMD_NORMAL) { -		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, -		    history_str) == 0); - -		zfs_dbgmsg("command: %s", history_str); -	} else { -		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, -		    hap->ha_event) == 0); -		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, -		    tx->tx_txg) == 0); -		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, -		    history_str) == 0); - -		zfs_dbgmsg("internal %s pool:%s txg:%llu %s", -		    zfs_history_event_names[hap->ha_event], spa_name(spa), -		    (longlong_t)tx->tx_txg, history_str); - +	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { +		zfs_dbgmsg("command: %s", +		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); +	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { +		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { +			zfs_dbgmsg("txg %lld %s %s (id %llu) %s", +			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), +			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), +			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), +			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), +			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); +		} else { +			zfs_dbgmsg("txg %lld %s %s", +			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), +			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), +			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); +		} +	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { +		zfs_dbgmsg("ioctl %s", +		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));  	} -	VERIFY(nvlist_size(nvrecord, 
&reclen, NV_ENCODE_XDR) == 0); -	record_packed = kmem_alloc(reclen, KM_SLEEP); - -	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, -	    NV_ENCODE_XDR, KM_SLEEP) == 0); +	record_packed = fnvlist_pack(nvl, &reclen);  	mutex_enter(&spa->spa_history_lock); -	if (hap->ha_log_type == LOG_CMD_POOL_CREATE) -		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);  	/* write out the packed length as little endian */  	le_len = LE_64((uint64_t)reclen); @@ -277,33 +269,42 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)  	if (!ret)  		ret = spa_history_write(spa, record_packed, reclen, shpp, tx); -	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { -		shpp->sh_pool_create_len += sizeof (le_len) + reclen; -		shpp->sh_bof = shpp->sh_pool_create_len; +	/* The first command is the create, which we keep forever */ +	if (ret == 0 && shpp->sh_pool_create_len == 0 && +	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) { +		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;  	}  	mutex_exit(&spa->spa_history_lock); -	nvlist_free(nvrecord); -	kmem_free(record_packed, reclen); +	fnvlist_pack_free(record_packed, reclen);  	dmu_buf_rele(dbp, FTAG); - -	strfree(hap->ha_history_str); -	if (hap->ha_zone != NULL) -		strfree(hap->ha_zone); -	kmem_free(hap, sizeof (history_arg_t)); +	fnvlist_free(nvl);  }  /*   * Write out a history event.   
*/  int -spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +spa_history_log(spa_t *spa, const char *msg) +{ +	int err; +	nvlist_t *nvl = fnvlist_alloc(); + +	fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); +	err = spa_history_log_nvl(spa, nvl); +	fnvlist_free(nvl); +	return (err); +} + +int +spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)  { -	history_arg_t *ha;  	int err = 0;  	dmu_tx_t *tx; +	nvlist_t *nvarg; -	ASSERT(what != LOG_INTERNAL); +	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) +		return (EINVAL);  	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);  	err = dmu_tx_assign(tx, TXG_WAIT); @@ -312,19 +313,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)  		return (err);  	} -	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); -	ha->ha_history_str = strdup(history_str); -	ha->ha_zone = strdup(spa_history_zone()); -	ha->ha_log_type = what; -	ha->ha_uid = crgetuid(CRED()); +	nvarg = fnvlist_dup(nvl); +	if (spa_history_zone() != NULL) { +		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, +		    spa_history_zone()); +	} +	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));  	/* Kick this off asynchronously; errors are ignored. */  	dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, -	    spa_history_log_sync, spa, ha, 0, tx); +	    spa_history_log_sync, spa, nvarg, 0, tx);  	dmu_tx_commit(tx); -	/* spa_history_log_sync will free ha and strings */ +	/* spa_history_log_sync will free nvl */  	return (err); +  }  /* @@ -341,7 +344,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)  	int err;  	/* -	 * If the command history  doesn't exist (older pool), +	 * If the command history doesn't exist (older pool),  	 * that's ok, just return ENOENT.  	 */  	if (!spa->spa_history) @@ -424,11 +427,14 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)  	return (err);  } +/* + * The nvlist will be consumed by this call. 
+ */  static void -log_internal(history_internal_events_t event, spa_t *spa, +log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,      dmu_tx_t *tx, const char *fmt, va_list adx)  { -	history_arg_t *ha; +	char *msg;  	/*  	 * If this is part of creating a pool, not everything is @@ -437,28 +443,25 @@ log_internal(history_internal_events_t event, spa_t *spa,  	if (tx->tx_txg == TXG_INITIAL)  		return; -	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); -	ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, -	    KM_SLEEP); - -	(void) vsprintf(ha->ha_history_str, fmt, adx); +	msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP); +	(void) vsprintf(msg, fmt, adx); +	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); +	strfree(msg); -	ha->ha_log_type = LOG_INTERNAL; -	ha->ha_event = event; -	ha->ha_zone = NULL; -	ha->ha_uid = 0; +	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); +	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);  	if (dmu_tx_is_syncing(tx)) { -		spa_history_log_sync(spa, ha, tx); +		spa_history_log_sync(spa, nvl, tx);  	} else {  		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, -		    spa_history_log_sync, spa, ha, 0, tx); +		    spa_history_log_sync, spa, nvl, 0, tx);  	} -	/* spa_history_log_sync() will free ha and strings */ +	/* spa_history_log_sync() will free nvl */  }  void -spa_history_log_internal(history_internal_events_t event, spa_t *spa, +spa_history_log_internal(spa_t *spa, const char *operation,      dmu_tx_t *tx, const char *fmt, ...)  
{  	dmu_tx_t *htx = tx; @@ -474,7 +477,7 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa,  	}  	va_start(adx, fmt); -	log_internal(event, spa, htx, fmt, adx); +	log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);  	va_end(adx);  	/* if we didn't get a tx from the caller, commit the one we made */ @@ -483,21 +486,56 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa,  }  void -spa_history_log_version(spa_t *spa, history_internal_events_t event) +spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, +    dmu_tx_t *tx, const char *fmt, ...) +{ +	va_list adx; +	char namebuf[MAXNAMELEN]; +	nvlist_t *nvl = fnvlist_alloc(); + +	ASSERT(tx != NULL); + +	dsl_dataset_name(ds, namebuf); +	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); +	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); + +	va_start(adx, fmt); +	log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); +	va_end(adx); +} + +void +spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, +    dmu_tx_t *tx, const char *fmt, ...) 
+{ +	va_list adx; +	char namebuf[MAXNAMELEN]; +	nvlist_t *nvl = fnvlist_alloc(); + +	ASSERT(tx != NULL); + +	dsl_dir_name(dd, namebuf); +	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); +	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, +	    dd->dd_phys->dd_head_dataset_obj); + +	va_start(adx, fmt); +	log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); +	va_end(adx); +} + +void +spa_history_log_version(spa_t *spa, const char *operation)  {  #ifdef _KERNEL  	uint64_t current_vers = spa_version(spa); -	if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { -		spa_history_log_internal(event, spa, NULL, -		    "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", -		    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, -		    utsname.nodename, utsname.release, utsname.version, -		    utsname.machine); -	} -	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", -	    event == LOG_POOL_IMPORT ? "imported" : -	    event == LOG_POOL_CREATE ? "created" : "accessed", +	spa_history_log_internal(spa, operation, NULL, +	    "pool version %llu; software version %llu/%d; uts %s %s %s %s", +	    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, +	    utsname.nodename, utsname.release, utsname.version, +	    utsname.machine); +	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,  	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);  #endif  } diff --git a/uts/common/fs/zfs/sys/dmu.h b/uts/common/fs/zfs/sys/dmu.h index 2dac73232f8a..d60483575574 100644 --- a/uts/common/fs/zfs/sys/dmu.h +++ b/uts/common/fs/zfs/sys/dmu.h @@ -44,6 +44,7 @@  #include <sys/param.h>  #include <sys/cred.h>  #include <sys/time.h> +#include <sys/fs/zfs.h>  #ifdef	__cplusplus  extern "C" { @@ -216,16 +217,6 @@ typedef enum dmu_object_type {  	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),  } dmu_object_type_t; -typedef enum dmu_objset_type { -	DMU_OST_NONE, -	DMU_OST_META, -	DMU_OST_ZFS, -	DMU_OST_ZVOL, -	DMU_OST_OTHER,			/* For testing only! 
*/ -	DMU_OST_ANY,			/* Be careful! */ -	DMU_OST_NUMTYPES -} dmu_objset_type_t; -  void byteswap_uint64_array(void *buf, size_t size);  void byteswap_uint32_array(void *buf, size_t size);  void byteswap_uint16_array(void *buf, size_t size); @@ -270,9 +261,11 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,  int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,      uint64_t flags);  int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, -    struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); +int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, +    struct nvlist *errlist); +int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *); +int dmu_objset_snapshot_one(const char *fsname, const char *snapname); +int dmu_objset_snapshot_tmp(const char *, const char *, int);  int dmu_objset_rename(const char *name, const char *newname,      boolean_t recursive);  int dmu_objset_find(char *name, int func(const char *, void *), void *arg, @@ -789,10 +782,9 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,  void dmu_traverse_objset(objset_t *os, uint64_t txg_start,      dmu_traverse_cb_t cb, void *arg); -int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, +int dmu_send(objset_t *tosnap, objset_t *fromsnap,      int outfd, struct vnode *vp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, -    uint64_t *sizep); +int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep);  typedef struct dmu_recv_cookie {  	/* diff --git a/uts/common/fs/zfs/sys/dmu_objset.h b/uts/common/fs/zfs/sys/dmu_objset.h index c6d202e2e81a..9439993ace78 100644 --- a/uts/common/fs/zfs/sys/dmu_objset.h +++ 
b/uts/common/fs/zfs/sys/dmu_objset.h @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  /* Portions Copyright 2010 Robert Milkowski */ @@ -137,24 +138,14 @@ void dmu_objset_rele(objset_t *os, void *tag);  void dmu_objset_disown(objset_t *os, void *tag);  int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, -    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, -    uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, -    struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);  void dmu_objset_stats(objset_t *os, nvlist_t *nv);  void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);  void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,      uint64_t *usedobjsp, uint64_t *availobjsp);  uint64_t dmu_objset_fsid_guid(objset_t *os); -int dmu_objset_find(char *name, int func(const char *, void *), void *arg, -    int flags);  int dmu_objset_find_spa(spa_t *spa, const char *name,      int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);  int dmu_objset_prefetch(const char *name, void *arg); -void dmu_objset_byteswap(void *buf, size_t size);  int dmu_objset_evict_dbufs(objset_t *os);  timestruc_t dmu_objset_snap_cmtime(objset_t *os); diff --git a/uts/common/fs/zfs/sys/dsl_dataset.h b/uts/common/fs/zfs/sys/dsl_dataset.h index 77b7d70e2f62..6c43d97fd9d6 100644 --- a/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/uts/common/fs/zfs/sys/dsl_dataset.h @@ -180,7 +180,7 @@ struct dsl_ds_destroyarg {  struct dsl_ds_holdarg {  	dsl_sync_task_group_t *dstg; -	char *htag; +	const char *htag;  	char *snapname;  	
boolean_t recursive;  	boolean_t gotone; @@ -215,12 +215,11 @@ uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,  uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,      uint64_t flags, dmu_tx_t *tx);  int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); -int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);  dsl_checkfunc_t dsl_dataset_destroy_check;  dsl_syncfunc_t dsl_dataset_destroy_sync; -dsl_checkfunc_t dsl_dataset_snapshot_check; -dsl_syncfunc_t dsl_dataset_snapshot_sync;  dsl_syncfunc_t dsl_dataset_user_hold_sync; +int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); +void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);  int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);  int dsl_dataset_promote(const char *name, char *conflsnap);  int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, diff --git a/uts/common/fs/zfs/sys/dsl_deleg.h b/uts/common/fs/zfs/sys/dsl_deleg.h index 9db6d07e87e7..5842639aafba 100644 --- a/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/uts/common/fs/zfs/sys/dsl_deleg.h @@ -20,7 +20,7 @@   */  /*   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  #ifndef	_SYS_DSL_DELEG_H @@ -65,8 +65,7 @@ extern "C" {  int dsl_deleg_get(const char *ddname, nvlist_t **nvp);  int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);  int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, -    const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr);  void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);  int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);  int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); diff --git a/uts/common/fs/zfs/sys/dsl_prop.h b/uts/common/fs/zfs/sys/dsl_prop.h index a636ad35096b..b0d9a52cdfd7 100644 --- a/uts/common/fs/zfs/sys/dsl_prop.h +++ b/uts/common/fs/zfs/sys/dsl_prop.h @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  #ifndef	_SYS_DSL_PROP_H @@ -89,8 +90,6 @@ dsl_syncfunc_t dsl_props_set_sync;  int dsl_prop_set(const char *ddname, const char *propname,      zprop_source_t source, int intsz, int numints, const void *buf);  int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); -void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, -    dmu_tx_t *tx);  void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,      zprop_source_t source, uint64_t *value); diff --git a/uts/common/fs/zfs/sys/rrwlock.h b/uts/common/fs/zfs/sys/rrwlock.h index 19a43c97fc3c..239268bd58e7 100644 --- a/uts/common/fs/zfs/sys/rrwlock.h +++ b/uts/common/fs/zfs/sys/rrwlock.h @@ -22,12 +22,13 @@   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms.   */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */  #ifndef	_SYS_RR_RW_LOCK_H  #define	_SYS_RR_RW_LOCK_H -#pragma ident	"%Z%%M%	%I%	%E% SMI" -  #ifdef	__cplusplus  extern "C" {  #endif @@ -69,6 +70,7 @@ void rrw_destroy(rrwlock_t *rrl);  void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);  void rrw_exit(rrwlock_t *rrl, void *tag);  boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); +void rrw_tsd_destroy(void *arg);  #define	RRW_READ_HELD(x)	rrw_held(x, RW_READER)  #define	RRW_WRITE_HELD(x)	rrw_held(x, RW_WRITER) diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h index c790f370d22d..1043f4038a30 100644 --- a/uts/common/fs/zfs/sys/spa.h +++ b/uts/common/fs/zfs/sys/spa.h @@ -52,6 +52,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t;  typedef struct ddt ddt_t;  typedef struct ddt_entry ddt_entry_t;  struct dsl_pool; +struct dsl_dataset;  /*   * General-purpose 32-bit and 64-bit bitfield encodings. @@ -418,7 +419,7 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,  extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,      size_t buflen);  extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, -    const char *history_str, nvlist_t *zplprops); +    nvlist_t *zplprops);  extern int spa_import_rootpool(char *devpath, char *devid);  extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,      uint64_t flags); @@ -632,31 +633,20 @@ extern boolean_t spa_writeable(spa_t *spa);  extern int spa_mode(spa_t *spa);  extern uint64_t strtonum(const char *str, char **nptr); -/* history logging */ -typedef enum history_log_type { -	LOG_CMD_POOL_CREATE, -	LOG_CMD_NORMAL, -	LOG_INTERNAL -} history_log_type_t; - -typedef struct history_arg { -	char *ha_history_str; -	history_log_type_t ha_log_type; -	history_internal_events_t ha_event; -	char *ha_zone; -	uid_t ha_uid; -} history_arg_t; -  extern char *spa_his_ievent_table[];  extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);  extern int spa_history_get(spa_t *spa, uint64_t 
*offset, uint64_t *len_read,      char *his_buf); -extern int spa_history_log(spa_t *spa, const char *his_buf, -    history_log_type_t what); -extern void spa_history_log_internal(history_internal_events_t event, -    spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); +extern int spa_history_log(spa_t *spa, const char *his_buf); +extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl); +extern void spa_history_log_version(spa_t *spa, const char *operation); +extern void spa_history_log_internal(spa_t *spa, const char *operation, +    dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, +    dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, +    dmu_tx_t *tx, const char *fmt, ...);  /* error handling */  struct zbookmark; diff --git a/uts/common/fs/zfs/sys/zfs_ioctl.h b/uts/common/fs/zfs/sys/zfs_ioctl.h index 84bf794fe5f0..4d781ad2a46c 100644 --- a/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  #ifndef	_SYS_ZFS_IOCTL_H @@ -41,6 +42,15 @@ extern "C" {  #endif  /* + * The structures in this file are passed between userland and the + * kernel.  Userland may be running a 32-bit process, while the kernel + * is 64-bit.  Therefore, these structures need to compile the same in + * 32-bit and 64-bit.  This means not using type "long", and adding + * explicit padding so that the 32-bit structure will not be packed more + * tightly than the 64-bit structure (which requires 64-bit alignment). 
+ */ + +/*   * Property values for snapdir   */  #define	ZFS_SNAPDIR_HIDDEN		0 @@ -256,22 +266,29 @@ typedef enum zfs_case {  } zfs_case_t;  typedef struct zfs_cmd { -	char		zc_name[MAXPATHLEN]; +	char		zc_name[MAXPATHLEN];	/* name of pool or dataset */ +	uint64_t	zc_nvlist_src;		/* really (char *) */ +	uint64_t	zc_nvlist_src_size; +	uint64_t	zc_nvlist_dst;		/* really (char *) */ +	uint64_t	zc_nvlist_dst_size; +	boolean_t	zc_nvlist_dst_filled;	/* put an nvlist in dst? */ +	int		zc_pad2; + +	/* +	 * The following members are for legacy ioctls which haven't been +	 * converted to the new method. +	 */ +	uint64_t	zc_history;		/* really (char *) */  	char		zc_value[MAXPATHLEN * 2];  	char		zc_string[MAXNAMELEN];  	char		zc_top_ds[MAXPATHLEN];  	uint64_t	zc_guid;  	uint64_t	zc_nvlist_conf;		/* really (char *) */  	uint64_t	zc_nvlist_conf_size; -	uint64_t	zc_nvlist_src;		/* really (char *) */ -	uint64_t	zc_nvlist_src_size; -	uint64_t	zc_nvlist_dst;		/* really (char *) */ -	uint64_t	zc_nvlist_dst_size;  	uint64_t	zc_cookie;  	uint64_t	zc_objset_type;  	uint64_t	zc_perm_action; -	uint64_t 	zc_history;		/* really (char *) */ -	uint64_t 	zc_history_len; +	uint64_t	zc_history_len;  	uint64_t	zc_history_offset;  	uint64_t	zc_obj;  	uint64_t	zc_iflags;		/* internal to zfs(7fs) */ diff --git a/uts/common/fs/zfs/zfs_ctldir.c b/uts/common/fs/zfs/zfs_ctldir.c index 815f8895e702..d902ff637c38 100644 --- a/uts/common/fs/zfs/zfs_ctldir.c +++ b/uts/common/fs/zfs/zfs_ctldir.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  /* @@ -749,8 +750,7 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,  		return (err);  	if (err == 0) { -		err = dmu_objset_snapshot(name, dirname, NULL, NULL, -		    B_FALSE, B_FALSE, -1); +		err = dmu_objset_snapshot_one(name, dirname);  		if (err)  			return (err);  		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c index 137816e23483..213142740162 100644 --- a/uts/common/fs/zfs/zfs_ioctl.c +++ b/uts/common/fs/zfs/zfs_ioctl.c @@ -23,8 +23,109 @@   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.   * Portions Copyright 2011 Martin Matuska   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved.   * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + *   The name of the ioctl.  This is used for history logging.  If the + *   ioctl returns successfully (the callback returns 0), and allow_log + *   is true, then a history log entry will be recorded with the input & + *   output nvlists.  The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + *   The ioctl request number, which userland will pass to ioctl(2). 
+ *   The ioctl numbers can change from release to release, because + *   the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + *   This function will be called before the zfs_ioc_func_t, to + *   determine if this operation is permitted.  It should return EPERM + *   on failure, and 0 on success.  Checks include determining if the + *   dataset is visible in this zone, and if the user has either all + *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission + *   to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + *   name, a dataset name, or nothing.  If the name is not well-formed, + *   the ioctl will fail and the callback will not be called. + *   Therefore, the callback can assume that the name is well-formed + *   (e.g. is null-terminated, doesn't have more than one '@' character, + *   doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + *   This specifies requirements on the pool state.  If the pool does + *   not meet them (is suspended or is readonly), the ioctl will fail + *   and the callback will not be called.  If any checks are specified + *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. + *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + *   POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + *   If smush_outnvlist is true, then the output is presumed to be a + *   list of errors, and it will be "smushed" down to fit into the + *   caller's buffer, by removing some entries and replacing them with a + *   single "N_MORE_ERRORS" entry indicating how many were removed.  See + *   nvlist_smush() for details.  If smush_outnvlist is false, and the + *   outnvlist does not fit into the userland-provided buffer, then the + *   ioctl will fail with ENOMEM. 
+ * + * zfs_ioc_func_t *func + *   The callback function that will perform the operation. + * + *   The callback should return 0 on success, or an error number on + *   failure.  If the function fails, the userland ioctl will return -1, + *   and errno will be set to the callback's return value.  The callback + *   will be called with the following arguments: + * + *   const char *name + *     The name of the pool or dataset to operate on, from + *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the + *     expected type (pool, dataset, or none). + * + *   nvlist_t *innvl + *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or + *     NULL if no input nvlist was provided.  Changes to this nvlist are + *     ignored.  If the input nvlist could not be deserialized, the + *     ioctl will fail and the callback will not be called. + * + *   nvlist_t *outnvl + *     The output nvlist, initially empty.  The callback can fill it in, + *     and it will be returned to userland by serializing it into + *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization + *     fails (e.g. because the caller didn't supply a large enough + *     buffer), then the overall ioctl will fail.  See the + *     'smush_outnvlist' argument above for additional behaviors. + * + *     There are two typical uses of the output nvlist: + *       - To return state, e.g. property values.  In this case, + *         smush_outnvlist should be false.  If the buffer was not large + *         enough, the caller will reallocate a larger buffer and try + *         the ioctl again. + * + *       - To return multiple errors from an ioctl which makes on-disk + *         changes.  In this case, smush_outnvlist should be true. + *         Ioctls which make on-disk modifications should generally not + *         use the outnvl if they succeed, because the caller can not + *         distinguish between the operation failing, and + *         deserialization failing.   
*/  #include <sys/types.h> @@ -85,8 +186,13 @@ extern void zfs_fini(void);  ldi_ident_t zfs_li = NULL;  dev_info_t *zfs_dip; -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);  typedef enum {  	NO_NAME, @@ -97,15 +203,18 @@ typedef enum {  typedef enum {  	POOL_CHECK_NONE		= 1 << 0,  	POOL_CHECK_SUSPENDED	= 1 << 1, -	POOL_CHECK_READONLY	= 1 << 2 +	POOL_CHECK_READONLY	= 1 << 2,  } zfs_ioc_poolcheck_t;  typedef struct zfs_ioc_vec { +	zfs_ioc_legacy_func_t	*zvec_legacy_func;  	zfs_ioc_func_t		*zvec_func;  	zfs_secpolicy_func_t	*zvec_secpolicy;  	zfs_ioc_namecheck_t	zvec_namecheck; -	boolean_t		zvec_his_log; +	boolean_t		zvec_allow_log;  	zfs_ioc_poolcheck_t	zvec_pool_check; +	boolean_t		zvec_smush_outnvlist; +	const char		*zvec_name;  } zfs_ioc_vec_t;  /* This array is indexed by zfs_userquota_prop_t */ @@ -123,7 +232,8 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props,      nvlist_t **errors);  static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,      boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);  /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */  void @@ -261,7 +371,7 @@ zfs_log_history(zfs_cmd_t *zc)  	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {  		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) -			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL); +			(void) spa_history_log(spa, buf);  		spa_close(spa, FTAG);  	}  	history_str_free(buf); @@ -273,7 +383,7 @@ zfs_log_history(zfs_cmd_t 
*zc)   */  /* ARGSUSED */  static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (0);  } @@ -284,7 +394,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)   */  /* ARGSUSED */  static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	if (INGLOBALZONE(curproc) ||  	    zone_dataset_visible(zc->zc_name, NULL)) @@ -353,26 +463,13 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)  	return (zfs_dozonecheck_impl(dataset, zoned, cr));  } -/* - * If name ends in a '@', then require recursive permissions. - */ -int +static int  zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)  {  	int error; -	boolean_t descendent = B_FALSE;  	dsl_dataset_t *ds; -	char *at; - -	at = strchr(name, '@'); -	if (at != NULL && at[1] == '\0') { -		*at = '\0'; -		descendent = B_TRUE; -	}  	error = dsl_dataset_hold(name, FTAG, &ds); -	if (at != NULL) -		*at = '@';  	if (error != 0)  		return (error); @@ -380,14 +477,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)  	if (error == 0) {  		error = secpolicy_zfs(cr);  		if (error) -			error = dsl_deleg_access_impl(ds, descendent, perm, cr); +			error = dsl_deleg_access_impl(ds, perm, cr);  	}  	dsl_dataset_rele(ds, FTAG);  	return (error);  } -int +static int  zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,      const char *perm, cred_t *cr)  { @@ -397,7 +494,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,  	if (error == 0) {  		error = secpolicy_zfs(cr);  		if (error) -			error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); +			error = dsl_deleg_access_impl(ds, perm, cr);  	}  	return (error);  } @@ -551,8 +648,9 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,  	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));  } -int 
-zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	int error; @@ -567,15 +665,17 @@ zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)  	return (0);  } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_write_perms(zc->zc_name,  	    ZFS_DELEG_PERM_ROLLBACK, cr));  } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	spa_t *spa;  	dsl_pool_t *dp; @@ -611,8 +711,17 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)  	return (error);  } +/* ARGSUSED */  static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ +	return (zfs_secpolicy_write_perms(zc->zc_name, +	    ZFS_DELEG_PERM_SEND, cr)); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	vnode_t *vp;  	int error; @@ -636,7 +745,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)  }  int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	if (!INGLOBALZONE(curproc))  		return (EPERM); @@ -644,12 +753,12 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)  	if (secpolicy_nfs(cr) == 0) {  		return (0);  	} else { -		return (zfs_secpolicy_deleg_share(zc, cr)); +		return (zfs_secpolicy_deleg_share(zc, innvl, cr));  	}  }  int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	if (!INGLOBALZONE(curproc))  		return (EPERM); @@ -657,7 +766,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)  	if (secpolicy_smb(cr) == 0) {  		return (0);  	} else { -		return (zfs_secpolicy_deleg_share(zc, cr)); +		return (zfs_secpolicy_deleg_share(zc, 
innvl, cr));  	}  } @@ -695,27 +804,55 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)  	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));  } +/* ARGSUSED */  static int -zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));  }  /*   * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions.   */ +/* ARGSUSED */  static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  { -	int error; -	char *dsname; +	nvlist_t *snaps; +	nvpair_t *pair, *nextpair; +	int error = 0; -	dsname = kmem_asprintf("%s@", zc->zc_name); +	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) +		return (EINVAL); +	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; +	    pair = nextpair) { +		dsl_dataset_t *ds; -	error = zfs_secpolicy_destroy_perms(dsname, cr); +		nextpair = nvlist_next_nvpair(snaps, pair); +		error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); +		if (error == 0) { +			dsl_dataset_rele(ds, FTAG); +		} else if (error == ENOENT) { +			/* +			 * Ignore any snapshots that don't exist (we consider +			 * them "already destroyed").  Remove the name from the +			 * nvl here in case the snapshot is created between +			 * now and when we try to destroy it (in which case +			 * we don't want to destroy it since we haven't +			 * checked for permission). 
+			 */ +			fnvlist_remove_nvpair(snaps, pair); +			error = 0; +			continue; +		} else { +			break; +		} +		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); +		if (error != 0) +			break; +	} -	strfree(dsname);  	return (error);  } @@ -748,14 +885,16 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)  	return (error);  } +/* ARGSUSED */  static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));  } +/* ARGSUSED */  static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	char	parentname[MAXNAMELEN];  	objset_t *clone; @@ -795,8 +934,9 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)  	return (error);  } +/* ARGSUSED */  static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	int error; @@ -819,49 +959,72 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)  	    ZFS_DELEG_PERM_SNAPSHOT, cr));  } +/* + * Check for permission to create each snapshot in the nvlist. 
+ */ +/* ARGSUSED */  static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  { +	nvlist_t *snaps; +	int error; +	nvpair_t *pair; -	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); +	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) +		return (EINVAL); +	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; +	    pair = nvlist_next_nvpair(snaps, pair)) { +		char *name = nvpair_name(pair); +		char *atp = strchr(name, '@'); + +		if (atp == NULL) { +			error = EINVAL; +			break; +		} +		*atp = '\0'; +		error = zfs_secpolicy_snapshot_perms(name, cr); +		*atp = '@'; +		if (error != 0) +			break; +	} +	return (error);  } +/* ARGSUSED */  static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ +	/* +	 * Even root must have a proper TSD so that we know what pool +	 * to log to. +	 */ +	if (tsd_get(zfs_allow_log_key) == NULL) +		return (EPERM); +	return (0); +} + +static int +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	char	parentname[MAXNAMELEN];  	int	error; +	char	*origin;  	if ((error = zfs_get_parent(zc->zc_name, parentname,  	    sizeof (parentname))) != 0)  		return (error); -	if (zc->zc_value[0] != '\0') { -		if ((error = zfs_secpolicy_write_perms(zc->zc_value, -		    ZFS_DELEG_PERM_CLONE, cr)) != 0) -			return (error); -	} +	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && +	    (error = zfs_secpolicy_write_perms(origin, +	    ZFS_DELEG_PERM_CLONE, cr)) != 0) +		return (error);  	if ((error = zfs_secpolicy_write_perms(parentname,  	    ZFS_DELEG_PERM_CREATE, cr)) != 0)  		return (error); -	error = zfs_secpolicy_write_perms(parentname, -	    ZFS_DELEG_PERM_MOUNT, cr); - -	return (error); -} - -static int -zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) -{ -	int error; - -	error = secpolicy_fs_unmount(cr, NULL); -	if (error) { -		error = 
dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); -	} -	return (error); +	return (zfs_secpolicy_write_perms(parentname, +	    ZFS_DELEG_PERM_MOUNT, cr));  }  /* @@ -870,7 +1033,7 @@ zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)   */  /* ARGSUSED */  static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	if (secpolicy_sys_config(cr, B_FALSE) != 0)  		return (EPERM); @@ -883,7 +1046,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)   */  /* ARGSUSED */  static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	int error; @@ -899,13 +1062,14 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)   */  /* ARGSUSED */  static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (secpolicy_zinject(cr));  } +/* ARGSUSED */  static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); @@ -921,9 +1085,9 @@ zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)  }  static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  { -	int err = zfs_secpolicy_read(zc, cr); +	int err = zfs_secpolicy_read(zc, innvl, cr);  	if (err)  		return (err); @@ -950,9 +1114,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)  }  static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  { -	int err = zfs_secpolicy_read(zc, cr); +	int err = zfs_secpolicy_read(zc, innvl, cr);  	if (err)  		return (err); @@ -963,22 +1127,25 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)  	    userquota_perms[zc->zc_objset_type], cr));  } +/* ARGSUSED */  static int 
-zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,  	    NULL, cr));  } +/* ARGSUSED */  static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_write_perms(zc->zc_name,  	    ZFS_DELEG_PERM_HOLD, cr));  } +/* ARGSUSED */  static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	return (zfs_secpolicy_write_perms(zc->zc_name,  	    ZFS_DELEG_PERM_RELEASE, cr)); @@ -988,7 +1155,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)   * Policy for allowing temporary snapshots to be taken or released   */  static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)  {  	/*  	 * A temporary snapshot is the same as a snapshot, @@ -1001,13 +1168,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)  	    ZFS_DELEG_PERM_DIFF, cr)) == 0)  		return (0); -	error = zfs_secpolicy_snapshot(zc, cr); +	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);  	if (!error) -		error = zfs_secpolicy_hold(zc, cr); +		error = zfs_secpolicy_hold(zc, innvl, cr);  	if (!error) -		error = zfs_secpolicy_release(zc, cr); +		error = zfs_secpolicy_release(zc, innvl, cr);  	if (!error) -		error = zfs_secpolicy_destroy(zc, cr); +		error = zfs_secpolicy_destroy(zc, innvl, cr);  	return (error);  } @@ -1046,36 +1213,40 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)  	return (0);  } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. 
+ */  static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max)  {  	size_t size; -	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); +	size = fnvlist_size(errors); -	if (size > zc->zc_nvlist_dst_size) { +	if (size > max) {  		nvpair_t *more_errors;  		int n = 0; -		if (zc->zc_nvlist_dst_size < 1024) +		if (max < 1024)  			return (ENOMEM); -		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); -		more_errors = nvlist_prev_nvpair(*errors, NULL); +		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); +		more_errors = nvlist_prev_nvpair(errors, NULL);  		do { -			nvpair_t *pair = nvlist_prev_nvpair(*errors, +			nvpair_t *pair = nvlist_prev_nvpair(errors,  			    more_errors); -			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); +			fnvlist_remove_nvpair(errors, pair);  			n++; -			VERIFY(nvlist_size(*errors, &size, -			    NV_ENCODE_NATIVE) == 0); -		} while (size > zc->zc_nvlist_dst_size); +			size = fnvlist_size(errors); +		} while (size > max); -		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); -		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); -		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); -		ASSERT(size <= zc->zc_nvlist_dst_size); +		fnvlist_remove_nvpair(errors, more_errors); +		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); +		ASSERT3U(fnvlist_size(errors), <=, max);  	}  	return (0); @@ -1088,21 +1259,20 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)  	int error = 0;  	size_t size; -	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); +	size = fnvlist_size(nvl);  	if (size > zc->zc_nvlist_dst_size) {  		error = ENOMEM;  	} else { -		packed = kmem_alloc(size, KM_SLEEP); -		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, -		    KM_SLEEP) == 0); +		packed = fnvlist_pack(nvl, &size);  		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,  		    size, zc->zc_iflags) != 0)  			error = EFAULT; -		kmem_free(packed, size); +		
fnvlist_pack_free(packed, size);  	}  	zc->zc_nvlist_dst_size = size; +	zc->zc_nvlist_dst_filled = B_TRUE;  	return (error);  } @@ -1181,7 +1351,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)  	nvlist_t *config, *props = NULL;  	nvlist_t *rootprops = NULL;  	nvlist_t *zplprops = NULL; -	char *buf;  	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,  	    zc->zc_iflags, &config)) @@ -1221,9 +1390,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)  			goto pool_props_bad;  	} -	buf = history_str_get(zc); - -	error = spa_create(zc->zc_name, config, props, buf, zplprops); +	error = spa_create(zc->zc_name, config, props, zplprops);  	/*  	 * Set the remaining root properties @@ -1232,9 +1399,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)  	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)  		(void) spa_destroy(zc->zc_name); -	if (buf != NULL) -		history_str_free(buf); -  pool_props_bad:  	nvlist_free(rootprops);  	nvlist_free(zplprops); @@ -2229,31 +2393,25 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,  /*   * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers.   * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified.   
*/  int  zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, -    nvlist_t **errlist) +    nvlist_t *errlist)  {  	nvpair_t *pair;  	nvpair_t *propval;  	int rv = 0;  	uint64_t intval;  	char *strval; -	nvlist_t *genericnvl; -	nvlist_t *errors; -	nvlist_t *retrynvl; - -	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); -	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); -	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); +	nvlist_t *genericnvl = fnvlist_alloc(); +	nvlist_t *retrynvl = fnvlist_alloc();  retry:  	pair = NULL; @@ -2266,7 +2424,7 @@ retry:  		propval = pair;  		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {  			nvlist_t *attrs; -			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); +			attrs = fnvpair_value_nvlist(pair);  			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,  			    &propval) != 0)  				err = EINVAL; @@ -2291,8 +2449,7 @@ retry:  			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {  				const char *unused; -				VERIFY(nvpair_value_uint64(propval, -				    &intval) == 0); +				intval = fnvpair_value_uint64(propval);  				switch (zfs_prop_get_type(prop)) {  				case PROP_TYPE_NUMBER: @@ -2336,8 +2493,11 @@ retry:  			}  		} -		if (err != 0) -			VERIFY(nvlist_add_int32(errors, propname, err) == 0); +		if (err != 0) { +			if (errlist != NULL) +				fnvlist_add_int32(errlist, propname, err); +			rv = err; +		}  	}  	if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2359,44 +2519,33 @@ retry:  			propval = pair;  			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {  				nvlist_t *attrs; -				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); -				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, -				    &propval) == 0); +				attrs = fnvpair_value_nvlist(pair); +				propval = fnvlist_lookup_nvpair(attrs, +				    ZPROP_VALUE);  			}  			if (nvpair_type(propval) == DATA_TYPE_STRING) { -				VERIFY(nvpair_value_string(propval, -				    &strval) == 0); +				strval = 
fnvpair_value_string(propval);  				err = dsl_prop_set(dsname, propname, source, 1,  				    strlen(strval) + 1, strval);  			} else { -				VERIFY(nvpair_value_uint64(propval, -				    &intval) == 0); +				intval = fnvpair_value_uint64(propval);  				err = dsl_prop_set(dsname, propname, source, 8,  				    1, &intval);  			}  			if (err != 0) { -				VERIFY(nvlist_add_int32(errors, propname, -				    err) == 0); +				if (errlist != NULL) { +					fnvlist_add_int32(errlist, propname, +					    err); +				} +				rv = err;  			}  		}  	}  	nvlist_free(genericnvl);  	nvlist_free(retrynvl); -	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { -		nvlist_free(errors); -		errors = NULL; -	} else { -		VERIFY(nvpair_value_int32(pair, &rv) == 0); -	} - -	if (errlist == NULL) -		nvlist_free(errors); -	else -		*errlist = errors; -  	return (rv);  } @@ -2404,7 +2553,7 @@ retry:   * Check that all the properties are valid user properties.   */  static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl)  {  	nvpair_t *pair = NULL;  	int error = 0; @@ -2484,7 +2633,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)  	boolean_t received = zc->zc_cookie;  	zprop_source_t source = (received ? 
ZPROP_SRC_RECEIVED :  	    ZPROP_SRC_LOCAL); -	nvlist_t *errors = NULL; +	nvlist_t *errors;  	int error;  	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2507,7 +2656,8 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)  		}  	} -	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); +	errors = fnvlist_alloc(); +	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);  	if (zc->zc_nvlist_dst != NULL && errors != NULL) {  		(void) put_nvlist(zc, errors); @@ -2589,7 +2739,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)  			return (EINVAL);  	} -	/* the property name has been validated by zfs_secpolicy_inherit() */ +	/* property name has been validated by zfs_secpolicy_inherit_prop() */  	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));  } @@ -2932,26 +3082,30 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,  }  /* - * inputs: - * zc_objset_type	type of objset to create (fs vs zvol) - * zc_name		name of new objset - * zc_value		name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size}	nvlist of properties to apply + * innvl: { + *     "type" -> dmu_objset_type_t (int32) + *     (optional) "props" -> { prop -> value } + * }   * - * outputs: none + * outnvl: propname -> error code (int32)   */  static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)  { -	objset_t *clone;  	int error = 0; -	zfs_creat_t zct; +	zfs_creat_t zct = { 0 };  	nvlist_t *nvprops = NULL;  	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); -	dmu_objset_type_t type = zc->zc_objset_type; +	int32_t type32; +	dmu_objset_type_t type; +	boolean_t is_insensitive = B_FALSE; -	switch (type) { +	if (nvlist_lookup_int32(innvl, "type", &type32) != 0) +		return (EINVAL); +	type = type32; +	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops); +	switch (type) {  	case DMU_OST_ZFS:  		cbfunc = zfs_create_cb;  		break; @@ -2964,230 +3118,290 @@ zfs_ioc_create(zfs_cmd_t 
*zc)  		cbfunc = NULL;  		break;  	} -	if (strchr(zc->zc_name, '@') || -	    strchr(zc->zc_name, '%')) +	if (strchr(fsname, '@') || +	    strchr(fsname, '%'))  		return (EINVAL); -	if (zc->zc_nvlist_src != NULL && -	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, -	    zc->zc_iflags, &nvprops)) != 0) -		return (error); - -	zct.zct_zplprops = NULL;  	zct.zct_props = nvprops; -	if (zc->zc_value[0] != '\0') { -		/* -		 * We're creating a clone of an existing snapshot. -		 */ -		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; -		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { -			nvlist_free(nvprops); +	if (cbfunc == NULL) +		return (EINVAL); + +	if (type == DMU_OST_ZVOL) { +		uint64_t volsize, volblocksize; + +		if (nvprops == NULL) +			return (EINVAL); +		if (nvlist_lookup_uint64(nvprops, +		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)  			return (EINVAL); -		} -		error = dmu_objset_hold(zc->zc_value, FTAG, &clone); -		if (error) { -			nvlist_free(nvprops); +		if ((error = nvlist_lookup_uint64(nvprops, +		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), +		    &volblocksize)) != 0 && error != ENOENT) +			return (EINVAL); + +		if (error != 0) +			volblocksize = zfs_prop_default_numeric( +			    ZFS_PROP_VOLBLOCKSIZE); + +		if ((error = zvol_check_volblocksize( +		    volblocksize)) != 0 || +		    (error = zvol_check_volsize(volsize, +		    volblocksize)) != 0)  			return (error); -		} +	} else if (type == DMU_OST_ZFS) { +		int error; -		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); -		dmu_objset_rele(clone, FTAG); -		if (error) { -			nvlist_free(nvprops); +		/* +		 * We have to have normalization and +		 * case-folding flags correct when we do the +		 * file system creation, so go figure them out +		 * now. 
+		 */ +		VERIFY(nvlist_alloc(&zct.zct_zplprops, +		    NV_UNIQUE_NAME, KM_SLEEP) == 0); +		error = zfs_fill_zplprops(fsname, nvprops, +		    zct.zct_zplprops, &is_insensitive); +		if (error != 0) { +			nvlist_free(zct.zct_zplprops);  			return (error);  		} -	} else { -		boolean_t is_insensitive = B_FALSE; +	} -		if (cbfunc == NULL) { -			nvlist_free(nvprops); -			return (EINVAL); -		} +	error = dmu_objset_create(fsname, type, +	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); +	nvlist_free(zct.zct_zplprops); -		if (type == DMU_OST_ZVOL) { -			uint64_t volsize, volblocksize; +	/* +	 * It would be nice to do this atomically. +	 */ +	if (error == 0) { +		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, +		    nvprops, outnvl); +		if (error != 0) +			(void) dmu_objset_destroy(fsname, B_FALSE); +	} +	return (error); +} -			if (nvprops == NULL || -			    nvlist_lookup_uint64(nvprops, -			    zfs_prop_to_name(ZFS_PROP_VOLSIZE), -			    &volsize) != 0) { -				nvlist_free(nvprops); -				return (EINVAL); -			} +/* + * innvl: { + *     "origin" -> name of origin snapshot + *     (optional) "props" -> { prop -> value } + * } + * + * outnvl: propname -> error code (int32) + */ +static int +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) +{ +	int error = 0; +	nvlist_t *nvprops = NULL; +	char *origin_name; +	dsl_dataset_t *origin; -			if ((error = nvlist_lookup_uint64(nvprops, -			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), -			    &volblocksize)) != 0 && error != ENOENT) { -				nvlist_free(nvprops); -				return (EINVAL); -			} +	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) +		return (EINVAL); +	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops); -			if (error != 0) -				volblocksize = zfs_prop_default_numeric( -				    ZFS_PROP_VOLBLOCKSIZE); +	if (strchr(fsname, '@') || +	    strchr(fsname, '%')) +		return (EINVAL); -			if ((error = zvol_check_volblocksize( -			    volblocksize)) != 0 || -			    (error = 
zvol_check_volsize(volsize, -			    volblocksize)) != 0) { -				nvlist_free(nvprops); -				return (error); -			} -		} else if (type == DMU_OST_ZFS) { -			int error; +	if (dataset_namecheck(origin_name, NULL, NULL) != 0) +		return (EINVAL); -			/* -			 * We have to have normalization and -			 * case-folding flags correct when we do the -			 * file system creation, so go figure them out -			 * now. -			 */ -			VERIFY(nvlist_alloc(&zct.zct_zplprops, -			    NV_UNIQUE_NAME, KM_SLEEP) == 0); -			error = zfs_fill_zplprops(zc->zc_name, nvprops, -			    zct.zct_zplprops, &is_insensitive); -			if (error != 0) { -				nvlist_free(nvprops); -				nvlist_free(zct.zct_zplprops); -				return (error); -			} -		} -		error = dmu_objset_create(zc->zc_name, type, -		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); -		nvlist_free(zct.zct_zplprops); -	} +	error = dsl_dataset_hold(origin_name, FTAG, &origin); +	if (error) +		return (error); + +	error = dmu_objset_clone(fsname, origin, 0); +	dsl_dataset_rele(origin, FTAG); +	if (error) +		return (error);  	/*  	 * It would be nice to do this atomically.  	 */  	if (error == 0) { -		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, -		    nvprops, NULL); +		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, +		    nvprops, outnvl);  		if (error != 0) -			(void) dmu_objset_destroy(zc->zc_name, B_FALSE); +			(void) dmu_objset_destroy(fsname, B_FALSE);  	} -	nvlist_free(nvprops);  	return (error);  }  /* - * inputs: - * zc_name	name of filesystem - * zc_value	short name of snapshot - * zc_cookie	recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + *     "snaps" -> { snapshot1, snapshot2 } + *     (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32)   * - * outputs: - * zc_value	short snapname (i.e. 
part after the '@')   */  static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)  { -	nvlist_t *nvprops = NULL; -	int error; -	boolean_t recursive = zc->zc_cookie; +	nvlist_t *snaps; +	nvlist_t *props = NULL; +	int error, poollen; +	nvpair_t *pair; -	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) +	(void) nvlist_lookup_nvlist(innvl, "props", &props); +	if ((error = zfs_check_userprops(poolname, props)) != 0) +		return (error); + +	if (!nvlist_empty(props) && +	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) +		return (ENOTSUP); + +	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)  		return (EINVAL); +	poollen = strlen(poolname); +	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; +	    pair = nvlist_next_nvpair(snaps, pair)) { +		const char *name = nvpair_name(pair); +		const char *cp = strchr(name, '@'); -	if (zc->zc_nvlist_src != NULL && -	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, -	    zc->zc_iflags, &nvprops)) != 0) -		return (error); +		/* +		 * The snap name must contain an @, and the part after it must +		 * contain only valid characters. +		 */ +		if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) +			return (EINVAL); -	error = zfs_check_userprops(zc->zc_name, nvprops); -	if (error) -		goto out; +		/* +		 * The snap must be in the specified pool. +		 */ +		if (strncmp(name, poolname, poollen) != 0 || +		    (name[poollen] != '/' && name[poollen] != '@')) +			return (EXDEV); + +		/* This must be the only snap of this fs. 
*/ +		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); +		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { +			if (strncmp(name, nvpair_name(pair2), cp - name + 1) +			    == 0) { +				return (EXDEV); +			} +		} +	} -	if (!nvlist_empty(nvprops) && -	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { -		error = ENOTSUP; -		goto out; +	error = dmu_objset_snapshot(snaps, props, outnvl); +	return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ +	char *message; +	spa_t *spa; +	int error; +	char *poolname; + +	/* +	 * The poolname in the ioctl is not set, we get it from the TSD, +	 * which was set at the end of the last successful ioctl that allows +	 * logging.  The secpolicy func already checked that it is set. +	 * Only one log ioctl is allowed after each successful ioctl, so +	 * we clear the TSD here. +	 */ +	poolname = tsd_get(zfs_allow_log_key); +	(void) tsd_set(zfs_allow_log_key, NULL); +	error = spa_open(poolname, &spa, FTAG); +	strfree(poolname); +	if (error != 0) +		return (error); + +	if (nvlist_lookup_string(innvl, "message", &message) != 0)  { +		spa_close(spa, FTAG); +		return (EINVAL);  	} -	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, -	    nvprops, recursive, B_FALSE, -1); +	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { +		spa_close(spa, FTAG); +		return (ENOTSUP); +	} -out: -	nvlist_free(nvprops); +	error = spa_history_log(spa, message); +	spa_close(spa, FTAG);  	return (error);  } +/* ARGSUSED */  int  zfs_unmount_snap(const char *name, void *arg)  { -	vfs_t *vfsp = NULL; +	vfs_t *vfsp; +	int err; -	if (arg) { -		char *snapname = arg; -		char *fullname = kmem_asprintf("%s@%s", name, snapname); -		vfsp = zfs_get_vfs(fullname); -		strfree(fullname); -	} else if (strchr(name, '@')) { -		vfsp = zfs_get_vfs(name); -	} +	if (strchr(name, '@') == NULL) +		return (0); -	if (vfsp) { -		/* -		 * 
Always force the unmount for snapshots. -		 */ -		int flag = MS_FORCE; -		int err; +	vfsp = zfs_get_vfs(name); +	if (vfsp == NULL) +		return (0); -		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { -			VFS_RELE(vfsp); -			return (err); -		} +	if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {  		VFS_RELE(vfsp); -		if ((err = dounmount(vfsp, flag, kcred)) != 0) -			return (err); +		return (err);  	} -	return (0); +	VFS_RELE(vfsp); + +	/* +	 * Always force the unmount for snapshots. +	 */ +	return (dounmount(vfsp, MS_FORCE, kcred));  }  /* - * inputs: - * zc_name		name of filesystem, snaps must be under it - * zc_nvlist_src[_size]	full names of snapshots to destroy - * zc_defer_destroy	mark for deferred destroy + * innvl: { + *     "snaps" -> { snapshot1, snapshot2 } + *     (optional boolean) "defer" + * } + * + * outnvl: snapshot -> error code (int32)   * - * outputs: - * zc_name		on failure, name of failed snapshot   */  static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)  { -	int err, len; -	nvlist_t *nvl; +	int poollen; +	nvlist_t *snaps;  	nvpair_t *pair; +	boolean_t defer; -	if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, -	    zc->zc_iflags, &nvl)) != 0) -		return (err); +	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) +		return (EINVAL); +	defer = nvlist_exists(innvl, "defer"); -	len = strlen(zc->zc_name); -	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; -	    pair = nvlist_next_nvpair(nvl, pair)) { +	poollen = strlen(poolname); +	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; +	    pair = nvlist_next_nvpair(snaps, pair)) {  		const char *name = nvpair_name(pair); +  		/* -		 * The snap name must be underneath the zc_name.  This ensures -		 * that our permission checks were legitimate. +		 * The snap must be in the specified pool.  		 
*/ -		if (strncmp(zc->zc_name, name, len) != 0 || -		    (name[len] != '@' && name[len] != '/')) { -			nvlist_free(nvl); -			return (EINVAL); -		} +		if (strncmp(name, poolname, poollen) != 0 || +		    (name[poollen] != '/' && name[poollen] != '@')) +			return (EXDEV); +		/* +		 * Ignore failures to unmount; dmu_snapshots_destroy_nvl() +		 * will deal with this gracefully (by filling in outnvl). +		 */  		(void) zfs_unmount_snap(name, NULL);  	} -	err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, -	    zc->zc_name); -	nvlist_free(nvl); -	return (err); +	return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl));  }  /* @@ -3491,7 +3705,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)  		(void) strcpy(zc->zc_value, nvpair_name(pair));  		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || -		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { +		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {  			VERIFY(nvlist_remove_nvpair(props, pair) == 0);  			VERIFY(nvlist_add_int32(errors,  			    zc->zc_value, err) == 0); @@ -3699,8 +3913,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)  	 * dmu_recv_begin() succeeds.  	 */  	if (props) { -		nvlist_t *errlist; -  		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {  			if (drc.drc_newfs) {  				if (spa_version(os->os_spa) >= @@ -3719,12 +3931,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)  		}  		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, -		    props, &errlist); -		(void) nvlist_merge(errors, errlist, 0); -		nvlist_free(errlist); +		    props, errors);  	} -	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { +	if (zc->zc_nvlist_dst_size != 0 && +	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || +	    put_nvlist(zc, errors) != 0)) {  		/*  		 * Caller made zc->zc_nvlist_dst less than the minimum expected  		 * size or supplied an invalid address. 
@@ -3856,15 +4068,13 @@ zfs_ioc_send(zfs_cmd_t *zc)  	rw_enter(&dp->dp_config_rwlock, RW_READER);  	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);  	rw_exit(&dp->dp_config_rwlock); -	if (error) { -		spa_close(spa, FTAG); +	spa_close(spa, FTAG); +	if (error)  		return (error); -	}  	error = dmu_objset_from_ds(ds, &tosnap);  	if (error) {  		dsl_dataset_rele(ds, FTAG); -		spa_close(spa, FTAG);  		return (error);  	} @@ -3872,7 +4082,6 @@ zfs_ioc_send(zfs_cmd_t *zc)  		rw_enter(&dp->dp_config_rwlock, RW_READER);  		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);  		rw_exit(&dp->dp_config_rwlock); -		spa_close(spa, FTAG);  		if (error) {  			dsl_dataset_rele(ds, FTAG);  			return (error); @@ -3883,12 +4092,37 @@ zfs_ioc_send(zfs_cmd_t *zc)  			dsl_dataset_rele(ds, FTAG);  			return (error);  		} -	} else { -		spa_close(spa, FTAG); +	} + +	if (zc->zc_obj) { +		dsl_pool_t *dp = ds->ds_dir->dd_pool; + +		if (fromsnap != NULL) { +			dsl_dataset_rele(dsfrom, FTAG); +			dsl_dataset_rele(ds, FTAG); +			return (EINVAL); +		} + +		if (dsl_dir_is_clone(ds->ds_dir)) { +			rw_enter(&dp->dp_config_rwlock, RW_READER); +			error = dsl_dataset_hold_obj(dp, +			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); +			rw_exit(&dp->dp_config_rwlock); +			if (error) { +				dsl_dataset_rele(ds, FTAG); +				return (error); +			} +			error = dmu_objset_from_ds(dsfrom, &fromsnap); +			if (error) { +				dsl_dataset_rele(dsfrom, FTAG); +				dsl_dataset_rele(ds, FTAG); +				return (error); +			} +		}  	}  	if (estimate) { -		error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, +		error = dmu_send_estimate(tosnap, fromsnap,  		    &zc->zc_objset_type);  	} else {  		file_t *fp = getf(zc->zc_cookie); @@ -3900,7 +4134,7 @@ zfs_ioc_send(zfs_cmd_t *zc)  		}  		off = fp->f_offset; -		error = dmu_send(tosnap, fromsnap, zc->zc_obj, +		error = dmu_send(tosnap, fromsnap,  		    zc->zc_cookie, fp->f_vnode, &off);  		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 
0) @@ -4414,6 +4648,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)   * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process   *   * outputs: + * zc_value		short name of new snapshot   */  static int  zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) @@ -4421,22 +4656,21 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)  	char *snap_name;  	int error; -	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, +	snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value,  	    (u_longlong_t)ddi_get_lbolt64()); -	if (strlen(snap_name) >= MAXNAMELEN) { +	if (strlen(snap_name) >= MAXPATHLEN) {  		strfree(snap_name);  		return (E2BIG);  	} -	error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, -	    NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); +	error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd);  	if (error != 0) {  		strfree(snap_name);  		return (error);  	} -	(void) strcpy(zc->zc_value, snap_name); +	(void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1);  	strfree(snap_name);  	return (0);  } @@ -4790,172 +5024,427 @@ zfs_ioc_space_written(zfs_cmd_t *zc)  	dsl_dataset_rele(new, FTAG);  	return (error);  } -  /* - * inputs: - * zc_name		full name of last snapshot - * zc_value		full name of first snapshot + * innvl: { + *     "firstsnap" -> snapshot name + * }   * - * outputs: - * zc_cookie		space in bytes - * zc_objset_type	compressed space in bytes - * zc_perm_action	uncompressed space in bytes + * outnvl: { + *     "used" -> space in bytes + *     "compressed" -> compressed space in bytes + *     "uncompressed" -> uncompressed space in bytes + * }   */  static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)  {  	int error;  	dsl_dataset_t *new, *old; +	char *firstsnap; +	uint64_t used, comp, uncomp; -	error = dsl_dataset_hold(zc->zc_name, FTAG, &new); +	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) +		return (EINVAL); + +	error = dsl_dataset_hold(lastsnap, FTAG, 
&new);  	if (error != 0)  		return (error); -	error = dsl_dataset_hold(zc->zc_value, FTAG, &old); +	error = dsl_dataset_hold(firstsnap, FTAG, &old);  	if (error != 0) {  		dsl_dataset_rele(new, FTAG);  		return (error);  	} -	error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, -	    &zc->zc_objset_type, &zc->zc_perm_action); +	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);  	dsl_dataset_rele(old, FTAG);  	dsl_dataset_rele(new, FTAG); +	fnvlist_add_uint64(outnvl, "used", used); +	fnvlist_add_uint64(outnvl, "compressed", comp); +	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);  	return (error);  }  /* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. + * innvl: { + *     "fd" -> file descriptor to write stream to (int32) + *     (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused   */ -static zfs_ioc_vec_t zfs_ioc_vec[] = { -	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, -	    POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | 
POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, 
B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, -	    B_FALSE, POOL_CHECK_NONE }, -	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, -	    B_FALSE, POOL_CHECK_NONE }, -	{ zfs_ioc_userspace_upgrade, 
zfs_secpolicy_userspace_upgrade, -	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE }, -	{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, -	    B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, -	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, -	{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, -	    POOL_CHECK_SUSPENDED }, -	{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, -	    POOL_CHECK_NONE } -}; +/* ARGSUSED */ +static int +zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ +	objset_t *fromsnap = NULL; +	objset_t *tosnap; +	int error; +	offset_t off; +	char *fromname; +	int fd; + +	error = nvlist_lookup_int32(innvl, "fd", &fd); +	if (error != 0) +		return 
(EINVAL); + +	error = dmu_objset_hold(snapname, FTAG, &tosnap); +	if (error) +		return (error); + +	error = nvlist_lookup_string(innvl, "fromsnap", &fromname); +	if (error == 0) { +		error = dmu_objset_hold(fromname, FTAG, &fromsnap); +		if (error) { +			dmu_objset_rele(tosnap, FTAG); +			return (error); +		} +	} + +	file_t *fp = getf(fd); +	if (fp == NULL) { +		dmu_objset_rele(tosnap, FTAG); +		if (fromsnap != NULL) +			dmu_objset_rele(fromsnap, FTAG); +		return (EBADF); +	} + +	off = fp->f_offset; +	error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); + +	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) +		fp->f_offset = off; +	releasef(fd); +	if (fromsnap != NULL) +		dmu_objset_rele(fromsnap, FTAG); +	dmu_objset_rele(tosnap, FTAG); +	return (error); +} + +/* + * Determine approximately how large a zfs send stream will be -- the number + * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). + * + * innvl: { + *     (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl: { + *     "space" -> bytes of space (uint64) + * } + */ +static int +zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ +	objset_t *fromsnap = NULL; +	objset_t *tosnap; +	int error; +	char *fromname; +	uint64_t space; + +	error = dmu_objset_hold(snapname, FTAG, &tosnap); +	if (error) +		return (error); + +	error = nvlist_lookup_string(innvl, "fromsnap", &fromname); +	if (error == 0) { +		error = dmu_objset_hold(fromname, FTAG, &fromsnap); +		if (error) { +			dmu_objset_rele(tosnap, FTAG); +			return (error); +		} +	} + +	error = dmu_send_estimate(tosnap, fromsnap, &space); +	fnvlist_add_uint64(outnvl, "space", space); + +	if (fromsnap != NULL) +		dmu_objset_rele(fromsnap, FTAG); +	dmu_objset_rele(tosnap, FTAG); +	return (error); +} + + +static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; + +static void +zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, +    
zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, +    boolean_t log_history, zfs_ioc_poolcheck_t pool_check) +{ +	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + +	ASSERT3U(ioc, >=, ZFS_IOC_FIRST); +	ASSERT3U(ioc, <, ZFS_IOC_LAST); +	ASSERT3P(vec->zvec_legacy_func, ==, NULL); +	ASSERT3P(vec->zvec_func, ==, NULL); + +	vec->zvec_legacy_func = func; +	vec->zvec_secpolicy = secpolicy; +	vec->zvec_namecheck = namecheck; +	vec->zvec_allow_log = log_history; +	vec->zvec_pool_check = pool_check; +} + +/* + * See the block comment at the beginning of this file for details on + * each argument to this function. + */ +static void +zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, +    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, +    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, +    boolean_t allow_log) +{ +	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + +	ASSERT3U(ioc, >=, ZFS_IOC_FIRST); +	ASSERT3U(ioc, <, ZFS_IOC_LAST); +	ASSERT3P(vec->zvec_legacy_func, ==, NULL); +	ASSERT3P(vec->zvec_func, ==, NULL); + +	/* if we are logging, the name must be valid */ +	ASSERT(!allow_log || namecheck != NO_NAME); + +	vec->zvec_name = name; +	vec->zvec_func = func; +	vec->zvec_secpolicy = secpolicy; +	vec->zvec_namecheck = namecheck; +	vec->zvec_pool_check = pool_check; +	vec->zvec_smush_outnvlist = smush_outnvlist; +	vec->zvec_allow_log = allow_log; +} + +static void +zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, +    zfs_secpolicy_func_t *secpolicy, boolean_t log_history, +    zfs_ioc_poolcheck_t pool_check) +{ +	zfs_ioctl_register_legacy(ioc, func, secpolicy, +	    POOL_NAME, log_history, pool_check); +} + +static void +zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, +    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) +{ +	zfs_ioctl_register_legacy(ioc, func, secpolicy, +	    DATASET_NAME, B_FALSE, pool_check); +} + +static 
void +zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ +	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, +	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, +    zfs_secpolicy_func_t *secpolicy) +{ +	zfs_ioctl_register_legacy(ioc, func, secpolicy, +	    NO_NAME, B_FALSE, POOL_CHECK_NONE); +} + +static void +zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, +    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) +{ +	zfs_ioctl_register_legacy(ioc, func, secpolicy, +	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); +} + +static void +zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ +	zfs_ioctl_register_dataset_read_secpolicy(ioc, func, +	    zfs_secpolicy_read); +} + +static void +zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, +	zfs_secpolicy_func_t *secpolicy) +{ +	zfs_ioctl_register_legacy(ioc, func, secpolicy, +	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_init(void) +{ +	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, +	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + +	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, +	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + +	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, +	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, +	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + +	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, +	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, +	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + +	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, +	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, +	    
POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + +	zfs_ioctl_register("create", ZFS_IOC_CREATE, +	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + +	zfs_ioctl_register("clone", ZFS_IOC_CLONE, +	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + +	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, +	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + +	/* IOCTLS that use the legacy function signature */ + +	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, +	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); + +	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, +	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); +	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, +	    zfs_ioc_pool_scan); +	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, +	    zfs_ioc_pool_upgrade); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, +	    zfs_ioc_vdev_add); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, +	    zfs_ioc_vdev_remove); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, +	    zfs_ioc_vdev_set_state); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, +	    zfs_ioc_vdev_attach); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, +	    zfs_ioc_vdev_detach); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, +	    zfs_ioc_vdev_setpath); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, +	    zfs_ioc_vdev_setfru); +	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, +	    zfs_ioc_pool_set_props); +	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, +	    zfs_ioc_vdev_split); +	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, +	    zfs_ioc_pool_reguid); + +	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, +	    zfs_ioc_pool_configs, 
zfs_secpolicy_none); +	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, +	    zfs_ioc_pool_tryimport, zfs_secpolicy_config); +	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, +	    zfs_ioc_inject_fault, zfs_secpolicy_inject); +	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, +	    zfs_ioc_clear_fault, zfs_secpolicy_inject); +	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, +	    zfs_ioc_inject_list_next, zfs_secpolicy_inject); + +	/* +	 * pool destroy, and export don't log the history as part of +	 * zfsdev_ioctl, but rather zfs_ioc_pool_export +	 * does the logging of those commands. +	 */ +	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, +	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); +	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, +	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + +	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, +	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); +	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, +	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + +	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, +	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); +	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, +	    zfs_ioc_dsobj_to_dsname, +	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); +	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, +	    zfs_ioc_pool_get_history, +	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); + +	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, +	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + +	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, +	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); +	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, +	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + +	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, +	    zfs_ioc_space_written); +	
zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, +	    zfs_ioc_get_holds); +	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, +	    zfs_ioc_objset_recvd_props); +	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, +	    zfs_ioc_next_obj); +	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, +	    zfs_ioc_get_fsacl); +	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, +	    zfs_ioc_objset_stats); +	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, +	    zfs_ioc_objset_zplprops); +	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, +	    zfs_ioc_dataset_list_next); +	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, +	    zfs_ioc_snapshot_list_next); +	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, +	    zfs_ioc_send_progress); + +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, +	    zfs_ioc_diff, zfs_secpolicy_diff); +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, +	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff); +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, +	    zfs_ioc_obj_to_path, zfs_secpolicy_diff); +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, +	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, +	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); +	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, +	    zfs_ioc_send, zfs_secpolicy_send); + +	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, +	    zfs_secpolicy_none); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, +	    zfs_secpolicy_destroy); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, +	    zfs_secpolicy_rollback); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, +	    zfs_secpolicy_rename); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, +	    zfs_secpolicy_recv); +	
zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, +	    zfs_secpolicy_promote); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, +	    zfs_secpolicy_hold); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, +	    zfs_secpolicy_release); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, +	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); +	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, +	    zfs_secpolicy_set_fsacl); + +	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, +	    zfs_secpolicy_share, POOL_CHECK_NONE); +	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, +	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE); +	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, +	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, +	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, +	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +}  int  pool_status_check(const char *name, zfs_ioc_namecheck_t type, @@ -5092,67 +5581,145 @@ static int  zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)  {  	zfs_cmd_t *zc; -	uint_t vec; -	int error, rc; +	uint_t vecnum; +	int error, rc, len;  	minor_t minor = getminor(dev); +	const zfs_ioc_vec_t *vec; +	char *saved_poolname = NULL; +	nvlist_t *innvl = NULL;  	if (minor != 0 &&  	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)  		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); -	vec = cmd - ZFS_IOC; +	vecnum = cmd - ZFS_IOC_FIRST;  	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); -	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) +	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))  		return (EINVAL); +	vec = &zfs_ioc_vec[vecnum];  	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);  	error = ddi_copyin((void *)arg, zc, sizeof 
(zfs_cmd_t), flag); -	if (error != 0) +	if (error != 0) {  		error = EFAULT; +		goto out; +	} -	if ((error == 0) && !(flag & FKIOCTL)) -		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); +	zc->zc_iflags = flag & FKIOCTL; +	if (zc->zc_nvlist_src_size != 0) { +		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, +		    zc->zc_iflags, &innvl); +		if (error != 0) +			goto out; +	}  	/*  	 * Ensure that all pool/dataset names are valid before we pass down to  	 * the lower layers.  	 */ -	if (error == 0) { -		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; -		zc->zc_iflags = flag & FKIOCTL; -		switch (zfs_ioc_vec[vec].zvec_namecheck) { -		case POOL_NAME: -			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) -				error = EINVAL; +	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; +	switch (vec->zvec_namecheck) { +	case POOL_NAME: +		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) +			error = EINVAL; +		else  			error = pool_status_check(zc->zc_name, -			    zfs_ioc_vec[vec].zvec_namecheck, -			    zfs_ioc_vec[vec].zvec_pool_check); -			break; +			    vec->zvec_namecheck, vec->zvec_pool_check); +		break; -		case DATASET_NAME: -			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) -				error = EINVAL; +	case DATASET_NAME: +		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) +			error = EINVAL; +		else  			error = pool_status_check(zc->zc_name, -			    zfs_ioc_vec[vec].zvec_namecheck, -			    zfs_ioc_vec[vec].zvec_pool_check); -			break; +			    vec->zvec_namecheck, vec->zvec_pool_check); +		break; -		case NO_NAME: -			break; -		} +	case NO_NAME: +		break;  	} -	if (error == 0) -		error = zfs_ioc_vec[vec].zvec_func(zc); +	if (error == 0 && !(flag & FKIOCTL)) +		error = vec->zvec_secpolicy(zc, innvl, cr); + +	if (error != 0) +		goto out; + +	/* legacy ioctls can modify zc_name */ +	len = strcspn(zc->zc_name, "/@") + 1; +	saved_poolname = kmem_alloc(len, KM_SLEEP); +	(void) strlcpy(saved_poolname, zc->zc_name, len); + +	if (vec->zvec_func != NULL) { +		nvlist_t 
*outnvl; +		int puterror = 0; +		spa_t *spa; +		nvlist_t *lognv = NULL; + +		ASSERT(vec->zvec_legacy_func == NULL); + +		/* +		 * Add the innvl to the lognv before calling the func, +		 * in case the func changes the innvl. +		 */ +		if (vec->zvec_allow_log) { +			lognv = fnvlist_alloc(); +			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, +			    vec->zvec_name); +			if (!nvlist_empty(innvl)) { +				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, +				    innvl); +			} +		} + +		outnvl = fnvlist_alloc(); +		error = vec->zvec_func(zc->zc_name, innvl, outnvl); + +		if (error == 0 && vec->zvec_allow_log && +		    spa_open(zc->zc_name, &spa, FTAG) == 0) { +			if (!nvlist_empty(outnvl)) { +				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, +				    outnvl); +			} +			(void) spa_history_log_nvl(spa, lognv); +			spa_close(spa, FTAG); +		} +		fnvlist_free(lognv); + +		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { +			int smusherror = 0; +			if (vec->zvec_smush_outnvlist) { +				smusherror = nvlist_smush(outnvl, +				    zc->zc_nvlist_dst_size); +			} +			if (smusherror == 0) +				puterror = put_nvlist(zc, outnvl); +		} + +		if (puterror != 0) +			error = puterror; + +		nvlist_free(outnvl); +	} else { +		error = vec->zvec_legacy_func(zc); +	} + +out: +	nvlist_free(innvl);  	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); -	if (error == 0) { -		if (rc != 0) -			error = EFAULT; -		if (zfs_ioc_vec[vec].zvec_his_log) -			zfs_log_history(zc); +	if (error == 0 && rc != 0) +		error = EFAULT; +	if (error == 0 && vec->zvec_allow_log) { +		char *s = tsd_get(zfs_allow_log_key); +		if (s != NULL) +			strfree(s); +		(void) tsd_set(zfs_allow_log_key, saved_poolname); +	} else { +		if (saved_poolname != NULL) +			strfree(saved_poolname);  	}  	kmem_free(zc, sizeof (zfs_cmd_t)); @@ -5268,9 +5835,12 @@ static struct modlinkage modlinkage = {  	NULL  }; - -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; +static void +zfs_allow_log_destroy(void *arg) +{ +	char 
*poolname = arg; +	strfree(poolname); +}  int  _init(void) @@ -5280,6 +5850,7 @@ _init(void)  	spa_init(FREAD | FWRITE);  	zfs_init();  	zvol_init(); +	zfs_ioctl_init();  	if ((error = mod_install(&modlinkage)) != 0) {  		zvol_fini(); @@ -5289,7 +5860,8 @@ _init(void)  	}  	tsd_create(&zfs_fsyncer_key, NULL); -	tsd_create(&rrw_tsd_key, NULL); +	tsd_create(&rrw_tsd_key, rrw_tsd_destroy); +	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);  	error = ldi_ident_from_mod(&modlinkage, &zfs_li);  	ASSERT(error == 0); diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c index 14b888b29e03..21ac731c1eb9 100644 --- a/uts/common/fs/zfs/zfs_vfsops.c +++ b/uts/common/fs/zfs/zfs_vfsops.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   */  /* Portions Copyright 2010 Robert Milkowski */ @@ -2248,9 +2249,8 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)  		sa_register_update_callback(os, zfs_sa_upgrade);  	} -	spa_history_log_internal(LOG_DS_UPGRADE, -	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", -	    zfsvfs->z_version, newvers, dmu_objset_id(os)); +	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, +	    "from %llu to %llu", zfsvfs->z_version, newvers);  	dmu_tx_commit(tx); diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c index df9a16bccb46..edf574e3c9ba 100644 --- a/uts/common/fs/zfs/zvol.c +++ b/uts/common/fs/zfs/zvol.c @@ -24,6 +24,7 @@   * Portions Copyright 2010 Robert Milkowski   *   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.   
*/  /* @@ -135,7 +136,7 @@ typedef struct zvol_state {  int zvol_maxphys = DMU_MAX_ACCESS/2;  extern int zfs_set_prop_nvlist(const char *, zprop_source_t, -    nvlist_t *, nvlist_t **); +    nvlist_t *, nvlist_t *);  static int zvol_remove_zv(zvol_state_t *);  static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);  static int zvol_dumpify(zvol_state_t *zv); @@ -1885,7 +1886,7 @@ zvol_dumpify(zvol_state_t *zv)  	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,  	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { -		boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; +		boolean_t resize = (dumpsize > 0);  		if ((error = zvol_dump_init(zv, resize)) != 0) {  			(void) zvol_dump_fini(zv); diff --git a/uts/common/sys/fs/zfs.h b/uts/common/sys/fs/zfs.h index 4cf7bba9fe25..511fa9589817 100644 --- a/uts/common/sys/fs/zfs.h +++ b/uts/common/sys/fs/zfs.h @@ -52,6 +52,16 @@ typedef enum {  	ZFS_TYPE_POOL		= 0x8  } zfs_type_t; +typedef enum dmu_objset_type { +	DMU_OST_NONE, +	DMU_OST_META, +	DMU_OST_ZFS, +	DMU_OST_ZVOL, +	DMU_OST_OTHER,			/* For testing only! */ +	DMU_OST_ANY,			/* Be careful! */ +	DMU_OST_NUMTYPES +} dmu_objset_type_t; +  #define	ZFS_TYPE_DATASET	\  	(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) @@ -744,10 +754,10 @@ typedef struct ddt_histogram {  /*   * /dev/zfs ioctl numbers.   
*/ -#define	ZFS_IOC		('Z' << 8) -  typedef enum zfs_ioc { -	ZFS_IOC_POOL_CREATE = ZFS_IOC, +	ZFS_IOC_FIRST =	('Z' << 8), +	ZFS_IOC = ZFS_IOC_FIRST, +	ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST,  	ZFS_IOC_POOL_DESTROY,  	ZFS_IOC_POOL_IMPORT,  	ZFS_IOC_POOL_EXPORT, @@ -806,10 +816,15 @@ typedef enum zfs_ioc {  	ZFS_IOC_OBJ_TO_STATS,  	ZFS_IOC_SPACE_WRITTEN,  	ZFS_IOC_SPACE_SNAPS, -	ZFS_IOC_DESTROY_SNAPS_NVL, +	ZFS_IOC_DESTROY_SNAPS,  	ZFS_IOC_POOL_REGUID,  	ZFS_IOC_POOL_REOPEN, -	ZFS_IOC_SEND_PROGRESS +	ZFS_IOC_SEND_PROGRESS, +	ZFS_IOC_LOG_HISTORY, +	ZFS_IOC_SEND_NEW, +	ZFS_IOC_SEND_SPACE, +	ZFS_IOC_CLONE, +	ZFS_IOC_LAST  } zfs_ioc_t;  /* @@ -846,6 +861,12 @@ typedef enum {  #define	ZPOOL_HIST_TXG		"history txg"  #define	ZPOOL_HIST_INT_EVENT	"history internal event"  #define	ZPOOL_HIST_INT_STR	"history internal str" +#define	ZPOOL_HIST_INT_NAME	"internal_name" +#define	ZPOOL_HIST_IOCTL	"ioctl" +#define	ZPOOL_HIST_INPUT_NVL	"in_nvl" +#define	ZPOOL_HIST_OUTPUT_NVL	"out_nvl" +#define	ZPOOL_HIST_DSNAME	"dsname" +#define	ZPOOL_HIST_DSID		"dsid"  /*   * Flags for ZFS_IOC_VDEV_SET_STATE @@ -891,56 +912,6 @@ typedef enum {  #define	ZFS_EV_VDEV_PATH	"vdev_path"  #define	ZFS_EV_VDEV_GUID	"vdev_guid" -/* - * Note: This is encoded on-disk, so new events must be added to the - * end, and unused events can not be removed.  Be sure to edit - * libzfs_pool.c: hist_event_table[]. 
- */ -typedef enum history_internal_events { -	LOG_NO_EVENT = 0, -	LOG_POOL_CREATE, -	LOG_POOL_VDEV_ADD, -	LOG_POOL_REMOVE, -	LOG_POOL_DESTROY, -	LOG_POOL_EXPORT, -	LOG_POOL_IMPORT, -	LOG_POOL_VDEV_ATTACH, -	LOG_POOL_VDEV_REPLACE, -	LOG_POOL_VDEV_DETACH, -	LOG_POOL_VDEV_ONLINE, -	LOG_POOL_VDEV_OFFLINE, -	LOG_POOL_UPGRADE, -	LOG_POOL_CLEAR, -	LOG_POOL_SCAN, -	LOG_POOL_PROPSET, -	LOG_DS_CREATE, -	LOG_DS_CLONE, -	LOG_DS_DESTROY, -	LOG_DS_DESTROY_BEGIN, -	LOG_DS_INHERIT, -	LOG_DS_PROPSET, -	LOG_DS_QUOTA, -	LOG_DS_PERM_UPDATE, -	LOG_DS_PERM_REMOVE, -	LOG_DS_PERM_WHO_REMOVE, -	LOG_DS_PROMOTE, -	LOG_DS_RECEIVE, -	LOG_DS_RENAME, -	LOG_DS_RESERVATION, -	LOG_DS_REPLAY_INC_SYNC, -	LOG_DS_REPLAY_FULL_SYNC, -	LOG_DS_ROLLBACK, -	LOG_DS_SNAPSHOT, -	LOG_DS_UPGRADE, -	LOG_DS_REFQUOTA, -	LOG_DS_REFRESERV, -	LOG_POOL_SCAN_DONE, -	LOG_DS_USER_HOLD, -	LOG_DS_USER_RELEASE, -	LOG_POOL_SPLIT, -	LOG_END -} history_internal_events_t; -  #ifdef	__cplusplus  }  #endif  | 
