-rw-r--r--  OPENSOLARIS.LICENSE | 384
-rw-r--r--  common/acl/acl_common.c | 117
-rw-r--r--  common/acl/acl_common.h | 20
-rw-r--r--  common/list/list.c | 251
-rw-r--r--  common/nvpair/fnvpair.c | 496
-rw-r--r--  common/zfs/zfeature_common.c | 156
-rw-r--r--  common/zfs/zfeature_common.h | 71
-rw-r--r--  common/zfs/zfs_comutil.c | 7
-rw-r--r--  common/zfs/zfs_comutil.h | 4
-rw-r--r--  common/zfs/zfs_deleg.c | 3
-rw-r--r--  common/zfs/zfs_deleg.h | 2
-rw-r--r--  common/zfs/zfs_prop.c | 39
-rw-r--r--  common/zfs/zpool_prop.c | 28
-rw-r--r--  common/zfs/zprop_common.c | 6
-rw-r--r--  uts/common/Makefile.files | 51
-rw-r--r--  uts/common/dtrace/dcpc.c | 1218
-rw-r--r--  uts/common/dtrace/dtrace.c | 665
-rw-r--r--  uts/common/dtrace/fasttrap.c | 23
-rw-r--r--  uts/common/dtrace/profile.c | 25
-rw-r--r--  uts/common/fs/zfs/arc.c | 63
-rw-r--r--  uts/common/fs/zfs/bpobj.c | 7
-rw-r--r--  uts/common/fs/zfs/bptree.c | 224
-rw-r--r--  uts/common/fs/zfs/dbuf.c | 23
-rw-r--r--  uts/common/fs/zfs/ddt.c | 9
-rw-r--r--  uts/common/fs/zfs/dmu.c | 126
-rw-r--r--  uts/common/fs/zfs/dmu_objset.c | 297
-rw-r--r--  uts/common/fs/zfs/dmu_send.c | 521
-rw-r--r--  uts/common/fs/zfs/dmu_traverse.c | 107
-rw-r--r--  uts/common/fs/zfs/dmu_tx.c | 27
-rw-r--r--  uts/common/fs/zfs/dnode.c | 19
-rw-r--r--  uts/common/fs/zfs/dnode_sync.c | 4
-rw-r--r--  uts/common/fs/zfs/dsl_dataset.c | 607
-rw-r--r--  uts/common/fs/zfs/dsl_deadlist.c | 30
-rw-r--r--  uts/common/fs/zfs/dsl_deleg.c | 25
-rw-r--r--  uts/common/fs/zfs/dsl_dir.c | 72
-rw-r--r--  uts/common/fs/zfs/dsl_pool.c | 59
-rw-r--r--  uts/common/fs/zfs/dsl_prop.c | 27
-rw-r--r--  uts/common/fs/zfs/dsl_scan.c | 136
-rw-r--r--  uts/common/fs/zfs/dsl_synctask.c | 18
-rw-r--r--  uts/common/fs/zfs/metaslab.c | 105
-rw-r--r--  uts/common/fs/zfs/rrwlock.c | 13
-rw-r--r--  uts/common/fs/zfs/sa.c | 21
-rw-r--r--  uts/common/fs/zfs/spa.c | 640
-rw-r--r--  uts/common/fs/zfs/spa_config.c | 13
-rw-r--r--  uts/common/fs/zfs/spa_history.c | 219
-rw-r--r--  uts/common/fs/zfs/spa_misc.c | 78
-rw-r--r--  uts/common/fs/zfs/sys/bptree.h | 64
-rw-r--r--  uts/common/fs/zfs/sys/dmu.h | 120
-rw-r--r--  uts/common/fs/zfs/sys/dmu_impl.h | 28
-rw-r--r--  uts/common/fs/zfs/sys/dmu_objset.h | 11
-rw-r--r--  uts/common/fs/zfs/sys/dmu_traverse.h | 4
-rw-r--r--  uts/common/fs/zfs/sys/dsl_dataset.h | 23
-rw-r--r--  uts/common/fs/zfs/sys/dsl_deleg.h | 1
-rw-r--r--  uts/common/fs/zfs/sys/dsl_pool.h | 9
-rw-r--r--  uts/common/fs/zfs/sys/dsl_prop.h | 3
-rw-r--r--  uts/common/fs/zfs/sys/dsl_scan.h | 4
-rw-r--r--  uts/common/fs/zfs/sys/metaslab.h | 3
-rw-r--r--  uts/common/fs/zfs/sys/metaslab_impl.h | 2
-rw-r--r--  uts/common/fs/zfs/sys/rrwlock.h | 6
-rw-r--r--  uts/common/fs/zfs/sys/spa.h | 58
-rw-r--r--  uts/common/fs/zfs/sys/spa_impl.h | 14
-rw-r--r--  uts/common/fs/zfs/sys/vdev.h | 8
-rw-r--r--  uts/common/fs/zfs/sys/vdev_impl.h | 9
-rw-r--r--  uts/common/fs/zfs/sys/zap.h | 9
-rw-r--r--  uts/common/fs/zfs/sys/zfeature.h | 52
-rw-r--r--  uts/common/fs/zfs/sys/zfs_acl.h | 2
-rw-r--r--  uts/common/fs/zfs/sys/zfs_context.h | 4
-rw-r--r--  uts/common/fs/zfs/sys/zfs_ioctl.h | 31
-rw-r--r--  uts/common/fs/zfs/sys/zfs_vfsops.h | 1
-rw-r--r--  uts/common/fs/zfs/sys/zio.h | 20
-rw-r--r--  uts/common/fs/zfs/txg.c | 3
-rw-r--r--  uts/common/fs/zfs/vdev.c | 59
-rw-r--r--  uts/common/fs/zfs/vdev_cache.c | 9
-rw-r--r--  uts/common/fs/zfs/vdev_disk.c | 64
-rw-r--r--  uts/common/fs/zfs/vdev_file.c | 6
-rw-r--r--  uts/common/fs/zfs/vdev_label.c | 103
-rw-r--r--  uts/common/fs/zfs/vdev_mirror.c | 8
-rw-r--r--  uts/common/fs/zfs/vdev_missing.c | 8
-rw-r--r--  uts/common/fs/zfs/vdev_raidz.c | 6
-rw-r--r--  uts/common/fs/zfs/vdev_root.c | 8
-rw-r--r--  uts/common/fs/zfs/zap.c | 14
-rw-r--r--  uts/common/fs/zfs/zap_micro.c | 7
-rw-r--r--  uts/common/fs/zfs/zfeature.c | 414
-rw-r--r--  uts/common/fs/zfs/zfs_acl.c | 155
-rw-r--r--  uts/common/fs/zfs/zfs_ctldir.c | 4
-rw-r--r--  uts/common/fs/zfs/zfs_fm.c | 8
-rw-r--r--  uts/common/fs/zfs/zfs_ioctl.c | 1794
-rw-r--r--  uts/common/fs/zfs/zfs_vfsops.c | 20
-rw-r--r--  uts/common/fs/zfs/zfs_vnops.c | 11
-rw-r--r--  uts/common/fs/zfs/zil.c | 41
-rw-r--r--  uts/common/fs/zfs/zio.c | 105
-rw-r--r--  uts/common/fs/zfs/zvol.c | 117
-rw-r--r--  uts/common/os/fm.c | 2
-rw-r--r--  uts/common/sys/ccompile.h | 26
-rw-r--r--  uts/common/sys/cmn_err.h | 21
-rw-r--r--  uts/common/sys/dtrace.h | 66
-rw-r--r--  uts/common/sys/dtrace_impl.h | 17
-rw-r--r--  uts/common/sys/feature_tests.h | 4
-rw-r--r--  uts/common/sys/fs/zfs.h | 123
-rw-r--r--  uts/common/sys/nvpair.h | 69
-rw-r--r--  uts/common/sys/sysevent/eventdefs.h | 2
-rw-r--r--  uts/common/sys/sysmacros.h | 14
-rw-r--r--  uts/common/zmod/crc32.c | 428
103 files changed, 3221 insertions(+), 8057 deletions(-)
diff --git a/OPENSOLARIS.LICENSE b/OPENSOLARIS.LICENSE
new file mode 100644
index 000000000000..da23621dc843
--- /dev/null
+++ b/OPENSOLARIS.LICENSE
@@ -0,0 +1,384 @@
+Unless otherwise noted, all files in this distribution are released
+under the Common Development and Distribution License (CDDL).
+Exceptions are noted within the associated source files.
+
+--------------------------------------------------------------------
+
+
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0
+
+1. Definitions.
+
+ 1.1. "Contributor" means each individual or entity that creates
+ or contributes to the creation of Modifications.
+
+ 1.2. "Contributor Version" means the combination of the Original
+ Software, prior Modifications used by a Contributor (if any),
+ and the Modifications made by that particular Contributor.
+
+ 1.3. "Covered Software" means (a) the Original Software, or (b)
+ Modifications, or (c) the combination of files containing
+ Original Software with files containing Modifications, in
+ each case including portions thereof.
+
+ 1.4. "Executable" means the Covered Software in any form other
+ than Source Code.
+
+ 1.5. "Initial Developer" means the individual or entity that first
+ makes Original Software available under this License.
+
+ 1.6. "Larger Work" means a work which combines Covered Software or
+ portions thereof with code not governed by the terms of this
+ License.
+
+ 1.7. "License" means this document.
+
+ 1.8. "Licensable" means having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed
+ herein.
+
+ 1.9. "Modifications" means the Source Code and Executable form of
+ any of the following:
+
+ A. Any file that results from an addition to, deletion from or
+ modification of the contents of a file containing Original
+ Software or previous Modifications;
+
+ B. Any new file that contains any part of the Original
+ Software or previous Modifications; or
+
+ C. Any new file that is contributed or otherwise made
+ available under the terms of this License.
+
+ 1.10. "Original Software" means the Source Code and Executable
+ form of computer software code that is originally released
+ under this License.
+
+ 1.11. "Patent Claims" means any patent claim(s), now owned or
+ hereafter acquired, including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by
+ grantor.
+
+ 1.12. "Source Code" means (a) the common form of computer software
+ code in which modifications are made and (b) associated
+ documentation included in or with such code.
+
+ 1.13. "You" (or "Your") means an individual or a legal entity
+ exercising rights under, and complying with all of the terms
+ of, this License. For legal entities, "You" includes any
+ entity which controls, is controlled by, or is under common
+ control with You. For purposes of this definition,
+ "control" means (a) the power, direct or indirect, to cause
+ the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty
+ percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants.
+
+ 2.1. The Initial Developer Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and
+ subject to third party intellectual property claims, the Initial
+ Developer hereby grants You a world-wide, royalty-free,
+ non-exclusive license:
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Initial Developer, to use,
+ reproduce, modify, display, perform, sublicense and
+ distribute the Original Software (or portions thereof),
+ with or without Modifications, and/or as part of a Larger
+ Work; and
+
+ (b) under Patent Claims infringed by the making, using or
+ selling of Original Software, to make, have made, use,
+ practice, sell, and offer for sale, and/or otherwise
+ dispose of the Original Software (or portions thereof).
+
+ (c) The licenses granted in Sections 2.1(a) and (b) are
+ effective on the date Initial Developer first distributes
+ or otherwise makes the Original Software available to a
+ third party under the terms of this License.
+
+ (d) Notwithstanding Section 2.1(b) above, no patent license is
+ granted: (1) for code that You delete from the Original
+ Software, or (2) for infringements caused by: (i) the
+ modification of the Original Software, or (ii) the
+ combination of the Original Software with other software
+ or devices.
+
+ 2.2. Contributor Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and
+ subject to third party intellectual property claims, each
+ Contributor hereby grants You a world-wide, royalty-free,
+ non-exclusive license:
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Contributor to use, reproduce,
+ modify, display, perform, sublicense and distribute the
+ Modifications created by such Contributor (or portions
+ thereof), either on an unmodified basis, with other
+ Modifications, as Covered Software and/or as part of a
+ Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using, or
+ selling of Modifications made by that Contributor either
+ alone and/or in combination with its Contributor Version
+ (or portions of such combination), to make, use, sell,
+ offer for sale, have made, and/or otherwise dispose of:
+ (1) Modifications made by that Contributor (or portions
+ thereof); and (2) the combination of Modifications made by
+ that Contributor with its Contributor Version (or portions
+ of such combination).
+
+ (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
+ effective on the date Contributor first distributes or
+ otherwise makes the Modifications available to a third
+ party.
+
+ (d) Notwithstanding Section 2.2(b) above, no patent license is
+ granted: (1) for any code that Contributor has deleted
+ from the Contributor Version; (2) for infringements caused
+ by: (i) third party modifications of Contributor Version,
+ or (ii) the combination of Modifications made by that
+ Contributor with other software (except as part of the
+ Contributor Version) or other devices; or (3) under Patent
+ Claims infringed by Covered Software in the absence of
+ Modifications made by that Contributor.
+
+3. Distribution Obligations.
+
+ 3.1. Availability of Source Code.
+
+ Any Covered Software that You distribute or otherwise make
+ available in Executable form must also be made available in Source
+ Code form and that Source Code form must be distributed only under
+ the terms of this License. You must include a copy of this
+ License with every copy of the Source Code form of the Covered
+ Software You distribute or otherwise make available. You must
+ inform recipients of any such Covered Software in Executable form
+ as to how they can obtain such Covered Software in Source Code
+ form in a reasonable manner on or through a medium customarily
+ used for software exchange.
+
+ 3.2. Modifications.
+
+ The Modifications that You create or to which You contribute are
+ governed by the terms of this License. You represent that You
+ believe Your Modifications are Your original creation(s) and/or
+ You have sufficient rights to grant the rights conveyed by this
+ License.
+
+ 3.3. Required Notices.
+
+ You must include a notice in each of Your Modifications that
+ identifies You as the Contributor of the Modification. You may
+ not remove or alter any copyright, patent or trademark notices
+ contained within the Covered Software, or any notices of licensing
+ or any descriptive text giving attribution to any Contributor or
+ the Initial Developer.
+
+ 3.4. Application of Additional Terms.
+
+ You may not offer or impose any terms on any Covered Software in
+ Source Code form that alters or restricts the applicable version
+ of this License or the recipients' rights hereunder. You may
+ choose to offer, and to charge a fee for, warranty, support,
+ indemnity or liability obligations to one or more recipients of
+ Covered Software. However, you may do so only on Your own behalf,
+ and not on behalf of the Initial Developer or any Contributor.
+ You must make it absolutely clear that any such warranty, support,
+ indemnity or liability obligation is offered by You alone, and You
+ hereby agree to indemnify the Initial Developer and every
+ Contributor for any liability incurred by the Initial Developer or
+ such Contributor as a result of warranty, support, indemnity or
+ liability terms You offer.
+
+ 3.5. Distribution of Executable Versions.
+
+ You may distribute the Executable form of the Covered Software
+ under the terms of this License or under the terms of a license of
+ Your choice, which may contain terms different from this License,
+ provided that You are in compliance with the terms of this License
+ and that the license for the Executable form does not attempt to
+ limit or alter the recipient's rights in the Source Code form from
+ the rights set forth in this License. If You distribute the
+ Covered Software in Executable form under a different license, You
+ must make it absolutely clear that any terms which differ from
+ this License are offered by You alone, not by the Initial
+ Developer or Contributor. You hereby agree to indemnify the
+ Initial Developer and every Contributor for any liability incurred
+ by the Initial Developer or such Contributor as a result of any
+ such terms You offer.
+
+ 3.6. Larger Works.
+
+ You may create a Larger Work by combining Covered Software with
+ other code not governed by the terms of this License and
+ distribute the Larger Work as a single product. In such a case,
+ You must make sure the requirements of this License are fulfilled
+ for the Covered Software.
+
+4. Versions of the License.
+
+ 4.1. New Versions.
+
+ Sun Microsystems, Inc. is the initial license steward and may
+ publish revised and/or new versions of this License from time to
+ time. Each version will be given a distinguishing version number.
+ Except as provided in Section 4.3, no one other than the license
+ steward has the right to modify this License.
+
+ 4.2. Effect of New Versions.
+
+ You may always continue to use, distribute or otherwise make the
+ Covered Software available under the terms of the version of the
+ License under which You originally received the Covered Software.
+ If the Initial Developer includes a notice in the Original
+ Software prohibiting it from being distributed or otherwise made
+ available under any subsequent version of the License, You must
+ distribute and make the Covered Software available under the terms
+ of the version of the License under which You originally received
+ the Covered Software. Otherwise, You may also choose to use,
+ distribute or otherwise make the Covered Software available under
+ the terms of any subsequent version of the License published by
+ the license steward.
+
+ 4.3. Modified Versions.
+
+ When You are an Initial Developer and You want to create a new
+ license for Your Original Software, You may create and use a
+ modified version of this License if You: (a) rename the license
+ and remove any references to the name of the license steward
+ (except to note that the license differs from this License); and
+ (b) otherwise make it clear that the license contains terms which
+ differ from this License.
+
+5. DISCLAIMER OF WARRANTY.
+
+ COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
+ BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
+ INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
+ SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
+ PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
+ PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY
+ COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
+ INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
+ NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
+ WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
+ ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
+ DISCLAIMER.
+
+6. TERMINATION.
+
+ 6.1. This License and the rights granted hereunder will terminate
+ automatically if You fail to comply with terms herein and fail to
+ cure such breach within 30 days of becoming aware of the breach.
+ Provisions which, by their nature, must remain in effect beyond
+ the termination of this License shall survive.
+
+ 6.2. If You assert a patent infringement claim (excluding
+ declaratory judgment actions) against Initial Developer or a
+ Contributor (the Initial Developer or Contributor against whom You
+ assert such claim is referred to as "Participant") alleging that
+ the Participant Software (meaning the Contributor Version where
+ the Participant is a Contributor or the Original Software where
+ the Participant is the Initial Developer) directly or indirectly
+ infringes any patent, then any and all rights granted directly or
+ indirectly to You by such Participant, the Initial Developer (if
+ the Initial Developer is not the Participant) and all Contributors
+ under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
+ notice from Participant terminate prospectively and automatically
+ at the expiration of such 60 day notice period, unless if within
+ such 60 day period You withdraw Your claim with respect to the
+ Participant Software against such Participant either unilaterally
+ or pursuant to a written agreement with Participant.
+
+ 6.3. In the event of termination under Sections 6.1 or 6.2 above,
+ all end user licenses that have been validly granted by You or any
+ distributor hereunder prior to termination (excluding licenses
+ granted to You by any distributor) shall survive termination.
+
+7. LIMITATION OF LIABILITY.
+
+ UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+ (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
+ INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
+ COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
+ LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
+ CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
+ LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
+ STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
+ COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
+ INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
+ LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
+ INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
+ APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
+ NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
+ CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
+ APPLY TO YOU.
+
+8. U.S. GOVERNMENT END USERS.
+
+ The Covered Software is a "commercial item," as that term is
+ defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
+ computer software" (as that term is defined at 48
+ C.F.R. 252.227-7014(a)(1)) and "commercial computer software
+ documentation" as such terms are used in 48 C.F.R. 12.212
+ (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48
+ C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
+ U.S. Government End Users acquire Covered Software with only those
+ rights set forth herein. This U.S. Government Rights clause is in
+ lieu of, and supersedes, any other FAR, DFAR, or other clause or
+ provision that addresses Government rights in computer software
+ under this License.
+
+9. MISCELLANEOUS.
+
+ This License represents the complete agreement concerning subject
+ matter hereof. If any provision of this License is held to be
+ unenforceable, such provision shall be reformed only to the extent
+ necessary to make it enforceable. This License shall be governed
+ by the law of the jurisdiction specified in a notice contained
+ within the Original Software (except to the extent applicable law,
+ if any, provides otherwise), excluding such jurisdiction's
+ conflict-of-law provisions. Any litigation relating to this
+ License shall be subject to the jurisdiction of the courts located
+ in the jurisdiction and venue specified in a notice contained
+ within the Original Software, with the losing party responsible
+ for costs, including, without limitation, court costs and
+ reasonable attorneys' fees and expenses. The application of the
+ United Nations Convention on Contracts for the International Sale
+ of Goods is expressly excluded. Any law or regulation which
+ provides that the language of a contract shall be construed
+ against the drafter shall not apply to this License. You agree
+ that You alone are responsible for compliance with the United
+ States export administration regulations (and the export control
+ laws and regulation of any other countries) when You use,
+ distribute or otherwise make available any Covered Software.
+
+10. RESPONSIBILITY FOR CLAIMS.
+
+ As between Initial Developer and the Contributors, each party is
+ responsible for claims and damages arising, directly or
+ indirectly, out of its utilization of rights under this License
+ and You agree to work with Initial Developer and Contributors to
+ distribute such responsibility on an equitable basis. Nothing
+ herein is intended or shall be deemed to constitute any admission
+ of liability.
+
+--------------------------------------------------------------------
+
+NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND
+DISTRIBUTION LICENSE (CDDL)
+
+For Covered Software in this distribution, this License shall
+be governed by the laws of the State of California (excluding
+conflict-of-law provisions).
+
+Any litigation relating to this License shall be subject to the
+jurisdiction of the Federal Courts of the Northern District of
+California and the state courts of the State of California, with
+venue lying in Santa Clara County, California.
diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c
index 494c5f73f4b7..eafc47d10f2d 100644
--- a/common/acl/acl_common.c
+++ b/common/acl/acl_common.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -373,7 +372,7 @@ access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow)
* by nfsace, assuming aclent_t -> nfsace semantics.
*/
static uint32_t
-mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow)
+mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow)
{
uint32_t access = 0;
int haswriteperm = 0;
@@ -416,7 +415,7 @@ mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow)
access |= ACE_DELETE_CHILD;
}
/* exec */
- if (mode & S_IXOTH) {
+ if (mode & 01) {
access |= ACE_EXECUTE;
}
@@ -667,7 +666,7 @@ out:
}
static int
-convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir,
+convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
ace_t **retacep, int *retacecnt)
{
ace_t *acep;
@@ -693,7 +692,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir,
dfaclcnt = aclcnt - i;
}
- if (dfaclcnt && !isdir) {
+ if (dfaclcnt && isdir == 0) {
return (EINVAL);
}
@@ -731,7 +730,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir,
}
static int
-ace_mask_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
+ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
{
int error = 0;
o_mode_t mode = 0;
@@ -1028,7 +1027,7 @@ out:
}
static int
-ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
+ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
{
/* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */
if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) !=
@@ -1041,7 +1040,7 @@ ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
static int
acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list,
- uid_t owner, gid_t group, boolean_t isdir)
+ uid_t owner, gid_t group, int isdir)
{
int error;
uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
@@ -1081,7 +1080,7 @@ out:
static int
ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt,
- uid_t owner, gid_t group, boolean_t isdir)
+ uid_t owner, gid_t group, int isdir)
{
int error = 0;
aclent_t *aent, *result = NULL;
@@ -1261,7 +1260,7 @@ acevals_compare(const void *va, const void *vb)
static int
ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group,
aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt,
- boolean_t isdir)
+ int isdir)
{
int error = 0;
ace_t *acep;
@@ -1456,7 +1455,7 @@ out:
}
static int
-convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir,
+convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir,
uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt)
{
int error = 0;
@@ -1498,7 +1497,7 @@ convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir,
int
-acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner,
+acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner,
gid_t group)
{
int aclcnt;
@@ -1569,105 +1568,101 @@ out:
}
void
-acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
+acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+ uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone)
{
- uint32_t read_mask = ACE_READ_DATA;
- uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA;
- uint32_t execute_mask = ACE_EXECUTE;
+ *deny1 = *deny2 = *allow0 = *group = 0;
- (void) isdir; /* will need this later */
-
- masks->deny1 = 0;
if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH)))
- masks->deny1 |= read_mask;
+ *deny1 |= ACE_READ_DATA;
if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH)))
- masks->deny1 |= write_mask;
+ *deny1 |= ACE_WRITE_DATA;
if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH)))
- masks->deny1 |= execute_mask;
+ *deny1 |= ACE_EXECUTE;
- masks->deny2 = 0;
if (!(mode & S_IRGRP) && (mode & S_IROTH))
- masks->deny2 |= read_mask;
+ *deny2 = ACE_READ_DATA;
if (!(mode & S_IWGRP) && (mode & S_IWOTH))
- masks->deny2 |= write_mask;
+ *deny2 |= ACE_WRITE_DATA;
if (!(mode & S_IXGRP) && (mode & S_IXOTH))
- masks->deny2 |= execute_mask;
+ *deny2 |= ACE_EXECUTE;
- masks->allow0 = 0;
if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH)))
- masks->allow0 |= read_mask;
+ *allow0 |= ACE_READ_DATA;
if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH)))
- masks->allow0 |= write_mask;
+ *allow0 |= ACE_WRITE_DATA;
if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH)))
- masks->allow0 |= execute_mask;
+ *allow0 |= ACE_EXECUTE;
- masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
+ *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES|
ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;
if (mode & S_IRUSR)
- masks->owner |= read_mask;
+ *owner |= ACE_READ_DATA;
if (mode & S_IWUSR)
- masks->owner |= write_mask;
+ *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA;
if (mode & S_IXUSR)
- masks->owner |= execute_mask;
+ *owner |= ACE_EXECUTE;
- masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|
+ *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
ACE_SYNCHRONIZE;
if (mode & S_IRGRP)
- masks->group |= read_mask;
+ *group |= ACE_READ_DATA;
if (mode & S_IWGRP)
- masks->group |= write_mask;
+ *group |= ACE_WRITE_DATA|ACE_APPEND_DATA;
if (mode & S_IXGRP)
- masks->group |= execute_mask;
+ *group |= ACE_EXECUTE;
- masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|
+ *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
ACE_SYNCHRONIZE;
if (mode & S_IROTH)
- masks->everyone |= read_mask;
+ *everyone |= ACE_READ_DATA;
if (mode & S_IWOTH)
- masks->everyone |= write_mask;
+ *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA;
if (mode & S_IXOTH)
- masks->everyone |= execute_mask;
+ *everyone |= ACE_EXECUTE;
}
int
-acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count)
+acl_trivial_create(mode_t mode, ace_t **acl, int *count)
{
+ uint32_t deny1, deny2;
+ uint32_t allow0;
+ uint32_t owner, group, everyone;
int index = 0;
int error;
- trivial_acl_t masks;
*count = 3;
- acl_trivial_access_masks(mode, isdir, &masks);
+ acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group,
+ &everyone);
- if (masks.allow0)
+ if (allow0)
(*count)++;
- if (masks.deny1)
+ if (deny1)
(*count)++;
- if (masks.deny2)
+ if (deny2)
(*count)++;
if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0)
return (error);
- if (masks.allow0) {
- SET_ACE(acl, index, -1, masks.allow0,
- ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
+ if (allow0) {
+ SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE,
+ ACE_OWNER);
}
- if (masks.deny1) {
- SET_ACE(acl, index, -1, masks.deny1,
- ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER);
+ if (deny1) {
+ SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE,
+ ACE_OWNER);
}
- if (masks.deny2) {
- SET_ACE(acl, index, -1, masks.deny2,
- ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP);
+ if (deny2) {
+ SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE,
+ ACE_GROUP|ACE_IDENTIFIER_GROUP);
}
- SET_ACE(acl, index, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE,
- ACE_OWNER);
- SET_ACE(acl, index, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE,
+ SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
+ SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE,
ACE_IDENTIFIER_GROUP|ACE_GROUP);
- SET_ACE(acl, index, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
+ SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
ACE_EVERYONE);
return (0);
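
For context on the interface restored above, here is a minimal usage sketch (not part of the diff) of the six out-parameters of acl_trivial_access_masks(). The owner/group/everyone allow masks are always emitted; allow0/deny1/deny2 become non-zero only for "non-monotonic" modes where a less-specific class holds a bit the more-specific class lacks, which is why acl_trivial_create() starts *count at 3 and bumps it once per non-zero extra mask. The function name below is hypothetical.

#include <sys/types.h>
#include <sys/acl.h>
#include "acl_common.h"

static void
trivial_masks_example(void)
{
	uint32_t allow0, deny1, deny2, owner, group, everyone;

	/*
	 * 0644 (owner rw-, group r--, other r--) is monotonic: the
	 * three extra masks stay 0 and only 3 ACEs would be built.
	 */
	acl_trivial_access_masks(0644, &allow0, &deny1, &deny2,
	    &owner, &group, &everyone);

	/*
	 * 0604: other can read but group cannot, so deny2 gains
	 * ACE_READ_DATA (denying group@) and allow0 gains
	 * ACE_READ_DATA (re-allowing owner@ ahead of that deny),
	 * for 5 ACEs total.
	 */
	acl_trivial_access_masks(0604, &allow0, &deny1, &deny2,
	    &owner, &group, &everyone);
}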
diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h
index be4fd0c9e1d3..f76cbd3b450f 100644
--- a/common/acl/acl_common.h
+++ b/common/acl/acl_common.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _ACL_COMMON_H
@@ -34,14 +33,7 @@
extern "C" {
#endif
-typedef struct trivial_acl {
- uint32_t allow0; /* allow mask for bits only in owner */
- uint32_t deny1; /* deny mask for bits not in owner */
- uint32_t deny2; /* deny mask for bits not in group */
- uint32_t owner; /* allow mask matching mode */
- uint32_t group; /* allow mask matching mode */
- uint32_t everyone; /* allow mask matching mode */
-} trivial_acl_t;
+extern ace_t trivial_acl[6];
extern int acltrivial(const char *);
extern void adjust_ace_pair(ace_t *pair, mode_t mode);
@@ -52,13 +44,13 @@ extern int ace_trivial_common(void *, int,
uint32_t *mask));
extern acl_t *acl_alloc(acl_type_t);
extern void acl_free(acl_t *aclp);
-extern int acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir,
- uid_t owner, gid_t group);
+extern int acl_translate(acl_t *aclp, int target_flavor,
+ int isdir, uid_t owner, gid_t group);
void ksort(caddr_t v, int n, int s, int (*f)());
int cmp2acls(void *a, void *b);
-int acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count);
-void acl_trivial_access_masks(mode_t mode, boolean_t isdir,
- trivial_acl_t *masks);
+int acl_trivial_create(mode_t mode, ace_t **acl, int *count);
+void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+ uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone);
#ifdef __cplusplus
}
diff --git a/common/list/list.c b/common/list/list.c
new file mode 100644
index 000000000000..94f7782a87d2
--- /dev/null
+++ b/common/list/list.c
@@ -0,0 +1,251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#ifdef _KERNEL
+#include <sys/debug.h>
+#else
+#include <assert.h>
+#define ASSERT(a) assert(a)
+#endif
+
+#ifdef lint
+extern list_node_t *list_d2l(list_t *list, void *obj);
+#else
+#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
+#endif
+#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
+#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+#define list_insert_after_node(list, node, object) { \
+ list_node_t *lnew = list_d2l(list, object); \
+ lnew->list_prev = (node); \
+ lnew->list_next = (node)->list_next; \
+ (node)->list_next->list_prev = lnew; \
+ (node)->list_next = lnew; \
+}
+
+#define list_insert_before_node(list, node, object) { \
+ list_node_t *lnew = list_d2l(list, object); \
+ lnew->list_next = (node); \
+ lnew->list_prev = (node)->list_prev; \
+ (node)->list_prev->list_next = lnew; \
+ (node)->list_prev = lnew; \
+}
+
+#define list_remove_node(node) \
+ (node)->list_prev->list_next = (node)->list_next; \
+ (node)->list_next->list_prev = (node)->list_prev; \
+ (node)->list_next = (node)->list_prev = NULL
+
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+ ASSERT(list);
+ ASSERT(size > 0);
+ ASSERT(size >= offset + sizeof (list_node_t));
+
+ list->list_size = size;
+ list->list_offset = offset;
+ list->list_head.list_next = list->list_head.list_prev =
+ &list->list_head;
+}
+
+void
+list_destroy(list_t *list)
+{
+ list_node_t *node = &list->list_head;
+
+ ASSERT(list);
+ ASSERT(list->list_head.list_next == node);
+ ASSERT(list->list_head.list_prev == node);
+
+ node->list_next = node->list_prev = NULL;
+}
+
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+ if (object == NULL) {
+ list_insert_head(list, nobject);
+ } else {
+ list_node_t *lold = list_d2l(list, object);
+ list_insert_after_node(list, lold, nobject);
+ }
+}
+
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+ if (object == NULL) {
+ list_insert_tail(list, nobject);
+ } else {
+ list_node_t *lold = list_d2l(list, object);
+ list_insert_before_node(list, lold, nobject);
+ }
+}
+
+void
+list_insert_head(list_t *list, void *object)
+{
+ list_node_t *lold = &list->list_head;
+ list_insert_after_node(list, lold, object);
+}
+
+void
+list_insert_tail(list_t *list, void *object)
+{
+ list_node_t *lold = &list->list_head;
+ list_insert_before_node(list, lold, object);
+}
+
+void
+list_remove(list_t *list, void *object)
+{
+ list_node_t *lold = list_d2l(list, object);
+ ASSERT(!list_empty(list));
+ ASSERT(lold->list_next != NULL);
+ list_remove_node(lold);
+}
+
+void *
+list_remove_head(list_t *list)
+{
+ list_node_t *head = list->list_head.list_next;
+ if (head == &list->list_head)
+ return (NULL);
+ list_remove_node(head);
+ return (list_object(list, head));
+}
+
+void *
+list_remove_tail(list_t *list)
+{
+ list_node_t *tail = list->list_head.list_prev;
+ if (tail == &list->list_head)
+ return (NULL);
+ list_remove_node(tail);
+ return (list_object(list, tail));
+}
+
+void *
+list_head(list_t *list)
+{
+ if (list_empty(list))
+ return (NULL);
+ return (list_object(list, list->list_head.list_next));
+}
+
+void *
+list_tail(list_t *list)
+{
+ if (list_empty(list))
+ return (NULL);
+ return (list_object(list, list->list_head.list_prev));
+}
+
+void *
+list_next(list_t *list, void *object)
+{
+ list_node_t *node = list_d2l(list, object);
+
+ if (node->list_next != &list->list_head)
+ return (list_object(list, node->list_next));
+
+ return (NULL);
+}
+
+void *
+list_prev(list_t *list, void *object)
+{
+ list_node_t *node = list_d2l(list, object);
+
+ if (node->list_prev != &list->list_head)
+ return (list_object(list, node->list_prev));
+
+ return (NULL);
+}
+
+/*
+ * Insert src list after dst list. Empty src list thereafter.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+ list_node_t *dstnode = &dst->list_head;
+ list_node_t *srcnode = &src->list_head;
+
+ ASSERT(dst->list_size == src->list_size);
+ ASSERT(dst->list_offset == src->list_offset);
+
+ if (list_empty(src))
+ return;
+
+ dstnode->list_prev->list_next = srcnode->list_next;
+ srcnode->list_next->list_prev = dstnode->list_prev;
+ dstnode->list_prev = srcnode->list_prev;
+ srcnode->list_prev->list_next = dstnode;
+
+ /* empty src list */
+ srcnode->list_next = srcnode->list_prev = srcnode;
+}
+
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+ ASSERT(list_link_active(lold));
+ ASSERT(!list_link_active(lnew));
+
+ lnew->list_next = lold->list_next;
+ lnew->list_prev = lold->list_prev;
+ lold->list_prev->list_next = lnew;
+ lold->list_next->list_prev = lnew;
+ lold->list_next = lold->list_prev = NULL;
+}
+
+void
+list_link_init(list_node_t *link)
+{
+ link->list_next = NULL;
+ link->list_prev = NULL;
+}
+
+int
+list_link_active(list_node_t *link)
+{
+ return (link->list_next != NULL);
+}
+
+int
+list_is_empty(list_t *list)
+{
+ return (list_empty(list));
+}
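
The list just added is intrusive: the list_node_t lives inside the client structure, and list_create() records the object size and the node's offset so the list_d2l()/list_object() macros can convert between object and node pointers with no per-node allocation. A minimal usage sketch follows (foo_t and its fields are hypothetical):

#include <stddef.h>
#include <sys/list.h>

typedef struct foo {
	int		foo_val;
	list_node_t	foo_node;	/* linkage embedded in the object */
} foo_t;

static void
foo_walk(void)
{
	list_t l;
	foo_t a = { 0 }, b = { 0 };
	foo_t *fp;

	/* size and offset let the list find foo_node inside a foo_t */
	list_create(&l, sizeof (foo_t), offsetof(foo_t, foo_node));
	list_insert_tail(&l, &a);
	list_insert_tail(&l, &b);

	/* canonical traversal: list_head(), then list_next() until NULL */
	for (fp = list_head(&l); fp != NULL; fp = list_next(&l, fp))
		fp->foo_val++;

	/* list_destroy() asserts the list is empty, so drain it first */
	while (list_remove_head(&l) != NULL)
		continue;
	list_destroy(&l);
}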
diff --git a/common/nvpair/fnvpair.c b/common/nvpair/fnvpair.c
deleted file mode 100644
index 8d1bb98be32c..000000000000
--- a/common/nvpair/fnvpair.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#include <sys/nvpair.h>
-#include <sys/kmem.h>
-#include <sys/debug.h>
-#ifndef _KERNEL
-#include <stdlib.h>
-#endif
-
-/*
- * "Force" nvlist wrapper.
- *
- * These functions wrap the nvlist_* functions with assertions that assume
- * the operation is successful. This allows the caller's code to be much
- * more readable, especially for the fnvlist_lookup_* and fnvpair_value_*
- * functions, which can return the requested value (rather than filling in
- * a pointer).
- *
- * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate
- * with KM_SLEEP.
- *
- * More wrappers should be added as needed -- for example
- * nvlist_lookup_*_array and nvpair_value_*_array.
- */
-
-nvlist_t *
-fnvlist_alloc(void)
-{
- nvlist_t *nvl;
- VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
- return (nvl);
-}
-
-void
-fnvlist_free(nvlist_t *nvl)
-{
- nvlist_free(nvl);
-}
-
-size_t
-fnvlist_size(nvlist_t *nvl)
-{
- size_t size;
- VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0);
- return (size);
-}
-
-/*
- * Returns allocated buffer of size *sizep. Caller must free the buffer with
- * fnvlist_pack_free().
- */
-char *
-fnvlist_pack(nvlist_t *nvl, size_t *sizep)
-{
- char *packed = 0;
- VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE,
- KM_SLEEP), ==, 0);
- return (packed);
-}
-
-/*ARGSUSED*/
-void
-fnvlist_pack_free(char *pack, size_t size)
-{
-#ifdef _KERNEL
- kmem_free(pack, size);
-#else
- free(pack);
-#endif
-}
-
-nvlist_t *
-fnvlist_unpack(char *buf, size_t buflen)
-{
- nvlist_t *rv;
- VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0);
- return (rv);
-}
-
-nvlist_t *
-fnvlist_dup(nvlist_t *nvl)
-{
- nvlist_t *rv;
- VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0);
- return (rv);
-}
-
-void
-fnvlist_merge(nvlist_t *dst, nvlist_t *src)
-{
- VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
-}
-
-void
-fnvlist_add_boolean(nvlist_t *nvl, const char *name)
-{
- VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
-}
-
-void
-fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
-{
- VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
-{
- VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
-{
- VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
-{
- VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
-{
- VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
-{
- VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
-{
- VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
-{
- VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
-{
- VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
-{
- VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
-{
- VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
-{
- VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
-}
-
-void
-fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
-{
- VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
-}
-
-void
-fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
- boolean_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
- uint16_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
- uint32_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
- uint64_t *val, uint_t n)
-{
- VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_string_array(nvlist_t *nvl, const char *name,
- char * const *val, uint_t n)
-{
- VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
- nvlist_t **val, uint_t n)
-{
- VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
-}
-
-void
-fnvlist_remove(nvlist_t *nvl, const char *name)
-{
- VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
-}
-
-void
-fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
-{
- VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
-}
-
-nvpair_t *
-fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
-{
- nvpair_t *rv;
- VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-/* returns B_TRUE if the entry exists */
-boolean_t
-fnvlist_lookup_boolean(nvlist_t *nvl, const char *name)
-{
- return (nvlist_lookup_boolean(nvl, name) == 0);
-}
-
-boolean_t
-fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
-{
- boolean_t rv;
- VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-uchar_t
-fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
-{
- uchar_t rv;
- VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-int8_t
-fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
-{
- int8_t rv;
- VERIFY3U(nvlist_lookup_int8(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-int16_t
-fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
-{
- int16_t rv;
- VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-int32_t
-fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
-{
- int32_t rv;
- VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-int64_t
-fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
-{
- int64_t rv;
- VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-uint8_t
-fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name)
-{
- uint8_t rv;
- VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-uint16_t
-fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
-{
- uint16_t rv;
- VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-uint32_t
-fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
-{
- uint32_t rv;
- VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-uint64_t
-fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
-{
- uint64_t rv;
- VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-char *
-fnvlist_lookup_string(nvlist_t *nvl, const char *name)
-{
- char *rv;
- VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-nvlist_t *
-fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
-{
- nvlist_t *rv;
- VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0);
- return (rv);
-}
-
-boolean_t
-fnvpair_value_boolean_value(nvpair_t *nvp)
-{
- boolean_t rv;
- VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0);
- return (rv);
-}
-
-uchar_t
-fnvpair_value_byte(nvpair_t *nvp)
-{
- uchar_t rv;
- VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0);
- return (rv);
-}
-
-int8_t
-fnvpair_value_int8(nvpair_t *nvp)
-{
- int8_t rv;
- VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0);
- return (rv);
-}
-
-int16_t
-fnvpair_value_int16(nvpair_t *nvp)
-{
- int16_t rv;
- VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0);
- return (rv);
-}
-
-int32_t
-fnvpair_value_int32(nvpair_t *nvp)
-{
- int32_t rv;
- VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0);
- return (rv);
-}
-
-int64_t
-fnvpair_value_int64(nvpair_t *nvp)
-{
- int64_t rv;
- VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0);
- return (rv);
-}
-
-uint8_t
-fnvpair_value_uint8_t(nvpair_t *nvp)
-{
- uint8_t rv;
- VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0);
- return (rv);
-}
-
-uint16_t
-fnvpair_value_uint16(nvpair_t *nvp)
-{
- uint16_t rv;
- VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0);
- return (rv);
-}
-
-uint32_t
-fnvpair_value_uint32(nvpair_t *nvp)
-{
- uint32_t rv;
- VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0);
- return (rv);
-}
-
-uint64_t
-fnvpair_value_uint64(nvpair_t *nvp)
-{
- uint64_t rv;
- VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0);
- return (rv);
-}
-
-char *
-fnvpair_value_string(nvpair_t *nvp)
-{
- char *rv;
- VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0);
- return (rv);
-}
-
-nvlist_t *
-fnvpair_value_nvlist(nvpair_t *nvp)
-{
- nvlist_t *rv;
- VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0);
- return (rv);
-}
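
What the deletion above takes away, per the file's own block comment: the f-prefixed wrappers VERIFY success, so a lookup can sit in an expression instead of carrying an error path. A before/after sketch, assuming the wrappers as defined above (the "pool_guid" key and function names here are illustrative):

#include <sys/nvpair.h>

/* raw interface: the caller owns the error path */
static uint64_t
guid_checked(nvlist_t *config)
{
	uint64_t guid = 0;

	if (nvlist_lookup_uint64(config, "pool_guid", &guid) != 0)
		return (0);
	return (guid);
}

/* fnvlist form (removed by this commit): asserts success inline */
static uint64_t
guid_forced(nvlist_t *config)
{
	return (fnvlist_lookup_uint64(config, "pool_guid"));
}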
diff --git a/common/zfs/zfeature_common.c b/common/zfs/zfeature_common.c
deleted file mode 100644
index 9c0b67b42452..000000000000
--- a/common/zfs/zfeature_common.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#else
-#include <errno.h>
-#include <string.h>
-#endif
-#include <sys/debug.h>
-#include <sys/fs/zfs.h>
-#include <sys/inttypes.h>
-#include <sys/types.h>
-#include "zfeature_common.h"
-
-/*
- * Set to disable all feature checks while opening pools, allowing pools with
- * unsupported features to be opened. Set for testing only.
- */
-boolean_t zfeature_checks_disable = B_FALSE;
-
-zfeature_info_t spa_feature_table[SPA_FEATURES];
-
-/*
- * Valid characters for feature guids. This list is mainly for aesthetic
- * purposes and could be expanded in the future. There are different allowed
- * characters in the guids reverse dns portion (before the colon) and its
- * short name (after the colon).
- */
-static int
-valid_char(char c, boolean_t after_colon)
-{
- return ((c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '9') ||
- c == (after_colon ? '_' : '.'));
-}
-
-/*
- * Every feature guid must contain exactly one colon which separates a reverse
- * dns organization name from the feature's "short" name (e.g.
- * "com.company:feature_name").
- */
-boolean_t
-zfeature_is_valid_guid(const char *name)
-{
- int i;
- boolean_t has_colon = B_FALSE;
-
- i = 0;
- while (name[i] != '\0') {
- char c = name[i++];
- if (c == ':') {
- if (has_colon)
- return (B_FALSE);
- has_colon = B_TRUE;
- continue;
- }
- if (!valid_char(c, has_colon))
- return (B_FALSE);
- }
-
- return (has_colon);
-}
-
-boolean_t
-zfeature_is_supported(const char *guid)
-{
- if (zfeature_checks_disable)
- return (B_TRUE);
-
- return (0 == zfeature_lookup_guid(guid, NULL));
-}
-
-int
-zfeature_lookup_guid(const char *guid, zfeature_info_t **res)
-{
- for (int i = 0; i < SPA_FEATURES; i++) {
- zfeature_info_t *feature = &spa_feature_table[i];
- if (strcmp(guid, feature->fi_guid) == 0) {
- if (res != NULL)
- *res = feature;
- return (0);
- }
- }
-
- return (ENOENT);
-}
-
-int
-zfeature_lookup_name(const char *name, zfeature_info_t **res)
-{
- for (int i = 0; i < SPA_FEATURES; i++) {
- zfeature_info_t *feature = &spa_feature_table[i];
- if (strcmp(name, feature->fi_uname) == 0) {
- if (res != NULL)
- *res = feature;
- return (0);
- }
- }
-
- return (ENOENT);
-}
-
-static void
-zfeature_register(int fid, const char *guid, const char *name, const char *desc,
- boolean_t readonly, boolean_t mos, zfeature_info_t **deps)
-{
- zfeature_info_t *feature = &spa_feature_table[fid];
- static zfeature_info_t *nodeps[] = { NULL };
-
- ASSERT(name != NULL);
- ASSERT(desc != NULL);
- ASSERT(!readonly || !mos);
- ASSERT3U(fid, <, SPA_FEATURES);
- ASSERT(zfeature_is_valid_guid(guid));
-
- if (deps == NULL)
- deps = nodeps;
-
- feature->fi_guid = guid;
- feature->fi_uname = name;
- feature->fi_desc = desc;
- feature->fi_can_readonly = readonly;
- feature->fi_mos = mos;
- feature->fi_depends = deps;
-}
-
-void
-zpool_feature_init(void)
-{
- zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
- "com.delphix:async_destroy", "async_destroy",
- "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
-}
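
The guid grammar enforced by the deleted valid_char()/zfeature_is_valid_guid() pair, restated as examples (a sketch assuming the functions as defined above; the strings are illustrative):

#include <assert.h>
#include <sys/types.h>
#include "zfeature_common.h"

static void
guid_grammar_examples(void)
{
	/* exactly one colon; a-z, 0-9 and '.' allowed before it */
	assert(zfeature_is_valid_guid("com.delphix:async_destroy") == B_TRUE);
	/* no colon at all fails */
	assert(zfeature_is_valid_guid("com.delphix.async_destroy") == B_FALSE);
	/* '.' is not permitted after the colon; '_' is */
	assert(zfeature_is_valid_guid("com.delphix:async.destroy") == B_FALSE);
}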
diff --git a/common/zfs/zfeature_common.h b/common/zfs/zfeature_common.h
deleted file mode 100644
index 93ba2b76f1dd..000000000000
--- a/common/zfs/zfeature_common.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#ifndef _ZFEATURE_COMMON_H
-#define _ZFEATURE_COMMON_H
-
-#include <sys/fs/zfs.h>
-#include <sys/inttypes.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct zfeature_info;
-
-typedef struct zfeature_info {
- const char *fi_uname; /* User-facing feature name */
- const char *fi_guid; /* On-disk feature identifier */
- const char *fi_desc; /* Feature description */
- boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
- boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
- struct zfeature_info **fi_depends; /* array; null terminated */
-} zfeature_info_t;
-
-typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
-
-#define ZFS_FEATURE_DEBUG
-
-enum spa_feature {
- SPA_FEATURE_ASYNC_DESTROY,
- SPA_FEATURES
-} spa_feature_t;
-
-extern zfeature_info_t spa_feature_table[SPA_FEATURES];
-
-extern boolean_t zfeature_is_valid_guid(const char *);
-
-extern boolean_t zfeature_is_supported(const char *);
-extern int zfeature_lookup_guid(const char *, zfeature_info_t **res);
-extern int zfeature_lookup_name(const char *, zfeature_info_t **res);
-
-extern void zpool_feature_init(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZFEATURE_COMMON_H */
diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c
index 7688113e36e1..ed9b67ea3bc9 100644
--- a/common/zfs/zfs_comutil.c
+++ b/common/zfs/zfs_comutil.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -158,11 +157,7 @@ zfs_spa_version_map(int zpl_version)
return (version);
}
-/*
- * This is the table of legacy internal event names; it should not be modified.
- * The internal events are now stored in the history log as strings.
- */
-const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
+const char *zfs_history_event_names[LOG_END] = {
"invalid event",
"pool create",
"vdev add",
diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h
index f89054388a4d..61327f9aa909 100644
--- a/common/zfs/zfs_comutil.h
+++ b/common/zfs/zfs_comutil.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _ZFS_COMUTIL_H
@@ -38,8 +37,7 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
extern int zfs_zpl_version_map(int spa_version);
extern int zfs_spa_version_map(int zpl_version);
-#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41
-extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];
+extern const char *zfs_history_event_names[LOG_END];
#ifdef __cplusplus
}
diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c
index 18681035d6e1..83d9edb21389 100644
--- a/common/zfs/zfs_deleg.c
+++ b/common/zfs/zfs_deleg.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#if defined(_KERNEL)
@@ -61,7 +60,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
- {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
+ {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h
index 9997dffae7d0..b4cb8e2b4e37 100644
--- a/common/zfs/zfs_deleg.h
+++ b/common/zfs/zfs_deleg.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _ZFS_DELEG_H
@@ -52,7 +51,6 @@ typedef enum {
ZFS_DELEG_NOTE_CLONE,
ZFS_DELEG_NOTE_PROMOTE,
ZFS_DELEG_NOTE_RENAME,
- ZFS_DELEG_NOTE_SEND,
ZFS_DELEG_NOTE_RECEIVE,
ZFS_DELEG_NOTE_ALLOW,
ZFS_DELEG_NOTE_USERPROP,
diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c
index 5d45361eeaca..f29bcf62718f 100644
--- a/common/zfs/zfs_prop.c
+++ b/common/zfs/zfs_prop.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -105,13 +104,6 @@ zfs_prop_init(void)
{ NULL }
};
- static zprop_index_t acl_mode_table[] = {
- { "discard", ZFS_ACL_DISCARD },
- { "groupmask", ZFS_ACL_GROUPMASK },
- { "passthrough", ZFS_ACL_PASSTHROUGH },
- { NULL }
- };
-
static zprop_index_t acl_inherit_table[] = {
{ "discard", ZFS_ACL_DISCARD },
{ "noallow", ZFS_ACL_NOALLOW },
@@ -215,9 +207,6 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"hidden | visible", "SNAPDIR", snapdir_table);
- zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD,
- PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
- "discard | groupmask | passthrough", "ACLMODE", acl_mode_table);
zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"discard | noallow | restricted | passthrough | passthrough-x",
@@ -267,7 +256,7 @@ zfs_prop_init(void)
/* default index properties */
zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table);
+ "1 | 2 | 3 | 4 | current", "VERSION", version_table);
zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
"CANMOUNT", canmount_table);
@@ -297,8 +286,6 @@ zfs_prop_init(void)
/* string properties */
zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
- zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY,
- ZFS_TYPE_SNAPSHOT, "<dataset>[,...]", "CLONES");
zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
"MOUNTPOINT");
@@ -324,9 +311,6 @@ zfs_prop_init(void)
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
"<1.00x or higher if compressed>", "RATIO");
- zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
- PROP_READONLY, ZFS_TYPE_DATASET,
- "<1.00x or higher if compressed>", "REFRATIO");
zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
@@ -344,8 +328,6 @@ zfs_prop_init(void)
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
- zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY,
- ZFS_TYPE_DATASET, "<size>", "WRITTEN");
/* default number properties */
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
@@ -388,6 +370,13 @@ zfs_prop_init(void)
zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
+ /*
+ * Property to be removed once libbe is integrated
+ */
+ zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
+ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
+ "PRIV_PROP");
+
/* oddball properties */
zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
NULL, PROP_READONLY, ZFS_TYPE_DATASET,
@@ -472,18 +461,6 @@ zfs_prop_userquota(const char *name)
}
/*
- * Returns true if this is a valid written@ property.
- * Note that after the @, any character is valid (eg, another @, for
- * written@pool/fs@origin).
- */
-boolean_t
-zfs_prop_written(const char *name)
-{
- static const char *prefix = "written@";
- return (strncmp(name, prefix, strlen(prefix)) == 0);
-}
-
-/*
* Tables of index types, plus functions to convert between the user view
* (strings) and internal representation (uint64_t).
*/
diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c
index 72db87937110..988d05de6e20 100644
--- a/common/zfs/zpool_prop.c
+++ b/common/zfs/zpool_prop.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zio.h>
@@ -71,20 +69,14 @@ zpool_prop_init(void)
ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
- zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL,
- PROP_DEFAULT, ZFS_TYPE_POOL, "<comment-string>", "COMMENT");
/* readonly number properties */
zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "SIZE");
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "FREE");
- zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
- ZFS_TYPE_POOL, "<size>", "FREEING");
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
- zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,
- PROP_READONLY, ZFS_TYPE_POOL, "<size>", "EXPANDSZ");
zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "CAP");
zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
@@ -168,26 +160,6 @@ zpool_prop_default_numeric(zpool_prop_t prop)
return (zpool_prop_table[prop].pd_numdefault);
}
-/*
- * Returns true if this is a valid feature@ property.
- */
-boolean_t
-zpool_prop_feature(const char *name)
-{
- static const char *prefix = "feature@";
- return (strncmp(name, prefix, strlen(prefix)) == 0);
-}
-
-/*
- * Returns true if this is a valid unsupported@ property.
- */
-boolean_t
-zpool_prop_unsupported(const char *name)
-{
- static const char *prefix = "unsupported@";
- return (strncmp(name, prefix, strlen(prefix)) == 0);
-}
-
int
zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
uint64_t *index)
diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c
index 03919f0e9132..0bbf20d4f02c 100644
--- a/common/zfs/zprop_common.c
+++ b/common/zfs/zprop_common.c
@@ -22,9 +22,6 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
/*
* Common routines used by zfs and zpool property management.
@@ -132,8 +129,7 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type,
zprop_attr_t attr, int objset_types, const char *colname)
{
zprop_register_impl(prop, name, type, 0, NULL, attr,
- objset_types, NULL, colname,
- type == PROP_TYPE_NUMBER, B_FALSE, NULL);
+ objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
}
diff --git a/uts/common/Makefile.files b/uts/common/Makefile.files
index a2b4396f7316..ec08410b4ff3 100644
--- a/uts/common/Makefile.files
+++ b/uts/common/Makefile.files
@@ -21,8 +21,6 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2012 by Delphix. All rights reserved.
#
#
@@ -192,6 +190,7 @@ GENUNIX_OBJS += \
gid.o \
groups.o \
grow.o \
+ hat.o \
hat_refmod.o \
id32.o \
id_space.o \
@@ -243,7 +242,6 @@ GENUNIX_OBJS += \
nvpair.o \
nvpair_alloc_system.o \
nvpair_alloc_fixed.o \
- fnvpair.o \
octet.o \
open.o \
p_online.o \
@@ -455,8 +453,6 @@ AUDIO810_OBJS += audio810.o
AUDIOCMI_OBJS += audiocmi.o
-AUDIOCMIHD_OBJS += audiocmihd.o
-
AUDIOHD_OBJS += audiohd.o
AUDIOIXP_OBJS += audioixp.o
@@ -502,9 +498,9 @@ MD4_OBJS += md4.o md4_mod.o
MD5_OBJS += md5.o md5_mod.o
-SHA1_OBJS += sha1.o sha1_mod.o
+SHA1_OBJS += sha1.o sha1_mod.o fips_sha1_util.o
-SHA2_OBJS += sha2.o sha2_mod.o
+SHA2_OBJS += sha2.o sha2_mod.o fips_sha2_util.o
IPGPC_OBJS += classifierddi.o classifier.o filters.o trie.o table.o \
ba_table.o
@@ -939,7 +935,7 @@ ST_OBJS += st.o st_conf.o
EMLXS_OBJS += emlxs_clock.o emlxs_dfc.o emlxs_dhchap.o emlxs_diag.o \
emlxs_download.o emlxs_dump.o emlxs_els.o emlxs_event.o \
- emlxs_fcf.o emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \
+ emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \
emlxs_mbox.o emlxs_mem.o emlxs_msg.o emlxs_node.o \
emlxs_pkt.o emlxs_sli3.o emlxs_sli4.o emlxs_solaris.o \
emlxs_thread.o
@@ -1087,7 +1083,7 @@ DRM_OBJS += drm_sunmod.o drm_kstat.o drm_agpsupport.o \
drm_auth.o drm_bufs.o drm_context.o drm_dma.o \
drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \
drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \
- drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
+ drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
FM_OBJS += devfm.o devfm_machdep.o
@@ -1329,7 +1325,6 @@ ZFS_COMMON_OBJS += \
arc.o \
bplist.o \
bpobj.o \
- bptree.o \
dbuf.o \
ddt.o \
ddt_zap.o \
@@ -1351,7 +1346,6 @@ ZFS_COMMON_OBJS += \
dsl_deleg.o \
dsl_prop.o \
dsl_scan.o \
- zfeature.o \
gzip.o \
lzjb.o \
metaslab.o \
@@ -1394,12 +1388,11 @@ ZFS_COMMON_OBJS += \
zrlock.o
ZFS_SHARED_OBJS += \
- zfeature_common.o \
- zfs_comutil.o \
- zfs_deleg.o \
- zfs_fletcher.o \
zfs_namecheck.o \
+ zfs_deleg.o \
zfs_prop.o \
+ zfs_comutil.o \
+ zfs_fletcher.o \
zpool_prop.o \
zprop_common.o
@@ -1526,7 +1519,7 @@ KCF_OBJS += kcf.o kcf_callprov.o kcf_cbufcall.o kcf_cipher.o kcf_crypto.o \
kcf_object.o kcf_policy.o kcf_prov_lib.o kcf_prov_tabs.o \
kcf_sched.o kcf_session.o kcf_sign.o kcf_spi.o kcf_verify.o \
kcf_random.o modes.o ecb.o cbc.o ctr.o ccm.o gcm.o \
- fips_random.o
+ fips_random.o fips_checksum.o fips_test_vectors.o
CRYPTOADM_OBJS += cryptoadm.o
@@ -1537,7 +1530,7 @@ DPROV_OBJS += dprov.o
DCA_OBJS += dca.o dca_3des.o dca_debug.o dca_dsa.o dca_kstat.o dca_rng.o \
dca_rsa.o
-AESPROV_OBJS += aes.o aes_impl.o aes_modes.o
+AESPROV_OBJS += aes.o aes_impl.o aes_modes.o fips_aes_util.o
ARCFOURPROV_OBJS += arcfour.o arcfour_crypt.o
@@ -1548,16 +1541,16 @@ ECCPROV_OBJS += ecc.o ec.o ec2_163.o ec2_mont.o ecdecode.o ecl_mult.o \
ecp_jm.o ec2_233.o ecl_curve.o ecp_224.o ecp_aff.o \
ecp_mont.o ec2_aff.o ec_naf.o ecl_gf.o ecp_256.o mp_gf2m.o \
mpi.o mplogic.o mpmontg.o mpprime.o oid.o \
- secitem.o ec2_test.o ecp_test.o
+ secitem.o ec2_test.o ecp_test.o fips_ecc_util.o
-RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o
+RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o fips_rsa_util.o
-SWRANDPROV_OBJS += swrand.o
+SWRANDPROV_OBJS += swrand.o fips_random_util.o
#
# kernel SSL
#
-KSSL_OBJS += kssl.o ksslioctl.o
+KSSL_OBJS += kssl.o ksslioctl.o
KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o
@@ -1671,7 +1664,7 @@ KGSS_KRB5_OBJS += krb5mech.o \
$(CRYPTO_OLD) \
$(CRYPTO_RAW) $(K5_KRB) $(K5_OS)
-DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o
+DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o fips_des_util.o
DLBOOT_OBJS += bootparam_xdr.o nfs_dlinet.o scan.o
@@ -1770,8 +1763,6 @@ BGE_OBJS += bge_main2.o bge_chip2.o bge_kstats.o bge_log.o bge_ndd.o \
DMFE_OBJS += dmfe_log.o dmfe_main.o dmfe_mii.o
-EFE_OBJS += efe.o
-
ELXL_OBJS += elxl.o
HME_OBJS += hme.o
@@ -1782,8 +1773,6 @@ IXGB_OBJS += ixgb.o ixgb_atomic.o ixgb_chip.o ixgb_gld.o ixgb_kstats.o \
NGE_OBJS += nge_main.o nge_atomic.o nge_chip.o nge_ndd.o nge_kstats.o \
nge_log.o nge_rx.o nge_tx.o nge_xmii.o
-PCN_OBJS += pcn.o
-
RGE_OBJS += rge_main.o rge_chip.o rge_ndd.o rge_kstats.o rge_log.o rge_rxtx.o
URTW_OBJS += urtw.o
@@ -1909,11 +1898,6 @@ IGB_OBJS = igb_82575.o igb_api.o igb_mac.o igb_manage.o \
igb_rx.o igb_stat.o igb_tx.o
#
-# Intel Pro/100 NIC driver module
-#
-IPRB_OBJS = iprb.o
-
-#
# Intel 10GbE PCIE NIC driver module
#
IXGBE_OBJS = ixgbe_82598.o ixgbe_82599.o ixgbe_api.o \
@@ -1948,6 +1932,11 @@ NXGE_HCALL_OBJS = \
#
KICONV_EMEA_OBJS += kiconv_emea.o
+#
+# blk2scsa
+#
+BLK2SCSA_OBJS = blk2scsa.o
+
KICONV_JA_OBJS += kiconv_ja.o
KICONV_KO_OBJS += kiconv_cck_common.o kiconv_ko.o
diff --git a/uts/common/dtrace/dcpc.c b/uts/common/dtrace/dcpc.c
deleted file mode 100644
index 8fd96cc24c6c..000000000000
--- a/uts/common/dtrace/dcpc.c
+++ /dev/null
@@ -1,1218 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/errno.h>
-#include <sys/cpuvar.h>
-#include <sys/stat.h>
-#include <sys/modctl.h>
-#include <sys/cmn_err.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/ksynch.h>
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/kcpc.h>
-#include <sys/cap_util.h>
-#include <sys/cpc_pcbe.h>
-#include <sys/cpc_impl.h>
-#include <sys/dtrace_impl.h>
-
-/*
- * DTrace CPU Performance Counter Provider
- * ---------------------------------------
- *
- * The DTrace cpc provider allows DTrace consumers to access the CPU
- * performance counter overflow mechanism of a CPU. The configuration
- * presented in a probe specification is programmed into the performance
- * counter hardware of all available CPUs on a system. Programming the
- * hardware causes a counter on each CPU to begin counting events of the
- * given type. When the specified number of events have occurred, an overflow
- * interrupt will be generated and the probe is fired.
- *
- * The required configuration for the performance counter is encoded into
- * the probe specification and this includes the performance counter event
- * name, processor mode, overflow rate and an optional unit mask.
- *
- * Most processors provide several counters (PICs) which can count all or a
- * subset of the events available for a given CPU. However, when overflow
- * profiling is being used, not all CPUs can detect which counter generated the
- * overflow interrupt. In this case we cannot reliably determine which counter
- * overflowed and we therefore only allow such CPUs to configure one event at
- * a time. Processors that can determine the counter which overflowed are
- * allowed to program as many events at one time as possible (in theory up to
- * the number of instrumentation counters supported by that platform).
- * Therefore, multiple consumers can enable multiple probes at the same time
- * on such platforms. Platforms which cannot determine the source of an
- * overflow interrupt are only allowed to program a single event at one time.
- *
- * The performance counter hardware is made available to consumers on a
- * first-come, first-served basis. Only a finite amount of hardware resource
- * is available and, while we make every attempt to accommodate requests from
- * consumers, we must deny requests when hardware resources have been exhausted.
- * A consumer will fail to enable probes when resources are currently in use.
- *
- * The cpc provider contends for shared hardware resources along with other
- * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
- * Only one such consumer can use the performance counters at any one time and
- * counters are made available on a first-come, first-served basis. As with
- * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
- * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
- * counter contexts to be invalidated.
- */
-
-typedef struct dcpc_probe {
- char dcpc_event_name[CPC_MAX_EVENT_LEN];
- int dcpc_flag; /* flags (USER/SYS) */
- uint32_t dcpc_ovfval; /* overflow value */
- int64_t dcpc_umask; /* umask/emask for this event */
- int dcpc_picno; /* pic this event is programmed in */
- int dcpc_enabled; /* probe is actually enabled? */
- int dcpc_disabling; /* probe is currently being disabled */
- dtrace_id_t dcpc_id; /* probeid this request is enabling */
- int dcpc_actv_req_idx; /* idx into dcpc_actv_reqs[] */
-} dcpc_probe_t;
-
-static dev_info_t *dcpc_devi;
-static dtrace_provider_id_t dcpc_pid;
-static dcpc_probe_t **dcpc_actv_reqs;
-static uint32_t dcpc_enablings = 0;
-static int dcpc_ovf_mask = 0;
-static int dcpc_mult_ovf_cap = 0;
-static int dcpc_mask_type = 0;
-
-/*
- * When the dcpc provider is loaded, dcpc_min_overflow is set to either
- * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
- * the dcpc.conf file. Decrease this value to set probes with smaller
- * overflow values. Remember that very small values could render a system
- * unusable with frequently occurring events.
- */
-#define DCPC_MIN_OVF_DEFAULT 5000
-static uint32_t dcpc_min_overflow;
-
-static int dcpc_aframes = 0; /* override for artificial frame setting */
-#if defined(__x86)
-#define DCPC_ARTIFICIAL_FRAMES 8
-#elif defined(__sparc)
-#define DCPC_ARTIFICIAL_FRAMES 2
-#endif
-
-/*
- * Called from the platform overflow interrupt handler. 'bitmap' is a mask
- * which contains the pic(s) that have overflowed.
- */
-static void
-dcpc_fire(uint64_t bitmap)
-{
- int i;
-
- /*
- * No counter was marked as overflowing. Shout about it and get out.
- */
- if ((bitmap & dcpc_ovf_mask) == 0) {
- cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
- return;
- }
-
- /*
- * This is the common case of a processor that doesn't support
- * multiple overflow events. Such systems are only allowed a single
- * enabling and therefore we just look for the first entry in
- * the active request array.
- */
- if (!dcpc_mult_ovf_cap) {
- for (i = 0; i < cpc_ncounters; i++) {
- if (dcpc_actv_reqs[i] != NULL) {
- dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
- CPU->cpu_cpcprofile_pc,
- CPU->cpu_cpcprofile_upc, 0, 0, 0);
- return;
- }
- }
- return;
- }
-
- /*
- * This is a processor capable of handling multiple overflow events.
- * Iterate over the array of active requests and locate the counters
- * that overflowed (note: it is possible for more than one counter to
- * have overflowed at the same time).
- */
- for (i = 0; i < cpc_ncounters; i++) {
- if (dcpc_actv_reqs[i] != NULL &&
- (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
- dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
- CPU->cpu_cpcprofile_pc,
- CPU->cpu_cpcprofile_upc, 0, 0, 0);
- }
- }
-}
-
-static void
-dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
- char *eventname, int64_t umask, uint32_t ovfval, char flag)
-{
- dcpc_probe_t *pp;
- int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();
-
- if (dcpc_aframes)
- nr_frames = dcpc_aframes;
-
- if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
- return;
-
- pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
- (void) strncpy(pp->dcpc_event_name, eventname,
- sizeof (pp->dcpc_event_name) - 1);
- pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
- pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
- pp->dcpc_ovfval = ovfval;
- pp->dcpc_umask = umask;
- pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
-
- pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
- nr_frames, pp);
-}
-
-/*ARGSUSED*/
-static void
-dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
-{
- /*
- * The format of a probe is:
- *
- * event_name-mode-{optional_umask}-overflow_rate
- * e.g.
- * DC_refill_from_system-user-0x1e-50000, or,
- * DC_refill_from_system-all-10000
- *
- */
- char *str, *end, *p;
- int i, flag = 0;
- char event[CPC_MAX_EVENT_LEN];
- long umask = -1, val = 0;
- size_t evlen, len;
-
- /*
- * The 'cpc' provider offers no probes by default.
- */
- if (desc == NULL)
- return;
-
- len = strlen(desc->dtpd_name);
- p = str = kmem_alloc(len + 1, KM_SLEEP);
- (void) strcpy(str, desc->dtpd_name);
-
- /*
- * We have a poor man's strtok() going on here. Replace any hyphens
- * in the probe name with NULL characters in order to make it
- * easy to parse the string with regular string functions.
- */
- for (i = 0; i < len; i++) {
- if (str[i] == '-')
- str[i] = '\0';
- }
-
- /*
- * The first part of the string must be either a platform event
- * name or a generic event name.
- */
- evlen = strlen(p);
- (void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
- event[CPC_MAX_EVENT_LEN - 1] = '\0';
-
- /*
- * The next part of the name is the mode specification. Valid
- * settings are "user", "kernel" or "all".
- */
- p += evlen + 1;
-
- if (strcmp(p, "user") == 0)
- flag |= CPC_COUNT_USER;
- else if (strcmp(p, "kernel") == 0)
- flag |= CPC_COUNT_SYSTEM;
- else if (strcmp(p, "all") == 0)
- flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
- else
- goto err;
-
- /*
- * Next we either have a mask specification followed by an overflow
- * rate or just an overflow rate on its own.
- */
- p += strlen(p) + 1;
- if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- /*
- * A unit mask can only be specified if:
- * 1) this performance counter back end supports masks.
- * 2) the specified event is platform specific.
- * 3) a valid hex number is converted.
- * 4) no extraneous characters follow the mask specification.
- */
- if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
- ddi_strtol(p, &end, 16, &umask) == 0 &&
- end == p + strlen(p)) {
- p += strlen(p) + 1;
- } else {
- goto err;
- }
- }
-
- /*
- * This final part must be an overflow value which has to be greater
- * than the minimum permissible overflow rate.
- */
- if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
- val < dcpc_min_overflow)
- goto err;
-
- /*
- * Validate the event and create the probe.
- */
- for (i = 0; i < cpc_ncounters; i++) {
- char *events, *cp, *p, *end;
- int found = 0, j;
- size_t llen;
-
- if ((events = kcpc_list_events(i)) == NULL)
- goto err;
-
- llen = strlen(events);
- p = cp = ddi_strdup(events, KM_NOSLEEP);
- end = cp + llen;
-
- for (j = 0; j < llen; j++) {
- if (cp[j] == ',')
- cp[j] = '\0';
- }
-
- while (p < end && found == 0) {
- if (strcmp(p, event) == 0) {
- dcpc_create_probe(dcpc_pid, desc->dtpd_name,
- event, umask, (uint32_t)val, flag);
- found = 1;
- }
- p += strlen(p) + 1;
- }
- kmem_free(cp, llen + 1);
-
- if (found)
- break;
- }
-
-err:
- kmem_free(str, len + 1);
-}
-
-/*ARGSUSED*/
-static void
-dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
-{
- dcpc_probe_t *pp = parg;
-
- ASSERT(pp->dcpc_enabled == 0);
- kmem_free(pp, sizeof (dcpc_probe_t));
-}
-
-/*ARGSUSED*/
-static int
-dcpc_mode(void *arg, dtrace_id_t id, void *parg)
-{
- if (CPU->cpu_cpcprofile_pc == 0) {
- return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER);
- } else {
- return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL);
- }
-}
-
-static void
-dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
-{
- kcpc_set_t *oset;
- int i;
-
- (void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
- CPC_MAX_EVENT_LEN);
- set->ks_req[reqno].kr_config = NULL;
- set->ks_req[reqno].kr_index = reqno;
- set->ks_req[reqno].kr_picnum = -1;
- set->ks_req[reqno].kr_flags = pp->dcpc_flag;
-
- /*
- * If a unit mask has been specified then detect which attribute
- * the platform needs. For now, it's either "umask" or "emask".
- */
- if (pp->dcpc_umask >= 0) {
- set->ks_req[reqno].kr_attr =
- kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
- set->ks_req[reqno].kr_nattrs = 1;
- if (dcpc_mask_type & DCPC_UMASK)
- (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
- "umask", 5);
- else
- (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
- "emask", 5);
- set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
- } else {
- set->ks_req[reqno].kr_attr = NULL;
- set->ks_req[reqno].kr_nattrs = 0;
- }
-
- /*
- * If this probe is enabled, obtain its current countdown value
- * and use that. The CPU's cpc context might not exist yet if we
- * are dealing with a CPU that is just coming online.
- */
- if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
- oset = c->cpu_cpc_ctx->kc_set;
-
- for (i = 0; i < oset->ks_nreqs; i++) {
- if (strcmp(oset->ks_req[i].kr_event,
- set->ks_req[reqno].kr_event) == 0) {
- set->ks_req[reqno].kr_preset =
- *(oset->ks_req[i].kr_data);
- }
- }
- } else {
- set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
- }
-
- set->ks_nreqs++;
-}
-
-
-/*
- * Create a fresh request set for the enablings represented in the
- * 'dcpc_actv_reqs' array which contains the probes we want to be
- * in the set. This can be called for several reasons:
- *
- * 1) We are on a single or multi overflow platform and we have no
- * current events so we can just create the set and initialize it.
- * 2) We are on a multi-overflow platform and we already have one or
- * more existing events and we are adding a new enabling. Create a
- * new set and copy old requests in and then add the new request.
- * 3) We are on a multi-overflow platform and we have just removed an
- * enabling but we still have enablings which are valid. Create a new
- * set and copy in still valid requests.
- */
-static kcpc_set_t *
-dcpc_create_set(cpu_t *c)
-{
- int i, reqno = 0;
- int active_requests = 0;
- kcpc_set_t *set;
-
- /*
- * First get a count of the number of currently active requests.
- * Note that dcpc_actv_reqs[] should always reflect which requests
- * we want to be in the set that is to be created. It is the
- * responsibility of the caller of dcpc_create_set() to adjust that
- * array accordingly beforehand.
- */
- for (i = 0; i < cpc_ncounters; i++) {
- if (dcpc_actv_reqs[i] != NULL)
- active_requests++;
- }
-
- set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);
-
- set->ks_req =
- kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);
-
- set->ks_data =
- kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);
-
- /*
- * Look for valid entries in the active requests array and populate
- * the request set for any entries found.
- */
- for (i = 0; i < cpc_ncounters; i++) {
- if (dcpc_actv_reqs[i] != NULL) {
- dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
- reqno++;
- }
- }
-
- return (set);
-}
-
-static int
-dcpc_program_cpu_event(cpu_t *c)
-{
- int i, j, subcode;
- kcpc_ctx_t *ctx, *octx;
- kcpc_set_t *set;
-
- set = dcpc_create_set(c);
-
- set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
- ctx->kc_set = set;
- ctx->kc_cpuid = c->cpu_id;
-
- if (kcpc_assign_reqs(set, ctx) != 0)
- goto err;
-
- if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
- goto err;
-
- for (i = 0; i < set->ks_nreqs; i++) {
- for (j = 0; j < cpc_ncounters; j++) {
- if (dcpc_actv_reqs[j] != NULL &&
- strcmp(set->ks_req[i].kr_event,
- dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
- dcpc_actv_reqs[j]->dcpc_picno =
- set->ks_req[i].kr_picnum;
- }
- }
- }
-
- /*
- * If we already have an active enabling then save the current cpc
- * context away.
- */
- octx = c->cpu_cpc_ctx;
-
- kcpc_cpu_program(c, ctx);
-
- if (octx != NULL) {
- kcpc_set_t *oset = octx->kc_set;
- kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
- kcpc_free_configs(oset);
- kcpc_free_set(oset);
- kcpc_ctx_free(octx);
- }
-
- return (0);
-
-err:
- /*
- * We failed to configure this request, so free things up and
- * get out.
- */
- kcpc_free_configs(set);
- kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
- kcpc_free_set(set);
- kcpc_ctx_free(ctx);
-
- return (-1);
-}
-
-static void
-dcpc_disable_cpu(cpu_t *c)
-{
- kcpc_ctx_t *ctx;
- kcpc_set_t *set;
-
- /*
- * Leave this CPU alone if it's already offline.
- */
- if (c->cpu_flags & CPU_OFFLINE)
- return;
-
- /*
- * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters and
- * changes it.
- */
- ctx = c->cpu_cpc_ctx;
-
- kcpc_cpu_stop(c, B_FALSE);
-
- set = ctx->kc_set;
-
- kcpc_free_configs(set);
- kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
- kcpc_free_set(set);
- kcpc_ctx_free(ctx);
-}
-
-/*
- * The dcpc_*_interrupts() routines are responsible for manipulating the
- * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
- * synchronize processing of hardware overflow interrupts with configuration
- * changes made to the CPU performance counter subsystem by the dcpc provider.
- *
- * The dcpc provider claims ownership of the overflow interrupt mechanism
- * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
- * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the
- * overflow mechanism and interrupts may be processed). Before modifying
- * a CPUs configuration state the state byte is transitioned from
- * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state).
- * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
- * an interrupt when a configuration is not in process (i.e. the state is
- * marked as free). During interrupt processing the state is set to
- * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based
- * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate
- * the dcpc provider is no longer interested in overflow interrupts.
- */
-static void
-dcpc_block_interrupts(void)
-{
- cpu_t *c = cpu_list;
- uint8_t *state;
-
- ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
-
- do {
- state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;
-
- while (atomic_cas_8(state, DCPC_INTR_FREE,
- DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
- continue;
-
- } while ((c = c->cpu_next) != cpu_list);
-}
-
-/*
- * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
- * overflow interrupts can be processed safely.
- */
-static void
-dcpc_release_interrupts(void)
-{
- cpu_t *c = cpu_list;
-
- ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
-
- do {
- cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
- membar_producer();
- } while ((c = c->cpu_next) != cpu_list);
-}
-
-/*
- * Transition all CPUs' dcpc interrupt state from DCPC_INTR_INACTIVE
- * to DCPC_INTR_FREE. This indicates that the dcpc provider is now
- * responsible for handling all overflow interrupt activity. Should only be
- * called before enabling the first dcpc based probe.
- */
-static void
-dcpc_claim_interrupts(void)
-{
- cpu_t *c = cpu_list;
-
- ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);
-
- do {
- cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
- membar_producer();
- } while ((c = c->cpu_next) != cpu_list);
-}
-
-/*
- * Set all CPUs' dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
- * the dcpc provider is no longer processing overflow interrupts. Only called
- * during removal of the last dcpc based enabling.
- */
-static void
-dcpc_surrender_interrupts(void)
-{
- cpu_t *c = cpu_list;
-
- ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);
-
- do {
- cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
- membar_producer();
- } while ((c = c->cpu_next) != cpu_list);
-}
-
-/*
- * dcpc_program_event() can be called owing to a new enabling or if a multi
- * overflow platform has disabled a request but needs to program the requests
- * that are still valid.
- *
- * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
- * and a new request set which contains the new enabling and any old enablings
- * which are still valid (possible with multi-overflow platforms).
- */
-static int
-dcpc_program_event(dcpc_probe_t *pp)
-{
- cpu_t *c;
- int ret = 0;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- kpreempt_disable();
-
- dcpc_block_interrupts();
-
- c = cpu_list;
-
- do {
- /*
- * Skip CPUs that are currently offline.
- */
- if (c->cpu_flags & CPU_OFFLINE)
- continue;
-
- /*
- * Stop counters but preserve existing DTrace CPC context
- * if there is one.
- *
- * If we come here when the first event is programmed for a CPU,
- * there should be no DTrace CPC context installed. In this
- * case, kcpc_cpu_stop() will ensure that there is no other
- * context on the CPU.
- *
- * If we add a new enabling to the original one, the CPU should
- * have the old DTrace CPC context which we need to keep around
- * since dcpc_program_event() will add to it.
- */
- if (c->cpu_cpc_ctx != NULL)
- kcpc_cpu_stop(c, B_TRUE);
- } while ((c = c->cpu_next) != cpu_list);
-
- dcpc_release_interrupts();
-
- /*
- * If this enabling is being removed (in the case of a multi event
- * capable system with more than one active enabling), we can now
- * update the active request array to reflect the enablings that need
- * to be reprogrammed.
- */
- if (pp->dcpc_disabling == 1)
- dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
-
- do {
- /*
- * Skip CPUs that are currently offline.
- */
- if (c->cpu_flags & CPU_OFFLINE)
- continue;
-
- ret = dcpc_program_cpu_event(c);
- } while ((c = c->cpu_next) != cpu_list && ret == 0);
-
- /*
- * If dcpc_program_cpu_event() fails then it is because we couldn't
- * configure the requests in the set for the CPU and not because of
- * an error programming the hardware. If we have a failure here then
- * we assume no CPUs have been programmed in the above step as they
- * are all configured identically.
- */
- if (ret != 0) {
- pp->dcpc_enabled = 0;
- kpreempt_enable();
- return (-1);
- }
-
- if (pp->dcpc_disabling != 1)
- pp->dcpc_enabled = 1;
-
- kpreempt_enable();
-
- return (0);
-}
-
-/*ARGSUSED*/
-static int
-dcpc_enable(void *arg, dtrace_id_t id, void *parg)
-{
- dcpc_probe_t *pp = parg;
- int i, found = 0;
- cpu_t *c;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- /*
- * Bail out if the counters are being used by a libcpc consumer.
- */
- rw_enter(&kcpc_cpuctx_lock, RW_READER);
- if (kcpc_cpuctx > 0) {
- rw_exit(&kcpc_cpuctx_lock);
- return (-1);
- }
-
- dtrace_cpc_in_use++;
- rw_exit(&kcpc_cpuctx_lock);
-
- /*
- * Locate this enabling in the first free entry of the active
- * request array.
- */
- for (i = 0; i < cpc_ncounters; i++) {
- if (dcpc_actv_reqs[i] == NULL) {
- dcpc_actv_reqs[i] = pp;
- pp->dcpc_actv_req_idx = i;
- found = 1;
- break;
- }
- }
-
- /*
- * If we couldn't find a slot for this probe then there is no
- * room at the inn.
- */
- if (!found) {
- dtrace_cpc_in_use--;
- return (-1);
- }
-
- ASSERT(pp->dcpc_actv_req_idx >= 0);
-
- /*
- * DTrace is taking over CPC contexts, so stop collecting
- * capacity/utilization data for all CPUs.
- */
- if (dtrace_cpc_in_use == 1)
- cu_disable();
-
- /*
- * The following must hold true if we are to (attempt to) enable
- * this request:
- *
- * 1) No enablings currently exist. We allow all platforms to
- * proceed if this is true.
- *
- * OR
- *
- * 2) If the platform is multi overflow capable and there are
- * less valid enablings than there are counters. There is no
- * guarantee that a platform can accommodate as many events as
- * it has counters for but we will at least try to program
- * up to that many requests.
- *
- * The 'dcpc_enablings' variable is implicitly protected by locking
- * provided by the DTrace framework and the cpu management framework.
- */
- if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
- dcpc_enablings < cpc_ncounters)) {
- /*
- * Before attempting to program the first enabling we need to
- * invalidate any lwp-based contexts and lay claim to the
- * overflow interrupt mechanism.
- */
- if (dcpc_enablings == 0) {
- kcpc_invalidate_all();
- dcpc_claim_interrupts();
- }
-
- if (dcpc_program_event(pp) == 0) {
- dcpc_enablings++;
- return (0);
- }
- }
-
- /*
- * If active enablings existed before we failed to enable this probe
- * on a multi event capable platform then we need to restart counters
- * as they will have been stopped in the attempted configuration. The
- * context should now just contain the request prior to this failed
- * enabling.
- */
- if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
- c = cpu_list;
-
- ASSERT(dcpc_mult_ovf_cap == 1);
- do {
- /*
- * Skip CPUs that are currently offline.
- */
- if (c->cpu_flags & CPU_OFFLINE)
- continue;
-
- kcpc_cpu_program(c, c->cpu_cpc_ctx);
- } while ((c = c->cpu_next) != cpu_list);
- }
-
- /*
- * Give up any claim to the overflow interrupt mechanism if no
- * dcpc based enablings exist.
- */
- if (dcpc_enablings == 0)
- dcpc_surrender_interrupts();
-
- dtrace_cpc_in_use--;
- dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
- pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;
-
- /*
- * If all probes are removed, enable capacity/utilization data
- * collection for every CPU.
- */
- if (dtrace_cpc_in_use == 0)
- cu_enable();
-
- return (-1);
-}
-
-/*
- * If only one enabling is active then remove the context and free
- * everything up. If there are multiple enablings active then remove this
- * one, its associated meta-data and re-program the hardware.
- */
-/*ARGSUSED*/
-static void
-dcpc_disable(void *arg, dtrace_id_t id, void *parg)
-{
- cpu_t *c;
- dcpc_probe_t *pp = parg;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- kpreempt_disable();
-
- /*
- * This probe didn't actually make it as far as being fully enabled
- * so we needn't do anything with it.
- */
- if (pp->dcpc_enabled == 0) {
- /*
- * If we actually allocated this request a slot in the
- * request array but failed to enable it, then remove the
- * entry in the array.
- */
- if (pp->dcpc_actv_req_idx >= 0) {
- dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
- pp->dcpc_actv_req_idx = pp->dcpc_picno =
- pp->dcpc_disabling = -1;
- }
-
- kpreempt_enable();
- return;
- }
-
- /*
- * If this is the only enabling then stop all the counters and
- * free up the meta-data.
- */
- if (dcpc_enablings == 1) {
- ASSERT(dtrace_cpc_in_use == 1);
-
- dcpc_block_interrupts();
-
- c = cpu_list;
-
- do {
- dcpc_disable_cpu(c);
- } while ((c = c->cpu_next) != cpu_list);
-
- dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
- dcpc_surrender_interrupts();
- } else {
- /*
- * This platform can support multiple overflow events and
- * the enabling being disabled is not the last one. Remove this
- * enabling and re-program the hardware with the new config.
- */
- ASSERT(dcpc_mult_ovf_cap);
- ASSERT(dcpc_enablings > 1);
-
- pp->dcpc_disabling = 1;
- (void) dcpc_program_event(pp);
- }
-
- kpreempt_enable();
-
- dcpc_enablings--;
- dtrace_cpc_in_use--;
- pp->dcpc_enabled = 0;
- pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
-
- /*
- * If all probes are removed, enable capacity/utilization data
- * collection for every CPU.
- */
- if (dtrace_cpc_in_use == 0)
- cu_enable();
-}
-
-/*ARGSUSED*/
-static int
-dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
-{
- cpu_t *c;
- uint8_t *state;
-
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- switch (what) {
- case CPU_OFF:
- /*
- * Offline CPUs are not allowed to take part so remove this
- * CPU if we are actively tracing.
- */
- if (dtrace_cpc_in_use) {
- c = cpu_get(cpu);
- state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;
-
- /*
- * Indicate that a configuration is in process in
- * order to stop overflow interrupts being processed
- * on this CPU while we disable it.
- */
- while (atomic_cas_8(state, DCPC_INTR_FREE,
- DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
- continue;
-
- dcpc_disable_cpu(c);
-
- /*
- * Reset this CPU's interrupt state as the configuration
- * has ended.
- */
- cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
- DCPC_INTR_FREE;
- membar_producer();
- }
- break;
-
- case CPU_ON:
- case CPU_SETUP:
- /*
- * This CPU is being initialized or brought online so program
- * it with the current request set if we are actively tracing.
- */
- if (dtrace_cpc_in_use) {
- c = cpu_get(cpu);
- (void) dcpc_program_cpu_event(c);
- }
- break;
-
- default:
- break;
- }
-
- return (0);
-}
-
-static dtrace_pattr_t dcpc_attr = {
-{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
-{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
-};
-
-static dtrace_pops_t dcpc_pops = {
- dcpc_provide,
- NULL,
- dcpc_enable,
- dcpc_disable,
- NULL,
- NULL,
- NULL,
- NULL,
- dcpc_mode,
- dcpc_destroy
-};
-
-/*ARGSUSED*/
-static int
-dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
-{
- return (0);
-}
-
-/*ARGSUSED*/
-static int
-dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
-{
- int error;
-
- switch (infocmd) {
- case DDI_INFO_DEVT2DEVINFO:
- *result = (void *)dcpc_devi;
- error = DDI_SUCCESS;
- break;
- case DDI_INFO_DEVT2INSTANCE:
- *result = (void *)0;
- error = DDI_SUCCESS;
- break;
- default:
- error = DDI_FAILURE;
- }
- return (error);
-}
-
-static int
-dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
-{
- switch (cmd) {
- case DDI_DETACH:
- break;
- case DDI_SUSPEND:
- return (DDI_SUCCESS);
- default:
- return (DDI_FAILURE);
- }
-
- if (dtrace_unregister(dcpc_pid) != 0)
- return (DDI_FAILURE);
-
- ddi_remove_minor_node(devi, NULL);
-
- mutex_enter(&cpu_lock);
- unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
- mutex_exit(&cpu_lock);
-
- kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));
-
- kcpc_unregister_dcpc();
-
- return (DDI_SUCCESS);
-}
-
-static int
-dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
-{
- uint_t caps;
- char *attrs;
-
- switch (cmd) {
- case DDI_ATTACH:
- break;
- case DDI_RESUME:
- return (DDI_SUCCESS);
- default:
- return (DDI_FAILURE);
- }
-
- if (kcpc_pcbe_loaded() == -1)
- return (DDI_FAILURE);
-
- caps = kcpc_pcbe_capabilities();
-
- if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
- cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"\
- " on this processor");
- return (DDI_FAILURE);
- }
-
- if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
- DDI_PSEUDO, NULL) == DDI_FAILURE ||
- dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
- NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
- ddi_remove_minor_node(devi, NULL);
- return (DDI_FAILURE);
- }
-
- mutex_enter(&cpu_lock);
- register_cpu_setup_func(dcpc_cpu_setup, NULL);
- mutex_exit(&cpu_lock);
-
- dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
- ASSERT(dcpc_ovf_mask != 0);
-
- if (caps & CPC_CAP_OVERFLOW_PRECISE)
- dcpc_mult_ovf_cap = 1;
-
- /*
- * Determine which, if any, mask attribute the back-end can use.
- */
- attrs = kcpc_list_attrs();
- if (strstr(attrs, "umask") != NULL)
- dcpc_mask_type |= DCPC_UMASK;
- else if (strstr(attrs, "emask") != NULL)
- dcpc_mask_type |= DCPC_EMASK;
-
- /*
- * The dcpc_actv_reqs array is used to store the requests that
- * we currently have programmed. The order of requests in this
- * array is not necessarily the order that the event appears in
- * the kcpc_request_t array. Once entered into a slot in the array
- * the entry is not moved until it's removed.
- */
- dcpc_actv_reqs =
- kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);
-
- dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
- DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);
-
- kcpc_register_dcpc(dcpc_fire);
-
- ddi_report_dev(devi);
- dcpc_devi = devi;
-
- return (DDI_SUCCESS);
-}
-
-static struct cb_ops dcpc_cb_ops = {
- dcpc_open, /* open */
- nodev, /* close */
- nulldev, /* strategy */
- nulldev, /* print */
- nodev, /* dump */
- nodev, /* read */
- nodev, /* write */
- nodev, /* ioctl */
- nodev, /* devmap */
- nodev, /* mmap */
- nodev, /* segmap */
- nochpoll, /* poll */
- ddi_prop_op, /* cb_prop_op */
- 0, /* streamtab */
- D_NEW | D_MP /* Driver compatibility flag */
-};
-
-static struct dev_ops dcpc_ops = {
- DEVO_REV, /* devo_rev, */
- 0, /* refcnt */
- dcpc_info, /* get_dev_info */
- nulldev, /* identify */
- nulldev, /* probe */
- dcpc_attach, /* attach */
- dcpc_detach, /* detach */
- nodev, /* reset */
- &dcpc_cb_ops, /* driver operations */
- NULL, /* bus operations */
- nodev, /* dev power */
- ddi_quiesce_not_needed /* quiesce */
-};
-
-/*
- * Module linkage information for the kernel.
- */
-static struct modldrv modldrv = {
- &mod_driverops, /* module type */
- "DTrace CPC Module", /* name of module */
- &dcpc_ops, /* driver ops */
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1,
- (void *)&modldrv,
- NULL
-};
-
-int
-_init(void)
-{
- return (mod_install(&modlinkage));
-}
-
-int
-_info(struct modinfo *modinfop)
-{
- return (mod_info(&modlinkage, modinfop));
-}
-
-int
-_fini(void)
-{
- return (mod_remove(&modlinkage));
-}
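The dcpc.c deletion above removes the whole cpc provider; its entry point from the hardware side is dcpc_fire(), registered with the CPC subsystem via kcpc_register_dcpc() in dcpc_attach(), taking a bitmap with one bit set per overflowed pic. A minimal sketch of the calling side, assuming a hypothetical platform overflow handler (ovf_handler(), read_overflow_status() and dcpc_hook are illustrative names, not part of the real kcpc/PCBE interface):

        /* Saved by kcpc_register_dcpc(); points at dcpc_fire() while loaded. */
        static void (*dcpc_hook)(uint64_t);

        static void
        ovf_handler(void)
        {
                uint64_t bitmap = 0;
                int pic;

                /* Set one bit for every counter whose overflow flag is raised. */
                for (pic = 0; pic < cpc_ncounters; pic++) {
                        if (read_overflow_status(pic))
                                bitmap |= 1ULL << pic;
                }

                /*
                 * dcpc_fire() maps each set bit back to the dcpc_actv_reqs[]
                 * entry programmed on that pic and fires the matching probe.
                 */
                if (dcpc_hook != NULL)
                        dcpc_hook(bitmap);
        }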
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c
index 0c5e4b3a011a..2a9df6d403f2 100644
--- a/uts/common/dtrace/dtrace.c
+++ b/uts/common/dtrace/dtrace.c
@@ -21,7 +21,6 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
/*
@@ -119,7 +118,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
size_t dtrace_global_maxsize = (16 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
-dtrace_optval_t dtrace_helper_actions_max = 1024;
+dtrace_optval_t dtrace_helper_actions_max = 32;
dtrace_optval_t dtrace_helper_providers_max = 32;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
@@ -144,7 +143,6 @@ int dtrace_err_verbose;
hrtime_t dtrace_deadman_interval = NANOSEC;
hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
-hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
/*
* DTrace External Variables
@@ -461,13 +459,11 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
-static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
-static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
@@ -1108,13 +1104,10 @@ dtrace_priv_proc_common_nocd()
}
static int
-dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate)
+dtrace_priv_proc_destructive(dtrace_state_t *state)
{
int action = state->dts_cred.dcr_action;
- if (!(mstate->dtms_access & DTRACE_ACCESS_PROC))
- goto bad;
-
if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
dtrace_priv_proc_common_zone(state) == 0)
goto bad;
@@ -1136,17 +1129,15 @@ bad:
}
static int
-dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
+dtrace_priv_proc_control(dtrace_state_t *state)
{
- if (mstate->dtms_access & DTRACE_ACCESS_PROC) {
- if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
- return (1);
+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
+ return (1);
- if (dtrace_priv_proc_common_zone(state) &&
- dtrace_priv_proc_common_user(state) &&
- dtrace_priv_proc_common_nocd())
- return (1);
- }
+ if (dtrace_priv_proc_common_zone(state) &&
+ dtrace_priv_proc_common_user(state) &&
+ dtrace_priv_proc_common_nocd())
+ return (1);
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1154,10 +1145,9 @@ dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate)
}
static int
-dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate)
+dtrace_priv_proc(dtrace_state_t *state)
{
- if ((mstate->dtms_access & DTRACE_ACCESS_PROC) &&
- (state->dts_cred.dcr_action & DTRACE_CRA_PROC))
+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
return (1);
cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
@@ -1188,109 +1178,6 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state)
}
/*
- * Determine if the dte_cond of the specified ECB allows for processing of
- * the current probe to continue. Note that this routine may allow continued
- * processing, but with access(es) stripped from the mstate's dtms_access
- * field.
- */
-static int
-dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
- dtrace_ecb_t *ecb)
-{
- dtrace_probe_t *probe = ecb->dte_probe;
- dtrace_provider_t *prov = probe->dtpr_provider;
- dtrace_pops_t *pops = &prov->dtpv_pops;
- int mode = DTRACE_MODE_NOPRIV_DROP;
-
- ASSERT(ecb->dte_cond);
-
- if (pops->dtps_mode != NULL) {
- mode = pops->dtps_mode(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg);
-
- ASSERT((mode & DTRACE_MODE_USER) ||
- (mode & DTRACE_MODE_KERNEL));
- ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
- (mode & DTRACE_MODE_NOPRIV_DROP));
- }
-
- /*
- * If the dte_cond bits indicate that this consumer is only allowed to
- * see user-mode firings of this probe, call the provider's dtps_mode()
- * entry point to check that the probe was fired while in a user
- * context. If that's not the case, use the policy specified by the
- * provider to determine if we drop the probe or merely restrict
- * operation.
- */
- if (ecb->dte_cond & DTRACE_COND_USERMODE) {
- ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
-
- if (!(mode & DTRACE_MODE_USER)) {
- if (mode & DTRACE_MODE_NOPRIV_DROP)
- return (0);
-
- mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
- }
- }
-
- /*
- * This is more subtle than it looks. We have to be absolutely certain
- * that CRED() isn't going to change out from under us so it's only
- * legit to examine that structure if we're in constrained situations.
- * Currently, the only time we'll do this check is if a non-super-user
- * has enabled the profile or syscall providers -- providers that
- * allow visibility of all processes. For the profile case, the check
- * above will ensure that we're examining a user context.
- */
- if (ecb->dte_cond & DTRACE_COND_OWNER) {
- cred_t *cr;
- cred_t *s_cr = state->dts_cred.dcr_cred;
- proc_t *proc;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_uid != cr->cr_uid ||
- s_cr->cr_uid != cr->cr_ruid ||
- s_cr->cr_uid != cr->cr_suid ||
- s_cr->cr_gid != cr->cr_gid ||
- s_cr->cr_gid != cr->cr_rgid ||
- s_cr->cr_gid != cr->cr_sgid ||
- (proc = ttoproc(curthread)) == NULL ||
- (proc->p_flag & SNOCD)) {
- if (mode & DTRACE_MODE_NOPRIV_DROP)
- return (0);
-
- mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
- }
- }
-
- /*
- * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
- * in our zone, check to see if our mode policy is to restrict rather
- * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
- * and DTRACE_ACCESS_ARGS.
- */
- if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
- cred_t *cr;
- cred_t *s_cr = state->dts_cred.dcr_cred;
-
- ASSERT(s_cr != NULL);
-
- if ((cr = CRED()) == NULL ||
- s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
- if (mode & DTRACE_MODE_NOPRIV_DROP)
- return (0);
-
- mstate->dtms_access &=
- ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
- }
- }
-
- return (1);
-}
-
-/*
* Note: not called from probe context. This function is called
* asynchronously (and at a regular interval) from outside of probe context to
* clean the dirty dynamic variable lists on all CPUs. Dynamic variable
@@ -1972,75 +1859,6 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
lquanta[levels + 1] += incr;
}
-static int
-dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
- uint16_t high, uint16_t nsteps, int64_t value)
-{
- int64_t this = 1, last, next;
- int base = 1, order;
-
- ASSERT(factor <= nsteps);
- ASSERT(nsteps % factor == 0);
-
- for (order = 0; order < low; order++)
- this *= factor;
-
- /*
- * If our value is less than our factor taken to the power of the
- * low order of magnitude, it goes into the zeroth bucket.
- */
- if (value < (last = this))
- return (0);
-
- for (this *= factor; order <= high; order++) {
- int nbuckets = this > nsteps ? nsteps : this;
-
- if ((next = this * factor) < this) {
- /*
- * We should not generally get log/linear quantizations
- * with a high magnitude that allows 64-bits to
- * overflow, but we nonetheless protect against this
- * by explicitly checking for overflow, and clamping
- * our value accordingly.
- */
- value = this - 1;
- }
-
- if (value < this) {
- /*
- * If our value lies within this order of magnitude,
- * determine its position by taking the offset within
- * the order of magnitude, dividing by the bucket
- * width, and adding to our (accumulated) base.
- */
- return (base + (value - last) / (this / nbuckets));
- }
-
- base += nbuckets - (nbuckets / factor);
- last = this;
- this = next;
- }
-
- /*
- * Our value is greater than or equal to our factor taken to the
- * power of one plus the high magnitude -- return the top bucket.
- */
- return (base);
-}
-
-static void
-dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
-{
- uint64_t arg = *llquanta++;
- uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
- uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
- uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
- uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
-
- llquanta[dtrace_aggregate_llquantize_bucket(factor,
- low, high, nsteps, nval)] += incr;
-}
-
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
@@ -2822,12 +2640,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
switch (v) {
case DIF_VAR_ARGS:
- if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) {
- cpu_core[CPU->cpu_id].cpuc_dtrace_flags |=
- CPU_DTRACE_KPRIV;
- return (0);
- }
-
ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
if (ndx >= sizeof (mstate->dtms_arg) /
sizeof (mstate->dtms_arg[0])) {
@@ -2863,7 +2675,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_UREGS: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
if ((lwp = curthread->t_lwp) == NULL) {
@@ -2875,22 +2687,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (dtrace_getreg(lwp->lwp_regs, ndx));
}
- case DIF_VAR_VMREGS: {
- uint64_t rval;
-
- if (!dtrace_priv_kernel(state))
- return (0);
-
- DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
-
- rval = dtrace_getvmreg(ndx,
- &cpu_core[CPU->cpu_id].cpuc_dtrace_flags);
-
- DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
-
- return (rval);
- }
-
case DIF_VAR_CURTHREAD:
if (!dtrace_priv_kernel(state))
return (0);
@@ -2943,7 +2739,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_stackdepth);
case DIF_VAR_USTACKDEPTH:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
/*
@@ -2998,7 +2794,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return (mstate->dtms_caller);
case DIF_VAR_UCALLER:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
@@ -3046,7 +2842,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_PID:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3068,7 +2864,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
case DIF_VAR_PPID:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3095,7 +2891,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_tid);
case DIF_VAR_EXECNAME:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3115,7 +2911,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_ZONENAME:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3135,7 +2931,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
state, mstate));
case DIF_VAR_UID:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3156,7 +2952,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
case DIF_VAR_GID:
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3178,7 +2974,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
case DIF_VAR_ERRNO: {
klwp_t *lwp;
- if (!dtrace_priv_proc(state, mstate))
+ if (!dtrace_priv_proc(state))
return (0);
/*
@@ -3518,7 +3314,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state, mstate) &&
+ dtrace_priv_proc_control(state) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyout(kaddr, uaddr, size, flags);
@@ -3533,7 +3329,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
- dtrace_priv_proc_control(state, mstate) &&
+ dtrace_priv_proc_control(state) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyoutstr(kaddr, uaddr, size, flags);
@@ -3904,54 +3700,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
break;
}
- case DIF_SUBR_TOUPPER:
- case DIF_SUBR_TOLOWER: {
- uintptr_t s = tupregs[0].dttk_value;
- uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
- char *dest = (char *)mstate->dtms_scratch_ptr, c;
- size_t len = dtrace_strlen((char *)s, size);
- char lower, upper, convert;
- int64_t i;
-
- if (subr == DIF_SUBR_TOUPPER) {
- lower = 'a';
- upper = 'z';
- convert = 'A';
- } else {
- lower = 'A';
- upper = 'Z';
- convert = 'a';
- }
-
- if (!dtrace_canload(s, len + 1, mstate, vstate)) {
- regs[rd] = NULL;
- break;
- }
-
- if (!DTRACE_INSCRATCH(mstate, size)) {
- DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
- regs[rd] = NULL;
- break;
- }
-
- for (i = 0; i < size - 1; i++) {
- if ((c = dtrace_load8(s + i)) == '\0')
- break;
-
- if (c >= lower && c <= upper)
- c = convert + (c - lower);
-
- dest[i] = c;
- }
-
- ASSERT(i < size);
- dest[i] = '\0';
- regs[rd] = (uintptr_t)dest;
- mstate->dtms_scratch_ptr += size;
- break;
- }
-
case DIF_SUBR_GETMAJOR:
#ifdef _LP64
regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
#else
@@ -4213,20 +3962,9 @@ case DIF_SUBR_GETMAJOR:
case DIF_SUBR_LLTOSTR: {
int64_t i = (int64_t)tupregs[0].dttk_value;
- uint64_t val, digit;
- uint64_t size = 65; /* enough room for 2^64 in binary */
+ int64_t val = i < 0 ? i * -1 : i;
+ uint64_t size = 22; /* enough room for 2^64 in decimal */
char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
- int base = 10;
-
- if (nargs > 1) {
- if ((base = tupregs[1].dttk_value) <= 1 ||
- base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
- *flags |= CPU_DTRACE_ILLOP;
- break;
- }
- }
-
- val = (base == 10 && i < 0) ? i * -1 : i;
if (!DTRACE_INSCRATCH(mstate, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
@@ -4234,24 +3972,13 @@ case DIF_SUBR_GETMAJOR:
break;
}
- for (*end-- = '\0'; val; val /= base) {
- if ((digit = val % base) <= '9' - '0') {
- *end-- = '0' + digit;
- } else {
- *end-- = 'a' + (digit - ('9' - '0') - 1);
- }
- }
-
- if (i == 0 && base == 16)
- *end-- = '0';
-
- if (base == 16)
- *end-- = 'x';
+ for (*end-- = '\0'; val; val /= 10)
+ *end-- = '0' + (val % 10);
- if (i == 0 || base == 8 || base == 16)
+ if (i == 0)
*end-- = '0';
- if (i < 0 && base == 10)
+ if (i < 0)
*end-- = '-';
regs[rd] = (uintptr_t)end + 1;
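
The restored lltostr logic writes decimal digits backwards from the end of a 22-byte scratch region. A userland sketch of the same loop (an illustration under assumptions: the caller supplies the buffer, and unsigned negation is used here so INT64_MIN does not trip signed overflow, which the in-kernel `i * -1` form tolerates in practice):

#include <stdint.h>
#include <stdio.h>

static char *
lltostr_sketch(int64_t i, char *buf, size_t size)
{
	char *end = buf + size - 1;
	uint64_t val = (i < 0) ? -(uint64_t)i : (uint64_t)i;

	for (*end-- = '\0'; val; val /= 10)
		*end-- = '0' + (val % 10);

	if (i == 0)
		*end-- = '0';	/* the loop above emits nothing for zero */

	if (i < 0)
		*end-- = '-';

	return (end + 1);
}

int
main(void)
{
	char buf[22];	/* -9223372036854775808 plus NUL fits in 21 bytes */

	printf("%s\n", lltostr_sketch(-9223372036854775807LL - 1,
	    buf, sizeof (buf)));
	return (0);
}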
@@ -5886,7 +5613,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
dtrace_vstate_t *vstate = &state->dts_vstate;
dtrace_provider_t *prov = probe->dtpr_provider;
- uint64_t tracememsize = 0;
int committed = 0;
caddr_t tomax;
@@ -5907,7 +5633,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
#endif
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
- mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@@ -5945,8 +5670,65 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
}
}
- if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb))
- continue;
+ if (ecb->dte_cond) {
+ /*
+ * If the dte_cond bits indicate that this
+ * consumer is only allowed to see user-mode firings
+ * of this probe, call the provider's dtps_usermode()
+ * entry point to check that the probe was fired
+ * while in a user context. Skip this ECB if that's
+ * not the case.
+ */
+ if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
+ prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg) == 0)
+ continue;
+
+ /*
+ * This is more subtle than it looks. We have to be
+ * absolutely certain that CRED() isn't going to
+ * change out from under us so it's only legit to
+ * examine that structure if we're in constrained
+ * situations. Currently, the only time we'll do this
+ * check is if a non-super-user has enabled the
+ * profile or syscall providers -- providers that
+ * allow visibility of all processes. For the
+ * profile case, the check above will ensure that
+ * we're examining a user context.
+ */
+ if (ecb->dte_cond & DTRACE_COND_OWNER) {
+ cred_t *cr;
+ cred_t *s_cr =
+ ecb->dte_state->dts_cred.dcr_cred;
+ proc_t *proc;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_uid != cr->cr_uid ||
+ s_cr->cr_uid != cr->cr_ruid ||
+ s_cr->cr_uid != cr->cr_suid ||
+ s_cr->cr_gid != cr->cr_gid ||
+ s_cr->cr_gid != cr->cr_rgid ||
+ s_cr->cr_gid != cr->cr_sgid ||
+ (proc = ttoproc(curthread)) == NULL ||
+ (proc->p_flag & SNOCD))
+ continue;
+ }
+
+ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
+ cred_t *cr;
+ cred_t *s_cr =
+ ecb->dte_state->dts_cred.dcr_cred;
+
+ ASSERT(s_cr != NULL);
+
+ if ((cr = CRED()) == NULL ||
+ s_cr->cr_zone->zone_id !=
+ cr->cr_zone->zone_id)
+ continue;
+ }
+ }
if (now - state->dts_alive > dtrace_deadman_timeout) {
/*
@@ -5986,7 +5768,9 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
mstate.dtms_present |= DTRACE_MSTATE_EPID;
if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
- mstate.dtms_access |= DTRACE_ACCESS_KERNEL;
+ mstate.dtms_access = DTRACE_ACCESS_KERNEL;
+ else
+ mstate.dtms_access = 0;
if (pred != NULL) {
dtrace_difo_t *dp = pred->dtp_difo;
@@ -6046,8 +5830,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
switch (act->dta_kind) {
case DTRACEACT_STOP:
- if (dtrace_priv_proc_destructive(state,
- &mstate))
+ if (dtrace_priv_proc_destructive(state))
dtrace_action_stop();
continue;
@@ -6074,7 +5857,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_JSTACK:
case DTRACEACT_USTACK:
- if (!dtrace_priv_proc(state, &mstate))
+ if (!dtrace_priv_proc(state))
continue;
/*
@@ -6107,23 +5890,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
}
- /*
- * Clear the string space, since there's no
- * helper to do it for us.
- */
- if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) {
- int depth = DTRACE_USTACK_NFRAMES(
- rec->dtrd_arg);
- size_t strsize = DTRACE_USTACK_STRSIZE(
- rec->dtrd_arg);
- uint64_t *buf = (uint64_t *)(tomax +
- valoffs);
- void *strspace = &buf[depth + 1];
-
- dtrace_bzero(strspace,
- MIN(depth, strsize));
- }
-
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_getupcstack((uint64_t *)
(tomax + valoffs),
@@ -6177,8 +5943,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
case DTRACEACT_RAISE:
- if (dtrace_priv_proc_destructive(state,
- &mstate))
+ if (dtrace_priv_proc_destructive(state))
dtrace_action_raise(val);
continue;
@@ -6205,11 +5970,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
- case DTRACEACT_TRACEMEM:
- break;
-
- case DTRACEACT_TRACEMEM_DYNSIZE:
- tracememsize = val;
break;
case DTRACEACT_SYM:
@@ -6223,7 +5983,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
case DTRACEACT_UADDR: {
struct pid *pid = curthread->t_procp->p_pidp;
- if (!dtrace_priv_proc(state, &mstate))
+ if (!dtrace_priv_proc(state))
continue;
DTRACE_STORE(uint64_t, tomax,
@@ -6275,12 +6035,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
uintptr_t end = valoffs + size;
- if (tracememsize != 0 &&
- valoffs + tracememsize < end) {
- end = valoffs + tracememsize;
- tracememsize = 0;
- }
-
if (!dtrace_vcanload((void *)(uintptr_t)val,
&dp->dtdo_rtype, &mstate, vstate))
continue;
@@ -7161,9 +6915,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
if ((priv & DTRACE_PRIV_KERNEL) &&
(priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
- pops->dtps_mode == NULL) {
+ pops->dtps_usermode == NULL) {
cmn_err(CE_WARN, "failed to register provider '%s': need "
- "dtps_mode() op for given privilege attributes", name);
+ "dtps_usermode() op for given privilege attributes", name);
return (EINVAL);
}
@@ -7260,7 +7014,7 @@ dtrace_unregister(dtrace_provider_id_t id)
{
dtrace_provider_t *old = (dtrace_provider_t *)id;
dtrace_provider_t *prev = NULL;
- int i, self = 0, noreap = 0;
+ int i, self = 0;
dtrace_probe_t *probe, *first = NULL;
if (old->dtpv_pops.dtps_enable ==
@@ -7317,31 +7071,14 @@ dtrace_unregister(dtrace_provider_id_t id)
continue;
/*
- * If we are trying to unregister a defunct provider, and the
- * provider was made defunct within the interval dictated by
- * dtrace_unregister_defunct_reap, we'll (asynchronously)
- * attempt to reap our enablings. To denote that the provider
- * should reattempt to unregister itself at some point in the
- * future, we will return a differentiable error code (EAGAIN
- * instead of EBUSY) in this case.
+ * We have at least one ECB; we can't remove this provider.
*/
- if (dtrace_gethrtime() - old->dtpv_defunct >
- dtrace_unregister_defunct_reap)
- noreap = 1;
-
if (!self) {
mutex_exit(&dtrace_lock);
mutex_exit(&mod_lock);
mutex_exit(&dtrace_provider_lock);
}
-
- if (noreap)
- return (EBUSY);
-
- (void) taskq_dispatch(dtrace_taskq,
- (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);
-
- return (EAGAIN);
+ return (EBUSY);
}
/*
@@ -7432,7 +7169,7 @@ dtrace_invalidate(dtrace_provider_id_t id)
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
- pvp->dtpv_defunct = dtrace_gethrtime();
+ pvp->dtpv_defunct = 1;
mutex_exit(&dtrace_lock);
mutex_exit(&dtrace_provider_lock);
@@ -9639,35 +9376,6 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
break;
}
- case DTRACEAGG_LLQUANTIZE: {
- uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
- uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
- uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
- uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
- int64_t v;
-
- agg->dtag_initial = desc->dtad_arg;
- agg->dtag_aggregate = dtrace_aggregate_llquantize;
-
- if (factor < 2 || low >= high || nsteps < factor)
- goto err;
-
- /*
- * Now check that the number of steps evenly divides a power
- * of the factor. (This assures both integer bucket size and
- * linearity within each magnitude.)
- */
- for (v = factor; v < nsteps; v *= factor)
- continue;
-
- if ((v % nsteps) || (nsteps % factor))
- goto err;
-
- size = (dtrace_aggregate_llquantize_bucket(factor,
- low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
- break;
- }
-
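
The divisibility rule in the removed check is easy to miss; a standalone sketch of it (assuming factor >= 2 has already been validated, as in the hunk above):

#include <stdint.h>

static int
llq_steps_ok(uint16_t factor, uint16_t nsteps)
{
	int64_t v;

	/* Find the smallest power of factor that is >= nsteps ... */
	for (v = factor; v < nsteps; v *= factor)
		continue;

	/* ... and require nsteps to divide it, and factor to divide nsteps. */
	return ((v % nsteps) == 0 && (nsteps % factor) == 0);
}

For example, llq_steps_ok(10, 20) accepts (v ends at 100; 100 % 20 == 0 and 20 % 10 == 0), while llq_steps_ok(10, 15) rejects (100 % 15 != 0), so every magnitude keeps an integer bucket width.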
case DTRACEAGG_AVG:
agg->dtag_aggregate = dtrace_aggregate_avg;
size = sizeof (uint64_t) * 2;
@@ -9837,14 +9545,12 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
case DTRACEACT_PRINTA:
case DTRACEACT_SYSTEM:
case DTRACEACT_FREOPEN:
- case DTRACEACT_DIFEXPR:
/*
* We know that our arg is a string -- turn it into a
* format.
*/
if (arg == NULL) {
- ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
- desc->dtad_kind == DTRACEACT_DIFEXPR);
+ ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
format = 0;
} else {
ASSERT(arg != NULL);
@@ -9855,8 +9561,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
/*FALLTHROUGH*/
case DTRACEACT_LIBACT:
- case DTRACEACT_TRACEMEM:
- case DTRACEACT_TRACEMEM_DYNSIZE:
+ case DTRACEACT_DIFEXPR:
if (dp == NULL)
return (EINVAL);
@@ -10339,7 +10044,6 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
caddr_t tomax = buf->dtb_tomax;
caddr_t xamot = buf->dtb_xamot;
dtrace_icookie_t cookie;
- hrtime_t now = dtrace_gethrtime();
ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
@@ -10355,8 +10059,6 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
buf->dtb_drops = 0;
buf->dtb_errors = 0;
buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
- buf->dtb_interval = now - buf->dtb_switched;
- buf->dtb_switched = now;
dtrace_interrupt_enable(cookie);
}
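
What the surrounding routine does, stripped to its essentials, is swap the active (dtb_tomax) and inactive (dtb_xamot) per-CPU buffers while interrupts are disabled. A simplified userland model of that swap (field names beyond the ones visible in this hunk, such as dtb_xamot_offset, are assumptions):

#include <stddef.h>

typedef struct {
	char	*dtb_tomax;		/* active buffer: probes append here */
	char	*dtb_xamot;		/* inactive buffer: drained by consumer */
	size_t	dtb_offset;		/* bytes used in the active buffer */
	size_t	dtb_xamot_offset;	/* bytes to drain from the inactive one */
} buf_model_t;

static void
buffer_switch_model(buf_model_t *buf)
{
	char *tomax = buf->dtb_tomax;

	buf->dtb_tomax = buf->dtb_xamot;	/* inactive becomes active */
	buf->dtb_xamot = tomax;			/* old active goes to the consumer */
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_offset = 0;			/* fresh active buffer */
}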
@@ -10389,17 +10091,14 @@ dtrace_buffer_activate(dtrace_state_t *state)
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
- processorid_t cpu, int *factor)
+ processorid_t cpu)
{
cpu_t *cp;
dtrace_buffer_t *buf;
- int allocated = 0, desired = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
- *factor = 1;
-
if (size > dtrace_nonroot_maxsize &&
!PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
return (EFBIG);
@@ -10424,8 +10123,7 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
ASSERT(buf->dtb_xamot == NULL);
- if ((buf->dtb_tomax = kmem_zalloc(size,
- KM_NOSLEEP | KM_NORMALPRI)) == NULL)
+ if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
goto err;
buf->dtb_size = size;
@@ -10436,8 +10134,7 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
if (flags & DTRACEBUF_NOSWITCH)
continue;
- if ((buf->dtb_xamot = kmem_zalloc(size,
- KM_NOSLEEP | KM_NORMALPRI)) == NULL)
+ if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
goto err;
} while ((cp = cp->cpu_next) != cpu_list);
@@ -10451,19 +10148,16 @@ err:
continue;
buf = &bufs[cp->cpu_id];
- desired += 2;
if (buf->dtb_xamot != NULL) {
ASSERT(buf->dtb_tomax != NULL);
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_xamot, size);
- allocated++;
}
if (buf->dtb_tomax != NULL) {
ASSERT(buf->dtb_size == size);
kmem_free(buf->dtb_tomax, size);
- allocated++;
}
buf->dtb_tomax = NULL;
@@ -10471,8 +10165,6 @@ err:
buf->dtb_size = 0;
} while ((cp = cp->cpu_next) != cpu_list);
- *factor = desired / (allocated > 0 ? allocated : 1);
-
return (ENOMEM);
}
@@ -10774,36 +10466,6 @@ dtrace_buffer_polish(dtrace_buffer_t *buf)
}
}
-/*
- * This routine determines if data generated at the specified time has likely
- * been entirely consumed at user-level. This routine is called to determine
- * if an ECB on a defunct probe (but for an active enabling) can be safely
- * disabled and destroyed.
- */
-static int
-dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
-{
- int i;
-
- for (i = 0; i < NCPU; i++) {
- dtrace_buffer_t *buf = &bufs[i];
-
- if (buf->dtb_size == 0)
- continue;
-
- if (buf->dtb_flags & DTRACEBUF_RING)
- return (0);
-
- if (!buf->dtb_switched && buf->dtb_offset != 0)
- return (0);
-
- if (buf->dtb_switched - buf->dtb_interval < when)
- return (0);
- }
-
- return (1);
-}
-
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
@@ -11189,12 +10851,10 @@ dtrace_enabling_matchall(void)
* block pending our completion.
*/
for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
- dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred;
- cred_t *cr = dcr->dcr_cred;
- zoneid_t zone = cr != NULL ? crgetzoneid(cr) : 0;
+ cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;
- if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL &&
- (zone == GLOBAL_ZONEID || getzoneid() == zone)))
+ if (INGLOBALZONE(curproc) ||
+ cr != NULL && getzoneid() == crgetzoneid(cr))
(void) dtrace_enabling_match(enab, NULL);
}
@@ -11294,85 +10954,6 @@ retry:
}
/*
- * Called to reap ECBs that are attached to probes from defunct providers.
- */
-static void
-dtrace_enabling_reap(void)
-{
- dtrace_provider_t *prov;
- dtrace_probe_t *probe;
- dtrace_ecb_t *ecb;
- hrtime_t when;
- int i;
-
- mutex_enter(&cpu_lock);
- mutex_enter(&dtrace_lock);
-
- for (i = 0; i < dtrace_nprobes; i++) {
- if ((probe = dtrace_probes[i]) == NULL)
- continue;
-
- if (probe->dtpr_ecb == NULL)
- continue;
-
- prov = probe->dtpr_provider;
-
- if ((when = prov->dtpv_defunct) == 0)
- continue;
-
- /*
- * We have ECBs on a defunct provider: we want to reap these
- * ECBs to allow the provider to unregister. The destruction
- * of these ECBs must be done carefully: if we destroy the ECB
- * and the consumer later wishes to consume an EPID that
- * corresponds to the destroyed ECB (and if the EPID metadata
- * has not been previously consumed), the consumer will abort
- * processing on the unknown EPID. To reduce (but not, sadly,
- * eliminate) the possibility of this, we will only destroy an
- * ECB for a defunct provider if, for the state that
- * corresponds to the ECB:
- *
- * (a) There is no speculative tracing (which can effectively
- * cache an EPID for an arbitrary amount of time).
- *
- * (b) The principal buffers have been switched twice since the
- * provider became defunct.
- *
- * (c) The aggregation buffers are of zero size or have been
- * switched twice since the provider became defunct.
- *
- * We use dts_speculates to determine (a) and call a function
- * (dtrace_buffer_consumed()) to determine (b) and (c). Note
- * that as soon as we've been unable to destroy one of the ECBs
- * associated with the probe, we quit trying -- reaping is only
- * fruitful in as much as we can destroy all ECBs associated
- * with the defunct provider's probes.
- */
- while ((ecb = probe->dtpr_ecb) != NULL) {
- dtrace_state_t *state = ecb->dte_state;
- dtrace_buffer_t *buf = state->dts_buffer;
- dtrace_buffer_t *aggbuf = state->dts_aggbuffer;
-
- if (state->dts_speculates)
- break;
-
- if (!dtrace_buffer_consumed(buf, when))
- break;
-
- if (!dtrace_buffer_consumed(aggbuf, when))
- break;
-
- dtrace_ecb_disable(ecb);
- ASSERT(probe->dtpr_ecb != ecb);
- dtrace_ecb_destroy(ecb);
- }
- }
-
- mutex_exit(&dtrace_lock);
- mutex_exit(&cpu_lock);
-}
-
-/*
* DTrace DOF Functions
*/
/*ARGSUSED*/
@@ -11877,20 +11458,15 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
(uintptr_t)sec->dofs_offset + offs);
kind = (dtrace_actkind_t)desc->dofa_kind;
- if ((DTRACEACT_ISPRINTFLIKE(kind) &&
+ if (DTRACEACT_ISPRINTFLIKE(kind) &&
(kind != DTRACEACT_PRINTA ||
- desc->dofa_strtab != DOF_SECIDX_NONE)) ||
- (kind == DTRACEACT_DIFEXPR &&
desc->dofa_strtab != DOF_SECIDX_NONE)) {
dof_sec_t *strtab;
char *str, *fmt;
uint64_t i;
/*
- * The argument to these actions is an index into the
- * DOF string table. For printf()-like actions, this
- * is the format string. For print(), this is the
- * CTF type of the expression result.
+ * printf()-like actions must have a format string.
*/
if ((strtab = dtrace_dof_sect(dof,
DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
@@ -12376,7 +11952,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
size = min;
- if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL)
+ if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
return (ENOMEM);
dstate->dtds_size = size;
@@ -12738,7 +12314,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
dtrace_optval_t *opt = state->dts_options, size;
processorid_t cpu;
- int flags = 0, rval, factor, divisor = 1;
+ int flags = 0, rval;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&cpu_lock));
@@ -12768,7 +12344,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
flags |= DTRACEBUF_INACTIVE;
}
- for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) {
+ for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
/*
* The size must be 8-byte aligned. If the size is not 8-byte
* aligned, drop it down by the difference.
@@ -12786,7 +12362,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
return (E2BIG);
}
- rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor);
+ rval = dtrace_buffer_alloc(buf, size, flags, cpu);
if (rval != ENOMEM) {
opt[which] = size;
@@ -12795,9 +12371,6 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
return (rval);
-
- for (divisor = 2; divisor < factor; divisor <<= 1)
- continue;
}
return (ENOMEM);
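
Taken together with the earlier dtrace_buffer_alloc() hunks, the restored policy is simply to halve the request on ENOMEM rather than scale it by the allocator's feedback factor. A condensed reading (not a literal extract; names as in the hunks above):

	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
		size -= size & (sizeof (uint64_t) - 1);	/* keep 8-byte aligned */

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;	/* record the size that worked */
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);		/* caller opted out of auto-resize */
	}

	return (ENOMEM);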
@@ -12897,8 +12470,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
goto out;
}
- spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
- KM_NOSLEEP | KM_NORMALPRI);
+ spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
if (spec == NULL) {
rval = ENOMEM;
@@ -12909,8 +12481,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_nspeculations = (int)nspec;
for (i = 0; i < nspec; i++) {
- if ((buf = kmem_zalloc(bufsize,
- KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
+ if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
rval = ENOMEM;
goto err;
}
diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c
index 8cfe4cd33beb..42263e4ef274 100644
--- a/uts/common/dtrace/fasttrap.c
+++ b/uts/common/dtrace/fasttrap.c
@@ -24,9 +24,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
#include <sys/atomic.h>
#include <sys/errno.h>
@@ -276,7 +273,7 @@ fasttrap_pid_cleanup_cb(void *data)
fasttrap_provider_t **fpp, *fp;
fasttrap_bucket_t *bucket;
dtrace_provider_id_t provid;
- int i, later, rval;
+ int i, later;
static volatile int in = 0;
ASSERT(in == 0);
@@ -338,13 +335,9 @@ fasttrap_pid_cleanup_cb(void *data)
* clean out the unenabled probes.
*/
provid = fp->ftp_provid;
- if ((rval = dtrace_unregister(provid)) != 0) {
+ if (dtrace_unregister(provid) != 0) {
if (fasttrap_total > fasttrap_max / 2)
(void) dtrace_condense(provid);
-
- if (rval == EAGAIN)
- fp->ftp_marked = 1;
-
later += fp->ftp_marked;
fpp = &fp->ftp_next;
} else {
@@ -370,16 +363,12 @@ fasttrap_pid_cleanup_cb(void *data)
* get a chance to do that work if and when the timeout is reenabled
* (if detach fails).
*/
- if (later > 0) {
- if (fasttrap_timeout != (timeout_id_t)1) {
- fasttrap_timeout =
- timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
- }
-
+ if (later > 0 && fasttrap_timeout != (timeout_id_t)1)
+ fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
+ else if (later > 0)
fasttrap_cleanup_work = 1;
- } else {
+ else
fasttrap_timeout = 0;
- }
mutex_exit(&fasttrap_cleanup_mtx);
in = 0;
diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c
index fc809d3579a5..c1a2d1f1c12f 100644
--- a/uts/common/dtrace/profile.c
+++ b/uts/common/dtrace/profile.c
@@ -23,9 +23,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
#include <sys/errno.h>
#include <sys/stat.h>
@@ -411,25 +408,9 @@ profile_disable(void *arg, dtrace_id_t id, void *parg)
/*ARGSUSED*/
static int
-profile_mode(void *arg, dtrace_id_t id, void *parg)
+profile_usermode(void *arg, dtrace_id_t id, void *parg)
{
- profile_probe_t *prof = parg;
- int mode;
-
- if (CPU->cpu_profile_pc != 0) {
- mode = DTRACE_MODE_KERNEL;
- } else {
- mode = DTRACE_MODE_USER;
- }
-
- if (prof->prof_kind == PROF_TICK) {
- mode |= DTRACE_MODE_NOPRIV_RESTRICT;
- } else {
- ASSERT(prof->prof_kind == PROF_PROFILE);
- mode |= DTRACE_MODE_NOPRIV_DROP;
- }
-
- return (mode);
+ return (CPU->cpu_profile_pc == 0);
}
static dtrace_pattr_t profile_attr = {
@@ -449,7 +430,7 @@ static dtrace_pops_t profile_pops = {
NULL,
NULL,
NULL,
- profile_mode,
+ profile_usermode,
profile_destroy
};
diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c
index bd6bda5c9bfc..a82718e8bc6e 100644
--- a/uts/common/fs/zfs/arc.c
+++ b/uts/common/fs/zfs/arc.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -80,9 +78,9 @@
* types of locks: 1) the hash table lock array, and 2) the
* arc list locks.
*
- * Buffers do not have their own mutexes, rather they rely on the
- * hash table mutexes for the bulk of their protection (i.e. most
- * fields in the arc_buf_hdr_t are protected by these mutexes).
+ * Buffers do not have their own mutexes, rather they rely on the
+ * hash table mutexes for the bulk of their protection (i.e. most
+ * fields in the arc_buf_hdr_t are protected by these mutexes).
*
* buf_hash_find() returns the appropriate mutex (held) when it
* locates the requested buffer in the hash table. It returns
@@ -1219,7 +1217,7 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
ASSERT(BUF_EMPTY(hdr));
hdr->b_size = size;
hdr->b_type = type;
- hdr->b_spa = spa_load_guid(spa);
+ hdr->b_spa = spa_guid(spa);
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
@@ -1921,7 +1919,7 @@ arc_flush(spa_t *spa)
uint64_t guid = 0;
if (spa)
- guid = spa_load_guid(spa);
+ guid = spa_guid(spa);
while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) {
(void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA);
@@ -1982,11 +1980,6 @@ arc_shrink(void)
arc_adjust();
}
-/*
- * Determine if the system is under memory pressure and is asking
- * to reclaim memory. A return value of 1 indicates that the system
- * is under memory pressure and that the arc should adjust accordingly.
- */
static int
arc_reclaim_needed(void)
{
@@ -2034,24 +2027,11 @@ arc_reclaim_needed(void)
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
- if (vmem_size(heap_arena, VMEM_FREE) <
- (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2))
+ if (btop(vmem_size(heap_arena, VMEM_FREE)) <
+ (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
return (1);
#endif
- /*
- * If zio data pages are being allocated out of a separate heap segment,
- * then enforce that the size of available vmem for this arena remains
- * above about 1/16th free.
- *
- * Note: The 1/16th arena free requirement was put in place
- * to aggressively evict memory from the arc in order to avoid
- * memory fragmentation issues.
- */
- if (zio_arena != NULL &&
- vmem_size(zio_arena, VMEM_FREE) <
- (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
- return (1);
#else
if (spa_get_random(100) == 0)
return (1);
@@ -2103,13 +2083,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
}
kmem_cache_reap_now(buf_cache);
kmem_cache_reap_now(hdr_cache);
-
- /*
- * Ask the vmem arena to reclaim unused memory from its
- * quantum caches.
- */
- if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
- vmem_qcache_reap(zio_arena);
}
static void
@@ -2243,6 +2216,18 @@ arc_evict_needed(arc_buf_contents_t type)
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
return (1);
+#ifdef _KERNEL
+ /*
+ * If zio data pages are being allocated out of a separate heap segment,
+ * then enforce that the size of available vmem for this arena remains
+ * above about 1/32nd free.
+ */
+ if (type == ARC_BUFC_DATA && zio_arena != NULL &&
+ vmem_size(zio_arena, VMEM_FREE) <
+ (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
+ return (1);
+#endif
+
if (arc_reclaim_needed())
return (1);
@@ -2547,11 +2532,9 @@ arc_read_done(zio_t *zio)
callback_list = hdr->b_acb;
ASSERT(callback_list != NULL);
if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
- dmu_object_byteswap_t bswap =
- DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
byteswap_uint64_array :
- dmu_ot_byteswap[bswap].ob_func;
+ dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
func(buf->b_data, hdr->b_size);
}
@@ -2636,7 +2619,7 @@ arc_read_done(zio_t *zio)
}
/*
- * "Read" the block at the specified DVA (in bp) via the
+ * "Read" the block block at the specified DVA (in bp) via the
* cache. If the block is found in the cache, invoke the provided
* callback immediately and return. Note that the `zio' parameter
* in the callback will be NULL in this case, since no IO was
@@ -2693,7 +2676,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_buf_t *buf;
kmutex_t *hash_lock;
zio_t *rzio;
- uint64_t guid = spa_load_guid(spa);
+ uint64_t guid = spa_guid(spa);
top:
hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
@@ -4251,7 +4234,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
boolean_t have_lock, full;
l2arc_write_callback_t *cb;
zio_t *pio, *wzio;
- uint64_t guid = spa_load_guid(spa);
+ uint64_t guid = spa_guid(spa);
ASSERT(dev->l2ad_vdev != NULL);
diff --git a/uts/common/fs/zfs/bpobj.c b/uts/common/fs/zfs/bpobj.c
index 022921c666b8..72be31235607 100644
--- a/uts/common/fs/zfs/bpobj.c
+++ b/uts/common/fs/zfs/bpobj.c
@@ -20,13 +20,11 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
-#include <sys/dsl_pool.h>
uint64_t
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
@@ -442,10 +440,7 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
struct space_range_arg *sra = arg;
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
- if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
- sra->used += bp_get_dsize_sync(sra->spa, bp);
- else
- sra->used += bp_get_dsize(sra->spa, bp);
+ sra->used += bp_get_dsize_sync(sra->spa, bp);
sra->comp += BP_GET_PSIZE(bp);
sra->uncomp += BP_GET_UCSIZE(bp);
}
diff --git a/uts/common/fs/zfs/bptree.c b/uts/common/fs/zfs/bptree.c
deleted file mode 100644
index 8c5a7d40ef37..000000000000
--- a/uts/common/fs/zfs/bptree.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#include <sys/arc.h>
-#include <sys/bptree.h>
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_pool.h>
-#include <sys/dnode.h>
-#include <sys/refcount.h>
-#include <sys/spa.h>
-
-/*
- * A bptree is a queue of root block pointers from destroyed datasets. When a
- * dataset is destroyed its root block pointer is put on the end of the pool's
- * bptree queue so the dataset's blocks can be freed asynchronously by
- * dsl_scan_sync. This allows the delete operation to finish without traversing
- * all the dataset's blocks.
- *
- * Note that while bt_begin and bt_end are only ever incremented in this code
- * they are effectively reset to 0 every time the entire bptree is freed because
- * the bptree's object is destroyed and re-created.
- */
-
-struct bptree_args {
- bptree_phys_t *ba_phys; /* data in bonus buffer, dirtied if freeing */
- boolean_t ba_free; /* true if freeing during traversal */
-
- bptree_itor_t *ba_func; /* function to call for each blockpointer */
- void *ba_arg; /* caller supplied argument to ba_func */
- dmu_tx_t *ba_tx; /* caller supplied tx, NULL if not freeing */
-} bptree_args_t;
-
-uint64_t
-bptree_alloc(objset_t *os, dmu_tx_t *tx)
-{
- uint64_t obj;
- dmu_buf_t *db;
- bptree_phys_t *bt;
-
- obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
- SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
- sizeof (bptree_phys_t), tx);
-
- /*
- * Bonus buffer contents are already initialized to 0, but for
- * readability we make it explicit.
- */
- VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
- bt = db->db_data;
- bt->bt_begin = 0;
- bt->bt_end = 0;
- bt->bt_bytes = 0;
- bt->bt_comp = 0;
- bt->bt_uncomp = 0;
- dmu_buf_rele(db, FTAG);
-
- return (obj);
-}
-
-int
-bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
-{
- dmu_buf_t *db;
- bptree_phys_t *bt;
-
- VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
- bt = db->db_data;
- ASSERT3U(bt->bt_begin, ==, bt->bt_end);
- ASSERT3U(bt->bt_bytes, ==, 0);
- ASSERT3U(bt->bt_comp, ==, 0);
- ASSERT3U(bt->bt_uncomp, ==, 0);
- dmu_buf_rele(db, FTAG);
-
- return (dmu_object_free(os, obj, tx));
-}
-
-void
-bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
- uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
-{
- dmu_buf_t *db;
- bptree_phys_t *bt;
- bptree_entry_phys_t bte;
-
- /*
- * bptree objects are in the pool mos, therefore they can only be
- * modified in syncing context. Furthermore, this is only modified
- * by the sync thread, so no locking is necessary.
- */
- ASSERT(dmu_tx_is_syncing(tx));
-
- VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
- bt = db->db_data;
-
- bte.be_birth_txg = birth_txg;
- bte.be_bp = *bp;
- bzero(&bte.be_zb, sizeof (bte.be_zb));
- dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
-
- dmu_buf_will_dirty(db, tx);
- bt->bt_end++;
- bt->bt_bytes += bytes;
- bt->bt_comp += comp;
- bt->bt_uncomp += uncomp;
- dmu_buf_rele(db, FTAG);
-}
-
-/* ARGSUSED */
-static int
-bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
- const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
-{
- int err;
- struct bptree_args *ba = arg;
-
- if (bp == NULL)
- return (0);
-
- err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
- if (err == 0 && ba->ba_free) {
- ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
- ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp);
- ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp);
- }
- return (err);
-}
-
-int
-bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
- void *arg, dmu_tx_t *tx)
-{
- int err;
- uint64_t i;
- dmu_buf_t *db;
- struct bptree_args ba;
-
- ASSERT(!free || dmu_tx_is_syncing(tx));
-
- err = dmu_bonus_hold(os, obj, FTAG, &db);
- if (err != 0)
- return (err);
-
- if (free)
- dmu_buf_will_dirty(db, tx);
-
- ba.ba_phys = db->db_data;
- ba.ba_free = free;
- ba.ba_func = func;
- ba.ba_arg = arg;
- ba.ba_tx = tx;
-
- err = 0;
- for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
- bptree_entry_phys_t bte;
-
- ASSERT(!free || i == ba.ba_phys->bt_begin);
-
- err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
- &bte, DMU_READ_NO_PREFETCH);
- if (err != 0)
- break;
-
- err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
- bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
- bptree_visit_cb, &ba);
- if (free) {
- ASSERT(err == 0 || err == ERESTART);
- if (err != 0) {
- /* save bookmark for future resume */
- ASSERT3U(bte.be_zb.zb_objset, ==,
- ZB_DESTROYED_OBJSET);
- ASSERT3U(bte.be_zb.zb_level, ==, 0);
- dmu_write(os, obj, i * sizeof (bte),
- sizeof (bte), &bte, tx);
- break;
- } else {
- ba.ba_phys->bt_begin++;
- (void) dmu_free_range(os, obj,
- i * sizeof (bte), sizeof (bte), tx);
- }
- }
- }
-
- ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
-
- /* if all blocks are free there should be no used space */
- if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
- ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
- ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
- ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
- }
-
- dmu_buf_rele(db, FTAG);
-
- return (err);
-}
diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c
index 145cc01c67dd..9c4e0296db2b 100644
--- a/uts/common/fs/zfs/dbuf.c
+++ b/uts/common/fs/zfs/dbuf.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -228,7 +226,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
boolean_t is_metadata;
DB_DNODE_ENTER(db);
- is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
+ is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
DB_DNODE_EXIT(db);
return (is_metadata);
@@ -1302,17 +1300,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
- * Also note: we can get here with a spill block, so
- * test for that similar to how dbuf_dirty does.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
- if (db->db_blkid != DMU_SPILL_BLKID) {
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
- mutex_exit(&dn->dn_mtx);
- }
+ mutex_enter(&dn->dn_mtx);
+ dnode_clear_range(dn, db->db_blkid, 1, tx);
+ mutex_exit(&dn->dn_mtx);
DB_DNODE_EXIT(db);
return (0);
}
@@ -1325,18 +1319,11 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
*drp = dr->dr_next;
- /*
- * Note that there are three places in dbuf_dirty()
- * where this dirty record may be put on a list.
- * Make sure to do a list_remove corresponding to
- * every one of those list_insert calls.
- */
if (dr->dr_parent) {
mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
- } else if (db->db_blkid == DMU_SPILL_BLKID ||
- db->db_level+1 == dn->dn_nlevels) {
+ } else if (db->db_level+1 == dn->dn_nlevels) {
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
mutex_enter(&dn->dn_mtx);
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
diff --git a/uts/common/fs/zfs/ddt.c b/uts/common/fs/zfs/ddt.c
index b3ec3ccbd27c..718331496765 100644
--- a/uts/common/fs/zfs/ddt.c
+++ b/uts/common/fs/zfs/ddt.c
@@ -21,7 +21,6 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -1062,9 +1061,11 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
if (spa->spa_ddt_stat_object == 0) {
- spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
- DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_DDT_STATS, tx);
+ spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
+ DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
+ VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
+ &spa->spa_ddt_stat_object, tx) == 0);
}
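
The two-call sequence restored here is exactly what the newer zap_create_link() helper folds into one call; the equivalence can be sketched as follows (illustrative kernel-style fragment, with signatures taken from this hunk):

static uint64_t
zap_create_linked(objset_t *os, dmu_object_type_t ot, uint64_t parent,
    const char *name, dmu_tx_t *tx)
{
	/* Create the ZAP object... */
	uint64_t obj = zap_create(os, ot, DMU_OT_NONE, 0, tx);

	/* ...and link it under the parent directory object by name. */
	VERIFY(zap_add(os, parent, name, sizeof (uint64_t), 1,
	    &obj, tx) == 0);

	return (obj);
}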
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c
index 94fa52f40d4f..39234eba53b2 100644
--- a/uts/common/fs/zfs/dmu.c
+++ b/uts/common/fs/zfs/dmu.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -47,73 +46,60 @@
#endif
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
- { DMU_BSWAP_UINT8, TRUE, "unallocated" },
- { DMU_BSWAP_ZAP, TRUE, "object directory" },
- { DMU_BSWAP_UINT64, TRUE, "object array" },
- { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
- { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
- { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
- { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
- { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
- { DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
- { DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
- { DMU_BSWAP_UINT64, TRUE, "DSL directory" },
- { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
- { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
- { DMU_BSWAP_ZAP, TRUE, "DSL props" },
- { DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
- { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
- { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
- { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
- { DMU_BSWAP_UINT8, FALSE, "zvol object" },
- { DMU_BSWAP_ZAP, TRUE, "zvol prop" },
- { DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
- { DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
- { DMU_BSWAP_ZAP, TRUE, "other ZAP" },
- { DMU_BSWAP_ZAP, TRUE, "persistent error log" },
- { DMU_BSWAP_UINT8, TRUE, "SPA history" },
- { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
- { DMU_BSWAP_ZAP, TRUE, "Pool properties" },
- { DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
- { DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
- { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
- { DMU_BSWAP_UINT8, TRUE, "FUID table" },
- { DMU_BSWAP_UINT64, TRUE, "FUID table size" },
- { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
- { DMU_BSWAP_ZAP, TRUE, "scan work queue" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
- { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
- { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
- { DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
- { DMU_BSWAP_UINT8, TRUE, "System attributes" },
- { DMU_BSWAP_ZAP, TRUE, "SA master node" },
- { DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
- { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
- { DMU_BSWAP_ZAP, TRUE, "scan translations" },
- { DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
- { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
- { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
- { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
-};
-
-const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
- { byteswap_uint8_array, "uint8" },
- { byteswap_uint16_array, "uint16" },
- { byteswap_uint32_array, "uint32" },
- { byteswap_uint64_array, "uint64" },
- { zap_byteswap, "zap" },
- { dnode_buf_byteswap, "dnode" },
- { dmu_objset_byteswap, "objset" },
- { zfs_znode_byteswap, "znode" },
- { zfs_oldacl_byteswap, "oldacl" },
- { zfs_acl_byteswap, "acl" }
+ { byteswap_uint8_array, TRUE, "unallocated" },
+ { zap_byteswap, TRUE, "object directory" },
+ { byteswap_uint64_array, TRUE, "object array" },
+ { byteswap_uint8_array, TRUE, "packed nvlist" },
+ { byteswap_uint64_array, TRUE, "packed nvlist size" },
+ { byteswap_uint64_array, TRUE, "bpobj" },
+ { byteswap_uint64_array, TRUE, "bpobj header" },
+ { byteswap_uint64_array, TRUE, "SPA space map header" },
+ { byteswap_uint64_array, TRUE, "SPA space map" },
+ { byteswap_uint64_array, TRUE, "ZIL intent log" },
+ { dnode_buf_byteswap, TRUE, "DMU dnode" },
+ { dmu_objset_byteswap, TRUE, "DMU objset" },
+ { byteswap_uint64_array, TRUE, "DSL directory" },
+ { zap_byteswap, TRUE, "DSL directory child map"},
+ { zap_byteswap, TRUE, "DSL dataset snap map" },
+ { zap_byteswap, TRUE, "DSL props" },
+ { byteswap_uint64_array, TRUE, "DSL dataset" },
+ { zfs_znode_byteswap, TRUE, "ZFS znode" },
+ { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" },
+ { byteswap_uint8_array, FALSE, "ZFS plain file" },
+ { zap_byteswap, TRUE, "ZFS directory" },
+ { zap_byteswap, TRUE, "ZFS master node" },
+ { zap_byteswap, TRUE, "ZFS delete queue" },
+ { byteswap_uint8_array, FALSE, "zvol object" },
+ { zap_byteswap, TRUE, "zvol prop" },
+ { byteswap_uint8_array, FALSE, "other uint8[]" },
+ { byteswap_uint64_array, FALSE, "other uint64[]" },
+ { zap_byteswap, TRUE, "other ZAP" },
+ { zap_byteswap, TRUE, "persistent error log" },
+ { byteswap_uint8_array, TRUE, "SPA history" },
+ { byteswap_uint64_array, TRUE, "SPA history offsets" },
+ { zap_byteswap, TRUE, "Pool properties" },
+ { zap_byteswap, TRUE, "DSL permissions" },
+ { zfs_acl_byteswap, TRUE, "ZFS ACL" },
+ { byteswap_uint8_array, TRUE, "ZFS SYSACL" },
+ { byteswap_uint8_array, TRUE, "FUID table" },
+ { byteswap_uint64_array, TRUE, "FUID table size" },
+ { zap_byteswap, TRUE, "DSL dataset next clones"},
+ { zap_byteswap, TRUE, "scan work queue" },
+ { zap_byteswap, TRUE, "ZFS user/group used" },
+ { zap_byteswap, TRUE, "ZFS user/group quota" },
+ { zap_byteswap, TRUE, "snapshot refcount tags"},
+ { zap_byteswap, TRUE, "DDT ZAP algorithm" },
+ { zap_byteswap, TRUE, "DDT statistics" },
+ { byteswap_uint8_array, TRUE, "System attributes" },
+ { zap_byteswap, TRUE, "SA master node" },
+ { zap_byteswap, TRUE, "SA attr registration" },
+ { zap_byteswap, TRUE, "SA attr layouts" },
+ { zap_byteswap, TRUE, "scan translations" },
+ { byteswap_uint8_array, FALSE, "deduplicated block" },
+ { zap_byteswap, TRUE, "DSL deadlist map" },
+ { byteswap_uint64_array, TRUE, "DSL deadlist map hdr" },
+ { zap_byteswap, TRUE, "DSL dir clones" },
+ { byteswap_uint64_array, TRUE, "bpobj subobj" },
};
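
With the single table restored, callers index dmu_ot[] directly by object type instead of going through a separate byteswap-function table, as the arc_read_done() and dbuf hunks elsewhere in this diff show:

	/* Illustrative, mirroring expressions from the hunks above. */
	dmu_ot[BP_GET_TYPE(bp)].ot_byteswap(buf, size);
	boolean_t ismd = dmu_ot[dn->dn_type].ot_metadata;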
int
@@ -190,7 +176,7 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
- if (!DMU_OT_IS_VALID(type)) {
+ if (type > DMU_OT_NUMTYPES) {
error = EINVAL;
} else if (dn->dn_bonus != db) {
error = EINVAL;
@@ -1517,7 +1503,7 @@ void
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
{
dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
- boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
+ boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
(wp & WP_SPILL));
enum zio_checksum checksum = os->os_checksum;
enum zio_compress compress = os->os_compress;
diff --git a/uts/common/fs/zfs/dmu_objset.c b/uts/common/fs/zfs/dmu_objset.c
index 09c4ecf4dd58..7caebd979f02 100644
--- a/uts/common/fs/zfs/dmu_objset.c
+++ b/uts/common/fs/zfs/dmu_objset.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -700,33 +699,30 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_t *spa = dd->dd_pool->dp_spa;
struct oscarg *oa = arg2;
uint64_t obj;
- dsl_dataset_t *ds;
- blkptr_t *bp;
ASSERT(dmu_tx_is_syncing(tx));
obj = dsl_dataset_create_sync(dd, oa->lastname,
oa->clone_origin, oa->flags, oa->cr, tx);
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds));
- bp = dsl_dataset_get_blkptr(ds);
- if (BP_IS_HOLE(bp)) {
- objset_t *os =
- dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
+ if (oa->clone_origin == NULL) {
+ dsl_pool_t *dp = dd->dd_pool;
+ dsl_dataset_t *ds;
+ blkptr_t *bp;
+ objset_t *os;
+
+ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+ bp = dsl_dataset_get_blkptr(ds);
+ ASSERT(BP_IS_HOLE(bp));
+
+ os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
if (oa->userfunc)
oa->userfunc(os, oa->userarg, oa->cr, tx);
+ dsl_dataset_rele(ds, FTAG);
}
- if (oa->clone_origin == NULL) {
- spa_history_log_internal_ds(ds, "create", tx, "");
- } else {
- char namebuf[MAXNAMELEN];
- dsl_dataset_name(oa->clone_origin, namebuf);
- spa_history_log_internal_ds(ds, "clone", tx,
- "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object);
- }
- dsl_dataset_rele(ds, FTAG);
+ spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj);
}
int
@@ -803,40 +799,34 @@ dmu_objset_destroy(const char *name, boolean_t defer)
return (error);
}
-typedef struct snapallarg {
- dsl_sync_task_group_t *saa_dstg;
- boolean_t saa_needsuspend;
- nvlist_t *saa_props;
-
- /* the following are used only if 'temporary' is set: */
- boolean_t saa_temporary;
- const char *saa_htag;
- struct dsl_ds_holdarg *saa_ha;
- dsl_dataset_t *saa_newds;
-} snapallarg_t;
-
-typedef struct snaponearg {
- const char *soa_longname; /* long snap name */
- const char *soa_snapname; /* short snap name */
- snapallarg_t *soa_saa;
-} snaponearg_t;
+struct snaparg {
+ dsl_sync_task_group_t *dstg;
+ char *snapname;
+ char *htag;
+ char failed[MAXPATHLEN];
+ boolean_t recursive;
+ boolean_t needsuspend;
+ boolean_t temporary;
+ nvlist_t *props;
+ struct dsl_ds_holdarg *ha; /* only needed in the temporary case */
+ dsl_dataset_t *newds;
+};
static int
snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
objset_t *os = arg1;
- snaponearg_t *soa = arg2;
- snapallarg_t *saa = soa->soa_saa;
+ struct snaparg *sn = arg2;
int error;
/* The props have already been checked by zfs_check_userprops(). */
error = dsl_dataset_snapshot_check(os->os_dsl_dataset,
- soa->soa_snapname, tx);
+ sn->snapname, tx);
if (error)
return (error);
- if (saa->saa_temporary) {
+ if (sn->temporary) {
/*
* Ideally we would just call
* dsl_dataset_user_hold_check() and
@@ -854,13 +844,12 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Not checking number of tags because the tag will be
* unique, as it will be the only tag.
*/
- if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
+ if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
return (E2BIG);
- saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg),
- KM_SLEEP);
- saa->saa_ha->temphold = B_TRUE;
- saa->saa_ha->htag = saa->saa_htag;
+ sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+ sn->ha->temphold = B_TRUE;
+ sn->ha->htag = sn->htag;
}
return (error);
}
@@ -870,25 +859,24 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
objset_t *os = arg1;
dsl_dataset_t *ds = os->os_dsl_dataset;
- snaponearg_t *soa = arg2;
- snapallarg_t *saa = soa->soa_saa;
+ struct snaparg *sn = arg2;
- dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx);
+ dsl_dataset_snapshot_sync(ds, sn->snapname, tx);
- if (saa->saa_props != NULL) {
+ if (sn->props) {
dsl_props_arg_t pa;
- pa.pa_props = saa->saa_props;
+ pa.pa_props = sn->props;
pa.pa_source = ZPROP_SRC_LOCAL;
dsl_props_set_sync(ds->ds_prev, &pa, tx);
}
- if (saa->saa_temporary) {
+ if (sn->temporary) {
struct dsl_ds_destroyarg da;
- dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx);
- kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg));
- saa->saa_ha = NULL;
- saa->saa_newds = ds->ds_prev;
+ dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx);
+ kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg));
+ sn->ha = NULL;
+ sn->newds = ds->ds_prev;
da.ds = ds->ds_prev;
da.defer = B_TRUE;
@@ -897,180 +885,131 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
static int
-snapshot_one_impl(const char *snapname, void *arg)
+dmu_objset_snapshot_one(const char *name, void *arg)
{
- char fsname[MAXPATHLEN];
- snapallarg_t *saa = arg;
- snaponearg_t *soa;
+ struct snaparg *sn = arg;
objset_t *os;
int err;
+ char *cp;
+
+ /*
+ * If the objset starts with a '%', then ignore it unless it was
+ * explicitly named (ie, not recursive). These hidden datasets
+ * are always inconsistent, and by not opening them here, we can
+ * avoid a race with dsl_dir_destroy_check().
+ */
+ cp = strrchr(name, '/');
+ if (cp && cp[1] == '%' && sn->recursive)
+ return (0);
- (void) strlcpy(fsname, snapname, sizeof (fsname));
- strchr(fsname, '@')[0] = '\0';
+ (void) strcpy(sn->failed, name);
+
+ /*
+ * Check permissions if we are doing a recursive snapshot. The
+ * permission checks for the starting dataset have already been
+ * performed in zfs_secpolicy_snapshot()
+ */
+ if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED())))
+ return (err);
- err = dmu_objset_hold(fsname, saa, &os);
+ err = dmu_objset_hold(name, sn, &os);
if (err != 0)
return (err);
/*
* If the objset is in an inconsistent state (eg, in the process
- * of being destroyed), don't snapshot it.
+ * of being destroyed), don't snapshot it. As with %hidden
+ * datasets, we return EBUSY if this name was explicitly
+ * requested (ie, not recursive), and otherwise ignore it.
*/
if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
- dmu_objset_rele(os, saa);
- return (EBUSY);
+ dmu_objset_rele(os, sn);
+ return (sn->recursive ? 0 : EBUSY);
}
- if (saa->saa_needsuspend) {
+ if (sn->needsuspend) {
err = zil_suspend(dmu_objset_zil(os));
if (err) {
- dmu_objset_rele(os, saa);
+ dmu_objset_rele(os, sn);
return (err);
}
}
-
- soa = kmem_zalloc(sizeof (*soa), KM_SLEEP);
- soa->soa_saa = saa;
- soa->soa_longname = snapname;
- soa->soa_snapname = strchr(snapname, '@') + 1;
-
- dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync,
- os, soa, 3);
+ dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync,
+ os, sn, 3);
return (0);
}
-/*
- * The snapshots must all be in the same pool.
- */
int
-dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
+dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
+ nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd)
{
dsl_sync_task_t *dst;
- snapallarg_t saa = { 0 };
+ struct snaparg sn;
spa_t *spa;
- int rv = 0;
+ minor_t minor;
int err;
- nvpair_t *pair;
- pair = nvlist_next_nvpair(snaps, NULL);
- if (pair == NULL)
- return (0);
+ (void) strcpy(sn.failed, fsname);
- err = spa_open(nvpair_name(pair), &spa, FTAG);
+ err = spa_open(fsname, &spa, FTAG);
if (err)
return (err);
- saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- saa.saa_props = props;
- saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
-
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- err = snapshot_one_impl(nvpair_name(pair), &saa);
- if (err != 0) {
- if (errors != NULL) {
- fnvlist_add_int32(errors,
- nvpair_name(pair), err);
- }
- rv = err;
- }
- }
-
- /*
- * If any call to snapshot_one_impl() failed, don't execute the
- * sync task. The error handling code below will clean up the
- * snaponearg_t from any successful calls to
- * snapshot_one_impl().
- */
- if (rv == 0)
- err = dsl_sync_task_group_wait(saa.saa_dstg);
- if (err != 0)
- rv = err;
- for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
- dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
- objset_t *os = dst->dst_arg1;
- snaponearg_t *soa = dst->dst_arg2;
- if (dst->dst_err != 0) {
- if (errors != NULL) {
- fnvlist_add_int32(errors,
- soa->soa_longname, dst->dst_err);
- }
- rv = dst->dst_err;
+ if (temporary) {
+ if (cleanup_fd < 0) {
+ spa_close(spa, FTAG);
+ return (EINVAL);
+ }
+ if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
+ spa_close(spa, FTAG);
+ return (err);
}
-
- if (saa.saa_needsuspend)
- zil_resume(dmu_objset_zil(os));
- dmu_objset_rele(os, &saa);
- kmem_free(soa, sizeof (*soa));
}
- dsl_sync_task_group_destroy(saa.saa_dstg);
- spa_close(spa, FTAG);
- return (rv);
-}
-
-int
-dmu_objset_snapshot_one(const char *fsname, const char *snapname)
-{
- int err;
- char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
- nvlist_t *snaps = fnvlist_alloc();
-
- fnvlist_add_boolean(snaps, longsnap);
- err = dmu_objset_snapshot(snaps, NULL, NULL);
- fnvlist_free(snaps);
- strfree(longsnap);
- return (err);
-}
-
-int
-dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd)
-{
- dsl_sync_task_t *dst;
- snapallarg_t saa = { 0 };
- spa_t *spa;
- minor_t minor;
- int err;
-
- err = spa_open(snapname, &spa, FTAG);
- if (err)
- return (err);
- saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- saa.saa_htag = tag;
- saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
- saa.saa_temporary = B_TRUE;
-
- if (cleanup_fd < 0) {
- spa_close(spa, FTAG);
- return (EINVAL);
- }
- if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
- spa_close(spa, FTAG);
- return (err);
+ sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+ sn.snapname = snapname;
+ sn.htag = tag;
+ sn.props = props;
+ sn.recursive = recursive;
+ sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
+ sn.temporary = temporary;
+ sn.ha = NULL;
+ sn.newds = NULL;
+
+ if (recursive) {
+ err = dmu_objset_find(fsname,
+ dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
+ } else {
+ err = dmu_objset_snapshot_one(fsname, &sn);
}
- err = snapshot_one_impl(snapname, &saa);
-
if (err == 0)
- err = dsl_sync_task_group_wait(saa.saa_dstg);
+ err = dsl_sync_task_group_wait(sn.dstg);
- for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
- dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
+ for (dst = list_head(&sn.dstg->dstg_tasks); dst;
+ dst = list_next(&sn.dstg->dstg_tasks, dst)) {
objset_t *os = dst->dst_arg1;
- dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor);
- if (saa.saa_needsuspend)
+ dsl_dataset_t *ds = os->os_dsl_dataset;
+ if (dst->dst_err) {
+ dsl_dataset_name(ds, sn.failed);
+ } else if (temporary) {
+ dsl_register_onexit_hold_cleanup(sn.newds, tag, minor);
+ }
+ if (sn.needsuspend)
zil_resume(dmu_objset_zil(os));
- dmu_objset_rele(os, &saa);
+ dmu_objset_rele(os, &sn);
}
- zfs_onexit_fd_rele(cleanup_fd);
- dsl_sync_task_group_destroy(saa.saa_dstg);
+ if (err)
+ (void) strcpy(fsname, sn.failed);
+ if (temporary)
+ zfs_onexit_fd_rele(cleanup_fd);
+ dsl_sync_task_group_destroy(sn.dstg);
spa_close(spa, FTAG);
return (err);
}
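/*
 * [Editor's sketch, not part of this patch] With the signature restored
 * above, a plain recursive snapshot passes no props, no temporary-hold
 * tag, and no cleanup fd (the fd is only examined when temporary is set):
 */
static int
snapshot_recursive_example(char *fsname, char *snapname)
{
        return (dmu_objset_snapshot(fsname, snapname, NULL, NULL,
            B_TRUE, B_FALSE, -1));
}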
-
static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c
index 5a2c6e2ce759..e47d533a44f4 100644
--- a/uts/common/fs/zfs/dmu_send.c
+++ b/uts/common/fs/zfs/dmu_send.c
@@ -20,9 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@@ -47,38 +44,50 @@
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
-/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
-int zfs_send_corrupt_data = B_FALSE;
-
static char *dmu_recv_tag = "dmu_recv_tag";
+/*
+ * The list of data whose inclusion in a send stream can be pending from
+ * one call to backup_cb to another. Multiple calls to dump_free() and
+ * dump_freeobjects() can be aggregated into a single DRR_FREE or
+ * DRR_FREEOBJECTS replay record.
+ */
+typedef enum {
+ PENDING_NONE,
+ PENDING_FREE,
+ PENDING_FREEOBJECTS
+} pendop_t;
+
+struct backuparg {
+ dmu_replay_record_t *drr;
+ vnode_t *vp;
+ offset_t *off;
+ objset_t *os;
+ zio_cksum_t zc;
+ uint64_t toguid;
+ int err;
+ pendop_t pending_op;
+};
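/*
 * [Editor's sketch, not part of this patch] The pending_op field above
 * implements the aggregation described in the comment: a record is
 * buffered in ba->drr until a non-mergeable request arrives, then pushed.
 * Each dump_* routine below opens with this flush pattern; a hypothetical
 * helper would read:
 */
static int
flush_pending_example(struct backuparg *ba)
{
        if (ba->pending_op != PENDING_NONE) {
                if (dump_bytes(ba, ba->drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (EINTR);
                ba->pending_op = PENDING_NONE;
        }
        return (0);
}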
+
static int
-dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
+dump_bytes(struct backuparg *ba, void *buf, int len)
{
- dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
ssize_t resid; /* have to get resid to get detailed errno */
ASSERT3U(len % 8, ==, 0);
- fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
- dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
+ fletcher_4_incremental_native(buf, len, &ba->zc);
+ ba->err = vn_rdwr(UIO_WRITE, ba->vp,
(caddr_t)buf, len,
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
-
- mutex_enter(&ds->ds_sendstream_lock);
- *dsp->dsa_off += len;
- mutex_exit(&ds->ds_sendstream_lock);
-
- return (dsp->dsa_err);
+ *ba->off += len;
+ return (ba->err);
}
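/*
 * [Editor's note] dump_bytes() folds every byte written into ba->zc via
 * fletcher_4_incremental_native(); the accumulated sum is emitted later
 * in the DRR_END record.  A consumer could verify a stream the same way
 * (hypothetical helper, assuming the whole stream is in memory):
 */
static boolean_t
stream_cksum_ok_example(void *stream, uint64_t len, const zio_cksum_t *exp)
{
        zio_cksum_t zc;

        ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
        fletcher_4_incremental_native(stream, len, &zc);
        return (ZIO_CHECKSUM_EQUAL(zc, *exp));
}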
static int
-dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
+dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
uint64_t length)
{
- struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
-
- if (length != -1ULL && offset + length < offset)
- length = -1ULL;
+ struct drr_free *drrf = &(ba->drr->drr_u.drr_free);
/*
* If there is a pending op, but it's not PENDING_FREE, push it out,
@@ -87,15 +96,13 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
* other DRR_FREE records. DRR_FREEOBJECTS records can only be
* aggregated with other DRR_FREEOBJECTS records.
*/
- if (dsp->dsa_pending_op != PENDING_NONE &&
- dsp->dsa_pending_op != PENDING_FREE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) {
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
- if (dsp->dsa_pending_op == PENDING_FREE) {
+ if (ba->pending_op == PENDING_FREE) {
/*
* There should never be a PENDING_FREE if length is -1
* (because dump_dnode is the only place where this
@@ -113,35 +120,34 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
return (0);
} else {
/* not a continuation. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
+ if (dump_bytes(ba, ba->drr,
sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
}
/* create a FREE record and make it pending */
- bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
- dsp->dsa_drr->drr_type = DRR_FREE;
+ bzero(ba->drr, sizeof (dmu_replay_record_t));
+ ba->drr->drr_type = DRR_FREE;
drrf->drr_object = object;
drrf->drr_offset = offset;
drrf->drr_length = length;
- drrf->drr_toguid = dsp->dsa_toguid;
+ drrf->drr_toguid = ba->toguid;
if (length == -1ULL) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
} else {
- dsp->dsa_pending_op = PENDING_FREE;
+ ba->pending_op = PENDING_FREE;
}
return (0);
}
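/*
 * [Editor's sketch, not part of this patch] Under the logic above, two
 * contiguous frees of the same object collapse into a single pending
 * DRR_FREE record; only the pending record's length grows:
 */
static int
free_contig_example(struct backuparg *ba, uint64_t obj)
{
        int err;

        /* no pending op: starts a pending DRR_FREE covering [0, 8192) */
        if ((err = dump_free(ba, obj, 0, 8192)) != 0)
                return (err);
        /* contiguous with the pending record: extends it to [0, 16384) */
        return (dump_free(ba, obj, 8192, 8192));
}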
static int
-dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
+dump_data(struct backuparg *ba, dmu_object_type_t type,
uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
{
- struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
+ struct drr_write *drrw = &(ba->drr->drr_u.drr_write);
/*
@@ -150,20 +156,19 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
* the stream, since aggregation can't be done across operations
* of different types.
*/
- if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (ba->pending_op != PENDING_NONE) {
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
/* write a DATA record */
- bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
- dsp->dsa_drr->drr_type = DRR_WRITE;
+ bzero(ba->drr, sizeof (dmu_replay_record_t));
+ ba->drr->drr_type = DRR_WRITE;
drrw->drr_object = object;
drrw->drr_type = type;
drrw->drr_offset = offset;
drrw->drr_length = blksz;
- drrw->drr_toguid = dsp->dsa_toguid;
+ drrw->drr_toguid = ba->toguid;
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
@@ -172,43 +177,42 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
drrw->drr_key.ddk_cksum = bp->blk_cksum;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- if (dump_bytes(dsp, data, blksz) != 0)
+ if (dump_bytes(ba, data, blksz) != 0)
return (EINTR);
return (0);
}
static int
-dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
+dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data)
{
- struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
+ struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill);
- if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (ba->pending_op != PENDING_NONE) {
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
/* write a SPILL record */
- bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
- dsp->dsa_drr->drr_type = DRR_SPILL;
+ bzero(ba->drr, sizeof (dmu_replay_record_t));
+ ba->drr->drr_type = DRR_SPILL;
drrs->drr_object = object;
drrs->drr_length = blksz;
- drrs->drr_toguid = dsp->dsa_toguid;
+ drrs->drr_toguid = ba->toguid;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
return (EINTR);
- if (dump_bytes(dsp, data, blksz))
+ if (dump_bytes(ba, data, blksz))
return (EINTR);
return (0);
}
static int
-dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
+dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
{
- struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
+ struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects);
/*
* If there is a pending op, but it's not PENDING_FREEOBJECTS,
@@ -217,14 +221,13 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
* aggregated with other DRR_FREE records. DRR_FREEOBJECTS records
* can only be aggregated with other DRR_FREEOBJECTS records.
*/
- if (dsp->dsa_pending_op != PENDING_NONE &&
- dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (ba->pending_op != PENDING_NONE &&
+ ba->pending_op != PENDING_FREEOBJECTS) {
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
- if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
+ if (ba->pending_op == PENDING_FREEOBJECTS) {
/*
* See whether this free object array can be aggregated
* with pending one
@@ -234,43 +237,42 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
return (0);
} else {
/* can't be aggregated. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
+ if (dump_bytes(ba, ba->drr,
sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
}
/* write a FREEOBJECTS record */
- bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
- dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
+ bzero(ba->drr, sizeof (dmu_replay_record_t));
+ ba->drr->drr_type = DRR_FREEOBJECTS;
drrfo->drr_firstobj = firstobj;
drrfo->drr_numobjs = numobjs;
- drrfo->drr_toguid = dsp->dsa_toguid;
+ drrfo->drr_toguid = ba->toguid;
- dsp->dsa_pending_op = PENDING_FREEOBJECTS;
+ ba->pending_op = PENDING_FREEOBJECTS;
return (0);
}
static int
-dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
+dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
{
- struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ struct drr_object *drro = &(ba->drr->drr_u.drr_object);
if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
- return (dump_freeobjects(dsp, object, 1));
+ return (dump_freeobjects(ba, object, 1));
- if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (ba->pending_op != PENDING_NONE) {
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- dsp->dsa_pending_op = PENDING_NONE;
+ ba->pending_op = PENDING_NONE;
}
/* write an OBJECT record */
- bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
- dsp->dsa_drr->drr_type = DRR_OBJECT;
+ bzero(ba->drr, sizeof (dmu_replay_record_t));
+ ba->drr->drr_type = DRR_OBJECT;
drro->drr_object = object;
drro->drr_type = dnp->dn_type;
drro->drr_bonustype = dnp->dn_bonustype;
@@ -278,19 +280,19 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_bonuslen = dnp->dn_bonuslen;
drro->drr_checksumtype = dnp->dn_checksum;
drro->drr_compress = dnp->dn_compress;
- drro->drr_toguid = dsp->dsa_toguid;
+ drro->drr_toguid = ba->toguid;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+ if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
return (EINTR);
- if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+ if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
return (EINTR);
/* free anything past the end of the file */
- if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
+ if (dump_free(ba, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
return (EINTR);
- if (dsp->dsa_err)
+ if (ba->err)
return (EINTR);
return (0);
}
@@ -304,7 +306,7 @@ static int
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
- dmu_sendarg_t *dsp = arg;
+ struct backuparg *ba = arg;
dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
int err = 0;
@@ -317,10 +319,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
- err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
+ err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
} else if (bp == NULL) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
- err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
+ err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
return (0);
} else if (type == DMU_OT_DNODE) {
@@ -339,7 +341,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
uint64_t dnobj = (zb->zb_blkid <<
(DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
- err = dump_dnode(dsp, dnobj, blk+i);
+ err = dump_dnode(ba, dnobj, blk+i);
if (err)
break;
}
@@ -354,7 +356,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
return (EIO);
- err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
+ err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
} else { /* it's a level-0 block of a regular object */
uint32_t aflags = ARC_WAIT;
@@ -363,22 +365,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
if (dsl_read(NULL, spa, bp, pbuf,
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
- if (zfs_send_corrupt_data) {
- /* Send a block filled with 0x"zfs badd bloc" */
- abuf = arc_buf_alloc(spa, blksz, &abuf,
- ARC_BUFC_DATA);
- uint64_t *ptr;
- for (ptr = abuf->b_data;
- (char *)ptr < (char *)abuf->b_data + blksz;
- ptr++)
- *ptr = 0x2f5baddb10c;
- } else {
- return (EIO);
- }
- }
+ ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
+ return (EIO);
- err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
+ err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
blksz, bp, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
}
@@ -387,53 +377,14 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
return (err);
}
-/*
- * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
- * For example, they could both be snapshots of the same filesystem, and
- * 'earlier' is before 'later'. Or 'earlier' could be the origin of
- * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
- * filesystem. Or 'earlier' could be the origin's origin.
- */
-static boolean_t
-is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
-{
- dsl_pool_t *dp = later->ds_dir->dd_pool;
- int error;
- boolean_t ret;
- dsl_dataset_t *origin;
-
- if (earlier->ds_phys->ds_creation_txg >=
- later->ds_phys->ds_creation_txg)
- return (B_FALSE);
-
- if (later->ds_dir == earlier->ds_dir)
- return (B_TRUE);
- if (!dsl_dir_is_clone(later->ds_dir))
- return (B_FALSE);
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
- rw_exit(&dp->dp_config_rwlock);
- return (B_TRUE);
- }
- error = dsl_dataset_hold_obj(dp,
- later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
- rw_exit(&dp->dp_config_rwlock);
- if (error != 0)
- return (B_FALSE);
- ret = is_before(origin, earlier);
- dsl_dataset_rele(origin, FTAG);
- return (ret);
-}
-
int
-dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
- offset_t *off)
+dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+ vnode_t *vp, offset_t *off)
{
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
dmu_replay_record_t *drr;
- dmu_sendarg_t *dsp;
+ struct backuparg ba;
int err;
uint64_t fromtxg = 0;
@@ -441,13 +392,30 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
if (ds->ds_phys->ds_next_snap_obj == 0)
return (EINVAL);
- /*
- * fromsnap must be an earlier snapshot from the same fs as tosnap,
- * or the origin's fs.
- */
- if (fromds != NULL && !is_before(ds, fromds))
+ /* fromsnap must be an earlier snapshot from the same fs as tosnap */
+ if (fromds && (ds->ds_dir != fromds->ds_dir ||
+ fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
return (EXDEV);
+ if (fromorigin) {
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+ if (fromsnap)
+ return (EINVAL);
+
+ if (dsl_dir_is_clone(ds->ds_dir)) {
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
+ rw_exit(&dp->dp_config_rwlock);
+ if (err)
+ return (err);
+ } else {
+ fromorigin = B_FALSE;
+ }
+ }
+
drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
drr->drr_type = DRR_BEGIN;
drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -457,10 +425,8 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
#ifdef _KERNEL
if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
uint64_t version;
- if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
- kmem_free(drr, sizeof (dmu_replay_record_t));
+ if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0)
return (EINVAL);
- }
if (version == ZPL_VERSION_SA) {
DMU_SET_FEATUREFLAGS(
drr->drr_u.drr_begin.drr_versioninfo,
@@ -472,7 +438,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
drr->drr_u.drr_begin.drr_creation_time =
ds->ds_phys->ds_creation_time;
drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
- if (fromds != NULL && ds->ds_dir != fromds->ds_dir)
+ if (fromorigin)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
@@ -484,121 +450,47 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
if (fromds)
fromtxg = fromds->ds_phys->ds_creation_txg;
-
- dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
-
- dsp->dsa_drr = drr;
- dsp->dsa_vp = vp;
- dsp->dsa_outfd = outfd;
- dsp->dsa_proc = curproc;
- dsp->dsa_os = tosnap;
- dsp->dsa_off = off;
- dsp->dsa_toguid = ds->ds_phys->ds_guid;
- ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
- dsp->dsa_pending_op = PENDING_NONE;
-
- mutex_enter(&ds->ds_sendstream_lock);
- list_insert_head(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
-
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
- err = dsp->dsa_err;
- goto out;
+ if (fromorigin)
+ dsl_dataset_rele(fromds, FTAG);
+
+ ba.drr = drr;
+ ba.vp = vp;
+ ba.os = tosnap;
+ ba.off = off;
+ ba.toguid = ds->ds_phys->ds_guid;
+ ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);
+ ba.pending_op = PENDING_NONE;
+
+ if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
+ kmem_free(drr, sizeof (dmu_replay_record_t));
+ return (ba.err);
}
err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
- backup_cb, dsp);
+ backup_cb, &ba);
- if (dsp->dsa_pending_op != PENDING_NONE)
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+ if (ba.pending_op != PENDING_NONE)
+ if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0)
err = EINTR;
if (err) {
- if (err == EINTR && dsp->dsa_err)
- err = dsp->dsa_err;
- goto out;
+ if (err == EINTR && ba.err)
+ err = ba.err;
+ kmem_free(drr, sizeof (dmu_replay_record_t));
+ return (err);
}
bzero(drr, sizeof (dmu_replay_record_t));
drr->drr_type = DRR_END;
- drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
- drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
+ drr->drr_u.drr_end.drr_checksum = ba.zc;
+ drr->drr_u.drr_end.drr_toguid = ba.toguid;
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
- err = dsp->dsa_err;
- goto out;
+ if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
+ kmem_free(drr, sizeof (dmu_replay_record_t));
+ return (ba.err);
}
-out:
- mutex_enter(&ds->ds_sendstream_lock);
- list_remove(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
-
kmem_free(drr, sizeof (dmu_replay_record_t));
- kmem_free(dsp, sizeof (dmu_sendarg_t));
-
- return (err);
-}
-
-int
-dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
-{
- dsl_dataset_t *ds = tosnap->os_dsl_dataset;
- dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- int err;
- uint64_t size;
-
- /* tosnap must be a snapshot */
- if (ds->ds_phys->ds_next_snap_obj == 0)
- return (EINVAL);
-
- /*
- * fromsnap must be an earlier snapshot from the same fs as tosnap,
- * or the origin's fs.
- */
- if (fromds != NULL && !is_before(ds, fromds))
- return (EXDEV);
-
- /* Get uncompressed size estimate of changed data. */
- if (fromds == NULL) {
- size = ds->ds_phys->ds_uncompressed_bytes;
- } else {
- uint64_t used, comp;
- err = dsl_dataset_space_written(fromds, ds,
- &used, &comp, &size);
- if (err)
- return (err);
- }
-
- /*
- * Assume that space (both on-disk and in-stream) is dominated by
- * data. We will adjust for indirect blocks and the copies property,
- * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
- */
-
- /*
- * Subtract out approximate space used by indirect blocks.
- * Assume most space is used by data blocks (non-indirect, non-dnode).
- * Assume all blocks are recordsize. Assume ditto blocks and
- * internal fragmentation counter out compression.
- *
- * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
- * block, which we observe in practice.
- */
- uint64_t recordsize;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_ds(ds, "recordsize",
- sizeof (recordsize), 1, &recordsize, NULL);
- rw_exit(&dp->dp_config_rwlock);
- if (err)
- return (err);
- size -= size / recordsize * sizeof (blkptr_t);
-
- /* Add in the space for the record associated with each block. */
- size += size / recordsize * sizeof (dmu_replay_record_t);
-
- *sizep = size;
return (0);
}
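/*
 * [Editor's note] The resulting stream is DRR_BEGIN, then the
 * OBJECT/WRITE/FREE/FREEOBJECTS/SPILL records produced by backup_cb(),
 * then DRR_END carrying the running checksum.  A full (non-incremental)
 * send from a held snapshot objset reduces to (hypothetical caller):
 */
static int
send_full_example(objset_t *tosnap, vnode_t *vp, offset_t *offp)
{
        return (dmu_sendbackup(tosnap, NULL, B_FALSE, vp, offp));
}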
@@ -665,7 +557,8 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
}
- spa_history_log_internal_ds(rbsa->ds, "receive new", tx, "");
+ spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC,
+ dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj);
}
/* ARGSUSED */
@@ -766,7 +659,8 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
rbsa->ds = cds;
- spa_history_log_internal_ds(cds, "receive over existing", tx, "");
+ spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC,
+ dp->dp_spa, tx, "dataset = %lld", dsobj);
}
static boolean_t
@@ -939,6 +833,61 @@ guid_compare(const void *arg1, const void *arg2)
return (0);
}
+/*
+ * This function is a callback used by dmu_objset_find() (which
+ * enumerates the object sets) to build an avl tree that maps guids
+ * to datasets. The resulting table is used when processing DRR_WRITE_BYREF
+ * send stream records. These records, which are used in dedup'ed
+ * streams, do not contain data themselves, but refer to a copy
+ * of the data block that has already been written because it was
+ * earlier in the stream. That previous copy is identified by the
+ * guid of the dataset with the referenced data.
+ */
+int
+find_ds_by_guid(const char *name, void *arg)
+{
+ avl_tree_t *guid_map = arg;
+ dsl_dataset_t *ds, *snapds;
+ guid_map_entry_t *gmep;
+ dsl_pool_t *dp;
+ int err;
+ uint64_t lastobj, firstobj;
+
+ if (dsl_dataset_hold(name, FTAG, &ds) != 0)
+ return (0);
+
+ dp = ds->ds_dir->dd_pool;
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ firstobj = ds->ds_dir->dd_phys->dd_origin_obj;
+ lastobj = ds->ds_phys->ds_prev_snap_obj;
+
+ while (lastobj != firstobj) {
+ err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds);
+ if (err) {
+ /*
+ * Skip this snapshot and move on. It's not
+ * clear why this would ever happen, but the
+ * remainder of the snapshot stream can be
+ * processed.
+ */
+ rw_exit(&dp->dp_config_rwlock);
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+ }
+
+ gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
+ gmep->guid = snapds->ds_phys->ds_guid;
+ gmep->gme_ds = snapds;
+ avl_add(guid_map, gmep);
+ lastobj = snapds->ds_phys->ds_prev_snap_obj;
+ }
+
+ rw_exit(&dp->dp_config_rwlock);
+ dsl_dataset_rele(ds, FTAG);
+
+ return (0);
+}
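/*
 * [Editor's sketch, not part of this patch] How a DRR_WRITE_BYREF record
 * is later resolved against the map built above (the real lookup lives in
 * the receive path; this hypothetical helper just shows the avl_find()
 * usage against guid_map_entry_t):
 */
static dsl_dataset_t *
guid_map_lookup_example(avl_tree_t *guid_map, uint64_t guid)
{
        guid_map_entry_t gmesrch, *gmep;
        avl_index_t where;

        gmesrch.guid = guid;
        gmep = avl_find(guid_map, &gmesrch, &where);
        return (gmep != NULL ? gmep->gme_ds : NULL);
}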
+
static void
free_guid_map_onexit(void *arg)
{
@@ -1076,8 +1025,8 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
void *data = NULL;
if (drro->drr_type == DMU_OT_NONE ||
- !DMU_OT_IS_VALID(drro->drr_type) ||
- !DMU_OT_IS_VALID(drro->drr_bonustype) ||
+ drro->drr_type >= DMU_OT_NUMTYPES ||
+ drro->drr_bonustype >= DMU_OT_NUMTYPES ||
drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
@@ -1142,9 +1091,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
bcopy(data, db->db_data, drro->drr_bonuslen);
if (ra->byteswap) {
- dmu_object_byteswap_t byteswap =
- DMU_OT_BYTESWAP(drro->drr_bonustype);
- dmu_ot_byteswap[byteswap].ob_func(db->db_data,
+ dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
drro->drr_bonuslen);
}
dmu_buf_rele(db, FTAG);
@@ -1187,7 +1134,7 @@ restore_write(struct restorearg *ra, objset_t *os,
int err;
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
- !DMU_OT_IS_VALID(drrw->drr_type))
+ drrw->drr_type >= DMU_OT_NUMTYPES)
return (EINVAL);
data = restore_read(ra, drrw->drr_length);
@@ -1206,11 +1153,8 @@ restore_write(struct restorearg *ra, objset_t *os,
dmu_tx_abort(tx);
return (err);
}
- if (ra->byteswap) {
- dmu_object_byteswap_t byteswap =
- DMU_OT_BYTESWAP(drrw->drr_type);
- dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
- }
+ if (ra->byteswap)
+ dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
dmu_write(os, drrw->drr_object,
drrw->drr_offset, drrw->drr_length, data, tx);
dmu_tx_commit(tx);
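/*
 * [Editor's note] With the revert above, byteswap dispatch is indexed
 * directly by object type through the dmu_ot[] table (rather than the
 * shared dmu_ot_byteswap[] table this patch removes).  A minimal sketch:
 */
static void
byteswap_block_example(dmu_object_type_t type, void *data, uint64_t len)
{
        ASSERT3U(type, <, DMU_OT_NUMTYPES);
        dmu_ot[type].ot_byteswap(data, len);
}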
@@ -1426,6 +1370,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
avl_create(ra.guid_to_ds_map, guid_compare,
sizeof (guid_map_entry_t),
offsetof(guid_map_entry_t, avlnode));
+ (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid,
+ (void *)ra.guid_to_ds_map,
+ DS_FIND_CHILDREN);
ra.err = zfs_onexit_add_cb(minor,
free_guid_map_onexit, ra.guid_to_ds_map,
action_handlep);
@@ -1437,8 +1384,6 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
if (ra.err)
goto out;
}
-
- drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
}
/*
@@ -1574,31 +1519,6 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
- spa_history_log_internal_ds(ds, "finished receiving", tx, "");
-}
-
-static int
-add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
-{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
- dsl_dataset_t *snapds;
- guid_map_entry_t *gmep;
- int err;
-
- ASSERT(guid_map != NULL);
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
- if (err == 0) {
- gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
- gmep->guid = snapds->ds_phys->ds_guid;
- gmep->gme_ds = snapds;
- avl_add(guid_map, gmep);
- }
-
- rw_exit(&dp->dp_config_rwlock);
- return (err);
}
static int
@@ -1606,7 +1526,7 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
struct recvendsyncarg resa;
dsl_dataset_t *ds = drc->drc_logical_ds;
- int err, myerr;
+ int err;
/*
* XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
@@ -1641,11 +1561,8 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
out:
mutex_exit(&ds->ds_recvlock);
- if (err == 0 && drc->drc_guid_to_ds_map != NULL)
- (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
dsl_dataset_disown(ds, dmu_recv_tag);
- myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
- ASSERT3U(myerr, ==, 0);
+ (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
return (err);
}
@@ -1673,8 +1590,6 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
/* clean up the fs we just recv'd into */
(void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
} else {
- if (drc->drc_guid_to_ds_map != NULL)
- (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
/* release the hold from dmu_recv_begin */
dsl_dataset_disown(ds, dmu_recv_tag);
}
diff --git a/uts/common/fs/zfs/dmu_traverse.c b/uts/common/fs/zfs/dmu_traverse.c
index bfe9e6506426..023f90e12e34 100644
--- a/uts/common/fs/zfs/dmu_traverse.c
+++ b/uts/common/fs/zfs/dmu_traverse.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -54,7 +53,6 @@ typedef struct traverse_data {
uint64_t td_objset;
blkptr_t *td_rootbp;
uint64_t td_min_txg;
- zbookmark_t *td_resume;
int td_flags;
prefetch_data_t *td_pfd;
blkptr_cb_t *td_func;
@@ -130,54 +128,6 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
zil_free(zilog);
}
-typedef enum resume_skip {
- RESUME_SKIP_ALL,
- RESUME_SKIP_NONE,
- RESUME_SKIP_CHILDREN
-} resume_skip_t;
-
-/*
- * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
- * the block indicated by zb does not need to be visited at all. Returns
- * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
- * resume point. This indicates that this block should be visited but not its
- * children (since they must have been visited in a previous traversal).
- * Otherwise returns RESUME_SKIP_NONE.
- */
-static resume_skip_t
-resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
- const zbookmark_t *zb)
-{
- if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
- /*
- * If we already visited this bp & everything below,
- * don't bother doing it again.
- */
- if (zbookmark_is_before(dnp, zb, td->td_resume))
- return (RESUME_SKIP_ALL);
-
- /*
- * If we found the block we're trying to resume from, zero
- * the bookmark out to indicate that we have resumed.
- */
- ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
- if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
- bzero(td->td_resume, sizeof (*zb));
- if (td->td_flags & TRAVERSE_POST)
- return (RESUME_SKIP_CHILDREN);
- }
- }
- return (RESUME_SKIP_NONE);
-}
-
-static void
-traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
-{
- ASSERT(td->td_resume != NULL);
- ASSERT3U(zb->zb_level, ==, 0);
- bcopy(zb, td->td_resume, sizeof (*td->td_resume));
-}
-
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
@@ -187,20 +137,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *buf = NULL;
prefetch_data_t *pd = td->td_pfd;
boolean_t hard = td->td_flags & TRAVERSE_HARD;
- boolean_t pause = B_FALSE;
-
- switch (resume_skip_check(td, dnp, zb)) {
- case RESUME_SKIP_ALL:
- return (0);
- case RESUME_SKIP_CHILDREN:
- goto post;
- case RESUME_SKIP_NONE:
- break;
- default:
- ASSERT(0);
- }
- if (BP_IS_HOLE(bp)) {
+ if (bp->blk_birth == 0) {
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
td->td_arg);
return (err);
@@ -226,10 +164,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
td->td_arg);
if (err == TRAVERSE_VISIT_NO_CHILDREN)
return (0);
- if (err == ERESTART)
- pause = B_TRUE; /* handle pausing at a common point */
- if (err != 0)
- goto post;
+ if (err)
+ return (err);
}
if (BP_GET_LEVEL(bp) > 0) {
@@ -317,18 +253,9 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
-post:
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
td->td_arg);
- if (err == ERESTART)
- pause = B_TRUE;
- }
-
- if (pause && td->td_resume != NULL) {
- ASSERT3U(err, ==, ERESTART);
- ASSERT(!hard);
- traverse_pause(td, zb);
}
return (err != 0 ? err : lasterr);
@@ -426,23 +353,18 @@ traverse_prefetch_thread(void *arg)
* in syncing context).
*/
static int
-traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
- uint64_t txg_start, zbookmark_t *resume, int flags,
- blkptr_cb_t func, void *arg)
+traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
+ uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
{
traverse_data_t td;
prefetch_data_t pd = { 0 };
zbookmark_t czb;
int err;
- ASSERT(ds == NULL || objset == ds->ds_object);
- ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
-
td.td_spa = spa;
- td.td_objset = objset;
+ td.td_objset = ds ? ds->ds_object : 0;
td.td_rootbp = rootbp;
td.td_min_txg = txg_start;
- td.td_resume = resume;
td.td_func = func;
td.td_arg = arg;
td.td_pfd = &pd;
@@ -494,17 +416,8 @@ int
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
blkptr_cb_t func, void *arg)
{
- return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
- &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
-}
-
-int
-traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
- uint64_t txg_start, zbookmark_t *resume, int flags,
- blkptr_cb_t func, void *arg)
-{
- return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
- blkptr, txg_start, resume, flags, func, arg));
+ return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
+ &ds->ds_phys->ds_bp, txg_start, flags, func, arg));
}
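/*
 * [Editor's sketch, not part of this patch] A minimal blkptr_cb_t that
 * counts level-0 data blocks, usable with traverse_dataset() above:
 *
 *      uint64_t count = 0;
 *      err = traverse_dataset(ds, 0, TRAVERSE_PRE, count_blocks_cb, &count);
 */
static int
count_blocks_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
    void *arg)
{
        uint64_t *countp = arg;

        if (bp != NULL && zb->zb_level == 0)
                (*countp)++;
        return (0);
}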
/*
@@ -521,8 +434,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
boolean_t hard = (flags & TRAVERSE_HARD);
/* visit the MOS */
- err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
- txg_start, NULL, flags, func, arg);
+ err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
+ txg_start, flags, func, arg);
if (err)
return (err);
diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c
index 723d62b48542..bd5c71a2265e 100644
--- a/uts/common/fs/zfs/dmu_tx.c
+++ b/uts/common/fs/zfs/dmu_tx.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -48,7 +46,7 @@ dmu_tx_create_dd(dsl_dir_t *dd)
{
dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
tx->tx_dir = dd;
- if (dd != NULL)
+ if (dd)
tx->tx_pool = dd->dd_pool;
list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
offsetof(dmu_tx_hold_t, txh_node));
@@ -675,11 +673,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
return;
}
- ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
+ ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
if (dn->dn_maxblkid == 0 && !add) {
- blkptr_t *bp;
-
/*
* If there is only one block (i.e. this is a micro-zap)
* and we are not adding anything, the accounting is simple.
@@ -694,13 +690,14 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
* Use max block size here, since we don't know how much
* the size will change between now and the dbuf dirty call.
*/
- bp = &dn->dn_phys->dn_blkptr[0];
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- bp, bp->blk_birth))
+ &dn->dn_phys->dn_blkptr[0],
+ dn->dn_phys->dn_blkptr[0].blk_birth)) {
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
- else
+ } else {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- if (!BP_IS_HOLE(bp))
+ }
+ if (dn->dn_phys->dn_blkptr[0].blk_birth)
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
return;
}
@@ -1276,6 +1273,7 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
dnode_t *dn;
dmu_tx_hold_t *txh;
+ blkptr_t *bp;
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
THT_SPILL, 0, 0);
@@ -1286,18 +1284,17 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
return;
/* If blkptr doesn't exist then add space to towrite */
- if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
+ bp = &dn->dn_phys->dn_spill;
+ if (BP_IS_HOLE(bp)) {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
+ txh->txh_space_tounref = 0;
} else {
- blkptr_t *bp;
-
- bp = &dn->dn_phys->dn_spill;
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
bp, bp->blk_birth))
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
else
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- if (!BP_IS_HOLE(bp))
+ if (bp->blk_birth)
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
}
}
diff --git a/uts/common/fs/zfs/dnode.c b/uts/common/fs/zfs/dnode.c
index 05ccf9fc62d3..850dd5816bf3 100644
--- a/uts/common/fs/zfs/dnode.c
+++ b/uts/common/fs/zfs/dnode.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -194,7 +193,7 @@ dnode_verify(dnode_t *dn)
ASSERT(dn->dn_objset);
ASSERT(dn->dn_handle->dnh_dnode == dn);
- ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
+ ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
return;
@@ -213,7 +212,7 @@ dnode_verify(dnode_t *dn)
ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
}
ASSERT3U(dn->dn_nlevels, <=, 30);
- ASSERT(DMU_OT_IS_VALID(dn->dn_type));
+ ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
ASSERT3U(dn->dn_nblkptr, >=, 1);
ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
@@ -279,10 +278,8 @@ dnode_byteswap(dnode_phys_t *dnp)
*/
int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
size_t len = DN_MAX_BONUSLEN - off;
- ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
- dmu_object_byteswap_t byteswap =
- DMU_OT_BYTESWAP(dnp->dn_bonustype);
- dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
+ ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
+ dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
}
/* Swap SPILL block if we have one */
@@ -410,7 +407,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dmu_zfetch_init(&dn->dn_zfetch, dn);
- ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
+ ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
mutex_enter(&os->os_lock);
list_insert_head(&os->os_dnodes, dn);
@@ -499,11 +496,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
ASSERT(ot != DMU_OT_NONE);
- ASSERT(DMU_OT_IS_VALID(ot));
+ ASSERT3U(ot, <, DMU_OT_NUMTYPES);
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
- ASSERT(DMU_OT_IS_VALID(bonustype));
+ ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
ASSERT(dn->dn_type == DMU_OT_NONE);
ASSERT3U(dn->dn_maxblkid, ==, 0);
@@ -571,7 +568,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0));
- ASSERT(DMU_OT_IS_VALID(bonustype));
+ ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
/* clean up any unreferenced dbufs */
diff --git a/uts/common/fs/zfs/dnode_sync.c b/uts/common/fs/zfs/dnode_sync.c
index 8d817919b34a..2ee990a3b32c 100644
--- a/uts/common/fs/zfs/dnode_sync.c
+++ b/uts/common/fs/zfs/dnode_sync.c
@@ -18,10 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -596,7 +594,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
if (dn->dn_next_bonustype[txgoff]) {
- ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
+ ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
dn->dn_next_bonustype[txgoff] = 0;
}
diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c
index 555797e77efe..59ac4a60947a 100644
--- a/uts/common/fs/zfs/dsl_dataset.c
+++ b/uts/common/fs/zfs/dsl_dataset.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/dmu_objset.h>
@@ -30,12 +28,10 @@
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
-#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
-#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
@@ -101,7 +97,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
return;
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
- ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
+ ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
@@ -118,7 +114,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
- ds->ds_phys->ds_referenced_bytes += used;
+ ds->ds_phys->ds_used_bytes += used;
ds->ds_phys->ds_compressed_bytes += compressed;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
@@ -212,8 +208,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
}
}
mutex_enter(&ds->ds_lock);
- ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
- ds->ds_phys->ds_referenced_bytes -= used;
+ ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
+ ds->ds_phys->ds_used_bytes -= used;
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
ds->ds_phys->ds_compressed_bytes -= compressed;
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
@@ -397,8 +393,6 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
-
rw_init(&ds->ds_rwlock, 0, 0, 0);
cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
@@ -406,9 +400,6 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_deadlist_open(&ds->ds_deadlist,
mos, ds->ds_phys->ds_deadlist_obj);
- list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
- offsetof(dmu_sendarg_t, dsa_link));
-
if (err == 0) {
err = dsl_dir_open_obj(dp,
ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
@@ -819,8 +810,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
origin->ds_phys->ds_creation_txg;
- dsphys->ds_referenced_bytes =
- origin->ds_phys->ds_referenced_bytes;
+ dsphys->ds_used_bytes =
+ origin->ds_phys->ds_used_bytes;
dsphys->ds_compressed_bytes =
origin->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
@@ -910,76 +901,87 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
return (dsobj);
}
+struct destroyarg {
+ dsl_sync_task_group_t *dstg;
+ char *snapname;
+ char *failed;
+ boolean_t defer;
+};
+
+static int
+dsl_snapshot_destroy_one(const char *name, void *arg)
+{
+ struct destroyarg *da = arg;
+ dsl_dataset_t *ds;
+ int err;
+ char *dsname;
+
+ dsname = kmem_asprintf("%s@%s", name, da->snapname);
+ err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
+ strfree(dsname);
+ if (err == 0) {
+ struct dsl_ds_destroyarg *dsda;
+
+ dsl_dataset_make_exclusive(ds, da->dstg);
+ dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
+ dsda->ds = ds;
+ dsda->defer = da->defer;
+ dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
+ dsl_dataset_destroy_sync, dsda, da->dstg, 0);
+ } else if (err == ENOENT) {
+ err = 0;
+ } else {
+ (void) strcpy(da->failed, name);
+ }
+ return (err);
+}
+
/*
- * The snapshots must all be in the same pool.
+ * Destroy 'snapname' in all descendants of 'fsname'.
*/
+#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
-dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer,
- nvlist_t *errlist)
+dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
{
int err;
+ struct destroyarg da;
dsl_sync_task_t *dst;
spa_t *spa;
- nvpair_t *pair;
- dsl_sync_task_group_t *dstg;
-
- pair = nvlist_next_nvpair(snaps, NULL);
- if (pair == NULL)
- return (0);
- err = spa_open(nvpair_name(pair), &spa, FTAG);
+ err = spa_open(fsname, &spa, FTAG);
if (err)
return (err);
- dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- dsl_dataset_t *ds;
+ da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+ da.snapname = snapname;
+ da.failed = fsname;
+ da.defer = defer;
- err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
- if (err == 0) {
- struct dsl_ds_destroyarg *dsda;
-
- dsl_dataset_make_exclusive(ds, dstg);
- dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
- KM_SLEEP);
- dsda->ds = ds;
- dsda->defer = defer;
- dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, dsda, dstg, 0);
- } else if (err == ENOENT) {
- err = 0;
- } else {
- fnvlist_add_int32(errlist, nvpair_name(pair), err);
- break;
- }
- }
+ err = dmu_objset_find(fsname,
+ dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
if (err == 0)
- err = dsl_sync_task_group_wait(dstg);
+ err = dsl_sync_task_group_wait(da.dstg);
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
+ for (dst = list_head(&da.dstg->dstg_tasks); dst;
+ dst = list_next(&da.dstg->dstg_tasks, dst)) {
struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
dsl_dataset_t *ds = dsda->ds;
/*
- * Return the snapshots that triggered the error.
+ * Return the file system name that triggered the error.
*/
- if (dst->dst_err != 0) {
- char name[ZFS_MAXNAMELEN];
- dsl_dataset_name(ds, name);
- fnvlist_add_int32(errlist, name, dst->dst_err);
+ if (dst->dst_err) {
+ dsl_dataset_name(ds, fsname);
+ *strchr(fsname, '@') = '\0';
}
ASSERT3P(dsda->rm_origin, ==, NULL);
- dsl_dataset_disown(ds, dstg);
+ dsl_dataset_disown(ds, da.dstg);
kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
}
- dsl_sync_task_group_destroy(dstg);
+ dsl_sync_task_group_destroy(da.dstg);
spa_close(spa, FTAG);
return (err);
-
}
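/*
 * [Editor's sketch, not part of this patch] Destroying "nightly" in
 * tank/home and every descendant with the function above; on failure the
 * buffer is overwritten with the name of the file system that failed:
 */
static int
destroy_nightly_example(void)
{
        char fsname[MAXNAMELEN];

        (void) strcpy(fsname, "tank/home");     /* hypothetical dataset */
        return (dsl_snapshots_destroy(fsname, "nightly", B_FALSE));
}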
static boolean_t
@@ -1048,6 +1050,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
dsl_dir_t *dd;
uint64_t obj;
struct dsl_ds_destroyarg dsda = { 0 };
+ dsl_dataset_t dummy_ds = { 0 };
dsda.ds = ds;
@@ -1067,6 +1070,8 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
}
dd = ds->ds_dir;
+ dummy_ds.ds_dir = dd;
+ dummy_ds.ds_object = ds->ds_object;
/*
* Check for errors and mark this ds as inconsistent, in
@@ -1082,23 +1087,19 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
goto out;
/*
- * If async destruction is not enabled try to remove all objects
- * while in the open context so that there is less work to do in
- * the syncing context.
+ * remove the objects in open context, so that we won't
+ * have too much to do in syncing context.
*/
- if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
- ds->ds_phys->ds_prev_snap_txg)) {
- /*
- * Ignore errors, if there is not enough disk space
- * we will deal with it in dsl_dataset_destroy_sync().
- */
- (void) dmu_free_object(os, obj);
- }
- if (err != ESRCH)
- goto out;
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
+ ds->ds_phys->ds_prev_snap_txg)) {
+ /*
+ * Ignore errors, if there is not enough disk space
+ * we will deal with it in dsl_dataset_destroy_sync().
+ */
+ (void) dmu_free_object(os, obj);
}
+ if (err != ESRCH)
+ goto out;
/*
* Only the ZIL knows how to free log blocks.
@@ -1153,7 +1154,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
dsl_dataset_destroy_sync, &dsda, tag, 0);
dsl_sync_task_create(dstg, dsl_dir_destroy_check,
- dsl_dir_destroy_sync, dd, FTAG, 0);
+ dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
@@ -1244,7 +1245,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT(!dsl_dataset_is_snapshot(ds));
if (ds->ds_phys->ds_prev_snap_obj != 0)
- mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
+ mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
else
mrs_used = 0;
@@ -1252,7 +1253,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT3U(dlused, <=, mrs_used);
ds->ds_phys->ds_unique_bytes =
- ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
+ ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
SPA_VERSION_UNIQUE_ACCURATE)
@@ -1328,12 +1329,14 @@ static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
/* Mark it as inconsistent on-disk, in case we crash */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
- spa_history_log_internal_ds(ds, "destroy begin", tx, "");
+ spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
+ "dataset = %llu", ds->ds_object);
}
static int
@@ -1608,30 +1611,6 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
ds_next->ds_phys->ds_deadlist_obj);
}
-static int
-old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
- int err;
- struct killarg ka;
-
- /*
- * Free everything that we point to (that's born after
- * the previous snapshot, if we are a clone)
- *
- * NB: this should be very quick, because we already
- * freed all the objects in open context.
- */
- ka.ds = ds;
- ka.tx = tx;
- err = traverse_dataset(ds,
- ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
- kill_blkptr, &ka);
- ASSERT3U(err, ==, 0);
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
-
- return (err);
-}
-
void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
@@ -1658,13 +1637,9 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
- spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
return;
}
- /* We need to log before removing it from the namespace. */
- spa_history_log_internal_ds(ds, "destroy", tx, "");
-
/* signal any waiters that this dataset is going away */
mutex_enter(&ds->ds_lock);
ds->ds_owner = dsl_reaper;
@@ -1782,6 +1757,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
+ dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
@@ -1857,54 +1833,32 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
}
dsl_dataset_rele(ds_next, FTAG);
} else {
- zfeature_info_t *async_destroy =
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
-
/*
* There's no next snapshot, so this is a head dataset.
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
+ struct killarg ka;
+
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
ds->ds_phys->ds_deadlist_obj = 0;
- if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
- err = old_synchronous_dataset_destroy(ds, tx);
- } else {
- /*
- * Move the bptree into the pool's list of trees to
- * clean up and update space accounting information.
- */
- uint64_t used, comp, uncomp;
-
- ASSERT(err == 0 || err == EBUSY);
- if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
- spa_feature_incr(dp->dp_spa, async_destroy, tx);
- dp->dp_bptree_obj = bptree_alloc(
- dp->dp_meta_objset, tx);
- VERIFY(zap_add(dp->dp_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
- &dp->dp_bptree_obj, tx) == 0);
- }
-
- used = ds->ds_dir->dd_phys->dd_used_bytes;
- comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
- uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
-
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
- ds->ds_phys->ds_unique_bytes == used);
-
- bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
- &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
- used, comp, uncomp, tx);
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
- -used, -comp, -uncomp, tx);
- dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
- used, comp, uncomp, tx);
- }
+ /*
+ * Free everything that we point to (that's born after
+ * the previous snapshot, if we are a clone)
+ *
+ * NB: this should be very quick, because we already
+ * freed all the objects in open context.
+ */
+ ka.ds = ds;
+ ka.tx = tx;
+ err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
+ TRAVERSE_POST, kill_blkptr, &ka);
+ ASSERT3U(err, ==, 0);
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+ ds->ds_phys->ds_unique_bytes == 0);
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
@@ -1959,6 +1913,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
dsl_dataset_rele(ds_prev, FTAG);
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
+ spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx,
+ "dataset = %llu", ds->ds_object);
if (ds->ds_phys->ds_next_clones_obj != 0) {
uint64_t count;
@@ -2006,7 +1962,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
return (ENOSPC);
/*
- * Propagate any reserved space for this snapshot to other
+ * Propagate any reserved space for this snapshot to other
* snapshot checks in this sync group.
*/
if (asize > 0)
@@ -2016,9 +1972,10 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
}
int
-dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
- dmu_tx_t *tx)
+dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
+ dsl_dataset_t *ds = arg1;
+ const char *snapname = arg2;
int err;
uint64_t value;
@@ -2030,7 +1987,7 @@ dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
return (EAGAIN);
/*
- * Check for conflicting snapshot name.
+ * Check for conflicting snapshot name.
*/
err = dsl_dataset_snap_lookup(ds, snapname, &value);
if (err == 0)
@@ -2054,9 +2011,10 @@ dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
}
void
-dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
- dmu_tx_t *tx)
+dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
+ dsl_dataset_t *ds = arg1;
+ const char *snapname = arg2;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dmu_buf_t *dbuf;
dsl_dataset_phys_t *dsphys;
@@ -2091,7 +2049,7 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
dsphys->ds_creation_time = gethrestime_sec();
dsphys->ds_creation_txg = crtxg;
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
- dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
+ dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
dsphys->ds_flags = ds->ds_phys->ds_flags;
@@ -2162,7 +2120,8 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
dsl_dir_snap_cmtime_update(ds->ds_dir);
- spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
+ spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx,
+ "dataset = %llu", dsobj);
}
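/*
 * [Editor's sketch, not part of this patch] With the (void *arg1,
 * void *arg2) signatures restored above, the check/sync pair plugs
 * straight into the sync-task framework for a single dataset (assuming
 * the classic dsl_sync_task_do() entry point):
 */
static int
snapshot_one_example(dsl_dataset_t *ds, char *snapname)
{
        return (dsl_sync_task_do(ds->ds_dir->dd_pool,
            dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync,
            ds, snapname, 3));
}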
void
@@ -2183,86 +2142,12 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_objset_sync(ds->ds_objset, zio, tx);
}
-static void
-get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
-{
- uint64_t count = 0;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- zap_cursor_t zc;
- zap_attribute_t za;
- nvlist_t *propval;
- nvlist_t *val;
-
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- /*
- * There may be missing entries in ds_next_clones_obj
- * due to a bug in a previous version of the code.
- * Only trust it if it has the right number of entries.
- */
- if (ds->ds_phys->ds_next_clones_obj != 0) {
- ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
- &count));
- }
- if (count != ds->ds_phys->ds_num_children - 1) {
- goto fail;
- }
- for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- dsl_dataset_t *clone;
- char buf[ZFS_MAXNAMELEN];
- /*
- * Even though we hold the dp_config_rwlock, the dataset
- * may fail to open, returning ENOENT. If there is a
- * thread concurrently attempting to destroy this
- * dataset, it will have the ds_rwlock held for
- * RW_WRITER. Our call to dsl_dataset_hold_obj() ->
- * dsl_dataset_hold_ref() will fail its
- * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
- * dp_config_rwlock, and wait for the destroy progress
- * and signal ds_exclusive_cv. If the destroy was
- * successful, we will see that
- * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
- */
- if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone) != 0)
- continue;
- dsl_dir_name(clone->ds_dir, buf);
- VERIFY(nvlist_add_boolean(val, buf) == 0);
- dsl_dataset_rele(clone, FTAG);
- }
- zap_cursor_fini(&zc);
- VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
- VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
- propval) == 0);
-fail:
- nvlist_free(val);
- nvlist_free(propval);
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
-}
-
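The removed comment above explains why a clone can fail to open mid-iteration: a concurrent destroy holds ds_rwlock as writer, and once the destroy completes the hold attempt sees ENOENT. A minimal userland sketch of that hold-or-skip pattern (the stub function and object numbers are hypothetical, not ZFS API):

#include <stdio.h>
#include <errno.h>

/*
 * Stub standing in for dsl_dataset_hold_obj(): a dataset that is
 * being destroyed concurrently fails to open with ENOENT.
 * Object numbers here are hypothetical.
 */
static int
try_hold(int obj)
{
	return (obj == 2 ? ENOENT : 0);
}

int
main(void)
{
	for (int obj = 1; obj <= 3; obj++) {
		if (try_hold(obj) != 0)
			continue;	/* skip it, as the loop above does */
		printf("clone %d listed\n", obj);
	}
	return (0);
}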
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
- uint64_t refd, avail, uobjs, aobjs, ratio;
-
- ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
- (ds->ds_phys->ds_uncompressed_bytes * 100 /
- ds->ds_phys->ds_compressed_bytes);
-
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
+ uint64_t refd, avail, uobjs, aobjs;
- if (dsl_dataset_is_snapshot(ds)) {
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
- ds->ds_phys->ds_unique_bytes);
- get_clones_stat(ds, nv);
- } else {
- dsl_dir_stats(ds->ds_dir, nv);
- }
+ dsl_dir_stats(ds->ds_dir, nv);
dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
@@ -2287,26 +2172,18 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
- if (ds->ds_phys->ds_prev_snap_obj != 0) {
- uint64_t written, comp, uncomp;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- dsl_dataset_t *prev;
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- int err = dsl_dataset_hold_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
- rw_exit(&dp->dp_config_rwlock);
- if (err == 0) {
- err = dsl_dataset_space_written(prev, ds, &written,
- &comp, &uncomp);
- dsl_dataset_rele(prev, FTAG);
- if (err == 0) {
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
- written);
- }
- }
+ if (ds->ds_phys->ds_next_snap_obj) {
+ /*
+ * This is a snapshot; override the dd's space used with
+ * our unique space and compression ratio.
+ */
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
+ ds->ds_phys->ds_unique_bytes);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
+ ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
+ (ds->ds_phys->ds_uncompressed_bytes * 100 /
+ ds->ds_phys->ds_compressed_bytes));
}
-
}
void
@@ -2315,25 +2192,27 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
stat->dds_guid = ds->ds_phys->ds_guid;
- stat->dds_origin[0] = '\0';
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_phys->ds_next_snap_obj) {
stat->dds_is_snapshot = B_TRUE;
stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
} else {
stat->dds_is_snapshot = B_FALSE;
stat->dds_num_clones = 0;
+ }
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- if (dsl_dir_is_clone(ds->ds_dir)) {
- dsl_dataset_t *ods;
+ /* clone origin is really a dsl_dir thing... */
+ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
+ if (dsl_dir_is_clone(ds->ds_dir)) {
+ dsl_dataset_t *ods;
- VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
- ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
- dsl_dataset_name(ods, stat->dds_origin);
- dsl_dataset_drop_ref(ods, FTAG);
- }
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+ VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
+ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
+ dsl_dataset_name(ods, stat->dds_origin);
+ dsl_dataset_drop_ref(ods, FTAG);
+ } else {
+ stat->dds_origin[0] = '\0';
}
+ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
uint64_t
@@ -2347,7 +2226,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
{
- *refdbytesp = ds->ds_phys->ds_referenced_bytes;
+ *refdbytesp = ds->ds_phys->ds_used_bytes;
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
@@ -2450,8 +2329,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
ds->ds_snapname, 8, 1, &ds->ds_object, tx);
ASSERT3U(err, ==, 0);
- spa_history_log_internal_ds(ds, "rename", tx,
- "-> @%s", newsnapname);
+ spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
+ "dataset = %llu", ds->ds_object);
dsl_dataset_rele(hds, FTAG);
}
@@ -2684,7 +2563,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
- pa->used = origin_ds->ds_phys->ds_referenced_bytes;
+ pa->used = origin_ds->ds_phys->ds_used_bytes;
pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
for (snap = list_head(&pa->shared_snaps); snap;
@@ -2718,7 +2597,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* so we need to subtract out the clone origin's used space.
*/
if (pa->origin_origin) {
- pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
+ pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
@@ -2931,7 +2810,8 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
origin_ds->ds_phys->ds_unique_bytes = pa->unique;
/* log history record */
- spa_history_log_internal_ds(hds, "promote", tx, "");
+ spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
+ "dataset = %llu", hds->ds_object);
dsl_dir_close(odd, FTAG);
}
@@ -3233,8 +3113,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deadlist_space(&csa->ohds->ds_deadlist,
&odl_used, &odl_comp, &odl_uncomp);
- dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
- (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
+ dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
+ (csa->ohds->ds_phys->ds_used_bytes + odl_used);
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
(csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
@@ -3263,8 +3143,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
/* swap ds_*_bytes */
- SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
- csa->cds->ds_phys->ds_referenced_bytes);
+ SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
+ csa->cds->ds_phys->ds_used_bytes);
SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
csa->cds->ds_phys->ds_compressed_bytes);
SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
@@ -3289,9 +3169,6 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
csa->ohds->ds_phys->ds_deadlist_obj);
dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
-
- spa_history_log_internal_ds(csa->cds, "clone swap", tx,
- "parent=%s", csa->ohds->ds_dir->dd_myname);
}
/*
@@ -3396,9 +3273,8 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
- if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
- if (inflight > 0 ||
- ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
+ if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
+ if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
error = ERESTART;
else
error = EDQUOT;
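The quota test above returns ERESTART when in-flight writes might still free space, and EDQUOT only once on-disk usage alone exceeds the quota with nothing pending. A self-contained sketch of the same decision, with illustrative errno values and hypothetical byte counts:

#include <stdio.h>
#include <stdint.h>

#define ERESTART	85	/* illustrative values, not from <errno.h> */
#define EDQUOT		49

/* Mirrors the decision in dsl_dataset_check_quota() above. */
static int
quota_check(uint64_t used, uint64_t inflight, uint64_t quota)
{
	if (used + inflight >= quota) {
		/* Pending frees may still save us: ask caller to retry. */
		if (inflight > 0 || used < quota)
			return (ERESTART);
		return (EDQUOT);	/* hard failure */
	}
	return (0);
}

int
main(void)
{
	printf("%d\n", quota_check(100, 10, 105));	/* 85 = ERESTART */
	printf("%d\n", quota_check(105, 0, 105));	/* 49 = EDQUOT */
	return (0);
}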
@@ -3425,7 +3301,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (psa->psa_effective_value == 0)
return (0);
- if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
+ if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
psa->psa_effective_value < ds->ds_reserved)
return (ENOSPC);
@@ -3448,8 +3324,9 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_quota = effective_value;
- spa_history_log_internal_ds(ds, "set refquota", tx,
- "refquota=%lld", (longlong_t)ds->ds_quota);
+ spa_history_log_internal(LOG_DS_REFQUOTA,
+ ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ",
+ (longlong_t)ds->ds_quota, ds->ds_object);
}
}
@@ -3554,8 +3431,9 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
mutex_exit(&ds->ds_dir->dd_lock);
- spa_history_log_internal_ds(ds, "set refreservation", tx,
- "refreservation=%lld", (longlong_t)effective_value);
+ spa_history_log_internal(LOG_DS_REFRESERV,
+ ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu",
+ (longlong_t)effective_value, ds->ds_object);
}
int
@@ -3621,7 +3499,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
struct dsl_ds_holdarg *ha = arg2;
- const char *htag = ha->htag;
+ char *htag = ha->htag;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int error = 0;
@@ -3655,7 +3533,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
struct dsl_ds_holdarg *ha = arg2;
- const char *htag = ha->htag;
+ char *htag = ha->htag;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t now = gethrestime_sec();
@@ -3683,9 +3561,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
htag, &now, tx));
}
- spa_history_log_internal_ds(ds, "hold", tx,
- "tag = %s temp = %d holds now = %llu",
- htag, (int)ha->temphold, ds->ds_userrefs);
+ spa_history_log_internal(LOG_DS_USER_HOLD,
+ dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag,
+ (int)ha->temphold, ds->ds_object);
}
static int
@@ -3892,6 +3770,7 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
+ uint64_t dsobj = ds->ds_object;
uint64_t refs;
int error;
@@ -3914,8 +3793,9 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
dsl_dataset_destroy_sync(&dsda, tag, tx);
}
- spa_history_log_internal_ds(ds, "release", tx,
- "tag = %s refs now = %lld", ra->htag, (longlong_t)refs);
+ spa_history_log_internal(LOG_DS_USER_RELEASE,
+ dp->dp_spa, tx, "<%s> %lld dataset = %llu",
+ ra->htag, (longlong_t)refs, dsobj);
}
static int
@@ -4129,7 +4009,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
}
/*
- * Note, this function is used as the callback for dmu_objset_find(). We
+ * Note, this function is used as the callback for dmu_objset_find(). We
* always return 0 so that we will continue to find and process
* inconsistent datasets, even if we encounter an error trying to
* process one of them.
@@ -4148,156 +4028,3 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
}
return (0);
}
-
-/*
- * Return (in *usedp) the amount of space written in new that is not
- * present in oldsnap. New may be a snapshot or the head. Old must be
- * a snapshot before new, in new's filesystem (or its origin). If not then
- * fail and return EINVAL.
- *
- * The written space is calculated by considering two components: First, we
- * ignore any freed space, and calculate the written as new's used space
- * minus old's used space. Next, we add in the amount of space that was freed
- * between the two snapshots, thus reducing new's used space relative to old's.
- * Specifically, this is the space that was born before old->ds_creation_txg,
- * and freed before new (ie. on new's deadlist or a previous deadlist).
- *
- * space freed [---------------------]
- * snapshots ---O-------O--------O-------O------
- * oldsnap new
- */
-int
-dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
- uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
-{
- int err = 0;
- uint64_t snapobj;
- dsl_pool_t *dp = new->ds_dir->dd_pool;
-
- *usedp = 0;
- *usedp += new->ds_phys->ds_referenced_bytes;
- *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
-
- *compp = 0;
- *compp += new->ds_phys->ds_compressed_bytes;
- *compp -= oldsnap->ds_phys->ds_compressed_bytes;
-
- *uncompp = 0;
- *uncompp += new->ds_phys->ds_uncompressed_bytes;
- *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- snapobj = new->ds_object;
- while (snapobj != oldsnap->ds_object) {
- dsl_dataset_t *snap;
- uint64_t used, comp, uncomp;
-
- if (snapobj == new->ds_object) {
- snap = new;
- } else {
- err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
- if (err != 0)
- break;
- }
-
- if (snap->ds_phys->ds_prev_snap_txg ==
- oldsnap->ds_phys->ds_creation_txg) {
- /*
- * The blocks in the deadlist can not be born after
- * ds_prev_snap_txg, so get the whole deadlist space,
- * which is more efficient (especially for old-format
- * deadlists). Unfortunately the deadlist code
- * doesn't have enough information to make this
- * optimization itself.
- */
- dsl_deadlist_space(&snap->ds_deadlist,
- &used, &comp, &uncomp);
- } else {
- dsl_deadlist_space_range(&snap->ds_deadlist,
- 0, oldsnap->ds_phys->ds_creation_txg,
- &used, &comp, &uncomp);
- }
- *usedp += used;
- *compp += comp;
- *uncompp += uncomp;
-
- /*
- * If we get to the beginning of the chain of snapshots
- * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
- * was not a snapshot of/before new.
- */
- snapobj = snap->ds_phys->ds_prev_snap_obj;
- if (snap != new)
- dsl_dataset_rele(snap, FTAG);
- if (snapobj == 0) {
- err = EINVAL;
- break;
- }
-
- }
- rw_exit(&dp->dp_config_rwlock);
- return (err);
-}
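The accounting in the removed dsl_dataset_space_written() reduces to: referenced(new) minus referenced(oldsnap), plus the space born before oldsnap but freed before new (summed from the intervening deadlists). A toy calculation with hypothetical byte counts:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Hypothetical figures, in bytes. */
	uint64_t old_referenced = 1000;		/* oldsnap */
	uint64_t new_referenced = 1400;		/* new */
	uint64_t freed_between = 300;		/* born before oldsnap,
						   freed before new */

	uint64_t written = new_referenced - old_referenced + freed_between;
	printf("written = %llu\n", (unsigned long long)written);  /* 700 */
	return (0);
}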
-
-/*
- * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
- * lastsnap, and all snapshots in between are deleted.
- *
- * blocks that would be freed [---------------------------]
- * snapshots ---O-------O--------O-------O--------O
- * firstsnap lastsnap
- *
- * This is the set of blocks that were born after the snap before firstsnap,
- * (birth > firstsnap->prev_snap_txg) and died before the snap after the
- * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
- * We calculate this by iterating over the relevant deadlists (from the snap
- * after lastsnap, backward to the snap after firstsnap), summing up the
- * space on the deadlist that was born after the snap before firstsnap.
- */
-int
-dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
- dsl_dataset_t *lastsnap,
- uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
-{
- int err = 0;
- uint64_t snapobj;
- dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
-
- ASSERT(dsl_dataset_is_snapshot(firstsnap));
- ASSERT(dsl_dataset_is_snapshot(lastsnap));
-
- /*
- * Check that the snapshots are in the same dsl_dir, and firstsnap
- * is before lastsnap.
- */
- if (firstsnap->ds_dir != lastsnap->ds_dir ||
- firstsnap->ds_phys->ds_creation_txg >
- lastsnap->ds_phys->ds_creation_txg)
- return (EINVAL);
-
- *usedp = *compp = *uncompp = 0;
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- snapobj = lastsnap->ds_phys->ds_next_snap_obj;
- while (snapobj != firstsnap->ds_object) {
- dsl_dataset_t *ds;
- uint64_t used, comp, uncomp;
-
- err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
- if (err != 0)
- break;
-
- dsl_deadlist_space_range(&ds->ds_deadlist,
- firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
- &used, &comp, &uncomp);
- *usedp += used;
- *compp += comp;
- *uncompp += uncomp;
-
- snapobj = ds->ds_phys->ds_prev_snap_obj;
- ASSERT3U(snapobj, !=, 0);
- dsl_dataset_rele(ds, FTAG);
- }
- rw_exit(&dp->dp_config_rwlock);
- return (err);
-}
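Likewise, the removed dsl_dataset_space_wouldfree() sums, across the relevant deadlists, only the blocks born after the snap before firstsnap. A toy version with a single hypothetical deadlist:

#include <stdio.h>
#include <stdint.h>

/* Toy deadlist entries: (birth txg, bytes) pairs. */
struct dle { uint64_t birth; uint64_t bytes; };

int
main(void)
{
	/* Hypothetical deadlist of the snap after lastsnap. */
	struct dle dl[] = { { 5, 100 }, { 12, 200 }, { 20, 50 } };
	uint64_t first_prev_txg = 10;	/* snap before firstsnap */
	uint64_t would_free = 0;

	/* Count only blocks born after the snap before firstsnap. */
	for (int i = 0; i < 3; i++)
		if (dl[i].birth > first_prev_txg)
			would_free += dl[i].bytes;
	printf("%llu\n", (unsigned long long)would_free);	/* 250 */
	return (0);
}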
diff --git a/uts/common/fs/zfs/dsl_deadlist.c b/uts/common/fs/zfs/dsl_deadlist.c
index dd6db2120b31..064f8aceb8ee 100644
--- a/uts/common/fs/zfs/dsl_deadlist.c
+++ b/uts/common/fs/zfs/dsl_deadlist.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/dsl_dataset.h>
@@ -30,26 +29,6 @@
#include <sys/zfs_context.h>
#include <sys/dsl_pool.h>
-/*
- * Deadlist concurrency:
- *
- * Deadlists can only be modified from the syncing thread.
- *
- * Except for dsl_deadlist_insert(), it can only be modified with the
- * dp_config_rwlock held with RW_WRITER.
- *
- * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can
- * be called concurrently, from open context, with the dl_config_rwlock held
- * with RW_READER.
- *
- * Therefore, we only need to provide locking between dsl_deadlist_insert() and
- * the accessors, protecting:
- * dl_phys->dl_used,comp,uncomp
- * and protecting the dl_tree from being loaded.
- * The locking is provided by dl_lock. Note that locking on the bpobj_t
- * provides its own locking, and dl_oldfmt is immutable.
- */
-
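The removed comment boils down to: the insert path (syncing thread) and the open-context space accessors are the only racing parties, so a single mutex over the counters and the tree load suffices. A minimal pthread sketch of that scheme (toy types, not the ZFS structures):

#include <pthread.h>
#include <stdint.h>

struct toy_deadlist {
	pthread_mutex_t	dl_lock;
	uint64_t	dl_used;
};

static void
toy_insert(struct toy_deadlist *dl, uint64_t bytes)
{
	pthread_mutex_lock(&dl->dl_lock);	/* syncing thread only */
	dl->dl_used += bytes;
	pthread_mutex_unlock(&dl->dl_lock);
}

static uint64_t
toy_space(struct toy_deadlist *dl)
{
	uint64_t used;

	pthread_mutex_lock(&dl->dl_lock);	/* open-context reader */
	used = dl->dl_used;
	pthread_mutex_unlock(&dl->dl_lock);
	return (used);
}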
static int
dsl_deadlist_compare(const void *arg1, const void *arg2)
{
@@ -330,14 +309,14 @@ dsl_deadlist_space(dsl_deadlist_t *dl,
* return space used in the range (mintxg, maxtxg].
* Includes maxtxg, does not include mintxg.
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
- * larger than any bp in the deadlist (eg. UINT64_MAX)).
+ * UINT64_MAX).
*/
void
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
- dsl_deadlist_entry_t *dle;
dsl_deadlist_entry_t dle_tofind;
+ dsl_deadlist_entry_t *dle;
avl_index_t where;
if (dl->dl_oldfmt) {
@@ -346,10 +325,9 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
return;
}
+ dsl_deadlist_load_tree(dl);
*usedp = *compp = *uncompp = 0;
- mutex_enter(&dl->dl_lock);
- dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
/*
@@ -358,7 +336,6 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
*/
ASSERT(dle != NULL ||
avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
-
for (; dle && dle->dle_mintxg < maxtxg;
dle = AVL_NEXT(&dl->dl_tree, dle)) {
uint64_t used, comp, uncomp;
@@ -370,7 +347,6 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
*compp += comp;
*uncompp += uncomp;
}
- mutex_exit(&dl->dl_lock);
}
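In the loop above, entries are keyed by dle_mintxg and the walk covers keys in [mintxg, maxtxg); since each entry holds blocks that died after its key txg, the total corresponds to the (mintxg, maxtxg] range the comment describes. A toy restatement of the walk on a plain array (hypothetical entries):

#include <stdio.h>
#include <stdint.h>

struct entry { uint64_t mintxg; uint64_t used; };

/* Sum entries whose key falls in [mintxg, maxtxg). */
static uint64_t
space_range(const struct entry *e, int n, uint64_t mintxg, uint64_t maxtxg)
{
	uint64_t sum = 0;

	for (int i = 0; i < n; i++)
		if (e[i].mintxg >= mintxg && e[i].mintxg < maxtxg)
			sum += e[i].used;
	return (sum);
}

int
main(void)
{
	struct entry dl[] = { { 10, 100 }, { 20, 200 }, { 30, 50 } };

	printf("%llu\n", (unsigned long long)
	    space_range(dl, 3, 10, 30));	/* 300 */
	return (0);
}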
static void
diff --git a/uts/common/fs/zfs/dsl_deleg.c b/uts/common/fs/zfs/dsl_deleg.c
index ba620bd6fbed..529fb052fa75 100644
--- a/uts/common/fs/zfs/dsl_deleg.c
+++ b/uts/common/fs/zfs/dsl_deleg.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -171,8 +170,10 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
- jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
- zapobj, whokey, tx);
+ jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
+ DMU_OT_NONE, 0, tx);
+ VERIFY(zap_update(mos, zapobj,
+ whokey, 8, 1, &jumpobj, tx) == 0);
}
while (permpair = nvlist_next_nvpair(perms, permpair)) {
@@ -181,8 +182,10 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos, jumpobj,
perm, 8, 1, &n, tx) == 0);
- spa_history_log_internal_dd(dd, "permission update", tx,
- "%s %s", whokey, perm);
+ spa_history_log_internal(LOG_DS_PERM_UPDATE,
+ dd->dd_pool->dp_spa, tx,
+ "%s %s dataset = %llu", whokey, perm,
+ dd->dd_phys->dd_head_dataset_obj);
}
}
}
@@ -211,8 +214,10 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
(void) zap_remove(mos, zapobj, whokey, tx);
VERIFY(0 == zap_destroy(mos, jumpobj, tx));
}
- spa_history_log_internal_dd(dd, "permission who remove",
- tx, "%s", whokey);
+ spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
+ dd->dd_pool->dp_spa, tx,
+ "%s dataset = %llu", whokey,
+ dd->dd_phys->dd_head_dataset_obj);
continue;
}
@@ -230,8 +235,10 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(0 == zap_destroy(mos,
jumpobj, tx));
}
- spa_history_log_internal_dd(dd, "permission remove", tx,
- "%s %s", whokey, perm);
+ spa_history_log_internal(LOG_DS_PERM_REMOVE,
+ dd->dd_pool->dp_spa, tx,
+ "%s %s dataset = %llu", whokey, perm,
+ dd->dd_phys->dd_head_dataset_obj);
}
}
}
diff --git a/uts/common/fs/zfs/dsl_dir.c b/uts/common/fs/zfs/dsl_dir.c
index 74c1050fabf0..1cd49c8274e8 100644
--- a/uts/common/fs/zfs/dsl_dir.c
+++ b/uts/common/fs/zfs/dsl_dir.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -40,8 +39,8 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
- uint64_t value, dmu_tx_t *tx);
+static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
+
/* ARGSUSED */
static void
@@ -448,7 +447,8 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
+ dsl_dataset_t *ds = arg1;
+ dsl_dir_t *dd = ds->ds_dir;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
@@ -477,19 +477,24 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
void
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
+ dsl_dataset_t *ds = arg1;
+ dsl_dir_t *dd = ds->ds_dir;
objset_t *mos = dd->dd_pool->dp_meta_objset;
+ dsl_prop_setarg_t psa;
+ uint64_t value = 0;
uint64_t obj;
dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
- /*
- * Remove our reservation. The impl() routine avoids setting the
- * actual property, which would require the (already destroyed) ds.
- */
- dsl_dir_set_reservation_sync_impl(dd, 0, tx);
+ /* Remove our reservation. */
+ dsl_prop_setarg_init_uint64(&psa, "reservation",
+ (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
+ &value);
+ psa.psa_effective_value = 0; /* predict default value */
+
+ dsl_dir_set_reservation_sync(ds, &psa, tx);
ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
@@ -1055,8 +1060,9 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
- spa_history_log_internal_dd(dd, "set quota", tx,
- "quota=%lld", (longlong_t)effective_value);
+ spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
+ tx, "%lld dataset = %llu ",
+ (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
}
int
@@ -1143,17 +1149,25 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
-dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
+dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
+ dsl_dataset_t *ds = arg1;
+ dsl_dir_t *dd = ds->ds_dir;
+ dsl_prop_setarg_t *psa = arg2;
+ uint64_t effective_value = psa->psa_effective_value;
uint64_t used;
int64_t delta;
+ dsl_prop_set_sync(ds, psa, tx);
+ DSL_PROP_CHECK_PREDICTION(dd, psa);
+
dmu_buf_will_dirty(dd->dd_dbuf, tx);
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
- delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
- dd->dd_phys->dd_reserved = value;
+ delta = MAX(used, effective_value) -
+ MAX(used, dd->dd_phys->dd_reserved);
+ dd->dd_phys->dd_reserved = effective_value;
if (dd->dd_parent != NULL) {
/* Roll up this additional usage into our ancestors */
@@ -1161,24 +1175,10 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
delta, 0, 0, tx);
}
mutex_exit(&dd->dd_lock);
-}
-
-
-static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t value = psa->psa_effective_value;
-
- dsl_prop_set_sync(ds, psa, tx);
- DSL_PROP_CHECK_PREDICTION(dd, psa);
- dsl_dir_set_reservation_sync_impl(dd, value, tx);
-
- spa_history_log_internal_dd(dd, "set reservation", tx,
- "reservation=%lld", (longlong_t)value);
+ spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
+ tx, "%lld dataset = %llu",
+ (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
}
int
@@ -1299,15 +1299,9 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
- char namebuf[MAXNAMELEN];
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
- /* Log this before we change the name. */
- dsl_dir_name(ra->newparent, namebuf);
- spa_history_log_internal_dd(dd, "rename", tx,
- "-> %s/%s", namebuf, ra->mynewname);
-
if (ra->newparent != dd->dd_parent) {
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-dd->dd_phys->dd_used_bytes,
@@ -1347,6 +1341,8 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT3U(err, ==, 0);
+ spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
+ tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
}
int
diff --git a/uts/common/fs/zfs/dsl_pool.c b/uts/common/fs/zfs/dsl_pool.c
index e9223944d1b4..700cc962865d 100644
--- a/uts/common/fs/zfs/dsl_pool.c
+++ b/uts/common/fs/zfs/dsl_pool.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_pool.h>
@@ -40,8 +39,6 @@
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
-#include <sys/bptree.h>
-#include <sys/zfeature.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@@ -102,32 +99,20 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
}
int
-dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
+dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
{
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
-
- err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
- &dp->dp_meta_objset);
- if (err != 0)
- dsl_pool_close(dp);
- else
- *dpp = dp;
-
- return (err);
-}
-
-int
-dsl_pool_open(dsl_pool_t *dp)
-{
- int err;
dsl_dir_t *dd;
dsl_dataset_t *ds;
uint64_t obj;
- ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
-
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
+ err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+ &dp->dp_meta_objset);
+ if (err)
+ goto out;
+
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
@@ -143,7 +128,7 @@ dsl_pool_open(dsl_pool_t *dp)
if (err)
goto out;
- if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
+ if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
if (err)
goto out;
@@ -160,7 +145,7 @@ dsl_pool_open(dsl_pool_t *dp)
goto out;
}
- if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
+ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
&dp->dp_free_dir);
if (err)
@@ -174,15 +159,6 @@ dsl_pool_open(dsl_pool_t *dp)
dp->dp_meta_objset, obj));
}
- if (spa_feature_is_active(dp->dp_spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
- &dp->dp_bptree_obj);
- if (err != 0)
- goto out;
- }
-
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);
@@ -191,10 +167,15 @@ dsl_pool_open(dsl_pool_t *dp)
if (err)
goto out;
- err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
+ err = dsl_scan_init(dp, txg);
out:
rw_exit(&dp->dp_config_rwlock);
+ if (err)
+ dsl_pool_close(dp);
+ else
+ *dpp = dp;
+
return (err);
}
@@ -310,10 +291,7 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
- dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
- rw_exit(&dp->dp_config_rwlock);
return (0);
}
@@ -488,7 +466,7 @@ int
dsl_pool_sync_context(dsl_pool_t *dp)
{
return (curthread == dp->dp_tx.tx_sync_thread ||
- spa_is_initializing(dp->dp_spa));
+ spa_get_dsl(dp->dp_spa) == NULL);
}
uint64_t
@@ -806,8 +784,11 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dp->dp_tmp_userrefs_obj == 0);
ASSERT(dmu_tx_is_syncing(tx));
- dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
+ dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
+ DMU_OT_NONE, 0, tx);
+
+ VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
+ sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
}
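This patch repeatedly replaces zap_create_link() with an explicit zap_create() plus a zap_add()/zap_update() of the new object number under its parent. A sketch of the helper being inlined, reconstructed only from the calls visible in these hunks (not the canonical implementation):

static uint64_t
zap_create_link_sketch(objset_t *os, dmu_object_type_t ot,
    uint64_t parent_obj, const char *name, dmu_tx_t *tx)
{
	uint64_t new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx);

	VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1,
	    &new_obj, tx) == 0);
	return (new_obj);
}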
static int
diff --git a/uts/common/fs/zfs/dsl_prop.c b/uts/common/fs/zfs/dsl_prop.c
index 5bbe14ff691d..aa66b32e7938 100644
--- a/uts/common/fs/zfs/dsl_prop.c
+++ b/uts/common/fs/zfs/dsl_prop.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -703,9 +702,11 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
}
- spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE ||
- source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx,
- "%s=%s", propname, (valstr == NULL ? "" : valstr));
+ spa_history_log_internal((source == ZPROP_SRC_NONE ||
+ source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT :
+ LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx,
+ "%s=%s dataset = %llu", propname,
+ (valstr == NULL ? "" : valstr), ds->ds_object);
if (tbuf != NULL)
kmem_free(tbuf, ZAP_MAXVALUELEN);
@@ -754,6 +755,24 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
}
+void
+dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+ dmu_tx_t *tx)
+{
+ objset_t *mos = dd->dd_pool->dp_meta_objset;
+ uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
+
+ ASSERT(dmu_tx_is_syncing(tx));
+
+ VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx));
+
+ dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE);
+
+ spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx,
+ "%s=%llu dataset = %llu", name, (u_longlong_t)val,
+ dd->dd_phys->dd_head_dataset_obj);
+}
+
int
dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
int intsz, int numints, const void *buf)
diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c
index 8f08f04a0655..56d41083673e 100644
--- a/uts/common/fs/zfs/dsl_scan.c
+++ b/uts/common/fs/zfs/dsl_scan.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_scan.h>
@@ -45,7 +44,6 @@
#include <sys/ddt.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
-#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif
@@ -228,7 +226,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_sync_state(scn, tx);
- spa_history_log_internal(spa, "scan setup", tx,
+ spa_history_log_internal(LOG_POOL_SCAN, spa, tx,
"func=%u mintxg=%llu maxtxg=%llu",
*funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
}
@@ -277,7 +275,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
else
scn->scn_phys.scn_state = DSS_CANCELED;
- spa_history_log_internal(spa, "scan done", tx,
+ spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx,
"complete=%u", complete);
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
@@ -384,6 +382,55 @@ dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
priority, zio_flags, arc_flags, zb));
}
+static boolean_t
+bookmark_is_zero(const zbookmark_t *zb)
+{
+ return (zb->zb_objset == 0 && zb->zb_object == 0 &&
+ zb->zb_level == 0 && zb->zb_blkid == 0);
+}
+
+/* dnp is the dnode for zb1->zb_object */
+static boolean_t
+bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
+ const zbookmark_t *zb2)
+{
+ uint64_t zb1nextL0, zb2thisobj;
+
+ ASSERT(zb1->zb_objset == zb2->zb_objset);
+ ASSERT(zb2->zb_level == 0);
+
+ /*
+ * A bookmark in the deadlist is considered to be after
+ * everything else.
+ */
+ if (zb2->zb_object == DMU_DEADLIST_OBJECT)
+ return (B_TRUE);
+
+ /* The objset_phys_t isn't before anything. */
+ if (dnp == NULL)
+ return (B_FALSE);
+
+ zb1nextL0 = (zb1->zb_blkid + 1) <<
+ ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+
+ zb2thisobj = zb2->zb_object ? zb2->zb_object :
+ zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+
+ if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
+ uint64_t nextobj = zb1nextL0 *
+ (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
+ return (nextobj <= zb2thisobj);
+ }
+
+ if (zb1->zb_object < zb2thisobj)
+ return (B_TRUE);
+ if (zb1->zb_object > zb2thisobj)
+ return (B_FALSE);
+ if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+ return (B_FALSE);
+ return (zb1nextL0 <= zb2->zb_blkid);
+}
+
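bookmark_is_before() decides ordering by computing the first level-0 blkid after everything under zb1; each level of indirection multiplies the span by the number of block pointers per indirect block, 1 << (dn_indblkshift - SPA_BLKPTRSHIFT). A standalone check of that shift arithmetic with hypothetical values:

#include <stdio.h>
#include <stdint.h>

#define SPA_BLKPTRSHIFT	7	/* log2 of blkptr_t size */

int
main(void)
{
	/* Hypothetical level-1 bookmark: blkid 3, indirect shift 14. */
	uint64_t zb_blkid = 3, zb_level = 1, dn_indblkshift = 14;

	/* First level-0 blkid after everything under this bookmark. */
	uint64_t nextL0 = (zb_blkid + 1) <<
	    (zb_level * (dn_indblkshift - SPA_BLKPTRSHIFT));
	printf("%llu\n", (unsigned long long)nextL0);	/* 512 */
	return (0);
}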
static uint64_t
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
{
@@ -415,7 +462,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
if (scn->scn_pausing)
return (B_TRUE); /* we're already pausing */
- if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
+ if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
/* We only know how to resume from level-0 blocks. */
@@ -570,13 +617,13 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* We never skip over user/group accounting objects (obj<0)
*/
- if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) &&
+ if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
(int64_t)zb->zb_object >= 0) {
/*
* If we already visited this bp & everything below (in
* a prior txg sync), don't bother doing it again.
*/
- if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+ if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
return (B_TRUE);
/*
@@ -769,6 +816,22 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return;
+ if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
+ /*
+ * For non-user-accounting blocks, we need to read the
+ * new bp (from a deleted snapshot, found in
+ * check_existing_xlation). If we used the old bp,
+ * pointers inside this block from before we resumed
+ * would be untranslated.
+ *
+ * For user-accounting blocks, we need to read the old
+ * bp, because we will apply the entire space delta to
+ * it (original untranslated -> translations from
+ * deleted snap -> now).
+ */
+ bp_toread = *bp;
+ }
+
if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
&buf) != 0)
return;
@@ -1333,28 +1396,19 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
zap_cursor_fini(&zc);
}
-static boolean_t
-dsl_scan_free_should_pause(dsl_scan_t *scn)
+static int
+dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
+ dsl_scan_t *scn = arg;
uint64_t elapsed_nanosecs;
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
- return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+
+ if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
txg_sync_waiting(scn->scn_dp)) ||
- spa_shutting_down(scn->scn_dp->dp_spa));
-}
-
-static int
-dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
-{
- dsl_scan_t *scn = arg;
-
- if (!scn->scn_is_bptree ||
- (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
- if (dsl_scan_free_should_pause(scn))
- return (ERESTART);
- }
+ spa_shutting_down(scn->scn_dp->dp_spa))
+ return (ERESTART);
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
dmu_tx_get_txg(tx), bp, 0));
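dsl_scan_free_cb() above pauses the free work when the txg has run too long, when the sync thread is waiting and the minimum free time has elapsed, or when the pool is shutting down. A restatement of that predicate (parameter names are mine; the expression mirrors the code above, including its comparison of elapsed microseconds against a millisecond tunable):

#include <stdint.h>
#include <stdbool.h>

#define NANOSEC		1000000000ULL
#define MICROSEC	1000000ULL

static bool
free_should_pause(uint64_t elapsed_ns, uint64_t txg_timeout,
    uint64_t free_min_time_ms, bool sync_waiting, bool shutting_down)
{
	return (elapsed_ns / NANOSEC > txg_timeout ||
	    (elapsed_ns / MICROSEC > free_min_time_ms && sync_waiting) ||
	    shutting_down);
}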
@@ -1379,10 +1433,6 @@ dsl_scan_active(dsl_scan_t *scn)
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (B_TRUE);
- if (spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- return (B_TRUE);
- }
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
&used, &comp, &uncomp);
@@ -1429,40 +1479,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* traversing it.
*/
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
- scn->scn_is_bptree = B_FALSE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
err = bpobj_iterate(&dp->dp_free_bpobj,
- dsl_scan_free_block_cb, scn, tx);
+ dsl_scan_free_cb, scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
-
- if (err == 0 && spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- scn->scn_is_bptree = B_TRUE;
- scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
- NULL, ZIO_FLAG_MUSTSUCCEED);
- err = bptree_iterate(dp->dp_meta_objset,
- dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
- scn, tx);
- VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
- if (err != 0)
- return;
-
- /* disable async destroy feature */
- spa_feature_decr(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
- ASSERT(!spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
- VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, tx));
- VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
- dp->dp_bptree_obj, tx));
- dp->dp_bptree_obj = 0;
- }
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
- "free_bpobj/bptree txg %llu",
+ "free_bpobj txg %llu",
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)
(gethrtime() - scn->scn_sync_start_time) / MICROSEC,
@@ -1577,8 +1601,6 @@ count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
- if (t & DMU_OT_NEWTYPE)
- t = DMU_OT_OTHER;
zfs_blkstat_t *zb = &zab->zab_type[l][t];
int equal;
diff --git a/uts/common/fs/zfs/dsl_synctask.c b/uts/common/fs/zfs/dsl_synctask.c
index 312423e943b5..b0818ce274d4 100644
--- a/uts/common/fs/zfs/dsl_synctask.c
+++ b/uts/common/fs/zfs/dsl_synctask.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -86,17 +85,17 @@ top:
/* Do a preliminary error check. */
dstg->dstg_err = 0;
-#ifdef ZFS_DEBUG
- /*
- * Only check half the time, otherwise, the sync-context
- * check will almost never fail.
- */
- if (spa_get_random(2) == 0)
- goto skip;
-#endif
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
+#ifdef ZFS_DEBUG
+ /*
+ * Only check half the time, otherwise, the sync-context
+ * check will almost never fail.
+ */
+ if (spa_get_random(2) == 0)
+ continue;
+#endif
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
@@ -108,7 +107,6 @@ top:
dmu_tx_commit(tx);
return (dstg->dstg_err);
}
-skip:
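With the skip label gone, the ZFS_DEBUG coin flip now happens per task inside the loop rather than once for the whole group, so roughly half the individual checkfuncs run on every pass instead of all-or-nothing. A toy contrast of the two sampling strategies:

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	int ntasks = 1000, checked_old = 0, checked_new = 0;

	/* Removed form: skip the whole preliminary pass half the time. */
	if (rand() % 2 != 0)
		checked_old = ntasks;

	/* Restored form: flip the coin per task. */
	for (int i = 0; i < ntasks; i++)
		if (rand() % 2 != 0)
			checked_new++;

	printf("old = %d, new = %d\n", checked_old, checked_new);
	return (0);
}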
/*
* We don't generally have many sync tasks, so pay the price of
diff --git a/uts/common/fs/zfs/metaslab.c b/uts/common/fs/zfs/metaslab.c
index 2f7c882c8c51..17b4b12c4ee4 100644
--- a/uts/common/fs/zfs/metaslab.c
+++ b/uts/common/fs/zfs/metaslab.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -31,29 +30,10 @@
#include <sys/vdev_impl.h>
#include <sys/zio.h>
-/*
- * Allow allocations to switch to gang blocks quickly. We do this to
- * avoid having to load lots of space_maps in a given txg. There are,
- * however, some cases where we want to avoid "fast" ganging and instead
- * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang, zil, or dump device related allocations
- * to "fast" gang.
- */
-#define CAN_FASTGANG(flags) \
- (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
- METASLAB_GANG_AVOID)))
-
uint64_t metaslab_aliquot = 512ULL << 10;
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
- * This value defines the number of allowed allocation failures per vdev.
- * If a device reaches this threshold in a given txg then we consider skipping
- * allocations on that device.
- */
-int zfs_mg_alloc_failures;
-
-/*
* Metaslab debugging: when set, keeps all space maps in core to verify frees.
*/
static int metaslab_debug = 0;
@@ -691,7 +671,7 @@ static space_map_ops_t metaslab_ndf_ops = {
metaslab_ndf_fragmented
};
-space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
+space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;
/*
* ==========================================================================
@@ -864,7 +844,7 @@ metaslab_prefetch(metaslab_group_t *mg)
}
static int
-metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
+metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
{
metaslab_group_t *mg = msp->ms_group;
space_map_t *sm = &msp->ms_map;
@@ -897,6 +877,13 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
mutex_exit(&mg->mg_lock);
}
+ /*
+ * If we were able to load the map then make sure
+ * that this map is still able to satisfy our request.
+ */
+ if (msp->ms_weight < size)
+ return (ENOSPC);
+
metaslab_group_sort(msp->ms_group, msp,
msp->ms_weight | activation_weight);
}
@@ -1112,7 +1099,6 @@ void
metaslab_sync_reassess(metaslab_group_t *mg)
{
vdev_t *vd = mg->mg_vd;
- int64_t failures = mg->mg_alloc_failures;
/*
* Re-evaluate all metaslabs which have lower offsets than the
@@ -1129,8 +1115,6 @@ metaslab_sync_reassess(metaslab_group_t *mg)
mutex_exit(&msp->ms_lock);
}
- atomic_add_64(&mg->mg_alloc_failures, -failures);
-
/*
* Prefetch the next potential metaslabs
*/
@@ -1155,10 +1139,9 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
}
static uint64_t
-metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
- uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
+metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
+ uint64_t min_distance, dva_t *dva, int d)
{
- spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_t *msp = NULL;
uint64_t offset = -1ULL;
avl_tree_t *t = &mg->mg_metaslab_tree;
@@ -1179,17 +1162,11 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
mutex_enter(&mg->mg_lock);
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
- if (msp->ms_weight < asize) {
- spa_dbgmsg(spa, "%s: failed to meet weight "
- "requirement: vdev %llu, txg %llu, mg %p, "
- "msp %p, psize %llu, asize %llu, "
- "failures %llu, weight %llu",
- spa_name(spa), mg->mg_vd->vdev_id, txg,
- mg, msp, psize, asize,
- mg->mg_alloc_failures, msp->ms_weight);
+ if (msp->ms_weight < size) {
mutex_exit(&mg->mg_lock);
return (-1ULL);
}
+
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
break;
@@ -1208,25 +1185,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
if (msp == NULL)
return (-1ULL);
- /*
- * If we've already reached the allowable number of failed
- * allocation attempts on this metaslab group then we
- * consider skipping it. We skip it only if we're allowed
- * to "fast" gang, the physical size is larger than
- * a gang block, and we're attempting to allocate from
- * the primary metaslab.
- */
- if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
- CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
- activation_weight == METASLAB_WEIGHT_PRIMARY) {
- spa_dbgmsg(spa, "%s: skipping metaslab group: "
- "vdev %llu, txg %llu, mg %p, psize %llu, "
- "asize %llu, failures %llu", spa_name(spa),
- mg->mg_vd->vdev_id, txg, mg, psize, asize,
- mg->mg_alloc_failures);
- return (-1ULL);
- }
-
mutex_enter(&msp->ms_lock);
/*
@@ -1235,7 +1193,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
* another thread may have changed the weight while we
* were blocked on the metaslab lock.
*/
- if (msp->ms_weight < asize || (was_active &&
+ if (msp->ms_weight < size || (was_active &&
!(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
activation_weight == METASLAB_WEIGHT_PRIMARY)) {
mutex_exit(&msp->ms_lock);
@@ -1250,16 +1208,14 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
continue;
}
- if (metaslab_activate(msp, activation_weight) != 0) {
+ if (metaslab_activate(msp, activation_weight, size) != 0) {
mutex_exit(&msp->ms_lock);
continue;
}
- if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
+ if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
break;
- atomic_inc_64(&mg->mg_alloc_failures);
-
metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
mutex_exit(&msp->ms_lock);
@@ -1268,7 +1224,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
+ space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
mutex_exit(&msp->ms_lock);
@@ -1395,8 +1351,7 @@ top:
asize = vdev_psize_to_asize(vd, psize);
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
- offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
- dva, d, flags);
+ offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
if (offset != -1ULL) {
/*
* If we've just selected this metaslab group,
@@ -1408,24 +1363,18 @@ top:
vdev_stat_t *vs = &vd->vdev_stat;
int64_t vu, cu;
- vu = (vs->vs_alloc * 100) / (vs->vs_space + 1);
- cu = (mc->mc_alloc * 100) / (mc->mc_space + 1);
+ /*
+ * Determine percent used in units of 0..1024.
+ * (This is just to avoid floating point.)
+ */
+ vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
+ cu = (mc->mc_alloc << 10) / (mc->mc_space + 1);
/*
- * Calculate how much more or less we should
- * try to allocate from this device during
- * this iteration around the rotor.
- * For example, if a device is 80% full
- * and the pool is 20% full then we should
- * reduce allocations by 60% on this device.
- *
- * mg_bias = (20 - 80) * 512K / 100 = -307K
- *
- * This reduces allocations by 307K for this
- * iteration.
+ * Bias by at most +/- 25% of the aliquot.
*/
mg->mg_bias = ((cu - vu) *
- (int64_t)mg->mg_aliquot) / 100;
+ (int64_t)mg->mg_aliquot) / (1024 * 4);
}
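Worked numbers for the bias computed above, assuming a vdev 80% full in a pool 20% full with a 512K aliquot (hypothetical figures):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int64_t vs_alloc = 80, vs_space = 100;	/* vdev 80% full */
	int64_t mc_alloc = 20, mc_space = 100;	/* pool 20% full */
	int64_t aliquot = 512 << 10;

	/* Percent used in units of 0..1024, as in the hunk above. */
	int64_t vu = (vs_alloc << 10) / (vs_space + 1);
	int64_t cu = (mc_alloc << 10) / (mc_space + 1);

	/*
	 * (cu - vu) lies in [-1024, 1024], so dividing by 1024 * 4
	 * caps the bias at +/- 25% of the aliquot.
	 */
	int64_t bias = ((cu - vu) * aliquot) / (1024 * 4);
	printf("bias = %lld bytes\n", (long long)bias);	/* about -76K */
	return (0);
}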
if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
@@ -1539,7 +1488,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
mutex_enter(&msp->ms_lock);
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
- error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
+ error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
error = ENOENT;
diff --git a/uts/common/fs/zfs/rrwlock.c b/uts/common/fs/zfs/rrwlock.c
index 7f9290bd44c1..4cef53f95132 100644
--- a/uts/common/fs/zfs/rrwlock.c
+++ b/uts/common/fs/zfs/rrwlock.c
@@ -22,9 +22,6 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
#include <sys/refcount.h>
#include <sys/rrwlock.h>
@@ -265,13 +262,3 @@ rrw_held(rrwlock_t *rrl, krw_t rw)
return (held);
}
-
-void
-rrw_tsd_destroy(void *arg)
-{
- rrw_node_t *rn = arg;
- if (rn != NULL) {
- panic("thread %p terminating with rrw lock %p held",
- (void *)curthread, (void *)rn->rn_rrl);
- }
-}
diff --git a/uts/common/fs/zfs/sa.c b/uts/common/fs/zfs/sa.c
index 06607d784e42..4cb4546b2511 100644
--- a/uts/common/fs/zfs/sa.c
+++ b/uts/common/fs/zfs/sa.c
@@ -18,11 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Portions Copyright 2011 iXsystems, Inc
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -429,9 +426,10 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
char attr_name[8];
if (sa->sa_layout_attr_obj == 0) {
- sa->sa_layout_attr_obj = zap_create_link(os,
- DMU_OT_SA_ATTR_LAYOUTS,
- sa->sa_master_obj, SA_LAYOUTS, tx);
+ sa->sa_layout_attr_obj = zap_create(os,
+ DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
+ VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
+ &sa->sa_layout_attr_obj, tx) == 0);
}
(void) snprintf(attr_name, sizeof (attr_name),
@@ -607,14 +605,14 @@ sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
* and spill buffer.
*/
if (buftype == SA_BONUS && *index == -1 &&
- *total + P2ROUNDUP(hdrsize, 8) >
+ P2ROUNDUP(*total + hdrsize, 8) >
(full_space - sizeof (blkptr_t))) {
*index = i;
done = B_TRUE;
}
next:
- if (*total + P2ROUNDUP(hdrsize, 8) > full_space &&
+ if (P2ROUNDUP(*total + hdrsize, 8) > full_space &&
buftype == SA_BONUS)
*will_spill = B_TRUE;
}
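The two expressions in this hunk differ whenever *total is not already 8-byte aligned: rounding the header alone can overstate the total, while rounding the sum reflects that the header packs against the preceding bytes. A standalone demonstration with hypothetical sizes:

#include <stdio.h>
#include <stdint.h>

/* Standard power-of-two round-up, as used in the hunk above. */
#define P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	uint64_t total = 21, hdrsize = 10;

	/* Reverted form: round only the header, then add. */
	printf("%llu\n",
	    (unsigned long long)(total + P2ROUNDUP(hdrsize, 8)));	/* 37 */
	/* Replacement form: add first, then round the sum. */
	printf("%llu\n",
	    (unsigned long long)P2ROUNDUP(total + hdrsize, 8));		/* 32 */
	return (0);
}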
@@ -1553,9 +1551,10 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
}
if (sa->sa_reg_attr_obj == NULL) {
- sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
- DMU_OT_SA_ATTR_REGISTRATION,
- sa->sa_master_obj, SA_REGISTRY, tx);
+ sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
+ DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
+ VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj,
+ SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
}
for (i = 0; i != sa->sa_num_attrs; i++) {
if (sa->sa_attr_table[i].sa_registered)
diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c
index 828d5e266643..b6190e4cfafe 100644
--- a/uts/common/fs/zfs/spa.c
+++ b/uts/common/fs/zfs/spa.c
@@ -21,8 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -62,7 +60,6 @@
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
-#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/bootprops.h>
@@ -114,7 +111,6 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
-static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
@@ -169,18 +165,15 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
- vdev_t *rvd = spa->spa_root_vdev;
- dsl_pool_t *pool = spa->spa_dsl_pool;
uint64_t size;
uint64_t alloc;
- uint64_t space;
uint64_t cap, version;
zprop_source_t src = ZPROP_SRC_NONE;
spa_config_dirent_t *dp;
ASSERT(MUTEX_HELD(&spa->spa_props_lock));
- if (rvd != NULL) {
+ if (spa->spa_root_vdev != NULL) {
alloc = metaslab_class_get_alloc(spa_normal_class(spa));
size = metaslab_class_get_space(spa_normal_class(spa));
spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
@@ -188,15 +181,6 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
size - alloc, src);
-
- space = 0;
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *tvd = rvd->vdev_child[c];
- space += tvd->vdev_max_asize - tvd->vdev_asize;
- }
- spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space,
- src);
-
spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
(spa_mode(spa) == FREAD), src);
@@ -207,7 +191,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
ddt_get_pool_dedup_ratio(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
- rvd->vdev_state, src);
+ spa->spa_root_vdev->vdev_state, src);
version = spa_version(spa);
if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
@@ -217,29 +201,8 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
}
- if (pool != NULL) {
- dsl_dir_t *freedir = pool->dp_free_dir;
-
- /*
- * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
- * when opening pools before this version freedir will be NULL.
- */
- if (freedir != NULL) {
- spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
- freedir->dd_phys->dd_used_bytes, src);
- } else {
- spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
- NULL, 0, src);
- }
- }
-
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
- if (spa->spa_comment != NULL) {
- spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
- 0, ZPROP_SRC_LOCAL);
- }
-
if (spa->spa_root != NULL)
spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
0, ZPROP_SRC_LOCAL);
@@ -372,55 +335,25 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
nvpair_t *elem;
int error = 0, reset_bootfs = 0;
uint64_t objnum;
- boolean_t has_feature = B_FALSE;
elem = NULL;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+ zpool_prop_t prop;
+ char *propname, *strval;
uint64_t intval;
- char *strval, *slash, *check, *fname;
- const char *propname = nvpair_name(elem);
- zpool_prop_t prop = zpool_name_to_prop(propname);
-
- switch (prop) {
- case ZPROP_INVAL:
- if (!zpool_prop_feature(propname)) {
- error = EINVAL;
- break;
- }
-
- /*
- * Sanitize the input.
- */
- if (nvpair_type(elem) != DATA_TYPE_UINT64) {
- error = EINVAL;
- break;
- }
+ objset_t *os;
+ char *slash;
- if (nvpair_value_uint64(elem, &intval) != 0) {
- error = EINVAL;
- break;
- }
+ propname = nvpair_name(elem);
- if (intval != 0) {
- error = EINVAL;
- break;
- }
-
- fname = strchr(propname, '@') + 1;
- if (zfeature_lookup_name(fname, NULL) != 0) {
- error = EINVAL;
- break;
- }
-
- has_feature = B_TRUE;
- break;
+ if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
+ return (EINVAL);
+ switch (prop) {
case ZPOOL_PROP_VERSION:
error = nvpair_value_uint64(elem, &intval);
if (!error &&
- (intval < spa_version(spa) ||
- intval > SPA_VERSION_BEFORE_FEATURES ||
- has_feature))
+ (intval < spa_version(spa) || intval > SPA_VERSION))
error = EINVAL;
break;
@@ -457,7 +390,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = nvpair_value_string(elem, &strval);
if (!error) {
- objset_t *os;
uint64_t compress;
if (strval == NULL || strval[0] == '\0') {
@@ -530,26 +462,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = EINVAL;
break;
- case ZPOOL_PROP_COMMENT:
- if ((error = nvpair_value_string(elem, &strval)) != 0)
- break;
- for (check = strval; *check != '\0'; check++) {
- /*
- * The kernel doesn't have an easy isprint()
- * check. For this kernel check, we merely
- * check ASCII apart from DEL. Fix this if
- * there is an easy-to-use kernel isprint().
- */
- if (*check >= 0x7f) {
- error = EINVAL;
- break;
- }
- check++;
- }
- if (strlen(strval) > ZPROP_MAX_COMMENT)
- error = E2BIG;
- break;
-
case ZPOOL_PROP_DEDUPDITTO:
if (spa_version(spa) < SPA_VERSION_DEDUP)
error = ENOTSUP;
@@ -607,58 +519,33 @@ int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
int error;
- nvpair_t *elem = NULL;
+ nvpair_t *elem;
boolean_t need_sync = B_FALSE;
+ zpool_prop_t prop;
if ((error = spa_prop_validate(spa, nvp)) != 0)
return (error);
+ elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
- zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
+ if ((prop = zpool_name_to_prop(
+ nvpair_name(elem))) == ZPROP_INVAL)
+ return (EINVAL);
if (prop == ZPOOL_PROP_CACHEFILE ||
prop == ZPOOL_PROP_ALTROOT ||
prop == ZPOOL_PROP_READONLY)
continue;
- if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
- uint64_t ver;
-
- if (prop == ZPOOL_PROP_VERSION) {
- VERIFY(nvpair_value_uint64(elem, &ver) == 0);
- } else {
- ASSERT(zpool_prop_feature(nvpair_name(elem)));
- ver = SPA_VERSION_FEATURES;
- need_sync = B_TRUE;
- }
-
- /* Save time if the version is already set. */
- if (ver == spa_version(spa))
- continue;
-
- /*
- * In addition to the pool directory object, we might
- * create the pool properties object, the features for
- * read object, the features for write object, or the
- * feature descriptions object.
- */
- error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
- spa_sync_version, spa, &ver, 6);
- if (error)
- return (error);
- continue;
- }
-
need_sync = B_TRUE;
break;
}
- if (need_sync) {
+ if (need_sync)
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
- spa, nvp, 6));
- }
-
- return (0);
+ spa, nvp, 3));
+ else
+ return (0);
}
/*
@@ -676,43 +563,6 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
}
/*
- * Change the GUID for the pool. This is done so that we can later
- * re-import a pool built from a clone of our own vdevs. We will modify
- * the root vdev's guid, our own pool guid, and then mark all of our
- * vdevs dirty. Note that we must make sure that all our vdevs are
- * online when we do this, or else any vdevs that weren't present
- * would be orphaned from our pool. We are also going to issue a
- * sysevent to update any watchers.
- */
-int
-spa_change_guid(spa_t *spa)
-{
- uint64_t oldguid, newguid;
- uint64_t txg;
-
- if (!(spa_mode_global & FWRITE))
- return (EROFS);
-
- txg = spa_vdev_enter(spa);
-
- if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY)
- return (spa_vdev_exit(spa, NULL, txg, ENXIO));
-
- oldguid = spa_guid(spa);
- newguid = spa_generate_guid(NULL);
- ASSERT3U(oldguid, !=, newguid);
-
- spa->spa_root_vdev->vdev_guid = newguid;
- spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid);
-
- vdev_config_dirty(spa->spa_root_vdev);
-
- spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID);
-
- return (spa_vdev_exit(spa, NULL, txg, 0));
-}
-
-/*
* ==========================================================================
* SPA state manipulation (open/create/destroy/import/export)
* ==========================================================================
@@ -760,7 +610,7 @@ static taskq_t *
spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
uint_t value)
{
- uint_t flags = 0;
+ uint_t flags = TASKQ_PREPOPULATE;
boolean_t batch = B_FALSE;
switch (mode) {
@@ -1138,10 +988,8 @@ spa_unload(spa_t *spa)
}
spa->spa_spares.sav_count = 0;
- for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
- vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
+ for (i = 0; i < spa->spa_l2cache.sav_count; i++)
vdev_free(spa->spa_l2cache.sav_vdevs[i]);
- }
if (spa->spa_l2cache.sav_vdevs) {
kmem_free(spa->spa_l2cache.sav_vdevs,
spa->spa_l2cache.sav_count * sizeof (void *));
@@ -1155,11 +1003,6 @@ spa_unload(spa_t *spa)
spa->spa_async_suspended = 0;
- if (spa->spa_comment != NULL) {
- spa_strfree(spa->spa_comment);
- spa->spa_comment = NULL;
- }
-
spa_config_exit(spa, SCL_ALL, FTAG);
}
@@ -1369,13 +1212,11 @@ spa_load_l2cache(spa_t *spa)
vd = oldvdevs[i];
if (vd != NULL) {
- ASSERT(vd->vdev_isl2cache);
-
if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
pool != 0ULL && l2arc_vdev_present(vd))
l2arc_remove_vdev(vd);
- vdev_clear_stats(vd);
- vdev_free(vd);
+ (void) vdev_close(vd);
+ spa_l2cache_remove(vd);
}
}
@@ -1682,7 +1523,7 @@ spa_load_verify_done(zio_t *zio)
int error = zio->io_error;
if (error) {
- if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
+ if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
type != DMU_OT_INTENT_LOG)
atomic_add_64(&sle->sle_meta_count, 1);
else
@@ -1877,7 +1718,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
{
nvlist_t *config = spa->spa_config;
char *ereport = FM_EREPORT_ZFS_POOL;
- char *comment;
int error;
uint64_t pool_guid;
nvlist_t *nvl;
@@ -1885,10 +1725,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
return (EINVAL);
- ASSERT(spa->spa_comment == NULL);
- if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
- spa->spa_comment = spa_strdup(comment);
-
/*
* Versioning wasn't explicitly added to the label until later, so if
* it's not present treat it as the initial version.
@@ -1904,7 +1740,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
spa_guid_exists(pool_guid, 0)) {
error = EEXIST;
} else {
- spa->spa_config_guid = pool_guid;
+ spa->spa_load_guid = pool_guid;
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
&nvl) == 0) {
@@ -1912,9 +1748,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
KM_SLEEP) == 0);
}
- nvlist_free(spa->spa_load_info);
- spa->spa_load_info = fnvlist_alloc();
-
gethrestime(&spa->spa_loaded_ts);
error = spa_load_impl(spa, pool_guid, config, state, type,
mosconfig, &ereport);
@@ -1947,14 +1780,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
{
int error = 0;
nvlist_t *nvroot = NULL;
- nvlist_t *label;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
uint64_t children, config_cache_txg = spa->spa_config_txg;
int orig_mode = spa->spa_mode;
int parse;
uint64_t obj;
- boolean_t missing_feat_write = B_FALSE;
/*
* If this is an untrusted config, access the pool in read-only mode.
@@ -2021,7 +1852,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
*/
if (type != SPA_IMPORT_ASSEMBLE) {
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- error = vdev_validate(rvd, mosconfig);
+ error = vdev_validate(rvd);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error != 0)
@@ -2034,78 +1865,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
/*
* Find the best uberblock.
*/
- vdev_uberblock_load(rvd, ub, &label);
+ vdev_uberblock_load(NULL, rvd, ub);
/*
* If we weren't able to find a single valid uberblock, return failure.
*/
- if (ub->ub_txg == 0) {
- nvlist_free(label);
+ if (ub->ub_txg == 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
- }
/*
- * If the pool has an unsupported version we can't open it.
+ * If the pool is newer than the code, we can't open it.
*/
- if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
- nvlist_free(label);
+ if (ub->ub_version > SPA_VERSION)
return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
- }
-
- if (ub->ub_version >= SPA_VERSION_FEATURES) {
- nvlist_t *features;
-
- /*
- * If we weren't able to find what's necessary for reading the
- * MOS in the label, return failure.
- */
- if (label == NULL || nvlist_lookup_nvlist(label,
- ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
- nvlist_free(label);
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
- ENXIO));
- }
-
- /*
- * Update our in-core representation with the definitive values
- * from the label.
- */
- nvlist_free(spa->spa_label_features);
- VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
- }
-
- nvlist_free(label);
-
- /*
- * Look through entries in the label nvlist's features_for_read. If
- * there is a feature listed there which we don't understand then we
- * cannot open a pool.
- */
- if (ub->ub_version >= SPA_VERSION_FEATURES) {
- nvlist_t *unsup_feat;
-
- VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
- 0);
-
- for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
- NULL); nvp != NULL;
- nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
- if (!zfeature_is_supported(nvpair_name(nvp))) {
- VERIFY(nvlist_add_string(unsup_feat,
- nvpair_name(nvp), "") == 0);
- }
- }
-
- if (!nvlist_empty(unsup_feat)) {
- VERIFY(nvlist_add_nvlist(spa->spa_load_info,
- ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
- nvlist_free(unsup_feat);
- return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
- ENOTSUP));
- }
-
- nvlist_free(unsup_feat);
- }
/*
* If the vdev guid sum doesn't match the uberblock, we have an
@@ -2139,7 +1911,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa->spa_claim_max_txg = spa->spa_first_txg;
spa->spa_prev_software_version = ub->ub_software_version;
- error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
+ error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
if (error)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
@@ -2147,84 +1919,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- if (spa_version(spa) >= SPA_VERSION_FEATURES) {
- boolean_t missing_feat_read = B_FALSE;
- nvlist_t *unsup_feat;
-
- if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
- &spa->spa_feat_for_read_obj) != 0) {
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- }
-
- if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
- &spa->spa_feat_for_write_obj) != 0) {
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- }
-
- if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
- &spa->spa_feat_desc_obj) != 0) {
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- }
-
- VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
- 0);
-
- if (!feature_is_supported(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
- unsup_feat))
- missing_feat_read = B_TRUE;
-
- if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
- if (!feature_is_supported(spa->spa_meta_objset,
- spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
- unsup_feat))
- missing_feat_write = B_TRUE;
- }
-
- if (!nvlist_empty(unsup_feat)) {
- VERIFY(nvlist_add_nvlist(spa->spa_load_info,
- ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
- }
-
- nvlist_free(unsup_feat);
-
- if (!missing_feat_read) {
- fnvlist_add_boolean(spa->spa_load_info,
- ZPOOL_CONFIG_CAN_RDONLY);
- }
-
- /*
- * If the state is SPA_LOAD_TRYIMPORT, our objective is
- * twofold: to determine whether the pool is available for
- * import in read-write mode and (if it is not) whether the
- * pool is available for import in read-only mode. If the pool
- * is available for import in read-write mode, it is displayed
- * as available in userland; if it is not available for import
- * in read-only mode, it is displayed as unavailable in
- * userland. If the pool is available for import in read-only
- * mode but not read-write mode, it is displayed as unavailable
- * in userland with a special note that the pool is actually
- * available for open in read-only mode.
- *
- * As a result, if the state is SPA_LOAD_TRYIMPORT and we are
- * missing a feature for write, we must first determine whether
- * the pool can be opened read-only before returning to
- * userland in order to know whether to display the
- * abovementioned note.
- */
- if (missing_feat_read || (missing_feat_write &&
- spa_writeable(spa))) {
- return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
- ENOTSUP));
- }
- }
-
- spa->spa_is_initializing = B_TRUE;
- error = dsl_pool_open(spa->spa_dsl_pool);
- spa->spa_is_initializing = B_FALSE;
- if (error != 0)
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
-
if (!mosconfig) {
uint64_t hostid;
nvlist_t *policy = NULL, *nvconfig;
@@ -2255,7 +1949,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
cmn_err(CE_WARN, "pool '%s' could not be "
"loaded as it was last accessed by "
"another system (host: %s hostid: 0x%lx). "
- "See: http://illumos.org/msg/ZFS-8000-EY",
+ "See: http://www.sun.com/msg/ZFS-8000-EY",
spa_name(spa), hostname,
(unsigned long)hostid);
return (EBADF);
@@ -2442,7 +2136,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
nvlist_free(nvconfig);
/*
- * Now that we've validated the config, check the state of the
+ * Now that we've validated the config, check the state of the
* root vdev. If it can't be opened, it indicates one or
* more toplevel vdevs are faulted.
*/
@@ -2455,17 +2149,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
}
}
- if (missing_feat_write) {
- ASSERT(state == SPA_LOAD_TRYIMPORT);
-
- /*
- * At this point, we know that we can open the pool in
- * read-only mode but not read-write mode. We now have enough
- * information and can return to userland.
- */
- return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
- }
-
/*
* We've successfully opened the pool, verify that we're ready
* to start pushing transactions.
@@ -2545,12 +2228,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_async_request(spa, SPA_ASYNC_RESILVER);
/*
- * Log the fact that we booted up (so that we can detect if
- * we rebooted in the middle of an operation).
- */
- spa_history_log_version(spa, "open");
-
- /*
* Delete any inconsistent datasets.
*/
(void) dmu_objset_find(spa_name(spa),
@@ -2581,18 +2258,10 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
-/*
- * If spa_load() fails this function will try loading prior txg's. If
- * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
- * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
- * function will not rewind the pool and will return the same error as
- * spa_load().
- */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
uint64_t max_request, int rewind_flags)
{
- nvlist_t *loadinfo = NULL;
nvlist_t *config = NULL;
int load_error, rewind_error;
uint64_t safe_rewind_txg;
@@ -2621,18 +2290,9 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
return (load_error);
}
- if (state == SPA_LOAD_RECOVER) {
- /* Price of rolling back is discarding txgs, including log */
+ /* Price of rolling back is discarding txgs, including log */
+ if (state == SPA_LOAD_RECOVER)
spa_set_log_state(spa, SPA_LOG_CLEAR);
- } else {
- /*
- * If we aren't rolling back save the load info from our first
- * import attempt so that we can restore it after attempting
- * to rewind.
- */
- loadinfo = spa->spa_load_info;
- spa->spa_load_info = fnvlist_alloc();
- }
spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
@@ -2656,20 +2316,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
if (config && (rewind_error || state != SPA_LOAD_RECOVER))
spa_config_set(spa, config);
- if (state == SPA_LOAD_RECOVER) {
- ASSERT3P(loadinfo, ==, NULL);
- return (rewind_error);
- } else {
- /* Store the rewind info as part of the initial load info */
- fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
- spa->spa_load_info);
-
- /* Restore the initial load info */
- fnvlist_free(spa->spa_load_info);
- spa->spa_load_info = loadinfo;
-
- return (load_error);
- }
+ return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
}
/*
@@ -2939,50 +2586,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
}
}
-static void
-spa_add_feature_stats(spa_t *spa, nvlist_t *config)
-{
- nvlist_t *features;
- zap_cursor_t zc;
- zap_attribute_t za;
-
- ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
- VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- if (spa->spa_feat_for_read_obj != 0) {
- for (zap_cursor_init(&zc, spa->spa_meta_objset,
- spa->spa_feat_for_read_obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- ASSERT(za.za_integer_length == sizeof (uint64_t) &&
- za.za_num_integers == 1);
- VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
- za.za_first_integer));
- }
- zap_cursor_fini(&zc);
- }
-
- if (spa->spa_feat_for_write_obj != 0) {
- for (zap_cursor_init(&zc, spa->spa_meta_objset,
- spa->spa_feat_for_write_obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- ASSERT(za.za_integer_length == sizeof (uint64_t) &&
- za.za_num_integers == 1);
- VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
- za.za_first_integer));
- }
- zap_cursor_fini(&zc);
- }
-
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
- features) == 0);
- nvlist_free(features);
-}
-
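The deleted spa_add_feature_stats() walked two ZAP objects with the standard cursor idiom. For reference, that idiom reduced to its core (a kernel-side fragment, not a standalone program; mos, zapobj, and use_entry() are placeholders), assuming single-integer uint64 entries as the ASSERTs above require:

    zap_cursor_t zc;
    zap_attribute_t za;

    for (zap_cursor_init(&zc, mos, zapobj);
        zap_cursor_retrieve(&zc, &za) == 0;
        zap_cursor_advance(&zc)) {
        /* za_name / za_first_integer carry the current entry. */
        use_entry(za.za_name, za.za_first_integer);
    }
    zap_cursor_fini(&zc);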
int
-spa_get_stats(const char *name, nvlist_t **config,
- char *altroot, size_t buflen)
+spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
int error;
spa_t *spa;
@@ -3017,7 +2622,6 @@ spa_get_stats(const char *name, nvlist_t **config,
spa_add_spares(spa, *config);
spa_add_l2cache(spa, *config);
- spa_add_feature_stats(spa, *config);
}
}
@@ -3108,7 +2712,6 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
error = ENOTBLK;
- vdev_free(vd);
goto out;
}
#endif
@@ -3218,6 +2821,10 @@ spa_l2cache_drop(spa_t *spa)
if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
pool != 0ULL && l2arc_vdev_present(vd))
l2arc_remove_vdev(vd);
+ if (vd->vdev_isl2cache)
+ spa_l2cache_remove(vd);
+ vdev_clear_stats(vd);
+ (void) vdev_close(vd);
}
}
@@ -3226,7 +2833,7 @@ spa_l2cache_drop(spa_t *spa)
*/
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
- nvlist_t *zplprops)
+ const char *history_str, nvlist_t *zplprops)
{
spa_t *spa;
char *altroot = NULL;
@@ -3238,7 +2845,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
uint64_t version, obj;
- boolean_t has_features;
/*
* If this pool already exists, return failure.
@@ -3264,18 +2870,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
return (error);
}
- has_features = B_FALSE;
- for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
- elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
- if (zpool_prop_feature(nvpair_name(elem)))
- has_features = B_TRUE;
- }
-
- if (has_features || nvlist_lookup_uint64(props,
- zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
+ if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
+ &version) != 0)
version = SPA_VERSION;
- }
- ASSERT(SPA_VERSION_IS_SUPPORTED(version));
+ ASSERT(version <= SPA_VERSION);
spa->spa_first_txg = txg;
spa->spa_uberblock.ub_txg = txg - 1;
@@ -3351,10 +2949,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_l2cache.sav_sync = B_TRUE;
}
- spa->spa_is_initializing = B_TRUE;
spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
spa->spa_meta_objset = dp->dp_meta_objset;
- spa->spa_is_initializing = B_FALSE;
/*
* Create DDTs (dedup tables).
@@ -3378,9 +2974,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
cmn_err(CE_PANIC, "failed to add pool config");
}
- if (spa_version(spa) >= SPA_VERSION_FEATURES)
- spa_feature_create_zap_objects(spa, tx);
-
if (zap_add(spa->spa_meta_objset,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
sizeof (uint64_t), 1, &version, tx) != 0) {
@@ -3445,7 +3038,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_config_sync(spa, B_FALSE, B_TRUE);
- spa_history_log_version(spa, "create");
+ if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
+ (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
+ spa_history_log_version(spa, LOG_POOL_CREATE);
spa->spa_minref = refcount_count(&spa->spa_refcount);
@@ -3569,7 +3164,7 @@ spa_import_rootpool(char *devpath, char *devid)
}
#endif
if (config == NULL) {
- cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
+ cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
devpath);
return (EIO);
}
@@ -3645,6 +3240,7 @@ spa_import_rootpool(char *devpath, char *devid)
}
error = 0;
+ spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_free(rvd);
@@ -3706,7 +3302,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_config_sync(spa, B_FALSE, B_TRUE);
mutex_exit(&spa_namespace_lock);
- spa_history_log_version(spa, "import");
+ spa_history_log_version(spa, LOG_POOL_IMPORT);
return (0);
}
@@ -3837,7 +3433,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
mutex_exit(&spa_namespace_lock);
- spa_history_log_version(spa, "import");
+ spa_history_log_version(spa, LOG_POOL_IMPORT);
return (0);
}
@@ -3882,8 +3478,6 @@ spa_tryimport(nvlist_t *tryconfig)
state) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
spa->spa_uberblock.ub_timestamp) == 0);
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
- spa->spa_load_info) == 0);
/*
* If the bootfs property exists on this pool then we
@@ -4222,7 +3816,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
pvd = oldvd->vdev_parent;
if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
- VDEV_ALLOC_ATTACH)) != 0)
+ VDEV_ALLOC_ADD)) != 0)
return (spa_vdev_exit(spa, NULL, txg, EINVAL));
if (newrootvd->vdev_children != 1)
@@ -4375,7 +3969,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
- spa_history_log_internal(spa, "vdev attach", NULL,
+ spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
"%s vdev=%s %s vdev=%s",
replacing && newvd_isspare ? "spare in" :
replacing ? "replace" : "attach", newvdpath,
@@ -4592,7 +4186,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
error = spa_vdev_exit(spa, vd, txg, 0);
- spa_history_log_internal(spa, "detach", NULL,
+ spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
"vdev=%s", vdpath);
spa_strfree(vdpath);
@@ -4861,8 +4455,9 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
if (vml[c] != NULL) {
vdev_split(vml[c]);
if (error == 0)
- spa_history_log_internal(spa, "detach", tx,
- "vdev=%s", vml[c]->vdev_path);
+ spa_history_log_internal(LOG_POOL_VDEV_DETACH,
+ spa, tx, "vdev=%s",
+ vml[c]->vdev_path);
vdev_free(vml[c]);
}
}
@@ -4877,8 +4472,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
zio_handle_panic_injection(spa, FTAG, 3);
/* split is complete; log a history record */
- spa_history_log_internal(newspa, "split", NULL,
- "from pool %s", spa_name(spa));
+ spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
+ "split new pool %s from pool %s", newname, spa_name(spa));
kmem_free(vml, children * sizeof (vdev_t *));
@@ -5464,7 +5059,8 @@ spa_async_thread(spa_t *spa)
* then log an internal history event.
*/
if (new_space != old_space) {
- spa_history_log_internal(spa, "vdev online", NULL,
+ spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
+ spa, NULL,
"pool '%s' size: %llu(+%llu)",
spa_name(spa), new_space, new_space - old_space);
}
@@ -5599,7 +5195,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
* information. This avoids the dbuf_will_dirty() path and
* saves us a pre-read to get data we don't actually care about.
*/
- bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
+ bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
packed = kmem_alloc(bufsize, KM_SLEEP);
VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
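The P2ROUNDUP above pads the packed nvlist to a whole number of config blocks so the write never dirties a partial block. A user-space check of the macro (definition copied from sys/sysmacros.h; the 16K block size mirrors SPA_CONFIG_BLOCKSIZE but is hard-coded here for illustration):

#include <stdio.h>
#include <stdint.h>

/* Round x up to the next multiple of align (a power of two). */
#define P2ROUNDUP(x, align) (-(-(x) & -(align)))

int
main(void)
{
    uint64_t nvsize = 5000;                 /* packed nvlist size */
    uint64_t blksz = UINT64_C(1) << 14;     /* 16K config block */

    /* Prints "5000 -> 16384". */
    printf("%llu -> %llu\n", (unsigned long long)nvsize,
        (unsigned long long)P2ROUNDUP(nvsize, blksz));
    return (0);
}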
@@ -5684,25 +5280,6 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}
-static void
-spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- spa_t *spa = arg1;
- uint64_t version = *(uint64_t *)arg2;
-
- /*
- * Setting the version is special cased when first creating the pool.
- */
- ASSERT(tx->tx_txg != TXG_INITIAL);
-
- ASSERT(version <= SPA_VERSION);
- ASSERT(version >= spa_version(spa));
-
- spa->spa_uberblock.ub_version = version;
- vdev_config_dirty(spa->spa_root_vdev);
- spa_history_log_internal(spa, "set", tx, "version=%lld", version);
-}
-
/*
* Set zpool properties.
*/
@@ -5712,40 +5289,32 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_t *spa = arg1;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
- nvpair_t *elem = NULL;
+ nvpair_t *elem;
+ uint64_t intval;
+ char *strval;
+ zpool_prop_t prop;
+ const char *propname;
+ zprop_type_t proptype;
mutex_enter(&spa->spa_props_lock);
+ elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem))) {
- uint64_t intval;
- char *strval, *fname;
- zpool_prop_t prop;
- const char *propname;
- zprop_type_t proptype;
- zfeature_info_t *feature;
-
switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
- case ZPROP_INVAL:
- /*
- * We checked this earlier in spa_prop_validate().
- */
- ASSERT(zpool_prop_feature(nvpair_name(elem)));
-
- fname = strchr(nvpair_name(elem), '@') + 1;
- VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
-
- spa_feature_enable(spa, feature, tx);
- spa_history_log_internal(spa, "set", tx,
- "%s=enabled", nvpair_name(elem));
- break;
-
case ZPOOL_PROP_VERSION:
- VERIFY(nvpair_value_uint64(elem, &intval) == 0);
/*
- * The version is synced separately before other
- * properties and should be correct by now.
+ * Only set version for non-zpool-creation cases
+ * (set/import). spa_create() needs special care
+ * for version setting.
*/
- ASSERT3U(spa_version(spa), >=, intval);
+ if (tx->tx_txg != TXG_INITIAL) {
+ VERIFY(nvpair_value_uint64(elem,
+ &intval) == 0);
+ ASSERT(intval <= SPA_VERSION);
+ ASSERT(intval >= spa_version(spa));
+ spa->spa_uberblock.ub_version = intval;
+ vdev_config_dirty(spa->spa_root_vdev);
+ }
break;
case ZPOOL_PROP_ALTROOT:
@@ -5763,31 +5332,19 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
* properties.
*/
break;
- case ZPOOL_PROP_COMMENT:
- VERIFY(nvpair_value_string(elem, &strval) == 0);
- if (spa->spa_comment != NULL)
- spa_strfree(spa->spa_comment);
- spa->spa_comment = spa_strdup(strval);
- /*
- * We need to dirty the configuration on all the vdevs
- * so that their labels get updated. It's unnecessary
- * to do this for pool creation since the vdev's
- * configuration has already been dirtied.
- */
- if (tx->tx_txg != TXG_INITIAL)
- vdev_config_dirty(spa->spa_root_vdev);
- spa_history_log_internal(spa, "set", tx,
- "%s=%s", nvpair_name(elem), strval);
- break;
default:
/*
* Set pool property values in the poolprops mos object.
*/
if (spa->spa_pool_props_object == 0) {
- spa->spa_pool_props_object =
- zap_create_link(mos, DMU_OT_POOL_PROPS,
+ VERIFY((spa->spa_pool_props_object =
+ zap_create(mos, DMU_OT_POOL_PROPS,
+ DMU_OT_NONE, 0, tx)) > 0);
+
+ VERIFY(zap_update(mos,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
- tx);
+ 8, 1, &spa->spa_pool_props_object, tx)
+ == 0);
}
/* normalize the property name */
@@ -5800,8 +5357,7 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos,
spa->spa_pool_props_object, propname,
1, strlen(strval) + 1, strval, tx) == 0);
- spa_history_log_internal(spa, "set", tx,
- "%s=%s", nvpair_name(elem), strval);
+
} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
VERIFY(nvpair_value_uint64(elem, &intval) == 0);
@@ -5813,8 +5369,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos,
spa->spa_pool_props_object, propname,
8, 1, &intval, tx) == 0);
- spa_history_log_internal(spa, "set", tx,
- "%s=%lld", nvpair_name(elem), intval);
} else {
ASSERT(0); /* not allowed */
}
@@ -5843,6 +5397,13 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
}
}
+ /* log internal history if this is not a zpool create */
+ if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
+ tx->tx_txg != TXG_INITIAL) {
+ spa_history_log_internal(LOG_POOL_PROPSET,
+ spa, tx, "%s %lld %s",
+ nvpair_name(elem), intval, spa_name(spa));
+ }
}
mutex_exit(&spa->spa_props_lock);
@@ -5882,11 +5443,6 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
/* Keeping the freedir open increases spa_minref */
spa->spa_minref += 3;
}
-
- if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
- spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
- spa_feature_create_zap_objects(spa, tx);
- }
}
/*
diff --git a/uts/common/fs/zfs/spa_config.c b/uts/common/fs/zfs/spa_config.c
index 366545035d6c..69d57f66dbb6 100644
--- a/uts/common/fs/zfs/spa_config.c
+++ b/uts/common/fs/zfs/spa_config.c
@@ -21,8 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -35,7 +33,6 @@
#include <sys/utsname.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
-#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/kobj.h>
#include <sys/zone.h>
@@ -348,10 +345,6 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
txg) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
spa_guid(spa)) == 0);
- VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
- ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
-
-
#ifdef _KERNEL
hostid = zone_get_hostid(NULL);
#else /* _KERNEL */
@@ -410,12 +403,6 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
- /*
- * Store what's necessary for reading the MOS in the label.
- */
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
- spa->spa_label_features) == 0);
-
if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
ddt_histogram_t *ddh;
ddt_stat_t *dds;
diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c
index f2c32f548b41..212abae5b80c 100644
--- a/uts/common/fs/zfs/spa_history.c
+++ b/uts/common/fs/zfs/spa_history.c
@@ -21,7 +21,6 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@@ -30,12 +29,9 @@
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
#include <sys/utsname.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
-#include <sys/cred.h>
#include "zfs_comutil.h"
#ifdef _KERNEL
#include <sys/zone.h>
@@ -105,11 +101,11 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
/*
* Figure out maximum size of history log. We set it at
- * 0.1% of pool size, with a max of 1G and min of 128KB.
+ * 1% of pool size, with a max of 32MB and min of 128KB.
*/
shpp->sh_phys_max_off =
- metaslab_class_get_dspace(spa_normal_class(spa)) / 1000;
- shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30);
+ metaslab_class_get_dspace(spa_normal_class(spa)) / 100;
+ shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20);
shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
dmu_buf_rele(dbp, FTAG);
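The restored sizing rule is dspace/100 clamped to [128KB, 32MB]: a 1GB pool gets about 10.7MB of history, a 10GB or larger pool hits the 32MB cap, and a pool under roughly 13MB is raised to the 128KB floor. A self-contained sketch of the clamp:

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b)   ((a) < (b) ? (a) : (b))
#define MAX(a, b)   ((a) > (b) ? (a) : (b))

/* 1% of pool space, clamped to [128KB, 32MB], as in the hunk above. */
static uint64_t
history_max_off(uint64_t dspace)
{
    uint64_t off = dspace / 100;

    off = MIN(off, (uint64_t)32 << 20);
    off = MAX(off, (uint64_t)128 << 10);
    return (off);
}

int
main(void)
{
    /* 10GB pool: 1% is ~107MB, so the 32MB cap wins. */
    printf("%llu\n", (unsigned long long)history_max_off(10ULL << 30));
    return (0);
}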
@@ -179,14 +175,12 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
}
static char *
-spa_history_zone(void)
+spa_history_zone()
{
#ifdef _KERNEL
- if (INGLOBALZONE(curproc))
- return (NULL);
return (curproc->p_zone->zone_name);
#else
- return (NULL);
+ return ("global");
#endif
}
@@ -198,12 +192,14 @@ static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
spa_t *spa = arg1;
- nvlist_t *nvl = arg2;
+ history_arg_t *hap = arg2;
+ const char *history_str = hap->ha_history_str;
objset_t *mos = spa->spa_meta_objset;
dmu_buf_t *dbp;
spa_history_phys_t *shpp;
size_t reclen;
uint64_t le_len;
+ nvlist_t *nvrecord;
char *record_packed = NULL;
int ret;
@@ -233,35 +229,46 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
#endif
- fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
+ VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
+ gethrestime_sec()) == 0);
+ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
+ if (hap->ha_zone != NULL)
+ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
+ hap->ha_zone) == 0);
#ifdef _KERNEL
- fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
+ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
+ utsname.nodename) == 0);
#endif
- if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
- zfs_dbgmsg("command: %s",
- fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
- } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
- if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
- zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
- fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
- fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
- fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
- fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
- fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
- } else {
- zfs_dbgmsg("txg %lld %s %s",
- fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
- fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
- fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
- }
- } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
- zfs_dbgmsg("ioctl %s",
- fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
+ if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
+ hap->ha_log_type == LOG_CMD_NORMAL) {
+ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
+ history_str) == 0);
+
+ zfs_dbgmsg("command: %s", history_str);
+ } else {
+ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
+ hap->ha_event) == 0);
+ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
+ tx->tx_txg) == 0);
+ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
+ history_str) == 0);
+
+ zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
+ zfs_history_event_names[hap->ha_event], spa_name(spa),
+ (longlong_t)tx->tx_txg, history_str);
+
}
- record_packed = fnvlist_pack(nvl, &reclen);
+ VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
+ record_packed = kmem_alloc(reclen, KM_SLEEP);
+
+ VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
+ NV_ENCODE_XDR, KM_SLEEP) == 0);
mutex_enter(&spa->spa_history_lock);
+ if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
+ VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);
/* write out the packed length as little endian */
le_len = LE_64((uint64_t)reclen);
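Each history record is framed as an 8-byte little-endian length followed by the XDR-packed nvlist, so readers on either endianness can walk the ring. A sketch of the inverse of the two writes below (kernel-style fragment, not standalone; buf is assumed to hold one complete frame):

    uint64_t le_len, reclen;
    nvlist_t *rec;

    bcopy(buf, &le_len, sizeof (le_len));
    reclen = LE_64(le_len);     /* byte-swap only on big-endian hosts */
    VERIFY(nvlist_unpack((char *)buf + sizeof (le_len), reclen,
        &rec, 0) == 0);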
@@ -269,42 +276,33 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
if (!ret)
ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
- /* The first command is the create, which we keep forever */
- if (ret == 0 && shpp->sh_pool_create_len == 0 &&
- nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
- shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
+ if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
+ shpp->sh_pool_create_len += sizeof (le_len) + reclen;
+ shpp->sh_bof = shpp->sh_pool_create_len;
}
mutex_exit(&spa->spa_history_lock);
- fnvlist_pack_free(record_packed, reclen);
+ nvlist_free(nvrecord);
+ kmem_free(record_packed, reclen);
dmu_buf_rele(dbp, FTAG);
- fnvlist_free(nvl);
+
+ strfree(hap->ha_history_str);
+ if (hap->ha_zone != NULL)
+ strfree(hap->ha_zone);
+ kmem_free(hap, sizeof (history_arg_t));
}
/*
* Write out a history event.
*/
int
-spa_history_log(spa_t *spa, const char *msg)
-{
- int err;
- nvlist_t *nvl = fnvlist_alloc();
-
- fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg);
- err = spa_history_log_nvl(spa, nvl);
- fnvlist_free(nvl);
- return (err);
-}
-
-int
-spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
+spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
{
+ history_arg_t *ha;
int err = 0;
dmu_tx_t *tx;
- nvlist_t *nvarg;
- if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY)
- return (EINVAL);
+ ASSERT(what != LOG_INTERNAL);
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
err = dmu_tx_assign(tx, TXG_WAIT);
@@ -313,21 +311,19 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
return (err);
}
- nvarg = fnvlist_dup(nvl);
- if (spa_history_zone() != NULL) {
- fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
- spa_history_zone());
- }
- fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
+ ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
+ ha->ha_history_str = strdup(history_str);
+ ha->ha_zone = strdup(spa_history_zone());
+ ha->ha_log_type = what;
+ ha->ha_uid = crgetuid(CRED());
/* Kick this off asynchronously; errors are ignored. */
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
- spa_history_log_sync, spa, nvarg, 0, tx);
+ spa_history_log_sync, spa, ha, 0, tx);
dmu_tx_commit(tx);
- /* spa_history_log_sync will free nvl */
+ /* spa_history_log_sync will free ha and strings */
return (err);
-
}
/*
@@ -344,7 +340,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
int err;
/*
- * If the command history doesn't exist (older pool),
+ * If the command history doesn't exist (older pool),
* that's ok, just return ENOENT.
*/
if (!spa->spa_history)
@@ -427,14 +423,11 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
return (err);
}
-/*
- * The nvlist will be consumed by this call.
- */
static void
-log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
+log_internal(history_internal_events_t event, spa_t *spa,
dmu_tx_t *tx, const char *fmt, va_list adx)
{
- char *msg;
+ history_arg_t *ha;
/*
* If this is part of creating a pool, not everything is
@@ -443,25 +436,28 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
if (tx->tx_txg == TXG_INITIAL)
return;
- msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
- (void) vsprintf(msg, fmt, adx);
- fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
- strfree(msg);
+ ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
+ ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1,
+ KM_SLEEP);
+
+ (void) vsprintf(ha->ha_history_str, fmt, adx);
- fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
- fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
+ ha->ha_log_type = LOG_INTERNAL;
+ ha->ha_event = event;
+ ha->ha_zone = NULL;
+ ha->ha_uid = 0;
if (dmu_tx_is_syncing(tx)) {
- spa_history_log_sync(spa, nvl, tx);
+ spa_history_log_sync(spa, ha, tx);
} else {
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
- spa_history_log_sync, spa, nvl, 0, tx);
+ spa_history_log_sync, spa, ha, 0, tx);
}
- /* spa_history_log_sync() will free nvl */
+ /* spa_history_log_sync() will free ha and strings */
}
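log_internal() sizes its buffer with vsnprintf(NULL, 0, ...) and then formats into it with vsprintf(). In portable user-space C a va_list cannot be consumed twice, so the same idiom needs va_copy() before the sizing pass; a self-contained sketch (fmt_alloc is a hypothetical helper, not part of this code):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *
fmt_alloc(const char *fmt, va_list adx)
{
    va_list adx2;
    char *buf;
    int len;

    va_copy(adx2, adx);
    len = vsnprintf(NULL, 0, fmt, adx2);    /* sizing pass only */
    va_end(adx2);

    buf = malloc(len + 1);
    if (buf != NULL)
        (void) vsprintf(buf, fmt, adx);     /* formatting pass */
    return (buf);
}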
void
-spa_history_log_internal(spa_t *spa, const char *operation,
+spa_history_log_internal(history_internal_events_t event, spa_t *spa,
dmu_tx_t *tx, const char *fmt, ...)
{
dmu_tx_t *htx = tx;
@@ -477,7 +473,7 @@ spa_history_log_internal(spa_t *spa, const char *operation,
}
va_start(adx, fmt);
- log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);
+ log_internal(event, spa, htx, fmt, adx);
va_end(adx);
/* if we didn't get a tx from the caller, commit the one we made */
@@ -486,56 +482,21 @@ spa_history_log_internal(spa_t *spa, const char *operation,
}
void
-spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
- dmu_tx_t *tx, const char *fmt, ...)
-{
- va_list adx;
- char namebuf[MAXNAMELEN];
- nvlist_t *nvl = fnvlist_alloc();
-
- ASSERT(tx != NULL);
-
- dsl_dataset_name(ds, namebuf);
- fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
- fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object);
-
- va_start(adx, fmt);
- log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx);
- va_end(adx);
-}
-
-void
-spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
- dmu_tx_t *tx, const char *fmt, ...)
-{
- va_list adx;
- char namebuf[MAXNAMELEN];
- nvlist_t *nvl = fnvlist_alloc();
-
- ASSERT(tx != NULL);
-
- dsl_dir_name(dd, namebuf);
- fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
- fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID,
- dd->dd_phys->dd_head_dataset_obj);
-
- va_start(adx, fmt);
- log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx);
- va_end(adx);
-}
-
-void
-spa_history_log_version(spa_t *spa, const char *operation)
+spa_history_log_version(spa_t *spa, history_internal_events_t event)
{
#ifdef _KERNEL
uint64_t current_vers = spa_version(spa);
- spa_history_log_internal(spa, operation, NULL,
- "pool version %llu; software version %llu/%d; uts %s %s %s %s",
- (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
- utsname.nodename, utsname.release, utsname.version,
- utsname.machine);
- cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,
+ if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
+ spa_history_log_internal(event, spa, NULL,
+ "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
+ (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
+ utsname.nodename, utsname.release, utsname.version,
+ utsname.machine);
+ }
+ cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
+ event == LOG_POOL_IMPORT ? "imported" :
+ event == LOG_POOL_CREATE ? "created" : "accessed",
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
}
diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c
index 9400194a93b8..1b54afb0be5e 100644
--- a/uts/common/fs/zfs/spa_misc.c
+++ b/uts/common/fs/zfs/spa_misc.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -48,7 +46,6 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include "zfs_prop.h"
-#include "zfeature_common.h"
/*
* SPA locking
@@ -217,7 +214,7 @@
* Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*
- * spa_rename() is also implemented within this file since it requires
+ * spa_rename() is also implemented within this file since it requires
* manipulation of the namespace.
*/
@@ -484,22 +481,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
KM_SLEEP) == 0);
- if (config != NULL) {
- nvlist_t *features;
-
- if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
- &features) == 0) {
- VERIFY(nvlist_dup(features, &spa->spa_label_features,
- 0) == 0);
- }
-
+ if (config != NULL)
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
- }
-
- if (spa->spa_label_features == NULL) {
- VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
- }
return (spa);
}
@@ -536,7 +519,6 @@ spa_remove(spa_t *spa)
list_destroy(&spa->spa_config_list);
- nvlist_free(spa->spa_label_features);
nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
@@ -1045,20 +1027,6 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
* ==========================================================================
*/
-void
-spa_activate_mos_feature(spa_t *spa, const char *feature)
-{
- (void) nvlist_add_boolean(spa->spa_label_features, feature);
- vdev_config_dirty(spa->spa_root_vdev);
-}
-
-void
-spa_deactivate_mos_feature(spa_t *spa, const char *feature)
-{
- (void) nvlist_remove_all(spa->spa_label_features, feature);
- vdev_config_dirty(spa->spa_root_vdev);
-}
-
/*
* Rename a spa_t.
*/
@@ -1209,22 +1177,12 @@ spa_generate_guid(spa_t *spa)
void
sprintf_blkptr(char *buf, const blkptr_t *bp)
{
- char type[256];
+ char *type = NULL;
char *checksum = NULL;
char *compress = NULL;
if (bp != NULL) {
- if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
- dmu_object_byteswap_t bswap =
- DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
- (void) snprintf(type, sizeof (type), "bswap %s %s",
- DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
- "metadata" : "data",
- dmu_ot_byteswap[bswap].ob_name);
- } else {
- (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
- sizeof (type));
- }
+ type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
@@ -1306,12 +1264,6 @@ spa_get_dsl(spa_t *spa)
return (spa->spa_dsl_pool);
}
-boolean_t
-spa_is_initializing(spa_t *spa)
-{
- return (spa->spa_is_initializing);
-}
-
blkptr_t *
spa_get_rootblkptr(spa_t *spa)
{
@@ -1351,24 +1303,13 @@ spa_guid(spa_t *spa)
/*
* If we fail to parse the config during spa_load(), we can go through
* the error path (which posts an ereport) and end up here with no root
- * vdev. We stash the original pool guid in 'spa_config_guid' to handle
+ * vdev. We stash the original pool guid in 'spa_load_guid' to handle
* this case.
*/
if (spa->spa_root_vdev != NULL)
return (spa->spa_root_vdev->vdev_guid);
else
- return (spa->spa_config_guid);
-}
-
-uint64_t
-spa_load_guid(spa_t *spa)
-{
- /*
- * This is a GUID that exists solely as a reference for the
- * purposes of the arc. It is generated at load time, and
- * is never written to persistent storage.
- */
- return (spa->spa_load_guid);
+ return (spa->spa_load_guid);
}
uint64_t
@@ -1595,7 +1536,6 @@ spa_init(int mode)
vdev_cache_stat_init();
zfs_prop_init();
zpool_prop_init();
- zpool_feature_init();
spa_config_load();
l2arc_start();
}
@@ -1730,9 +1670,3 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
return (0);
}
-
-boolean_t
-spa_debug_enabled(spa_t *spa)
-{
- return (spa->spa_debug);
-}
diff --git a/uts/common/fs/zfs/sys/bptree.h b/uts/common/fs/zfs/sys/bptree.h
deleted file mode 100644
index 971507211875..000000000000
--- a/uts/common/fs/zfs/sys/bptree.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#ifndef _SYS_BPTREE_H
-#define _SYS_BPTREE_H
-
-#include <sys/spa.h>
-#include <sys/zio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct bptree_phys {
- uint64_t bt_begin;
- uint64_t bt_end;
- uint64_t bt_bytes;
- uint64_t bt_comp;
- uint64_t bt_uncomp;
-} bptree_phys_t;
-
-typedef struct bptree_entry_phys {
- blkptr_t be_bp;
- uint64_t be_birth_txg; /* only delete blocks born after this txg */
- zbookmark_t be_zb; /* holds traversal resume point if needed */
-} bptree_entry_phys_t;
-
-typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
-
-uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
-int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
-
-void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
- uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);
-
-int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free,
- bptree_itor_t func, void *arg, dmu_tx_t *tx);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_BPTREE_H */
diff --git a/uts/common/fs/zfs/sys/dmu.h b/uts/common/fs/zfs/sys/dmu.h
index d60483575574..07f5949ebfea 100644
--- a/uts/common/fs/zfs/sys/dmu.h
+++ b/uts/common/fs/zfs/sys/dmu.h
@@ -18,12 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -44,7 +40,6 @@
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/time.h>
-#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
@@ -75,53 +70,6 @@ typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
-typedef enum dmu_object_byteswap {
- DMU_BSWAP_UINT8,
- DMU_BSWAP_UINT16,
- DMU_BSWAP_UINT32,
- DMU_BSWAP_UINT64,
- DMU_BSWAP_ZAP,
- DMU_BSWAP_DNODE,
- DMU_BSWAP_OBJSET,
- DMU_BSWAP_ZNODE,
- DMU_BSWAP_OLDACL,
- DMU_BSWAP_ACL,
- /*
- * Allocating a new byteswap type number makes the on-disk format
- * incompatible with any other format that uses the same number.
- *
- * Data can usually be structured to work with one of the
- * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
- */
- DMU_BSWAP_NUMFUNCS
-} dmu_object_byteswap_t;
-
-#define DMU_OT_NEWTYPE 0x80
-#define DMU_OT_METADATA 0x40
-#define DMU_OT_BYTESWAP_MASK 0x3f
-
-/*
- * Defines a uint8_t object type. Object types specify if the data
- * in the object is metadata (boolean) and how to byteswap the data
- * (dmu_object_byteswap_t).
- */
-#define DMU_OT(byteswap, metadata) \
- (DMU_OT_NEWTYPE | \
- ((metadata) ? DMU_OT_METADATA : 0) | \
- ((byteswap) & DMU_OT_BYTESWAP_MASK))
-
-#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
- (ot) < DMU_OT_NUMTYPES)
-
-#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_METADATA) : \
- dmu_ot[(ot)].ot_metadata)
-
-#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
- ((ot) & DMU_OT_BYTESWAP_MASK) : \
- dmu_ot[(ot)].ot_byteswap)
-
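The removed macros pack three fields into one uint8_t object type: bit 7 flags the new-style encoding, bit 6 marks metadata, and the low six bits index the byteswap table. A user-space sketch that re-creates the encoding from the lines above and decodes a value (the value 3 stands in for DMU_BSWAP_UINT64):

#include <stdio.h>
#include <stdint.h>

#define DMU_OT_NEWTYPE          0x80
#define DMU_OT_METADATA         0x40
#define DMU_OT_BYTESWAP_MASK    0x3f
#define DMU_OT(byteswap, metadata) \
    (DMU_OT_NEWTYPE | ((metadata) ? DMU_OT_METADATA : 0) | \
    ((byteswap) & DMU_OT_BYTESWAP_MASK))

int
main(void)
{
    uint8_t ot = DMU_OT(3, 1);  /* uint64 byteswap, metadata */

    /* Prints "newtype=1 metadata=1 byteswap=3". */
    printf("newtype=%d metadata=%d byteswap=%d\n",
        !!(ot & DMU_OT_NEWTYPE), !!(ot & DMU_OT_METADATA),
        ot & DMU_OT_BYTESWAP_MASK);
    return (0);
}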
typedef enum dmu_object_type {
DMU_OT_NONE,
/* general: */
@@ -186,37 +134,19 @@ typedef enum dmu_object_type {
DMU_OT_DEADLIST_HDR, /* UINT64 */
DMU_OT_DSL_CLONES, /* ZAP */
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
- /*
- * Do not allocate new object types here. Doing so makes the on-disk
- * format incompatible with any other format that uses the same object
- * type number.
- *
- * When creating an object which does not have one of the above types
- * use the DMU_OTN_* type with the correct byteswap and metadata
- * values.
- *
- * The DMU_OTN_* types do not have entries in the dmu_ot table,
- * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead
- * of indexing into dmu_ot directly (this works for both DMU_OT_* types
- * and DMU_OTN_* types).
- */
- DMU_OT_NUMTYPES,
-
- /*
- * Names for valid types declared with DMU_OT().
- */
- DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
- DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
- DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
- DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
- DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
- DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
- DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
- DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
- DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
- DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+ DMU_OT_NUMTYPES
} dmu_object_type_t;
+typedef enum dmu_objset_type {
+ DMU_OST_NONE,
+ DMU_OST_META,
+ DMU_OST_ZFS,
+ DMU_OST_ZVOL,
+ DMU_OST_OTHER, /* For testing only! */
+ DMU_OST_ANY, /* Be careful! */
+ DMU_OST_NUMTYPES
+} dmu_objset_type_t;
+
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
@@ -261,11 +191,9 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
-int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer,
- struct nvlist *errlist);
-int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *);
-int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
-int dmu_objset_snapshot_tmp(const char *, const char *, int);
+int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
+int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
+ struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
@@ -286,9 +214,6 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
-#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
-#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
-#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
@@ -304,7 +229,6 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
-#define DMU_POOL_BPTREE_OBJ "bptree_obj"
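These DMU_POOL_* strings are keys in the object-directory ZAP (object 1 of the MOS); pool open resolves each to an object number the way spa_dir_prop() does in the spa.c hunks above. A fragment of that lookup (kernel-side sketch; mos and the surrounding function are assumed):

    uint64_t obj;
    int err;

    err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
        sizeof (uint64_t), 1, &obj);
    if (err != 0)
        return (err);   /* e.g. ENOENT on very old pools */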
/*
* Allocate an object from this objset. The range of object numbers
@@ -565,7 +489,7 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
/*
* Free up the data blocks for a defined range of a file. If size is
- * -1, the range from offset to end-of-file is freed.
+ * zero, the range from offset to end-of-file is freed.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
@@ -635,18 +559,12 @@ typedef struct dmu_object_info {
typedef void arc_byteswap_func_t(void *buf, size_t size);
typedef struct dmu_object_type_info {
- dmu_object_byteswap_t ot_byteswap;
+ arc_byteswap_func_t *ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
-typedef struct dmu_object_byteswap_info {
- arc_byteswap_func_t *ob_func;
- char *ob_name;
-} dmu_object_byteswap_info_t;
-
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
-extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
/*
* Get information on a DMU object.
@@ -782,9 +700,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
-int dmu_send(objset_t *tosnap, objset_t *fromsnap,
- int outfd, struct vnode *vp, offset_t *off);
-int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep);
+int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+ struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
/*
@@ -801,7 +718,6 @@ typedef struct dmu_recv_cookie {
char *drc_top_ds;
boolean_t drc_newfs;
boolean_t drc_force;
- struct avl_tree *drc_guid_to_ds_map;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
diff --git a/uts/common/fs/zfs/sys/dmu_impl.h b/uts/common/fs/zfs/sys/dmu_impl.h
index defcdb29ca60..22f9f5f8c88c 100644
--- a/uts/common/fs/zfs/sys/dmu_impl.h
+++ b/uts/common/fs/zfs/sys/dmu_impl.h
@@ -21,7 +21,6 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DMU_IMPL_H
@@ -31,7 +30,6 @@
#include <sys/zio.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
-#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
@@ -266,32 +264,6 @@ static xuio_stats_t xuio_stats = {
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
-/*
- * The list of data whose inclusion in a send stream can be pending from
- * one call to backup_cb to another. Multiple calls to dump_free() and
- * dump_freeobjects() can be aggregated into a single DRR_FREE or
- * DRR_FREEOBJECTS replay record.
- */
-typedef enum {
- PENDING_NONE,
- PENDING_FREE,
- PENDING_FREEOBJECTS
-} dmu_pendop_t;
-
-typedef struct dmu_sendarg {
- list_node_t dsa_link;
- dmu_replay_record_t *dsa_drr;
- vnode_t *dsa_vp;
- int dsa_outfd;
- struct proc *dsa_proc;
- offset_t *dsa_off;
- objset_t *dsa_os;
- zio_cksum_t dsa_zc;
- uint64_t dsa_toguid;
- int dsa_err;
- dmu_pendop_t dsa_pending_op;
-} dmu_sendarg_t;
-
#ifdef __cplusplus
}
diff --git a/uts/common/fs/zfs/sys/dmu_objset.h b/uts/common/fs/zfs/sys/dmu_objset.h
index 9439993ace78..c6d202e2e81a 100644
--- a/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/uts/common/fs/zfs/sys/dmu_objset.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -138,14 +137,24 @@ void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
+int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
+ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
+ uint64_t flags);
+int dmu_objset_destroy(const char *name, boolean_t defer);
+int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
+ struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
+int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
+ int flags);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
int dmu_objset_prefetch(const char *name, void *arg);
+void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
diff --git a/uts/common/fs/zfs/sys/dmu_traverse.h b/uts/common/fs/zfs/sys/dmu_traverse.h
index 3cbf42f56a60..5b326cd99c09 100644
--- a/uts/common/fs/zfs/sys/dmu_traverse.h
+++ b/uts/common/fs/zfs/sys/dmu_traverse.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@@ -55,9 +54,6 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
-int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
- uint64_t txg_start, zbookmark_t *resume, int flags,
- blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
diff --git a/uts/common/fs/zfs/sys/dsl_dataset.h b/uts/common/fs/zfs/sys/dsl_dataset.h
index 6c43d97fd9d6..22733d070e8b 100644
--- a/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DSL_DATASET_H
@@ -86,12 +84,7 @@ typedef struct dsl_dataset_phys {
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
- /*
- * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
- * include all blocks referenced by this dataset, including those
- * shared with any other datasets.
- */
- uint64_t ds_referenced_bytes;
+ uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
uint64_t ds_unique_bytes; /* only relevant to snapshots */
@@ -156,9 +149,6 @@ typedef struct dsl_dataset {
uint64_t ds_reserved; /* cached refreservation */
uint64_t ds_quota; /* cached refquota */
- kmutex_t ds_sendstream_lock;
- list_t ds_sendstreams;
-
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
@@ -180,7 +170,7 @@ struct dsl_ds_destroyarg {
struct dsl_ds_holdarg {
dsl_sync_task_group_t *dstg;
- const char *htag;
+ char *htag;
char *snapname;
boolean_t recursive;
boolean_t gotone;
@@ -215,11 +205,12 @@ uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
+int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
+dsl_checkfunc_t dsl_dataset_snapshot_check;
+dsl_syncfunc_t dsl_dataset_snapshot_sync;
dsl_syncfunc_t dsl_dataset_user_hold_sync;
-int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
-void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
@@ -258,10 +249,6 @@ void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
-int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
- uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
-int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
- uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
diff --git a/uts/common/fs/zfs/sys/dsl_deleg.h b/uts/common/fs/zfs/sys/dsl_deleg.h
index 5842639aafba..73c43bd23879 100644
--- a/uts/common/fs/zfs/sys/dsl_deleg.h
+++ b/uts/common/fs/zfs/sys/dsl_deleg.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_DELEG_H
diff --git a/uts/common/fs/zfs/sys/dsl_pool.h b/uts/common/fs/zfs/sys/dsl_pool.h
index 9ff414888cb0..7d25bd7c020d 100644
--- a/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/uts/common/fs/zfs/sys/dsl_pool.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@@ -35,7 +34,6 @@
#include <sys/ddt.h>
#include <sys/arc.h>
#include <sys/bpobj.h>
-#include <sys/bptree.h>
#ifdef __cplusplus
extern "C" {
@@ -50,8 +48,7 @@ struct dsl_scan;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_OTHER DMU_OT_NUMTYPES /* place holder for DMU_OT() types */
-#define DMU_OT_TOTAL (DMU_OT_NUMTYPES + 1)
+#define DMU_OT_TOTAL DMU_OT_NUMTYPES
typedef struct zfs_blkstat {
uint64_t zb_count;
@@ -88,7 +85,6 @@ typedef struct dsl_pool {
uint64_t dp_write_limit;
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
- uint64_t dp_bptree_obj;
struct dsl_scan *dp_scan;
@@ -114,8 +110,7 @@ typedef struct dsl_pool {
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
-int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
-int dsl_pool_open(dsl_pool_t *dp);
+int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
diff --git a/uts/common/fs/zfs/sys/dsl_prop.h b/uts/common/fs/zfs/sys/dsl_prop.h
index b0d9a52cdfd7..a636ad35096b 100644
--- a/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/uts/common/fs/zfs/sys/dsl_prop.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_PROP_H
@@ -90,6 +89,8 @@ dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
zprop_source_t source, int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
+void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+ dmu_tx_t *tx);
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value);
diff --git a/uts/common/fs/zfs/sys/dsl_scan.h b/uts/common/fs/zfs/sys/dsl_scan.h
index 5691f4d14d93..c79666e67de0 100644
--- a/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/uts/common/fs/zfs/sys/dsl_scan.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -80,9 +79,6 @@ typedef struct dsl_scan {
uint64_t scn_sync_start_time;
zio_t *scn_zio_root;
- /* for freeing blocks */
- boolean_t scn_is_bptree;
-
/* for debugging / information */
uint64_t scn_visited_this_txg;
diff --git a/uts/common/fs/zfs/sys/metaslab.h b/uts/common/fs/zfs/sys/metaslab.h
index 2cf4d2b489bd..583d6303bd5a 100644
--- a/uts/common/fs/zfs/sys/metaslab.h
+++ b/uts/common/fs/zfs/sys/metaslab.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -48,8 +47,6 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
#define METASLAB_GANG_HEADER 0x2
-#define METASLAB_GANG_CHILD 0x4
-#define METASLAB_GANG_AVOID 0x8
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
diff --git a/uts/common/fs/zfs/sys/metaslab_impl.h b/uts/common/fs/zfs/sys/metaslab_impl.h
index 6c670a1624ab..07988dd51a73 100644
--- a/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -21,7 +21,6 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@@ -53,7 +52,6 @@ struct metaslab_group {
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
uint64_t mg_bonus_area;
- uint64_t mg_alloc_failures;
int64_t mg_bias;
int64_t mg_activation_count;
metaslab_class_t *mg_class;
diff --git a/uts/common/fs/zfs/sys/rrwlock.h b/uts/common/fs/zfs/sys/rrwlock.h
index 239268bd58e7..19a43c97fc3c 100644
--- a/uts/common/fs/zfs/sys/rrwlock.h
+++ b/uts/common/fs/zfs/sys/rrwlock.h
@@ -22,13 +22,12 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
#ifndef _SYS_RR_RW_LOCK_H
#define _SYS_RR_RW_LOCK_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -70,7 +69,6 @@ void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
-void rrw_tsd_destroy(void *arg);
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h
index 1043f4038a30..456ec06dc456 100644
--- a/uts/common/fs/zfs/sys/spa.h
+++ b/uts/common/fs/zfs/sys/spa.h
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SPA_H
@@ -52,7 +50,6 @@ typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
-struct dsl_dataset;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
@@ -95,7 +92,7 @@ struct dsl_dataset;
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
-#define SPA_CONFIG_BLOCKSIZE (1ULL << 14)
+#define SPA_CONFIG_BLOCKSIZE (1 << 14)
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
@@ -263,7 +260,7 @@ typedef struct blkptr {
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
- ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
+ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
@@ -404,8 +401,8 @@ typedef struct blkptr {
#include <sys/dmu.h>
#define BP_GET_BUFC_TYPE(bp) \
- (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA)
+ (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
+	ARC_BUFC_METADATA : ARC_BUFC_DATA)
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
@@ -416,10 +413,10 @@ typedef enum spa_import_type {
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
-extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
- size_t buflen);
+extern int spa_get_stats(const char *pool, nvlist_t **config,
+ char *altroot, size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
- nvlist_t *zplprops);
+ const char *history_str, nvlist_t *zplprops);
extern int spa_import_rootpool(char *devpath, char *devid);
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
@@ -574,14 +571,12 @@ extern void spa_claim_notify(zio_t *zio);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
-extern boolean_t spa_is_initializing(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
extern int spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
-extern uint64_t spa_load_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
@@ -606,8 +601,6 @@ extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
/* Miscellaneous support routines */
-extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
-extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
extern int spa_rename(const char *oldname, const char *newname);
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
@@ -617,7 +610,6 @@ extern uint64_t spa_get_random(uint64_t range);
extern uint64_t spa_generate_guid(spa_t *spa);
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
-extern int spa_change_guid(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
@@ -633,20 +625,31 @@ extern boolean_t spa_writeable(spa_t *spa);
extern int spa_mode(spa_t *spa);
extern uint64_t strtonum(const char *str, char **nptr);
+/* history logging */
+typedef enum history_log_type {
+ LOG_CMD_POOL_CREATE,
+ LOG_CMD_NORMAL,
+ LOG_INTERNAL
+} history_log_type_t;
+
+typedef struct history_arg {
+ char *ha_history_str;
+ history_log_type_t ha_log_type;
+ history_internal_events_t ha_event;
+ char *ha_zone;
+ uid_t ha_uid;
+} history_arg_t;
+
extern char *spa_his_ievent_table[];
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
-extern int spa_history_log(spa_t *spa, const char *his_buf);
-extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl);
-extern void spa_history_log_version(spa_t *spa, const char *operation);
-extern void spa_history_log_internal(spa_t *spa, const char *operation,
- dmu_tx_t *tx, const char *fmt, ...);
-extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
- dmu_tx_t *tx, const char *fmt, ...);
-extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
- dmu_tx_t *tx, const char *fmt, ...);
+extern int spa_history_log(spa_t *spa, const char *his_buf,
+ history_log_type_t what);
+extern void spa_history_log_internal(history_internal_events_t event,
+ spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
+extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
/* error handling */
struct zbookmark;
@@ -694,13 +697,6 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_bp(bp, fmt, ...)
#endif
-extern boolean_t spa_debug_enabled(spa_t *spa);
-#define spa_dbgmsg(spa, ...) \
-{ \
- if (spa_debug_enabled(spa)) \
- zfs_dbgmsg(__VA_ARGS__); \
-}
-
extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus
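The reverted history interface above logs internal events from syncing context with a printf-style format; a representative call might look like this (a sketch: LOG_DS_DESTROY is assumed to be one of the history_internal_events_t codes, and ds a dsl_dataset_t):

	spa_history_log_internal(LOG_DS_DESTROY, spa, tx,
	    "dataset = %llu", (u_longlong_t)ds->ds_object);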
diff --git a/uts/common/fs/zfs/sys/spa_impl.h b/uts/common/fs/zfs/sys/spa_impl.h
index 5118954b0016..c965ffbbef87 100644
--- a/uts/common/fs/zfs/sys/spa_impl.h
+++ b/uts/common/fs/zfs/sys/spa_impl.h
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -112,7 +110,6 @@ struct spa {
* Fields protected by spa_namespace_lock.
*/
char spa_name[MAXNAMELEN]; /* pool name */
- char *spa_comment; /* comment */
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
@@ -127,7 +124,6 @@ struct spa {
uint64_t spa_import_flags; /* import specific flags */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
- boolean_t spa_is_initializing; /* true while opening pool */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
@@ -139,13 +135,11 @@ struct spa {
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
- uint64_t spa_config_guid; /* config pool guid */
- uint64_t spa_load_guid; /* spa_load initialized guid */
+ uint64_t spa_load_guid; /* initial guid for spa_load */
list_t spa_config_dirty_list; /* vdevs with dirty config */
list_t spa_state_dirty_list; /* vdevs with dirty state */
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
- nvlist_t *spa_label_features; /* Features for reading MOS */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
uint64_t spa_syncing_txg; /* txg currently syncing */
@@ -202,7 +196,6 @@ struct spa {
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
uint8_t spa_claiming; /* pool is doing zil_claim() */
- boolean_t spa_debug; /* debug enabled? */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
int spa_mode; /* FREAD | FWRITE */
@@ -222,10 +215,7 @@ struct spa {
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
- uint64_t spa_prev_software_version; /* See ub_software_version */
- uint64_t spa_feat_for_write_obj; /* required to write to pool */
- uint64_t spa_feat_for_read_obj; /* required to read from pool */
- uint64_t spa_feat_desc_obj; /* Feature descriptions */
+ uint64_t spa_prev_software_version;
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
diff --git a/uts/common/fs/zfs/sys/vdev.h b/uts/common/fs/zfs/sys/vdev.h
index 2329d5b85c68..941f234dc68f 100644
--- a/uts/common/fs/zfs/sys/vdev.h
+++ b/uts/common/fs/zfs/sys/vdev.h
@@ -18,10 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -50,7 +48,7 @@ extern boolean_t zfs_nocacheflush;
extern int vdev_open(vdev_t *);
extern void vdev_open_children(vdev_t *);
extern boolean_t vdev_uses_zvols(vdev_t *);
-extern int vdev_validate(vdev_t *, boolean_t);
+extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
extern void vdev_reopen(vdev_t *);
@@ -142,8 +140,8 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psize, uint64_t offset);
-extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
-extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
+extern nvlist_t *vdev_label_read_config(vdev_t *vd);
+extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/uts/common/fs/zfs/sys/vdev_impl.h b/uts/common/fs/zfs/sys/vdev_impl.h
index 6d2e962fdd37..161bd21f05a6 100644
--- a/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/uts/common/fs/zfs/sys/vdev_impl.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_VDEV_IMPL_H
@@ -56,8 +55,7 @@ typedef struct vdev_cache_entry vdev_cache_entry_t;
/*
* Virtual device operations
*/
-typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
- uint64_t *ashift);
+typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
@@ -120,7 +118,6 @@ struct vdev {
uint64_t vdev_orig_guid; /* orig. guid prior to remove */
uint64_t vdev_asize; /* allocatable device capacity */
uint64_t vdev_min_asize; /* min acceptable asize */
- uint64_t vdev_max_asize; /* max acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
@@ -202,7 +199,7 @@ struct vdev {
* For DTrace to work in userland (libzpool) context, these fields must
* remain at the end of the structure. DTrace will use the kernel's
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
- * larger in userland, the offsets for the rest of the fields would be
+ * larger in userland, the offsets for the rest of the fields would be
* incorrect.
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
@@ -257,7 +254,6 @@ typedef struct vdev_label {
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
#define VDEV_LABELS 4
-#define VDEV_BEST_LABEL VDEV_LABELS
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1
@@ -265,7 +261,6 @@ typedef struct vdev_label {
#define VDEV_ALLOC_L2CACHE 3
#define VDEV_ALLOC_ROOTPOOL 4
#define VDEV_ALLOC_SPLIT 5
-#define VDEV_ALLOC_ATTACH 6
/*
* Allocate or free a vdev
diff --git a/uts/common/fs/zfs/sys/zap.h b/uts/common/fs/zfs/sys/zap.h
index 4d7b315597c5..a1130bbbaaae 100644
--- a/uts/common/fs/zfs/sys/zap.h
+++ b/uts/common/fs/zfs/sys/zap.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZAP_H
@@ -133,8 +132,6 @@ uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
- uint64_t parent_obj, const char *name, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
@@ -303,6 +300,12 @@ int zap_add_int_key(objset_t *os, uint64_t obj,
int zap_lookup_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t *valuep);
+/*
+ * The name is a stringified version of key; increment its value by
+ * delta. Zero values will be zap_remove()-ed.
+ */
+int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+ dmu_tx_t *tx);
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
dmu_tx_t *tx);
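One plausible body for zap_increment_int(), matching the comment above by storing the key under its hex-string name (a sketch, not necessarily this tree's exact code):

	int
	zap_increment_int(objset_t *os, uint64_t obj, uint64_t key,
	    int64_t delta, dmu_tx_t *tx)
	{
		char name[20];

		(void) snprintf(name, sizeof (name), "%llx",
		    (longlong_t)key);
		return (zap_increment(os, obj, name, delta, tx));
	}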
diff --git a/uts/common/fs/zfs/sys/zfeature.h b/uts/common/fs/zfs/sys/zfeature.h
deleted file mode 100644
index 9ff1c93df7f4..000000000000
--- a/uts/common/fs/zfs/sys/zfeature.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#ifndef _SYS_ZFEATURE_H
-#define _SYS_ZFEATURE_H
-
-#include <sys/dmu.h>
-#include <sys/nvpair.h>
-#include "zfeature_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
- uint64_t desc_obj, nvlist_t *unsup_feat);
-
-struct spa;
-extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
-extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
-extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZFEATURE_H */
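For reference, the interface deleted above composed as a guard-then-bump pattern; a minimal sketch of a feature activating itself on first use (hypothetical caller, syncing context assumed):

	static void
	feature_first_use(struct spa *spa, zfeature_info_t *feat, dmu_tx_t *tx)
	{
		ASSERT(spa_feature_is_enabled(spa, feat));
		if (!spa_feature_is_active(spa, feat))
			spa_feature_incr(spa, feat, tx);	/* refcount 0 -> 1 */
	}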
diff --git a/uts/common/fs/zfs/sys/zfs_acl.h b/uts/common/fs/zfs/sys/zfs_acl.h
index d1a64180d5d0..c1a0aeebdce4 100644
--- a/uts/common/fs/zfs/sys/zfs_acl.h
+++ b/uts/common/fs/zfs/sys/zfs_acl.h
@@ -218,7 +218,7 @@ int zfs_fastaccesschk_execute(struct znode *, cred_t *);
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
-int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
+void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
diff --git a/uts/common/fs/zfs/sys/zfs_context.h b/uts/common/fs/zfs/sys/zfs_context.h
index fdd0412feefd..558e9e1884e3 100644
--- a/uts/common/fs/zfs/sys/zfs_context.h
+++ b/uts/common/fs/zfs/sys/zfs_context.h
@@ -22,9 +22,6 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- */
#ifndef _SYS_ZFS_CONTEXT_H
#define _SYS_ZFS_CONTEXT_H
@@ -42,7 +39,6 @@ extern "C" {
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
-#include <sys/taskq_impl.h>
#include <sys/buf.h>
#include <sys/param.h>
#include <sys/systm.h>
diff --git a/uts/common/fs/zfs/sys/zfs_ioctl.h b/uts/common/fs/zfs/sys/zfs_ioctl.h
index 4d781ad2a46c..84bf794fe5f0 100644
--- a/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -42,15 +41,6 @@ extern "C" {
#endif
/*
- * The structures in this file are passed between userland and the
- * kernel. Userland may be running a 32-bit process, while the kernel
- * is 64-bit. Therefore, these structures need to compile the same in
- * 32-bit and 64-bit. This means not using type "long", and adding
- * explicit padding so that the 32-bit structure will not be packed more
- * tightly than the 64-bit structure (which requires 64-bit alignment).
- */
-
-/*
* Property values for snapdir
*/
#define ZFS_SNAPDIR_HIDDEN 0
@@ -266,29 +256,22 @@ typedef enum zfs_case {
} zfs_case_t;
typedef struct zfs_cmd {
- char zc_name[MAXPATHLEN]; /* name of pool or dataset */
- uint64_t zc_nvlist_src; /* really (char *) */
- uint64_t zc_nvlist_src_size;
- uint64_t zc_nvlist_dst; /* really (char *) */
- uint64_t zc_nvlist_dst_size;
- boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */
- int zc_pad2;
-
- /*
- * The following members are for legacy ioctls which haven't been
- * converted to the new method.
- */
- uint64_t zc_history; /* really (char *) */
+ char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
+ uint64_t zc_nvlist_src; /* really (char *) */
+ uint64_t zc_nvlist_src_size;
+ uint64_t zc_nvlist_dst; /* really (char *) */
+ uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
- uint64_t zc_history_len;
+ uint64_t zc_history; /* really (char *) */
+ uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
uint64_t zc_iflags; /* internal to zfs(7fs) */
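The comment removed above encoded a real constraint: zfs_cmd_t crosses the user/kernel boundary, so 32- and 64-bit builds must agree on its layout. A sketch of the rule it stated, with an explicit pad keeping a 64-bit field aligned after a 4-byte one (hypothetical struct, illustration only):

	typedef struct layout_sketch {
		uint64_t	ls_nvlist_src;		/* really (char *) */
		uint64_t	ls_nvlist_src_size;
		boolean_t	ls_flag;		/* 4 bytes */
		int		ls_pad;			/* keep next field 8-aligned */
		uint64_t	ls_cookie;
	} layout_sketch_t;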
diff --git a/uts/common/fs/zfs/sys/zfs_vfsops.h b/uts/common/fs/zfs/sys/zfs_vfsops.h
index 9af5cef05863..38c87df4300f 100644
--- a/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ b/uts/common/fs/zfs/sys/zfs_vfsops.h
@@ -57,7 +57,6 @@ struct zfsvfs {
boolean_t z_fuid_dirty; /* need to sync fuid table ? */
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
- uint_t z_acl_mode; /* acl chmod/mode behavior */
uint_t z_acl_inherit; /* acl inheritance behavior */
zfs_case_t z_case; /* case-sense */
boolean_t z_utf8; /* utf8-only */
diff --git a/uts/common/fs/zfs/sys/zio.h b/uts/common/fs/zfs/sys/zio.h
index 9e475b4fcecd..97d8ec74d2e9 100644
--- a/uts/common/fs/zfs/sys/zio.h
+++ b/uts/common/fs/zfs/sys/zio.h
@@ -22,10 +22,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
#ifndef _ZIO_H
#define _ZIO_H
@@ -273,14 +269,6 @@ typedef struct zbookmark {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
-#define ZB_IS_ZERO(zb) \
- ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
- (zb)->zb_level == 0 && (zb)->zb_blkid == 0)
-#define ZB_IS_ROOT(zb) \
- ((zb)->zb_object == ZB_ROOT_OBJECT && \
- (zb)->zb_level == ZB_ROOT_LEVEL && \
- (zb)->zb_blkid == ZB_ROOT_BLKID)
-
typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
@@ -298,7 +286,6 @@ typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
-struct dnode_phys;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
@@ -430,9 +417,6 @@ struct zio {
/* FMA state */
zio_cksum_report_t *io_cksum_report;
uint64_t io_ena;
-
- /* Taskq dispatching state */
- taskq_ent_t io_tqent;
};
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
@@ -568,10 +552,6 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
-/* zbookmark functions */
-boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
- const zbookmark_t *zb1, const zbookmark_t *zb2);
-
#ifdef __cplusplus
}
#endif
diff --git a/uts/common/fs/zfs/txg.c b/uts/common/fs/zfs/txg.c
index 55b1f3884bf3..9b308ca4e71a 100644
--- a/uts/common/fs/zfs/txg.c
+++ b/uts/common/fs/zfs/txg.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Portions Copyright 2011 Martin Matuska
*/
#include <sys/zfs_context.h>
@@ -480,7 +479,7 @@ void
txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
{
tx_state_t *tx = &dp->dp_tx;
- clock_t timeout = ddi_get_lbolt() + ticks;
+ int timeout = ddi_get_lbolt() + ticks;
/* don't delay if this txg could transition to quiescing immediately */
if (tx->tx_open_txg > txg ||
diff --git a/uts/common/fs/zfs/vdev.c b/uts/common/fs/zfs/vdev.c
index 6fbaf7b7ecca..bac3e86054d6 100644
--- a/uts/common/fs/zfs/vdev.c
+++ b/uts/common/fs/zfs/vdev.c
@@ -21,8 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -108,7 +106,7 @@ vdev_get_min_asize(vdev_t *vd)
vdev_t *pvd = vd->vdev_parent;
/*
- * If our parent is NULL (inactive spare or cache) or is the root,
+ * If our parent is NULL (inactive spare or cache) or is the root,
* just return our own asize.
*/
if (pvd == NULL)
@@ -288,7 +286,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
if (spa->spa_root_vdev == NULL) {
ASSERT(ops == &vdev_root_ops);
spa->spa_root_vdev = vd;
- spa->spa_load_guid = spa_generate_guid(NULL);
}
if (guid == 0 && ops != &vdev_hole_ops) {
@@ -488,7 +485,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_removing);
}
- if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) {
+ if (parent && !parent->vdev_parent) {
ASSERT(alloctype == VDEV_ALLOC_LOAD ||
alloctype == VDEV_ALLOC_ADD ||
alloctype == VDEV_ALLOC_SPLIT ||
@@ -664,8 +661,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
svd->vdev_ms_shift = 0;
svd->vdev_ms_count = 0;
- if (tvd->vdev_mg)
- ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg);
tvd->vdev_mg = svd->vdev_mg;
tvd->vdev_ms = svd->vdev_ms;
@@ -737,7 +732,6 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
mvd->vdev_asize = cvd->vdev_asize;
mvd->vdev_min_asize = cvd->vdev_min_asize;
- mvd->vdev_max_asize = cvd->vdev_max_asize;
mvd->vdev_ashift = cvd->vdev_ashift;
mvd->vdev_state = cvd->vdev_state;
mvd->vdev_crtxg = cvd->vdev_crtxg;
@@ -1109,8 +1103,7 @@ vdev_open(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
int error;
uint64_t osize = 0;
- uint64_t max_osize = 0;
- uint64_t asize, max_asize, psize;
+ uint64_t asize, psize;
uint64_t ashift = 0;
ASSERT(vd->vdev_open_thread == curthread ||
@@ -1141,7 +1134,7 @@ vdev_open(vdev_t *vd)
return (ENXIO);
}
- error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
+ error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
/*
* Reset the vdev_reopening flag so that we actually close
@@ -1199,7 +1192,6 @@ vdev_open(vdev_t *vd)
}
osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));
- max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t));
if (vd->vdev_children == 0) {
if (osize < SPA_MINDEVSIZE) {
@@ -1209,8 +1201,6 @@ vdev_open(vdev_t *vd)
}
psize = osize;
asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
- max_asize = max_osize - (VDEV_LABEL_START_SIZE +
- VDEV_LABEL_END_SIZE);
} else {
if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE -
(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
@@ -1220,7 +1210,6 @@ vdev_open(vdev_t *vd)
}
psize = 0;
asize = osize;
- max_asize = max_osize;
}
vd->vdev_psize = psize;
@@ -1240,22 +1229,16 @@ vdev_open(vdev_t *vd)
* For testing purposes, a higher ashift can be requested.
*/
vd->vdev_asize = asize;
- vd->vdev_max_asize = max_asize;
vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
} else {
/*
- * Detect if the alignment requirement has increased.
- * We don't want to make the pool unavailable, just
- * issue a warning instead.
+ * Make sure the alignment requirement hasn't increased.
*/
- if (ashift > vd->vdev_top->vdev_ashift &&
- vd->vdev_ops->vdev_op_leaf) {
- cmn_err(CE_WARN,
- "Disk, '%s', has a block alignment that is "
- "larger than the pool's alignment\n",
- vd->vdev_path);
+ if (ashift > vd->vdev_top->vdev_ashift) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_BAD_LABEL);
+ return (EINVAL);
}
- vd->vdev_max_asize = max_asize;
}
/*
@@ -1297,18 +1280,13 @@ vdev_open(vdev_t *vd)
* contents. This needs to be done before vdev_load() so that we don't
* inadvertently do repair I/Os to the wrong device.
*
- * If 'strict' is false ignore the spa guid check. This is necessary because
- * if the machine crashed during a re-guid the new guid might have been written
- * to all of the vdev labels, but not the cached config. The strict check
- * will be performed when the pool is opened again using the mos config.
- *
* This function will only return failure if one of the vdevs indicates that it
* has since been destroyed or exported. This is only possible if
* /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state
* will be updated but the function will return 0.
*/
int
-vdev_validate(vdev_t *vd, boolean_t strict)
+vdev_validate(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *label;
@@ -1316,7 +1294,7 @@ vdev_validate(vdev_t *vd, boolean_t strict)
uint64_t state;
for (int c = 0; c < vd->vdev_children; c++)
- if (vdev_validate(vd->vdev_child[c], strict) != 0)
+ if (vdev_validate(vd->vdev_child[c]) != 0)
return (EBADF);
/*
@@ -1328,8 +1306,7 @@ vdev_validate(vdev_t *vd, boolean_t strict)
uint64_t aux_guid = 0;
nvlist_t *nvl;
- if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
- NULL) {
+ if ((label = vdev_label_read_config(vd)) == NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LABEL);
return (0);
@@ -1347,9 +1324,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
return (0);
}
- if (strict && (nvlist_lookup_uint64(label,
- ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
- guid != spa_guid(spa))) {
+ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
+ &guid) != 0 || guid != spa_guid(spa)) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
nvlist_free(label);
@@ -1511,7 +1487,7 @@ vdev_reopen(vdev_t *vd)
!l2arc_vdev_present(vd))
l2arc_add_vdev(spa, vd);
} else {
- (void) vdev_validate(vd, B_TRUE);
+ (void) vdev_validate(vd);
}
/*
@@ -1970,14 +1946,14 @@ vdev_validate_aux(vdev_t *vd)
if (!vdev_readable(vd))
return (0);
- if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
+ if ((label = vdev_label_read_config(vd)) == NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
return (-1);
}
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
- !SPA_VERSION_IS_SUPPORTED(version) ||
+ version > SPA_VERSION ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
guid != vd->vdev_guid ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
@@ -2480,7 +2456,6 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
vs->vs_rsize = vdev_get_min_asize(vd);
if (vd->vdev_ops->vdev_op_leaf)
vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
mutex_exit(&vd->vdev_stat_lock);
/*
diff --git a/uts/common/fs/zfs/vdev_cache.c b/uts/common/fs/zfs/vdev_cache.c
index 77f8116effe6..688d541344cb 100644
--- a/uts/common/fs/zfs/vdev_cache.c
+++ b/uts/common/fs/zfs/vdev_cache.c
@@ -71,16 +71,9 @@
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
* vdev's vdev_cache.
- *
- * TODO: Note that with the current ZFS code, it turns out that the
- * vdev cache is not helpful, and in some cases actually harmful. It
- * is better if we disable this. Once some time has passed, we should
- * actually remove this to simplify the code. For now we just disable
- * it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
- * has made these same changes.
*/
int zfs_vdev_cache_max = 1<<14; /* 16KB */
-int zfs_vdev_cache_size = 0;
+int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */
int zfs_vdev_cache_bshift = 16;
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */
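Taken together: reads of at most zfs_vdev_cache_max bytes (16KB) are inflated into aligned VCBS-byte (64KB) reads, and each vdev caches up to zfs_vdev_cache_size bytes (10MB). A sketch of the alignment step, assuming the P2ALIGN/P2PHASE macros from sysmacros.h:

	/* Map a small read onto its 64KB cache block. */
	uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
	uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
	/* Fill VCBS bytes at cache_offset, then satisfy the caller's */
	/* io_size bytes starting at cache_phase within that block. */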
diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c
index 759f0f84f1c8..d7417736b4ee 100644
--- a/uts/common/fs/zfs/vdev_disk.c
+++ b/uts/common/fs/zfs/vdev_disk.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -31,7 +30,6 @@
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/sunldi.h>
-#include <sys/efi_partition.h>
#include <sys/fm/fs/zfs.h>
/*
@@ -104,39 +102,8 @@ vdev_disk_rele(vdev_t *vd)
}
}
-static uint64_t
-vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
-{
- ASSERT(vd->vdev_wholedisk);
-
- vdev_disk_t *dvd = vd->vdev_tsd;
- dk_efi_t dk_ioc;
- efi_gpt_t *efi;
- uint64_t avail_space = 0;
- int efisize = EFI_LABEL_SIZE * 2;
-
- dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP);
- dk_ioc.dki_lba = 1;
- dk_ioc.dki_length = efisize;
- dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data;
- efi = dk_ioc.dki_data;
-
- if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc,
- FKIOCTL, kcred, NULL) == 0) {
- uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
-
- zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu",
- vd->vdev_path, capacity, efi_altern_lba);
- if (capacity > efi_altern_lba)
- avail_space = (capacity - efi_altern_lba) * blksz;
- }
- kmem_free(dk_ioc.dki_data, efisize);
- return (avail_space);
-}
-
static int
-vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
+vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
spa_t *spa = vd->vdev_spa;
vdev_disk_t *dvd;
@@ -307,6 +274,16 @@ skip_open:
}
/*
+ * If we own the whole disk, try to enable disk write caching.
+ * We ignore errors because it's OK if we can't do it.
+ */
+ if (vd->vdev_wholedisk == 1) {
+ int wce = 1;
+ (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
+ FKIOCTL, kcred, NULL);
+ }
+
+ /*
* Determine the device's minimum transfer size.
* If the ioctl isn't supported, assume DEV_BSIZE.
*/
@@ -316,25 +293,6 @@ skip_open:
*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;
- if (vd->vdev_wholedisk == 1) {
- uint64_t capacity = dkmext.dki_capacity - 1;
- uint64_t blksz = dkmext.dki_lbsize;
- int wce = 1;
-
- /*
- * If we own the whole disk, try to enable disk write caching.
- * We ignore errors because it's OK if we can't do it.
- */
- (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
- FKIOCTL, kcred, NULL);
-
- *max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz);
- zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
- "max_psize %llu", vd->vdev_path, *psize, *max_psize);
- } else {
- *max_psize = *psize;
- }
-
/*
* Clear the nowritecache bit, so that on a vdev_reopen() we will
* try again.
diff --git a/uts/common/fs/zfs/vdev_file.c b/uts/common/fs/zfs/vdev_file.c
index 043fa51294c2..8c22aa5316a1 100644
--- a/uts/common/fs/zfs/vdev_file.c
+++ b/uts/common/fs/zfs/vdev_file.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -48,8 +47,7 @@ vdev_file_rele(vdev_t *vd)
}
static int
-vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
+vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
vdev_file_t *vf;
vnode_t *vp;
@@ -114,7 +112,7 @@ skip_open:
return (error);
}
- *max_psize = *psize = vattr.va_size;
+ *psize = vattr.va_size;
*ashift = SPA_MINBLOCKSHIFT;
return (0);
diff --git a/uts/common/fs/zfs/vdev_label.c b/uts/common/fs/zfs/vdev_label.c
index b9436472495d..c08ed8ba0467 100644
--- a/uts/common/fs/zfs/vdev_label.c
+++ b/uts/common/fs/zfs/vdev_label.c
@@ -18,10 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -123,8 +121,6 @@
* txg Transaction group in which this label was written
* pool_guid Unique identifier for this pool
* vdev_tree An nvlist describing vdev tree.
- * features_for_read
- * An nvlist of the features necessary for reading the MOS.
*
* Each leaf device label also contains the following:
*
@@ -432,13 +428,8 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
}
-/*
- * Returns the configuration from the label of the given vdev. If 'label' is
- * VDEV_BEST_LABEL, each label of the vdev will be read until a valid
- * configuration is found; otherwise, only the specified label will be read.
- */
nvlist_t *
-vdev_label_read_config(vdev_t *vd, int label)
+vdev_label_read_config(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *config = NULL;
@@ -456,8 +447,6 @@ vdev_label_read_config(vdev_t *vd, int label)
retry:
for (int l = 0; l < VDEV_LABELS; l++) {
- if (label >= 0 && label < VDEV_LABELS && label != l)
- continue;
zio = zio_root(spa, NULL, NULL, flags);
@@ -507,7 +496,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
/*
* Read the label, if any, and perform some basic sanity checks.
*/
- if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
+ if ((label = vdev_label_read_config(vd)) == NULL)
return (B_FALSE);
(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@@ -844,7 +833,7 @@ retry:
* come back up, we fail to see the uberblock for txg + 1 because, say,
* it was on a mirrored device and the replica to which we wrote txg + 1
* is now offline. If we then make some changes and sync txg + 1, and then
- * the missing replica comes back, then for a few seconds we'll have two
+ * the missing replica comes back, then for a few seconds we'll have two
* conflicting uberblocks on disk with the same txg. The solution is simple:
* among uberblocks with equal txg, choose the one with the latest timestamp.
*/
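A sketch of the tie-break the comment describes, comparing txg first and timestamp second (this mirrors the function in the hunk that follows):

	if (ub1->ub_txg < ub2->ub_txg)
		return (-1);
	if (ub1->ub_txg > ub2->ub_txg)
		return (1);
	if (ub1->ub_timestamp < ub2->ub_timestamp)
		return (-1);
	if (ub1->ub_timestamp > ub2->ub_timestamp)
		return (1);
	return (0);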
@@ -864,50 +853,46 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
return (0);
}
-struct ubl_cbdata {
- uberblock_t *ubl_ubbest; /* Best uberblock */
- vdev_t *ubl_vd; /* vdev associated with the above */
- int ubl_label; /* Label associated with the above */
-};
-
static void
vdev_uberblock_load_done(zio_t *zio)
{
- vdev_t *vd = zio->io_vd;
spa_t *spa = zio->io_spa;
zio_t *rio = zio->io_private;
uberblock_t *ub = zio->io_data;
- struct ubl_cbdata *cbp = rio->io_private;
+ uberblock_t *ubbest = rio->io_private;
- ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
+ ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
mutex_enter(&rio->io_lock);
if (ub->ub_txg <= spa->spa_load_max_txg &&
- vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
- /*
- * Keep track of the vdev and label in which this
- * uberblock was found. We will use this information
- * later to obtain the config nvlist associated with
- * this uberblock.
- */
- *cbp->ubl_ubbest = *ub;
- cbp->ubl_vd = vd;
- cbp->ubl_label = vdev_label_number(vd->vdev_psize,
- zio->io_offset);
- }
+ vdev_uberblock_compare(ub, ubbest) > 0)
+ *ubbest = *ub;
mutex_exit(&rio->io_lock);
}
zio_buf_free(zio->io_data, zio->io_size);
}
-static void
-vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
- struct ubl_cbdata *cbp)
+void
+vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
{
+ spa_t *spa = vd->vdev_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
+
+ if (vd == rvd) {
+ ASSERT(zio == NULL);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ zio = zio_root(spa, NULL, ubbest, flags);
+ bzero(ubbest, sizeof (uberblock_t));
+ }
+
+ ASSERT(zio != NULL);
+
for (int c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
+ vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
for (int l = 0; l < VDEV_LABELS; l++) {
@@ -920,45 +905,11 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
}
}
}
-}
-/*
- * Reads the 'best' uberblock from disk along with its associated
- * configuration. First, we read the uberblock array of each label of each
- * vdev, keeping track of the uberblock with the highest txg in each array.
- * Then, we read the configuration from the same label as the best uberblock.
- */
-void
-vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
-{
- int i;
- zio_t *zio;
- spa_t *spa = rvd->vdev_spa;
- struct ubl_cbdata cb;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
-
- ASSERT(ub);
- ASSERT(config);
-
- bzero(ub, sizeof (uberblock_t));
- *config = NULL;
-
- cb.ubl_ubbest = ub;
- cb.ubl_vd = NULL;
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- zio = zio_root(spa, NULL, &cb, flags);
- vdev_uberblock_load_impl(zio, rvd, flags, &cb);
- (void) zio_wait(zio);
- if (cb.ubl_vd != NULL) {
- for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) {
- *config = vdev_label_read_config(cb.ubl_vd, i);
- if (*config != NULL)
- break;
- }
+ if (vd == rvd) {
+ (void) zio_wait(zio);
+ spa_config_exit(spa, SCL_ALL, FTAG);
}
- spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
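With the reverted signature, callers hand in a NULL zio and the root vdev and let the function manage the config lock and root zio itself; a sketch of the call site (as a spa_load-style caller would use it):

	uberblock_t ub;

	vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
	if (ub.ub_txg == 0)
		return (ENXIO);	/* no valid uberblock found */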
diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c
index a28ca3e3965b..698c0275d34e 100644
--- a/uts/common/fs/zfs/vdev_mirror.c
+++ b/uts/common/fs/zfs/vdev_mirror.c
@@ -23,10 +23,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_impl.h>
@@ -131,8 +127,7 @@ vdev_mirror_map_alloc(zio_t *zio)
}
static int
-vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
{
int numerrors = 0;
int lasterror = 0;
@@ -154,7 +149,6 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
}
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
- *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
*ashift = MAX(*ashift, cvd->vdev_ashift);
}
diff --git a/uts/common/fs/zfs/vdev_missing.c b/uts/common/fs/zfs/vdev_missing.c
index 3bd8c90e04c7..6a5588d59213 100644
--- a/uts/common/fs/zfs/vdev_missing.c
+++ b/uts/common/fs/zfs/vdev_missing.c
@@ -24,10 +24,6 @@
*/
/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-/*
* The 'missing' vdev is a special vdev type used only during import. It
* signifies a placeholder in the root vdev for some vdev that we know is
* missing. We pass it down to the kernel to allow the rest of the
@@ -44,8 +40,7 @@
/* ARGSUSED */
static int
-vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
+vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
/*
* Really this should just fail. But then the root vdev will be in the
@@ -54,7 +49,6 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
* will fail the GUID sum check before ever trying to open the pool.
*/
*psize = 0;
- *max_psize = 0;
*ashift = 0;
return (0);
}
diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c
index 030ea4293002..4b0f5602c1d4 100644
--- a/uts/common/fs/zfs/vdev_raidz.c
+++ b/uts/common/fs/zfs/vdev_raidz.c
@@ -21,7 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -1442,8 +1441,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
}
static int
-vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
{
vdev_t *cvd;
uint64_t nparity = vd->vdev_nparity;
@@ -1471,12 +1469,10 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
}
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
- *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
*ashift = MAX(*ashift, cvd->vdev_ashift);
}
*asize *= vd->vdev_children;
- *max_asize *= vd->vdev_children;
if (numerrors > nparity) {
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
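The MIN(x - 1, y - 1) + 1 idiom above deserves a note: with *asize starting at 0, the unsigned underflow of 0 - 1 makes the first child win automatically, so no first-iteration special case is needed. A worked sketch with hypothetical child sizes:

	uint64_t asize = 0;
	uint64_t child_asize[] = { 400, 500, 300 };	/* hypothetical */

	for (int c = 0; c < 3; c++)
		asize = MIN(asize - 1, child_asize[c] - 1) + 1;
	/* asize == 300; multiplying by 3 children then yields 900. */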
diff --git a/uts/common/fs/zfs/vdev_root.c b/uts/common/fs/zfs/vdev_root.c
index 1abc79d330bb..879f78f3a5b3 100644
--- a/uts/common/fs/zfs/vdev_root.c
+++ b/uts/common/fs/zfs/vdev_root.c
@@ -23,10 +23,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_impl.h>
@@ -54,8 +50,7 @@ too_many_errors(vdev_t *vd, int numerrors)
}
static int
-vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
{
int lasterror = 0;
int numerrors = 0;
@@ -82,7 +77,6 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
}
*asize = 0;
- *max_asize = 0;
*ashift = 0;
return (0);
diff --git a/uts/common/fs/zfs/zap.c b/uts/common/fs/zfs/zap.c
index fa1d99fec957..288a4d99ab25 100644
--- a/uts/common/fs/zfs/zap.c
+++ b/uts/common/fs/zfs/zap.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -947,19 +946,6 @@ fzap_prefetch(zap_name_t *zn)
* Helper functions for consumers.
*/
-uint64_t
-zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
- const char *name, dmu_tx_t *tx)
-{
- uint64_t new_obj;
-
- VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
- VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
- tx) == 0);
-
- return (new_obj);
-}
-
int
zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
char *name)
diff --git a/uts/common/fs/zfs/zap_micro.c b/uts/common/fs/zfs/zap_micro.c
index 3e80fb9c5d80..2d89c20c47d7 100644
--- a/uts/common/fs/zfs/zap_micro.c
+++ b/uts/common/fs/zfs/zap_micro.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zio.h>
@@ -461,7 +460,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
- ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
+ ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
}
#endif
@@ -585,7 +584,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
- ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
+ ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
}
#endif
@@ -1404,7 +1403,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
}
/*
- * We lock the zap with adding == FALSE. Because, if we pass
+ * We lock the zap with adding == FALSE because, if we pass
* the actual value of add, it could trigger a mzap_upgrade().
* At present we are just evaluating the possibility of this operation
* and hence we do not want to trigger an upgrade.
diff --git a/uts/common/fs/zfs/zfeature.c b/uts/common/fs/zfs/zfeature.c
deleted file mode 100644
index ba722088a40f..000000000000
--- a/uts/common/fs/zfs/zfeature.c
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/zfeature.h>
-#include <sys/dmu.h>
-#include <sys/nvpair.h>
-#include <sys/zap.h>
-#include <sys/dmu_tx.h>
-#include "zfeature_common.h"
-#include <sys/spa_impl.h>
-
-/*
- * ZFS Feature Flags
- * -----------------
- *
- * ZFS feature flags are used to provide fine-grained versioning to the ZFS
- * on-disk format. Once enabled on a pool feature flags replace the old
- * spa_version() number.
- *
- * Each new on-disk format change will be given a uniquely identifying string
- * guid rather than a version number. This avoids the problem of different
- * organizations creating new on-disk formats with the same version number. To
- * keep feature guids unique, they should consist of the reverse dns name of the
- * organization which implemented the feature and a short name for the feature,
- * separated by a colon (e.g. com.delphix:async_destroy).
- *
- * Reference Counts
- * ----------------
- *
- * Within each pool features can be in one of three states: disabled, enabled,
- * or active. These states are differentiated by a reference count stored on
- * disk for each feature:
- *
- * 1) If there is no reference count stored on disk the feature is disabled.
- * 2) If the reference count is 0 a system administrator has enabled the
- * feature, but the feature has not been used yet, so no on-disk
- * format changes have been made.
- * 3) If the reference count is greater than 0 the feature is active.
- * The format changes required by the feature are currently on disk.
- * Note that if the feature's format changes are reversed the feature
- * may choose to set its reference count back to 0.
- *
- * Feature flags make no differentiation between non-zero reference counts
- * for an active feature (e.g. a reference count of 1 means the same thing as a
- * reference count of 27834721), but feature implementations may choose to use
- * the reference count to store meaningful information. For example, a new RAID
- * implementation might set the reference count to the number of vdevs using
- * it. If all those disks are removed from the pool the feature goes back to
- * having a reference count of 0.
- *
- * It is the responsibility of the individual features to maintain a non-zero
- * reference count as long as the feature's format changes are present on disk.
- *
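A minimal sketch of how these three states are consumed, using only the query and refcount entry points defined later in this file (spa_feature_is_enabled(), spa_feature_is_active(), spa_feature_incr(), spa_feature_decr()); the feature argument stands for any registered zfeature_info_t:

static void
feature_state_sketch(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
	/* Refcount updates must happen in syncing context. */
	if (!spa_feature_is_enabled(spa, feature))
		return;			/* state 1: disabled, no refcount */

	if (!spa_feature_is_active(spa, feature)) {
		/* state 2: enabled, refcount == 0; first on-disk use */
		spa_feature_incr(spa, feature, tx);
	} else {
		/* state 3: active, refcount > 0; undoing the change */
		spa_feature_decr(spa, feature, tx);
	}
}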
- * Dependencies
- * ------------
- *
- * Each feature may depend on other features. The only effect of this
- * relationship is that when a feature is enabled all of its dependencies are
- * automatically enabled as well. Any future work to support disabling of
- * features would need to ensure that features cannot be disabled if other
- * enabled features depend on them.
- *
- * On-disk Format
- * --------------
- *
- * When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES
- * (5000). In order for this to work, the pool is automatically upgraded to
- * SPA_VERSION_BEFORE_FEATURES (28) first, so that all pre-feature-flags
- * on-disk format changes will be in use.
- *
- * Information about features is stored in 3 ZAP objects in the pool's MOS.
- * These objects are linked to by the following names in the pool directory
- * object:
- *
- * 1) features_for_read: feature guid -> reference count
- * Features needed to open the pool for reading.
- * 2) features_for_write: feature guid -> reference count
- * Features needed to open the pool for writing.
- * 3) feature_descriptions: feature guid -> descriptive string
- * A human readable string.
- *
- * All enabled features appear in either features_for_read or
- * features_for_write, but not both.
- *
- * To open a pool in read-only mode, only the features listed in
- * features_for_read need to be supported.
- *
- * To open the pool in read-write mode, features in both features_for_read and
- * features_for_write need to be supported.
- *
- * Some features may be required to read the ZAP objects containing feature
- * information. To allow software to check for compatibility with these features
- * before the pool is opened, their names must be stored in the label in a
- * new "features_for_read" entry (note that features that are only required
- * to write to a pool never need to be stored in the label, since the
- * features_for_write ZAP object can be read before the pool is written to).
- * To save space in the label, features must be explicitly marked as needing to
- * be written to the label. Also, reference counts are not stored in the label;
- * instead, any feature whose reference count drops to 0 is removed from the
- * label.
- *
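As a sketch, walking one of these ZAP objects is an ordinary zap_cursor traversal over guid -> refcount pairs, exactly as feature_is_supported() below does; error handling and the MOS lookup of obj are elided here:

static void
print_feature_refcounts(objset_t *os, uint64_t obj)
{
	zap_cursor_t zc;
	zap_attribute_t za;

	for (zap_cursor_init(&zc, os, obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		/* za_name is the feature guid; za_first_integer the count */
		cmn_err(CE_NOTE, "%s = %llu", za.za_name,
		    (u_longlong_t)za.za_first_integer);
	}
	zap_cursor_fini(&zc);
}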
- * Adding New Features
- * -------------------
- *
- * Features must be registered in the zpool_feature_init() function in
- * zfeature_common.c using the zfeature_register() function. This function
- * has arguments to specify if the feature should be stored in the
- * features_for_read or features_for_write ZAP object and if it needs to be
- * written to the label when active.
- *
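For illustration, a registration in zpool_feature_init() plausibly looks like the sketch below; the exact zfeature_register() prototype lives in zfeature_common.c, so the argument order shown (guid, name, description, read-only/write-object flag, label flag, NULL-terminated dependency list) is an assumption, not the canonical signature:

/* Hypothetical call; see zfeature_common.c for the real prototype. */
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
    "com.delphix:async_destroy", "async_destroy",
    "Destroy filesystems asynchronously.",
    B_TRUE,	/* fi_can_readonly: refcount kept in features_for_write */
    B_FALSE,	/* fi_mos: no need to write the guid to the label */
    NULL);	/* no dependencies */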
- * Once a feature is registered, it will appear as a "feature@<feature name>"
- * property which can be set by an administrator. Feature implementors should
- * use the spa_feature_is_enabled() and spa_feature_is_active() functions to
- * query the state of a feature and the spa_feature_incr() and
- * spa_feature_decr() functions to change an enabled feature's reference count.
- * Reference counts may only be updated in the syncing context.
- *
- * Features may not perform enable-time initialization. Instead, any such
- * initialization should occur when the feature is first used. This design
- * enforces that on-disk changes be made only when features are used. Code
- * should only check if a feature is enabled using spa_feature_is_enabled(),
- * not by relying on any feature-specific metadata existing. If a feature is
- * enabled but its metadata is not yet on disk, then the metadata should be
- * created as needed.
- *
- * As an example, consider the com.delphix:async_destroy feature. This feature
- * relies on the existence of a bptree in the MOS that stores blocks for
- * asynchronous freeing. This bptree is not created when async_destroy is
- * enabled. Instead, when a dataset is destroyed, spa_feature_is_enabled() is
- * called to check if async_destroy is enabled. If it is and the bptree object
- * does not exist yet, the bptree object is created as part of the dataset
- * destroy and async_destroy's reference count is incremented to indicate it
- * has made an on-disk format change. Later, after the destroyed dataset's
- * blocks have all been asynchronously freed, there is no longer any use for the
- * bptree object, so it is destroyed and async_destroy's reference count is
- * decremented back to 0 to indicate that it has undone its on-disk format
- * changes.
- */
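The async_destroy flow above, reduced to a sketch: bptree_exists() and bptree_create_in_mos() are placeholders here (the real bptree API is in bptree.c), while the spa_feature_*() calls are the ones defined in this file:

static void
async_destroy_sketch(spa_t *spa, zfeature_info_t *async_destroy, dmu_tx_t *tx)
{
	if (!spa_feature_is_enabled(spa, async_destroy))
		return;		/* fall back to synchronous destroy */

	if (!bptree_exists(spa)) {		/* placeholder check */
		bptree_create_in_mos(spa, tx);	/* placeholder create */
		/* first on-disk format change: record it */
		spa_feature_incr(spa, async_destroy, tx);
	}
	/* ... hand the dataset's blocks to the bptree for async freeing ... */
}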
-
-typedef enum {
- FEATURE_ACTION_ENABLE,
- FEATURE_ACTION_INCR,
- FEATURE_ACTION_DECR,
-} feature_action_t;
-
-/*
- * Checks that the features active in the specified object are supported by
- * this software. Adds each unsupported feature (name -> description) to
- * the supplied nvlist.
- */
-boolean_t
-feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj,
- nvlist_t *unsup_feat)
-{
- boolean_t supported;
- zap_cursor_t zc;
- zap_attribute_t za;
-
- supported = B_TRUE;
- for (zap_cursor_init(&zc, os, obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- ASSERT(za.za_integer_length == sizeof (uint64_t) &&
- za.za_num_integers == 1);
-
- if (za.za_first_integer != 0 &&
- !zfeature_is_supported(za.za_name)) {
- supported = B_FALSE;
-
- if (unsup_feat != NULL) {
- char *desc = "";
- char buf[MAXPATHLEN];
-
- if (zap_lookup(os, desc_obj, za.za_name,
- 1, sizeof (buf), buf) == 0)
- desc = buf;
-
- VERIFY(nvlist_add_string(unsup_feat, za.za_name,
- desc) == 0);
- }
- }
- }
- zap_cursor_fini(&zc);
-
- return (supported);
-}
-
-static int
-feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
- zfeature_info_t *feature, uint64_t *res)
-{
- int err;
- uint64_t refcount;
- uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
-
- ASSERT(0 != zapobj);
-
- err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
- &refcount);
- if (err != 0) {
- if (err == ENOENT)
- return (ENOTSUP);
- else
- return (err);
- }
- *res = refcount;
- return (0);
-}
-
-static int
-feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj,
- uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action,
- dmu_tx_t *tx)
-{
- int error;
- uint64_t refcount;
- uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
-
- ASSERT(0 != zapobj);
- ASSERT(zfeature_is_valid_guid(feature->fi_guid));
-
- error = zap_lookup(os, zapobj, feature->fi_guid,
- sizeof (uint64_t), 1, &refcount);
-
- /*
- * If we can't ascertain the status of the specified feature, an I/O
- * error occurred.
- */
- if (error != 0 && error != ENOENT)
- return (error);
-
- switch (action) {
- case FEATURE_ACTION_ENABLE:
- /*
- * If the feature is already enabled, ignore the request.
- */
- if (error == 0)
- return (0);
- refcount = 0;
- break;
- case FEATURE_ACTION_INCR:
- if (error == ENOENT)
- return (ENOTSUP);
- if (refcount == UINT64_MAX)
- return (EOVERFLOW);
- refcount++;
- break;
- case FEATURE_ACTION_DECR:
- if (error == ENOENT)
- return (ENOTSUP);
- if (refcount == 0)
- return (EOVERFLOW);
- refcount--;
- break;
- default:
- ASSERT(0);
- break;
- }
-
- if (action == FEATURE_ACTION_ENABLE) {
- int i;
-
- for (i = 0; feature->fi_depends[i] != NULL; i++) {
- zfeature_info_t *dep = feature->fi_depends[i];
-
- error = feature_do_action(os, read_obj, write_obj,
- desc_obj, dep, FEATURE_ACTION_ENABLE, tx);
- if (error != 0)
- return (error);
- }
- }
-
- error = zap_update(os, zapobj, feature->fi_guid,
- sizeof (uint64_t), 1, &refcount, tx);
- if (error != 0)
- return (error);
-
- if (action == FEATURE_ACTION_ENABLE) {
- error = zap_update(os, desc_obj,
- feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
- feature->fi_desc, tx);
- if (error != 0)
- return (error);
- }
-
- if (action == FEATURE_ACTION_INCR && refcount == 1 && feature->fi_mos) {
- spa_activate_mos_feature(dmu_objset_spa(os), feature->fi_guid);
- }
-
- if (action == FEATURE_ACTION_DECR && refcount == 0) {
- spa_deactivate_mos_feature(dmu_objset_spa(os),
- feature->fi_guid);
- }
-
- return (0);
-}
-
-void
-spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
-{
- /*
- * We create feature flags ZAP objects in two instances: during pool
- * creation and during pool upgrade.
- */
- ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
- tx->tx_txg == TXG_INITIAL));
-
- spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
- DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_FEATURES_FOR_READ, tx);
- spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset,
- DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_FEATURES_FOR_WRITE, tx);
- spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset,
- DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_FEATURE_DESCRIPTIONS, tx);
-}
-
-/*
- * Enable any required dependencies, then enable the requested feature.
- */
-void
-spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
-{
- ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
- VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
- spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx));
-}
-
-/*
- * If the specified feature has not yet been enabled, this function returns
- * ENOTSUP; otherwise, this function increments the feature's refcount (or
- * returns EOVERFLOW if the refcount cannot be incremented). This function must
- * be called from syncing context.
- */
-void
-spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
-{
- ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
- VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
- spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx));
-}
-
-/*
- * If the specified feature has not yet been enabled, this function returns
- * ENOTSUP; otherwise, this function decrements the feature's refcount (or
- * returns EOVERFLOW if the refcount is already 0). This function must
- * be called from syncing context.
- */
-void
-spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
-{
- ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
- VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
- spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx));
-}
-
-boolean_t
-spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature)
-{
- int err;
- uint64_t refcount;
-
- if (spa_version(spa) < SPA_VERSION_FEATURES)
- return (B_FALSE);
-
- err = feature_get_refcount(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
- feature, &refcount);
- ASSERT(err == 0 || err == ENOTSUP);
- return (err == 0);
-}
-
-boolean_t
-spa_feature_is_active(spa_t *spa, zfeature_info_t *feature)
-{
- int err;
- uint64_t refcount;
-
- if (spa_version(spa) < SPA_VERSION_FEATURES)
- return (B_FALSE);
-
- err = feature_get_refcount(spa->spa_meta_objset,
- spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
- feature, &refcount);
- ASSERT(err == 0 || err == ENOTSUP);
- return (err == 0 && refcount > 0);
-}
diff --git a/uts/common/fs/zfs/zfs_acl.c b/uts/common/fs/zfs/zfs_acl.c
index 2b93fc8329f9..843b5ff06ef4 100644
--- a/uts/common/fs/zfs/zfs_acl.c
+++ b/uts/common/fs/zfs/zfs_acl.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -1331,8 +1330,75 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}
+/*
+ * Update the access mask for a prepended ACE.
+ *
+ * This applies the "groupmask" semantics of the aclmode property.
+ */
+static void
+zfs_acl_prepend_fixup(zfs_acl_t *aclp, void *acep, void *origacep,
+ mode_t mode, uint64_t owner)
+{
+ int rmask, wmask, xmask;
+ int user_ace;
+ uint16_t aceflags;
+ uint32_t origmask, acepmask;
+ uint64_t fuid;
+
+ aceflags = aclp->z_ops.ace_flags_get(acep);
+ fuid = aclp->z_ops.ace_who_get(acep);
+ origmask = aclp->z_ops.ace_mask_get(origacep);
+ acepmask = aclp->z_ops.ace_mask_get(acep);
+
+ user_ace = (!(aceflags &
+ (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
+
+ if (user_ace && (fuid == owner)) {
+ rmask = S_IRUSR;
+ wmask = S_IWUSR;
+ xmask = S_IXUSR;
+ } else {
+ rmask = S_IRGRP;
+ wmask = S_IWGRP;
+ xmask = S_IXGRP;
+ }
+
+ if (origmask & ACE_READ_DATA) {
+ if (mode & rmask) {
+ acepmask &= ~ACE_READ_DATA;
+ } else {
+ acepmask |= ACE_READ_DATA;
+ }
+ }
+
+ if (origmask & ACE_WRITE_DATA) {
+ if (mode & wmask) {
+ acepmask &= ~ACE_WRITE_DATA;
+ } else {
+ acepmask |= ACE_WRITE_DATA;
+ }
+ }
+
+ if (origmask & ACE_APPEND_DATA) {
+ if (mode & wmask) {
+ acepmask &= ~ACE_APPEND_DATA;
+ } else {
+ acepmask |= ACE_APPEND_DATA;
+ }
+ }
+
+ if (origmask & ACE_EXECUTE) {
+ if (mode & xmask) {
+ acepmask &= ~ACE_EXECUTE;
+ } else {
+ acepmask |= ACE_EXECUTE;
+ }
+ }
+ aclp->z_ops.ace_mask_set(acep, acepmask);
+}
+
static void
-zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
+zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
{
void *acep = NULL;
uint64_t who;
@@ -1344,31 +1410,30 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
zfs_acl_node_t *newnode;
size_t abstract_size = aclp->z_ops.ace_abstract_size();
void *zacep;
- boolean_t isdir;
- trivial_acl_t masks;
+ uint32_t owner, group, everyone;
+ uint32_t deny1, deny2, allow0;
new_count = new_bytes = 0;
- isdir = (vtype == VDIR);
-
- acl_trivial_access_masks((mode_t)mode, isdir, &masks);
+ acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2,
+ &owner, &group, &everyone);
newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
zacep = newnode->z_acldata;
- if (masks.allow0) {
- zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
+ if (allow0) {
+ zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER);
zacep = (void *)((uintptr_t)zacep + abstract_size);
new_count++;
new_bytes += abstract_size;
- } if (masks.deny1) {
- zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
+ } if (deny1) {
+ zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER);
zacep = (void *)((uintptr_t)zacep + abstract_size);
new_count++;
new_bytes += abstract_size;
}
- if (masks.deny2) {
- zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
+ if (deny2) {
+ zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP);
zacep = (void *)((uintptr_t)zacep + abstract_size);
new_count++;
new_bytes += abstract_size;
@@ -1387,17 +1452,10 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
continue;
}
- /*
- * If this ACL has any inheritable ACEs, mark that in
- * the hints (which are later masked into the pflags)
- * so create knows to do inheritance.
- */
- if (isdir && (inherit_flags &
- (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
- aclp->z_hints |= ZFS_INHERIT_ACE;
-
if ((type != ALLOW && type != DENY) ||
(inherit_flags & ACE_INHERIT_ONLY_ACE)) {
+ if (inherit_flags)
+ aclp->z_hints |= ZFS_INHERIT_ACE;
switch (type) {
case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
@@ -1410,13 +1468,20 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
/*
* Limit permissions to be no greater than
- * group permissions.
- * The "aclinherit" and "aclmode" properties
- * affect policy for create and chmod(2),
- * respectively.
+ * group permissions.
*/
- if ((type == ALLOW) && trim)
- access_mask &= masks.group;
+ if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) {
+ if (!(mode & S_IRGRP))
+ access_mask &= ~ACE_READ_DATA;
+ if (!(mode & S_IWGRP))
+ access_mask &=
+ ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
+ if (!(mode & S_IXGRP))
+ access_mask &= ~ACE_EXECUTE;
+ access_mask &=
+ ~(ACE_WRITE_OWNER|ACE_WRITE_ACL|
+ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS);
+ }
}
zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
ace_size = aclp->z_ops.ace_size(acep);
@@ -1424,11 +1489,11 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
new_count++;
new_bytes += ace_size;
}
- zfs_set_ace(aclp, zacep, masks.owner, 0, -1, ACE_OWNER);
+ zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER);
zacep = (void *)((uintptr_t)zacep + abstract_size);
- zfs_set_ace(aclp, zacep, masks.group, 0, -1, OWNING_GROUP);
+ zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP);
zacep = (void *)((uintptr_t)zacep + abstract_size);
- zfs_set_ace(aclp, zacep, masks.everyone, 0, -1, ACE_EVERYONE);
+ zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE);
new_count += 3;
new_bytes += abstract_size * 3;
@@ -1440,27 +1505,17 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
list_insert_tail(&aclp->z_acl, newnode);
}
-int
+void
zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
{
- int error = 0;
-
mutex_enter(&zp->z_acl_lock);
mutex_enter(&zp->z_lock);
- if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
- *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
- else
- error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
-
- if (error == 0) {
- (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
- zfs_acl_chmod(ZTOV(zp)->v_type, mode,
- (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
- }
+ *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
+ (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
+ zfs_acl_chmod(zp->z_zfsvfs, mode, *aclp);
mutex_exit(&zp->z_lock);
mutex_exit(&zp->z_acl_lock);
-
- return (error);
+ ASSERT(*aclp);
}
/*
@@ -1708,8 +1763,8 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
if (acl_ids->z_aclp == NULL) {
mutex_enter(&dzp->z_acl_lock);
mutex_enter(&dzp->z_lock);
- if (!(flag & IS_ROOT_NODE) &&
- (dzp->z_pflags & ZFS_INHERIT_ACE) &&
+ if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR &&
+ (dzp->z_pflags & ZFS_INHERIT_ACE)) &&
!(dzp->z_pflags & ZFS_XATTR)) {
VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
&paclp, B_FALSE));
@@ -1726,9 +1781,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
if (need_chmod) {
acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ?
ZFS_ACL_AUTO_INHERIT : 0;
- zfs_acl_chmod(vap->va_type, acl_ids->z_mode,
- (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED),
- acl_ids->z_aclp);
+ zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp);
}
}
diff --git a/uts/common/fs/zfs/zfs_ctldir.c b/uts/common/fs/zfs/zfs_ctldir.c
index d902ff637c38..815f8895e702 100644
--- a/uts/common/fs/zfs/zfs_ctldir.c
+++ b/uts/common/fs/zfs/zfs_ctldir.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -750,7 +749,8 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
return (err);
if (err == 0) {
- err = dmu_objset_snapshot_one(name, dirname);
+ err = dmu_objset_snapshot(name, dirname, NULL, NULL,
+ B_FALSE, B_FALSE, -1);
if (err)
return (err);
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
diff --git a/uts/common/fs/zfs/zfs_fm.c b/uts/common/fs/zfs/zfs_fm.c
index fa5903a432dd..0b4812666442 100644
--- a/uts/common/fs/zfs/zfs_fm.c
+++ b/uts/common/fs/zfs/zfs_fm.c
@@ -23,10 +23,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
@@ -713,10 +709,6 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
if (report->zcr_ereport == NULL) {
report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo);
- if (report->zcr_ckinfo != NULL) {
- kmem_free(report->zcr_ckinfo,
- sizeof (*report->zcr_ckinfo));
- }
kmem_free(report, sizeof (*report));
return;
}
diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c
index 213142740162..1b63c9bf45ef 100644
--- a/uts/common/fs/zfs/zfs_ioctl.c
+++ b/uts/common/fs/zfs/zfs_ioctl.c
@@ -18,114 +18,8 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Portions Copyright 2011 Martin Matuska
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- */
-
-/*
- * ZFS ioctls.
- *
- * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
- * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
- *
- * There are two ways that we handle ioctls: the legacy way where almost
- * all of the logic is in the ioctl callback, and the new way where most
- * of the marshalling is handled in the common entry point, zfsdev_ioctl().
- *
- * Non-legacy ioctls should be registered by calling
- * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
- * from userland by lzc_ioctl().
- *
- * The registration arguments are as follows:
- *
- * const char *name
- * The name of the ioctl. This is used for history logging. If the
- * ioctl returns successfully (the callback returns 0), and allow_log
- * is true, then a history log entry will be recorded with the input &
- * output nvlists. The log entry can be printed with "zpool history -i".
- *
- * zfs_ioc_t ioc
- * The ioctl request number, which userland will pass to ioctl(2).
- * The ioctl numbers can change from release to release, because
- * the caller (libzfs) must be matched to the kernel.
- *
- * zfs_secpolicy_func_t *secpolicy
- * This function will be called before the zfs_ioc_func_t, to
- * determine if this operation is permitted. It should return EPERM
- * on failure, and 0 on success. Checks include determining if the
- * dataset is visible in this zone, and if the user has either all
- * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
- * to do this operation on this dataset with "zfs allow".
- *
- * zfs_ioc_namecheck_t namecheck
- * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
- * name, a dataset name, or nothing. If the name is not well-formed,
- * the ioctl will fail and the callback will not be called.
- * Therefore, the callback can assume that the name is well-formed
- * (e.g. is null-terminated, doesn't have more than one '@' character,
- * doesn't have invalid characters).
- *
- * zfs_ioc_poolcheck_t pool_check
- * This specifies requirements on the pool state. If the pool does
- * not meet them (is suspended or is readonly), the ioctl will fail
- * and the callback will not be called. If any checks are specified
- * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
- * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
- * POOL_CHECK_READONLY).
- *
- * boolean_t smush_outnvlist
- * If smush_outnvlist is true, then the output is presumed to be a
- * list of errors, and it will be "smushed" down to fit into the
- * caller's buffer, by removing some entries and replacing them with a
- * single "N_MORE_ERRORS" entry indicating how many were removed. See
- * nvlist_smush() for details. If smush_outnvlist is false, and the
- * outnvlist does not fit into the userland-provided buffer, then the
- * ioctl will fail with ENOMEM.
- *
- * zfs_ioc_func_t *func
- * The callback function that will perform the operation.
- *
- * The callback should return 0 on success, or an error number on
- * failure. If the function fails, the userland ioctl will return -1,
- * and errno will be set to the callback's return value. The callback
- * will be called with the following arguments:
- *
- * const char *name
- * The name of the pool or dataset to operate on, from
- * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
- * expected type (pool, dataset, or none).
- *
- * nvlist_t *innvl
- * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
- * NULL if no input nvlist was provided. Changes to this nvlist are
- * ignored. If the input nvlist could not be deserialized, the
- * ioctl will fail and the callback will not be called.
- *
- * nvlist_t *outnvl
- * The output nvlist, initially empty. The callback can fill it in,
- * and it will be returned to userland by serializing it into
- * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
- * fails (e.g. because the caller didn't supply a large enough
- * buffer), then the overall ioctl will fail. See the
- * 'smush_outnvlist' argument above for additional behaviors.
- *
- * There are two typical uses of the output nvlist:
- * - To return state, e.g. property values. In this case,
- * smush_outnvlist should be false. If the buffer was not large
- * enough, the caller will reallocate a larger buffer and try
- * the ioctl again.
- *
- * - To return multiple errors from an ioctl which makes on-disk
- * changes. In this case, smush_outnvlist should be true.
- * Ioctls which make on-disk modifications should generally not
- * use the outnvl if they succeed, because the caller cannot
- * distinguish between the operation failing and
- * deserialization failing.
*/
#include <sys/types.h>
@@ -154,7 +48,6 @@
#include <sys/dsl_prop.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
-#include <sys/dmu_impl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
@@ -186,13 +79,8 @@ extern void zfs_fini(void);
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;
-uint_t zfs_fsyncer_key;
-extern uint_t rrw_tsd_key;
-static uint_t zfs_allow_log_key;
-
-typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
-typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
-typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
+typedef int zfs_ioc_func_t(zfs_cmd_t *);
+typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
typedef enum {
NO_NAME,
@@ -203,18 +91,15 @@ typedef enum {
typedef enum {
POOL_CHECK_NONE = 1 << 0,
POOL_CHECK_SUSPENDED = 1 << 1,
- POOL_CHECK_READONLY = 1 << 2,
+ POOL_CHECK_READONLY = 1 << 2
} zfs_ioc_poolcheck_t;
typedef struct zfs_ioc_vec {
- zfs_ioc_legacy_func_t *zvec_legacy_func;
zfs_ioc_func_t *zvec_func;
zfs_secpolicy_func_t *zvec_secpolicy;
zfs_ioc_namecheck_t zvec_namecheck;
- boolean_t zvec_allow_log;
+ boolean_t zvec_his_log;
zfs_ioc_poolcheck_t zvec_pool_check;
- boolean_t zvec_smush_outnvlist;
- const char *zvec_name;
} zfs_ioc_vec_t;
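Each slot in the legacy ioctl table is then a zfs_ioc_vec_t tying a handler to its checks; a representative initializer, with values assumed for illustration, follows the field order of the struct above:

/* Hypothetical table entry: func, secpolicy, namecheck, his_log, pool_check */
static const zfs_ioc_vec_t snapshot_vec = {
	zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME,
	B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY
};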
/* This array is indexed by zfs_userquota_prop_t */
@@ -232,8 +117,7 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props,
nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
boolean_t *);
-int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
-static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
+int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
void
@@ -371,7 +255,7 @@ zfs_log_history(zfs_cmd_t *zc)
if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
- (void) spa_history_log(spa, buf);
+ (void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
spa_close(spa, FTAG);
}
history_str_free(buf);
@@ -383,7 +267,7 @@ zfs_log_history(zfs_cmd_t *zc)
*/
/* ARGSUSED */
static int
-zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
return (0);
}
@@ -394,7 +278,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
*/
/* ARGSUSED */
static int
-zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
{
if (INGLOBALZONE(curproc) ||
zone_dataset_visible(zc->zc_name, NULL))
@@ -463,28 +347,21 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
return (zfs_dozonecheck_impl(dataset, zoned, cr));
}
-static int
+int
zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
{
int error;
- dsl_dataset_t *ds;
- error = dsl_dataset_hold(name, FTAG, &ds);
- if (error != 0)
- return (error);
-
- error = zfs_dozonecheck_ds(name, ds, cr);
+ error = zfs_dozonecheck(name, cr);
if (error == 0) {
error = secpolicy_zfs(cr);
if (error)
- error = dsl_deleg_access_impl(ds, perm, cr);
+ error = dsl_deleg_access(name, perm, cr);
}
-
- dsl_dataset_rele(ds, FTAG);
return (error);
}
-static int
+int
zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
const char *perm, cred_t *cr)
{
@@ -648,9 +525,8 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
}
-/* ARGSUSED */
-static int
-zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+int
+zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
{
int error;
@@ -665,17 +541,15 @@ zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (0);
}
-/* ARGSUSED */
-static int
-zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+int
+zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_ROLLBACK, cr));
}
-/* ARGSUSED */
-static int
-zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+int
+zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
{
spa_t *spa;
dsl_pool_t *dp;
@@ -711,17 +585,8 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (error);
}
-/* ARGSUSED */
static int
-zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
-{
- return (zfs_secpolicy_write_perms(zc->zc_name,
- ZFS_DELEG_PERM_SEND, cr));
-}
-
-/* ARGSUSED */
-static int
-zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
{
vnode_t *vp;
int error;
@@ -745,7 +610,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
}
int
-zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
{
if (!INGLOBALZONE(curproc))
return (EPERM);
@@ -753,12 +618,12 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
if (secpolicy_nfs(cr) == 0) {
return (0);
} else {
- return (zfs_secpolicy_deleg_share(zc, innvl, cr));
+ return (zfs_secpolicy_deleg_share(zc, cr));
}
}
int
-zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
{
if (!INGLOBALZONE(curproc))
return (EPERM);
@@ -766,7 +631,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
if (secpolicy_smb(cr) == 0) {
return (0);
} else {
- return (zfs_secpolicy_deleg_share(zc, innvl, cr));
+ return (zfs_secpolicy_deleg_share(zc, cr));
}
}
@@ -804,55 +669,34 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
/*
* Destroying snapshots with delegated permissions requires
- * descendant mount and destroy permissions.
+ * descendent mount and destroy permissions.
+ * Reassemble the full filesystem@snap name so dsl_deleg_access()
+ * can do the correct permission check.
+ *
+ * Since this routine is used when doing a recursive destroy of snapshots
+ * and destroying snapshots requires descendent permissions, a successful
+ * check of the top-level snapshot applies to snapshots of all descendent
+ * datasets as well.
*/
-/* ARGSUSED */
static int
-zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
{
- nvlist_t *snaps;
- nvpair_t *pair, *nextpair;
- int error = 0;
+ int error;
+ char *dsname;
- if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
- return (EINVAL);
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nextpair) {
- dsl_dataset_t *ds;
+ dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
- nextpair = nvlist_next_nvpair(snaps, pair);
- error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds);
- if (error == 0) {
- dsl_dataset_rele(ds, FTAG);
- } else if (error == ENOENT) {
- /*
- * Ignore any snapshots that don't exist (we consider
- * them "already destroyed"). Remove the name from the
- * nvl here in case the snapshot is created between
- * now and when we try to destroy it (in which case
- * we don't want to destroy it since we haven't
- * checked for permission).
- */
- fnvlist_remove_nvpair(snaps, pair);
- error = 0;
- continue;
- } else {
- break;
- }
- error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
- if (error != 0)
- break;
- }
+ error = zfs_secpolicy_destroy_perms(dsname, cr);
+ strfree(dsname);
return (error);
}
@@ -885,16 +729,14 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
return (error);
}
-/* ARGSUSED */
static int
-zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
{
char parentname[MAXNAMELEN];
objset_t *clone;
@@ -934,9 +776,8 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (error);
}
-/* ARGSUSED */
static int
-zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
{
int error;
@@ -959,72 +800,49 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
ZFS_DELEG_PERM_SNAPSHOT, cr));
}
-/*
- * Check for permission to create each snapshot in the nvlist.
- */
-/* ARGSUSED */
static int
-zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
{
- nvlist_t *snaps;
- int error;
- nvpair_t *pair;
-
- if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
- return (EINVAL);
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- char *name = nvpair_name(pair);
- char *atp = strchr(name, '@');
- if (atp == NULL) {
- error = EINVAL;
- break;
- }
- *atp = '\0';
- error = zfs_secpolicy_snapshot_perms(name, cr);
- *atp = '@';
- if (error != 0)
- break;
- }
- return (error);
+ return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
-{
- /*
- * Even root must have a proper TSD so that we know what pool
- * to log to.
- */
- if (tsd_get(zfs_allow_log_key) == NULL)
- return (EPERM);
- return (0);
-}
-
-static int
-zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
{
char parentname[MAXNAMELEN];
int error;
- char *origin;
if ((error = zfs_get_parent(zc->zc_name, parentname,
sizeof (parentname))) != 0)
return (error);
- if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
- (error = zfs_secpolicy_write_perms(origin,
- ZFS_DELEG_PERM_CLONE, cr)) != 0)
- return (error);
+ if (zc->zc_value[0] != '\0') {
+ if ((error = zfs_secpolicy_write_perms(zc->zc_value,
+ ZFS_DELEG_PERM_CLONE, cr)) != 0)
+ return (error);
+ }
if ((error = zfs_secpolicy_write_perms(parentname,
ZFS_DELEG_PERM_CREATE, cr)) != 0)
return (error);
- return (zfs_secpolicy_write_perms(parentname,
- ZFS_DELEG_PERM_MOUNT, cr));
+ error = zfs_secpolicy_write_perms(parentname,
+ ZFS_DELEG_PERM_MOUNT, cr);
+
+ return (error);
+}
+
+static int
+zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
+{
+ int error;
+
+ error = secpolicy_fs_unmount(cr, NULL);
+ if (error) {
+ error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
+ }
+ return (error);
}
/*
@@ -1033,7 +851,7 @@ zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
*/
/* ARGSUSED */
static int
-zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
{
if (secpolicy_sys_config(cr, B_FALSE) != 0)
return (EPERM);
@@ -1046,7 +864,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
*/
/* ARGSUSED */
static int
-zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
{
int error;
@@ -1062,14 +880,13 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
*/
/* ARGSUSED */
static int
-zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
{
return (secpolicy_zinject(cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
{
zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
@@ -1085,9 +902,9 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
}
static int
-zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
{
- int err = zfs_secpolicy_read(zc, innvl, cr);
+ int err = zfs_secpolicy_read(zc, cr);
if (err)
return (err);
@@ -1114,9 +931,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
}
static int
-zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
{
- int err = zfs_secpolicy_read(zc, innvl, cr);
+ int err = zfs_secpolicy_read(zc, cr);
if (err)
return (err);
@@ -1127,25 +944,22 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
userquota_perms[zc->zc_objset_type], cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
NULL, cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_HOLD, cr));
}
-/* ARGSUSED */
static int
-zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
{
return (zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_RELEASE, cr));
@@ -1155,7 +969,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
* Policy for allowing temporary snapshots to be taken or released
*/
static int
-zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
{
/*
* A temporary snapshot is the same as a snapshot,
@@ -1168,13 +982,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
ZFS_DELEG_PERM_DIFF, cr)) == 0)
return (0);
- error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
+ error = zfs_secpolicy_snapshot(zc, cr);
if (!error)
- error = zfs_secpolicy_hold(zc, innvl, cr);
+ error = zfs_secpolicy_hold(zc, cr);
if (!error)
- error = zfs_secpolicy_release(zc, innvl, cr);
+ error = zfs_secpolicy_release(zc, cr);
if (!error)
- error = zfs_secpolicy_destroy(zc, innvl, cr);
+ error = zfs_secpolicy_destroy(zc, cr);
return (error);
}
@@ -1213,40 +1027,36 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
return (0);
}
-/*
- * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
- * Entries will be removed from the end of the nvlist, and one int32 entry
- * named "N_MORE_ERRORS" will be added indicating how many entries were
- * removed.
- */
static int
-nvlist_smush(nvlist_t *errors, size_t max)
+fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
{
size_t size;
- size = fnvlist_size(errors);
+ VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
- if (size > max) {
+ if (size > zc->zc_nvlist_dst_size) {
nvpair_t *more_errors;
int n = 0;
- if (max < 1024)
+ if (zc->zc_nvlist_dst_size < 1024)
return (ENOMEM);
- fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
- more_errors = nvlist_prev_nvpair(errors, NULL);
+ VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
+ more_errors = nvlist_prev_nvpair(*errors, NULL);
do {
- nvpair_t *pair = nvlist_prev_nvpair(errors,
+ nvpair_t *pair = nvlist_prev_nvpair(*errors,
more_errors);
- fnvlist_remove_nvpair(errors, pair);
+ VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
n++;
- size = fnvlist_size(errors);
- } while (size > max);
+ VERIFY(nvlist_size(*errors, &size,
+ NV_ENCODE_NATIVE) == 0);
+ } while (size > zc->zc_nvlist_dst_size);
- fnvlist_remove_nvpair(errors, more_errors);
- fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
- ASSERT3U(fnvlist_size(errors), <=, max);
+ VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
+ VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
+ ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
+ ASSERT(size <= zc->zc_nvlist_dst_size);
}
return (0);
@@ -1259,20 +1069,21 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
int error = 0;
size_t size;
- size = fnvlist_size(nvl);
+ VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
if (size > zc->zc_nvlist_dst_size) {
error = ENOMEM;
} else {
- packed = fnvlist_pack(nvl, &size);
+ packed = kmem_alloc(size, KM_SLEEP);
+ VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
+ KM_SLEEP) == 0);
if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
size, zc->zc_iflags) != 0)
error = EFAULT;
- fnvlist_pack_free(packed, size);
+ kmem_free(packed, size);
}
zc->zc_nvlist_dst_size = size;
- zc->zc_nvlist_dst_filled = B_TRUE;
return (error);
}
@@ -1305,8 +1116,6 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
/*
* Find a zfsvfs_t for a mounted filesystem, or create our own, in which
* case its z_vfs will be NULL, and it will be opened as the owner.
- * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
- * which prevents all vnode ops from running.
*/
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
@@ -1351,6 +1160,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
nvlist_t *config, *props = NULL;
nvlist_t *rootprops = NULL;
nvlist_t *zplprops = NULL;
+ char *buf;
if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
zc->zc_iflags, &config))
@@ -1369,7 +1179,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
(void) nvlist_lookup_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
- if (!SPA_VERSION_IS_SUPPORTED(version)) {
+ if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
error = EINVAL;
goto pool_props_bad;
}
@@ -1390,7 +1200,9 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
goto pool_props_bad;
}
- error = spa_create(zc->zc_name, config, props, zplprops);
+ buf = history_str_get(zc);
+
+ error = spa_create(zc->zc_name, config, props, buf, zplprops);
/*
* Set the remaining root properties
@@ -1399,6 +1211,9 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
(void) spa_destroy(zc->zc_name);
+ if (buf != NULL)
+ history_str_free(buf);
+
pool_props_bad:
nvlist_free(rootprops);
nvlist_free(zplprops);
@@ -1488,15 +1303,6 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
return (error);
}
-/*
- * inputs:
- * zc_name name of the pool
- *
- * outputs:
- * zc_cookie real errno
- * zc_nvlist_dst config nvlist
- * zc_nvlist_dst_size size of config nvlist
- */
static int
zfs_ioc_pool_stats(zfs_cmd_t *zc)
{
@@ -1598,8 +1404,7 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
- if (zc->zc_cookie < spa_version(spa) ||
- !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
+ if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
spa_close(spa, FTAG);
return (EINVAL);
}
@@ -1643,20 +1448,6 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc)
}
static int
-zfs_ioc_pool_reguid(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error == 0) {
- error = spa_change_guid(spa);
- spa_close(spa, FTAG);
- }
- return (error);
-}
-
-static int
zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
{
int error;
@@ -1953,12 +1744,9 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
* inconsistent. So this is a bit of a workaround...
* XXX reading without owning
*/
- if (!zc->zc_objset_stats.dds_inconsistent &&
- dmu_objset_type(os) == DMU_OST_ZVOL) {
- error = zvol_get_stats(os, nv);
- if (error == EIO)
- return (error);
- VERIFY3S(error, ==, 0);
+ if (!zc->zc_objset_stats.dds_inconsistent) {
+ if (dmu_objset_type(os) == DMU_OST_ZVOL)
+ VERIFY(zvol_get_stats(os, nv) == 0);
}
error = put_nvlist(zc, nv);
nvlist_free(nv);
@@ -2155,10 +1943,8 @@ top:
uint64_t cookie = 0;
int len = sizeof (zc->zc_name) - (p - zc->zc_name);
- while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
- if (!dataset_name_hidden(zc->zc_name))
- (void) dmu_objset_prefetch(zc->zc_name, NULL);
- }
+ while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
+ (void) dmu_objset_prefetch(p, NULL);
}
do {
@@ -2167,7 +1953,8 @@ top:
NULL, &zc->zc_cookie);
if (error == ENOENT)
error = ESRCH;
- } while (error == 0 && dataset_name_hidden(zc->zc_name));
+ } while (error == 0 && dataset_name_hidden(zc->zc_name) &&
+ !(zc->zc_iflags & FKIOCTL));
dmu_objset_rele(os, FTAG);
/*
@@ -2393,25 +2180,31 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
/*
* This function is best-effort. If it fails to set any of the given properties,
- * it continues to set as many as it can and returns the last error
- * encountered. If the caller provides a non-NULL errlist, it will be filled in
- * with the list of names of all the properties that failed along with the
- * corresponding error numbers.
+ * it continues to set as many as it can and returns the first error
+ * encountered. If the caller provides a non-NULL errlist, it also gives the
+ * complete list of names of all the properties it failed to set along with the
+ * corresponding error numbers. The caller is responsible for freeing the
+ * returned errlist.
*
- * If every property is set successfully, zero is returned and errlist is not
- * modified.
+ * If every property is set successfully, zero is returned and the list pointed
+ * at by errlist is NULL.
*/
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
- nvlist_t *errlist)
+ nvlist_t **errlist)
{
nvpair_t *pair;
nvpair_t *propval;
int rv = 0;
uint64_t intval;
char *strval;
- nvlist_t *genericnvl = fnvlist_alloc();
- nvlist_t *retrynvl = fnvlist_alloc();
+ nvlist_t *genericnvl;
+ nvlist_t *errors;
+ nvlist_t *retrynvl;
+
+ VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
retry:
pair = NULL;
@@ -2424,7 +2217,7 @@ retry:
propval = pair;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- attrs = fnvpair_value_nvlist(pair);
+ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&propval) != 0)
err = EINVAL;
@@ -2439,8 +2232,6 @@ retry:
if (nvpair_type(propval) !=
DATA_TYPE_UINT64_ARRAY)
err = EINVAL;
- } else {
- err = EINVAL;
}
} else if (err == 0) {
if (nvpair_type(propval) == DATA_TYPE_STRING) {
@@ -2449,7 +2240,8 @@ retry:
} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
const char *unused;
- intval = fnvpair_value_uint64(propval);
+ VERIFY(nvpair_value_uint64(propval,
+ &intval) == 0);
switch (zfs_prop_get_type(prop)) {
case PROP_TYPE_NUMBER:
@@ -2493,11 +2285,8 @@ retry:
}
}
- if (err != 0) {
- if (errlist != NULL)
- fnvlist_add_int32(errlist, propname, err);
- rv = err;
- }
+ if (err != 0)
+ VERIFY(nvlist_add_int32(errors, propname, err) == 0);
}
if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
@@ -2519,33 +2308,44 @@ retry:
propval = pair;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
nvlist_t *attrs;
- attrs = fnvpair_value_nvlist(pair);
- propval = fnvlist_lookup_nvpair(attrs,
- ZPROP_VALUE);
+ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
+ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
+ &propval) == 0);
}
if (nvpair_type(propval) == DATA_TYPE_STRING) {
- strval = fnvpair_value_string(propval);
+ VERIFY(nvpair_value_string(propval,
+ &strval) == 0);
err = dsl_prop_set(dsname, propname, source, 1,
strlen(strval) + 1, strval);
} else {
- intval = fnvpair_value_uint64(propval);
+ VERIFY(nvpair_value_uint64(propval,
+ &intval) == 0);
err = dsl_prop_set(dsname, propname, source, 8,
1, &intval);
}
if (err != 0) {
- if (errlist != NULL) {
- fnvlist_add_int32(errlist, propname,
- err);
- }
- rv = err;
+ VERIFY(nvlist_add_int32(errors, propname,
+ err) == 0);
}
}
}
nvlist_free(genericnvl);
nvlist_free(retrynvl);
+ if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
+ nvlist_free(errors);
+ errors = NULL;
+ } else {
+ VERIFY(nvpair_value_int32(pair, &rv) == 0);
+ }
+
+ if (errlist == NULL)
+ nvlist_free(errors);
+ else
+ *errlist = errors;
+
return (rv);
}
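A caller-side sketch of that contract: zero means every property was set and *errlist comes back NULL; otherwise the caller walks and then frees the returned list. The helper name is hypothetical; the nvlist calls are the standard kernel nvpair API:

static void
set_props_sketch(const char *dsname, nvlist_t *props)
{
	nvlist_t *errlist = NULL;
	nvpair_t *pair = NULL;

	if (zfs_set_prop_nvlist(dsname, ZPROP_SRC_LOCAL, props,
	    &errlist) == 0)
		return;				/* all properties set */

	while ((pair = nvlist_next_nvpair(errlist, pair)) != NULL) {
		int32_t err;
		VERIFY(nvpair_value_int32(pair, &err) == 0);
		cmn_err(CE_WARN, "failed to set %s: %d",
		    nvpair_name(pair), err);
	}
	nvlist_free(errlist);			/* caller frees errlist */
}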
@@ -2553,7 +2353,7 @@ retry:
* Check that all the properties are valid user properties.
*/
static int
-zfs_check_userprops(const char *fsname, nvlist_t *nvl)
+zfs_check_userprops(char *fsname, nvlist_t *nvl)
{
nvpair_t *pair = NULL;
int error = 0;
@@ -2633,7 +2433,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)
boolean_t received = zc->zc_cookie;
zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
ZPROP_SRC_LOCAL);
- nvlist_t *errors;
+ nvlist_t *errors = NULL;
int error;
if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
@@ -2656,8 +2456,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)
}
}
- errors = fnvlist_alloc();
- error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
+ error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
if (zc->zc_nvlist_dst != NULL && errors != NULL) {
(void) put_nvlist(zc, errors);
@@ -2739,7 +2538,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
return (EINVAL);
}
- /* property name has been validated by zfs_secpolicy_inherit_prop() */
+ /* the property name has been validated by zfs_secpolicy_inherit() */
return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
}
@@ -3082,30 +2881,26 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
}
/*
- * innvl: {
- * "type" -> dmu_objset_type_t (int32)
- * (optional) "props" -> { prop -> value }
- * }
+ * inputs:
+ * zc_objset_type type of objset to create (fs vs zvol)
+ * zc_name name of new objset
+ * zc_value name of snapshot to clone from (may be empty)
+ * zc_nvlist_src{_size} nvlist of properties to apply
*
- * outnvl: propname -> error code (int32)
+ * outputs: none
*/
static int
-zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+zfs_ioc_create(zfs_cmd_t *zc)
{
+ objset_t *clone;
int error = 0;
- zfs_creat_t zct = { 0 };
+ zfs_creat_t zct;
nvlist_t *nvprops = NULL;
void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
- int32_t type32;
- dmu_objset_type_t type;
- boolean_t is_insensitive = B_FALSE;
-
- if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
- return (EINVAL);
- type = type32;
- (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+ dmu_objset_type_t type = zc->zc_objset_type;
switch (type) {
+
case DMU_OST_ZFS:
cbfunc = zfs_create_cb;
break;
@@ -3118,290 +2913,210 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
cbfunc = NULL;
break;
}
- if (strchr(fsname, '@') ||
- strchr(fsname, '%'))
+ if (strchr(zc->zc_name, '@') ||
+ strchr(zc->zc_name, '%'))
return (EINVAL);
- zct.zct_props = nvprops;
-
- if (cbfunc == NULL)
- return (EINVAL);
-
- if (type == DMU_OST_ZVOL) {
- uint64_t volsize, volblocksize;
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvprops)) != 0)
+ return (error);
- if (nvprops == NULL)
- return (EINVAL);
- if (nvlist_lookup_uint64(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
- return (EINVAL);
+ zct.zct_zplprops = NULL;
+ zct.zct_props = nvprops;
- if ((error = nvlist_lookup_uint64(nvprops,
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- &volblocksize)) != 0 && error != ENOENT)
+ if (zc->zc_value[0] != '\0') {
+ /*
+ * We're creating a clone of an existing snapshot.
+ */
+ zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
+ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
+ nvlist_free(nvprops);
return (EINVAL);
+ }
- if (error != 0)
- volblocksize = zfs_prop_default_numeric(
- ZFS_PROP_VOLBLOCKSIZE);
-
- if ((error = zvol_check_volblocksize(
- volblocksize)) != 0 ||
- (error = zvol_check_volsize(volsize,
- volblocksize)) != 0)
+ error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
+ if (error) {
+ nvlist_free(nvprops);
return (error);
- } else if (type == DMU_OST_ZFS) {
- int error;
+ }
- /*
- * We have to have normalization and
- * case-folding flags correct when we do the
- * file system creation, so go figure them out
- * now.
- */
- VERIFY(nvlist_alloc(&zct.zct_zplprops,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- error = zfs_fill_zplprops(fsname, nvprops,
- zct.zct_zplprops, &is_insensitive);
- if (error != 0) {
- nvlist_free(zct.zct_zplprops);
+ error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
+ dmu_objset_rele(clone, FTAG);
+ if (error) {
+ nvlist_free(nvprops);
return (error);
}
- }
+ } else {
+ boolean_t is_insensitive = B_FALSE;
- error = dmu_objset_create(fsname, type,
- is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
- nvlist_free(zct.zct_zplprops);
+ if (cbfunc == NULL) {
+ nvlist_free(nvprops);
+ return (EINVAL);
+ }
- /*
- * It would be nice to do this atomically.
- */
- if (error == 0) {
- error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
- nvprops, outnvl);
- if (error != 0)
- (void) dmu_objset_destroy(fsname, B_FALSE);
- }
- return (error);
-}
+ if (type == DMU_OST_ZVOL) {
+ uint64_t volsize, volblocksize;
-/*
- * innvl: {
- * "origin" -> name of origin snapshot
- * (optional) "props" -> { prop -> value }
- * }
- *
- * outnvl: propname -> error code (int32)
- */
-static int
-zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
-{
- int error = 0;
- nvlist_t *nvprops = NULL;
- char *origin_name;
- dsl_dataset_t *origin;
-
- if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
- return (EINVAL);
- (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+ if (nvprops == NULL ||
+ nvlist_lookup_uint64(nvprops,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+ &volsize) != 0) {
+ nvlist_free(nvprops);
+ return (EINVAL);
+ }
- if (strchr(fsname, '@') ||
- strchr(fsname, '%'))
- return (EINVAL);
+ if ((error = nvlist_lookup_uint64(nvprops,
+ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+ &volblocksize)) != 0 && error != ENOENT) {
+ nvlist_free(nvprops);
+ return (EINVAL);
+ }
- if (dataset_namecheck(origin_name, NULL, NULL) != 0)
- return (EINVAL);
+ if (error != 0)
+ volblocksize = zfs_prop_default_numeric(
+ ZFS_PROP_VOLBLOCKSIZE);
- error = dsl_dataset_hold(origin_name, FTAG, &origin);
- if (error)
- return (error);
+ if ((error = zvol_check_volblocksize(
+ volblocksize)) != 0 ||
+ (error = zvol_check_volsize(volsize,
+ volblocksize)) != 0) {
+ nvlist_free(nvprops);
+ return (error);
+ }
+ } else if (type == DMU_OST_ZFS) {
+ int error;
- error = dmu_objset_clone(fsname, origin, 0);
- dsl_dataset_rele(origin, FTAG);
- if (error)
- return (error);
+ /*
+ * We have to have normalization and
+ * case-folding flags correct when we do the
+ * file system creation, so go figure them out
+ * now.
+ */
+ VERIFY(nvlist_alloc(&zct.zct_zplprops,
+ NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ error = zfs_fill_zplprops(zc->zc_name, nvprops,
+ zct.zct_zplprops, &is_insensitive);
+ if (error != 0) {
+ nvlist_free(nvprops);
+ nvlist_free(zct.zct_zplprops);
+ return (error);
+ }
+ }
+ error = dmu_objset_create(zc->zc_name, type,
+ is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
+ nvlist_free(zct.zct_zplprops);
+ }
/*
* It would be nice to do this atomically.
*/
if (error == 0) {
- error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
- nvprops, outnvl);
+ error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
+ nvprops, NULL);
if (error != 0)
- (void) dmu_objset_destroy(fsname, B_FALSE);
+ (void) dmu_objset_destroy(zc->zc_name, B_FALSE);
}
+ nvlist_free(nvprops);
return (error);
}
/*
- * innvl: {
- * "snaps" -> { snapshot1, snapshot2 }
- * (optional) "props" -> { prop -> value (string) }
- * }
- *
- * outnvl: snapshot -> error code (int32)
+ * inputs:
+ * zc_name name of filesystem
+ * zc_value short name of snapshot
+ * zc_cookie recursive flag
+ * zc_nvlist_src[_size] property list
*
+ * outputs:
+ * zc_value short snapname (i.e. part after the '@')
*/
static int
-zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+zfs_ioc_snapshot(zfs_cmd_t *zc)
{
- nvlist_t *snaps;
- nvlist_t *props = NULL;
- int error, poollen;
- nvpair_t *pair;
-
- (void) nvlist_lookup_nvlist(innvl, "props", &props);
- if ((error = zfs_check_userprops(poolname, props)) != 0)
- return (error);
-
- if (!nvlist_empty(props) &&
- zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
- return (ENOTSUP);
+ nvlist_t *nvprops = NULL;
+ int error;
+ boolean_t recursive = zc->zc_cookie;
- if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+ if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
return (EINVAL);
- poollen = strlen(poolname);
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- const char *name = nvpair_name(pair);
- const char *cp = strchr(name, '@');
-
- /*
- * The snap name must contain an @, and the part after it must
- * contain only valid characters.
- */
- if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
- return (EINVAL);
-
- /*
- * The snap must be in the specified pool.
- */
- if (strncmp(name, poolname, poollen) != 0 ||
- (name[poollen] != '/' && name[poollen] != '@'))
- return (EXDEV);
-
- /* This must be the only snap of this fs. */
- for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
- pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
- if (strncmp(name, nvpair_name(pair2), cp - name + 1)
- == 0) {
- return (EXDEV);
- }
- }
- }
-
- error = dmu_objset_snapshot(snaps, props, outnvl);
- return (error);
-}
-
-/*
- * innvl: "message" -> string
- */
-/* ARGSUSED */
-static int
-zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
-{
- char *message;
- spa_t *spa;
- int error;
- char *poolname;
- /*
-	 * The poolname in the ioctl is not set; we get it from the TSD,
- * which was set at the end of the last successful ioctl that allows
- * logging. The secpolicy func already checked that it is set.
- * Only one log ioctl is allowed after each successful ioctl, so
- * we clear the TSD here.
- */
- poolname = tsd_get(zfs_allow_log_key);
- (void) tsd_set(zfs_allow_log_key, NULL);
- error = spa_open(poolname, &spa, FTAG);
- strfree(poolname);
- if (error != 0)
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvprops)) != 0)
return (error);
- if (nvlist_lookup_string(innvl, "message", &message) != 0) {
- spa_close(spa, FTAG);
- return (EINVAL);
- }
+ error = zfs_check_userprops(zc->zc_name, nvprops);
+ if (error)
+ goto out;
- if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
- spa_close(spa, FTAG);
- return (ENOTSUP);
+ if (!nvlist_empty(nvprops) &&
+ zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
+ error = ENOTSUP;
+ goto out;
}
- error = spa_history_log(spa, message);
- spa_close(spa, FTAG);
+ error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
+ nvprops, recursive, B_FALSE, -1);
+
+out:
+ nvlist_free(nvprops);
return (error);
}
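
The legacy snapshot path above receives its property list as a packed nvlist at zc_nvlist_src, which get_nvlist() unpacks inside the kernel. A minimal userland sketch of that pack/unpack round trip, assuming only libnvpair (the property name is illustrative, not part of the ioctl ABI):

/*
 * Hedged sketch: pack a property nvlist the way an ioctl caller would,
 * then unpack it again.  Build with -lnvpair on illumos/Solaris; the
 * property name is only an example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *props, *copy;
	char *buf = NULL;
	size_t buflen = 0;
	char *val;

	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
		return (1);
	if (nvlist_add_string(props, "com.example:note", "hourly") != 0)
		return (1);

	/* NV_ENCODE_NATIVE matches what the kernel-side unpack expects. */
	if (nvlist_pack(props, &buf, &buflen, NV_ENCODE_NATIVE, 0) != 0)
		return (1);
	if (nvlist_unpack(buf, buflen, &copy, 0) != 0)
		return (1);

	if (nvlist_lookup_string(copy, "com.example:note", &val) == 0)
		(void) printf("round-tripped: %s\n", val);

	free(buf);
	nvlist_free(copy);
	nvlist_free(props);
	return (0);
}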
-/* ARGSUSED */
int
zfs_unmount_snap(const char *name, void *arg)
{
- vfs_t *vfsp;
- int err;
+ vfs_t *vfsp = NULL;
- if (strchr(name, '@') == NULL)
- return (0);
+ if (arg) {
+ char *snapname = arg;
+ char *fullname = kmem_asprintf("%s@%s", name, snapname);
+ vfsp = zfs_get_vfs(fullname);
+ strfree(fullname);
+ } else if (strchr(name, '@')) {
+ vfsp = zfs_get_vfs(name);
+ }
- vfsp = zfs_get_vfs(name);
- if (vfsp == NULL)
- return (0);
+ if (vfsp) {
+ /*
+ * Always force the unmount for snapshots.
+ */
+ int flag = MS_FORCE;
+ int err;
- if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
+ if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
+ VFS_RELE(vfsp);
+ return (err);
+ }
VFS_RELE(vfsp);
- return (err);
+ if ((err = dounmount(vfsp, flag, kcred)) != 0)
+ return (err);
}
- VFS_RELE(vfsp);
-
- /*
- * Always force the unmount for snapshots.
- */
- return (dounmount(vfsp, MS_FORCE, kcred));
+ return (0);
}
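
zfs_unmount_snap() builds the full "fs@snap" name with kmem_asprintf() when it is handed a short snapshot name through arg. A hedged userland analog of that construction, with snprintf(3C) standing in for the kernel allocator and illustrative names:

/*
 * Userland analog of the kmem_asprintf() call in zfs_unmount_snap():
 * join a filesystem name and a short snapshot name with '@'.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
full_snap_name(const char *fs, const char *snap)
{
	size_t len = strlen(fs) + 1 + strlen(snap) + 1;
	char *name = malloc(len);

	if (name != NULL)
		(void) snprintf(name, len, "%s@%s", fs, snap);
	return (name);
}

int
main(void)
{
	char *name = full_snap_name("tank/home", "backup");

	if (name != NULL) {
		(void) printf("%s\n", name);
		free(name);
	}
	return (0);
}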
/*
- * innvl: {
- * "snaps" -> { snapshot1, snapshot2 }
- * (optional boolean) "defer"
- * }
- *
- * outnvl: snapshot -> error code (int32)
+ * inputs:
+ * zc_name name of filesystem
+ * zc_value short name of snapshot
+ * zc_defer_destroy mark for deferred destroy
*
+ * outputs: none
*/
static int
-zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
{
- int poollen;
- nvlist_t *snaps;
- nvpair_t *pair;
- boolean_t defer;
+ int err;
- if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+ if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
return (EINVAL);
- defer = nvlist_exists(innvl, "defer");
-
- poollen = strlen(poolname);
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- const char *name = nvpair_name(pair);
-
- /*
- * The snap must be in the specified pool.
- */
- if (strncmp(name, poolname, poollen) != 0 ||
- (name[poollen] != '/' && name[poollen] != '@'))
- return (EXDEV);
-
- /*
- * Ignore failures to unmount; dmu_snapshots_destroy_nvl()
- * will deal with this gracefully (by filling in outnvl).
- */
- (void) zfs_unmount_snap(name, NULL);
- }
-
- return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl));
+ err = dmu_objset_find(zc->zc_name,
+ zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
+ if (err)
+ return (err);
+ return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
+ zc->zc_defer_destroy));
}
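
zfs_ioc_destroy_snaps() relies on dmu_objset_find() to walk the named filesystem and its children, applying zfs_unmount_snap() to each. A simplified sketch of that callback-driven traversal; the flat name table stands in for the real pool namespace and is purely illustrative:

/*
 * Hedged sketch of a dmu_objset_find()-style traversal: apply a
 * callback to a dataset and every child, stopping on the first error.
 */
#include <stdio.h>
#include <string.h>

typedef int (*find_cb_t)(const char *name, void *arg);

static const char *datasets[] = {
	"tank", "tank/home", "tank/home/user", "tank/srv", NULL
};

static int
objset_find(const char *root, find_cb_t cb, void *arg)
{
	size_t rootlen = strlen(root);
	int err;

	for (int i = 0; datasets[i] != NULL; i++) {
		const char *name = datasets[i];

		/* a child has the root as a prefix at a '/' boundary */
		if (strncmp(name, root, rootlen) != 0 ||
		    (name[rootlen] != '\0' && name[rootlen] != '/'))
			continue;
		if ((err = cb(name, arg)) != 0)
			return (err);
	}
	return (0);
}

static int
print_cb(const char *name, void *arg)
{
	(void) printf("visit %s@%s\n", name, (char *)arg);
	return (0);
}

int
main(void)
{
	return (objset_find("tank/home", print_cb, "nightly"));
}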
/*
@@ -3705,7 +3420,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
(void) strcpy(zc->zc_value, nvpair_name(pair));
if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
- (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
+ (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
VERIFY(nvlist_remove_nvpair(props, pair) == 0);
VERIFY(nvlist_add_int32(errors,
zc->zc_value, err) == 0);
@@ -3913,6 +3628,8 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* dmu_recv_begin() succeeds.
*/
if (props) {
+ nvlist_t *errlist;
+
if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
if (drc.drc_newfs) {
if (spa_version(os->os_spa) >=
@@ -3931,12 +3648,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
}
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
- props, errors);
+ props, &errlist);
+ (void) nvlist_merge(errors, errlist, 0);
+ nvlist_free(errlist);
}
- if (zc->zc_nvlist_dst_size != 0 &&
- (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
- put_nvlist(zc, errors) != 0)) {
+ if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
/*
* Caller made zc->zc_nvlist_dst less than the minimum expected
* size or supplied an invalid address.
@@ -4042,8 +3759,6 @@ out:
* zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
* zc_sendobj objsetid of snapshot to send
* zc_fromobj objsetid of incremental fromsnap (may be zero)
- * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
- * output size in zc_objset_type.
*
* outputs: none
*/
@@ -4052,13 +3767,13 @@ zfs_ioc_send(zfs_cmd_t *zc)
{
objset_t *fromsnap = NULL;
objset_t *tosnap;
+ file_t *fp;
int error;
offset_t off;
dsl_dataset_t *ds;
dsl_dataset_t *dsfrom = NULL;
spa_t *spa;
dsl_pool_t *dp;
- boolean_t estimate = (zc->zc_guid != 0);
error = spa_open(zc->zc_name, &spa, FTAG);
if (error)
@@ -4068,13 +3783,15 @@ zfs_ioc_send(zfs_cmd_t *zc)
rw_enter(&dp->dp_config_rwlock, RW_READER);
error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- if (error)
+ if (error) {
+ spa_close(spa, FTAG);
return (error);
+ }
error = dmu_objset_from_ds(ds, &tosnap);
if (error) {
dsl_dataset_rele(ds, FTAG);
+ spa_close(spa, FTAG);
return (error);
}
@@ -4082,6 +3799,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
rw_enter(&dp->dp_config_rwlock, RW_READER);
error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
rw_exit(&dp->dp_config_rwlock);
+ spa_close(spa, FTAG);
if (error) {
dsl_dataset_rele(ds, FTAG);
return (error);
@@ -4092,104 +3810,30 @@ zfs_ioc_send(zfs_cmd_t *zc)
dsl_dataset_rele(ds, FTAG);
return (error);
}
+ } else {
+ spa_close(spa, FTAG);
}
- if (zc->zc_obj) {
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
- if (fromsnap != NULL) {
+ fp = getf(zc->zc_cookie);
+ if (fp == NULL) {
+ dsl_dataset_rele(ds, FTAG);
+ if (dsfrom)
dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (EINVAL);
- }
-
- if (dsl_dir_is_clone(ds->ds_dir)) {
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp,
- ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom);
- rw_exit(&dp->dp_config_rwlock);
- if (error) {
- dsl_dataset_rele(ds, FTAG);
- return (error);
- }
- error = dmu_objset_from_ds(dsfrom, &fromsnap);
- if (error) {
- dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (error);
- }
- }
+ return (EBADF);
}
- if (estimate) {
- error = dmu_send_estimate(tosnap, fromsnap,
- &zc->zc_objset_type);
- } else {
- file_t *fp = getf(zc->zc_cookie);
- if (fp == NULL) {
- dsl_dataset_rele(ds, FTAG);
- if (dsfrom)
- dsl_dataset_rele(dsfrom, FTAG);
- return (EBADF);
- }
-
- off = fp->f_offset;
- error = dmu_send(tosnap, fromsnap,
- zc->zc_cookie, fp->f_vnode, &off);
+ off = fp->f_offset;
+ error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
- if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
- fp->f_offset = off;
- releasef(zc->zc_cookie);
- }
+ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+ fp->f_offset = off;
+ releasef(zc->zc_cookie);
if (dsfrom)
dsl_dataset_rele(dsfrom, FTAG);
dsl_dataset_rele(ds, FTAG);
return (error);
}
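
The send path writes the stream through the caller's file descriptor and then propagates the updated offset back via VOP_SEEK, so the descriptor ends where the stream ends. A userland analog of that fd/offset handling, using write(2) and lseek(2) on an ordinary file:

/*
 * Hedged analog of the fp->f_offset bookkeeping in zfs_ioc_send():
 * write a (dummy) stream, then report the offset the caller inherits.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	const char payload[] = "dummy send stream\n";
	int fd = open("stream.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	off_t off;

	if (fd == -1)
		return (1);

	if (write(fd, payload, sizeof (payload) - 1) == -1) {
		(void) close(fd);
		return (1);
	}

	/* lseek(2) reports the offset the next writer will see. */
	off = lseek(fd, 0, SEEK_CUR);
	(void) printf("stream ends at offset %lld\n", (long long)off);

	(void) close(fd);
	return (0);
}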
-/*
- * inputs:
- * zc_name name of snapshot on which to report progress
- * zc_cookie file descriptor of send stream
- *
- * outputs:
- * zc_cookie number of bytes written in send stream thus far
- */
-static int
-zfs_ioc_send_progress(zfs_cmd_t *zc)
-{
- dsl_dataset_t *ds;
- dmu_sendarg_t *dsp = NULL;
- int error;
-
- if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
- return (error);
-
- mutex_enter(&ds->ds_sendstream_lock);
-
- /*
- * Iterate over all the send streams currently active on this dataset.
- * If there's one which matches the specified file descriptor _and_ the
- * stream was started by the current process, return the progress of
- * that stream.
- */
- for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
- dsp = list_next(&ds->ds_sendstreams, dsp)) {
- if (dsp->dsa_outfd == zc->zc_cookie &&
- dsp->dsa_proc == curproc)
- break;
- }
-
- if (dsp != NULL)
- zc->zc_cookie = *(dsp->dsa_off);
- else
- error = ENOENT;
-
- mutex_exit(&ds->ds_sendstream_lock);
- dsl_dataset_rele(ds, FTAG);
- return (error);
-}
-
static int
zfs_ioc_inject_fault(zfs_cmd_t *zc)
{
@@ -4324,22 +3968,6 @@ zfs_ioc_clear(zfs_cmd_t *zc)
return (error);
}
-static int
-zfs_ioc_pool_reopen(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error)
- return (error);
-
- spa_vdev_state_enter(spa, SCL_NONE);
- vdev_reopen(spa->spa_root_vdev);
- (void) spa_vdev_state_exit(spa, NULL, 0);
- spa_close(spa, FTAG);
- return (0);
-}
/*
* inputs:
* zc_name name of filesystem
@@ -4648,7 +4276,6 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)
* zc_cleanup_fd cleanup-on-exit file descriptor for calling process
*
* outputs:
- * zc_value short name of new snapshot
*/
static int
zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
@@ -4656,21 +4283,22 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
char *snap_name;
int error;
- snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value,
+ snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
(u_longlong_t)ddi_get_lbolt64());
- if (strlen(snap_name) >= MAXPATHLEN) {
+ if (strlen(snap_name) >= MAXNAMELEN) {
strfree(snap_name);
return (E2BIG);
}
- error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd);
+ error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
+ NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
if (error != 0) {
strfree(snap_name);
return (error);
}
- (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1);
+ (void) strcpy(zc->zc_value, snap_name);
strfree(snap_name);
return (0);
}
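
The temporary snapshot name is the caller's short name plus a 64-bit tick value, and must fit within MAXNAMELEN or the ioctl fails with E2BIG. A small sketch of the same construct-and-check pattern; the length limit and tick source here are stand-ins, not the kernel's:

/*
 * Hedged sketch of the tmp-snapshot name construction: append a
 * 64-bit tick value and reject names that would overflow the limit,
 * mirroring the E2BIG check above.
 */
#include <stdio.h>
#include <time.h>

#define	EX_MAXNAMELEN	256	/* stand-in for MAXNAMELEN */

static int
tmp_snap_name(const char *shortname, char *buf, size_t buflen)
{
	unsigned long long ticks = (unsigned long long)clock();
	int n = snprintf(buf, buflen, "%s-%016llx", shortname, ticks);

	if (n < 0 || (size_t)n >= buflen)
		return (-1);	/* E2BIG analog */
	return (0);
}

int
main(void)
{
	char name[EX_MAXNAMELEN];

	if (tmp_snap_name("swap", name, sizeof (name)) == 0)
		(void) printf("%s\n", name);
	return (0);
}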
@@ -4994,457 +4622,128 @@ zfs_ioc_get_holds(zfs_cmd_t *zc)
}
/*
- * inputs:
- * zc_name name of new filesystem or snapshot
- * zc_value full name of old snapshot
- *
- * outputs:
- * zc_cookie space in bytes
- * zc_objset_type compressed space in bytes
- * zc_perm_action uncompressed space in bytes
- */
-static int
-zfs_ioc_space_written(zfs_cmd_t *zc)
-{
- int error;
- dsl_dataset_t *new, *old;
-
- error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
- if (error != 0)
- return (error);
- error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
- if (error != 0) {
- dsl_dataset_rele(new, FTAG);
- return (error);
- }
-
- error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
- &zc->zc_objset_type, &zc->zc_perm_action);
- dsl_dataset_rele(old, FTAG);
- dsl_dataset_rele(new, FTAG);
- return (error);
-}
-/*
- * innvl: {
- * "firstsnap" -> snapshot name
- * }
- *
- * outnvl: {
- * "used" -> space in bytes
- * "compressed" -> compressed space in bytes
- * "uncompressed" -> uncompressed space in bytes
- * }
- */
-static int
-zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
-{
- int error;
- dsl_dataset_t *new, *old;
- char *firstsnap;
- uint64_t used, comp, uncomp;
-
- if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
- return (EINVAL);
-
- error = dsl_dataset_hold(lastsnap, FTAG, &new);
- if (error != 0)
- return (error);
- error = dsl_dataset_hold(firstsnap, FTAG, &old);
- if (error != 0) {
- dsl_dataset_rele(new, FTAG);
- return (error);
- }
-
- error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
- dsl_dataset_rele(old, FTAG);
- dsl_dataset_rele(new, FTAG);
- fnvlist_add_uint64(outnvl, "used", used);
- fnvlist_add_uint64(outnvl, "compressed", comp);
- fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
- return (error);
-}
-
-/*
- * innvl: {
- * "fd" -> file descriptor to write stream to (int32)
- * (optional) "fromsnap" -> full snap name to send an incremental from
- * }
- *
- * outnvl is unused
+ * pool create, destroy, and export don't log the history as part of
+ * zfsdev_ioctl; rather, zfs_ioc_pool_create and zfs_ioc_pool_export
+ * do the logging of those commands.
+ */
-/* ARGSUSED */
-static int
-zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
-{
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
- int error;
- offset_t off;
- char *fromname;
- int fd;
-
- error = nvlist_lookup_int32(innvl, "fd", &fd);
- if (error != 0)
- return (EINVAL);
-
- error = dmu_objset_hold(snapname, FTAG, &tosnap);
- if (error)
- return (error);
-
- error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
- if (error == 0) {
- error = dmu_objset_hold(fromname, FTAG, &fromsnap);
- if (error) {
- dmu_objset_rele(tosnap, FTAG);
- return (error);
- }
- }
-
- file_t *fp = getf(fd);
- if (fp == NULL) {
- dmu_objset_rele(tosnap, FTAG);
- if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
- return (EBADF);
- }
-
- off = fp->f_offset;
- error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off);
-
- if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
- fp->f_offset = off;
- releasef(fd);
- if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
- return (error);
-}
-
-/*
- * Determine approximately how large a zfs send stream will be -- the number
- * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
- *
- * innvl: {
- * (optional) "fromsnap" -> full snap name to send an incremental from
- * }
- *
- * outnvl: {
- * "space" -> bytes of space (uint64)
- * }
- */
-static int
-zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
-{
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
- int error;
- char *fromname;
- uint64_t space;
-
- error = dmu_objset_hold(snapname, FTAG, &tosnap);
- if (error)
- return (error);
-
- error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
- if (error == 0) {
- error = dmu_objset_hold(fromname, FTAG, &fromsnap);
- if (error) {
- dmu_objset_rele(tosnap, FTAG);
- return (error);
- }
- }
-
- error = dmu_send_estimate(tosnap, fromsnap, &space);
- fnvlist_add_uint64(outnvl, "space", space);
-
- if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
- return (error);
-}
-
-
-static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
-
-static void
-zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
- boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
-{
- zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
-
- ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
- ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
-
- vec->zvec_legacy_func = func;
- vec->zvec_secpolicy = secpolicy;
- vec->zvec_namecheck = namecheck;
- vec->zvec_allow_log = log_history;
- vec->zvec_pool_check = pool_check;
-}
-
-/*
- * See the block comment at the beginning of this file for details on
- * each argument to this function.
- */
-static void
-zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
- zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
- zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
- boolean_t allow_log)
-{
- zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
-
- ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
- ASSERT3U(ioc, <, ZFS_IOC_LAST);
- ASSERT3P(vec->zvec_legacy_func, ==, NULL);
- ASSERT3P(vec->zvec_func, ==, NULL);
-
- /* if we are logging, the name must be valid */
- ASSERT(!allow_log || namecheck != NO_NAME);
-
- vec->zvec_name = name;
- vec->zvec_func = func;
- vec->zvec_secpolicy = secpolicy;
- vec->zvec_namecheck = namecheck;
- vec->zvec_pool_check = pool_check;
- vec->zvec_smush_outnvlist = smush_outnvlist;
- vec->zvec_allow_log = allow_log;
-}
-
-static void
-zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
- zfs_ioc_poolcheck_t pool_check)
-{
- zfs_ioctl_register_legacy(ioc, func, secpolicy,
- POOL_NAME, log_history, pool_check);
-}
-
-static void
-zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
-{
- zfs_ioctl_register_legacy(ioc, func, secpolicy,
- DATASET_NAME, B_FALSE, pool_check);
-}
-
-static void
-zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
-{
- zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
- POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
-}
-
-static void
-zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy)
-{
- zfs_ioctl_register_legacy(ioc, func, secpolicy,
- NO_NAME, B_FALSE, POOL_CHECK_NONE);
-}
-
-static void
-zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
- zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
-{
- zfs_ioctl_register_legacy(ioc, func, secpolicy,
- DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
-}
-
-static void
-zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
-{
- zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
- zfs_secpolicy_read);
-}
-
-static void
-zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy)
-{
- zfs_ioctl_register_legacy(ioc, func, secpolicy,
- DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
-}
-
-static void
-zfs_ioctl_init(void)
-{
- zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
- zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
-
- zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
- zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
-
- zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
- zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
- POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
-
- zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
- zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
- POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
-
- zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
- zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
- POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
-
- zfs_ioctl_register("create", ZFS_IOC_CREATE,
- zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
-
- zfs_ioctl_register("clone", ZFS_IOC_CLONE,
- zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
-
- zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
- zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
-
- /* IOCTLS that use the legacy function signature */
-
- zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
- zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
-
- zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
- zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
- zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
- zfs_ioc_pool_scan);
- zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
- zfs_ioc_pool_upgrade);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
- zfs_ioc_vdev_add);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
- zfs_ioc_vdev_remove);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
- zfs_ioc_vdev_set_state);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
- zfs_ioc_vdev_attach);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
- zfs_ioc_vdev_detach);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
- zfs_ioc_vdev_setpath);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
- zfs_ioc_vdev_setfru);
- zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
- zfs_ioc_pool_set_props);
- zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
- zfs_ioc_vdev_split);
- zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
- zfs_ioc_pool_reguid);
-
- zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
- zfs_ioc_pool_configs, zfs_secpolicy_none);
- zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
- zfs_ioc_pool_tryimport, zfs_secpolicy_config);
- zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
- zfs_ioc_inject_fault, zfs_secpolicy_inject);
- zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
- zfs_ioc_clear_fault, zfs_secpolicy_inject);
- zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
- zfs_ioc_inject_list_next, zfs_secpolicy_inject);
-
-	/*
-	 * pool destroy and export don't log the history as part of
-	 * zfsdev_ioctl; rather, zfs_ioc_pool_export
-	 * does the logging of those commands.
-	 */
- zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
- zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
- zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
- zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
-
- zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
- zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
- zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
- zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
-
- zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
- zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
- zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
- zfs_ioc_dsobj_to_dsname,
- zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
- zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
- zfs_ioc_pool_get_history,
- zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
-
- zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
- zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
-
- zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
- zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
- zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
- zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
-
- zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
- zfs_ioc_space_written);
- zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS,
- zfs_ioc_get_holds);
- zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
- zfs_ioc_objset_recvd_props);
- zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
- zfs_ioc_next_obj);
- zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
- zfs_ioc_get_fsacl);
- zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
- zfs_ioc_objset_stats);
- zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
- zfs_ioc_objset_zplprops);
- zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
- zfs_ioc_dataset_list_next);
- zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
- zfs_ioc_snapshot_list_next);
- zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
- zfs_ioc_send_progress);
-
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
- zfs_ioc_diff, zfs_secpolicy_diff);
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
- zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
- zfs_ioc_obj_to_path, zfs_secpolicy_diff);
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
- zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
- zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
- zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
- zfs_ioc_send, zfs_secpolicy_send);
-
- zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
- zfs_secpolicy_none);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
- zfs_secpolicy_destroy);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback,
- zfs_secpolicy_rollback);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
- zfs_secpolicy_rename);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
- zfs_secpolicy_recv);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
- zfs_secpolicy_promote);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold,
- zfs_secpolicy_hold);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release,
- zfs_secpolicy_release);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
- zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
- zfs_secpolicy_set_fsacl);
-
- zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
- zfs_secpolicy_share, POOL_CHECK_NONE);
- zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
- zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
- zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
- zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
- zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
- zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
- POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
-}
+static zfs_ioc_vec_t zfs_ioc_vec[] = {
+ { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
+ POOL_CHECK_READONLY },
+ { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
+ B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
+ B_FALSE, POOL_CHECK_NONE },
+ { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
+ B_FALSE, POOL_CHECK_NONE },
+ { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
+ DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED },
+ { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
+ POOL_CHECK_NONE },
+ { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
+ B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
+ { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
+ POOL_CHECK_SUSPENDED }
+};
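
zfsdev_ioctl() below indexes this table by cmd - ZFS_IOC and bounds-checks the result before consulting the entry's secpolicy, namecheck, and pool-check fields. A compact sketch of the same table-driven dispatch, with illustrative names and a single entry:

/*
 * Hedged sketch of table-driven ioctl dispatch: the command number,
 * minus a base, indexes a vector of handlers with per-entry flags.
 */
#include <stdio.h>
#include <errno.h>

#define	EX_IOC_BASE	0x100

typedef int (*ioc_func_t)(const char *name);

typedef struct ioc_vec {
	ioc_func_t	func;
	int		check_name;	/* validate name before dispatch */
} ioc_vec_t;

static int
do_stats(const char *name)
{
	(void) printf("stats for %s\n", name);
	return (0);
}

static ioc_vec_t ioc_vec[] = {
	{ do_stats, 1 },
};

static int
dispatch(int cmd, const char *name)
{
	unsigned int vec = cmd - EX_IOC_BASE;

	/* out-of-range commands (including cmd < base) fail the check */
	if (vec >= sizeof (ioc_vec) / sizeof (ioc_vec[0]))
		return (EINVAL);
	if (ioc_vec[vec].check_name && name[0] == '\0')
		return (EINVAL);
	return (ioc_vec[vec].func(name));
}

int
main(void)
{
	return (dispatch(EX_IOC_BASE, "tank"));
}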
int
pool_status_check(const char *name, zfs_ioc_namecheck_t type,
@@ -5581,145 +4880,67 @@ static int
zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
zfs_cmd_t *zc;
- uint_t vecnum;
- int error, rc, len;
+ uint_t vec;
+ int error, rc;
minor_t minor = getminor(dev);
- const zfs_ioc_vec_t *vec;
- char *saved_poolname = NULL;
- nvlist_t *innvl = NULL;
if (minor != 0 &&
zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
- vecnum = cmd - ZFS_IOC_FIRST;
+ vec = cmd - ZFS_IOC;
ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
- if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
+ if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
return (EINVAL);
- vec = &zfs_ioc_vec[vecnum];
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
- if (error != 0) {
+ if (error != 0)
error = EFAULT;
- goto out;
- }
- zc->zc_iflags = flag & FKIOCTL;
- if (zc->zc_nvlist_src_size != 0) {
- error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
- zc->zc_iflags, &innvl);
- if (error != 0)
- goto out;
- }
+ if ((error == 0) && !(flag & FKIOCTL))
+ error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
/*
* Ensure that all pool/dataset names are valid before we pass down to
* the lower layers.
*/
- zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
- switch (vec->zvec_namecheck) {
- case POOL_NAME:
- if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
- error = EINVAL;
- else
+ if (error == 0) {
+ zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
+ zc->zc_iflags = flag & FKIOCTL;
+ switch (zfs_ioc_vec[vec].zvec_namecheck) {
+ case POOL_NAME:
+ if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
+ error = EINVAL;
error = pool_status_check(zc->zc_name,
- vec->zvec_namecheck, vec->zvec_pool_check);
- break;
+ zfs_ioc_vec[vec].zvec_namecheck,
+ zfs_ioc_vec[vec].zvec_pool_check);
+ break;
- case DATASET_NAME:
- if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
- error = EINVAL;
- else
+ case DATASET_NAME:
+ if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
+ error = EINVAL;
error = pool_status_check(zc->zc_name,
- vec->zvec_namecheck, vec->zvec_pool_check);
- break;
-
- case NO_NAME:
- break;
- }
-
-
- if (error == 0 && !(flag & FKIOCTL))
- error = vec->zvec_secpolicy(zc, innvl, cr);
-
- if (error != 0)
- goto out;
-
- /* legacy ioctls can modify zc_name */
- len = strcspn(zc->zc_name, "/@") + 1;
- saved_poolname = kmem_alloc(len, KM_SLEEP);
- (void) strlcpy(saved_poolname, zc->zc_name, len);
-
- if (vec->zvec_func != NULL) {
- nvlist_t *outnvl;
- int puterror = 0;
- spa_t *spa;
- nvlist_t *lognv = NULL;
-
- ASSERT(vec->zvec_legacy_func == NULL);
-
- /*
- * Add the innvl to the lognv before calling the func,
- * in case the func changes the innvl.
- */
- if (vec->zvec_allow_log) {
- lognv = fnvlist_alloc();
- fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
- vec->zvec_name);
- if (!nvlist_empty(innvl)) {
- fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
- innvl);
- }
- }
-
- outnvl = fnvlist_alloc();
- error = vec->zvec_func(zc->zc_name, innvl, outnvl);
-
- if (error == 0 && vec->zvec_allow_log &&
- spa_open(zc->zc_name, &spa, FTAG) == 0) {
- if (!nvlist_empty(outnvl)) {
- fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
- outnvl);
- }
- (void) spa_history_log_nvl(spa, lognv);
- spa_close(spa, FTAG);
- }
- fnvlist_free(lognv);
+ zfs_ioc_vec[vec].zvec_namecheck,
+ zfs_ioc_vec[vec].zvec_pool_check);
+ break;
- if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
- int smusherror = 0;
- if (vec->zvec_smush_outnvlist) {
- smusherror = nvlist_smush(outnvl,
- zc->zc_nvlist_dst_size);
- }
- if (smusherror == 0)
- puterror = put_nvlist(zc, outnvl);
+ case NO_NAME:
+ break;
}
-
- if (puterror != 0)
- error = puterror;
-
- nvlist_free(outnvl);
- } else {
- error = vec->zvec_legacy_func(zc);
}
-out:
- nvlist_free(innvl);
+ if (error == 0)
+ error = zfs_ioc_vec[vec].zvec_func(zc);
+
rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
- if (error == 0 && rc != 0)
- error = EFAULT;
- if (error == 0 && vec->zvec_allow_log) {
- char *s = tsd_get(zfs_allow_log_key);
- if (s != NULL)
- strfree(s);
- (void) tsd_set(zfs_allow_log_key, saved_poolname);
- } else {
- if (saved_poolname != NULL)
- strfree(saved_poolname);
+ if (error == 0) {
+ if (rc != 0)
+ error = EFAULT;
+ if (zfs_ioc_vec[vec].zvec_his_log)
+ zfs_log_history(zc);
}
kmem_free(zc, sizeof (zfs_cmd_t));
@@ -5835,12 +5056,9 @@ static struct modlinkage modlinkage = {
NULL
};
-static void
-zfs_allow_log_destroy(void *arg)
-{
- char *poolname = arg;
- strfree(poolname);
-}
+
+uint_t zfs_fsyncer_key;
+extern uint_t rrw_tsd_key;
int
_init(void)
@@ -5850,7 +5068,6 @@ _init(void)
spa_init(FREAD | FWRITE);
zfs_init();
zvol_init();
- zfs_ioctl_init();
if ((error = mod_install(&modlinkage)) != 0) {
zvol_fini();
@@ -5860,8 +5077,7 @@ _init(void)
}
tsd_create(&zfs_fsyncer_key, NULL);
- tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
- tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
+ tsd_create(&rrw_tsd_key, NULL);
error = ldi_ident_from_mod(&modlinkage, &zfs_li);
ASSERT(error == 0);
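
tsd_create() here sets up kernel thread-specific data, the same mechanism the newer code used to stash the pool name for history logging. A hedged userland analog using pthread keys, which follow the same create/set/get/destructor pattern:

/*
 * Userland analog of tsd_create()/tsd_set()/tsd_get(): a pthread key
 * with a destructor, mirroring the removed zfs_allow_log_destroy().
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_key_t log_key;

static void
log_key_destroy(void *arg)
{
	free(arg);	/* runs at thread exit for non-NULL values */
}

int
main(void)
{
	char *pool;

	if (pthread_key_create(&log_key, log_key_destroy) != 0)
		return (1);

	(void) pthread_setspecific(log_key, strdup("tank"));
	pool = pthread_getspecific(log_key);
	if (pool != NULL)
		(void) printf("saved pool: %s\n", pool);

	/* process exit from main() skips destructors; clean up by hand */
	log_key_destroy(pool);
	(void) pthread_setspecific(log_key, NULL);
	return (0);
}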
diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c
index 21ac731c1eb9..4970552d0cb7 100644
--- a/uts/common/fs/zfs/zfs_vfsops.c
+++ b/uts/common/fs/zfs/zfs_vfsops.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -385,14 +384,6 @@ vscan_changed_cb(void *arg, uint64_t newval)
}
static void
-acl_mode_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_mode = newval;
-}
-
-static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
zfsvfs_t *zfsvfs = arg;
@@ -523,8 +514,6 @@ zfs_register_callbacks(vfs_t *vfsp)
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
@@ -565,7 +554,6 @@ unregister:
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
@@ -1248,9 +1236,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
zfsvfs) == 0);
- VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
- zfsvfs) == 0);
-
VERIFY(dsl_prop_unregister(ds, "aclinherit",
acl_inherit_changed_cb, zfsvfs) == 0);
@@ -2249,8 +2234,9 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
sa_register_update_callback(os, zfs_sa_upgrade);
}
- spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
- "from %llu to %llu", zfsvfs->z_version, newvers);
+ spa_history_log_internal(LOG_DS_UPGRADE,
+ dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
+ zfsvfs->z_version, newvers, dmu_objset_id(os));
dmu_tx_commit(tx);
diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c
index 0c39274caf18..a0720079cf46 100644
--- a/uts/common/fs/zfs/zfs_vnops.c
+++ b/uts/common/fs/zfs/zfs_vnops.c
@@ -2975,8 +2975,7 @@ top:
uint64_t acl_obj;
new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
- if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
- goto out;
+ zfs_acl_chmod_setattr(zp, &aclp, new_mode);
mutex_enter(&zp->z_lock);
if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
@@ -4194,14 +4193,6 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
ZFS_VERIFY_ZP(zp);
/*
- * There's nothing to do if no data is cached.
- */
- if (!vn_has_cached_data(vp)) {
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- /*
* Align this request to the file block size in case we kluster.
* XXX - this can result in pretty aggresive locking, which can
* impact simultanious read/write access. One option might be
diff --git a/uts/common/fs/zfs/zil.c b/uts/common/fs/zfs/zil.c
index 081242cece5d..c66313ff6f85 100644
--- a/uts/common/fs/zfs/zil.c
+++ b/uts/common/fs/zfs/zil.c
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -561,7 +560,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
if (!list_is_empty(&zilog->zl_lwb_list)) {
ASSERT(zh->zh_claim_txg == 0);
- VERIFY(!keep_first);
+ ASSERT(!keep_first);
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
list_remove(&zilog->zl_lwb_list, lwb);
if (lwb->lwb_buf != NULL)
@@ -1662,9 +1661,20 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
void
zil_free(zilog_t *zilog)
{
+ lwb_t *head_lwb;
+
zilog->zl_stop_sync = 1;
- ASSERT(list_is_empty(&zilog->zl_lwb_list));
+ /*
+ * After zil_close() there should only be one lwb with a buffer.
+ */
+ head_lwb = list_head(&zilog->zl_lwb_list);
+ if (head_lwb) {
+ ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
+ list_remove(&zilog->zl_lwb_list, head_lwb);
+ zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
+ kmem_cache_free(zil_lwb_cache, head_lwb);
+ }
list_destroy(&zilog->zl_lwb_list);
avl_destroy(&zilog->zl_vdev_tree);
@@ -1704,10 +1714,6 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
{
zilog_t *zilog = dmu_objset_zil(os);
- ASSERT(zilog->zl_clean_taskq == NULL);
- ASSERT(zilog->zl_get_data == NULL);
- ASSERT(list_is_empty(&zilog->zl_lwb_list));
-
zilog->zl_get_data = get_data;
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
2, 2, TASKQ_PREPOPULATE);
@@ -1721,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
void
zil_close(zilog_t *zilog)
{
- lwb_t *lwb;
+ lwb_t *tail_lwb;
uint64_t txg = 0;
zil_commit(zilog, 0); /* commit all itx */
@@ -1733,9 +1739,9 @@ zil_close(zilog_t *zilog)
* destroy the zl_clean_taskq.
*/
mutex_enter(&zilog->zl_lock);
- lwb = list_tail(&zilog->zl_lwb_list);
- if (lwb != NULL)
- txg = lwb->lwb_max_txg;
+ tail_lwb = list_tail(&zilog->zl_lwb_list);
+ if (tail_lwb != NULL)
+ txg = tail_lwb->lwb_max_txg;
mutex_exit(&zilog->zl_lock);
if (txg)
txg_wait_synced(zilog->zl_dmu_pool, txg);
@@ -1743,19 +1749,6 @@ zil_close(zilog_t *zilog)
taskq_destroy(zilog->zl_clean_taskq);
zilog->zl_clean_taskq = NULL;
zilog->zl_get_data = NULL;
-
- /*
- * We should have only one LWB left on the list; remove it now.
- */
- mutex_enter(&zilog->zl_lock);
- lwb = list_head(&zilog->zl_lwb_list);
- if (lwb != NULL) {
- ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
- list_remove(&zilog->zl_lwb_list, lwb);
- zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
- kmem_cache_free(zil_lwb_cache, lwb);
- }
- mutex_exit(&zilog->zl_lock);
}
/*
diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c
index cfb5733f2bd7..eb509c5911f7 100644
--- a/uts/common/fs/zfs/zio.c
+++ b/uts/common/fs/zfs/zio.c
@@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -80,7 +78,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
#endif
-extern int zfs_mg_alloc_failures;
/*
* An allocating zio is one that either currently has the DVA allocate
@@ -161,12 +158,6 @@ zio_init(void)
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
}
- /*
- * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
- * to fail 3 times per txg or 8 failures, whichever is greater.
- */
- zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
-
zio_inject_init();
}
@@ -619,7 +610,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
zp->zp_compress >= ZIO_COMPRESS_OFF &&
zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
- DMU_OT_IS_VALID(zp->zp_type) &&
+ zp->zp_type < DMU_OT_NUMTYPES &&
zp->zp_level < 32 &&
zp->zp_copies > 0 &&
zp->zp_copies <= spa_max_replication(spa) &&
@@ -903,7 +894,7 @@ zio_read_bp_init(zio_t *zio)
zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
}
- if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
+ if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
@@ -1062,7 +1053,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
{
spa_t *spa = zio->io_spa;
zio_type_t t = zio->io_type;
- int flags = (cutinline ? TQ_FRONT : 0);
+ int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0);
/*
* If we're a config writer or a probe, the normal issue and
@@ -1086,15 +1077,8 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
q++;
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
-
- /*
- * NB: We are assuming that the zio can only be dispatched
- * to a single taskq at a time. It would be a grievous error
- * to dispatch the zio to another taskq at the same time.
- */
- ASSERT(zio->io_tqent.tqent_next == NULL);
- taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
- (task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
+ (void) taskq_dispatch(spa->spa_zio_taskq[t][q],
+ (task_func_t *)zio_execute, zio, flags);
}
static boolean_t
@@ -2130,7 +2114,6 @@ zio_dva_allocate(zio_t *zio)
metaslab_class_t *mc = spa_normal_class(spa);
blkptr_t *bp = zio->io_bp;
int error;
- int flags = 0;
if (zio->io_gang_leader == NULL) {
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
@@ -2143,21 +2126,10 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
- /*
- * The dump device does not support gang blocks so allocation on
- * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
- * the "fast" gang feature.
- */
- flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
- flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
- METASLAB_GANG_CHILD : 0;
error = metaslab_alloc(spa, mc, zio->io_size, bp,
- zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
+ zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
if (error) {
- spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
- "size %llu, error %d", spa_name(spa), zio, zio->io_size,
- error);
if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
return (zio_write_gang_block(zio));
zio->io_error = error;
@@ -2219,22 +2191,13 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
ASSERT(txg > spa_syncing_txg(spa));
- /*
- * ZIL blocks are always contiguous (i.e. not gang blocks) so we
- * set the METASLAB_GANG_AVOID flag so that they don't "fast gang"
- * when allocating them.
- */
- if (use_slog) {
+ if (use_slog)
error = metaslab_alloc(spa, spa_log_class(spa), size,
- new_bp, 1, txg, old_bp,
- METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
- }
+ new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
- if (error) {
+ if (error)
error = metaslab_alloc(spa, spa_normal_class(spa), size,
- new_bp, 1, txg, old_bp,
- METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
- }
+ new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
if (error == 0) {
BP_SET_LSIZE(new_bp, size);
@@ -2906,11 +2869,9 @@ zio_done(zio_t *zio)
* Reexecution is potentially a huge amount of work.
* Hand it off to the otherwise-unused claim taskq.
*/
- ASSERT(zio->io_tqent.tqent_next == NULL);
- (void) taskq_dispatch_ent(
+ (void) taskq_dispatch(
spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
- (task_func_t *)zio_reexecute, zio, 0,
- &zio->io_tqent);
+ (task_func_t *)zio_reexecute, zio, TQ_SLEEP);
}
return (ZIO_PIPELINE_STOP);
}
@@ -2989,45 +2950,3 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_checksum_verify,
zio_done
};
-
-/* dnp is the dnode for zb1->zb_object */
-boolean_t
-zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
- const zbookmark_t *zb2)
-{
- uint64_t zb1nextL0, zb2thisobj;
-
- ASSERT(zb1->zb_objset == zb2->zb_objset);
- ASSERT(zb2->zb_level == 0);
-
- /*
- * A bookmark in the deadlist is considered to be after
- * everything else.
- */
- if (zb2->zb_object == DMU_DEADLIST_OBJECT)
- return (B_TRUE);
-
- /* The objset_phys_t isn't before anything. */
- if (dnp == NULL)
- return (B_FALSE);
-
- zb1nextL0 = (zb1->zb_blkid + 1) <<
- ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
-
- zb2thisobj = zb2->zb_object ? zb2->zb_object :
- zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
-
- if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t nextobj = zb1nextL0 *
- (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
- return (nextobj <= zb2thisobj);
- }
-
- if (zb1->zb_object < zb2thisobj)
- return (B_TRUE);
- if (zb1->zb_object > zb2thisobj)
- return (B_FALSE);
- if (zb2->zb_object == DMU_META_DNODE_OBJECT)
- return (B_FALSE);
- return (zb1nextL0 <= zb2->zb_blkid);
-}
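
The removed zbookmark_is_before() orders a bookmark against a level-0 bookmark by computing the first L0 blkid that lies past zb1: its blkid plus one, scaled by the fan-out of each indirect level. A simplified, compilable re-derivation of that arithmetic; the struct layout and shift constants are stand-ins, not the on-disk ZFS definitions:

/*
 * Hedged re-derivation of the core zbookmark_is_before() arithmetic.
 * With an indirect-block fan-out of 2^(indblkshift - blkptrshift),
 * a level-N bookmark at blkid B covers L0 blkids up to but not
 * including (B + 1) << (N * fanout_shift).
 */
#include <stdio.h>
#include <stdint.h>

#define	EX_INDBLKSHIFT	14	/* illustrative indirect block shift */
#define	EX_BLKPTRSHIFT	7	/* illustrative log2(sizeof (blkptr_t)) */

typedef struct ex_bookmark {
	uint64_t	zb_object;
	int64_t		zb_level;
	uint64_t	zb_blkid;
} ex_bookmark_t;

/* zb2 must be a level-0 bookmark in the same object as zb1 */
static int
bookmark_is_before(const ex_bookmark_t *zb1, const ex_bookmark_t *zb2)
{
	uint64_t zb1nextL0 = (zb1->zb_blkid + 1) <<
	    (zb1->zb_level * (EX_INDBLKSHIFT - EX_BLKPTRSHIFT));

	return (zb1nextL0 <= zb2->zb_blkid);
}

int
main(void)
{
	ex_bookmark_t a = { 1, 1, 0 };	/* L1 block 0 covers L0 0..127 */
	ex_bookmark_t b = { 1, 0, 200 };

	(void) printf("a before b: %d\n", bookmark_is_before(&a, &b));
	return (0);
}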
diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c
index edf574e3c9ba..47b6c5a87a52 100644
--- a/uts/common/fs/zfs/zvol.c
+++ b/uts/common/fs/zfs/zvol.c
@@ -20,13 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Portions Copyright 2010 Robert Milkowski
- *
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
+/* Portions Copyright 2010 Robert Milkowski */
+
/*
* ZFS volume emulation driver.
*
@@ -136,7 +133,7 @@ typedef struct zvol_state {
int zvol_maxphys = DMU_MAX_ACCESS/2;
extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
- nvlist_t *, nvlist_t *);
+ nvlist_t *, nvlist_t **);
static int zvol_remove_zv(zvol_state_t *);
static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
static int zvol_dumpify(zvol_state_t *zv);
@@ -345,24 +342,6 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
}
/*
- * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
- * implement DKIOCFREE/free-long-range.
- */
-static int
-zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
-{
- uint64_t offset, length;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- offset = lr->lr_offset;
- length = lr->lr_length;
-
- return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
-}
-
-/*
* Replay a TX_WRITE ZIL transaction that didn't get committed
* after a system failure
*/
@@ -412,7 +391,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
/*
* Callback vectors for replaying records.
- * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
+ * Only TX_WRITE is needed for zvol.
*/
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* 0 no such transaction type */
@@ -425,7 +404,7 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_LINK */
zvol_replay_err, /* TX_RENAME */
zvol_replay_write, /* TX_WRITE */
- zvol_replay_truncate, /* TX_TRUNCATE */
+ zvol_replay_err, /* TX_TRUNCATE */
zvol_replay_err, /* TX_SETATTR */
zvol_replay_err, /* TX_ACL */
zvol_replay_err, /* TX_CREATE_ACL */
@@ -1533,32 +1512,7 @@ zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
*/
/*
- * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
- */
-static void
-zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
- boolean_t sync)
-{
- itx_t *itx;
- lr_truncate_t *lr;
- zilog_t *zilog = zv->zv_zilog;
-
- if (zil_replaying(zilog, tx))
- return;
-
- itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
- lr = (lr_truncate_t *)&itx->itx_lr;
- lr->lr_foid = ZVOL_OBJ;
- lr->lr_offset = off;
- lr->lr_length = len;
-
- itx->itx_sync = sync;
- zil_itx_assign(zilog, itx, tx);
-}
-
-/*
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
- * Also a dirtbag dkio ioctl for unmap/free-block functionality.
*/
/*ARGSUSED*/
int
@@ -1677,65 +1631,6 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
zfs_range_unlock(rl);
break;
- case DKIOCFREE:
- {
- dkioc_free_t df;
- dmu_tx_t *tx;
-
- if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
- error = EFAULT;
- break;
- }
-
- /*
- * Apply Postel's Law to length-checking. If they overshoot,
- * just blank out until the end, if there's a need to blank
- * out anything.
- */
- if (df.df_start >= zv->zv_volsize)
- break; /* No need to do anything... */
- if (df.df_start + df.df_length > zv->zv_volsize)
- df.df_length = DMU_OBJECT_END;
-
- rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
- RL_WRITER);
- tx = dmu_tx_create(zv->zv_objset);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error != 0) {
- dmu_tx_abort(tx);
- } else {
- zvol_log_truncate(zv, tx, df.df_start,
- df.df_length, B_TRUE);
- dmu_tx_commit(tx);
- error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
- df.df_start, df.df_length);
- }
-
- zfs_range_unlock(rl);
-
- if (error == 0) {
- /*
- * If the write-cache is disabled or 'sync' property
- * is set to 'always' then treat this as a synchronous
- * operation (i.e. commit to zil).
- */
- if (!(zv->zv_flags & ZVOL_WCE) ||
- (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
- zil_commit(zv->zv_zilog, ZVOL_OBJ);
-
- /*
- * If the caller really wants synchronous writes, and
- * can't wait for them, don't return until the write
- * is done.
- */
- if (df.df_flags & DF_WAIT_SYNC) {
- txg_wait_synced(
- dmu_objset_pool(zv->zv_objset), 0);
- }
- }
- break;
- }
-
default:
error = ENOTTY;
break;
@@ -1886,7 +1781,7 @@ zvol_dumpify(zvol_state_t *zv)
if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
- boolean_t resize = (dumpsize > 0);
+ boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE;
if ((error = zvol_dump_init(zv, resize)) != 0) {
(void) zvol_dump_fini(zv);
diff --git a/uts/common/os/fm.c b/uts/common/os/fm.c
index eff91aee5e64..4efcff4f464a 100644
--- a/uts/common/os/fm.c
+++ b/uts/common/os/fm.c
@@ -79,7 +79,7 @@
* URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These
* values must be kept in sync with the FMA source code in usr/src/cmd/fm.
*/
-static const char *fm_url = "http://illumos.org/msg";
+static const char *fm_url = "http://www.sun.com/msg";
static const char *fm_msgid = "SUNOS-8000-0G";
static char *volatile fm_panicstr = NULL;
diff --git a/uts/common/sys/ccompile.h b/uts/common/sys/ccompile.h
index 690bb7afb73a..c9857b086575 100644
--- a/uts/common/sys/ccompile.h
+++ b/uts/common/sys/ccompile.h
@@ -27,6 +27,8 @@
#ifndef _SYS_CCOMPILE_H
#define _SYS_CCOMPILE_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
/*
* This file contains definitions designed to enable different compilers
* to be used harmoniously on Solaris systems.
@@ -77,27 +79,6 @@ extern "C" {
*/
#define __sun_attr___noreturn__ __attribute__((__noreturn__))
-/*
- * The function is 'extern inline' and expects GNU C89 behaviour, not C99
- * behaviour.
- *
- * Should only be used on 'extern inline' definitions for GCC.
- */
-#if __GNUC_VERSION >= 40200
-#define __sun_attr___gnu_inline__ __attribute__((__gnu_inline__))
-#else
-#define __sun_attr___gnu_inline__
-#endif
-
-/*
- * The function has control flow such that it may return multiple times (in
- * the manner of setjmp or vfork)
- */
-#if __GNUC_VERSION >= 40100
-#define __sun_attr___returns_twice__ __attribute__((__returns_twice__))
-#else
-#define __sun_attr___returns_twice__
-#endif
/*
* This is an appropriate label for functions that do not
@@ -135,11 +116,10 @@ extern "C" {
#define __KPRINTFLIKE(__n) __sun_attr__((__KPRINTFLIKE__(__n)))
#define __KVPRINTFLIKE(__n) __sun_attr__((__KVPRINTFLIKE__(__n)))
#define __NORETURN __sun_attr__((__noreturn__))
-#define __GNU_INLINE __inline__ __sun_attr__((__gnu_inline__))
-#define __RETURNS_TWICE __sun_attr__((__returns_twice__))
#define __CONST __sun_attr__((__const__))
#define __PURE __sun_attr__((__pure__))
+
#ifdef __cplusplus
}
#endif
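To see how the surviving attribute wrappers are consumed, a minimal sketch (my_abort() is hypothetical; with GCC the macro expands to __attribute__((__noreturn__)), with other compilers to nothing, so one prototype serves both):

#include <sys/ccompile.h>

/* Callers get no-return data-flow information on GCC, and a plain
 * prototype elsewhere. */
extern void my_abort(const char *reason) __NORETURN;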
diff --git a/uts/common/sys/cmn_err.h b/uts/common/sys/cmn_err.h
index 736c77b9dcf8..e710d8e5c30b 100644
--- a/uts/common/sys/cmn_err.h
+++ b/uts/common/sys/cmn_err.h
@@ -26,19 +26,17 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- *
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_CMN_ERR_H
#define _SYS_CMN_ERR_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
#if defined(_KERNEL) && !defined(_ASM)
#include <sys/va_list.h>
#endif
-#include <sys/dditypes.h>
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -58,40 +56,47 @@ extern "C" {
/*PRINTFLIKE2*/
extern void cmn_err(int, const char *, ...)
__KPRINTFLIKE(2);
+#pragma rarely_called(cmn_err)
extern void vzcmn_err(zoneid_t, int, const char *, __va_list)
__KVPRINTFLIKE(3);
-
-extern void dev_err(dev_info_t *, int, char *, ...)
- __KPRINTFLIKE(3);
+#pragma rarely_called(vzcmn_err)
extern void vcmn_err(int, const char *, __va_list)
__KVPRINTFLIKE(2);
+#pragma rarely_called(vcmn_err)
/*PRINTFLIKE3*/
extern void zcmn_err(zoneid_t, int, const char *, ...)
__KPRINTFLIKE(3);
+#pragma rarely_called(zcmn_err)
/*PRINTFLIKE1*/
extern void printf(const char *, ...)
__KPRINTFLIKE(1);
+#pragma rarely_called(printf)
extern void vzprintf(zoneid_t, const char *, __va_list)
__KVPRINTFLIKE(2);
+#pragma rarely_called(vzprintf)
/*PRINTFLIKE2*/
extern void zprintf(zoneid_t, const char *, ...)
__KPRINTFLIKE(2);
+#pragma rarely_called(zprintf)
extern void vprintf(const char *, __va_list)
__KVPRINTFLIKE(1);
+#pragma rarely_called(vprintf)
/*PRINTFLIKE1*/
extern void uprintf(const char *, ...)
__KPRINTFLIKE(1);
+#pragma rarely_called(uprintf)
extern void vuprintf(const char *, __va_list)
__KVPRINTFLIKE(1);
+#pragma rarely_called(vuprintf)
/*PRINTFLIKE3*/
extern size_t snprintf(char *, size_t, const char *, ...)
@@ -107,9 +112,11 @@ extern char *vsprintf(char *, const char *, __va_list)
/*PRINTFLIKE1*/
extern void panic(const char *, ...)
__KPRINTFLIKE(1) __NORETURN;
+#pragma rarely_called(panic)
extern void vpanic(const char *, __va_list)
__KVPRINTFLIKE(1) __NORETURN;
+#pragma rarely_called(vpanic)
#endif /* _KERNEL */
#endif /* !_ASM */
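#pragma rarely_called is a Sun Studio hint that marks these diagnostic routines as cold, so the optimizer keeps their call sites out of hot paths; the __KPRINTFLIKE annotations give printf-style format checking. A minimal sketch of typical kernel use (report_bad_block() is hypothetical):

#include <sys/cmn_err.h>

static void
report_bad_block(uint64_t blkid, int err)
{
	/*
	 * CE_WARN is one of the standard severity levels (CE_CONT,
	 * CE_NOTE, CE_WARN, CE_PANIC); __KPRINTFLIKE(2) lets the
	 * compiler check the format string against the arguments.
	 */
	cmn_err(CE_WARN, "bad block %llu (error %d)",
	    (unsigned long long)blkid, err);
}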
diff --git a/uts/common/sys/dtrace.h b/uts/common/sys/dtrace.h
index c15799a4e4a2..007502d7d856 100644
--- a/uts/common/sys/dtrace.h
+++ b/uts/common/sys/dtrace.h
@@ -24,10 +24,6 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
-
#ifndef _SYS_DTRACE_H
#define _SYS_DTRACE_H
@@ -206,7 +202,6 @@ typedef enum dtrace_probespec {
#define DIF_VAR_ARGS 0x0000 /* arguments array */
#define DIF_VAR_REGS 0x0001 /* registers array */
#define DIF_VAR_UREGS 0x0002 /* user registers array */
-#define DIF_VAR_VMREGS 0x0003 /* virtual machine registers array */
#define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */
#define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */
#define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */
@@ -285,10 +280,8 @@ typedef enum dtrace_probespec {
#define DIF_SUBR_INET_NTOP 41
#define DIF_SUBR_INET_NTOA 42
#define DIF_SUBR_INET_NTOA6 43
-#define DIF_SUBR_TOUPPER 44
-#define DIF_SUBR_TOLOWER 45
-#define DIF_SUBR_MAX 45 /* max subroutine value */
+#define DIF_SUBR_MAX 43 /* max subroutine value */
typedef uint32_t dif_instr_t;
@@ -397,8 +390,6 @@ typedef struct dtrace_difv {
#define DTRACEACT_PRINTF 3 /* printf() action */
#define DTRACEACT_PRINTA 4 /* printa() action */
#define DTRACEACT_LIBACT 5 /* library-controlled action */
-#define DTRACEACT_TRACEMEM 6 /* tracemem() action */
-#define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */
#define DTRACEACT_PROC 0x0100
#define DTRACEACT_USTACK (DTRACEACT_PROC + 1)
@@ -464,7 +455,6 @@ typedef struct dtrace_difv {
#define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6)
#define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7)
#define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8)
-#define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9)
#define DTRACEACT_ISAGG(x) \
(DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION)
@@ -499,31 +489,6 @@ typedef struct dtrace_difv {
(int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \
DTRACE_LQUANTIZE_BASESHIFT)
-#define DTRACE_LLQUANTIZE_FACTORSHIFT 48
-#define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48)
-#define DTRACE_LLQUANTIZE_LOWSHIFT 32
-#define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32)
-#define DTRACE_LLQUANTIZE_HIGHSHIFT 16
-#define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16)
-#define DTRACE_LLQUANTIZE_NSTEPSHIFT 0
-#define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX
-
-#define DTRACE_LLQUANTIZE_FACTOR(x) \
- (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \
- DTRACE_LLQUANTIZE_FACTORSHIFT)
-
-#define DTRACE_LLQUANTIZE_LOW(x) \
- (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \
- DTRACE_LLQUANTIZE_LOWSHIFT)
-
-#define DTRACE_LLQUANTIZE_HIGH(x) \
- (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \
- DTRACE_LLQUANTIZE_HIGHSHIFT)
-
-#define DTRACE_LLQUANTIZE_NSTEP(x) \
- (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \
- DTRACE_LLQUANTIZE_NSTEPSHIFT)
-
#define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX)
#define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32)
#define DTRACE_USTACK_ARG(x, y) \
@@ -1356,7 +1321,7 @@ typedef struct dof_helper {
* dtps_resume() <-- Resume specified probe
* dtps_getargdesc() <-- Get the argument description for args[X]
* dtps_getargval() <-- Get the value for an argX or args[X] variable
- * dtps_mode() <-- Return the mode of the fired probe
+ * dtps_usermode() <-- Find out if the probe was fired in user mode
* dtps_destroy() <-- Destroy all state associated with this probe
*
* 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec)
@@ -1605,32 +1570,24 @@ typedef struct dof_helper {
* This is called from within dtrace_probe() meaning that interrupts
* are disabled. No locks should be taken within this entry point.
*
- * 1.10 int dtps_mode(void *arg, dtrace_id_t id, void *parg)
+ * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg)
*
* 1.10.1 Overview
*
- * Called to determine the mode of a fired probe.
+ * Called to determine if the probe was fired in a user context.
*
* 1.10.2 Arguments and notes
*
* The first argument is the cookie as passed to dtrace_register(). The
- * second argument is the identifier of the current probe. The third
+ * second argument is the identifier of the current probe. The third
* argument is the probe argument as passed to dtrace_probe_create(). This
* entry point must not be left NULL for providers whose probes allow for
- * mixed mode tracing, that is to say those unanchored probes that can fire
- * during kernel- or user-mode execution.
+ * mixed mode tracing, that is to say those probes that can fire during
+ * kernel- _or_ user-mode execution.
*
* 1.10.3 Return value
*
- * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL
- * or DTRACE_MODE_USER) and the policy when the privilege of the enabling
- * is insufficient for that mode (either DTRACE_MODE_NOPRIV_DROP or
- * DTRACE_MODE_NOPRIV_RESTRICT). If the policy is DTRACE_MODE_NOPRIV_DROP,
- * insufficient privilege will result in the probe firing being silently
- * ignored for the enabling; if the policy is DTRACE_MODE_NOPRIV_RESTRICT,
- * insufficient privilege will not prevent probe processing for the
- * enabling, but restrictions will be in place that induce a UPRIV fault
- * upon attempt to examine probe arguments or current process state.
+ * A boolean value.
*
* 1.10.4 Caller's context
*
@@ -2021,15 +1978,10 @@ typedef struct dtrace_pops {
dtrace_argdesc_t *desc);
uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg,
int argno, int aframes);
- int (*dtps_mode)(void *arg, dtrace_id_t id, void *parg);
+ int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg);
void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg);
} dtrace_pops_t;
-#define DTRACE_MODE_KERNEL 0x01
-#define DTRACE_MODE_USER 0x02
-#define DTRACE_MODE_NOPRIV_DROP 0x10
-#define DTRACE_MODE_NOPRIV_RESTRICT 0x20
-
typedef uintptr_t dtrace_provider_id_t;
extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t,
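Under the restored contract, dtps_usermode() answers a yes/no question rather than returning the DTRACE_MODE_* bitmask. A minimal sketch, assuming a provider whose probes only ever fire in kernel context (example_usermode() is hypothetical):

/*ARGSUSED*/
static int
example_usermode(void *arg, dtrace_id_t id, void *parg)
{
	return (0);	/* probes from this provider never fire in user mode */
}

The function is then installed in the dtps_usermode slot of the dtrace_pops_t the provider passes to dtrace_register().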
diff --git a/uts/common/sys/dtrace_impl.h b/uts/common/sys/dtrace_impl.h
index 3bebd0cb30b0..fed537e18ba0 100644
--- a/uts/common/sys/dtrace_impl.h
+++ b/uts/common/sys/dtrace_impl.h
@@ -24,13 +24,11 @@
* Use is subject to license terms.
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
-
#ifndef _SYS_DTRACE_IMPL_H
#define _SYS_DTRACE_IMPL_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -421,11 +419,8 @@ typedef struct dtrace_buffer {
uint32_t dtb_errors; /* number of errors */
uint32_t dtb_xamot_errors; /* errors in inactive buffer */
#ifndef _LP64
- uint64_t dtb_pad1; /* pad out to 64 bytes */
+ uint64_t dtb_pad1;
#endif
- uint64_t dtb_switched; /* time of last switch */
- uint64_t dtb_interval; /* observed switch interval */
- uint64_t dtb_pad2[6]; /* pad to avoid false sharing */
} dtrace_buffer_t;
/*
@@ -929,8 +924,7 @@ typedef struct dtrace_mstate {
* Access flag used by dtrace_mstate.dtms_access.
*/
#define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */
-#define DTRACE_ACCESS_PROC 0x2 /* the priv for proc state */
-#define DTRACE_ACCESS_ARGS 0x4 /* the priv to examine args */
+
/*
* DTrace Activity
@@ -1145,7 +1139,7 @@ struct dtrace_provider {
dtrace_pops_t dtpv_pops; /* provider operations */
char *dtpv_name; /* provider name */
void *dtpv_arg; /* provider argument */
- hrtime_t dtpv_defunct; /* when made defunct */
+ uint_t dtpv_defunct; /* boolean: defunct provider */
struct dtrace_provider *dtpv_next; /* next provider */
};
@@ -1252,7 +1246,6 @@ extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t,
volatile uint16_t *);
extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *);
extern ulong_t dtrace_getreg(struct regs *, uint_t);
-extern uint64_t dtrace_getvmreg(uint_t, volatile uint16_t *);
extern int dtrace_getstackdepth(int);
extern void dtrace_getupcstack(uint64_t *, int);
extern void dtrace_getufpstack(uint64_t *, uint64_t *, int);
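With DTRACE_ACCESS_PROC and DTRACE_ACCESS_ARGS gone, the access check in the restored code reduces to a single bit test on dtms_access, per the comment above. A minimal sketch (can_read_kmem() is hypothetical):

static int
can_read_kmem(const dtrace_mstate_t *mstate)
{
	/* the one surviving privilege bit: may this enabling read kmem? */
	return ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0);
}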
diff --git a/uts/common/sys/feature_tests.h b/uts/common/sys/feature_tests.h
index e6ababd3d409..43339a83cd7f 100644
--- a/uts/common/sys/feature_tests.h
+++ b/uts/common/sys/feature_tests.h
@@ -27,6 +27,8 @@
#ifndef _SYS_FEATURE_TESTS_H
#define _SYS_FEATURE_TESTS_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
#include <sys/ccompile.h>
#include <sys/isa_defs.h>
@@ -363,7 +365,7 @@ extern "C" {
* compiler is used. This allows for the use of single prototype
* declarations regardless of compiler version.
*/
-#if (defined(__STDC__) && defined(_STDC_C99)) && !defined(__cplusplus)
+#if (defined(__STDC__) && defined(_STDC_C99))
#define _RESTRICT_KYWD restrict
#else
#define _RESTRICT_KYWD
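Note that the new condition no longer excludes C++, so a C++ compilation unit that defines _STDC_C99 would now see the C-only restrict keyword; whether that is intended is not clear from the patch. As a minimal sketch of how headers consume the macro (my_memcpy() is hypothetical):

#include <sys/feature_tests.h>
#include <sys/types.h>

/* One prototype serves C89, C99, and non-Sun compilers alike. */
extern void *my_memcpy(void *_RESTRICT_KYWD dst,
    const void *_RESTRICT_KYWD src, size_t n);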
diff --git a/uts/common/sys/fs/zfs.h b/uts/common/sys/fs/zfs.h
index 511fa9589817..da0b12bab4a9 100644
--- a/uts/common/sys/fs/zfs.h
+++ b/uts/common/sys/fs/zfs.h
@@ -21,9 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -52,16 +49,6 @@ typedef enum {
ZFS_TYPE_POOL = 0x8
} zfs_type_t;
-typedef enum dmu_objset_type {
- DMU_OST_NONE,
- DMU_OST_META,
- DMU_OST_ZFS,
- DMU_OST_ZVOL,
- DMU_OST_OTHER, /* For testing only! */
- DMU_OST_ANY, /* Be careful! */
- DMU_OST_NUMTYPES
-} dmu_objset_type_t;
-
#define ZFS_TYPE_DATASET \
(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
@@ -100,7 +87,7 @@ typedef enum {
ZFS_PROP_READONLY,
ZFS_PROP_ZONED,
ZFS_PROP_SNAPDIR,
- ZFS_PROP_ACLMODE,
+ ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
ZFS_PROP_CREATETXG, /* not exposed to the user */
ZFS_PROP_NAME, /* not exposed to the user */
@@ -135,9 +122,6 @@ typedef enum {
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
ZFS_PROP_SYNC,
- ZFS_PROP_REFRATIO,
- ZFS_PROP_WRITTEN,
- ZFS_PROP_CLONES,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -177,15 +161,9 @@ typedef enum {
ZPOOL_PROP_FREE,
ZPOOL_PROP_ALLOCATED,
ZPOOL_PROP_READONLY,
- ZPOOL_PROP_COMMENT,
- ZPOOL_PROP_EXPANDSZ,
- ZPOOL_PROP_FREEING,
ZPOOL_NUM_PROPS
} zpool_prop_t;
-/* Small enough to not hog a whole line of printout in zpool(1M). */
-#define ZPROP_MAX_COMMENT 32
-
#define ZPROP_CONT -2
#define ZPROP_INVAL -1
@@ -240,7 +218,6 @@ const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
boolean_t zfs_prop_userquota(const char *);
-boolean_t zfs_prop_written(const char *);
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed);
@@ -254,8 +231,6 @@ const char *zpool_prop_to_name(zpool_prop_t);
const char *zpool_prop_default_string(zpool_prop_t);
uint64_t zpool_prop_default_numeric(zpool_prop_t);
boolean_t zpool_prop_readonly(zpool_prop_t);
-boolean_t zpool_prop_feature(const char *);
-boolean_t zpool_prop_unsupported(const char *name);
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
@@ -363,7 +338,6 @@ typedef enum {
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
#define SPA_VERSION_28 28ULL
-#define SPA_VERSION_5000 5000ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
@@ -371,8 +345,8 @@ typedef enum {
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
-#define SPA_VERSION SPA_VERSION_5000
-#define SPA_VERSION_STRING "5000"
+#define SPA_VERSION SPA_VERSION_28
+#define SPA_VERSION_STRING "28"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -423,12 +397,6 @@ typedef enum {
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
-#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28
-#define SPA_VERSION_FEATURES SPA_VERSION_5000
-
-#define SPA_VERSION_IS_SUPPORTED(v) \
- (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
- ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@@ -520,17 +488,11 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
#define ZPOOL_CONFIG_REMOVING "removing"
#define ZPOOL_CONFIG_RESILVERING "resilvering"
-#define ZPOOL_CONFIG_COMMENT "comment"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
-#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */
-#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */
-#define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */
-#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
-#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -609,7 +571,6 @@ typedef enum vdev_aux {
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
- VDEV_AUX_UNSUP_FEAT, /* unsupported features */
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
@@ -700,7 +661,6 @@ typedef struct vdev_stat {
uint64_t vs_space; /* total capacity */
uint64_t vs_dspace; /* deflated capacity */
uint64_t vs_rsize; /* replaceable dev size */
- uint64_t vs_esize; /* expandable dev size */
uint64_t vs_ops[ZIO_TYPES]; /* operation count */
uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
uint64_t vs_read_errors; /* read errors */
@@ -754,10 +714,10 @@ typedef struct ddt_histogram {
/*
* /dev/zfs ioctl numbers.
*/
+#define ZFS_IOC ('Z' << 8)
+
typedef enum zfs_ioc {
- ZFS_IOC_FIRST = ('Z' << 8),
- ZFS_IOC = ZFS_IOC_FIRST,
- ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST,
+ ZFS_IOC_POOL_CREATE = ZFS_IOC,
ZFS_IOC_POOL_DESTROY,
ZFS_IOC_POOL_IMPORT,
ZFS_IOC_POOL_EXPORT,
@@ -792,6 +752,7 @@ typedef enum zfs_ioc {
ZFS_IOC_ERROR_LOG,
ZFS_IOC_CLEAR,
ZFS_IOC_PROMOTE,
+ ZFS_IOC_DESTROY_SNAPS,
ZFS_IOC_SNAPSHOT,
ZFS_IOC_DSOBJ_TO_DSNAME,
ZFS_IOC_OBJ_TO_PATH,
@@ -813,18 +774,7 @@ typedef enum zfs_ioc {
ZFS_IOC_NEXT_OBJ,
ZFS_IOC_DIFF,
ZFS_IOC_TMP_SNAPSHOT,
- ZFS_IOC_OBJ_TO_STATS,
- ZFS_IOC_SPACE_WRITTEN,
- ZFS_IOC_SPACE_SNAPS,
- ZFS_IOC_DESTROY_SNAPS,
- ZFS_IOC_POOL_REGUID,
- ZFS_IOC_POOL_REOPEN,
- ZFS_IOC_SEND_PROGRESS,
- ZFS_IOC_LOG_HISTORY,
- ZFS_IOC_SEND_NEW,
- ZFS_IOC_SEND_SPACE,
- ZFS_IOC_CLONE,
- ZFS_IOC_LAST
+ ZFS_IOC_OBJ_TO_STATS
} zfs_ioc_t;
/*
@@ -861,12 +811,6 @@ typedef enum {
#define ZPOOL_HIST_TXG "history txg"
#define ZPOOL_HIST_INT_EVENT "history internal event"
#define ZPOOL_HIST_INT_STR "history internal str"
-#define ZPOOL_HIST_INT_NAME "internal_name"
-#define ZPOOL_HIST_IOCTL "ioctl"
-#define ZPOOL_HIST_INPUT_NVL "in_nvl"
-#define ZPOOL_HIST_OUTPUT_NVL "out_nvl"
-#define ZPOOL_HIST_DSNAME "dsname"
-#define ZPOOL_HIST_DSID "dsid"
/*
* Flags for ZFS_IOC_VDEV_SET_STATE
@@ -893,7 +837,6 @@ typedef enum {
* ESC_ZFS_RESILVER_START
* ESC_ZFS_RESILVER_END
* ESC_ZFS_POOL_DESTROY
- * ESC_ZFS_POOL_REGUID
*
* ZFS_EV_POOL_NAME DATA_TYPE_STRING
* ZFS_EV_POOL_GUID DATA_TYPE_UINT64
@@ -912,6 +855,56 @@ typedef enum {
#define ZFS_EV_VDEV_PATH "vdev_path"
#define ZFS_EV_VDEV_GUID "vdev_guid"
+/*
+ * Note: This is encoded on-disk, so new events must be added to the
+ * end, and unused events can not be removed. Be sure to edit
+ * libzfs_pool.c: hist_event_table[].
+ */
+typedef enum history_internal_events {
+ LOG_NO_EVENT = 0,
+ LOG_POOL_CREATE,
+ LOG_POOL_VDEV_ADD,
+ LOG_POOL_REMOVE,
+ LOG_POOL_DESTROY,
+ LOG_POOL_EXPORT,
+ LOG_POOL_IMPORT,
+ LOG_POOL_VDEV_ATTACH,
+ LOG_POOL_VDEV_REPLACE,
+ LOG_POOL_VDEV_DETACH,
+ LOG_POOL_VDEV_ONLINE,
+ LOG_POOL_VDEV_OFFLINE,
+ LOG_POOL_UPGRADE,
+ LOG_POOL_CLEAR,
+ LOG_POOL_SCAN,
+ LOG_POOL_PROPSET,
+ LOG_DS_CREATE,
+ LOG_DS_CLONE,
+ LOG_DS_DESTROY,
+ LOG_DS_DESTROY_BEGIN,
+ LOG_DS_INHERIT,
+ LOG_DS_PROPSET,
+ LOG_DS_QUOTA,
+ LOG_DS_PERM_UPDATE,
+ LOG_DS_PERM_REMOVE,
+ LOG_DS_PERM_WHO_REMOVE,
+ LOG_DS_PROMOTE,
+ LOG_DS_RECEIVE,
+ LOG_DS_RENAME,
+ LOG_DS_RESERVATION,
+ LOG_DS_REPLAY_INC_SYNC,
+ LOG_DS_REPLAY_FULL_SYNC,
+ LOG_DS_ROLLBACK,
+ LOG_DS_SNAPSHOT,
+ LOG_DS_UPGRADE,
+ LOG_DS_REFQUOTA,
+ LOG_DS_REFRESERV,
+ LOG_POOL_SCAN_DONE,
+ LOG_DS_USER_HOLD,
+ LOG_DS_USER_RELEASE,
+ LOG_POOL_SPLIT,
+ LOG_END
+} history_internal_events_t;
+
#ifdef __cplusplus
}
#endif
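The restored numbering makes each ioctl value implicit in enum order: ZFS_IOC is ('Z' << 8) = 0x5a00, so ZFS_IOC_POOL_CREATE is 0x5a00, ZFS_IOC_POOL_DESTROY is 0x5a01, and so on. Because these values form the /dev/zfs ABI, inserting or reordering entries renumbers everything after the insertion point, which is why the hunk above moves ZFS_IOC_DESTROY_SNAPS back to its old slot. A minimal userland sketch that prints a few values, assuming the header is reachable as shown:

#include <sys/fs/zfs.h>
#include <stdio.h>

int
main(void)
{
	(void) printf("POOL_CREATE  = 0x%x\n", ZFS_IOC_POOL_CREATE);
	(void) printf("POOL_DESTROY = 0x%x\n", ZFS_IOC_POOL_DESTROY);
	return (0);
}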
diff --git a/uts/common/sys/nvpair.h b/uts/common/sys/nvpair.h
index ad25effc2994..30ff4e0667b3 100644
--- a/uts/common/sys/nvpair.h
+++ b/uts/common/sys/nvpair.h
@@ -20,14 +20,12 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_NVPAIR_H
#define _SYS_NVPAIR_H
#include <sys/types.h>
-#include <sys/time.h>
#include <sys/errno.h>
#include <sys/va_list.h>
@@ -276,73 +274,6 @@ int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
int nvpair_value_double(nvpair_t *, double *);
#endif
-nvlist_t *fnvlist_alloc(void);
-void fnvlist_free(nvlist_t *);
-size_t fnvlist_size(nvlist_t *);
-char *fnvlist_pack(nvlist_t *, size_t *);
-void fnvlist_pack_free(char *, size_t);
-nvlist_t *fnvlist_unpack(char *, size_t);
-nvlist_t *fnvlist_dup(nvlist_t *);
-void fnvlist_merge(nvlist_t *, nvlist_t *);
-
-void fnvlist_add_boolean(nvlist_t *, const char *);
-void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
-void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
-void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
-void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
-void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
-void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
-void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
-void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
-void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
-void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
-void fnvlist_add_string(nvlist_t *, const char *, const char *);
-void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
-void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
-void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
-void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
-void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
-void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
-void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
-void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
-void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
-void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
-void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
-void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
-void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
-void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
-
-void fnvlist_remove(nvlist_t *, const char *);
-void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
-
-nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name);
-boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name);
-boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name);
-uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name);
-int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name);
-int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name);
-int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name);
-int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name);
-uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name);
-uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name);
-uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name);
-uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name);
-char *fnvlist_lookup_string(nvlist_t *nvl, const char *name);
-nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name);
-
-boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
-uchar_t fnvpair_value_byte(nvpair_t *nvp);
-int8_t fnvpair_value_int8(nvpair_t *nvp);
-int16_t fnvpair_value_int16(nvpair_t *nvp);
-int32_t fnvpair_value_int32(nvpair_t *nvp);
-int64_t fnvpair_value_int64(nvpair_t *nvp);
-uint8_t fnvpair_value_uint8_t(nvpair_t *nvp);
-uint16_t fnvpair_value_uint16(nvpair_t *nvp);
-uint32_t fnvpair_value_uint32(nvpair_t *nvp);
-uint64_t fnvpair_value_uint64(nvpair_t *nvp);
-char *fnvpair_value_string(nvpair_t *nvp);
-nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
-
#ifdef __cplusplus
}
#endif
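The fnvlist_*/fnvpair_* declarations removed above are fail-fast wrappers around the classic interfaces: they return values directly and treat allocation or lookup failure as fatal, where the plain nvlist_* calls return an error the caller must check. A minimal sketch of the classic style that remains (make_props() is hypothetical; 0 as the third nvlist_alloc() argument is the userland convention):

#include <sys/nvpair.h>
#include <stddef.h>

static nvlist_t *
make_props(void)
{
	nvlist_t *nvl;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (NULL);			/* allocation failed */
	if (nvlist_add_uint64(nvl, "refs", 1) != 0) {
		nvlist_free(nvl);		/* undo partial work */
		return (NULL);
	}
	return (nvl);
}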
diff --git a/uts/common/sys/sysevent/eventdefs.h b/uts/common/sys/sysevent/eventdefs.h
index 5a75c5d84460..3ed9bb298018 100644
--- a/uts/common/sys/sysevent/eventdefs.h
+++ b/uts/common/sys/sysevent/eventdefs.h
@@ -20,7 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SYSEVENT_EVENTDEFS_H
@@ -257,7 +256,6 @@ extern "C" {
#define ESC_ZFS_SCRUB_FINISH "ESC_ZFS_scrub_finish"
#define ESC_ZFS_VDEV_SPARE "ESC_ZFS_vdev_spare"
#define ESC_ZFS_BOOTFS_VDEV_ATTACH "ESC_ZFS_bootfs_vdev_attach"
-#define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid"
/*
* datalink subclass definitions.
diff --git a/uts/common/sys/sysmacros.h b/uts/common/sys/sysmacros.h
index 71042eba85ae..89a672db2f8c 100644
--- a/uts/common/sys/sysmacros.h
+++ b/uts/common/sys/sysmacros.h
@@ -25,8 +25,6 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- *
- * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SYSMACROS_H
@@ -366,18 +364,12 @@ extern unsigned char bcd_to_byte[256];
#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif /* _BIT_FIELDS_LTOH */
+#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
+
/* avoid any possibility of clashing with <stddef.h> version */
-#if defined(_KERNEL) && !defined(_KMEMUSER)
-#if !defined(offsetof)
#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
-#endif /* !offsetof */
-
-#define container_of(m, s, name) \
- (void *)((uintptr_t)(m) - (uintptr_t)offsetof(s, name))
-
-#define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0]))
-#endif /* _KERNEL, !_KMEMUSER */
+#endif
#ifdef __cplusplus
}
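The hunk above also drops container_of() and ARRAY_SIZE(). container_of() recovers a pointer to an enclosing structure from a pointer to one of its members; the same effect can be open-coded with offsetof() alone, as in this minimal sketch (item_t, i_link, and ITEM_FROM_LINK are hypothetical names; assumes <sys/types.h> for uintptr_t):

struct node {
	struct node	*n_next;
};

typedef struct item {
	int		i_value;
	struct node	i_link;		/* embedded linkage */
} item_t;

/* Equivalent of container_of(lp, item_t, i_link): */
#define	ITEM_FROM_LINK(lp) \
	((item_t *)((uintptr_t)(lp) - offsetof(item_t, i_link)))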
diff --git a/uts/common/zmod/crc32.c b/uts/common/zmod/crc32.c
new file mode 100644
index 000000000000..61ad581ef562
--- /dev/null
+++ b/uts/common/zmod/crc32.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors. This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+ protection on the static variables used to control the first-use generation
+ of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+ first call get_crc_table() to initialize the tables before allowing more than
+ one thread to use crc32().
+ */
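A minimal sketch of the initialization order this note prescribes when DYNAMIC_CRC_TABLE is defined (crc_module_init() is hypothetical):

void
crc_module_init(void)
{
	/* Build the tables once, single-threaded, per the note above. */
	(void) get_crc_table();
	/* ... only now is it safe for multiple threads to call crc32(). */
}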
+
+#ifdef MAKECRCH
+# include <stdio.h>
+# ifndef DYNAMIC_CRC_TABLE
+# define DYNAMIC_CRC_TABLE
+# endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h" /* for STDC and FAR definitions */
+
+#define local static
+
+/* Find a four-byte integer type for crc32_little() and crc32_big(). */
+#ifndef NOBYFOUR
+# ifdef STDC /* need ANSI C limits.h to determine sizes */
+# include <limits.h>
+# define BYFOUR
+# if (UINT_MAX == 0xffffffffUL)
+ typedef unsigned int u4;
+# else
+# if (ULONG_MAX == 0xffffffffUL)
+ typedef unsigned long u4;
+# else
+# if (USHRT_MAX == 0xffffffffUL)
+ typedef unsigned short u4;
+# else
+# undef BYFOUR /* can't find a four-byte integer type! */
+# endif
+# endif
+# endif
+# endif /* STDC */
+#endif /* !NOBYFOUR */
+
+/* Definitions for doing the crc four data bytes at a time. */
+#ifdef BYFOUR
+# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
+ (((w)&0xff00)<<8)+(((w)&0xff)<<24))
+ local unsigned long crc32_little OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+ local unsigned long crc32_big OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+# define TBLS 8
+#else
+# define TBLS 1
+#endif /* BYFOUR */
+
+/* Local functions for crc concatenation */
+local unsigned long gf2_matrix_times OF((unsigned long *mat,
+ unsigned long vec));
+local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;
+local unsigned long FAR crc_table[TBLS][256];
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+ local void write_table OF((FILE *, const unsigned long FAR *));
+#endif /* MAKECRCH */
+/*
+ Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+ x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+ Polynomials over GF(2) are represented in binary, one bit per coefficient,
+ with the lowest powers in the most significant bit. Then adding polynomials
+ is just exclusive-or, and multiplying a polynomial by x is a right shift by
+ one. If we call the above polynomial p, and represent a byte as the
+ polynomial q, also with the lowest power in the most significant bit (so the
+ byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+ where a mod b means the remainder after dividing a by b.
+
+ This calculation is done using the shift-register method of multiplying and
+ taking the remainder. The register is initialized to zero, and for each
+ incoming bit, x^32 is added mod p to the register if the bit is a one (where
+ x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+ x (which is shifting right by one and adding x^32 mod p if the bit shifted
+ out is a one). We start with the highest power (least significant bit) of
+ q and repeat for all eight bits of q.
+
+ The first table is simply the CRC of all possible eight bit values. This is
+ all the information needed to generate CRCs on data a byte at a time for all
+ combinations of CRC register values and incoming bytes. The remaining tables
+ allow for word-at-a-time CRC calculation for both big-endian and little-
+ endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{
+ unsigned long c;
+ int n, k;
+ unsigned long poly; /* polynomial exclusive-or pattern */
+ /* terms of polynomial defining this crc (except x^32): */
+ static volatile int first = 1; /* flag to limit concurrent making */
+ static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+ /* See if another task is already doing this (not thread-safe, but better
+ than nothing -- significantly reduces duration of vulnerability in
+ case the advice about DYNAMIC_CRC_TABLE is ignored) */
+ if (first) {
+ first = 0;
+
+ /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+ poly = 0UL;
+ for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
+ poly |= 1UL << (31 - p[n]);
+
+ /* generate a crc for every 8-bit value */
+ for (n = 0; n < 256; n++) {
+ c = (unsigned long)n;
+ for (k = 0; k < 8; k++)
+ c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+ crc_table[0][n] = c;
+ }
+
+#ifdef BYFOUR
+ /* generate crc for each value followed by one, two, and three zeros,
+ and then the byte reversal of those as well as the first table */
+ for (n = 0; n < 256; n++) {
+ c = crc_table[0][n];
+ crc_table[4][n] = REV(c);
+ for (k = 1; k < 4; k++) {
+ c = crc_table[0][c & 0xff] ^ (c >> 8);
+ crc_table[k][n] = c;
+ crc_table[k + 4][n] = REV(c);
+ }
+ }
+#endif /* BYFOUR */
+
+ crc_table_empty = 0;
+ }
+ else { /* not first */
+ /* wait for the other guy to finish (not efficient, but rare) */
+ while (crc_table_empty)
+ ;
+ }
+
+#ifdef MAKECRCH
+ /* write out CRC tables to crc32.h */
+ {
+ FILE *out;
+
+ out = fopen("crc32.h", "w");
+ if (out == NULL) return;
+ fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+ fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+ fprintf(out, "local const unsigned long FAR ");
+ fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
+ write_table(out, crc_table[0]);
+# ifdef BYFOUR
+ fprintf(out, "#ifdef BYFOUR\n");
+ for (k = 1; k < 8; k++) {
+ fprintf(out, " },\n {\n");
+ write_table(out, crc_table[k]);
+ }
+ fprintf(out, "#endif\n");
+# endif /* BYFOUR */
+ fprintf(out, " }\n};\n");
+ fclose(out);
+ }
+#endif /* MAKECRCH */
+}
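The payoff of the table just built is that the per-bit shift-register loop collapses to one lookup per byte; this identity is exactly what the DO1 macro below implements. A minimal sketch of the same update as a function (crc32_update_byte() is hypothetical):

static unsigned long
crc32_update_byte(unsigned long crc, unsigned char b,
    const unsigned long table[256])
{
	/* Equivalent to eight iterations of the bit-at-a-time loop above. */
	return (table[(crc ^ b) & 0xff] ^ (crc >> 8));
}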
+
+#ifdef MAKECRCH
+local void write_table(out, table)
+ FILE *out;
+ const unsigned long FAR *table;
+{
+ int n;
+
+ for (n = 0; n < 256; n++)
+ fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n],
+ n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * This function can be used by asm versions of crc32()
+ */
+const unsigned long FAR * ZEXPORT get_crc_table()
+{
+#ifdef DYNAMIC_CRC_TABLE
+ if (crc_table_empty)
+ make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+ return (const unsigned long FAR *)crc_table;
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(crc, buf, len)
+ unsigned long crc;
+ const unsigned char FAR *buf;
+ unsigned len;
+{
+ if (buf == Z_NULL) return 0UL;
+
+#ifdef DYNAMIC_CRC_TABLE
+ if (crc_table_empty)
+ make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+ if (sizeof(void *) == sizeof(ptrdiff_t)) {
+ u4 endian;
+
+ endian = 1;
+ if (*((unsigned char *)(&endian)))
+ return crc32_little(crc, buf, len);
+ else
+ return crc32_big(crc, buf, len);
+ }
+#endif /* BYFOUR */
+ crc = crc ^ 0xffffffffUL;
+ while (len >= 8) {
+ DO8;
+ len -= 8;
+ }
+ if (len) do {
+ DO1;
+ } while (--len);
+ return crc ^ 0xffffffffUL;
+}
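Conventional zlib-style use of crc32(): seed with crc32(0, Z_NULL, 0), then feed data incrementally in any chunking. A minimal sketch (checksum_two_chunks() is hypothetical):

unsigned long
checksum_two_chunks(const unsigned char *a, unsigned la,
    const unsigned char *b, unsigned lb)
{
	unsigned long crc = crc32(0UL, Z_NULL, 0);	/* initial value */

	crc = crc32(crc, a, la);	/* chunk boundaries do not matter */
	crc = crc32(crc, b, lb);
	return (crc);
}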
+
+#ifdef BYFOUR
+
+/* ========================================================================= */
+#define DOLIT4 c ^= *buf4++; \
+ c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+ crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* ========================================================================= */
+local unsigned long crc32_little(crc, buf, len)
+ unsigned long crc;
+ const unsigned char FAR *buf;
+ unsigned len;
+{
+ register u4 c;
+ register const u4 FAR *buf4;
+
+ c = (u4)crc;
+ c = ~c;
+ while (len && ((ptrdiff_t)buf & 3)) {
+ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+ len--;
+ }
+
+ buf4 = (const u4 FAR *)(const void FAR *)buf;
+ while (len >= 32) {
+ DOLIT32;
+ len -= 32;
+ }
+ while (len >= 4) {
+ DOLIT4;
+ len -= 4;
+ }
+ buf = (const unsigned char FAR *)buf4;
+
+ if (len) do {
+ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+ } while (--len);
+ c = ~c;
+ return (unsigned long)c;
+}
+
+/* ========================================================================= */
+#define DOBIG4 c ^= *++buf4; \
+ c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+ crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* ========================================================================= */
+local unsigned long crc32_big(crc, buf, len)
+ unsigned long crc;
+ const unsigned char FAR *buf;
+ unsigned len;
+{
+ register u4 c;
+ register const u4 FAR *buf4;
+
+ c = REV((u4)crc);
+ c = ~c;
+ while (len && ((ptrdiff_t)buf & 3)) {
+ c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+ len--;
+ }
+
+ buf4 = (const u4 FAR *)(const void FAR *)buf;
+ buf4--;
+ while (len >= 32) {
+ DOBIG32;
+ len -= 32;
+ }
+ while (len >= 4) {
+ DOBIG4;
+ len -= 4;
+ }
+ buf4++;
+ buf = (const unsigned char FAR *)buf4;
+
+ if (len) do {
+ c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+ } while (--len);
+ c = ~c;
+ return (unsigned long)(REV(c));
+}
+
+#endif /* BYFOUR */
+
+#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
+
+/* ========================================================================= */
+local unsigned long gf2_matrix_times(mat, vec)
+ unsigned long *mat;
+ unsigned long vec;
+{
+ unsigned long sum;
+
+ sum = 0;
+ while (vec) {
+ if (vec & 1)
+ sum ^= *mat;
+ vec >>= 1;
+ mat++;
+ }
+ return sum;
+}
+
+/* ========================================================================= */
+local void gf2_matrix_square(square, mat)
+ unsigned long *square;
+ unsigned long *mat;
+{
+ int n;
+
+ for (n = 0; n < GF2_DIM; n++)
+ square[n] = gf2_matrix_times(mat, mat[n]);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine(crc1, crc2, len2)
+ uLong crc1;
+ uLong crc2;
+ z_off_t len2;
+{
+ int n;
+ unsigned long row;
+ unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
+ unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
+
+ /* degenerate case */
+ if (len2 == 0)
+ return crc1;
+
+ /* put operator for one zero bit in odd */
+ odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
+ row = 1;
+ for (n = 1; n < GF2_DIM; n++) {
+ odd[n] = row;
+ row <<= 1;
+ }
+
+ /* put operator for two zero bits in even */
+ gf2_matrix_square(even, odd);
+
+ /* put operator for four zero bits in odd */
+ gf2_matrix_square(odd, even);
+
+ /* apply len2 zeros to crc1 (first square will put the operator for one
+ zero byte, eight zero bits, in even) */
+ do {
+ /* apply zeros operator for this bit of len2 */
+ gf2_matrix_square(even, odd);
+ if (len2 & 1)
+ crc1 = gf2_matrix_times(even, crc1);
+ len2 >>= 1;
+
+ /* if no more bits set, then done */
+ if (len2 == 0)
+ break;
+
+ /* another iteration of the loop with odd and even swapped */
+ gf2_matrix_square(odd, even);
+ if (len2 & 1)
+ crc1 = gf2_matrix_times(odd, crc1);
+ len2 >>= 1;
+
+ /* if no more bits set, then done */
+ } while (len2 != 0);
+
+ /* return combined crc */
+ crc1 ^= crc2;
+ return crc1;
+}
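crc32_combine() exists because CRC-32 is linear over GF(2): appending len2 zero bytes to a message transforms its CRC by a known matrix, so the CRC of a concatenation A||B can be computed from crc(A), crc(B), and len(B) alone, without re-reading A. A minimal usage sketch (crc_of_concat() is hypothetical):

uLong
crc_of_concat(uLong crc_a, uLong crc_b, z_off_t len_b)
{
	/* The result equals crc32 of block A immediately followed by B. */
	return (crc32_combine(crc_a, crc_b, len_b));
}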