diff options
103 files changed, 3221 insertions, 8057 deletions
diff --git a/OPENSOLARIS.LICENSE b/OPENSOLARIS.LICENSE new file mode 100644 index 000000000000..da23621dc843 --- /dev/null +++ b/OPENSOLARIS.LICENSE @@ -0,0 +1,384 @@ +Unless otherwise noted, all files in this distribution are released +under the Common Development and Distribution License (CDDL). +Exceptions are noted within the associated source files. + +-------------------------------------------------------------------- + + +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates + or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Software, prior Modifications used by a Contributor (if any), + and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) + Modifications, or (c) the combination of files containing + Original Software with files containing Modifications, in + each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other + than Source Code. + + 1.5. "Initial Developer" means the individual or entity that first + makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or + portions thereof with code not governed by the terms of this + License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed + herein. + + 1.9. "Modifications" means the Source Code and Executable form of + any of the following: + + A. Any file that results from an addition to, deletion from or + modification of the contents of a file containing Original + Software or previous Modifications; + + B. 
Any new file that contains any part of the Original + Software or previous Modifications; or + + C. Any new file that is contributed or otherwise made + available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable + form of computer software code that is originally released + under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, + process, and apparatus claims, in any patent Licensable by + grantor. + + 1.12. "Source Code" means (a) the common form of computer software + code in which modifications are made and (b) associated + documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms + of, this License. For legal entities, "You" includes any + entity which controls, is controlled by, or is under common + control with You. For purposes of this definition, + "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty + percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. 
+ + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, the Initial + Developer hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer, to use, + reproduce, modify, display, perform, sublicense and + distribute the Original Software (or portions thereof), + with or without Modifications, and/or as part of a Larger + Work; and + + (b) under Patent Claims infringed by the making, using or + selling of Original Software, to make, have made, use, + practice, sell, and offer for sale, and/or otherwise + dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are + effective on the date Initial Developer first distributes + or otherwise makes the Original Software available to a + third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: (1) for code that You delete from the Original + Software, or (2) for infringements caused by: (i) the + modification of the Original Software, or (ii) the + combination of the Original Software with other software + or devices. + + 2.2. Contributor Grant. 
+ + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, each + Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor to use, reproduce, + modify, display, perform, sublicense and distribute the + Modifications created by such Contributor (or portions + thereof), either on an unmodified basis, with other + Modifications, as Covered Software and/or as part of a + Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either + alone and/or in combination with its Contributor Version + (or portions of such combination), to make, use, sell, + offer for sale, have made, and/or otherwise dispose of: + (1) Modifications made by that Contributor (or portions + thereof); and (2) the combination of Modifications made by + that Contributor with its Contributor Version (or portions + of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first distributes or + otherwise makes the Modifications available to a third + party. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: (1) for any code that Contributor has deleted + from the Contributor Version; (2) for infringements caused + by: (i) third party modifications of Contributor Version, + or (ii) the combination of Modifications made by that + Contributor with other software (except as part of the + Contributor Version) or other devices; or (3) under Patent + Claims infringed by Covered Software in the absence of + Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. 
+ + Any Covered Software that You distribute or otherwise make + available in Executable form must also be made available in Source + Code form and that Source Code form must be distributed only under + the terms of this License. You must include a copy of this + License with every copy of the Source Code form of the Covered + Software You distribute or otherwise make available. You must + inform recipients of any such Covered Software in Executable form + as to how they can obtain such Covered Software in Source Code + form in a reasonable manner on or through a medium customarily + used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are + governed by the terms of this License. You represent that You + believe Your Modifications are Your original creation(s) and/or + You have sufficient rights to grant the rights conveyed by this + License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that + identifies You as the Contributor of the Modification. You may + not remove or alter any copyright, patent or trademark notices + contained within the Covered Software, or any notices of licensing + or any descriptive text giving attribution to any Contributor or + the Initial Developer. + + 3.4. Application of Additional Terms. + + You may not offer or impose any terms on any Covered Software in + Source Code form that alters or restricts the applicable version + of this License or the recipients' rights hereunder. You may + choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of + Covered Software. However, you may do so only on Your own behalf, + and not on behalf of the Initial Developer or any Contributor. 
+ You must make it absolutely clear that any such warranty, support, + indemnity or liability obligation is offered by You alone, and You + hereby agree to indemnify the Initial Developer and every + Contributor for any liability incurred by the Initial Developer or + such Contributor as a result of warranty, support, indemnity or + liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software + under the terms of this License or under the terms of a license of + Your choice, which may contain terms different from this License, + provided that You are in compliance with the terms of this License + and that the license for the Executable form does not attempt to + limit or alter the recipient's rights in the Source Code form from + the rights set forth in this License. If You distribute the + Covered Software in Executable form under a different license, You + must make it absolutely clear that any terms which differ from + this License are offered by You alone, not by the Initial + Developer or Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred + by the Initial Developer or such Contributor as a result of any + such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with + other code not governed by the terms of this License and + distribute the Larger Work as a single product. In such a case, + You must make sure the requirements of this License are fulfilled + for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Sun Microsystems, Inc. is the initial license steward and may + publish revised and/or new versions of this License from time to + time. Each version will be given a distinguishing version number. + Except as provided in Section 4.3, no one other than the license + steward has the right to modify this License. + + 4.2. 
Effect of New Versions. + + You may always continue to use, distribute or otherwise make the + Covered Software available under the terms of the version of the + License under which You originally received the Covered Software. + If the Initial Developer includes a notice in the Original + Software prohibiting it from being distributed or otherwise made + available under any subsequent version of the License, You must + distribute and make the Covered Software available under the terms + of the version of the License under which You originally received + the Covered Software. Otherwise, You may also choose to use, + distribute or otherwise make the Covered Software available under + the terms of any subsequent version of the License published by + the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new + license for Your Original Software, You may create and use a + modified version of this License if You: (a) rename the license + and remove any references to the name of the license steward + (except to note that the license differs from this License); and + (b) otherwise make it clear that the license contains terms which + differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" + BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED + SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR + PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY + COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE + INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS + DISCLAIMER. + +6. 
TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to + cure such breach within 30 days of becoming aware of the breach. + Provisions which, by their nature, must remain in effect beyond + the termination of this License shall survive. + + 6.2. If You assert a patent infringement claim (excluding + declaratory judgment actions) against Initial Developer or a + Contributor (the Initial Developer or Contributor against whom You + assert such claim is referred to as "Participant") alleging that + the Participant Software (meaning the Contributor Version where + the Participant is a Contributor or the Original Software where + the Participant is the Initial Developer) directly or indirectly + infringes any patent, then any and all rights granted directly or + indirectly to You by such Participant, the Initial Developer (if + the Initial Developer is not the Participant) and all Contributors + under Sections 2.1 and/or 2.2 of this License shall, upon 60 days + notice from Participant terminate prospectively and automatically + at the expiration of such 60 day notice period, unless if within + such 60 day period You withdraw Your claim with respect to the + Participant Software against such Participant either unilaterally + or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, + all end user licenses that have been validly granted by You or any + distributor hereunder prior to termination (excluding licenses + granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. 
+ + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE + INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF + COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE + LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK + STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL + INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT + APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO + NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR + CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT + APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is + defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial + computer software" (as that term is defined at 48 + C.F.R. 252.227-7014(a)(1)) and "commercial computer software + documentation" as such terms are used in 48 C.F.R. 12.212 + (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 + C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all + U.S. Government End Users acquire Covered Software with only those + rights set forth herein. This U.S. Government Rights clause is in + lieu of, and supersedes, any other FAR, DFAR, or other clause or + provision that addresses Government rights in computer software + under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. 
This License shall be governed + by the law of the jurisdiction specified in a notice contained + within the Original Software (except to the extent applicable law, + if any, provides otherwise), excluding such jurisdiction's + conflict-of-law provisions. Any litigation relating to this + License shall be subject to the jurisdiction of the courts located + in the jurisdiction and venue specified in a notice contained + within the Original Software, with the losing party responsible + for costs, including, without limitation, court costs and + reasonable attorneys' fees and expenses. The application of the + United Nations Convention on Contracts for the International Sale + of Goods is expressly excluded. Any law or regulation which + provides that the language of a contract shall be construed + against the drafter shall not apply to this License. You agree + that You alone are responsible for compliance with the United + States export administration regulations (and the export control + laws and regulation of any other countries) when You use, + distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or + indirectly, out of its utilization of rights under this License + and You agree to work with Initial Developer and Contributors to + distribute such responsibility on an equitable basis. Nothing + herein is intended or shall be deemed to constitute any admission + of liability. + +-------------------------------------------------------------------- + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND +DISTRIBUTION LICENSE (CDDL) + +For Covered Software in this distribution, this License shall +be governed by the laws of the State of California (excluding +conflict-of-law provisions). 
+ +Any litigation relating to this License shall be subject to the +jurisdiction of the Federal Courts of the Northern District of +California and the state courts of the State of California, with +venue lying in Santa Clara County, California. diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c index 494c5f73f4b7..eafc47d10f2d 100644 --- a/common/acl/acl_common.c +++ b/common/acl/acl_common.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> @@ -373,7 +372,7 @@ access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow) * by nfsace, assuming aclent_t -> nfsace semantics. */ static uint32_t -mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow) +mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow) { uint32_t access = 0; int haswriteperm = 0; @@ -416,7 +415,7 @@ mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow) access |= ACE_DELETE_CHILD; } /* exec */ - if (mode & S_IXOTH) { + if (mode & 01) { access |= ACE_EXECUTE; } @@ -667,7 +666,7 @@ out: } static int -convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir, +convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir, ace_t **retacep, int *retacecnt) { ace_t *acep; @@ -693,7 +692,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir, dfaclcnt = aclcnt - i; } - if (dfaclcnt && !isdir) { + if (dfaclcnt && isdir == 0) { return (EINVAL); } @@ -731,7 +730,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir, } static int -ace_mask_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) +ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir) { int error = 0; o_mode_t mode = 0; @@ -1028,7 +1027,7 @@ out: } static int -ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) +ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int 
isdir) { /* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */ if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) != @@ -1041,7 +1040,7 @@ ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) static int acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list, - uid_t owner, gid_t group, boolean_t isdir) + uid_t owner, gid_t group, int isdir) { int error; uint32_t flips = ACE_POSIX_SUPPORTED_BITS; @@ -1081,7 +1080,7 @@ out: static int ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt, - uid_t owner, gid_t group, boolean_t isdir) + uid_t owner, gid_t group, int isdir) { int error = 0; aclent_t *aent, *result = NULL; @@ -1261,7 +1260,7 @@ acevals_compare(const void *va, const void *vb) static int ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group, aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt, - boolean_t isdir) + int isdir) { int error = 0; ace_t *acep; @@ -1456,7 +1455,7 @@ out: } static int -convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir, +convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir, uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt) { int error = 0; @@ -1498,7 +1497,7 @@ convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir, int -acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner, +acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner, gid_t group) { int aclcnt; @@ -1569,105 +1568,101 @@ out: } void -acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks) +acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) { - uint32_t read_mask = ACE_READ_DATA; - uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA; - uint32_t execute_mask = ACE_EXECUTE; + *deny1 = *deny2 = *allow0 = *group = 0; - (void) isdir; /* will need this later */ - - masks->deny1 = 0; if (!(mode & S_IRUSR) && (mode & 
(S_IRGRP|S_IROTH))) - masks->deny1 |= read_mask; + *deny1 |= ACE_READ_DATA; if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) - masks->deny1 |= write_mask; + *deny1 |= ACE_WRITE_DATA; if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) - masks->deny1 |= execute_mask; + *deny1 |= ACE_EXECUTE; - masks->deny2 = 0; if (!(mode & S_IRGRP) && (mode & S_IROTH)) - masks->deny2 |= read_mask; + *deny2 = ACE_READ_DATA; if (!(mode & S_IWGRP) && (mode & S_IWOTH)) - masks->deny2 |= write_mask; + *deny2 |= ACE_WRITE_DATA; if (!(mode & S_IXGRP) && (mode & S_IXOTH)) - masks->deny2 |= execute_mask; + *deny2 |= ACE_EXECUTE; - masks->allow0 = 0; if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) - masks->allow0 |= read_mask; + *allow0 |= ACE_READ_DATA; if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) - masks->allow0 |= write_mask; + *allow0 |= ACE_WRITE_DATA; if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) - masks->allow0 |= execute_mask; + *allow0 |= ACE_EXECUTE; - masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; if (mode & S_IRUSR) - masks->owner |= read_mask; + *owner |= ACE_READ_DATA; if (mode & S_IWUSR) - masks->owner |= write_mask; + *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; if (mode & S_IXUSR) - masks->owner |= execute_mask; + *owner |= ACE_EXECUTE; - masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| ACE_SYNCHRONIZE; if (mode & S_IRGRP) - masks->group |= read_mask; + *group |= ACE_READ_DATA; if (mode & S_IWGRP) - masks->group |= write_mask; + *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; if (mode & S_IXGRP) - masks->group |= execute_mask; + *group |= ACE_EXECUTE; - masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + *everyone = 
ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| ACE_SYNCHRONIZE; if (mode & S_IROTH) - masks->everyone |= read_mask; + *everyone |= ACE_READ_DATA; if (mode & S_IWOTH) - masks->everyone |= write_mask; + *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; if (mode & S_IXOTH) - masks->everyone |= execute_mask; + *everyone |= ACE_EXECUTE; } int -acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count) +acl_trivial_create(mode_t mode, ace_t **acl, int *count) { + uint32_t deny1, deny2; + uint32_t allow0; + uint32_t owner, group, everyone; int index = 0; int error; - trivial_acl_t masks; *count = 3; - acl_trivial_access_masks(mode, isdir, &masks); + acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group, + &everyone); - if (masks.allow0) + if (allow0) (*count)++; - if (masks.deny1) + if (deny1) (*count)++; - if (masks.deny2) + if (deny2) (*count)++; if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0) return (error); - if (masks.allow0) { - SET_ACE(acl, index, -1, masks.allow0, - ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); + if (allow0) { + SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_OWNER); } - if (masks.deny1) { - SET_ACE(acl, index, -1, masks.deny1, - ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER); + if (deny1) { + SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_OWNER); } - if (masks.deny2) { - SET_ACE(acl, index, -1, masks.deny2, - ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP); + if (deny2) { + SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_GROUP|ACE_IDENTIFIER_GROUP); } - SET_ACE(acl, index, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE, - ACE_OWNER); - SET_ACE(acl, index, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE, + SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); + SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_IDENTIFIER_GROUP|ACE_GROUP); - SET_ACE(acl, index, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, + 
SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_EVERYONE); return (0); diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h index be4fd0c9e1d3..f76cbd3b450f 100644 --- a/common/acl/acl_common.h +++ b/common/acl/acl_common.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _ACL_COMMON_H @@ -34,14 +33,7 @@ extern "C" { #endif -typedef struct trivial_acl { - uint32_t allow0; /* allow mask for bits only in owner */ - uint32_t deny1; /* deny mask for bits not in owner */ - uint32_t deny2; /* deny mask for bits not in group */ - uint32_t owner; /* allow mask matching mode */ - uint32_t group; /* allow mask matching mode */ - uint32_t everyone; /* allow mask matching mode */ -} trivial_acl_t; +extern ace_t trivial_acl[6]; extern int acltrivial(const char *); extern void adjust_ace_pair(ace_t *pair, mode_t mode); @@ -52,13 +44,13 @@ extern int ace_trivial_common(void *, int, uint32_t *mask)); extern acl_t *acl_alloc(acl_type_t); extern void acl_free(acl_t *aclp); -extern int acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, - uid_t owner, gid_t group); +extern int acl_translate(acl_t *aclp, int target_flavor, + int isdir, uid_t owner, gid_t group); void ksort(caddr_t v, int n, int s, int (*f)()); int cmp2acls(void *a, void *b); -int acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count); -void acl_trivial_access_masks(mode_t mode, boolean_t isdir, - trivial_acl_t *masks); +int acl_trivial_create(mode_t mode, ace_t **acl, int *count); +void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone); #ifdef __cplusplus } diff --git a/common/list/list.c b/common/list/list.c new file mode 100644 index 000000000000..94f7782a87d2 --- /dev/null +++ b/common/list/list.c @@ -0,0 +1,251 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Generic doubly-linked list implementation + */ + +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#ifdef _KERNEL +#include <sys/debug.h> +#else +#include <assert.h> +#define ASSERT(a) assert(a) +#endif + +#ifdef lint +extern list_node_t *list_d2l(list_t *list, void *obj); +#else +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#endif +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = (node); \ + lnew->list_next = (node)->list_next; \ + (node)->list_next->list_prev = lnew; \ + (node)->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = (node); \ + lnew->list_prev = (node)->list_prev; \ + (node)->list_prev->list_next = lnew; \ + (node)->list_prev = lnew; \ +} + +#define 
list_remove_node(node) \ + (node)->list_prev->list_next = (node)->list_next; \ + (node)->list_next->list_prev = (node)->list_prev; \ + (node)->list_next = (node)->list_prev = NULL + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); + ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + list_remove_node(lold); +} + +void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + list_remove_node(head); + return (list_object(list, head)); +} + +void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == 
&list->list_head) + return (NULL); + list_remove_node(tail); + return (list_object(list, tail)); +} + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. + */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +void +list_link_replace(list_node_t *lold, list_node_t *lnew) +{ + ASSERT(list_link_active(lold)); + ASSERT(!list_link_active(lnew)); + + lnew->list_next = lold->list_next; + lnew->list_prev = lold->list_prev; + lold->list_prev->list_next = lnew; + lold->list_next->list_prev = lnew; + lold->list_next = lold->list_prev = NULL; +} + +void +list_link_init(list_node_t *link) +{ + link->list_next = NULL; + link->list_prev = NULL; +} + +int +list_link_active(list_node_t *link) +{ + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return 
(list_empty(list)); +} diff --git a/common/nvpair/fnvpair.c b/common/nvpair/fnvpair.c deleted file mode 100644 index 8d1bb98be32c..000000000000 --- a/common/nvpair/fnvpair.c +++ /dev/null @@ -1,496 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#include <sys/nvpair.h> -#include <sys/kmem.h> -#include <sys/debug.h> -#ifndef _KERNEL -#include <stdlib.h> -#endif - -/* - * "Force" nvlist wrapper. - * - * These functions wrap the nvlist_* functions with assertions that assume - * the operation is successful. This allows the caller's code to be much - * more readable, especially for the fnvlist_lookup_* and fnvpair_value_* - * functions, which can return the requested value (rather than filling in - * a pointer). - * - * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate - * with KM_SLEEP. - * - * More wrappers should be added as needed -- for example - * nvlist_lookup_*_array and nvpair_value_*_array. 
- */ - -nvlist_t * -fnvlist_alloc(void) -{ - nvlist_t *nvl; - VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0); - return (nvl); -} - -void -fnvlist_free(nvlist_t *nvl) -{ - nvlist_free(nvl); -} - -size_t -fnvlist_size(nvlist_t *nvl) -{ - size_t size; - VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0); - return (size); -} - -/* - * Returns allocated buffer of size *sizep. Caller must free the buffer with - * fnvlist_pack_free(). - */ -char * -fnvlist_pack(nvlist_t *nvl, size_t *sizep) -{ - char *packed = 0; - VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE, - KM_SLEEP), ==, 0); - return (packed); -} - -/*ARGSUSED*/ -void -fnvlist_pack_free(char *pack, size_t size) -{ -#ifdef _KERNEL - kmem_free(pack, size); -#else - free(pack); -#endif -} - -nvlist_t * -fnvlist_unpack(char *buf, size_t buflen) -{ - nvlist_t *rv; - VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0); - return (rv); -} - -nvlist_t * -fnvlist_dup(nvlist_t *nvl) -{ - nvlist_t *rv; - VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0); - return (rv); -} - -void -fnvlist_merge(nvlist_t *dst, nvlist_t *src) -{ - VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0); -} - -void -fnvlist_add_boolean(nvlist_t *nvl, const char *name) -{ - VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0); -} - -void -fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) -{ - VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0); -} - -void -fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) -{ - VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0); -} - -void -fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) -{ - VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0); -} - -void -fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) -{ - VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0); -} - -void -fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) -{ - VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0); -} - -void 
-fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) -{ - VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0); -} - -void -fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) -{ - VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0); -} - -void -fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) -{ - VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0); -} - -void -fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) -{ - VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0); -} - -void -fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) -{ - VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0); -} - -void -fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val) -{ - VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0); -} - -void -fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) -{ - VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0); -} - -void -fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair) -{ - VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0); -} - -void -fnvlist_add_boolean_array(nvlist_t *nvl, const char *name, - boolean_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_uint16_array(nvlist_t *nvl, const char *name, - uint16_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 
0); -} - -void -fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_uint32_array(nvlist_t *nvl, const char *name, - uint32_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_uint64_array(nvlist_t *nvl, const char *name, - uint64_t *val, uint_t n) -{ - VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_string_array(nvlist_t *nvl, const char *name, - char * const *val, uint_t n) -{ - VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name, - nvlist_t **val, uint_t n) -{ - VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0); -} - -void -fnvlist_remove(nvlist_t *nvl, const char *name) -{ - VERIFY3U(nvlist_remove_all(nvl, name), ==, 0); -} - -void -fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair) -{ - VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0); -} - -nvpair_t * -fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name) -{ - nvpair_t *rv; - VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0); - return (rv); -} - -/* returns B_TRUE if the entry exists */ -boolean_t -fnvlist_lookup_boolean(nvlist_t *nvl, const char *name) -{ - return (nvlist_lookup_boolean(nvl, name) == 0); -} - -boolean_t -fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name) -{ - boolean_t rv; - VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0); - return (rv); -} - -uchar_t -fnvlist_lookup_byte(nvlist_t *nvl, const char *name) -{ - uchar_t rv; - VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0); - return (rv); -} - -int8_t -fnvlist_lookup_int8(nvlist_t *nvl, const char *name) -{ - int8_t rv; - VERIFY3U(nvlist_lookup_int8(nvl, name, 
&rv), ==, 0); - return (rv); -} - -int16_t -fnvlist_lookup_int16(nvlist_t *nvl, const char *name) -{ - int16_t rv; - VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0); - return (rv); -} - -int32_t -fnvlist_lookup_int32(nvlist_t *nvl, const char *name) -{ - int32_t rv; - VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0); - return (rv); -} - -int64_t -fnvlist_lookup_int64(nvlist_t *nvl, const char *name) -{ - int64_t rv; - VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0); - return (rv); -} - -uint8_t -fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name) -{ - uint8_t rv; - VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0); - return (rv); -} - -uint16_t -fnvlist_lookup_uint16(nvlist_t *nvl, const char *name) -{ - uint16_t rv; - VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0); - return (rv); -} - -uint32_t -fnvlist_lookup_uint32(nvlist_t *nvl, const char *name) -{ - uint32_t rv; - VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0); - return (rv); -} - -uint64_t -fnvlist_lookup_uint64(nvlist_t *nvl, const char *name) -{ - uint64_t rv; - VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0); - return (rv); -} - -char * -fnvlist_lookup_string(nvlist_t *nvl, const char *name) -{ - char *rv; - VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0); - return (rv); -} - -nvlist_t * -fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name) -{ - nvlist_t *rv; - VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0); - return (rv); -} - -boolean_t -fnvpair_value_boolean_value(nvpair_t *nvp) -{ - boolean_t rv; - VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0); - return (rv); -} - -uchar_t -fnvpair_value_byte(nvpair_t *nvp) -{ - uchar_t rv; - VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0); - return (rv); -} - -int8_t -fnvpair_value_int8(nvpair_t *nvp) -{ - int8_t rv; - VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0); - return (rv); -} - -int16_t -fnvpair_value_int16(nvpair_t *nvp) -{ - int16_t rv; - VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0); - return (rv); 
-} - -int32_t -fnvpair_value_int32(nvpair_t *nvp) -{ - int32_t rv; - VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0); - return (rv); -} - -int64_t -fnvpair_value_int64(nvpair_t *nvp) -{ - int64_t rv; - VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0); - return (rv); -} - -uint8_t -fnvpair_value_uint8_t(nvpair_t *nvp) -{ - uint8_t rv; - VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0); - return (rv); -} - -uint16_t -fnvpair_value_uint16(nvpair_t *nvp) -{ - uint16_t rv; - VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0); - return (rv); -} - -uint32_t -fnvpair_value_uint32(nvpair_t *nvp) -{ - uint32_t rv; - VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0); - return (rv); -} - -uint64_t -fnvpair_value_uint64(nvpair_t *nvp) -{ - uint64_t rv; - VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0); - return (rv); -} - -char * -fnvpair_value_string(nvpair_t *nvp) -{ - char *rv; - VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0); - return (rv); -} - -nvlist_t * -fnvpair_value_nvlist(nvpair_t *nvp) -{ - nvlist_t *rv; - VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0); - return (rv); -} diff --git a/common/zfs/zfeature_common.c b/common/zfs/zfeature_common.c deleted file mode 100644 index 9c0b67b42452..000000000000 --- a/common/zfs/zfeature_common.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#ifdef _KERNEL -#include <sys/systm.h> -#else -#include <errno.h> -#include <string.h> -#endif -#include <sys/debug.h> -#include <sys/fs/zfs.h> -#include <sys/inttypes.h> -#include <sys/types.h> -#include "zfeature_common.h" - -/* - * Set to disable all feature checks while opening pools, allowing pools with - * unsupported features to be opened. Set for testing only. - */ -boolean_t zfeature_checks_disable = B_FALSE; - -zfeature_info_t spa_feature_table[SPA_FEATURES]; - -/* - * Valid characters for feature guids. This list is mainly for aesthetic - * purposes and could be expanded in the future. There are different allowed - * characters in the guids reverse dns portion (before the colon) and its - * short name (after the colon). - */ -static int -valid_char(char c, boolean_t after_colon) -{ - return ((c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9') || - c == (after_colon ? '_' : '.')); -} - -/* - * Every feature guid must contain exactly one colon which separates a reverse - * dns organization name from the feature's "short" name (e.g. - * "com.company:feature_name"). 
- */ -boolean_t -zfeature_is_valid_guid(const char *name) -{ - int i; - boolean_t has_colon = B_FALSE; - - i = 0; - while (name[i] != '\0') { - char c = name[i++]; - if (c == ':') { - if (has_colon) - return (B_FALSE); - has_colon = B_TRUE; - continue; - } - if (!valid_char(c, has_colon)) - return (B_FALSE); - } - - return (has_colon); -} - -boolean_t -zfeature_is_supported(const char *guid) -{ - if (zfeature_checks_disable) - return (B_TRUE); - - return (0 == zfeature_lookup_guid(guid, NULL)); -} - -int -zfeature_lookup_guid(const char *guid, zfeature_info_t **res) -{ - for (int i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *feature = &spa_feature_table[i]; - if (strcmp(guid, feature->fi_guid) == 0) { - if (res != NULL) - *res = feature; - return (0); - } - } - - return (ENOENT); -} - -int -zfeature_lookup_name(const char *name, zfeature_info_t **res) -{ - for (int i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *feature = &spa_feature_table[i]; - if (strcmp(name, feature->fi_uname) == 0) { - if (res != NULL) - *res = feature; - return (0); - } - } - - return (ENOENT); -} - -static void -zfeature_register(int fid, const char *guid, const char *name, const char *desc, - boolean_t readonly, boolean_t mos, zfeature_info_t **deps) -{ - zfeature_info_t *feature = &spa_feature_table[fid]; - static zfeature_info_t *nodeps[] = { NULL }; - - ASSERT(name != NULL); - ASSERT(desc != NULL); - ASSERT(!readonly || !mos); - ASSERT3U(fid, <, SPA_FEATURES); - ASSERT(zfeature_is_valid_guid(guid)); - - if (deps == NULL) - deps = nodeps; - - feature->fi_guid = guid; - feature->fi_uname = name; - feature->fi_desc = desc; - feature->fi_can_readonly = readonly; - feature->fi_mos = mos; - feature->fi_depends = deps; -} - -void -zpool_feature_init(void) -{ - zfeature_register(SPA_FEATURE_ASYNC_DESTROY, - "com.delphix:async_destroy", "async_destroy", - "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL); -} diff --git a/common/zfs/zfeature_common.h 
b/common/zfs/zfeature_common.h deleted file mode 100644 index 93ba2b76f1dd..000000000000 --- a/common/zfs/zfeature_common.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#ifndef _ZFEATURE_COMMON_H -#define _ZFEATURE_COMMON_H - -#include <sys/fs/zfs.h> -#include <sys/inttypes.h> -#include <sys/types.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct zfeature_info; - -typedef struct zfeature_info { - const char *fi_uname; /* User-facing feature name */ - const char *fi_guid; /* On-disk feature identifier */ - const char *fi_desc; /* Feature description */ - boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */ - boolean_t fi_mos; /* Is the feature necessary to read the MOS? 
*/ - struct zfeature_info **fi_depends; /* array; null terminated */ -} zfeature_info_t; - -typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg); - -#define ZFS_FEATURE_DEBUG - -enum spa_feature { - SPA_FEATURE_ASYNC_DESTROY, - SPA_FEATURES -} spa_feature_t; - -extern zfeature_info_t spa_feature_table[SPA_FEATURES]; - -extern boolean_t zfeature_is_valid_guid(const char *); - -extern boolean_t zfeature_is_supported(const char *); -extern int zfeature_lookup_guid(const char *, zfeature_info_t **res); -extern int zfeature_lookup_name(const char *, zfeature_info_t **res); - -extern void zpool_feature_init(void); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFEATURE_COMMON_H */ diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c index 7688113e36e1..ed9b67ea3bc9 100644 --- a/common/zfs/zfs_comutil.c +++ b/common/zfs/zfs_comutil.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -158,11 +157,7 @@ zfs_spa_version_map(int zpl_version) return (version); } -/* - * This is the table of legacy internal event names; it should not be modified. - * The internal events are now stored in the history log as strings. - */ -const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = { +const char *zfs_history_event_names[LOG_END] = { "invalid event", "pool create", "vdev add", diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h index f89054388a4d..61327f9aa909 100644 --- a/common/zfs/zfs_comutil.h +++ b/common/zfs/zfs_comutil.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _ZFS_COMUTIL_H @@ -38,8 +37,7 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); extern int zfs_zpl_version_map(int spa_version); extern int zfs_spa_version_map(int zpl_version); -#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 -extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; +extern const char *zfs_history_event_names[LOG_END]; #ifdef __cplusplus } diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c index 18681035d6e1..83d9edb21389 100644 --- a/common/zfs/zfs_deleg.c +++ b/common/zfs/zfs_deleg.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #if defined(_KERNEL) @@ -61,7 +60,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, - {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h index 9997dffae7d0..b4cb8e2b4e37 100644 --- a/common/zfs/zfs_deleg.h +++ b/common/zfs/zfs_deleg.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 
*/ #ifndef _ZFS_DELEG_H @@ -52,7 +51,6 @@ typedef enum { ZFS_DELEG_NOTE_CLONE, ZFS_DELEG_NOTE_PROMOTE, ZFS_DELEG_NOTE_RENAME, - ZFS_DELEG_NOTE_SEND, ZFS_DELEG_NOTE_RECEIVE, ZFS_DELEG_NOTE_ALLOW, ZFS_DELEG_NOTE_USERPROP, diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c index 5d45361eeaca..f29bcf62718f 100644 --- a/common/zfs/zfs_prop.c +++ b/common/zfs/zfs_prop.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -105,13 +104,6 @@ zfs_prop_init(void) { NULL } }; - static zprop_index_t acl_mode_table[] = { - { "discard", ZFS_ACL_DISCARD }, - { "groupmask", ZFS_ACL_GROUPMASK }, - { "passthrough", ZFS_ACL_PASSTHROUGH }, - { NULL } - }; - static zprop_index_t acl_inherit_table[] = { { "discard", ZFS_ACL_DISCARD }, { "noallow", ZFS_ACL_NOALLOW }, @@ -215,9 +207,6 @@ zfs_prop_init(void) zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "hidden | visible", "SNAPDIR", snapdir_table); - zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "discard | groupmask | passthrough", "ACLMODE", acl_mode_table); zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | noallow | restricted | passthrough | passthrough-x", @@ -267,7 +256,7 @@ zfs_prop_init(void) /* default index properties */ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); + "1 | 2 | 3 | 4 | current", "VERSION", version_table); zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); @@ -297,8 +286,6 @@ zfs_prop_init(void) /* string properties */ 
zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN"); - zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, - ZFS_TYPE_SNAPSHOT, "<dataset>[,...]", "CLONES"); zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", "MOUNTPOINT"); @@ -324,9 +311,6 @@ zfs_prop_init(void) zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "RATIO"); - zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, - PROP_READONLY, ZFS_TYPE_DATASET, - "<1.00x or higher if compressed>", "REFRATIO"); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); @@ -344,8 +328,6 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV"); zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS"); - zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, - ZFS_TYPE_DATASET, "<size>", "WRITTEN"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, @@ -388,6 +370,13 @@ zfs_prop_init(void) zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); + /* + * Property to be removed once libbe is integrated + */ + zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM, + "PRIV_PROP"); + /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL, PROP_READONLY, ZFS_TYPE_DATASET, @@ -472,18 +461,6 @@ zfs_prop_userquota(const char *name) } /* - * Returns true if this is a valid written@ property. 
- * Note that after the @, any character is valid (eg, another @, for - * written@pool/fs@origin). - */ -boolean_t -zfs_prop_written(const char *name) -{ - static const char *prefix = "written@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - -/* * Tables of index types, plus functions to convert between the user view * (strings) and internal representation (uint64_t). */ diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c index 72db87937110..988d05de6e20 100644 --- a/common/zfs/zpool_prop.c +++ b/common/zfs/zpool_prop.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zio.h> @@ -71,20 +69,14 @@ zpool_prop_init(void) ZFS_TYPE_POOL, "<filesystem>", "BOOTFS"); zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE"); - zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "<comment-string>", "COMMENT"); /* readonly number properties */ zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "SIZE"); zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "FREE"); - zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "FREEING"); zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC"); - zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<size>", "EXPANDSZ"); zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, ZFS_TYPE_POOL, "<size>", "CAP"); zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, @@ -168,26 +160,6 @@ zpool_prop_default_numeric(zpool_prop_t prop) return (zpool_prop_table[prop].pd_numdefault); } -/* - * Returns 
true if this is a valid feature@ property. - */ -boolean_t -zpool_prop_feature(const char *name) -{ - static const char *prefix = "feature@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - -/* - * Returns true if this is a valid unsupported@ property. - */ -boolean_t -zpool_prop_unsupported(const char *name) -{ - static const char *prefix = "unsupported@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - int zpool_prop_string_to_index(zpool_prop_t prop, const char *string, uint64_t *index) diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c index 03919f0e9132..0bbf20d4f02c 100644 --- a/common/zfs/zprop_common.c +++ b/common/zfs/zprop_common.c @@ -22,9 +22,6 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ /* * Common routines used by zfs and zpool property management. @@ -132,8 +129,7 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type, zprop_attr_t attr, int objset_types, const char *colname) { zprop_register_impl(prop, name, type, 0, NULL, attr, - objset_types, NULL, colname, - type == PROP_TYPE_NUMBER, B_FALSE, NULL); + objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); } diff --git a/uts/common/Makefile.files b/uts/common/Makefile.files index a2b4396f7316..ec08410b4ff3 100644 --- a/uts/common/Makefile.files +++ b/uts/common/Makefile.files @@ -21,8 +21,6 @@ # # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2011 Nexenta Systems, Inc. All rights reserved. -# Copyright (c) 2012 by Delphix. All rights reserved. 
# # @@ -192,6 +190,7 @@ GENUNIX_OBJS += \ gid.o \ groups.o \ grow.o \ + hat.o \ hat_refmod.o \ id32.o \ id_space.o \ @@ -243,7 +242,6 @@ GENUNIX_OBJS += \ nvpair.o \ nvpair_alloc_system.o \ nvpair_alloc_fixed.o \ - fnvpair.o \ octet.o \ open.o \ p_online.o \ @@ -455,8 +453,6 @@ AUDIO810_OBJS += audio810.o AUDIOCMI_OBJS += audiocmi.o -AUDIOCMIHD_OBJS += audiocmihd.o - AUDIOHD_OBJS += audiohd.o AUDIOIXP_OBJS += audioixp.o @@ -502,9 +498,9 @@ MD4_OBJS += md4.o md4_mod.o MD5_OBJS += md5.o md5_mod.o -SHA1_OBJS += sha1.o sha1_mod.o +SHA1_OBJS += sha1.o sha1_mod.o fips_sha1_util.o -SHA2_OBJS += sha2.o sha2_mod.o +SHA2_OBJS += sha2.o sha2_mod.o fips_sha2_util.o IPGPC_OBJS += classifierddi.o classifier.o filters.o trie.o table.o \ ba_table.o @@ -939,7 +935,7 @@ ST_OBJS += st.o st_conf.o EMLXS_OBJS += emlxs_clock.o emlxs_dfc.o emlxs_dhchap.o emlxs_diag.o \ emlxs_download.o emlxs_dump.o emlxs_els.o emlxs_event.o \ - emlxs_fcf.o emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \ + emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \ emlxs_mbox.o emlxs_mem.o emlxs_msg.o emlxs_node.o \ emlxs_pkt.o emlxs_sli3.o emlxs_sli4.o emlxs_solaris.o \ emlxs_thread.o @@ -1087,7 +1083,7 @@ DRM_OBJS += drm_sunmod.o drm_kstat.o drm_agpsupport.o \ drm_auth.o drm_bufs.o drm_context.o drm_dma.o \ drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \ drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \ - drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o + drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o FM_OBJS += devfm.o devfm_machdep.o @@ -1329,7 +1325,6 @@ ZFS_COMMON_OBJS += \ arc.o \ bplist.o \ bpobj.o \ - bptree.o \ dbuf.o \ ddt.o \ ddt_zap.o \ @@ -1351,7 +1346,6 @@ ZFS_COMMON_OBJS += \ dsl_deleg.o \ dsl_prop.o \ dsl_scan.o \ - zfeature.o \ gzip.o \ lzjb.o \ metaslab.o \ @@ -1394,12 +1388,11 @@ ZFS_COMMON_OBJS += \ zrlock.o ZFS_SHARED_OBJS += \ - zfeature_common.o \ - zfs_comutil.o \ - zfs_deleg.o \ - zfs_fletcher.o \ zfs_namecheck.o \ + zfs_deleg.o \ zfs_prop.o \ + zfs_comutil.o \ + 
zfs_fletcher.o \ zpool_prop.o \ zprop_common.o @@ -1526,7 +1519,7 @@ KCF_OBJS += kcf.o kcf_callprov.o kcf_cbufcall.o kcf_cipher.o kcf_crypto.o \ kcf_object.o kcf_policy.o kcf_prov_lib.o kcf_prov_tabs.o \ kcf_sched.o kcf_session.o kcf_sign.o kcf_spi.o kcf_verify.o \ kcf_random.o modes.o ecb.o cbc.o ctr.o ccm.o gcm.o \ - fips_random.o + fips_random.o fips_checksum.o fips_test_vectors.o CRYPTOADM_OBJS += cryptoadm.o @@ -1537,7 +1530,7 @@ DPROV_OBJS += dprov.o DCA_OBJS += dca.o dca_3des.o dca_debug.o dca_dsa.o dca_kstat.o dca_rng.o \ dca_rsa.o -AESPROV_OBJS += aes.o aes_impl.o aes_modes.o +AESPROV_OBJS += aes.o aes_impl.o aes_modes.o fips_aes_util.o ARCFOURPROV_OBJS += arcfour.o arcfour_crypt.o @@ -1548,16 +1541,16 @@ ECCPROV_OBJS += ecc.o ec.o ec2_163.o ec2_mont.o ecdecode.o ecl_mult.o \ ecp_jm.o ec2_233.o ecl_curve.o ecp_224.o ecp_aff.o \ ecp_mont.o ec2_aff.o ec_naf.o ecl_gf.o ecp_256.o mp_gf2m.o \ mpi.o mplogic.o mpmontg.o mpprime.o oid.o \ - secitem.o ec2_test.o ecp_test.o + secitem.o ec2_test.o ecp_test.o fips_ecc_util.o -RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o +RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o fips_rsa_util.o -SWRANDPROV_OBJS += swrand.o +SWRANDPROV_OBJS += swrand.o fips_random_util.o # # kernel SSL # -KSSL_OBJS += kssl.o ksslioctl.o +KSSL_OBJS += kssl.o ksslioctl.o KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o @@ -1671,7 +1664,7 @@ KGSS_KRB5_OBJS += krb5mech.o \ $(CRYPTO_OLD) \ $(CRYPTO_RAW) $(K5_KRB) $(K5_OS) -DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o +DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o fips_des_util.o DLBOOT_OBJS += bootparam_xdr.o nfs_dlinet.o scan.o @@ -1770,8 +1763,6 @@ BGE_OBJS += bge_main2.o bge_chip2.o bge_kstats.o bge_log.o bge_ndd.o \ DMFE_OBJS += dmfe_log.o dmfe_main.o dmfe_mii.o -EFE_OBJS += efe.o - ELXL_OBJS += elxl.o HME_OBJS += hme.o @@ -1782,8 +1773,6 @@ IXGB_OBJS += ixgb.o ixgb_atomic.o ixgb_chip.o ixgb_gld.o ixgb_kstats.o \ NGE_OBJS += nge_main.o nge_atomic.o nge_chip.o nge_ndd.o 
nge_kstats.o \ nge_log.o nge_rx.o nge_tx.o nge_xmii.o -PCN_OBJS += pcn.o - RGE_OBJS += rge_main.o rge_chip.o rge_ndd.o rge_kstats.o rge_log.o rge_rxtx.o URTW_OBJS += urtw.o @@ -1909,11 +1898,6 @@ IGB_OBJS = igb_82575.o igb_api.o igb_mac.o igb_manage.o \ igb_rx.o igb_stat.o igb_tx.o # -# Intel Pro/100 NIC driver module -# -IPRB_OBJS = iprb.o - -# # Intel 10GbE PCIE NIC driver module # IXGBE_OBJS = ixgbe_82598.o ixgbe_82599.o ixgbe_api.o \ @@ -1948,6 +1932,11 @@ NXGE_HCALL_OBJS = \ # KICONV_EMEA_OBJS += kiconv_emea.o +# +# blk2scsa +# +BLK2SCSA_OBJS = blk2scsa.o + KICONV_JA_OBJS += kiconv_ja.o KICONV_KO_OBJS += kiconv_cck_common.o kiconv_ko.o diff --git a/uts/common/dtrace/dcpc.c b/uts/common/dtrace/dcpc.c deleted file mode 100644 index 8fd96cc24c6c..000000000000 --- a/uts/common/dtrace/dcpc.c +++ /dev/null @@ -1,1218 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include <sys/errno.h> -#include <sys/cpuvar.h> -#include <sys/stat.h> -#include <sys/modctl.h> -#include <sys/cmn_err.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/ksynch.h> -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/kcpc.h> -#include <sys/cap_util.h> -#include <sys/cpc_pcbe.h> -#include <sys/cpc_impl.h> -#include <sys/dtrace_impl.h> - -/* - * DTrace CPU Performance Counter Provider - * --------------------------------------- - * - * The DTrace cpc provider allows DTrace consumers to access the CPU - * performance counter overflow mechanism of a CPU. The configuration - * presented in a probe specification is programmed into the performance - * counter hardware of all available CPUs on a system. Programming the - * hardware causes a counter on each CPU to begin counting events of the - * given type. When the specified number of events have occurred, an overflow - * interrupt will be generated and the probe is fired. - * - * The required configuration for the performance counter is encoded into - * the probe specification and this includes the performance counter event - * name, processor mode, overflow rate and an optional unit mask. - * - * Most processors provide several counters (PICs) which can count all or a - * subset of the events available for a given CPU. However, when overflow - * profiling is being used, not all CPUs can detect which counter generated the - * overflow interrupt. In this case we cannot reliably determine which counter - * overflowed and we therefore only allow such CPUs to configure one event at - * a time. Processors that can determine the counter which overflowed are - * allowed to program as many events at one time as possible (in theory up to - * the number of instrumentation counters supported by that platform). - * Therefore, multiple consumers can enable multiple probes at the same time - * on such platforms. 
Platforms which cannot determine the source of an - * overflow interrupt are only allowed to program a single event at one time. - * - * The performance counter hardware is made available to consumers on a - * first-come, first-served basis. Only a finite amount of hardware resource - * is available and, while we make every attempt to accomodate requests from - * consumers, we must deny requests when hardware resources have been exhausted. - * A consumer will fail to enable probes when resources are currently in use. - * - * The cpc provider contends for shared hardware resources along with other - * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)). - * Only one such consumer can use the performance counters at any one time and - * counters are made available on a first-come, first-served basis. As with - * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g. - * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP - * counter contexts to be invalidated. - */ - -typedef struct dcpc_probe { - char dcpc_event_name[CPC_MAX_EVENT_LEN]; - int dcpc_flag; /* flags (USER/SYS) */ - uint32_t dcpc_ovfval; /* overflow value */ - int64_t dcpc_umask; /* umask/emask for this event */ - int dcpc_picno; /* pic this event is programmed in */ - int dcpc_enabled; /* probe is actually enabled? */ - int dcpc_disabling; /* probe is currently being disabled */ - dtrace_id_t dcpc_id; /* probeid this request is enabling */ - int dcpc_actv_req_idx; /* idx into dcpc_actv_reqs[] */ -} dcpc_probe_t; - -static dev_info_t *dcpc_devi; -static dtrace_provider_id_t dcpc_pid; -static dcpc_probe_t **dcpc_actv_reqs; -static uint32_t dcpc_enablings = 0; -static int dcpc_ovf_mask = 0; -static int dcpc_mult_ovf_cap = 0; -static int dcpc_mask_type = 0; - -/* - * When the dcpc provider is loaded, dcpc_min_overflow is set to either - * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in - * the dcpc.conf file. 
Decrease this value to set probes with smaller - * overflow values. Remember that very small values could render a system - * unusable with frequently occurring events. - */ -#define DCPC_MIN_OVF_DEFAULT 5000 -static uint32_t dcpc_min_overflow; - -static int dcpc_aframes = 0; /* override for artificial frame setting */ -#if defined(__x86) -#define DCPC_ARTIFICIAL_FRAMES 8 -#elif defined(__sparc) -#define DCPC_ARTIFICIAL_FRAMES 2 -#endif - -/* - * Called from the platform overflow interrupt handler. 'bitmap' is a mask - * which contains the pic(s) that have overflowed. - */ -static void -dcpc_fire(uint64_t bitmap) -{ - int i; - - /* - * No counter was marked as overflowing. Shout about it and get out. - */ - if ((bitmap & dcpc_ovf_mask) == 0) { - cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n"); - return; - } - - /* - * This is the common case of a processor that doesn't support - * multiple overflow events. Such systems are only allowed a single - * enabling and therefore we just look for the first entry in - * the active request array. - */ - if (!dcpc_mult_ovf_cap) { - for (i = 0; i < cpc_ncounters; i++) { - if (dcpc_actv_reqs[i] != NULL) { - dtrace_probe(dcpc_actv_reqs[i]->dcpc_id, - CPU->cpu_cpcprofile_pc, - CPU->cpu_cpcprofile_upc, 0, 0, 0); - return; - } - } - return; - } - - /* - * This is a processor capable of handling multiple overflow events. - * Iterate over the array of active requests and locate the counters - * that overflowed (note: it is possible for more than one counter to - * have overflowed at the same time). 
- */ - for (i = 0; i < cpc_ncounters; i++) { - if (dcpc_actv_reqs[i] != NULL && - (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) { - dtrace_probe(dcpc_actv_reqs[i]->dcpc_id, - CPU->cpu_cpcprofile_pc, - CPU->cpu_cpcprofile_upc, 0, 0, 0); - } - } -} - -static void -dcpc_create_probe(dtrace_provider_id_t id, const char *probename, - char *eventname, int64_t umask, uint32_t ovfval, char flag) -{ - dcpc_probe_t *pp; - int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes(); - - if (dcpc_aframes) - nr_frames = dcpc_aframes; - - if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0) - return; - - pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP); - (void) strncpy(pp->dcpc_event_name, eventname, - sizeof (pp->dcpc_event_name) - 1); - pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0'; - pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT; - pp->dcpc_ovfval = ovfval; - pp->dcpc_umask = umask; - pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1; - - pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename, - nr_frames, pp); -} - -/*ARGSUSED*/ -static void -dcpc_provide(void *arg, const dtrace_probedesc_t *desc) -{ - /* - * The format of a probe is: - * - * event_name-mode-{optional_umask}-overflow_rate - * e.g. - * DC_refill_from_system-user-0x1e-50000, or, - * DC_refill_from_system-all-10000 - * - */ - char *str, *end, *p; - int i, flag = 0; - char event[CPC_MAX_EVENT_LEN]; - long umask = -1, val = 0; - size_t evlen, len; - - /* - * The 'cpc' provider offers no probes by default. - */ - if (desc == NULL) - return; - - len = strlen(desc->dtpd_name); - p = str = kmem_alloc(len + 1, KM_SLEEP); - (void) strcpy(str, desc->dtpd_name); - - /* - * We have a poor man's strtok() going on here. Replace any hyphens - * in the the probe name with NULL characters in order to make it - * easy to parse the string with regular string functions. 
- */ - for (i = 0; i < len; i++) { - if (str[i] == '-') - str[i] = '\0'; - } - - /* - * The first part of the string must be either a platform event - * name or a generic event name. - */ - evlen = strlen(p); - (void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1); - event[CPC_MAX_EVENT_LEN - 1] = '\0'; - - /* - * The next part of the name is the mode specification. Valid - * settings are "user", "kernel" or "all". - */ - p += evlen + 1; - - if (strcmp(p, "user") == 0) - flag |= CPC_COUNT_USER; - else if (strcmp(p, "kernel") == 0) - flag |= CPC_COUNT_SYSTEM; - else if (strcmp(p, "all") == 0) - flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM; - else - goto err; - - /* - * Next we either have a mask specification followed by an overflow - * rate or just an overflow rate on its own. - */ - p += strlen(p) + 1; - if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - /* - * A unit mask can only be specified if: - * 1) this performance counter back end supports masks. - * 2) the specified event is platform specific. - * 3) a valid hex number is converted. - * 4) no extraneous characters follow the mask specification. - */ - if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 && - ddi_strtol(p, &end, 16, &umask) == 0 && - end == p + strlen(p)) { - p += strlen(p) + 1; - } else { - goto err; - } - } - - /* - * This final part must be an overflow value which has to be greater - * than the minimum permissible overflow rate. - */ - if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) || - val < dcpc_min_overflow) - goto err; - - /* - * Validate the event and create the probe. 
- */ - for (i = 0; i < cpc_ncounters; i++) { - char *events, *cp, *p, *end; - int found = 0, j; - size_t llen; - - if ((events = kcpc_list_events(i)) == NULL) - goto err; - - llen = strlen(events); - p = cp = ddi_strdup(events, KM_NOSLEEP); - end = cp + llen; - - for (j = 0; j < llen; j++) { - if (cp[j] == ',') - cp[j] = '\0'; - } - - while (p < end && found == 0) { - if (strcmp(p, event) == 0) { - dcpc_create_probe(dcpc_pid, desc->dtpd_name, - event, umask, (uint32_t)val, flag); - found = 1; - } - p += strlen(p) + 1; - } - kmem_free(cp, llen + 1); - - if (found) - break; - } - -err: - kmem_free(str, len + 1); -} - -/*ARGSUSED*/ -static void -dcpc_destroy(void *arg, dtrace_id_t id, void *parg) -{ - dcpc_probe_t *pp = parg; - - ASSERT(pp->dcpc_enabled == 0); - kmem_free(pp, sizeof (dcpc_probe_t)); -} - -/*ARGSUSED*/ -static int -dcpc_mode(void *arg, dtrace_id_t id, void *parg) -{ - if (CPU->cpu_cpcprofile_pc == 0) { - return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER); - } else { - return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL); - } -} - -static void -dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno) -{ - kcpc_set_t *oset; - int i; - - (void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name, - CPC_MAX_EVENT_LEN); - set->ks_req[reqno].kr_config = NULL; - set->ks_req[reqno].kr_index = reqno; - set->ks_req[reqno].kr_picnum = -1; - set->ks_req[reqno].kr_flags = pp->dcpc_flag; - - /* - * If a unit mask has been specified then detect which attribute - * the platform needs. For now, it's either "umask" or "emask". 
- */ - if (pp->dcpc_umask >= 0) { - set->ks_req[reqno].kr_attr = - kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP); - set->ks_req[reqno].kr_nattrs = 1; - if (dcpc_mask_type & DCPC_UMASK) - (void) strncpy(set->ks_req[reqno].kr_attr->ka_name, - "umask", 5); - else - (void) strncpy(set->ks_req[reqno].kr_attr->ka_name, - "emask", 5); - set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask; - } else { - set->ks_req[reqno].kr_attr = NULL; - set->ks_req[reqno].kr_nattrs = 0; - } - - /* - * If this probe is enabled, obtain its current countdown value - * and use that. The CPUs cpc context might not exist yet if we - * are dealing with a CPU that is just coming online. - */ - if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) { - oset = c->cpu_cpc_ctx->kc_set; - - for (i = 0; i < oset->ks_nreqs; i++) { - if (strcmp(oset->ks_req[i].kr_event, - set->ks_req[reqno].kr_event) == 0) { - set->ks_req[reqno].kr_preset = - *(oset->ks_req[i].kr_data); - } - } - } else { - set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval; - } - - set->ks_nreqs++; -} - - -/* - * Create a fresh request set for the enablings represented in the - * 'dcpc_actv_reqs' array which contains the probes we want to be - * in the set. This can be called for several reasons: - * - * 1) We are on a single or multi overflow platform and we have no - * current events so we can just create the set and initialize it. - * 2) We are on a multi-overflow platform and we already have one or - * more existing events and we are adding a new enabling. Create a - * new set and copy old requests in and then add the new request. - * 3) We are on a multi-overflow platform and we have just removed an - * enabling but we still have enablings whch are valid. Create a new - * set and copy in still valid requests. - */ -static kcpc_set_t * -dcpc_create_set(cpu_t *c) -{ - int i, reqno = 0; - int active_requests = 0; - kcpc_set_t *set; - - /* - * First get a count of the number of currently active requests. 
- * Note that dcpc_actv_reqs[] should always reflect which requests - * we want to be in the set that is to be created. It is the - * responsibility of the caller of dcpc_create_set() to adjust that - * array accordingly beforehand. - */ - for (i = 0; i < cpc_ncounters; i++) { - if (dcpc_actv_reqs[i] != NULL) - active_requests++; - } - - set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP); - - set->ks_req = - kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP); - - set->ks_data = - kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP); - - /* - * Look for valid entries in the active requests array and populate - * the request set for any entries found. - */ - for (i = 0; i < cpc_ncounters; i++) { - if (dcpc_actv_reqs[i] != NULL) { - dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno); - reqno++; - } - } - - return (set); -} - -static int -dcpc_program_cpu_event(cpu_t *c) -{ - int i, j, subcode; - kcpc_ctx_t *ctx, *octx; - kcpc_set_t *set; - - set = dcpc_create_set(c); - - set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP); - ctx->kc_set = set; - ctx->kc_cpuid = c->cpu_id; - - if (kcpc_assign_reqs(set, ctx) != 0) - goto err; - - if (kcpc_configure_reqs(ctx, set, &subcode) != 0) - goto err; - - for (i = 0; i < set->ks_nreqs; i++) { - for (j = 0; j < cpc_ncounters; j++) { - if (dcpc_actv_reqs[j] != NULL && - strcmp(set->ks_req[i].kr_event, - dcpc_actv_reqs[j]->dcpc_event_name) == 0) { - dcpc_actv_reqs[j]->dcpc_picno = - set->ks_req[i].kr_picnum; - } - } - } - - /* - * If we already have an active enabling then save the current cpc - * context away. - */ - octx = c->cpu_cpc_ctx; - - kcpc_cpu_program(c, ctx); - - if (octx != NULL) { - kcpc_set_t *oset = octx->kc_set; - kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t)); - kcpc_free_configs(oset); - kcpc_free_set(oset); - kcpc_ctx_free(octx); - } - - return (0); - -err: - /* - * We failed to configure this request up so free things up and - * get out. 
- */ - kcpc_free_configs(set); - kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); - kcpc_free_set(set); - kcpc_ctx_free(ctx); - - return (-1); -} - -static void -dcpc_disable_cpu(cpu_t *c) -{ - kcpc_ctx_t *ctx; - kcpc_set_t *set; - - /* - * Leave this CPU alone if it's already offline. - */ - if (c->cpu_flags & CPU_OFFLINE) - return; - - /* - * Grab CPUs CPC context before kcpc_cpu_stop() stops counters and - * changes it. - */ - ctx = c->cpu_cpc_ctx; - - kcpc_cpu_stop(c, B_FALSE); - - set = ctx->kc_set; - - kcpc_free_configs(set); - kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); - kcpc_free_set(set); - kcpc_ctx_free(ctx); -} - -/* - * The dcpc_*_interrupts() routines are responsible for manipulating the - * per-CPU dcpc interrupt state byte. The purpose of the state byte is to - * synchronize processing of hardware overflow interrupts wth configuration - * changes made to the CPU performance counter subsystem by the dcpc provider. - * - * The dcpc provider claims ownership of the overflow interrupt mechanism - * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the - * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the - * overflow mechanism and interrupts may be processed). Before modifying - * a CPUs configuration state the state byte is transitioned from - * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state). - * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process - * an interrupt when a configuration is not in process (i.e. the state is - * marked as free). During interrupt processing the state is set to - * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based - * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate - * the dcpc provider is no longer interested in overflow interrupts. 
- */ -static void -dcpc_block_interrupts(void) -{ - cpu_t *c = cpu_list; - uint8_t *state; - - ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); - - do { - state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state; - - while (atomic_cas_8(state, DCPC_INTR_FREE, - DCPC_INTR_CONFIG) != DCPC_INTR_FREE) - continue; - - } while ((c = c->cpu_next) != cpu_list); -} - -/* - * Set all CPUs dcpc interrupt state to DCPC_INTR_FREE to indicate that - * overflow interrupts can be processed safely. - */ -static void -dcpc_release_interrupts(void) -{ - cpu_t *c = cpu_list; - - ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); - - do { - cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE; - membar_producer(); - } while ((c = c->cpu_next) != cpu_list); -} - -/* - * Transition all CPUs dcpc interrupt state from DCPC_INTR_INACTIVE to - * to DCPC_INTR_FREE. This indicates that the dcpc provider is now - * responsible for handling all overflow interrupt activity. Should only be - * called before enabling the first dcpc based probe. - */ -static void -dcpc_claim_interrupts(void) -{ - cpu_t *c = cpu_list; - - ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE); - - do { - cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE; - membar_producer(); - } while ((c = c->cpu_next) != cpu_list); -} - -/* - * Set all CPUs dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that - * the dcpc provider is no longer processing overflow interrupts. Only called - * during removal of the last dcpc based enabling. 
- */ -static void -dcpc_surrender_interrupts(void) -{ - cpu_t *c = cpu_list; - - ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE); - - do { - cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE; - membar_producer(); - } while ((c = c->cpu_next) != cpu_list); -} - -/* - * dcpc_program_event() can be called owing to a new enabling or if a multi - * overflow platform has disabled a request but needs to program the requests - * that are still valid. - * - * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t - * and a new request set which contains the new enabling and any old enablings - * which are still valid (possible with multi-overflow platforms). - */ -static int -dcpc_program_event(dcpc_probe_t *pp) -{ - cpu_t *c; - int ret = 0; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - kpreempt_disable(); - - dcpc_block_interrupts(); - - c = cpu_list; - - do { - /* - * Skip CPUs that are currently offline. - */ - if (c->cpu_flags & CPU_OFFLINE) - continue; - - /* - * Stop counters but preserve existing DTrace CPC context - * if there is one. - * - * If we come here when the first event is programmed for a CPU, - * there should be no DTrace CPC context installed. In this - * case, kcpc_cpu_stop() will ensure that there is no other - * context on the CPU. - * - * If we add new enabling to the original one, the CPU should - * have the old DTrace CPC context which we need to keep around - * since dcpc_program_event() will add to it. - */ - if (c->cpu_cpc_ctx != NULL) - kcpc_cpu_stop(c, B_TRUE); - } while ((c = c->cpu_next) != cpu_list); - - dcpc_release_interrupts(); - - /* - * If this enabling is being removed (in the case of a multi event - * capable system with more than one active enabling), we can now - * update the active request array to reflect the enablings that need - * to be reprogrammed. 
- */ - if (pp->dcpc_disabling == 1) - dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; - - do { - /* - * Skip CPUs that are currently offline. - */ - if (c->cpu_flags & CPU_OFFLINE) - continue; - - ret = dcpc_program_cpu_event(c); - } while ((c = c->cpu_next) != cpu_list && ret == 0); - - /* - * If dcpc_program_cpu_event() fails then it is because we couldn't - * configure the requests in the set for the CPU and not because of - * an error programming the hardware. If we have a failure here then - * we assume no CPUs have been programmed in the above step as they - * are all configured identically. - */ - if (ret != 0) { - pp->dcpc_enabled = 0; - kpreempt_enable(); - return (-1); - } - - if (pp->dcpc_disabling != 1) - pp->dcpc_enabled = 1; - - kpreempt_enable(); - - return (0); -} - -/*ARGSUSED*/ -static int -dcpc_enable(void *arg, dtrace_id_t id, void *parg) -{ - dcpc_probe_t *pp = parg; - int i, found = 0; - cpu_t *c; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - /* - * Bail out if the counters are being used by a libcpc consumer. - */ - rw_enter(&kcpc_cpuctx_lock, RW_READER); - if (kcpc_cpuctx > 0) { - rw_exit(&kcpc_cpuctx_lock); - return (-1); - } - - dtrace_cpc_in_use++; - rw_exit(&kcpc_cpuctx_lock); - - /* - * Locate this enabling in the first free entry of the active - * request array. - */ - for (i = 0; i < cpc_ncounters; i++) { - if (dcpc_actv_reqs[i] == NULL) { - dcpc_actv_reqs[i] = pp; - pp->dcpc_actv_req_idx = i; - found = 1; - break; - } - } - - /* - * If we couldn't find a slot for this probe then there is no - * room at the inn. - */ - if (!found) { - dtrace_cpc_in_use--; - return (-1); - } - - ASSERT(pp->dcpc_actv_req_idx >= 0); - - /* - * DTrace is taking over CPC contexts, so stop collecting - * capacity/utilization data for all CPUs. - */ - if (dtrace_cpc_in_use == 1) - cu_disable(); - - /* - * The following must hold true if we are to (attempt to) enable - * this request: - * - * 1) No enablings currently exist. 
We allow all platforms to - * proceed if this is true. - * - * OR - * - * 2) If the platform is multi overflow capable and there are - * less valid enablings than there are counters. There is no - * guarantee that a platform can accommodate as many events as - * it has counters for but we will at least try to program - * up to that many requests. - * - * The 'dcpc_enablings' variable is implictly protected by locking - * provided by the DTrace framework and the cpu management framework. - */ - if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap && - dcpc_enablings < cpc_ncounters)) { - /* - * Before attempting to program the first enabling we need to - * invalidate any lwp-based contexts and lay claim to the - * overflow interrupt mechanism. - */ - if (dcpc_enablings == 0) { - kcpc_invalidate_all(); - dcpc_claim_interrupts(); - } - - if (dcpc_program_event(pp) == 0) { - dcpc_enablings++; - return (0); - } - } - - /* - * If active enablings existed before we failed to enable this probe - * on a multi event capable platform then we need to restart counters - * as they will have been stopped in the attempted configuration. The - * context should now just contain the request prior to this failed - * enabling. - */ - if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) { - c = cpu_list; - - ASSERT(dcpc_mult_ovf_cap == 1); - do { - /* - * Skip CPUs that are currently offline. - */ - if (c->cpu_flags & CPU_OFFLINE) - continue; - - kcpc_cpu_program(c, c->cpu_cpc_ctx); - } while ((c = c->cpu_next) != cpu_list); - } - - /* - * Give up any claim to the overflow interrupt mechanism if no - * dcpc based enablings exist. - */ - if (dcpc_enablings == 0) - dcpc_surrender_interrupts(); - - dtrace_cpc_in_use--; - dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; - pp->dcpc_actv_req_idx = pp->dcpc_picno = -1; - - /* - * If all probes are removed, enable capacity/utilization data - * collection for every CPU. 
- */ - if (dtrace_cpc_in_use == 0) - cu_enable(); - - return (-1); -} - -/* - * If only one enabling is active then remove the context and free - * everything up. If there are multiple enablings active then remove this - * one, its associated meta-data and re-program the hardware. - */ -/*ARGSUSED*/ -static void -dcpc_disable(void *arg, dtrace_id_t id, void *parg) -{ - cpu_t *c; - dcpc_probe_t *pp = parg; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - kpreempt_disable(); - - /* - * This probe didn't actually make it as far as being fully enabled - * so we needn't do anything with it. - */ - if (pp->dcpc_enabled == 0) { - /* - * If we actually allocated this request a slot in the - * request array but failed to enabled it then remove the - * entry in the array. - */ - if (pp->dcpc_actv_req_idx >= 0) { - dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; - pp->dcpc_actv_req_idx = pp->dcpc_picno = - pp->dcpc_disabling = -1; - } - - kpreempt_enable(); - return; - } - - /* - * If this is the only enabling then stop all the counters and - * free up the meta-data. - */ - if (dcpc_enablings == 1) { - ASSERT(dtrace_cpc_in_use == 1); - - dcpc_block_interrupts(); - - c = cpu_list; - - do { - dcpc_disable_cpu(c); - } while ((c = c->cpu_next) != cpu_list); - - dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL; - dcpc_surrender_interrupts(); - } else { - /* - * This platform can support multiple overflow events and - * the enabling being disabled is not the last one. Remove this - * enabling and re-program the hardware with the new config. 
- */ - ASSERT(dcpc_mult_ovf_cap); - ASSERT(dcpc_enablings > 1); - - pp->dcpc_disabling = 1; - (void) dcpc_program_event(pp); - } - - kpreempt_enable(); - - dcpc_enablings--; - dtrace_cpc_in_use--; - pp->dcpc_enabled = 0; - pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1; - - /* - * If all probes are removed, enable capacity/utilization data - * collection for every CPU - */ - if (dtrace_cpc_in_use == 0) - cu_enable(); -} - -/*ARGSUSED*/ -static int -dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg) -{ - cpu_t *c; - uint8_t *state; - - ASSERT(MUTEX_HELD(&cpu_lock)); - - switch (what) { - case CPU_OFF: - /* - * Offline CPUs are not allowed to take part so remove this - * CPU if we are actively tracing. - */ - if (dtrace_cpc_in_use) { - c = cpu_get(cpu); - state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state; - - /* - * Indicate that a configuration is in process in - * order to stop overflow interrupts being processed - * on this CPU while we disable it. - */ - while (atomic_cas_8(state, DCPC_INTR_FREE, - DCPC_INTR_CONFIG) != DCPC_INTR_FREE) - continue; - - dcpc_disable_cpu(c); - - /* - * Reset this CPUs interrupt state as the configuration - * has ended. - */ - cpu_core[c->cpu_id].cpuc_dcpc_intr_state = - DCPC_INTR_FREE; - membar_producer(); - } - break; - - case CPU_ON: - case CPU_SETUP: - /* - * This CPU is being initialized or brought online so program - * it with the current request set if we are actively tracing. 
- */ - if (dtrace_cpc_in_use) { - c = cpu_get(cpu); - (void) dcpc_program_cpu_event(c); - } - break; - - default: - break; - } - - return (0); -} - -static dtrace_pattr_t dcpc_attr = { -{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU }, -{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -}; - -static dtrace_pops_t dcpc_pops = { - dcpc_provide, - NULL, - dcpc_enable, - dcpc_disable, - NULL, - NULL, - NULL, - NULL, - dcpc_mode, - dcpc_destroy -}; - -/*ARGSUSED*/ -static int -dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) -{ - return (0); -} - -/*ARGSUSED*/ -static int -dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)dcpc_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -static int -dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) -{ - switch (cmd) { - case DDI_DETACH: - break; - case DDI_SUSPEND: - return (DDI_SUCCESS); - default: - return (DDI_FAILURE); - } - - if (dtrace_unregister(dcpc_pid) != 0) - return (DDI_FAILURE); - - ddi_remove_minor_node(devi, NULL); - - mutex_enter(&cpu_lock); - unregister_cpu_setup_func(dcpc_cpu_setup, NULL); - mutex_exit(&cpu_lock); - - kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *)); - - kcpc_unregister_dcpc(); - - return (DDI_SUCCESS); -} - -static int -dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) -{ - uint_t caps; - char *attrs; - - switch (cmd) { - case DDI_ATTACH: - break; - case DDI_RESUME: - return (DDI_SUCCESS); - default: - return (DDI_FAILURE); - } - 
- if (kcpc_pcbe_loaded() == -1) - return (DDI_FAILURE); - - caps = kcpc_pcbe_capabilities(); - - if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) { - cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"\ - " on this processor"); - return (DDI_FAILURE); - } - - if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE || - dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL, - NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) { - ddi_remove_minor_node(devi, NULL); - return (DDI_FAILURE); - } - - mutex_enter(&cpu_lock); - register_cpu_setup_func(dcpc_cpu_setup, NULL); - mutex_exit(&cpu_lock); - - dcpc_ovf_mask = (1 << cpc_ncounters) - 1; - ASSERT(dcpc_ovf_mask != 0); - - if (caps & CPC_CAP_OVERFLOW_PRECISE) - dcpc_mult_ovf_cap = 1; - - /* - * Determine which, if any, mask attribute the back-end can use. - */ - attrs = kcpc_list_attrs(); - if (strstr(attrs, "umask") != NULL) - dcpc_mask_type |= DCPC_UMASK; - else if (strstr(attrs, "emask") != NULL) - dcpc_mask_type |= DCPC_EMASK; - - /* - * The dcpc_actv_reqs array is used to store the requests that - * we currently have programmed. The order of requests in this - * array is not necessarily the order that the event appears in - * the kcpc_request_t array. Once entered into a slot in the array - * the entry is not moved until it's removed. 
- */ - dcpc_actv_reqs = - kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP); - - dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi, - DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT); - - kcpc_register_dcpc(dcpc_fire); - - ddi_report_dev(devi); - dcpc_devi = devi; - - return (DDI_SUCCESS); -} - -static struct cb_ops dcpc_cb_ops = { - dcpc_open, /* open */ - nodev, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - nodev, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops dcpc_ops = { - DEVO_REV, /* devo_rev, */ - 0, /* refcnt */ - dcpc_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - dcpc_attach, /* attach */ - dcpc_detach, /* detach */ - nodev, /* reset */ - &dcpc_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev, /* dev power */ - ddi_quiesce_not_needed /* quiesce */ -}; - -/* - * Module linkage information for the kernel. - */ -static struct modldrv modldrv = { - &mod_driverops, /* module type */ - "DTrace CPC Module", /* name of module */ - &dcpc_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index 0c5e4b3a011a..2a9df6d403f2 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -21,7 +21,6 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
*/ /* @@ -119,7 +118,7 @@ dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); size_t dtrace_global_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; -dtrace_optval_t dtrace_helper_actions_max = 1024; +dtrace_optval_t dtrace_helper_actions_max = 32; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; @@ -144,7 +143,6 @@ int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; -hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; /* * DTrace External Variables @@ -461,13 +459,11 @@ static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); -static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); -static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, @@ -1108,13 +1104,10 @@ dtrace_priv_proc_common_nocd() } static int -dtrace_priv_proc_destructive(dtrace_state_t *state, dtrace_mstate_t *mstate) +dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; - if (!(mstate->dtms_access & DTRACE_ACCESS_PROC)) - goto bad; - if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; @@ -1136,17 +1129,15 @@ bad: } static int 
-dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate) +dtrace_priv_proc_control(dtrace_state_t *state) { - if (mstate->dtms_access & DTRACE_ACCESS_PROC) { - if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) - return (1); + if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) + return (1); - if (dtrace_priv_proc_common_zone(state) && - dtrace_priv_proc_common_user(state) && - dtrace_priv_proc_common_nocd()) - return (1); - } + if (dtrace_priv_proc_common_zone(state) && + dtrace_priv_proc_common_user(state) && + dtrace_priv_proc_common_nocd()) + return (1); cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1154,10 +1145,9 @@ dtrace_priv_proc_control(dtrace_state_t *state, dtrace_mstate_t *mstate) } static int -dtrace_priv_proc(dtrace_state_t *state, dtrace_mstate_t *mstate) +dtrace_priv_proc(dtrace_state_t *state) { - if ((mstate->dtms_access & DTRACE_ACCESS_PROC) && - (state->dts_cred.dcr_action & DTRACE_CRA_PROC)) + if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; @@ -1188,109 +1178,6 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) } /* - * Determine if the dte_cond of the specified ECB allows for processing of - * the current probe to continue. Note that this routine may allow continued - * processing, but with access(es) stripped from the mstate's dtms_access - * field. 
- */ -static int -dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, - dtrace_ecb_t *ecb) -{ - dtrace_probe_t *probe = ecb->dte_probe; - dtrace_provider_t *prov = probe->dtpr_provider; - dtrace_pops_t *pops = &prov->dtpv_pops; - int mode = DTRACE_MODE_NOPRIV_DROP; - - ASSERT(ecb->dte_cond); - - if (pops->dtps_mode != NULL) { - mode = pops->dtps_mode(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); - - ASSERT((mode & DTRACE_MODE_USER) || - (mode & DTRACE_MODE_KERNEL)); - ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || - (mode & DTRACE_MODE_NOPRIV_DROP)); - } - - /* - * If the dte_cond bits indicate that this consumer is only allowed to - * see user-mode firings of this probe, call the provider's dtps_mode() - * entry point to check that the probe was fired while in a user - * context. If that's not the case, use the policy specified by the - * provider to determine if we drop the probe or merely restrict - * operation. - */ - if (ecb->dte_cond & DTRACE_COND_USERMODE) { - ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); - - if (!(mode & DTRACE_MODE_USER)) { - if (mode & DTRACE_MODE_NOPRIV_DROP) - return (0); - - mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; - } - } - - /* - * This is more subtle than it looks. We have to be absolutely certain - * that CRED() isn't going to change out from under us so it's only - * legit to examine that structure if we're in constrained situations. - * Currently, the only times we'll this check is if a non-super-user - * has enabled the profile or syscall providers -- providers that - * allow visibility of all processes. For the profile case, the check - * above will ensure that we're examining a user context. 
- */ - if (ecb->dte_cond & DTRACE_COND_OWNER) { - cred_t *cr; - cred_t *s_cr = state->dts_cred.dcr_cred; - proc_t *proc; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_uid != cr->cr_uid || - s_cr->cr_uid != cr->cr_ruid || - s_cr->cr_uid != cr->cr_suid || - s_cr->cr_gid != cr->cr_gid || - s_cr->cr_gid != cr->cr_rgid || - s_cr->cr_gid != cr->cr_sgid || - (proc = ttoproc(curthread)) == NULL || - (proc->p_flag & SNOCD)) { - if (mode & DTRACE_MODE_NOPRIV_DROP) - return (0); - - mstate->dtms_access &= ~DTRACE_ACCESS_PROC; - } - } - - /* - * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not - * in our zone, check to see if our mode policy is to restrict rather - * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC - * and DTRACE_ACCESS_ARGS - */ - if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { - cred_t *cr; - cred_t *s_cr = state->dts_cred.dcr_cred; - - ASSERT(s_cr != NULL); - - if ((cr = CRED()) == NULL || - s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { - if (mode & DTRACE_MODE_NOPRIV_DROP) - return (0); - - mstate->dtms_access &= - ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); - } - } - - return (1); -} - -/* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable @@ -1972,75 +1859,6 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } -static int -dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, - uint16_t high, uint16_t nsteps, int64_t value) -{ - int64_t this = 1, last, next; - int base = 1, order; - - ASSERT(factor <= nsteps); - ASSERT(nsteps % factor == 0); - - for (order = 0; order < low; order++) - this *= factor; - - /* - * If our value is less than our factor taken to the power of the - * low order of magnitude, it goes into the zeroth bucket. 
- */ - if (value < (last = this)) - return (0); - - for (this *= factor; order <= high; order++) { - int nbuckets = this > nsteps ? nsteps : this; - - if ((next = this * factor) < this) { - /* - * We should not generally get log/linear quantizations - * with a high magnitude that allows 64-bits to - * overflow, but we nonetheless protect against this - * by explicitly checking for overflow, and clamping - * our value accordingly. - */ - value = this - 1; - } - - if (value < this) { - /* - * If our value lies within this order of magnitude, - * determine its position by taking the offset within - * the order of magnitude, dividing by the bucket - * width, and adding to our (accumulated) base. - */ - return (base + (value - last) / (this / nbuckets)); - } - - base += nbuckets - (nbuckets / factor); - last = this; - this = next; - } - - /* - * Our value is greater than or equal to our factor taken to the - * power of one plus the high magnitude -- return the top bucket. - */ - return (base); -} - -static void -dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) -{ - uint64_t arg = *llquanta++; - uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); - uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); - uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); - uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); - - llquanta[dtrace_aggregate_llquantize_bucket(factor, - low, high, nsteps, nval)] += incr; -} - /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) @@ -2822,12 +2640,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, switch (v) { case DIF_VAR_ARGS: - if (!(mstate->dtms_access & DTRACE_ACCESS_ARGS)) { - cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= - CPU_DTRACE_KPRIV; - return (0); - } - ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { @@ -2863,7 +2675,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t 
*state, uint64_t v, case DIF_VAR_UREGS: { klwp_t *lwp; - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); if ((lwp = curthread->t_lwp) == NULL) { @@ -2875,22 +2687,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(lwp->lwp_regs, ndx)); } - case DIF_VAR_VMREGS: { - uint64_t rval; - - if (!dtrace_priv_kernel(state)) - return (0); - - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - - rval = dtrace_getvmreg(ndx, - &cpu_core[CPU->cpu_id].cpuc_dtrace_flags); - - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - - return (rval); - } - case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); @@ -2943,7 +2739,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* @@ -2998,7 +2794,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_caller); case DIF_VAR_UCALLER: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { @@ -3046,7 +2842,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_PID: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3068,7 +2864,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_pidp->pid_id); case DIF_VAR_PPID: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3095,7 +2891,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECNAME: - if (!dtrace_priv_proc(state, mstate)) + if 
(!dtrace_priv_proc(state)) return (0); /* @@ -3115,7 +2911,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3135,7 +2931,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, state, mstate)); case DIF_VAR_UID: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3156,7 +2952,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_cred->cr_uid); case DIF_VAR_GID: - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3178,7 +2974,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_ERRNO: { klwp_t *lwp; - if (!dtrace_priv_proc(state, mstate)) + if (!dtrace_priv_proc(state)) return (0); /* @@ -3518,7 +3314,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state, mstate) && + dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); @@ -3533,7 +3329,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state, mstate) && + dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, size, flags); @@ -3904,54 +3700,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - case DIF_SUBR_TOUPPER: - case DIF_SUBR_TOLOWER: { - uintptr_t s = tupregs[0].dttk_value; - uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - char *dest = (char *)mstate->dtms_scratch_ptr, c; - size_t len = 
dtrace_strlen((char *)s, size); - char lower, upper, convert; - int64_t i; - - if (subr == DIF_SUBR_TOUPPER) { - lower = 'a'; - upper = 'z'; - convert = 'A'; - } else { - lower = 'A'; - upper = 'Z'; - convert = 'a'; - } - - if (!dtrace_canload(s, len + 1, mstate, vstate)) { - regs[rd] = NULL; - break; - } - - if (!DTRACE_INSCRATCH(mstate, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - break; - } - - for (i = 0; i < size - 1; i++) { - if ((c = dtrace_load8(s + i)) == '\0') - break; - - if (c >= lower && c <= upper) - c = convert + (c - lower); - - dest[i] = c; - } - - ASSERT(i < size); - dest[i] = '\0'; - regs[rd] = (uintptr_t)dest; - mstate->dtms_scratch_ptr += size; - break; - } - -case DIF_SUBR_GETMAJOR: + case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else @@ -4213,20 +3962,9 @@ case DIF_SUBR_GETMAJOR: case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - uint64_t val, digit; - uint64_t size = 65; /* enough room for 2^64 in binary */ + int64_t val = i < 0 ? i * -1 : i; + uint64_t size = 22; /* enough room for 2^64 in decimal */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; - int base = 10; - - if (nargs > 1) { - if ((base = tupregs[1].dttk_value) <= 1 || - base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - } - - val = (base == 10 && i < 0) ? 
i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -4234,24 +3972,13 @@ case DIF_SUBR_GETMAJOR: break; } - for (*end-- = '\0'; val; val /= base) { - if ((digit = val % base) <= '9' - '0') { - *end-- = '0' + digit; - } else { - *end-- = 'a' + (digit - ('9' - '0') - 1); - } - } - - if (i == 0 && base == 16) - *end-- = '0'; - - if (base == 16) - *end-- = 'x'; + for (*end-- = '\0'; val; val /= 10) + *end-- = '0' + (val % 10); - if (i == 0 || base == 8 || base == 16) + if (i == 0) *end-- = '0'; - if (i < 0 && base == 10) + if (i < 0) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -5886,7 +5613,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; - uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -5907,7 +5633,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, #endif mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; - mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { @@ -5945,8 +5670,65 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, } } - if (ecb->dte_cond && !dtrace_priv_probe(state, &mstate, ecb)) - continue; + if (ecb->dte_cond) { + /* + * If the dte_cond bits indicate that this + * consumer is only allowed to see user-mode firings + * of this probe, call the provider's dtps_usermode() + * entry point to check that the probe was fired + * while in a user context. Skip this ECB if that's + * not the case. + */ + if ((ecb->dte_cond & DTRACE_COND_USERMODE) && + prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg) == 0) + continue; + + /* + * This is more subtle than it looks. 
We have to be + * absolutely certain that CRED() isn't going to + * change out from under us so it's only legit to + * examine that structure if we're in constrained + * situations. Currently, the only times we'll this + * check is if a non-super-user has enabled the + * profile or syscall providers -- providers that + * allow visibility of all processes. For the + * profile case, the check above will ensure that + * we're examining a user context. + */ + if (ecb->dte_cond & DTRACE_COND_OWNER) { + cred_t *cr; + cred_t *s_cr = + ecb->dte_state->dts_cred.dcr_cred; + proc_t *proc; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_uid != cr->cr_uid || + s_cr->cr_uid != cr->cr_ruid || + s_cr->cr_uid != cr->cr_suid || + s_cr->cr_gid != cr->cr_gid || + s_cr->cr_gid != cr->cr_rgid || + s_cr->cr_gid != cr->cr_sgid || + (proc = ttoproc(curthread)) == NULL || + (proc->p_flag & SNOCD)) + continue; + } + + if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { + cred_t *cr; + cred_t *s_cr = + ecb->dte_state->dts_cred.dcr_cred; + + ASSERT(s_cr != NULL); + + if ((cr = CRED()) == NULL || + s_cr->cr_zone->zone_id != + cr->cr_zone->zone_id) + continue; + } + } if (now - state->dts_alive > dtrace_deadman_timeout) { /* @@ -5986,7 +5768,9 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) - mstate.dtms_access |= DTRACE_ACCESS_KERNEL; + mstate.dtms_access = DTRACE_ACCESS_KERNEL; + else + mstate.dtms_access = 0; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; @@ -6046,8 +5830,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, switch (act->dta_kind) { case DTRACEACT_STOP: - if (dtrace_priv_proc_destructive(state, - &mstate)) + if (dtrace_priv_proc_destructive(state)) dtrace_action_stop(); continue; @@ -6074,7 +5857,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_JSTACK: case DTRACEACT_USTACK: - if 
(!dtrace_priv_proc(state, &mstate)) + if (!dtrace_priv_proc(state)) continue; /* @@ -6107,23 +5890,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; } - /* - * Clear the string space, since there's no - * helper to do it for us. - */ - if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0) { - int depth = DTRACE_USTACK_NFRAMES( - rec->dtrd_arg); - size_t strsize = DTRACE_USTACK_STRSIZE( - rec->dtrd_arg); - uint64_t *buf = (uint64_t *)(tomax + - valoffs); - void *strspace = &buf[depth + 1]; - - dtrace_bzero(strspace, - MIN(depth, strsize)); - } - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), @@ -6177,8 +5943,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, continue; case DTRACEACT_RAISE: - if (dtrace_priv_proc_destructive(state, - &mstate)) + if (dtrace_priv_proc_destructive(state)) dtrace_action_raise(val); continue; @@ -6205,11 +5970,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: - case DTRACEACT_TRACEMEM: - break; - - case DTRACEACT_TRACEMEM_DYNSIZE: - tracememsize = val; break; case DTRACEACT_SYM: @@ -6223,7 +5983,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, case DTRACEACT_UADDR: { struct pid *pid = curthread->t_procp->p_pidp; - if (!dtrace_priv_proc(state, &mstate)) + if (!dtrace_priv_proc(state)) continue; DTRACE_STORE(uint64_t, tomax, @@ -6275,12 +6035,6 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { uintptr_t end = valoffs + size; - if (tracememsize != 0 && - valoffs + tracememsize < end) { - end = valoffs + tracememsize; - tracememsize = 0; - } - if (!dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, &mstate, vstate)) continue; @@ -7161,9 +6915,9 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | 
DTRACE_PRIV_OWNER)) && - pops->dtps_mode == NULL) { + pops->dtps_usermode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " - "dtps_mode() op for given privilege attributes", name); + "dtps_usermode() op for given privilege attributes", name); return (EINVAL); } @@ -7260,7 +7014,7 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0, noreap = 0; + int i, self = 0; dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == @@ -7317,31 +7071,14 @@ dtrace_unregister(dtrace_provider_id_t id) continue; /* - * If we are trying to unregister a defunct provider, and the - * provider was made defunct within the interval dictated by - * dtrace_unregister_defunct_reap, we'll (asynchronously) - * attempt to reap our enablings. To denote that the provider - * should reattempt to unregister itself at some point in the - * future, we will return a differentiable error code (EAGAIN - * instead of EBUSY) in this case. + * We have at least one ECB; we can't remove this provider. 
*/ - if (dtrace_gethrtime() - old->dtpv_defunct > - dtrace_unregister_defunct_reap) - noreap = 1; - if (!self) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); } - - if (noreap) - return (EBUSY); - - (void) taskq_dispatch(dtrace_taskq, - (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); - - return (EAGAIN); + return (EBUSY); } /* @@ -7432,7 +7169,7 @@ dtrace_invalidate(dtrace_provider_id_t id) mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); - pvp->dtpv_defunct = dtrace_gethrtime(); + pvp->dtpv_defunct = 1; mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); @@ -9639,35 +9376,6 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } - case DTRACEAGG_LLQUANTIZE: { - uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); - uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); - uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); - uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); - int64_t v; - - agg->dtag_initial = desc->dtad_arg; - agg->dtag_aggregate = dtrace_aggregate_llquantize; - - if (factor < 2 || low >= high || nsteps < factor) - goto err; - - /* - * Now check that the number of steps evenly divides a power - * of the factor. (This assures both integer bucket size and - * linearity within each magnitude.) - */ - for (v = factor; v < nsteps; v *= factor) - continue; - - if ((v % nsteps) || (nsteps % factor)) - goto err; - - size = (dtrace_aggregate_llquantize_bucket(factor, - low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); - break; - } - case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; @@ -9837,14 +9545,12 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: - case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
*/ if (arg == NULL) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || - desc->dtad_kind == DTRACEACT_DIFEXPR); + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); format = 0; } else { ASSERT(arg != NULL); @@ -9855,8 +9561,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: - case DTRACEACT_TRACEMEM: - case DTRACEACT_TRACEMEM_DYNSIZE: + case DTRACEACT_DIFEXPR: if (dp == NULL) return (EINVAL); @@ -10339,7 +10044,6 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; - hrtime_t now = dtrace_gethrtime(); ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); @@ -10355,8 +10059,6 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); - buf->dtb_interval = now - buf->dtb_switched; - buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } @@ -10389,17 +10091,14 @@ dtrace_buffer_activate(dtrace_state_t *state) static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, - processorid_t cpu, int *factor) + processorid_t cpu) { cpu_t *cp; dtrace_buffer_t *buf; - int allocated = 0, desired = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); - *factor = 1; - if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); @@ -10424,8 +10123,7 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, ASSERT(buf->dtb_xamot == NULL); - if ((buf->dtb_tomax = kmem_zalloc(size, - KM_NOSLEEP | KM_NORMALPRI)) == NULL) + if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; buf->dtb_size = size; @@ -10436,8 +10134,7 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, if (flags & DTRACEBUF_NOSWITCH) continue; - if ((buf->dtb_xamot = kmem_zalloc(size, - KM_NOSLEEP | KM_NORMALPRI)) == NULL) + if ((buf->dtb_xamot 
= kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); @@ -10451,19 +10148,16 @@ err: continue; buf = &bufs[cp->cpu_id]; - desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); - allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); - allocated++; } buf->dtb_tomax = NULL; @@ -10471,8 +10165,6 @@ err: buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); - *factor = desired / (allocated > 0 ? allocated : 1); - return (ENOMEM); } @@ -10774,36 +10466,6 @@ dtrace_buffer_polish(dtrace_buffer_t *buf) } } -/* - * This routine determines if data generated at the specified time has likely - * been entirely consumed at user-level. This routine is called to determine - * if an ECB on a defunct probe (but for an active enabling) can be safely - * disabled and destroyed. - */ -static int -dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) -{ - int i; - - for (i = 0; i < NCPU; i++) { - dtrace_buffer_t *buf = &bufs[i]; - - if (buf->dtb_size == 0) - continue; - - if (buf->dtb_flags & DTRACEBUF_RING) - return (0); - - if (!buf->dtb_switched && buf->dtb_offset != 0) - return (0); - - if (buf->dtb_switched - buf->dtb_interval < when) - return (0); - } - - return (1); -} - static void dtrace_buffer_free(dtrace_buffer_t *bufs) { @@ -11189,12 +10851,10 @@ dtrace_enabling_matchall(void) * block pending our completion. */ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { - dtrace_cred_t *dcr = &enab->dten_vstate->dtvs_state->dts_cred; - cred_t *cr = dcr->dcr_cred; - zoneid_t zone = cr != NULL ? 
crgetzoneid(cr) : 0; + cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; - if ((dcr->dcr_visible & DTRACE_CRV_ALLZONE) || (cr != NULL && - (zone == GLOBAL_ZONEID || getzoneid() == zone))) + if (INGLOBALZONE(curproc) || + cr != NULL && getzoneid() == crgetzoneid(cr)) (void) dtrace_enabling_match(enab, NULL); } @@ -11294,85 +10954,6 @@ retry: } /* - * Called to reap ECBs that are attached to probes from defunct providers. - */ -static void -dtrace_enabling_reap(void) -{ - dtrace_provider_t *prov; - dtrace_probe_t *probe; - dtrace_ecb_t *ecb; - hrtime_t when; - int i; - - mutex_enter(&cpu_lock); - mutex_enter(&dtrace_lock); - - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - - if (probe->dtpr_ecb == NULL) - continue; - - prov = probe->dtpr_provider; - - if ((when = prov->dtpv_defunct) == 0) - continue; - - /* - * We have ECBs on a defunct provider: we want to reap these - * ECBs to allow the provider to unregister. The destruction - * of these ECBs must be done carefully: if we destroy the ECB - * and the consumer later wishes to consume an EPID that - * corresponds to the destroyed ECB (and if the EPID metadata - * has not been previously consumed), the consumer will abort - * processing on the unknown EPID. To reduce (but not, sadly, - * eliminate) the possibility of this, we will only destroy an - * ECB for a defunct provider if, for the state that - * corresponds to the ECB: - * - * (a) There is no speculative tracing (which can effectively - * cache an EPID for an arbitrary amount of time). - * - * (b) The principal buffers have been switched twice since the - * provider became defunct. - * - * (c) The aggregation buffers are of zero size or have been - * switched twice since the provider became defunct. - * - * We use dts_speculates to determine (a) and call a function - * (dtrace_buffer_consumed()) to determine (b) and (c). 
Note - * that as soon as we've been unable to destroy one of the ECBs - * associated with the probe, we quit trying -- reaping is only - * fruitful in as much as we can destroy all ECBs associated - * with the defunct provider's probes. - */ - while ((ecb = probe->dtpr_ecb) != NULL) { - dtrace_state_t *state = ecb->dte_state; - dtrace_buffer_t *buf = state->dts_buffer; - dtrace_buffer_t *aggbuf = state->dts_aggbuffer; - - if (state->dts_speculates) - break; - - if (!dtrace_buffer_consumed(buf, when)) - break; - - if (!dtrace_buffer_consumed(aggbuf, when)) - break; - - dtrace_ecb_disable(ecb); - ASSERT(probe->dtpr_ecb != ecb); - dtrace_ecb_destroy(ecb); - } - } - - mutex_exit(&dtrace_lock); - mutex_exit(&cpu_lock); -} - -/* * DTrace DOF Functions */ /*ARGSUSED*/ @@ -11877,20 +11458,15 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if ((DTRACEACT_ISPRINTFLIKE(kind) && + if (DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || - desc->dofa_strtab != DOF_SECIDX_NONE)) || - (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * The argument to these actions is an index into the - * DOF string table. For printf()-like actions, this - * is the format string. For print(), this is the - * CTF type of the expression result. + * printf()-like actions must have a format string. 
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -12376,7 +11952,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; - if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) + if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL) return (ENOMEM); dstate->dtds_size = size; @@ -12738,7 +12314,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu; - int flags = 0, rval, factor, divisor = 1; + int flags = 0, rval; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); @@ -12768,7 +12344,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } - for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { + for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. 
@@ -12786,7 +12362,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) return (E2BIG); } - rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); + rval = dtrace_buffer_alloc(buf, size, flags, cpu); if (rval != ENOMEM) { opt[which] = size; @@ -12795,9 +12371,6 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); - - for (divisor = 2; divisor < factor; divisor <<= 1) - continue; } return (ENOMEM); @@ -12897,8 +12470,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) goto out; } - spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), - KM_NOSLEEP | KM_NORMALPRI); + spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP); if (spec == NULL) { rval = ENOMEM; @@ -12909,8 +12481,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { - if ((buf = kmem_zalloc(bufsize, - KM_NOSLEEP | KM_NORMALPRI)) == NULL) { + if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) { rval = ENOMEM; goto err; } diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c index 8cfe4cd33beb..42263e4ef274 100644 --- a/uts/common/dtrace/fasttrap.c +++ b/uts/common/dtrace/fasttrap.c @@ -24,9 +24,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. - */ #include <sys/atomic.h> #include <sys/errno.h> @@ -276,7 +273,7 @@ fasttrap_pid_cleanup_cb(void *data) fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; - int i, later, rval; + int i, later; static volatile int in = 0; ASSERT(in == 0); @@ -338,13 +335,9 @@ fasttrap_pid_cleanup_cb(void *data) * clean out the unenabled probes. 
*/ provid = fp->ftp_provid; - if ((rval = dtrace_unregister(provid)) != 0) { + if (dtrace_unregister(provid) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); - - if (rval == EAGAIN) - fp->ftp_marked = 1; - later += fp->ftp_marked; fpp = &fp->ftp_next; } else { @@ -370,16 +363,12 @@ fasttrap_pid_cleanup_cb(void *data) * get a chance to do that work if and when the timeout is reenabled * (if detach fails). */ - if (later > 0) { - if (fasttrap_timeout != (timeout_id_t)1) { - fasttrap_timeout = - timeout(&fasttrap_pid_cleanup_cb, NULL, hz); - } - + if (later > 0 && fasttrap_timeout != (timeout_id_t)1) + fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz); + else if (later > 0) fasttrap_cleanup_work = 1; - } else { + else fasttrap_timeout = 0; - } mutex_exit(&fasttrap_cleanup_mtx); in = 0; diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c index fc809d3579a5..c1a2d1f1c12f 100644 --- a/uts/common/dtrace/profile.c +++ b/uts/common/dtrace/profile.c @@ -23,9 +23,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
- */ #include <sys/errno.h> #include <sys/stat.h> @@ -411,25 +408,9 @@ profile_disable(void *arg, dtrace_id_t id, void *parg) /*ARGSUSED*/ static int -profile_mode(void *arg, dtrace_id_t id, void *parg) +profile_usermode(void *arg, dtrace_id_t id, void *parg) { - profile_probe_t *prof = parg; - int mode; - - if (CPU->cpu_profile_pc != 0) { - mode = DTRACE_MODE_KERNEL; - } else { - mode = DTRACE_MODE_USER; - } - - if (prof->prof_kind == PROF_TICK) { - mode |= DTRACE_MODE_NOPRIV_RESTRICT; - } else { - ASSERT(prof->prof_kind == PROF_PROFILE); - mode |= DTRACE_MODE_NOPRIV_DROP; - } - - return (mode); + return (CPU->cpu_profile_pc == 0); } static dtrace_pattr_t profile_attr = { @@ -449,7 +430,7 @@ static dtrace_pops_t profile_pops = { NULL, NULL, NULL, - profile_mode, + profile_usermode, profile_destroy }; diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c index bd6bda5c9bfc..a82718e8bc6e 100644 --- a/uts/common/fs/zfs/arc.c +++ b/uts/common/fs/zfs/arc.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -80,9 +78,9 @@ * types of locks: 1) the hash table lock array, and 2) the * arc list locks. * - * Buffers do not have their own mutexes, rather they rely on the - * hash table mutexes for the bulk of their protection (i.e. most - * fields in the arc_buf_hdr_t are protected by these mutexes). + * Buffers do not have their own mutexs, rather they rely on the + * hash table mutexs for the bulk of their protection (i.e. most + * fields in the arc_buf_hdr_t are protected by these mutexs). * * buf_hash_find() returns the appropriate mutex (held) when it * locates the requested buffer in the hash table. 
It returns @@ -1219,7 +1217,7 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type) ASSERT(BUF_EMPTY(hdr)); hdr->b_size = size; hdr->b_type = type; - hdr->b_spa = spa_load_guid(spa); + hdr->b_spa = spa_guid(spa); hdr->b_state = arc_anon; hdr->b_arc_access = 0; buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); @@ -1921,7 +1919,7 @@ arc_flush(spa_t *spa) uint64_t guid = 0; if (spa) - guid = spa_load_guid(spa); + guid = spa_guid(spa); while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) { (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA); @@ -1982,11 +1980,6 @@ arc_shrink(void) arc_adjust(); } -/* - * Determine if the system is under memory pressure and is asking - * to reclaim memory. A return value of 1 indicates that the system - * is under memory pressure and that the arc should adjust accordingly. - */ static int arc_reclaim_needed(void) { @@ -2034,24 +2027,11 @@ arc_reclaim_needed(void) * heap is allocated. (Or, in the calculation, if less than 1/4th is * free) */ - if (vmem_size(heap_arena, VMEM_FREE) < - (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) + if (btop(vmem_size(heap_arena, VMEM_FREE)) < + (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) return (1); #endif - /* - * If zio data pages are being allocated out of a separate heap segment, - * then enforce that the size of available vmem for this arena remains - * above about 1/16th free. - * - * Note: The 1/16th arena free requirement was put in place - * to aggressively evict memory from the arc in order to avoid - * memory fragmentation issues. - */ - if (zio_arena != NULL && - vmem_size(zio_arena, VMEM_FREE) < - (vmem_size(zio_arena, VMEM_ALLOC) >> 4)) - return (1); #else if (spa_get_random(100) == 0) return (1); @@ -2103,13 +2083,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) } kmem_cache_reap_now(buf_cache); kmem_cache_reap_now(hdr_cache); - - /* - * Ask the vmem areana to reclaim unused memory from its - * quantum caches. 
- */ - if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR) - vmem_qcache_reap(zio_arena); } static void @@ -2243,6 +2216,18 @@ arc_evict_needed(arc_buf_contents_t type) if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) return (1); +#ifdef _KERNEL + /* + * If zio data pages are being allocated out of a separate heap segment, + * then enforce that the size of available vmem for this area remains + * above about 1/32nd free. + */ + if (type == ARC_BUFC_DATA && zio_arena != NULL && + vmem_size(zio_arena, VMEM_FREE) < + (vmem_size(zio_arena, VMEM_ALLOC) >> 5)) + return (1); +#endif + if (arc_reclaim_needed()) return (1); @@ -2547,11 +2532,9 @@ arc_read_done(zio_t *zio) callback_list = hdr->b_acb; ASSERT(callback_list != NULL); if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) { - dmu_object_byteswap_t bswap = - DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp)); arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ? byteswap_uint64_array : - dmu_ot_byteswap[bswap].ob_func; + dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap; func(buf->b_data, hdr->b_size); } @@ -2636,7 +2619,7 @@ arc_read_done(zio_t *zio) } /* - * "Read" the block at the specified DVA (in bp) via the + * "Read" the block block at the specified DVA (in bp) via the * cache. If the block is found in the cache, invoke the provided * callback immediately and return. 
Note that the `zio' parameter * in the callback will be NULL in this case, since no IO was @@ -2693,7 +2676,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *buf; kmutex_t *hash_lock; zio_t *rzio; - uint64_t guid = spa_load_guid(spa); + uint64_t guid = spa_guid(spa); top: hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp), @@ -4251,7 +4234,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) boolean_t have_lock, full; l2arc_write_callback_t *cb; zio_t *pio, *wzio; - uint64_t guid = spa_load_guid(spa); + uint64_t guid = spa_guid(spa); ASSERT(dev->l2ad_vdev != NULL); diff --git a/uts/common/fs/zfs/bpobj.c b/uts/common/fs/zfs/bpobj.c index 022921c666b8..72be31235607 100644 --- a/uts/common/fs/zfs/bpobj.c +++ b/uts/common/fs/zfs/bpobj.c @@ -20,13 +20,11 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/bpobj.h> #include <sys/zfs_context.h> #include <sys/refcount.h> -#include <sys/dsl_pool.h> uint64_t bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) @@ -442,10 +440,7 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) struct space_range_arg *sra = arg; if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { - if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) - sra->used += bp_get_dsize_sync(sra->spa, bp); - else - sra->used += bp_get_dsize(sra->spa, bp); + sra->used += bp_get_dsize_sync(sra->spa, bp); sra->comp += BP_GET_PSIZE(bp); sra->uncomp += BP_GET_UCSIZE(bp); } diff --git a/uts/common/fs/zfs/bptree.c b/uts/common/fs/zfs/bptree.c deleted file mode 100644 index 8c5a7d40ef37..000000000000 --- a/uts/common/fs/zfs/bptree.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#include <sys/arc.h> -#include <sys/bptree.h> -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/dmu_tx.h> -#include <sys/dmu_traverse.h> -#include <sys/dsl_dataset.h> -#include <sys/dsl_dir.h> -#include <sys/dsl_pool.h> -#include <sys/dnode.h> -#include <sys/refcount.h> -#include <sys/spa.h> - -/* - * A bptree is a queue of root block pointers from destroyed datasets. When a - * dataset is destroyed its root block pointer is put on the end of the pool's - * bptree queue so the dataset's blocks can be freed asynchronously by - * dsl_scan_sync. This allows the delete operation to finish without traversing - * all the dataset's blocks. - * - * Note that while bt_begin and bt_end are only ever incremented in this code - * they are effectively reset to 0 every time the entire bptree is freed because - * the bptree's object is destroyed and re-created. 
- */ - -struct bptree_args { - bptree_phys_t *ba_phys; /* data in bonus buffer, dirtied if freeing */ - boolean_t ba_free; /* true if freeing during traversal */ - - bptree_itor_t *ba_func; /* function to call for each blockpointer */ - void *ba_arg; /* caller supplied argument to ba_func */ - dmu_tx_t *ba_tx; /* caller supplied tx, NULL if not freeing */ -} bptree_args_t; - -uint64_t -bptree_alloc(objset_t *os, dmu_tx_t *tx) -{ - uint64_t obj; - dmu_buf_t *db; - bptree_phys_t *bt; - - obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA, - SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA, - sizeof (bptree_phys_t), tx); - - /* - * Bonus buffer contents are already initialized to 0, but for - * readability we make it explicit. - */ - VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); - dmu_buf_will_dirty(db, tx); - bt = db->db_data; - bt->bt_begin = 0; - bt->bt_end = 0; - bt->bt_bytes = 0; - bt->bt_comp = 0; - bt->bt_uncomp = 0; - dmu_buf_rele(db, FTAG); - - return (obj); -} - -int -bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) -{ - dmu_buf_t *db; - bptree_phys_t *bt; - - VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); - bt = db->db_data; - ASSERT3U(bt->bt_begin, ==, bt->bt_end); - ASSERT3U(bt->bt_bytes, ==, 0); - ASSERT3U(bt->bt_comp, ==, 0); - ASSERT3U(bt->bt_uncomp, ==, 0); - dmu_buf_rele(db, FTAG); - - return (dmu_object_free(os, obj, tx)); -} - -void -bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, - uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx) -{ - dmu_buf_t *db; - bptree_phys_t *bt; - bptree_entry_phys_t bte; - - /* - * bptree objects are in the pool mos, therefore they can only be - * modified in syncing context. Furthermore, this is only modified - * by the sync thread, so no locking is necessary. 
- */ - ASSERT(dmu_tx_is_syncing(tx)); - - VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); - bt = db->db_data; - - bte.be_birth_txg = birth_txg; - bte.be_bp = *bp; - bzero(&bte.be_zb, sizeof (bte.be_zb)); - dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx); - - dmu_buf_will_dirty(db, tx); - bt->bt_end++; - bt->bt_bytes += bytes; - bt->bt_comp += comp; - bt->bt_uncomp += uncomp; - dmu_buf_rele(db, FTAG); -} - -/* ARGSUSED */ -static int -bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) -{ - int err; - struct bptree_args *ba = arg; - - if (bp == NULL) - return (0); - - err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx); - if (err == 0 && ba->ba_free) { - ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp); - ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp); - ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp); - } - return (err); -} - -int -bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, - void *arg, dmu_tx_t *tx) -{ - int err; - uint64_t i; - dmu_buf_t *db; - struct bptree_args ba; - - ASSERT(!free || dmu_tx_is_syncing(tx)); - - err = dmu_bonus_hold(os, obj, FTAG, &db); - if (err != 0) - return (err); - - if (free) - dmu_buf_will_dirty(db, tx); - - ba.ba_phys = db->db_data; - ba.ba_free = free; - ba.ba_func = func; - ba.ba_arg = arg; - ba.ba_tx = tx; - - err = 0; - for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { - bptree_entry_phys_t bte; - - ASSERT(!free || i == ba.ba_phys->bt_begin); - - err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), - &bte, DMU_READ_NO_PREFETCH); - if (err != 0) - break; - - err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, - bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST, - bptree_visit_cb, &ba); - if (free) { - ASSERT(err == 0 || err == ERESTART); - if (err != 0) { - /* save bookmark for future resume */ - ASSERT3U(bte.be_zb.zb_objset, ==, - ZB_DESTROYED_OBJSET); - ASSERT3U(bte.be_zb.zb_level, 
==, 0); - dmu_write(os, obj, i * sizeof (bte), - sizeof (bte), &bte, tx); - break; - } else { - ba.ba_phys->bt_begin++; - (void) dmu_free_range(os, obj, - i * sizeof (bte), sizeof (bte), tx); - } - } - } - - ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end); - - /* if all blocks are free there should be no used space */ - if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) { - ASSERT3U(ba.ba_phys->bt_bytes, ==, 0); - ASSERT3U(ba.ba_phys->bt_comp, ==, 0); - ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0); - } - - dmu_buf_rele(db, FTAG); - - return (err); -} diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c index 145cc01c67dd..9c4e0296db2b 100644 --- a/uts/common/fs/zfs/dbuf.c +++ b/uts/common/fs/zfs/dbuf.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -228,7 +226,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db) boolean_t is_metadata; DB_DNODE_ENTER(db); - is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type); + is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata; DB_DNODE_EXIT(db); return (is_metadata); @@ -1302,17 +1300,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * it, since one of the current holders may be in the * middle of an update. Note that users of dbuf_undirty() * should not place a hold on the dbuf before the call. - * Also note: we can get here with a spill block, so - * test for that similar to how dbuf_dirty does. 
*/ if (refcount_count(&db->db_holds) > db->db_dirtycnt) { mutex_exit(&db->db_mtx); /* Make sure we don't toss this buffer at sync phase */ - if (db->db_blkid != DMU_SPILL_BLKID) { - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); - } + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); DB_DNODE_EXIT(db); return (0); } @@ -1325,18 +1319,11 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) *drp = dr->dr_next; - /* - * Note that there are three places in dbuf_dirty() - * where this dirty record may be put on a list. - * Make sure to do a list_remove corresponding to - * every one of those list_insert calls. - */ if (dr->dr_parent) { mutex_enter(&dr->dr_parent->dt.di.dr_mtx); list_remove(&dr->dr_parent->dt.di.dr_children, dr); mutex_exit(&dr->dr_parent->dt.di.dr_mtx); - } else if (db->db_blkid == DMU_SPILL_BLKID || - db->db_level+1 == dn->dn_nlevels) { + } else if (db->db_level+1 == dn->dn_nlevels) { ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); mutex_enter(&dn->dn_mtx); list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); diff --git a/uts/common/fs/zfs/ddt.c b/uts/common/fs/zfs/ddt.c index b3ec3ccbd27c..718331496765 100644 --- a/uts/common/fs/zfs/ddt.c +++ b/uts/common/fs/zfs/ddt.c @@ -21,7 +21,6 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/zfs_context.h> @@ -1062,9 +1061,11 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); if (spa->spa_ddt_stat_object == 0) { - spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os, - DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_DDT_STATS, tx); + spa->spa_ddt_stat_object = zap_create(ddt->ddt_os, + DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, + &spa->spa_ddt_stat_object, tx) == 0); } while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c index 94fa52f40d4f..39234eba53b2 100644 --- a/uts/common/fs/zfs/dmu.c +++ b/uts/common/fs/zfs/dmu.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -47,73 +46,60 @@ #endif const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { - { DMU_BSWAP_UINT8, TRUE, "unallocated" }, - { DMU_BSWAP_ZAP, TRUE, "object directory" }, - { DMU_BSWAP_UINT64, TRUE, "object array" }, - { DMU_BSWAP_UINT8, TRUE, "packed nvlist" }, - { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map" }, - { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" }, - { DMU_BSWAP_DNODE, TRUE, "DMU dnode" }, - { DMU_BSWAP_OBJSET, TRUE, "DMU objset" }, - { DMU_BSWAP_UINT64, TRUE, "DSL directory" }, - { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"}, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" }, - { DMU_BSWAP_ZAP, TRUE, "DSL props" }, - { DMU_BSWAP_UINT64, TRUE, "DSL dataset" }, - { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" }, - { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" }, - { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" }, 
- { DMU_BSWAP_ZAP, TRUE, "ZFS directory" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS master node" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" }, - { DMU_BSWAP_UINT8, FALSE, "zvol object" }, - { DMU_BSWAP_ZAP, TRUE, "zvol prop" }, - { DMU_BSWAP_UINT8, FALSE, "other uint8[]" }, - { DMU_BSWAP_UINT64, FALSE, "other uint64[]" }, - { DMU_BSWAP_ZAP, TRUE, "other ZAP" }, - { DMU_BSWAP_ZAP, TRUE, "persistent error log" }, - { DMU_BSWAP_UINT8, TRUE, "SPA history" }, - { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" }, - { DMU_BSWAP_ZAP, TRUE, "Pool properties" }, - { DMU_BSWAP_ZAP, TRUE, "DSL permissions" }, - { DMU_BSWAP_ACL, TRUE, "ZFS ACL" }, - { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" }, - { DMU_BSWAP_UINT8, TRUE, "FUID table" }, - { DMU_BSWAP_UINT64, TRUE, "FUID table size" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"}, - { DMU_BSWAP_ZAP, TRUE, "scan work queue" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" }, - { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"}, - { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" }, - { DMU_BSWAP_ZAP, TRUE, "DDT statistics" }, - { DMU_BSWAP_UINT8, TRUE, "System attributes" }, - { DMU_BSWAP_ZAP, TRUE, "SA master node" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr registration" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" }, - { DMU_BSWAP_ZAP, TRUE, "scan translations" }, - { DMU_BSWAP_UINT8, FALSE, "deduplicated block" }, - { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" }, - { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" } -}; - -const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { - { byteswap_uint8_array, "uint8" }, - { byteswap_uint16_array, "uint16" }, - { byteswap_uint32_array, "uint32" }, - { byteswap_uint64_array, "uint64" }, - { zap_byteswap, "zap" }, - { dnode_buf_byteswap, "dnode" }, - { dmu_objset_byteswap, "objset" }, - { zfs_znode_byteswap, "znode" }, - { zfs_oldacl_byteswap, 
"oldacl" }, - { zfs_acl_byteswap, "acl" } + { byteswap_uint8_array, TRUE, "unallocated" }, + { zap_byteswap, TRUE, "object directory" }, + { byteswap_uint64_array, TRUE, "object array" }, + { byteswap_uint8_array, TRUE, "packed nvlist" }, + { byteswap_uint64_array, TRUE, "packed nvlist size" }, + { byteswap_uint64_array, TRUE, "bpobj" }, + { byteswap_uint64_array, TRUE, "bpobj header" }, + { byteswap_uint64_array, TRUE, "SPA space map header" }, + { byteswap_uint64_array, TRUE, "SPA space map" }, + { byteswap_uint64_array, TRUE, "ZIL intent log" }, + { dnode_buf_byteswap, TRUE, "DMU dnode" }, + { dmu_objset_byteswap, TRUE, "DMU objset" }, + { byteswap_uint64_array, TRUE, "DSL directory" }, + { zap_byteswap, TRUE, "DSL directory child map"}, + { zap_byteswap, TRUE, "DSL dataset snap map" }, + { zap_byteswap, TRUE, "DSL props" }, + { byteswap_uint64_array, TRUE, "DSL dataset" }, + { zfs_znode_byteswap, TRUE, "ZFS znode" }, + { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" }, + { byteswap_uint8_array, FALSE, "ZFS plain file" }, + { zap_byteswap, TRUE, "ZFS directory" }, + { zap_byteswap, TRUE, "ZFS master node" }, + { zap_byteswap, TRUE, "ZFS delete queue" }, + { byteswap_uint8_array, FALSE, "zvol object" }, + { zap_byteswap, TRUE, "zvol prop" }, + { byteswap_uint8_array, FALSE, "other uint8[]" }, + { byteswap_uint64_array, FALSE, "other uint64[]" }, + { zap_byteswap, TRUE, "other ZAP" }, + { zap_byteswap, TRUE, "persistent error log" }, + { byteswap_uint8_array, TRUE, "SPA history" }, + { byteswap_uint64_array, TRUE, "SPA history offsets" }, + { zap_byteswap, TRUE, "Pool properties" }, + { zap_byteswap, TRUE, "DSL permissions" }, + { zfs_acl_byteswap, TRUE, "ZFS ACL" }, + { byteswap_uint8_array, TRUE, "ZFS SYSACL" }, + { byteswap_uint8_array, TRUE, "FUID table" }, + { byteswap_uint64_array, TRUE, "FUID table size" }, + { zap_byteswap, TRUE, "DSL dataset next clones"}, + { zap_byteswap, TRUE, "scan work queue" }, + { zap_byteswap, TRUE, "ZFS user/group used" }, + { 
zap_byteswap, TRUE, "ZFS user/group quota" }, + { zap_byteswap, TRUE, "snapshot refcount tags"}, + { zap_byteswap, TRUE, "DDT ZAP algorithm" }, + { zap_byteswap, TRUE, "DDT statistics" }, + { byteswap_uint8_array, TRUE, "System attributes" }, + { zap_byteswap, TRUE, "SA master node" }, + { zap_byteswap, TRUE, "SA attr registration" }, + { zap_byteswap, TRUE, "SA attr layouts" }, + { zap_byteswap, TRUE, "scan translations" }, + { byteswap_uint8_array, FALSE, "deduplicated block" }, + { zap_byteswap, TRUE, "DSL deadlist map" }, + { byteswap_uint64_array, TRUE, "DSL deadlist map hdr" }, + { zap_byteswap, TRUE, "DSL dir clones" }, + { byteswap_uint64_array, TRUE, "bpobj subobj" }, }; int @@ -190,7 +176,7 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) DB_DNODE_ENTER(db); dn = DB_DNODE(db); - if (!DMU_OT_IS_VALID(type)) { + if (type > DMU_OT_NUMTYPES) { error = EINVAL; } else if (dn->dn_bonus != db) { error = EINVAL; @@ -1517,7 +1503,7 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) { dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET; - boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) || + boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata || (wp & WP_SPILL)); enum zio_checksum checksum = os->os_checksum; enum zio_compress compress = os->os_compress; diff --git a/uts/common/fs/zfs/dmu_objset.c b/uts/common/fs/zfs/dmu_objset.c index 09c4ecf4dd58..7caebd979f02 100644 --- a/uts/common/fs/zfs/dmu_objset.c +++ b/uts/common/fs/zfs/dmu_objset.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -700,33 +699,30 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) spa_t *spa = dd->dd_pool->dp_spa; struct oscarg *oa = arg2; uint64_t obj; - dsl_dataset_t *ds; - blkptr_t *bp; ASSERT(dmu_tx_is_syncing(tx)); obj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_origin, oa->flags, oa->cr, tx); - VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - if (BP_IS_HOLE(bp)) { - objset_t *os = - dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + if (oa->clone_origin == NULL) { + dsl_pool_t *dp = dd->dd_pool; + dsl_dataset_t *ds; + blkptr_t *bp; + objset_t *os; + + VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + ASSERT(BP_IS_HOLE(bp)); + + os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); if (oa->userfunc) oa->userfunc(os, oa->userarg, oa->cr, tx); + dsl_dataset_rele(ds, FTAG); } - if (oa->clone_origin == NULL) { - spa_history_log_internal_ds(ds, "create", tx, ""); - } else { - char namebuf[MAXNAMELEN]; - dsl_dataset_name(oa->clone_origin, namebuf); - spa_history_log_internal_ds(ds, "clone", tx, - "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); - } - dsl_dataset_rele(ds, FTAG); + spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); } int @@ -803,40 +799,34 @@ dmu_objset_destroy(const char *name, boolean_t defer) return (error); } -typedef struct snapallarg { - dsl_sync_task_group_t *saa_dstg; - boolean_t saa_needsuspend; - nvlist_t *saa_props; - - /* the following are used only if 'temporary' is set: */ - boolean_t saa_temporary; - const char *saa_htag; - struct dsl_ds_holdarg *saa_ha; - dsl_dataset_t *saa_newds; -} snapallarg_t; - -typedef struct snaponearg { - const char *soa_longname; /* long snap name */ - const char *soa_snapname; /* short snap name */ - snapallarg_t *soa_saa; -} snaponearg_t; +struct snaparg { + dsl_sync_task_group_t *dstg; + char *snapname; + 
char *htag; + char failed[MAXPATHLEN]; + boolean_t recursive; + boolean_t needsuspend; + boolean_t temporary; + nvlist_t *props; + struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ + dsl_dataset_t *newds; +}; static int snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; - snaponearg_t *soa = arg2; - snapallarg_t *saa = soa->soa_saa; + struct snaparg *sn = arg2; int error; /* The props have already been checked by zfs_check_userprops(). */ error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - soa->soa_snapname, tx); + sn->snapname, tx); if (error) return (error); - if (saa->saa_temporary) { + if (sn->temporary) { /* * Ideally we would just call * dsl_dataset_user_hold_check() and @@ -854,13 +844,12 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) * Not checking number of tags because the tag will be * unique, as it will be the only tag. */ - if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) return (E2BIG); - saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), - KM_SLEEP); - saa->saa_ha->temphold = B_TRUE; - saa->saa_ha->htag = saa->saa_htag; + sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + sn->ha->temphold = B_TRUE; + sn->ha->htag = sn->htag; } return (error); } @@ -870,25 +859,24 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; dsl_dataset_t *ds = os->os_dsl_dataset; - snaponearg_t *soa = arg2; - snapallarg_t *saa = soa->soa_saa; + struct snaparg *sn = arg2; - dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); + dsl_dataset_snapshot_sync(ds, sn->snapname, tx); - if (saa->saa_props != NULL) { + if (sn->props) { dsl_props_arg_t pa; - pa.pa_props = saa->saa_props; + pa.pa_props = sn->props; pa.pa_source = ZPROP_SRC_LOCAL; dsl_props_set_sync(ds->ds_prev, &pa, tx); } - if (saa->saa_temporary) { + if (sn->temporary) { struct dsl_ds_destroyarg da; - dsl_dataset_user_hold_sync(ds->ds_prev, 
saa->saa_ha, tx); - kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); - saa->saa_ha = NULL; - saa->saa_newds = ds->ds_prev; + dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); + kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); + sn->ha = NULL; + sn->newds = ds->ds_prev; da.ds = ds->ds_prev; da.defer = B_TRUE; @@ -897,180 +885,131 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static int -snapshot_one_impl(const char *snapname, void *arg) +dmu_objset_snapshot_one(const char *name, void *arg) { - char fsname[MAXPATHLEN]; - snapallarg_t *saa = arg; - snaponearg_t *soa; + struct snaparg *sn = arg; objset_t *os; int err; + char *cp; + + /* + * If the objset starts with a '%', then ignore it unless it was + * explicitly named (ie, not recursive). These hidden datasets + * are always inconsistent, and by not opening them here, we can + * avoid a race with dsl_dir_destroy_check(). + */ + cp = strrchr(name, '/'); + if (cp && cp[1] == '%' && sn->recursive) + return (0); - (void) strlcpy(fsname, snapname, sizeof (fsname)); - strchr(fsname, '@')[0] = '\0'; + (void) strcpy(sn->failed, name); + + /* + * Check permissions if we are doing a recursive snapshot. The + * permission checks for the starting dataset have already been + * performed in zfs_secpolicy_snapshot() + */ + if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) + return (err); - err = dmu_objset_hold(fsname, saa, &os); + err = dmu_objset_hold(name, sn, &os); if (err != 0) return (err); /* * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. + * of being destroyed), don't snapshot it. As with %hidden + * datasets, we return EBUSY if this name was explicitly + * requested (ie, not recursive), and otherwise ignore it. */ if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, saa); - return (EBUSY); + dmu_objset_rele(os, sn); + return (sn->recursive ? 
0 : EBUSY); } - if (saa->saa_needsuspend) { + if (sn->needsuspend) { err = zil_suspend(dmu_objset_zil(os)); if (err) { - dmu_objset_rele(os, saa); + dmu_objset_rele(os, sn); return (err); } } - - soa = kmem_zalloc(sizeof (*soa), KM_SLEEP); - soa->soa_saa = saa; - soa->soa_longname = snapname; - soa->soa_snapname = strchr(snapname, '@') + 1; - - dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, - os, soa, 3); + dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, + os, sn, 3); return (0); } -/* - * The snapshots must all be in the same pool. - */ int -dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) +dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) { dsl_sync_task_t *dst; - snapallarg_t saa = { 0 }; + struct snaparg sn; spa_t *spa; - int rv = 0; + minor_t minor; int err; - nvpair_t *pair; - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) - return (0); + (void) strcpy(sn.failed, fsname); - err = spa_open(nvpair_name(pair), &spa, FTAG); + err = spa_open(fsname, &spa, FTAG); if (err) return (err); - saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - saa.saa_props = props; - saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - err = snapshot_one_impl(nvpair_name(pair), &saa); - if (err != 0) { - if (errors != NULL) { - fnvlist_add_int32(errors, - nvpair_name(pair), err); - } - rv = err; - } - } - - /* - * If any call to snapshot_one_impl() failed, don't execute the - * sync task. The error handling code below will clean up the - * snaponearg_t from any successful calls to - * snapshot_one_impl(). 
- */ - if (rv == 0) - err = dsl_sync_task_group_wait(saa.saa_dstg); - if (err != 0) - rv = err; - for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; - dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { - objset_t *os = dst->dst_arg1; - snaponearg_t *soa = dst->dst_arg2; - if (dst->dst_err != 0) { - if (errors != NULL) { - fnvlist_add_int32(errors, - soa->soa_longname, dst->dst_err); - } - rv = dst->dst_err; + if (temporary) { + if (cleanup_fd < 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { + spa_close(spa, FTAG); + return (err); } - - if (saa.saa_needsuspend) - zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, &saa); - kmem_free(soa, sizeof (*soa)); } - dsl_sync_task_group_destroy(saa.saa_dstg); - spa_close(spa, FTAG); - return (rv); -} - -int -dmu_objset_snapshot_one(const char *fsname, const char *snapname) -{ - int err; - char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); - nvlist_t *snaps = fnvlist_alloc(); - - fnvlist_add_boolean(snaps, longsnap); - err = dmu_objset_snapshot(snaps, NULL, NULL); - fnvlist_free(snaps); - strfree(longsnap); - return (err); -} - -int -dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) -{ - dsl_sync_task_t *dst; - snapallarg_t saa = { 0 }; - spa_t *spa; - minor_t minor; - int err; - - err = spa_open(snapname, &spa, FTAG); - if (err) - return (err); - saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - saa.saa_htag = tag; - saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - saa.saa_temporary = B_TRUE; - - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); + sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + sn.snapname = snapname; + sn.htag = tag; + sn.props = props; + sn.recursive = recursive; + sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + sn.temporary = 
temporary; + sn.ha = NULL; + sn.newds = NULL; + + if (recursive) { + err = dmu_objset_find(fsname, + dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); + } else { + err = dmu_objset_snapshot_one(fsname, &sn); } - err = snapshot_one_impl(snapname, &saa); - if (err == 0) - err = dsl_sync_task_group_wait(saa.saa_dstg); + err = dsl_sync_task_group_wait(sn.dstg); - for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; - dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { + for (dst = list_head(&sn.dstg->dstg_tasks); dst; + dst = list_next(&sn.dstg->dstg_tasks, dst)) { objset_t *os = dst->dst_arg1; - dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); - if (saa.saa_needsuspend) + dsl_dataset_t *ds = os->os_dsl_dataset; + if (dst->dst_err) { + dsl_dataset_name(ds, sn.failed); + } else if (temporary) { + dsl_register_onexit_hold_cleanup(sn.newds, tag, minor); + } + if (sn.needsuspend) zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, &saa); + dmu_objset_rele(os, &sn); } - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(saa.saa_dstg); + if (err) + (void) strcpy(fsname, sn.failed); + if (temporary) + zfs_onexit_fd_rele(cleanup_fd); + dsl_sync_task_group_destroy(sn.dstg); spa_close(spa, FTAG); return (err); } - static void dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) { diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c index 5a2c6e2ce759..e47d533a44f4 100644 --- a/uts/common/fs/zfs/dmu_send.c +++ b/uts/common/fs/zfs/dmu_send.c @@ -20,9 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #include <sys/dmu.h> @@ -47,38 +44,50 @@ #include <sys/ddt.h> #include <sys/zfs_onexit.h> -/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ -int zfs_send_corrupt_data = B_FALSE; - static char *dmu_recv_tag = "dmu_recv_tag"; +/* + * The list of data whose inclusion in a send stream can be pending from + * one call to backup_cb to another. Multiple calls to dump_free() and + * dump_freeobjects() can be aggregated into a single DRR_FREE or + * DRR_FREEOBJECTS replay record. + */ +typedef enum { + PENDING_NONE, + PENDING_FREE, + PENDING_FREEOBJECTS +} pendop_t; + +struct backuparg { + dmu_replay_record_t *drr; + vnode_t *vp; + offset_t *off; + objset_t *os; + zio_cksum_t zc; + uint64_t toguid; + int err; + pendop_t pending_op; +}; + static int -dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) +dump_bytes(struct backuparg *ba, void *buf, int len) { - dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; ssize_t resid; /* have to get resid to get detailed errno */ ASSERT3U(len % 8, ==, 0); - fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); - dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, + fletcher_4_incremental_native(buf, len, &ba->zc); + ba->err = vn_rdwr(UIO_WRITE, ba->vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - - mutex_enter(&ds->ds_sendstream_lock); - *dsp->dsa_off += len; - mutex_exit(&ds->ds_sendstream_lock); - - return (dsp->dsa_err); + *ba->off += len; + return (ba->err); } static int -dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, +dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, uint64_t length) { - struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); - - if (length != -1ULL && offset + length < offset) - length = -1ULL; + struct drr_free *drrf = &(ba->drr->drr_u.drr_free); /* * If there is a pending op, but it's not PENDING_FREE, push it out, @@ -87,15 +96,13 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, * other DRR_FREE records. 
DRR_FREEOBJECTS records can only be * aggregated with other DRR_FREEOBJECTS records. */ - if (dsp->dsa_pending_op != PENDING_NONE && - dsp->dsa_pending_op != PENDING_FREE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } - if (dsp->dsa_pending_op == PENDING_FREE) { + if (ba->pending_op == PENDING_FREE) { /* * There should never be a PENDING_FREE if length is -1 * (because dump_dnode is the only place where this @@ -113,35 +120,34 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, return (0); } else { /* not a continuation. Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } } /* create a FREE record and make it pending */ - bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); - dsp->dsa_drr->drr_type = DRR_FREE; + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; drrf->drr_length = length; - drrf->drr_toguid = dsp->dsa_toguid; + drrf->drr_toguid = ba->toguid; if (length == -1ULL) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); } else { - dsp->dsa_pending_op = PENDING_FREE; + ba->pending_op = PENDING_FREE; } return (0); } static int -dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, +dump_data(struct backuparg *ba, dmu_object_type_t type, uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) { - struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); + struct drr_write *drrw = &(ba->drr->drr_u.drr_write); /* @@ -150,20 
+156,19 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, * the stream, since aggregation can't be done across operations * of different types. */ - if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } /* write a DATA record */ - bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); - dsp->dsa_drr->drr_type = DRR_WRITE; + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_WRITE; drrw->drr_object = object; drrw->drr_type = type; drrw->drr_offset = offset; drrw->drr_length = blksz; - drrw->drr_toguid = dsp->dsa_toguid; + drrw->drr_toguid = ba->toguid; drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; @@ -172,43 +177,42 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(dsp, data, blksz) != 0) + if (dump_bytes(ba, data, blksz) != 0) return (EINTR); return (0); } static int -dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) +dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) { - struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); + struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); - if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op 
= PENDING_NONE; } /* write a SPILL record */ - bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); - dsp->dsa_drr->drr_type = DRR_SPILL; + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_SPILL; drrs->drr_object = object; drrs->drr_length = blksz; - drrs->drr_toguid = dsp->dsa_toguid; + drrs->drr_toguid = ba->toguid; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) return (EINTR); - if (dump_bytes(dsp, data, blksz)) + if (dump_bytes(ba, data, blksz)) return (EINTR); return (0); } static int -dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) +dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) { - struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); + struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, @@ -217,14 +221,13 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records * can only be aggregated with other DRR_FREEOBJECTS records. */ - if (dsp->dsa_pending_op != PENDING_NONE && - dsp->dsa_pending_op != PENDING_FREEOBJECTS) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (ba->pending_op != PENDING_NONE && + ba->pending_op != PENDING_FREEOBJECTS) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } - if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { + if (ba->pending_op == PENDING_FREEOBJECTS) { /* * See whether this free object array can be aggregated * with pending one @@ -234,43 +237,42 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) return (0); } else { /* can't be aggregated. 
Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } } /* write a FREEOBJECTS record */ - bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); - dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_FREEOBJECTS; drrfo->drr_firstobj = firstobj; drrfo->drr_numobjs = numobjs; - drrfo->drr_toguid = dsp->dsa_toguid; + drrfo->drr_toguid = ba->toguid; - dsp->dsa_pending_op = PENDING_FREEOBJECTS; + ba->pending_op = PENDING_FREEOBJECTS; return (0); } static int -dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) +dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) { - struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + struct drr_object *drro = &(ba->drr->drr_u.drr_object); if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) - return (dump_freeobjects(dsp, object, 1)); + return (dump_freeobjects(ba, object, 1)); - if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (ba->pending_op != PENDING_NONE) { + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - dsp->dsa_pending_op = PENDING_NONE; + ba->pending_op = PENDING_NONE; } /* write an OBJECT record */ - bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); - dsp->dsa_drr->drr_type = DRR_OBJECT; + bzero(ba->drr, sizeof (dmu_replay_record_t)); + ba->drr->drr_type = DRR_OBJECT; drro->drr_object = object; drro->drr_type = dnp->dn_type; drro->drr_bonustype = dnp->dn_bonustype; @@ -278,19 +280,19 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_bonuslen = dnp->dn_bonuslen; drro->drr_checksumtype = dnp->dn_checksum; drro->drr_compress = dnp->dn_compress; - drro->drr_toguid = dsp->dsa_toguid; + drro->drr_toguid = ba->toguid; - if (dump_bytes(dsp, 
dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) return (EINTR); /* free anything past the end of the file */ - if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * + if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) return (EINTR); - if (dsp->dsa_err) + if (ba->err) return (EINTR); return (0); } @@ -304,7 +306,7 @@ static int backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { - dmu_sendarg_t *dsp = arg; + struct backuparg *ba = arg; dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; int err = 0; @@ -317,10 +319,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { uint64_t span = BP_SPAN(dnp, zb->zb_level); uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; - err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); + err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { uint64_t span = BP_SPAN(dnp, zb->zb_level); - err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); + err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { @@ -339,7 +341,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, for (i = 0; i < blksz >> DNODE_SHIFT; i++) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; - err = dump_dnode(dsp, dnobj, blk+i); + err = dump_dnode(ba, dnobj, blk+i); if (err) break; } @@ -354,7 +356,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ZIO_FLAG_CANFAIL, &aflags, zb) 
!= 0) return (EIO); - err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; @@ -363,22 +365,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { - if (zfs_send_corrupt_data) { - /* Send a block filled with 0x"zfs badd bloc" */ - abuf = arc_buf_alloc(spa, blksz, &abuf, - ARC_BUFC_DATA); - uint64_t *ptr; - for (ptr = abuf->b_data; - (char *)ptr < (char *)abuf->b_data + blksz; - ptr++) - *ptr = 0x2f5baddb10c; - } else { - return (EIO); - } - } + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); - err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, + err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } @@ -387,53 +377,14 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, return (err); } -/* - * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. - * For example, they could both be snapshots of the same filesystem, and - * 'earlier' is before 'later'. Or 'earlier' could be the origin of - * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's - * filesystem. Or 'earlier' could be the origin's origin. 
- */ -static boolean_t -is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) -{ - dsl_pool_t *dp = later->ds_dir->dd_pool; - int error; - boolean_t ret; - dsl_dataset_t *origin; - - if (earlier->ds_phys->ds_creation_txg >= - later->ds_phys->ds_creation_txg) - return (B_FALSE); - - if (later->ds_dir == earlier->ds_dir) - return (B_TRUE); - if (!dsl_dir_is_clone(later->ds_dir)) - return (B_FALSE); - - rw_enter(&dp->dp_config_rwlock, RW_READER); - if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { - rw_exit(&dp->dp_config_rwlock); - return (B_TRUE); - } - error = dsl_dataset_hold_obj(dp, - later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); - rw_exit(&dp->dp_config_rwlock); - if (error != 0) - return (B_FALSE); - ret = is_before(origin, earlier); - dsl_dataset_rele(origin, FTAG); - return (ret); -} - int -dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, - offset_t *off) +dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + vnode_t *vp, offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dmu_replay_record_t *drr; - dmu_sendarg_t *dsp; + struct backuparg ba; int err; uint64_t fromtxg = 0; @@ -441,13 +392,30 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* - * fromsnap must be an earlier snapshot from the same fs as tosnap, - * or the origin's fs. 
- */ - if (fromds != NULL && !is_before(ds, fromds)) + /* fromsnap must be an earlier snapshot from the same fs as tosnap */ + if (fromds && (ds->ds_dir != fromds->ds_dir || + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) return (EXDEV); + if (fromorigin) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap) + return (EINVAL); + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -457,10 +425,8 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, #ifdef _KERNEL if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { uint64_t version; - if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { - kmem_free(drr, sizeof (dmu_replay_record_t)); + if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) return (EINVAL); - } if (version == ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, @@ -472,7 +438,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; - if (fromds != NULL && ds->ds_dir != fromds->ds_dir) + if (fromorigin) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) @@ -484,121 +450,47 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, if (fromds) fromtxg = fromds->ds_phys->ds_creation_txg; - - dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); - - dsp->dsa_drr = drr; - dsp->dsa_vp = vp; - dsp->dsa_outfd = outfd; - dsp->dsa_proc = curproc; - 
dsp->dsa_os = tosnap; - dsp->dsa_off = off; - dsp->dsa_toguid = ds->ds_phys->ds_guid; - ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); - dsp->dsa_pending_op = PENDING_NONE; - - mutex_enter(&ds->ds_sendstream_lock); - list_insert_head(&ds->ds_sendstreams, dsp); - mutex_exit(&ds->ds_sendstream_lock); - - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { - err = dsp->dsa_err; - goto out; + if (fromorigin) + dsl_dataset_rele(fromds, FTAG); + + ba.drr = drr; + ba.vp = vp; + ba.os = tosnap; + ba.off = off; + ba.toguid = ds->ds_phys->ds_guid; + ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); + ba.pending_op = PENDING_NONE; + + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (ba.err); } err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, - backup_cb, dsp); + backup_cb, &ba); - if (dsp->dsa_pending_op != PENDING_NONE) - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) + if (ba.pending_op != PENDING_NONE) + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) err = EINTR; if (err) { - if (err == EINTR && dsp->dsa_err) - err = dsp->dsa_err; - goto out; + if (err == EINTR && ba.err) + err = ba.err; + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (err); } bzero(drr, sizeof (dmu_replay_record_t)); drr->drr_type = DRR_END; - drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; - drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; + drr->drr_u.drr_end.drr_checksum = ba.zc; + drr->drr_u.drr_end.drr_toguid = ba.toguid; - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { - err = dsp->dsa_err; - goto out; + if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); + return (ba.err); } -out: - mutex_enter(&ds->ds_sendstream_lock); - list_remove(&ds->ds_sendstreams, dsp); - mutex_exit(&ds->ds_sendstream_lock); - kmem_free(drr, sizeof (dmu_replay_record_t)); - kmem_free(dsp, sizeof (dmu_sendarg_t)); - - return 
(err); -} - -int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) -{ - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - int err; - uint64_t size; - - /* tosnap must be a snapshot */ - if (ds->ds_phys->ds_next_snap_obj == 0) - return (EINVAL); - - /* - * fromsnap must be an earlier snapshot from the same fs as tosnap, - * or the origin's fs. - */ - if (fromds != NULL && !is_before(ds, fromds)) - return (EXDEV); - - /* Get uncompressed size estimate of changed data. */ - if (fromds == NULL) { - size = ds->ds_phys->ds_uncompressed_bytes; - } else { - uint64_t used, comp; - err = dsl_dataset_space_written(fromds, ds, - &used, &comp, &size); - if (err) - return (err); - } - - /* - * Assume that space (both on-disk and in-stream) is dominated by - * data. We will adjust for indirect blocks and the copies property, - * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). - */ - - /* - * Subtract out approximate space used by indirect blocks. - * Assume most space is used by data blocks (non-indirect, non-dnode). - * Assume all blocks are recordsize. Assume ditto blocks and - * internal fragmentation counter out compression. - * - * Therefore, space used by indirect blocks is sizeof(blkptr_t) per - * block, which we observe in practice. - */ - uint64_t recordsize; - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_prop_get_ds(ds, "recordsize", - sizeof (recordsize), 1, &recordsize, NULL); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - size -= size / recordsize * sizeof (blkptr_t); - - /* Add in the space for the record associated with each block. 
*/ - size += size / recordsize * sizeof (dmu_replay_record_t); - - *sizep = size; return (0); } @@ -665,7 +557,8 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } - spa_history_log_internal_ds(rbsa->ds, "receive new", tx, ""); + spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, + dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); } /* ARGSUSED */ @@ -766,7 +659,8 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds = cds; - spa_history_log_internal_ds(cds, "receive over existing", tx, ""); + spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, + dp->dp_spa, tx, "dataset = %lld", dsobj); } static boolean_t @@ -939,6 +833,61 @@ guid_compare(const void *arg1, const void *arg2) return (0); } +/* + * This function is a callback used by dmu_objset_find() (which + * enumerates the object sets) to build an avl tree that maps guids + * to datasets. The resulting table is used when processing DRR_WRITE_BYREF + * send stream records. These records, which are used in dedup'ed + * streams, do not contain data themselves, but refer to a copy + * of the data block that has already been written because it was + * earlier in the stream. That previous copy is identified by the + * guid of the dataset with the referenced data. + */ +int +find_ds_by_guid(const char *name, void *arg) +{ + avl_tree_t *guid_map = arg; + dsl_dataset_t *ds, *snapds; + guid_map_entry_t *gmep; + dsl_pool_t *dp; + int err; + uint64_t lastobj, firstobj; + + if (dsl_dataset_hold(name, FTAG, &ds) != 0) + return (0); + + dp = ds->ds_dir->dd_pool; + rw_enter(&dp->dp_config_rwlock, RW_READER); + firstobj = ds->ds_dir->dd_phys->dd_origin_obj; + lastobj = ds->ds_phys->ds_prev_snap_obj; + + while (lastobj != firstobj) { + err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds); + if (err) { + /* + * Skip this snapshot and move on. It's not + * clear why this would ever happen, but the + * remainder of the snapshot streadm can be + * processed. 
+ */ + rw_exit(&dp->dp_config_rwlock); + dsl_dataset_rele(ds, FTAG); + return (0); + } + + gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); + gmep->guid = snapds->ds_phys->ds_guid; + gmep->gme_ds = snapds; + avl_add(guid_map, gmep); + lastobj = snapds->ds_phys->ds_prev_snap_obj; + } + + rw_exit(&dp->dp_config_rwlock); + dsl_dataset_rele(ds, FTAG); + + return (0); +} + static void free_guid_map_onexit(void *arg) { @@ -1076,8 +1025,8 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) void *data = NULL; if (drro->drr_type == DMU_OT_NONE || - !DMU_OT_IS_VALID(drro->drr_type) || - !DMU_OT_IS_VALID(drro->drr_bonustype) || + drro->drr_type >= DMU_OT_NUMTYPES || + drro->drr_bonustype >= DMU_OT_NUMTYPES || drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || @@ -1142,9 +1091,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) ASSERT3U(db->db_size, >=, drro->drr_bonuslen); bcopy(data, db->db_data, drro->drr_bonuslen); if (ra->byteswap) { - dmu_object_byteswap_t byteswap = - DMU_OT_BYTESWAP(drro->drr_bonustype); - dmu_ot_byteswap[byteswap].ob_func(db->db_data, + dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, drro->drr_bonuslen); } dmu_buf_rele(db, FTAG); @@ -1187,7 +1134,7 @@ restore_write(struct restorearg *ra, objset_t *os, int err; if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || - !DMU_OT_IS_VALID(drrw->drr_type)) + drrw->drr_type >= DMU_OT_NUMTYPES) return (EINVAL); data = restore_read(ra, drrw->drr_length); @@ -1206,11 +1153,8 @@ restore_write(struct restorearg *ra, objset_t *os, dmu_tx_abort(tx); return (err); } - if (ra->byteswap) { - dmu_object_byteswap_t byteswap = - DMU_OT_BYTESWAP(drrw->drr_type); - dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); - } + if (ra->byteswap) + dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); dmu_write(os, drrw->drr_object, 
drrw->drr_offset, drrw->drr_length, data, tx); dmu_tx_commit(tx); @@ -1426,6 +1370,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, avl_create(ra.guid_to_ds_map, guid_compare, sizeof (guid_map_entry_t), offsetof(guid_map_entry_t, avlnode)); + (void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid, + (void *)ra.guid_to_ds_map, + DS_FIND_CHILDREN); ra.err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); @@ -1437,8 +1384,6 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, if (ra.err) goto out; } - - drc->drc_guid_to_ds_map = ra.guid_to_ds_map; } /* @@ -1574,31 +1519,6 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; - spa_history_log_internal_ds(ds, "finished receiving", tx, ""); -} - -static int -add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) -{ - dsl_pool_t *dp = ds->ds_dir->dd_pool; - uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; - dsl_dataset_t *snapds; - guid_map_entry_t *gmep; - int err; - - ASSERT(guid_map != NULL); - - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); - if (err == 0) { - gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); - gmep->guid = snapds->ds_phys->ds_guid; - gmep->gme_ds = snapds; - avl_add(guid_map, gmep); - } - - rw_exit(&dp->dp_config_rwlock); - return (err); } static int @@ -1606,7 +1526,7 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; - int err, myerr; + int err; /* * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() @@ -1641,11 +1561,8 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) out: mutex_exit(&ds->ds_recvlock); - if (err == 0 && drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); dsl_dataset_disown(ds, dmu_recv_tag); - myerr = 
dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); - ASSERT3U(myerr, ==, 0); + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); return (err); } @@ -1673,8 +1590,6 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc) /* clean up the fs we just recv'd into */ (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); } else { - if (drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); /* release the hold from dmu_recv_begin */ dsl_dataset_disown(ds, dmu_recv_tag); } diff --git a/uts/common/fs/zfs/dmu_traverse.c b/uts/common/fs/zfs/dmu_traverse.c index bfe9e6506426..023f90e12e34 100644 --- a/uts/common/fs/zfs/dmu_traverse.c +++ b/uts/common/fs/zfs/dmu_traverse.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -54,7 +53,6 @@ typedef struct traverse_data { uint64_t td_objset; blkptr_t *td_rootbp; uint64_t td_min_txg; - zbookmark_t *td_resume; int td_flags; prefetch_data_t *td_pfd; blkptr_cb_t *td_func; @@ -130,54 +128,6 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh) zil_free(zilog); } -typedef enum resume_skip { - RESUME_SKIP_ALL, - RESUME_SKIP_NONE, - RESUME_SKIP_CHILDREN -} resume_skip_t; - -/* - * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and - * the block indicated by zb does not need to be visited at all. Returns - * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the - * resume point. This indicates that this block should be visited but not its - * children (since they must have been visited in a previous traversal). - * Otherwise returns RESUME_SKIP_NONE. 
- */ -static resume_skip_t -resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp, - const zbookmark_t *zb) -{ - if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) { - /* - * If we already visited this bp & everything below, - * don't bother doing it again. - */ - if (zbookmark_is_before(dnp, zb, td->td_resume)) - return (RESUME_SKIP_ALL); - - /* - * If we found the block we're trying to resume from, zero - * the bookmark out to indicate that we have resumed. - */ - ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object); - if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) { - bzero(td->td_resume, sizeof (*zb)); - if (td->td_flags & TRAVERSE_POST) - return (RESUME_SKIP_CHILDREN); - } - } - return (RESUME_SKIP_NONE); -} - -static void -traverse_pause(traverse_data_t *td, const zbookmark_t *zb) -{ - ASSERT(td->td_resume != NULL); - ASSERT3U(zb->zb_level, ==, 0); - bcopy(zb, td->td_resume, sizeof (*td->td_resume)); -} - static int traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb) @@ -187,20 +137,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, arc_buf_t *buf = NULL; prefetch_data_t *pd = td->td_pfd; boolean_t hard = td->td_flags & TRAVERSE_HARD; - boolean_t pause = B_FALSE; - - switch (resume_skip_check(td, dnp, zb)) { - case RESUME_SKIP_ALL: - return (0); - case RESUME_SKIP_CHILDREN: - goto post; - case RESUME_SKIP_NONE: - break; - default: - ASSERT(0); - } - if (BP_IS_HOLE(bp)) { + if (bp->blk_birth == 0) { err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp, td->td_arg); return (err); @@ -226,10 +164,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, td->td_arg); if (err == TRAVERSE_VISIT_NO_CHILDREN) return (0); - if (err == ERESTART) - pause = B_TRUE; /* handle pausing at a common point */ - if (err != 0) - goto post; + if (err) + return (err); } if (BP_GET_LEVEL(bp) > 0) { @@ -317,18 +253,9 @@ traverse_visitbp(traverse_data_t *td, const 
dnode_phys_t *dnp, if (buf) (void) arc_buf_remove_ref(buf, &buf); -post: if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) { err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp, td->td_arg); - if (err == ERESTART) - pause = B_TRUE; - } - - if (pause && td->td_resume != NULL) { - ASSERT3U(err, ==, ERESTART); - ASSERT(!hard); - traverse_pause(td, zb); } return (err != 0 ? err : lasterr); @@ -426,23 +353,18 @@ traverse_prefetch_thread(void *arg) * in syncing context). */ static int -traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, - uint64_t txg_start, zbookmark_t *resume, int flags, - blkptr_cb_t func, void *arg) +traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp, + uint64_t txg_start, int flags, blkptr_cb_t func, void *arg) { traverse_data_t td; prefetch_data_t pd = { 0 }; zbookmark_t czb; int err; - ASSERT(ds == NULL || objset == ds->ds_object); - ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST)); - td.td_spa = spa; - td.td_objset = objset; + td.td_objset = ds ? 
ds->ds_object : 0; td.td_rootbp = rootbp; td.td_min_txg = txg_start; - td.td_resume = resume; td.td_func = func; td.td_arg = arg; td.td_pfd = &pd; @@ -494,17 +416,8 @@ int traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg) { - return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object, - &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg)); -} - -int -traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, - uint64_t txg_start, zbookmark_t *resume, int flags, - blkptr_cb_t func, void *arg) -{ - return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET, - blkptr, txg_start, resume, flags, func, arg)); + return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, + &ds->ds_phys->ds_bp, txg_start, flags, func, arg)); } /* @@ -521,8 +434,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, boolean_t hard = (flags & TRAVERSE_HARD); /* visit the MOS */ - err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa), - txg_start, NULL, flags, func, arg); + err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa), + txg_start, flags, func, arg); if (err) return (err); diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c index 723d62b48542..bd5c71a2265e 100644 --- a/uts/common/fs/zfs/dmu_tx.c +++ b/uts/common/fs/zfs/dmu_tx.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/dmu.h> @@ -48,7 +46,7 @@ dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); tx->tx_dir = dd; - if (dd != NULL) + if (dd) tx->tx_pool = dd->dd_pool; list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), offsetof(dmu_tx_hold_t, txh_node)); @@ -675,11 +673,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) return; } - ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP); + ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap); if (dn->dn_maxblkid == 0 && !add) { - blkptr_t *bp; - /* * If there is only one block (i.e. this is a micro-zap) * and we are not adding anything, the accounting is simple. @@ -694,13 +690,14 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) * Use max block size here, since we don't know how much * the size will change between now and the dbuf dirty call. */ - bp = &dn->dn_phys->dn_blkptr[0]; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - bp, bp->blk_birth)) + &dn->dn_phys->dn_blkptr[0], + dn->dn_phys->dn_blkptr[0].blk_birth)) { txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; - else + } else { txh->txh_space_towrite += SPA_MAXBLOCKSIZE; - if (!BP_IS_HOLE(bp)) + } + if (dn->dn_phys->dn_blkptr[0].blk_birth) txh->txh_space_tounref += SPA_MAXBLOCKSIZE; return; } @@ -1276,6 +1273,7 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) { dnode_t *dn; dmu_tx_hold_t *txh; + blkptr_t *bp; txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_SPILL, 0, 0); @@ -1286,18 +1284,17 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) return; /* If blkptr doesn't exist then add space to towrite */ - if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) { + bp = &dn->dn_phys->dn_spill; + if (BP_IS_HOLE(bp)) { txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + txh->txh_space_tounref = 0; } else { - blkptr_t *bp; - - bp = &dn->dn_phys->dn_spill; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, bp, bp->blk_birth)) 
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; else txh->txh_space_towrite += SPA_MAXBLOCKSIZE; - if (!BP_IS_HOLE(bp)) + if (bp->blk_birth) txh->txh_space_tounref += SPA_MAXBLOCKSIZE; } } diff --git a/uts/common/fs/zfs/dnode.c b/uts/common/fs/zfs/dnode.c index 05ccf9fc62d3..850dd5816bf3 100644 --- a/uts/common/fs/zfs/dnode.c +++ b/uts/common/fs/zfs/dnode.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -194,7 +193,7 @@ dnode_verify(dnode_t *dn) ASSERT(dn->dn_objset); ASSERT(dn->dn_handle->dnh_dnode == dn); - ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); + ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY)) return; @@ -213,7 +212,7 @@ dnode_verify(dnode_t *dn) ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz); } ASSERT3U(dn->dn_nlevels, <=, 30); - ASSERT(DMU_OT_IS_VALID(dn->dn_type)); + ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES); ASSERT3U(dn->dn_nblkptr, >=, 1); ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); @@ -279,10 +278,8 @@ dnode_byteswap(dnode_phys_t *dnp) */ int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t); size_t len = DN_MAX_BONUSLEN - off; - ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype)); - dmu_object_byteswap_t byteswap = - DMU_OT_BYTESWAP(dnp->dn_bonustype); - dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len); + ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES); + dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len); } /* Swap SPILL block if we have one */ @@ -410,7 +407,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, dmu_zfetch_init(&dn->dn_zfetch, dn); - ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); + ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); mutex_enter(&os->os_lock); list_insert_head(&os->os_dnodes, dn); @@ -499,11 +496,11 @@ dnode_allocate(dnode_t *dn, 
dmu_object_type_t ot, int blocksize, int ibs, ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE); ASSERT(ot != DMU_OT_NONE); - ASSERT(DMU_OT_IS_VALID(ot)); + ASSERT3U(ot, <, DMU_OT_NUMTYPES); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype == DMU_OT_SA && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0)); - ASSERT(DMU_OT_IS_VALID(bonustype)); + ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); ASSERT(dn->dn_type == DMU_OT_NONE); ASSERT3U(dn->dn_maxblkid, ==, 0); @@ -571,7 +568,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0) || (bonustype == DMU_OT_SA && bonuslen == 0)); - ASSERT(DMU_OT_IS_VALID(bonustype)); + ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); /* clean up any unreferenced dbufs */ diff --git a/uts/common/fs/zfs/dnode_sync.c b/uts/common/fs/zfs/dnode_sync.c index 8d817919b34a..2ee990a3b32c 100644 --- a/uts/common/fs/zfs/dnode_sync.c +++ b/uts/common/fs/zfs/dnode_sync.c @@ -18,10 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -596,7 +594,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) } if (dn->dn_next_bonustype[txgoff]) { - ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff])); + ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES); dnp->dn_bonustype = dn->dn_next_bonustype[txgoff]; dn->dn_next_bonustype[txgoff] = 0; } diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c index 555797e77efe..59ac4a60947a 100644 --- a/uts/common/fs/zfs/dsl_dataset.c +++ b/uts/common/fs/zfs/dsl_dataset.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include <sys/dmu_objset.h> @@ -30,12 +28,10 @@ #include <sys/dsl_prop.h> #include <sys/dsl_synctask.h> #include <sys/dmu_traverse.h> -#include <sys/dmu_impl.h> #include <sys/dmu_tx.h> #include <sys/arc.h> #include <sys/zio.h> #include <sys/zap.h> -#include <sys/zfeature.h> #include <sys/unique.h> #include <sys/zfs_context.h> #include <sys/zfs_ioctl.h> @@ -101,7 +97,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) if (BP_IS_HOLE(bp)) return; ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); - ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); + ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); if (ds == NULL) { /* * Account for the meta-objset space in its placeholder @@ -118,7 +114,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); - ds->ds_phys->ds_referenced_bytes += used; + ds->ds_phys->ds_used_bytes += used; ds->ds_phys->ds_compressed_bytes += compressed; ds->ds_phys->ds_uncompressed_bytes += uncompressed; ds->ds_phys->ds_unique_bytes += used; @@ -212,8 +208,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, } } mutex_enter(&ds->ds_lock); - ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); - ds->ds_phys->ds_referenced_bytes -= used; + ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); + ds->ds_phys->ds_used_bytes -= used; ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); ds->ds_phys->ds_compressed_bytes -= compressed; ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); @@ -397,8 +393,6 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ds->ds_sendstream_lock, NULL, 
MUTEX_DEFAULT, NULL); - rw_init(&ds->ds_rwlock, 0, 0, 0); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); @@ -406,9 +400,6 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); - list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), - offsetof(dmu_sendarg_t, dsa_link)); - if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); @@ -819,8 +810,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = origin->ds_phys->ds_creation_txg; - dsphys->ds_referenced_bytes = - origin->ds_phys->ds_referenced_bytes; + dsphys->ds_used_bytes = + origin->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = origin->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = @@ -910,76 +901,87 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, return (dsobj); } +struct destroyarg { + dsl_sync_task_group_t *dstg; + char *snapname; + char *failed; + boolean_t defer; +}; + +static int +dsl_snapshot_destroy_one(const char *name, void *arg) +{ + struct destroyarg *da = arg; + dsl_dataset_t *ds; + int err; + char *dsname; + + dsname = kmem_asprintf("%s@%s", name, da->snapname); + err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); + strfree(dsname); + if (err == 0) { + struct dsl_ds_destroyarg *dsda; + + dsl_dataset_make_exclusive(ds, da->dstg); + dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); + dsda->ds = ds; + dsda->defer = da->defer; + dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, dsda, da->dstg, 0); + } else if (err == ENOENT) { + err = 0; + } else { + (void) strcpy(da->failed, name); + } + return (err); +} + /* - * The snapshots must all be in the same pool. + * Destroy 'snapname' in all descendants of 'fsname'. 
*/ +#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, - nvlist_t *errlist) +dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) { int err; + struct destroyarg da; dsl_sync_task_t *dst; spa_t *spa; - nvpair_t *pair; - dsl_sync_task_group_t *dstg; - - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) - return (0); - err = spa_open(nvpair_name(pair), &spa, FTAG); + err = spa_open(fsname, &spa, FTAG); if (err) return (err); - dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - dsl_dataset_t *ds; + da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + da.snapname = snapname; + da.failed = fsname; + da.defer = defer; - err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); - if (err == 0) { - struct dsl_ds_destroyarg *dsda; - - dsl_dataset_make_exclusive(ds, dstg); - dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), - KM_SLEEP); - dsda->ds = ds; - dsda->defer = defer; - dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, dsda, dstg, 0); - } else if (err == ENOENT) { - err = 0; - } else { - fnvlist_add_int32(errlist, nvpair_name(pair), err); - break; - } - } + err = dmu_objset_find(fsname, + dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); if (err == 0) - err = dsl_sync_task_group_wait(dstg); + err = dsl_sync_task_group_wait(da.dstg); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { + for (dst = list_head(&da.dstg->dstg_tasks); dst; + dst = list_next(&da.dstg->dstg_tasks, dst)) { struct dsl_ds_destroyarg *dsda = dst->dst_arg1; dsl_dataset_t *ds = dsda->ds; /* - * Return the snapshots that triggered the error. 
+ * Return the file system name that triggered the error */ - if (dst->dst_err != 0) { - char name[ZFS_MAXNAMELEN]; - dsl_dataset_name(ds, name); - fnvlist_add_int32(errlist, name, dst->dst_err); + if (dst->dst_err) { + dsl_dataset_name(ds, fsname); + *strchr(fsname, '@') = '\0'; } ASSERT3P(dsda->rm_origin, ==, NULL); - dsl_dataset_disown(ds, dstg); + dsl_dataset_disown(ds, da.dstg); kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); } - dsl_sync_task_group_destroy(dstg); + dsl_sync_task_group_destroy(da.dstg); spa_close(spa, FTAG); return (err); - } static boolean_t @@ -1048,6 +1050,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; + dsl_dataset_t dummy_ds = { 0 }; dsda.ds = ds; @@ -1067,6 +1070,8 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } dd = ds->ds_dir; + dummy_ds.ds_dir = dd; + dummy_ds.ds_object = ds->ds_object; /* * Check for errors and mark this ds as inconsistent, in @@ -1082,23 +1087,19 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) goto out; /* - * If async destruction is not enabled try to remove all objects - * while in the open context so that there is less work to do in - * the syncing context. + * remove the objects in open context, so that we won't + * have too much to do in syncing context. */ - if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, - ds->ds_phys->ds_prev_snap_txg)) { - /* - * Ignore errors, if there is not enough disk space - * we will deal with it in dsl_dataset_destroy_sync(). - */ - (void) dmu_free_object(os, obj); - } - if (err != ESRCH) - goto out; + for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, + ds->ds_phys->ds_prev_snap_txg)) { + /* + * Ignore errors, if there is not enough disk space + * we will deal with it in dsl_dataset_destroy_sync(). 
+ */ + (void) dmu_free_object(os, obj); } + if (err != ESRCH) + goto out; /* * Only the ZIL knows how to free log blocks. @@ -1153,7 +1154,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, dd, FTAG, 0); + dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); @@ -1244,7 +1245,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) ASSERT(!dsl_dataset_is_snapshot(ds)); if (ds->ds_phys->ds_prev_snap_obj != 0) - mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; + mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; else mrs_used = 0; @@ -1252,7 +1253,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) ASSERT3U(dlused, <=, mrs_used); ds->ds_phys->ds_unique_bytes = - ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); + ds->ds_phys->ds_used_bytes - (mrs_used - dlused); if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) @@ -1328,12 +1329,14 @@ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; + dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - spa_history_log_internal_ds(ds, "destroy begin", tx, ""); + spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, + "dataset = %llu", ds->ds_object); } static int @@ -1608,30 +1611,6 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, ds_next->ds_phys->ds_deadlist_obj); } -static int -old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - int err; - struct killarg ka; - - /* - * Free everything that we point to (that's born after - * the previous snapshot, if we are a clone) - * - * NB: this should be very quick, because we already - 
* freed all the objects in open context. - */ - ka.ds = ds; - ka.tx = tx; - err = traverse_dataset(ds, - ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka); - ASSERT3U(err, ==, 0); - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); - - return (err); -} - void dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { @@ -1658,13 +1637,9 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; - spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } - /* We need to log before removing it from the namespace. */ - spa_history_log_internal_ds(ds, "destroy", tx, ""); - /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; @@ -1782,6 +1757,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) tx); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); + dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); /* Merge our deadlist into next's and free it. */ dsl_deadlist_merge(&ds_next->ds_deadlist, @@ -1857,54 +1833,32 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) } dsl_dataset_rele(ds_next, FTAG); } else { - zfeature_info_t *async_destroy = - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; - /* * There's no next snapshot, so this is a head dataset. * Destroy the deadlist. Unless it's a clone, the * deadlist should be empty. (If it's a clone, it's * safe to ignore the deadlist contents.) 
*/ + struct killarg ka; + dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; - if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { - err = old_synchronous_dataset_destroy(ds, tx); - } else { - /* - * Move the bptree into the pool's list of trees to - * clean up and update space accounting information. - */ - uint64_t used, comp, uncomp; - - ASSERT(err == 0 || err == EBUSY); - if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { - spa_feature_incr(dp->dp_spa, async_destroy, tx); - dp->dp_bptree_obj = bptree_alloc( - dp->dp_meta_objset, tx); - VERIFY(zap_add(dp->dp_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, - &dp->dp_bptree_obj, tx) == 0); - } - - used = ds->ds_dir->dd_phys->dd_used_bytes; - comp = ds->ds_dir->dd_phys->dd_compressed_bytes; - uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; - - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || - ds->ds_phys->ds_unique_bytes == used); - - bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj, - &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, - used, comp, uncomp, tx); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, - -used, -comp, -uncomp, tx); - dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, - used, comp, uncomp, tx); - } + /* + * Free everything that we point to (that's born after + * the previous snapshot, if we are a clone) + * + * NB: this should be very quick, because we already + * freed all the objects in open context. 
+ */ + ka.ds = ds; + ka.tx = tx; + err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, + TRAVERSE_POST, kill_blkptr, &ka); + ASSERT3U(err, ==, 0); + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || + ds->ds_phys->ds_unique_bytes == 0); if (ds->ds_prev != NULL) { if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { @@ -1959,6 +1913,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, + "dataset = %llu", ds->ds_object); if (ds->ds_phys->ds_next_clones_obj != 0) { uint64_t count; @@ -2006,7 +1962,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) return (ENOSPC); /* - * Propagate any reserved space for this snapshot to other + * Propogate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) @@ -2016,9 +1972,10 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) } int -dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, - dmu_tx_t *tx) +dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { + dsl_dataset_t *ds = arg1; + const char *snapname = arg2; int err; uint64_t value; @@ -2030,7 +1987,7 @@ dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, return (EAGAIN); /* - * Check for conflicting snapshot name. + * Check for conflicting name snapshot name. 
*/ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) @@ -2054,9 +2011,10 @@ dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, } void -dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, - dmu_tx_t *tx) +dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { + dsl_dataset_t *ds = arg1; + const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; @@ -2091,7 +2049,7 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = crtxg; dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; - dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; + dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; dsphys->ds_flags = ds->ds_phys->ds_flags; @@ -2162,7 +2120,8 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, dsl_dir_snap_cmtime_update(ds->ds_dir); - spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); + spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, + "dataset = %llu", dsobj); } void @@ -2183,86 +2142,12 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) dmu_objset_sync(ds->ds_objset, zio, tx); } -static void -get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) -{ - uint64_t count = 0; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; - nvlist_t *propval; - nvlist_t *val; - - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - /* - * There may me missing entries in ds_next_clones_obj - * due to a bug in a previous version of the code. - * Only trust it if it has the right number of entries. 
- */ - if (ds->ds_phys->ds_next_clones_obj != 0) { - ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, - &count)); - } - if (count != ds->ds_phys->ds_num_children - 1) { - goto fail; - } - for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - dsl_dataset_t *clone; - char buf[ZFS_MAXNAMELEN]; - /* - * Even though we hold the dp_config_rwlock, the dataset - * may fail to open, returning ENOENT. If there is a - * thread concurrently attempting to destroy this - * dataset, it will have the ds_rwlock held for - * RW_WRITER. Our call to dsl_dataset_hold_obj() -> - * dsl_dataset_hold_ref() will fail its - * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the - * dp_config_rwlock, and wait for the destroy progress - * and signal ds_exclusive_cv. If the destroy was - * successful, we will see that - * DSL_DATASET_IS_DESTROYED(), and return ENOENT. - */ - if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone) != 0) - continue; - dsl_dir_name(clone->ds_dir, buf); - VERIFY(nvlist_add_boolean(val, buf) == 0); - dsl_dataset_rele(clone, FTAG); - } - zap_cursor_fini(&zc); - VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); - VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), - propval) == 0); -fail: - nvlist_free(val); - nvlist_free(propval); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); -} - void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { - uint64_t refd, avail, uobjs, aobjs, ratio; - - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 
100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); - - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + uint64_t refd, avail, uobjs, aobjs; - if (dsl_dataset_is_snapshot(ds)) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); - get_clones_stat(ds, nv); - } else { - dsl_dir_stats(ds->ds_dir, nv); - } + dsl_dir_stats(ds->ds_dir, nv); dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2287,26 +2172,18 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); - if (ds->ds_phys->ds_prev_snap_obj != 0) { - uint64_t written, comp, uncomp; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - dsl_dataset_t *prev; - - rw_enter(&dp->dp_config_rwlock, RW_READER); - int err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); - rw_exit(&dp->dp_config_rwlock); - if (err == 0) { - err = dsl_dataset_space_written(prev, ds, &written, - &comp, &uncomp); - dsl_dataset_rele(prev, FTAG); - if (err == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, - written); - } - } + if (ds->ds_phys->ds_next_snap_obj) { + /* + * This is a snapshot; override the dd's space used with + * our unique space and compression ratio. + */ + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, + ds->ds_phys->ds_compressed_bytes == 0 ? 
100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes)); } - } void @@ -2315,25 +2192,27 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; - stat->dds_origin[0] = '\0'; - if (dsl_dataset_is_snapshot(ds)) { + if (ds->ds_phys->ds_next_snap_obj) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; + } - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; + /* clone origin is really a dsl_dir thing... */ + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); - } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_drop_ref(ods, FTAG); + } else { + stat->dds_origin[0] = '\0'; } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t @@ -2347,7 +2226,7 @@ dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { - *refdbytesp = ds->ds_phys->ds_referenced_bytes; + *refdbytesp = ds->ds_phys->ds_used_bytes; *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; @@ -2450,8 +2329,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ds->ds_snapname, 8, 1, 
&ds->ds_object, tx); ASSERT3U(err, ==, 0); - spa_history_log_internal_ds(ds, "rename", tx, - "-> @%s", newsnapname); + spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, + "dataset = %llu", ds->ds_object); dsl_dataset_rele(hds, FTAG); } @@ -2684,7 +2563,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ - pa->used = origin_ds->ds_phys->ds_referenced_bytes; + pa->used = origin_ds->ds_phys->ds_used_bytes; pa->comp = origin_ds->ds_phys->ds_compressed_bytes; pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; for (snap = list_head(&pa->shared_snaps); snap; @@ -2718,7 +2597,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * so we need to subtract out the clone origin's used space. */ if (pa->origin_origin) { - pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; + pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; } @@ -2931,7 +2810,8 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ - spa_history_log_internal_ds(hds, "promote", tx, ""); + spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, + "dataset = %llu", hds->ds_object); dsl_dir_close(odd, FTAG); } @@ -3233,8 +3113,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_deadlist_space(&csa->ohds->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp); - dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used - - (csa->ohds->ds_phys->ds_referenced_bytes + odl_used); + dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - + (csa->ohds->ds_phys->ds_used_bytes + odl_used); dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); duncomp = 
csa->cds->ds_phys->ds_uncompressed_bytes + @@ -3263,8 +3143,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) } /* swap ds_*_bytes */ - SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes, - csa->cds->ds_phys->ds_referenced_bytes); + SWITCH64(csa->ohds->ds_phys->ds_used_bytes, + csa->cds->ds_phys->ds_used_bytes); SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, csa->cds->ds_phys->ds_compressed_bytes); SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, @@ -3289,9 +3169,6 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); - - spa_history_log_internal_ds(csa->cds, "clone swap", tx, - "parent=%s", csa->ohds->ds_dir->dd_myname); } /* @@ -3396,9 +3273,8 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, * on-disk is over quota and there are no pending changes (which * may free up space for us). */ - if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { - if (inflight > 0 || - ds->ds_phys->ds_referenced_bytes < ds->ds_quota) + if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { + if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) error = ERESTART; else error = EDQUOT; @@ -3425,7 +3301,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) if (psa->psa_effective_value == 0) return (0); - if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes || + if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || psa->psa_effective_value < ds->ds_reserved) return (ENOSPC); @@ -3448,8 +3324,9 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; - spa_history_log_internal_ds(ds, "set refquota", tx, - "refquota=%lld", (longlong_t)ds->ds_quota); + spa_history_log_internal(LOG_DS_REFQUOTA, + ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ", + (longlong_t)ds->ds_quota, ds->ds_object); } } @@ 
-3554,8 +3431,9 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); - spa_history_log_internal_ds(ds, "set refreservation", tx, - "refreservation=%lld", (longlong_t)effective_value); + spa_history_log_internal(LOG_DS_REFRESERV, + ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu", + (longlong_t)effective_value, ds->ds_object); } int @@ -3621,7 +3499,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - const char *htag = ha->htag; + char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; @@ -3655,7 +3533,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - const char *htag = ha->htag; + char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); @@ -3683,9 +3561,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) htag, &now, tx)); } - spa_history_log_internal_ds(ds, "hold", tx, - "tag = %s temp = %d holds now = %llu", - htag, (int)ha->temphold, ds->ds_userrefs); + spa_history_log_internal(LOG_DS_USER_HOLD, + dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, + (int)ha->temphold, ds->ds_object); } static int @@ -3892,6 +3770,7 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; + uint64_t dsobj = ds->ds_object; uint64_t refs; int error; @@ -3914,8 +3793,9 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_dataset_destroy_sync(&dsda, tag, tx); } - spa_history_log_internal_ds(ds, "release", tx, - "tag = %s refs now = %lld", ra->htag, (longlong_t)refs); + spa_history_log_internal(LOG_DS_USER_RELEASE, + dp->dp_spa, tx, "<%s> %lld 
dataset = %llu", + ra->htag, (longlong_t)refs, dsobj); } static int @@ -4129,7 +4009,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) } /* - * Note, this function is used as the callback for dmu_objset_find(). We + * Note, this fuction is used as the callback for dmu_objset_find(). We * always return 0 so that we will continue to find and process * inconsistent datasets, even if we encounter an error trying to * process one of them. @@ -4148,156 +4028,3 @@ dsl_destroy_inconsistent(const char *dsname, void *arg) } return (0); } - -/* - * Return (in *usedp) the amount of space written in new that is not - * present in oldsnap. New may be a snapshot or the head. Old must be - * a snapshot before new, in new's filesystem (or its origin). If not then - * fail and return EINVAL. - * - * The written space is calculated by considering two components: First, we - * ignore any freed space, and calculate the written as new's used space - * minus old's used space. Next, we add in the amount of space that was freed - * between the two snapshots, thus reducing new's used space relative to old's. - * Specifically, this is the space that was born before old->ds_creation_txg, - * and freed before new (ie. on new's deadlist or a previous deadlist). 
- * - * space freed [---------------------] - * snapshots ---O-------O--------O-------O------ - * oldsnap new - */ -int -dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, - uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) -{ - int err = 0; - uint64_t snapobj; - dsl_pool_t *dp = new->ds_dir->dd_pool; - - *usedp = 0; - *usedp += new->ds_phys->ds_referenced_bytes; - *usedp -= oldsnap->ds_phys->ds_referenced_bytes; - - *compp = 0; - *compp += new->ds_phys->ds_compressed_bytes; - *compp -= oldsnap->ds_phys->ds_compressed_bytes; - - *uncompp = 0; - *uncompp += new->ds_phys->ds_uncompressed_bytes; - *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; - - rw_enter(&dp->dp_config_rwlock, RW_READER); - snapobj = new->ds_object; - while (snapobj != oldsnap->ds_object) { - dsl_dataset_t *snap; - uint64_t used, comp, uncomp; - - if (snapobj == new->ds_object) { - snap = new; - } else { - err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); - if (err != 0) - break; - } - - if (snap->ds_phys->ds_prev_snap_txg == - oldsnap->ds_phys->ds_creation_txg) { - /* - * The blocks in the deadlist can not be born after - * ds_prev_snap_txg, so get the whole deadlist space, - * which is more efficient (especially for old-format - * deadlists). Unfortunately the deadlist code - * doesn't have enough information to make this - * optimization itself. - */ - dsl_deadlist_space(&snap->ds_deadlist, - &used, &comp, &uncomp); - } else { - dsl_deadlist_space_range(&snap->ds_deadlist, - 0, oldsnap->ds_phys->ds_creation_txg, - &used, &comp, &uncomp); - } - *usedp += used; - *compp += comp; - *uncompp += uncomp; - - /* - * If we get to the beginning of the chain of snapshots - * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap - * was not a snapshot of/before new. 
- */ - snapobj = snap->ds_phys->ds_prev_snap_obj; - if (snap != new) - dsl_dataset_rele(snap, FTAG); - if (snapobj == 0) { - err = EINVAL; - break; - } - - } - rw_exit(&dp->dp_config_rwlock); - return (err); -} - -/* - * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, - * lastsnap, and all snapshots in between are deleted. - * - * blocks that would be freed [---------------------------] - * snapshots ---O-------O--------O-------O--------O - * firstsnap lastsnap - * - * This is the set of blocks that were born after the snap before firstsnap, - * (birth > firstsnap->prev_snap_txg) and died before the snap after the - * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). - * We calculate this by iterating over the relevant deadlists (from the snap - * after lastsnap, backward to the snap after firstsnap), summing up the - * space on the deadlist that was born after the snap before firstsnap. - */ -int -dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, - dsl_dataset_t *lastsnap, - uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) -{ - int err = 0; - uint64_t snapobj; - dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; - - ASSERT(dsl_dataset_is_snapshot(firstsnap)); - ASSERT(dsl_dataset_is_snapshot(lastsnap)); - - /* - * Check that the snapshots are in the same dsl_dir, and firstsnap - * is before lastsnap. 
- */ - if (firstsnap->ds_dir != lastsnap->ds_dir || - firstsnap->ds_phys->ds_creation_txg > - lastsnap->ds_phys->ds_creation_txg) - return (EINVAL); - - *usedp = *compp = *uncompp = 0; - - rw_enter(&dp->dp_config_rwlock, RW_READER); - snapobj = lastsnap->ds_phys->ds_next_snap_obj; - while (snapobj != firstsnap->ds_object) { - dsl_dataset_t *ds; - uint64_t used, comp, uncomp; - - err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); - if (err != 0) - break; - - dsl_deadlist_space_range(&ds->ds_deadlist, - firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &used, &comp, &uncomp); - *usedp += used; - *compp += comp; - *uncompp += uncomp; - - snapobj = ds->ds_phys->ds_prev_snap_obj; - ASSERT3U(snapobj, !=, 0); - dsl_dataset_rele(ds, FTAG); - } - rw_exit(&dp->dp_config_rwlock); - return (err); -} diff --git a/uts/common/fs/zfs/dsl_deadlist.c b/uts/common/fs/zfs/dsl_deadlist.c index dd6db2120b31..064f8aceb8ee 100644 --- a/uts/common/fs/zfs/dsl_deadlist.c +++ b/uts/common/fs/zfs/dsl_deadlist.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/dsl_dataset.h> @@ -30,26 +29,6 @@ #include <sys/zfs_context.h> #include <sys/dsl_pool.h> -/* - * Deadlist concurrency: - * - * Deadlists can only be modified from the syncing thread. - * - * Except for dsl_deadlist_insert(), it can only be modified with the - * dp_config_rwlock held with RW_WRITER. - * - * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can - * be called concurrently, from open context, with the dl_config_rwlock held - * with RW_READER. - * - * Therefore, we only need to provide locking between dsl_deadlist_insert() and - * the accessors, protecting: - * dl_phys->dl_used,comp,uncomp - * and protecting the dl_tree from being loaded. - * The locking is provided by dl_lock. Note that locking on the bpobj_t - * provides its own locking, and dl_oldfmt is immutable. 
- */ - static int dsl_deadlist_compare(const void *arg1, const void *arg2) { @@ -330,14 +309,14 @@ dsl_deadlist_space(dsl_deadlist_t *dl, * return space used in the range (mintxg, maxtxg]. * Includes maxtxg, does not include mintxg. * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is - * larger than any bp in the deadlist (eg. UINT64_MAX)). + * UINT64_MAX). */ void dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { - dsl_deadlist_entry_t *dle; dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; avl_index_t where; if (dl->dl_oldfmt) { @@ -346,10 +325,9 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, return; } + dsl_deadlist_load_tree(dl); *usedp = *compp = *uncompp = 0; - mutex_enter(&dl->dl_lock); - dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); /* @@ -358,7 +336,6 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, */ ASSERT(dle != NULL || avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); - for (; dle && dle->dle_mintxg < maxtxg; dle = AVL_NEXT(&dl->dl_tree, dle)) { uint64_t used, comp, uncomp; @@ -370,7 +347,6 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, *compp += comp; *uncompp += uncomp; } - mutex_exit(&dl->dl_lock); } static void diff --git a/uts/common/fs/zfs/dsl_deleg.c b/uts/common/fs/zfs/dsl_deleg.c index ba620bd6fbed..529fb052fa75 100644 --- a/uts/common/fs/zfs/dsl_deleg.c +++ b/uts/common/fs/zfs/dsl_deleg.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* @@ -171,8 +170,10 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { - jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS, - zapobj, whokey, tx); + jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, + DMU_OT_NONE, 0, tx); + VERIFY(zap_update(mos, zapobj, + whokey, 8, 1, &jumpobj, tx) == 0); } while (permpair = nvlist_next_nvpair(perms, permpair)) { @@ -181,8 +182,10 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); - spa_history_log_internal_dd(dd, "permission update", tx, - "%s %s", whokey, perm); + spa_history_log_internal(LOG_DS_PERM_UPDATE, + dd->dd_pool->dp_spa, tx, + "%s %s dataset = %llu", whokey, perm, + dd->dd_phys->dd_head_dataset_obj); } } } @@ -211,8 +214,10 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal_dd(dd, "permission who remove", - tx, "%s", whokey); + spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, + dd->dd_pool->dp_spa, tx, + "%s dataset = %llu", whokey, + dd->dd_phys->dd_head_dataset_obj); continue; } @@ -230,8 +235,10 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal_dd(dd, "permission remove", tx, - "%s %s", whokey, perm); + spa_history_log_internal(LOG_DS_PERM_REMOVE, + dd->dd_pool->dp_spa, tx, + "%s %s dataset = %llu", whokey, perm, + dd->dd_phys->dd_head_dataset_obj); } } } diff --git a/uts/common/fs/zfs/dsl_dir.c b/uts/common/fs/zfs/dsl_dir.c index 74c1050fabf0..1cd49c8274e8 100644 --- a/uts/common/fs/zfs/dsl_dir.c +++ b/uts/common/fs/zfs/dsl_dir.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/dmu.h> @@ -40,8 +39,8 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, - uint64_t value, dmu_tx_t *tx); +static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); + /* ARGSUSED */ static void @@ -448,7 +447,8 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, int dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; @@ -477,19 +477,24 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) void dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_prop_setarg_t psa; + uint64_t value = 0; uint64_t obj; dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - /* - * Remove our reservation. The impl() routine avoids setting the - * actual property, which would require the (already destroyed) ds. - */ - dsl_dir_set_reservation_sync_impl(dd, 0, tx); + /* Remove our reservation. 
*/ + dsl_prop_setarg_init_uint64(&psa, "reservation", + (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), + &value); + psa.psa_effective_value = 0; /* predict default value */ + + dsl_dir_set_reservation_sync(ds, &psa, tx); ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0); ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); @@ -1055,8 +1060,9 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) dd->dd_phys->dd_quota = effective_value; mutex_exit(&dd->dd_lock); - spa_history_log_internal_dd(dd, "set quota", tx, - "quota=%lld", (longlong_t)effective_value); + spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa, + tx, "%lld dataset = %llu ", + (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); } int @@ -1143,17 +1149,25 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) { + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t effective_value = psa->psa_effective_value; uint64_t used; int64_t delta; + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = value; + delta = MAX(used, effective_value) - + MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = effective_value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1161,24 +1175,10 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) delta, 0, 0, tx); } mutex_exit(&dd->dd_lock); -} - - -static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t value = 
psa->psa_effective_value; - - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - dsl_dir_set_reservation_sync_impl(dd, value, tx); - - spa_history_log_internal_dd(dd, "set reservation", tx, - "reservation=%lld", (longlong_t)value); + spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa, + tx, "%lld dataset = %llu", + (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj); } int @@ -1299,15 +1299,9 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; - char namebuf[MAXNAMELEN]; ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); - /* Log this before we change the name. */ - dsl_dir_name(ra->newparent, namebuf); - spa_history_log_internal_dd(dd, "rename", tx, - "-> %s/%s", namebuf, ra->mynewname); - if (ra->newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, @@ -1347,6 +1341,8 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT3U(err, ==, 0); + spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, + tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); } int diff --git a/uts/common/fs/zfs/dsl_pool.c b/uts/common/fs/zfs/dsl_pool.c index e9223944d1b4..700cc962865d 100644 --- a/uts/common/fs/zfs/dsl_pool.c +++ b/uts/common/fs/zfs/dsl_pool.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/dsl_pool.h> @@ -40,8 +39,6 @@ #include <sys/zfs_znode.h> #include <sys/spa_impl.h> #include <sys/dsl_deadlist.h> -#include <sys/bptree.h> -#include <sys/zfeature.h> int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ @@ -102,32 +99,20 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) } int -dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) +dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); - - err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, - &dp->dp_meta_objset); - if (err != 0) - dsl_pool_close(dp); - else - *dpp = dp; - - return (err); -} - -int -dsl_pool_open(dsl_pool_t *dp) -{ - int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; - ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset)); - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, + &dp->dp_meta_objset); + if (err) + goto out; + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); @@ -143,7 +128,7 @@ dsl_pool_open(dsl_pool_t *dp) if (err) goto out; - if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { + if (spa_version(spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; @@ -160,7 +145,7 @@ dsl_pool_open(dsl_pool_t *dp) goto out; } - if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) @@ -174,15 +159,6 @@ dsl_pool_open(dsl_pool_t *dp) dp->dp_meta_objset, obj)); } - if (spa_feature_is_active(dp->dp_spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, - &dp->dp_bptree_obj); - if (err != 0) - goto out; - } - err = 
zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); @@ -191,10 +167,15 @@ dsl_pool_open(dsl_pool_t *dp) if (err) goto out; - err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); + err = dsl_scan_init(dp, txg); out: rw_exit(&dp->dp_config_rwlock); + if (err) + dsl_pool_close(dp); + else + *dpp = dp; + return (err); } @@ -310,10 +291,7 @@ static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; - dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); - rw_enter(&dp->dp_config_rwlock, RW_READER); dsl_deadlist_insert(dl, bp, tx); - rw_exit(&dp->dp_config_rwlock); return (0); } @@ -488,7 +466,7 @@ int dsl_pool_sync_context(dsl_pool_t *dp) { return (curthread == dp->dp_tx.tx_sync_thread || - spa_is_initializing(dp->dp_spa)); + spa_get_dsl(dp->dp_spa) == NULL); } uint64_t @@ -806,8 +784,11 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dp->dp_tmp_userrefs_obj == 0); ASSERT(dmu_tx_is_syncing(tx)); - dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, - DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx); + dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, + DMU_OT_NONE, 0, tx); + + VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, + sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0); } static int diff --git a/uts/common/fs/zfs/dsl_prop.c b/uts/common/fs/zfs/dsl_prop.c index 5bbe14ff691d..aa66b32e7938 100644 --- a/uts/common/fs/zfs/dsl_prop.c +++ b/uts/common/fs/zfs/dsl_prop.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -703,9 +702,11 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } - spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || - source == ZPROP_SRC_INHERITED) ? 
"inherit" : "set", tx, - "%s=%s", propname, (valstr == NULL ? "" : valstr)); + spa_history_log_internal((source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : + LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, + "%s=%s dataset = %llu", propname, + (valstr == NULL ? "" : valstr), ds->ds_object); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); @@ -754,6 +755,24 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } +void +dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, + dmu_tx_t *tx) +{ + objset_t *mos = dd->dd_pool->dp_meta_objset; + uint64_t zapobj = dd->dd_phys->dd_props_zapobj; + + ASSERT(dmu_tx_is_syncing(tx)); + + VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); + + dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); + + spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, + "%s=%llu dataset = %llu", name, (u_longlong_t)val, + dd->dd_phys->dd_head_dataset_obj); +} + int dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf) diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c index 8f08f04a0655..56d41083673e 100644 --- a/uts/common/fs/zfs/dsl_scan.c +++ b/uts/common/fs/zfs/dsl_scan.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/dsl_scan.h> @@ -45,7 +44,6 @@ #include <sys/ddt.h> #include <sys/sa.h> #include <sys/sa_impl.h> -#include <sys/zfeature.h> #ifdef _KERNEL #include <sys/zfs_vfsops.h> #endif @@ -228,7 +226,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_scan_sync_state(scn, tx); - spa_history_log_internal(spa, "scan setup", tx, + spa_history_log_internal(LOG_POOL_SCAN, spa, tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } @@ -277,7 +275,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) else scn->scn_phys.scn_state = DSS_CANCELED; - spa_history_log_internal(spa, "scan done", tx, + spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, "complete=%u", complete); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { @@ -384,6 +382,55 @@ dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, priority, zio_flags, arc_flags, zb)); } +static boolean_t +bookmark_is_zero(const zbookmark_t *zb) +{ + return (zb->zb_objset == 0 && zb->zb_object == 0 && + zb->zb_level == 0 && zb->zb_blkid == 0); +} + +/* dnp is the dnode for zb1->zb_object */ +static boolean_t +bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, + const zbookmark_t *zb2) +{ + uint64_t zb1nextL0, zb2thisobj; + + ASSERT(zb1->zb_objset == zb2->zb_objset); + ASSERT(zb2->zb_level == 0); + + /* + * A bookmark in the deadlist is considered to be after + * everything else. + */ + if (zb2->zb_object == DMU_DEADLIST_OBJECT) + return (B_TRUE); + + /* The objset_phys_t isn't before anything. */ + if (dnp == NULL) + return (B_FALSE); + + zb1nextL0 = (zb1->zb_blkid + 1) << + ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + + zb2thisobj = zb2->zb_object ? 
zb2->zb_object : + zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); + + if (zb1->zb_object == DMU_META_DNODE_OBJECT) { + uint64_t nextobj = zb1nextL0 * + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; + return (nextobj <= zb2thisobj); + } + + if (zb1->zb_object < zb2thisobj) + return (B_TRUE); + if (zb1->zb_object > zb2thisobj) + return (B_FALSE); + if (zb2->zb_object == DMU_META_DNODE_OBJECT) + return (B_FALSE); + return (zb1nextL0 <= zb2->zb_blkid); +} + static uint64_t dsl_scan_ds_maxtxg(dsl_dataset_t *ds) { @@ -415,7 +462,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) if (scn->scn_pausing) return (B_TRUE); /* we're already pausing */ - if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) + if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ /* We only know how to resume from level-0 blocks. */ @@ -570,13 +617,13 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, /* * We never skip over user/group accounting objects (obj<0) */ - if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) && + if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) && (int64_t)zb->zb_object >= 0) { /* * If we already visited this bp & everything below (in * a prior txg sync), don't bother doing it again. */ - if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark)) + if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark)) return (B_TRUE); /* @@ -769,6 +816,22 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return; + if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) { + /* + * For non-user-accounting blocks, we need to read the + * new bp (from a deleted snapshot, found in + * check_existing_xlation). If we used the old bp, + * pointers inside this block from before we resumed + * would be untranslated. 
+ * + * For user-accounting blocks, we need to read the old + * bp, because we will apply the entire space delta to + * it (original untranslated -> translations from + * deleted snap -> now). + */ + bp_toread = *bp; + } + if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx, &buf) != 0) return; @@ -1333,28 +1396,19 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) zap_cursor_fini(&zc); } -static boolean_t -dsl_scan_free_should_pause(dsl_scan_t *scn) +static int +dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { + dsl_scan_t *scn = arg; uint64_t elapsed_nanosecs; elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; - return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || + + if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms && txg_sync_waiting(scn->scn_dp)) || - spa_shutting_down(scn->scn_dp->dp_spa)); -} - -static int -dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - dsl_scan_t *scn = arg; - - if (!scn->scn_is_bptree || - (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { - if (dsl_scan_free_should_pause(scn)) - return (ERESTART); - } + spa_shutting_down(scn->scn_dp->dp_spa)) + return (ERESTART); zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, dmu_tx_get_txg(tx), bp, 0)); @@ -1379,10 +1433,6 @@ dsl_scan_active(dsl_scan_t *scn) if (scn->scn_phys.scn_state == DSS_SCANNING) return (B_TRUE); - if (spa_feature_is_active(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - return (B_TRUE); - } if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, &used, &comp, &uncomp); @@ -1429,40 +1479,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) * traversing it. 
*/ if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { - scn->scn_is_bptree = B_FALSE; scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); err = bpobj_iterate(&dp->dp_free_bpobj, - dsl_scan_free_block_cb, scn, tx); + dsl_scan_free_cb, scn, tx); VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); - - if (err == 0 && spa_feature_is_active(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - scn->scn_is_bptree = B_TRUE; - scn->scn_zio_root = zio_root(dp->dp_spa, NULL, - NULL, ZIO_FLAG_MUSTSUCCEED); - err = bptree_iterate(dp->dp_meta_objset, - dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, - scn, tx); - VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); - if (err != 0) - return; - - /* disable async destroy feature */ - spa_feature_decr(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx); - ASSERT(!spa_feature_is_active(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])); - VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, tx)); - VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset, - dp->dp_bptree_obj, tx)); - dp->dp_bptree_obj = 0; - } if (scn->scn_visited_this_txg) { zfs_dbgmsg("freed %llu blocks in %llums from " - "free_bpobj/bptree txg %llu", + "free_bpobj txg %llu", (longlong_t)scn->scn_visited_this_txg, (longlong_t) (gethrtime() - scn->scn_sync_start_time) / MICROSEC, @@ -1577,8 +1601,6 @@ count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp) for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS; int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL; - if (t & DMU_OT_NEWTYPE) - t = DMU_OT_OTHER; zfs_blkstat_t *zb = &zab->zab_type[l][t]; int equal; diff --git a/uts/common/fs/zfs/dsl_synctask.c b/uts/common/fs/zfs/dsl_synctask.c index 312423e943b5..b0818ce274d4 100644 --- a/uts/common/fs/zfs/dsl_synctask.c +++ b/uts/common/fs/zfs/dsl_synctask.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/dmu.h> @@ -86,17 +85,17 @@ top: /* Do a preliminary error check. */ dstg->dstg_err = 0; -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - goto skip; -#endif rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); for (dst = list_head(&dstg->dstg_tasks); dst; dst = list_next(&dstg->dstg_tasks, dst)) { +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + continue; +#endif dst->dst_err = dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); if (dst->dst_err) @@ -108,7 +107,6 @@ top: dmu_tx_commit(tx); return (dstg->dstg_err); } -skip: /* * We don't generally have many sync tasks, so pay the price of diff --git a/uts/common/fs/zfs/metaslab.c b/uts/common/fs/zfs/metaslab.c index 2f7c882c8c51..17b4b12c4ee4 100644 --- a/uts/common/fs/zfs/metaslab.c +++ b/uts/common/fs/zfs/metaslab.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -31,29 +30,10 @@ #include <sys/vdev_impl.h> #include <sys/zio.h> -/* - * Allow allocations to switch to gang blocks quickly. We do this to - * avoid having to load lots of space_maps in a given txg. There are, - * however, some cases where we want to avoid "fast" ganging and instead - * we want to do an exhaustive search of all metaslabs on this device. - * Currently we don't allow any gang, zil, or dump device related allocations - * to "fast" gang. 
- */ -#define CAN_FASTGANG(flags) \ - (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \ - METASLAB_GANG_AVOID))) - uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ /* - * This value defines the number of allowed allocation failures per vdev. - * If a device reaches this threshold in a given txg then we consider skipping - * allocations on that device. - */ -int zfs_mg_alloc_failures; - -/* * Metaslab debugging: when set, keeps all space maps in core to verify frees. */ static int metaslab_debug = 0; @@ -691,7 +671,7 @@ static space_map_ops_t metaslab_ndf_ops = { metaslab_ndf_fragmented }; -space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; +space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops; /* * ========================================================================== @@ -864,7 +844,7 @@ metaslab_prefetch(metaslab_group_t *mg) } static int -metaslab_activate(metaslab_t *msp, uint64_t activation_weight) +metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) { metaslab_group_t *mg = msp->ms_group; space_map_t *sm = &msp->ms_map; @@ -897,6 +877,13 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight) mutex_exit(&mg->mg_lock); } + /* + * If we were able to load the map then make sure + * that this map is still able to satisfy our request. 
+ */ + if (msp->ms_weight < size) + return (ENOSPC); + metaslab_group_sort(msp->ms_group, msp, msp->ms_weight | activation_weight); } @@ -1112,7 +1099,6 @@ void metaslab_sync_reassess(metaslab_group_t *mg) { vdev_t *vd = mg->mg_vd; - int64_t failures = mg->mg_alloc_failures; /* * Re-evaluate all metaslabs which have lower offsets than the @@ -1129,8 +1115,6 @@ metaslab_sync_reassess(metaslab_group_t *mg) mutex_exit(&msp->ms_lock); } - atomic_add_64(&mg->mg_alloc_failures, -failures); - /* * Prefetch the next potential metaslabs */ @@ -1155,10 +1139,9 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) } static uint64_t -metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, - uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) +metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, + uint64_t min_distance, dva_t *dva, int d) { - spa_t *spa = mg->mg_vd->vdev_spa; metaslab_t *msp = NULL; uint64_t offset = -1ULL; avl_tree_t *t = &mg->mg_metaslab_tree; @@ -1179,17 +1162,11 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, mutex_enter(&mg->mg_lock); for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { - if (msp->ms_weight < asize) { - spa_dbgmsg(spa, "%s: failed to meet weight " - "requirement: vdev %llu, txg %llu, mg %p, " - "msp %p, psize %llu, asize %llu, " - "failures %llu, weight %llu", - spa_name(spa), mg->mg_vd->vdev_id, txg, - mg, msp, psize, asize, - mg->mg_alloc_failures, msp->ms_weight); + if (msp->ms_weight < size) { mutex_exit(&mg->mg_lock); return (-1ULL); } + was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -1208,25 +1185,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, if (msp == NULL) return (-1ULL); - /* - * If we've already reached the allowable number of failed - * allocation attempts on this metaslab group then we - * consider skipping it. 
We skip it only if we're allowed - * to "fast" gang, the physical size is larger than - * a gang block, and we're attempting to allocate from - * the primary metaslab. - */ - if (mg->mg_alloc_failures > zfs_mg_alloc_failures && - CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && - activation_weight == METASLAB_WEIGHT_PRIMARY) { - spa_dbgmsg(spa, "%s: skipping metaslab group: " - "vdev %llu, txg %llu, mg %p, psize %llu, " - "asize %llu, failures %llu", spa_name(spa), - mg->mg_vd->vdev_id, txg, mg, psize, asize, - mg->mg_alloc_failures); - return (-1ULL); - } - mutex_enter(&msp->ms_lock); /* @@ -1235,7 +1193,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, * another thread may have changed the weight while we * were blocked on the metaslab lock. */ - if (msp->ms_weight < asize || (was_active && + if (msp->ms_weight < size || (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK) && activation_weight == METASLAB_WEIGHT_PRIMARY)) { mutex_exit(&msp->ms_lock); @@ -1250,16 +1208,14 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, continue; } - if (metaslab_activate(msp, activation_weight) != 0) { + if (metaslab_activate(msp, activation_weight, size) != 0) { mutex_exit(&msp->ms_lock); continue; } - if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) + if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) break; - atomic_inc_64(&mg->mg_alloc_failures); - metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); mutex_exit(&msp->ms_lock); @@ -1268,7 +1224,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); + space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); mutex_exit(&msp->ms_lock); @@ -1395,8 +1351,7 @@ top: asize = vdev_psize_to_asize(vd, psize); ASSERT(P2PHASE(asize, 1ULL << 
vd->vdev_ashift) == 0); - offset = metaslab_group_alloc(mg, psize, asize, txg, distance, - dva, d, flags); + offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); if (offset != -1ULL) { /* * If we've just selected this metaslab group, @@ -1408,24 +1363,18 @@ top: vdev_stat_t *vs = &vd->vdev_stat; int64_t vu, cu; - vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); - cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); + /* + * Determine percent used in units of 0..1024. + * (This is just to avoid floating point.) + */ + vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); + cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); /* - * Calculate how much more or less we should - * try to allocate from this device during - * this iteration around the rotor. - * For example, if a device is 80% full - * and the pool is 20% full then we should - * reduce allocations by 60% on this device. - * - * mg_bias = (20 - 80) * 512K / 100 = -307K - * - * This reduces allocations by 307K for this - * iteration. + * Bias by at most +/- 25% of the aliquot. */ mg->mg_bias = ((cu - vu) * - (int64_t)mg->mg_aliquot) / 100; + (int64_t)mg->mg_aliquot) / (1024 * 4); } if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= @@ -1539,7 +1488,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) - error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) error = ENOENT; diff --git a/uts/common/fs/zfs/rrwlock.c b/uts/common/fs/zfs/rrwlock.c index 7f9290bd44c1..4cef53f95132 100644 --- a/uts/common/fs/zfs/rrwlock.c +++ b/uts/common/fs/zfs/rrwlock.c @@ -22,9 +22,6 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. 
- */ #include <sys/refcount.h> #include <sys/rrwlock.h> @@ -265,13 +262,3 @@ rrw_held(rrwlock_t *rrl, krw_t rw) return (held); } - -void -rrw_tsd_destroy(void *arg) -{ - rrw_node_t *rn = arg; - if (rn != NULL) { - panic("thread %p terminating with rrw lock %p held", - (void *)curthread, (void *)rn->rn_rrl); - } -} diff --git a/uts/common/fs/zfs/sa.c b/uts/common/fs/zfs/sa.c index 06607d784e42..4cb4546b2511 100644 --- a/uts/common/fs/zfs/sa.c +++ b/uts/common/fs/zfs/sa.c @@ -18,11 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright 2011 iXsystems, Inc - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -429,9 +426,10 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, char attr_name[8]; if (sa->sa_layout_attr_obj == 0) { - sa->sa_layout_attr_obj = zap_create_link(os, - DMU_OT_SA_ATTR_LAYOUTS, - sa->sa_master_obj, SA_LAYOUTS, tx); + sa->sa_layout_attr_obj = zap_create(os, + DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1, + &sa->sa_layout_attr_obj, tx) == 0); } (void) snprintf(attr_name, sizeof (attr_name), @@ -607,14 +605,14 @@ sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, * and spill buffer. 
*/ if (buftype == SA_BONUS && *index == -1 && - *total + P2ROUNDUP(hdrsize, 8) > + P2ROUNDUP(*total + hdrsize, 8) > (full_space - sizeof (blkptr_t))) { *index = i; done = B_TRUE; } next: - if (*total + P2ROUNDUP(hdrsize, 8) > full_space && + if (P2ROUNDUP(*total + hdrsize, 8) > full_space && buftype == SA_BONUS) *will_spill = B_TRUE; } @@ -1553,9 +1551,10 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) } if (sa->sa_reg_attr_obj == NULL) { - sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os, - DMU_OT_SA_ATTR_REGISTRATION, - sa->sa_master_obj, SA_REGISTRY, tx); + sa->sa_reg_attr_obj = zap_create(hdl->sa_os, + DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx); + VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj, + SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0); } for (i = 0; i != sa->sa_num_attrs; i++) { if (sa->sa_attr_table[i].sa_registered) diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c index 828d5e266643..b6190e4cfafe 100644 --- a/uts/common/fs/zfs/spa.c +++ b/uts/common/fs/zfs/spa.c @@ -21,8 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* @@ -62,7 +60,6 @@ #include <sys/spa_boot.h> #include <sys/zfs_ioctl.h> #include <sys/dsl_scan.h> -#include <sys/zfeature.h> #ifdef _KERNEL #include <sys/bootprops.h> @@ -114,7 +111,6 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, }; -static dsl_syncfunc_t spa_sync_version; static dsl_syncfunc_t spa_sync_props; static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, @@ -169,18 +165,15 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { - vdev_t *rvd = spa->spa_root_vdev; - dsl_pool_t *pool = spa->spa_dsl_pool; uint64_t size; uint64_t alloc; - uint64_t space; uint64_t cap, version; zprop_source_t src = ZPROP_SRC_NONE; spa_config_dirent_t *dp; ASSERT(MUTEX_HELD(&spa->spa_props_lock)); - if (rvd != NULL) { + if (spa->spa_root_vdev != NULL) { alloc = metaslab_class_get_alloc(spa_normal_class(spa)); size = metaslab_class_get_space(spa_normal_class(spa)); spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); @@ -188,15 +181,6 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); - - space = 0; - for (int c = 0; c < rvd->vdev_children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; - space += tvd->vdev_max_asize - tvd->vdev_asize; - } - spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space, - src); - spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, (spa_mode(spa) == FREAD), src); @@ -207,7 +191,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) ddt_get_pool_dedup_ratio(spa), src); spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, - rvd->vdev_state, src); + spa->spa_root_vdev->vdev_state, src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) @@ -217,29 +201,8 @@ 
spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); } - if (pool != NULL) { - dsl_dir_t *freedir = pool->dp_free_dir; - - /* - * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, - * when opening pools before this version freedir will be NULL. - */ - if (freedir != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, - freedir->dd_phys->dd_used_bytes, src); - } else { - spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, - NULL, 0, src); - } - } - spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); - if (spa->spa_comment != NULL) { - spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, - 0, ZPROP_SRC_LOCAL); - } - if (spa->spa_root != NULL) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); @@ -372,55 +335,25 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) nvpair_t *elem; int error = 0, reset_bootfs = 0; uint64_t objnum; - boolean_t has_feature = B_FALSE; elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + zpool_prop_t prop; + char *propname, *strval; uint64_t intval; - char *strval, *slash, *check, *fname; - const char *propname = nvpair_name(elem); - zpool_prop_t prop = zpool_name_to_prop(propname); - - switch (prop) { - case ZPROP_INVAL: - if (!zpool_prop_feature(propname)) { - error = EINVAL; - break; - } - - /* - * Sanitize the input. 
- */ - if (nvpair_type(elem) != DATA_TYPE_UINT64) { - error = EINVAL; - break; - } + objset_t *os; + char *slash; - if (nvpair_value_uint64(elem, &intval) != 0) { - error = EINVAL; - break; - } + propname = nvpair_name(elem); - if (intval != 0) { - error = EINVAL; - break; - } - - fname = strchr(propname, '@') + 1; - if (zfeature_lookup_name(fname, NULL) != 0) { - error = EINVAL; - break; - } - - has_feature = B_TRUE; - break; + if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) + return (EINVAL); + switch (prop) { case ZPOOL_PROP_VERSION: error = nvpair_value_uint64(elem, &intval); if (!error && - (intval < spa_version(spa) || - intval > SPA_VERSION_BEFORE_FEATURES || - has_feature)) + (intval < spa_version(spa) || intval > SPA_VERSION)) error = EINVAL; break; @@ -457,7 +390,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = nvpair_value_string(elem, &strval); if (!error) { - objset_t *os; uint64_t compress; if (strval == NULL || strval[0] == '\0') { @@ -530,26 +462,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = EINVAL; break; - case ZPOOL_PROP_COMMENT: - if ((error = nvpair_value_string(elem, &strval)) != 0) - break; - for (check = strval; *check != '\0'; check++) { - /* - * The kernel doesn't have an easy isprint() - * check. For this kernel check, we merely - * check ASCII apart from DEL. Fix this if - * there is an easy-to-use kernel isprint(). 
- */ - if (*check >= 0x7f) { - error = EINVAL; - break; - } - check++; - } - if (strlen(strval) > ZPROP_MAX_COMMENT) - error = E2BIG; - break; - case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) error = ENOTSUP; @@ -607,58 +519,33 @@ int spa_prop_set(spa_t *spa, nvlist_t *nvp) { int error; - nvpair_t *elem = NULL; + nvpair_t *elem; boolean_t need_sync = B_FALSE; + zpool_prop_t prop; if ((error = spa_prop_validate(spa, nvp)) != 0) return (error); + elem = NULL; while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { - zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); + if ((prop = zpool_name_to_prop( + nvpair_name(elem))) == ZPROP_INVAL) + return (EINVAL); if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT || prop == ZPOOL_PROP_READONLY) continue; - if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) { - uint64_t ver; - - if (prop == ZPOOL_PROP_VERSION) { - VERIFY(nvpair_value_uint64(elem, &ver) == 0); - } else { - ASSERT(zpool_prop_feature(nvpair_name(elem))); - ver = SPA_VERSION_FEATURES; - need_sync = B_TRUE; - } - - /* Save time if the version is already set. */ - if (ver == spa_version(spa)) - continue; - - /* - * In addition to the pool directory object, we might - * create the pool properties object, the features for - * read object, the features for write object, or the - * feature descriptions object. - */ - error = dsl_sync_task_do(spa_get_dsl(spa), NULL, - spa_sync_version, spa, &ver, 6); - if (error) - return (error); - continue; - } - need_sync = B_TRUE; break; } - if (need_sync) { + if (need_sync) return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, - spa, nvp, 6)); - } - - return (0); + spa, nvp, 3)); + else + return (0); } /* @@ -676,43 +563,6 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) } /* - * Change the GUID for the pool. This is done so that we can later - * re-import a pool built from a clone of our own vdevs. 
We will modify - * the root vdev's guid, our own pool guid, and then mark all of our - * vdevs dirty. Note that we must make sure that all our vdevs are - * online when we do this, or else any vdevs that weren't present - * would be orphaned from our pool. We are also going to issue a - * sysevent to update any watchers. - */ -int -spa_change_guid(spa_t *spa) -{ - uint64_t oldguid, newguid; - uint64_t txg; - - if (!(spa_mode_global & FWRITE)) - return (EROFS); - - txg = spa_vdev_enter(spa); - - if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY) - return (spa_vdev_exit(spa, NULL, txg, ENXIO)); - - oldguid = spa_guid(spa); - newguid = spa_generate_guid(NULL); - ASSERT3U(oldguid, !=, newguid); - - spa->spa_root_vdev->vdev_guid = newguid; - spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); - - vdev_config_dirty(spa->spa_root_vdev); - - spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); - - return (spa_vdev_exit(spa, NULL, txg, 0)); -} - -/* * ========================================================================== * SPA state manipulation (open/create/destroy/import/export) * ========================================================================== @@ -760,7 +610,7 @@ static taskq_t * spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, uint_t value) { - uint_t flags = 0; + uint_t flags = TASKQ_PREPOPULATE; boolean_t batch = B_FALSE; switch (mode) { @@ -1138,10 +988,8 @@ spa_unload(spa_t *spa) } spa->spa_spares.sav_count = 0; - for (i = 0; i < spa->spa_l2cache.sav_count; i++) { - vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); + for (i = 0; i < spa->spa_l2cache.sav_count; i++) vdev_free(spa->spa_l2cache.sav_vdevs[i]); - } if (spa->spa_l2cache.sav_vdevs) { kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); @@ -1155,11 +1003,6 @@ spa_unload(spa_t *spa) spa->spa_async_suspended = 0; - if (spa->spa_comment != NULL) { - spa_strfree(spa->spa_comment); - spa->spa_comment = NULL; - } - spa_config_exit(spa, 
SCL_ALL, FTAG); } @@ -1369,13 +1212,11 @@ spa_load_l2cache(spa_t *spa) vd = oldvdevs[i]; if (vd != NULL) { - ASSERT(vd->vdev_isl2cache); - if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); - vdev_clear_stats(vd); - vdev_free(vd); + (void) vdev_close(vd); + spa_l2cache_remove(vd); } } @@ -1682,7 +1523,7 @@ spa_load_verify_done(zio_t *zio) int error = zio->io_error; if (error) { - if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && + if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && type != DMU_OT_INTENT_LOG) atomic_add_64(&sle->sle_meta_count, 1); else @@ -1877,7 +1718,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, { nvlist_t *config = spa->spa_config; char *ereport = FM_EREPORT_ZFS_POOL; - char *comment; int error; uint64_t pool_guid; nvlist_t *nvl; @@ -1885,10 +1725,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (EINVAL); - ASSERT(spa->spa_comment == NULL); - if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) - spa->spa_comment = spa_strdup(comment); - /* * Versioning wasn't explicitly added to the label until later, so if * it's not present treat it as the initial version. 
@@ -1904,7 +1740,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, spa_guid_exists(pool_guid, 0)) { error = EEXIST; } else { - spa->spa_config_guid = pool_guid; + spa->spa_load_guid = pool_guid; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) { @@ -1912,9 +1748,6 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, KM_SLEEP) == 0); } - nvlist_free(spa->spa_load_info); - spa->spa_load_info = fnvlist_alloc(); - gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, pool_guid, config, state, type, mosconfig, &ereport); @@ -1947,14 +1780,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, { int error = 0; nvlist_t *nvroot = NULL; - nvlist_t *label; vdev_t *rvd; uberblock_t *ub = &spa->spa_uberblock; uint64_t children, config_cache_txg = spa->spa_config_txg; int orig_mode = spa->spa_mode; int parse; uint64_t obj; - boolean_t missing_feat_write = B_FALSE; /* * If this is an untrusted config, access the pool in read-only mode. @@ -2021,7 +1852,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, */ if (type != SPA_IMPORT_ASSEMBLE) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - error = vdev_validate(rvd, mosconfig); + error = vdev_validate(rvd); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) @@ -2034,78 +1865,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, /* * Find the best uberblock. */ - vdev_uberblock_load(rvd, ub, &label); + vdev_uberblock_load(NULL, rvd, ub); /* * If we weren't able to find a single valid uberblock, return failure. */ - if (ub->ub_txg == 0) { - nvlist_free(label); + if (ub->ub_txg == 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); - } /* - * If the pool has an unsupported version we can't open it. + * If the pool is newer than the code, we can't open it. 
*/ - if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { - nvlist_free(label); + if (ub->ub_version > SPA_VERSION) return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); - } - - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *features; - - /* - * If we weren't able to find what's necessary for reading the - * MOS in the label, return failure. - */ - if (label == NULL || nvlist_lookup_nvlist(label, - ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { - nvlist_free(label); - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, - ENXIO)); - } - - /* - * Update our in-core representation with the definitive values - * from the label. - */ - nvlist_free(spa->spa_label_features); - VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); - } - - nvlist_free(label); - - /* - * Look through entries in the label nvlist's features_for_read. If - * there is a feature listed there which we don't understand then we - * cannot open a pool. - */ - if (ub->ub_version >= SPA_VERSION_FEATURES) { - nvlist_t *unsup_feat; - - VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == - 0); - - for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, - NULL); nvp != NULL; - nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { - if (!zfeature_is_supported(nvpair_name(nvp))) { - VERIFY(nvlist_add_string(unsup_feat, - nvpair_name(nvp), "") == 0); - } - } - - if (!nvlist_empty(unsup_feat)) { - VERIFY(nvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); - nvlist_free(unsup_feat); - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } - - nvlist_free(unsup_feat); - } /* * If the vdev guid sum doesn't match the uberblock, we have an @@ -2139,7 +1911,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa->spa_claim_max_txg = spa->spa_first_txg; spa->spa_prev_software_version = ub->ub_software_version; - error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); + error = dsl_pool_open(spa, 
spa->spa_first_txg, &spa->spa_dsl_pool); if (error) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; @@ -2147,84 +1919,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (spa_version(spa) >= SPA_VERSION_FEATURES) { - boolean_t missing_feat_read = B_FALSE; - nvlist_t *unsup_feat; - - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, - &spa->spa_feat_for_read_obj) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, - &spa->spa_feat_for_write_obj) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, - &spa->spa_feat_desc_obj) != 0) { - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - } - - VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == - 0); - - if (!feature_is_supported(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj, - unsup_feat)) - missing_feat_read = B_TRUE; - - if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) { - if (!feature_is_supported(spa->spa_meta_objset, - spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj, - unsup_feat)) - missing_feat_write = B_TRUE; - } - - if (!nvlist_empty(unsup_feat)) { - VERIFY(nvlist_add_nvlist(spa->spa_load_info, - ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); - } - - nvlist_free(unsup_feat); - - if (!missing_feat_read) { - fnvlist_add_boolean(spa->spa_load_info, - ZPOOL_CONFIG_CAN_RDONLY); - } - - /* - * If the state is SPA_LOAD_TRYIMPORT, our objective is - * twofold: to determine whether the pool is available for - * import in read-write mode and (if it is not) whether the - * pool is available for import in read-only mode. 
If the pool - * is available for import in read-write mode, it is displayed - * as available in userland; if it is not available for import - * in read-only mode, it is displayed as unavailable in - * userland. If the pool is available for import in read-only - * mode but not read-write mode, it is displayed as unavailable - * in userland with a special note that the pool is actually - * available for open in read-only mode. - * - * As a result, if the state is SPA_LOAD_TRYIMPORT and we are - * missing a feature for write, we must first determine whether - * the pool can be opened read-only before returning to - * userland in order to know whether to display the - * abovementioned note. - */ - if (missing_feat_read || (missing_feat_write && - spa_writeable(spa))) { - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, - ENOTSUP)); - } - } - - spa->spa_is_initializing = B_TRUE; - error = dsl_pool_open(spa->spa_dsl_pool); - spa->spa_is_initializing = B_FALSE; - if (error != 0) - return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); - if (!mosconfig) { uint64_t hostid; nvlist_t *policy = NULL, *nvconfig; @@ -2255,7 +1949,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, cmn_err(CE_WARN, "pool '%s' could not be " "loaded as it was last accessed by " "another system (host: %s hostid: 0x%lx). " - "See: http://illumos.org/msg/ZFS-8000-EY", + "See: http://www.sun.com/msg/ZFS-8000-EY", spa_name(spa), hostname, (unsigned long)hostid); return (EBADF); @@ -2442,7 +2136,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, nvlist_free(nvconfig); /* - * Now that we've validated the config, check the state of the + * Now that we've validate the config, check the state of the * root vdev. If it can't be opened, it indicates one or * more toplevel vdevs are faulted. 
*/ @@ -2455,17 +2149,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, } } - if (missing_feat_write) { - ASSERT(state == SPA_LOAD_TRYIMPORT); - - /* - * At this point, we know that we can open the pool in - * read-only mode but not read-write mode. We now have enough - * information and can return to userland. - */ - return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); - } - /* * We've successfully opened the pool, verify that we're ready * to start pushing transactions. @@ -2545,12 +2228,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_async_request(spa, SPA_ASYNC_RESILVER); /* - * Log the fact that we booted up (so that we can detect if - * we rebooted in the middle of an operation). - */ - spa_history_log_version(spa, "open"); - - /* * Delete any inconsistent datasets. */ (void) dmu_objset_find(spa_name(spa), @@ -2581,18 +2258,10 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); } -/* - * If spa_load() fails this function will try loading prior txg's. If - * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool - * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this - * function will not rewind the pool and will return the same error as - * spa_load(). 
- */ static int spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, uint64_t max_request, int rewind_flags) { - nvlist_t *loadinfo = NULL; nvlist_t *config = NULL; int load_error, rewind_error; uint64_t safe_rewind_txg; @@ -2621,18 +2290,9 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, return (load_error); } - if (state == SPA_LOAD_RECOVER) { - /* Price of rolling back is discarding txgs, including log */ + /* Price of rolling back is discarding txgs, including log */ + if (state == SPA_LOAD_RECOVER) spa_set_log_state(spa, SPA_LOG_CLEAR); - } else { - /* - * If we aren't rolling back save the load info from our first - * import attempt so that we can restore it after attempting - * to rewind. - */ - loadinfo = spa->spa_load_info; - spa->spa_load_info = fnvlist_alloc(); - } spa->spa_load_max_txg = spa->spa_last_ubsync_txg; safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; @@ -2656,20 +2316,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, if (config && (rewind_error || state != SPA_LOAD_RECOVER)) spa_config_set(spa, config); - if (state == SPA_LOAD_RECOVER) { - ASSERT3P(loadinfo, ==, NULL); - return (rewind_error); - } else { - /* Store the rewind info as part of the initial load info */ - fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, - spa->spa_load_info); - - /* Restore the initial load info */ - fnvlist_free(spa->spa_load_info); - spa->spa_load_info = loadinfo; - - return (load_error); - } + return (state == SPA_LOAD_RECOVER ? 
rewind_error : load_error); } /* @@ -2939,50 +2586,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config) } } -static void -spa_add_feature_stats(spa_t *spa, nvlist_t *config) -{ - nvlist_t *features; - zap_cursor_t zc; - zap_attribute_t za; - - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - if (spa->spa_feat_for_read_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_read_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); - } - - if (spa->spa_feat_for_write_obj != 0) { - for (zap_cursor_init(&zc, spa->spa_meta_objset, - spa->spa_feat_for_write_obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, - za.za_first_integer)); - } - zap_cursor_fini(&zc); - } - - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, - features) == 0); - nvlist_free(features); -} - int -spa_get_stats(const char *name, nvlist_t **config, - char *altroot, size_t buflen) +spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) { int error; spa_t *spa; @@ -3017,7 +2622,6 @@ spa_get_stats(const char *name, nvlist_t **config, spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); - spa_add_feature_stats(spa, *config); } } @@ -3108,7 +2712,6 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { error = ENOTBLK; - vdev_free(vd); goto out; } #endif @@ -3218,6 +2821,10 @@ spa_l2cache_drop(spa_t *spa) if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 
0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); + if (vd->vdev_isl2cache) + spa_l2cache_remove(vd); + vdev_clear_stats(vd); + (void) vdev_close(vd); } } @@ -3226,7 +2833,7 @@ spa_l2cache_drop(spa_t *spa) */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops) + const char *history_str, nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; @@ -3238,7 +2845,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; uint64_t version, obj; - boolean_t has_features; /* * If this pool already exists, return failure. @@ -3264,18 +2870,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, return (error); } - has_features = B_FALSE; - for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); - elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) - has_features = B_TRUE; - } - - if (has_features || nvlist_lookup_uint64(props, - zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { + if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), + &version) != 0) version = SPA_VERSION; - } - ASSERT(SPA_VERSION_IS_SUPPORTED(version)); + ASSERT(version <= SPA_VERSION); spa->spa_first_txg = txg; spa->spa_uberblock.ub_txg = txg - 1; @@ -3351,10 +2949,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_l2cache.sav_sync = B_TRUE; } - spa->spa_is_initializing = B_TRUE; spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); spa->spa_meta_objset = dp->dp_meta_objset; - spa->spa_is_initializing = B_FALSE; /* * Create DDTs (dedup tables). 
@@ -3378,9 +2974,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, cmn_err(CE_PANIC, "failed to add pool config"); } - if (spa_version(spa) >= SPA_VERSION_FEATURES) - spa_feature_create_zap_objects(spa, tx); - if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { @@ -3445,7 +3038,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_sync(spa, B_FALSE, B_TRUE); - spa_history_log_version(spa, "create"); + if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) + (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); + spa_history_log_version(spa, LOG_POOL_CREATE); spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3569,7 +3164,7 @@ spa_import_rootpool(char *devpath, char *devid) } #endif if (config == NULL) { - cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", + cmn_err(CE_NOTE, "Can not read the pool label from '%s'", devpath); return (EIO); } @@ -3645,6 +3240,7 @@ spa_import_rootpool(char *devpath, char *devid) } error = 0; + spa_history_log_version(spa, LOG_POOL_IMPORT); out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); @@ -3706,7 +3302,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_config_sync(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, "import"); + spa_history_log_version(spa, LOG_POOL_IMPORT); return (0); } @@ -3837,7 +3433,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, "import"); + spa_history_log_version(spa, LOG_POOL_IMPORT); return (0); } @@ -3882,8 +3478,6 @@ spa_tryimport(nvlist_t *tryconfig) state) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, spa->spa_uberblock.ub_timestamp) == 0); - VERIFY(nvlist_add_nvlist(config, 
ZPOOL_CONFIG_LOAD_INFO, - spa->spa_load_info) == 0); /* * If the bootfs property exists on this pool then we @@ -4222,7 +3816,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) pvd = oldvd->vdev_parent; if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, - VDEV_ALLOC_ATTACH)) != 0) + VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) @@ -4375,7 +3969,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); - spa_history_log_internal(spa, "vdev attach", NULL, + spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? "replace" : "attach", newvdpath, @@ -4592,7 +4186,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) error = spa_vdev_exit(spa, vd, txg, 0); - spa_history_log_internal(spa, "detach", NULL, + spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, "vdev=%s", vdpath); spa_strfree(vdpath); @@ -4861,8 +4455,9 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) - spa_history_log_internal(spa, "detach", tx, - "vdev=%s", vml[c]->vdev_path); + spa_history_log_internal(LOG_POOL_VDEV_DETACH, + spa, tx, "vdev=%s", + vml[c]->vdev_path); vdev_free(vml[c]); } } @@ -4877,8 +4472,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ - spa_history_log_internal(newspa, "split", NULL, - "from pool %s", spa_name(spa)); + spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, + "split new pool %s from pool %s", newname, spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); @@ -5464,7 +5059,8 @@ spa_async_thread(spa_t *spa) * then log an internal history event. 
*/ if (new_space != old_space) { - spa_history_log_internal(spa, "vdev online", NULL, + spa_history_log_internal(LOG_POOL_VDEV_ONLINE, + spa, NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } @@ -5599,7 +5195,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) * information. This avoids the dbuf_will_dirty() path and * saves us a pre-read to get data we don't actually care about. */ - bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); + bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); packed = kmem_alloc(bufsize, KM_SLEEP); VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, @@ -5684,25 +5280,6 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) spa_sync_nvlist(spa, spa->spa_config_object, config, tx); } -static void -spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) -{ - spa_t *spa = arg1; - uint64_t version = *(uint64_t *)arg2; - - /* - * Setting the version is special cased when first creating the pool. - */ - ASSERT(tx->tx_txg != TXG_INITIAL); - - ASSERT(version <= SPA_VERSION); - ASSERT(version >= spa_version(spa)); - - spa->spa_uberblock.ub_version = version; - vdev_config_dirty(spa->spa_root_vdev); - spa_history_log_internal(spa, "set", tx, "version=%lld", version); -} - /* * Set zpool properties. */ @@ -5712,40 +5289,32 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) spa_t *spa = arg1; objset_t *mos = spa->spa_meta_objset; nvlist_t *nvp = arg2; - nvpair_t *elem = NULL; + nvpair_t *elem; + uint64_t intval; + char *strval; + zpool_prop_t prop; + const char *propname; + zprop_type_t proptype; mutex_enter(&spa->spa_props_lock); + elem = NULL; while ((elem = nvlist_next_nvpair(nvp, elem))) { - uint64_t intval; - char *strval, *fname; - zpool_prop_t prop; - const char *propname; - zprop_type_t proptype; - zfeature_info_t *feature; - switch (prop = zpool_name_to_prop(nvpair_name(elem))) { - case ZPROP_INVAL: - /* - * We checked this earlier in spa_prop_validate(). 
- */ - ASSERT(zpool_prop_feature(nvpair_name(elem))); - - fname = strchr(nvpair_name(elem), '@') + 1; - VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); - - spa_feature_enable(spa, feature, tx); - spa_history_log_internal(spa, "set", tx, - "%s=enabled", nvpair_name(elem)); - break; - case ZPOOL_PROP_VERSION: - VERIFY(nvpair_value_uint64(elem, &intval) == 0); /* - * The version is synced seperatly before other - * properties and should be correct by now. + * Only set version for non-zpool-creation cases + * (set/import). spa_create() needs special care + * for version setting. */ - ASSERT3U(spa_version(spa), >=, intval); + if (tx->tx_txg != TXG_INITIAL) { + VERIFY(nvpair_value_uint64(elem, + &intval) == 0); + ASSERT(intval <= SPA_VERSION); + ASSERT(intval >= spa_version(spa)); + spa->spa_uberblock.ub_version = intval; + vdev_config_dirty(spa->spa_root_vdev); + } break; case ZPOOL_PROP_ALTROOT: @@ -5763,31 +5332,19 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) * properties. */ break; - case ZPOOL_PROP_COMMENT: - VERIFY(nvpair_value_string(elem, &strval) == 0); - if (spa->spa_comment != NULL) - spa_strfree(spa->spa_comment); - spa->spa_comment = spa_strdup(strval); - /* - * We need to dirty the configuration on all the vdevs - * so that their labels get updated. It's unnecessary - * to do this for pool creation since the vdev's - * configuratoin has already been dirtied. - */ - if (tx->tx_txg != TXG_INITIAL) - vdev_config_dirty(spa->spa_root_vdev); - spa_history_log_internal(spa, "set", tx, - "%s=%s", nvpair_name(elem), strval); - break; default: /* * Set pool property values in the poolprops mos object. 
*/ if (spa->spa_pool_props_object == 0) { - spa->spa_pool_props_object = - zap_create_link(mos, DMU_OT_POOL_PROPS, + VERIFY((spa->spa_pool_props_object = + zap_create(mos, DMU_OT_POOL_PROPS, + DMU_OT_NONE, 0, tx)) > 0); + + VERIFY(zap_update(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, - tx); + 8, 1, &spa->spa_pool_props_object, tx) + == 0); } /* normalize the property name */ @@ -5800,8 +5357,7 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx) == 0); - spa_history_log_internal(spa, "set", tx, - "%s=%s", nvpair_name(elem), strval); + } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -5813,8 +5369,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); - spa_history_log_internal(spa, "set", tx, - "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } @@ -5843,6 +5397,13 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) } } + /* log internal history if this is not a zpool create */ + if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && + tx->tx_txg != TXG_INITIAL) { + spa_history_log_internal(LOG_POOL_PROPSET, + spa, tx, "%s %lld %s", + nvpair_name(elem), intval, spa_name(spa)); + } } mutex_exit(&spa->spa_props_lock); @@ -5882,11 +5443,6 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) /* Keeping the freedir open increases spa_minref */ spa->spa_minref += 3; } - - if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && - spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { - spa_feature_create_zap_objects(spa, tx); - } } /* diff --git a/uts/common/fs/zfs/spa_config.c b/uts/common/fs/zfs/spa_config.c index 366545035d6c..69d57f66dbb6 100644 --- a/uts/common/fs/zfs/spa_config.c +++ b/uts/common/fs/zfs/spa_config.c @@ -21,8 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -35,7 +33,6 @@ #include <sys/utsname.h> #include <sys/systeminfo.h> #include <sys/sunddi.h> -#include <sys/zfeature.h> #ifdef _KERNEL #include <sys/kobj.h> #include <sys/zone.h> @@ -348,10 +345,6 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); - VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, - ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); - - #ifdef _KERNEL hostid = zone_get_hostid(NULL); #else /* _KERNEL */ @@ -410,12 +403,6 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); - /* - * Store what's necessary for reading the MOS in the label. - */ - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, - spa->spa_label_features) == 0); - if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { ddt_histogram_t *ddh; ddt_stat_t *dds; diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c index f2c32f548b41..212abae5b80c 100644 --- a/uts/common/fs/zfs/spa_history.c +++ b/uts/common/fs/zfs/spa_history.c @@ -21,7 +21,6 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -30,12 +29,9 @@ #include <sys/dsl_synctask.h> #include <sys/dmu_tx.h> #include <sys/dmu_objset.h> -#include <sys/dsl_dataset.h> -#include <sys/dsl_dir.h> #include <sys/utsname.h> #include <sys/cmn_err.h> #include <sys/sunddi.h> -#include <sys/cred.h> #include "zfs_comutil.h" #ifdef _KERNEL #include <sys/zone.h> @@ -105,11 +101,11 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) /* * Figure out maximum size of history log. 
We set it at - * 0.1% of pool size, with a max of 1G and min of 128KB. + * 1% of pool size, with a max of 32MB and min of 128KB. */ shpp->sh_phys_max_off = - metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; - shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); + metaslab_class_get_dspace(spa_normal_class(spa)) / 100; + shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20); shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); dmu_buf_rele(dbp, FTAG); @@ -179,14 +175,12 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, } static char * -spa_history_zone(void) +spa_history_zone() { #ifdef _KERNEL - if (INGLOBALZONE(curproc)) - return (NULL); return (curproc->p_zone->zone_name); #else - return (NULL); + return ("global"); #endif } @@ -198,12 +192,14 @@ static void spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; - nvlist_t *nvl = arg2; + history_arg_t *hap = arg2; + const char *history_str = hap->ha_history_str; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; + nvlist_t *nvrecord; char *record_packed = NULL; int ret; @@ -233,35 +229,46 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) } #endif - fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); + VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, + gethrestime_sec()) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); + if (hap->ha_zone != NULL) + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, + hap->ha_zone) == 0); #ifdef _KERNEL - fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, + utsname.nodename) == 0); #endif - if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { - zfs_dbgmsg("command: %s", - fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); - } else if (nvlist_exists(nvl, 
ZPOOL_HIST_INT_NAME)) { - if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { - zfs_dbgmsg("txg %lld %s %s (id %llu) %s", - fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), - fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), - fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), - fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), - fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); - } else { - zfs_dbgmsg("txg %lld %s %s", - fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), - fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), - fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); - } - } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { - zfs_dbgmsg("ioctl %s", - fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); + if (hap->ha_log_type == LOG_CMD_POOL_CREATE || + hap->ha_log_type == LOG_CMD_NORMAL) { + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, + history_str) == 0); + + zfs_dbgmsg("command: %s", history_str); + } else { + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, + hap->ha_event) == 0); + VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, + tx->tx_txg) == 0); + VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, + history_str) == 0); + + zfs_dbgmsg("internal %s pool:%s txg:%llu %s", + zfs_history_event_names[hap->ha_event], spa_name(spa), + (longlong_t)tx->tx_txg, history_str); + } - record_packed = fnvlist_pack(nvl, &reclen); + VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); + record_packed = kmem_alloc(reclen, KM_SLEEP); + + VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, + NV_ENCODE_XDR, KM_SLEEP) == 0); mutex_enter(&spa->spa_history_lock); + if (hap->ha_log_type == LOG_CMD_POOL_CREATE) + VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); @@ -269,42 +276,33 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); - /* The first command is the create, which we keep forever */ - if (ret == 0 && 
shpp->sh_pool_create_len == 0 && - nvlist_exists(nvl, ZPOOL_HIST_CMD)) { - shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; + if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { + shpp->sh_pool_create_len += sizeof (le_len) + reclen; + shpp->sh_bof = shpp->sh_pool_create_len; } mutex_exit(&spa->spa_history_lock); - fnvlist_pack_free(record_packed, reclen); + nvlist_free(nvrecord); + kmem_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); - fnvlist_free(nvl); + + strfree(hap->ha_history_str); + if (hap->ha_zone != NULL) + strfree(hap->ha_zone); + kmem_free(hap, sizeof (history_arg_t)); } /* * Write out a history event. */ int -spa_history_log(spa_t *spa, const char *msg) -{ - int err; - nvlist_t *nvl = fnvlist_alloc(); - - fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); - err = spa_history_log_nvl(spa, nvl); - fnvlist_free(nvl); - return (err); -} - -int -spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) +spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) { + history_arg_t *ha; int err = 0; dmu_tx_t *tx; - nvlist_t *nvarg; - if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) - return (EINVAL); + ASSERT(what != LOG_INTERNAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); @@ -313,21 +311,19 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) return (err); } - nvarg = fnvlist_dup(nvl); - if (spa_history_zone() != NULL) { - fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, - spa_history_zone()); - } - fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); + ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); + ha->ha_history_str = strdup(history_str); + ha->ha_zone = strdup(spa_history_zone()); + ha->ha_log_type = what; + ha->ha_uid = crgetuid(CRED()); /* Kick this off asynchronously; errors are ignored. 
*/ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, nvarg, 0, tx); + spa_history_log_sync, spa, ha, 0, tx); dmu_tx_commit(tx); - /* spa_history_log_sync will free nvl */ + /* spa_history_log_sync will free ha and strings */ return (err); - } /* @@ -344,7 +340,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) int err; /* - * If the command history doesn't exist (older pool), + * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) @@ -427,14 +423,11 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) return (err); } -/* - * The nvlist will be consumed by this call. - */ static void -log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, +log_internal(history_internal_events_t event, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { - char *msg; + history_arg_t *ha; /* * If this is part of creating a pool, not everything is @@ -443,25 +436,28 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, if (tx->tx_txg == TXG_INITIAL) return; - msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP); - (void) vsprintf(msg, fmt, adx); - fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); - strfree(msg); + ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); + ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, + KM_SLEEP); + + (void) vsprintf(ha->ha_history_str, fmt, adx); - fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); - fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); + ha->ha_log_type = LOG_INTERNAL; + ha->ha_event = event; + ha->ha_zone = NULL; + ha->ha_uid = 0; if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, nvl, tx); + spa_history_log_sync(spa, ha, tx); } else { dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, nvl, 0, tx); + spa_history_log_sync, spa, ha, 0, tx); } - /* spa_history_log_sync() will free nvl */ + /* spa_history_log_sync() will free ha 
and strings */ } void -spa_history_log_internal(spa_t *spa, const char *operation, +spa_history_log_internal(history_internal_events_t event, spa_t *spa, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; @@ -477,7 +473,7 @@ spa_history_log_internal(spa_t *spa, const char *operation, } va_start(adx, fmt); - log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); + log_internal(event, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -486,56 +482,21 @@ spa_history_log_internal(spa_t *spa, const char *operation, } void -spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, - dmu_tx_t *tx, const char *fmt, ...) -{ - va_list adx; - char namebuf[MAXNAMELEN]; - nvlist_t *nvl = fnvlist_alloc(); - - ASSERT(tx != NULL); - - dsl_dataset_name(ds, namebuf); - fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); - fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); - - va_start(adx, fmt); - log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); - va_end(adx); -} - -void -spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, - dmu_tx_t *tx, const char *fmt, ...) 
-{ - va_list adx; - char namebuf[MAXNAMELEN]; - nvlist_t *nvl = fnvlist_alloc(); - - ASSERT(tx != NULL); - - dsl_dir_name(dd, namebuf); - fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); - fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, - dd->dd_phys->dd_head_dataset_obj); - - va_start(adx, fmt); - log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); - va_end(adx); -} - -void -spa_history_log_version(spa_t *spa, const char *operation) +spa_history_log_version(spa_t *spa, history_internal_events_t event) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); - spa_history_log_internal(spa, operation, NULL, - "pool version %llu; software version %llu/%d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, - utsname.nodename, utsname.release, utsname.version, - utsname.machine); - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, + if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { + spa_history_log_internal(event, spa, NULL, + "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", + (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); + } + cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", + event == LOG_POOL_IMPORT ? "imported" : + event == LOG_POOL_CREATE ? "created" : "accessed", (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c index 9400194a93b8..1b54afb0be5e 100644 --- a/uts/common/fs/zfs/spa_misc.c +++ b/uts/common/fs/zfs/spa_misc.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
*/ #include <sys/zfs_context.h> @@ -48,7 +46,6 @@ #include <sys/arc.h> #include <sys/ddt.h> #include "zfs_prop.h" -#include "zfeature_common.h" /* * SPA locking @@ -217,7 +214,7 @@ * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual * locking is, always, based on spa_namespace_lock and spa_config_lock[]. * - * spa_rename() is also implemented within this file since it requires + * spa_rename() is also implemented within this file since is requires * manipulation of the namespace. */ @@ -484,22 +481,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP) == 0); - if (config != NULL) { - nvlist_t *features; - - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, - &features) == 0) { - VERIFY(nvlist_dup(features, &spa->spa_label_features, - 0) == 0); - } - + if (config != NULL) VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); - } - - if (spa->spa_label_features == NULL) { - VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, - KM_SLEEP) == 0); - } return (spa); } @@ -536,7 +519,6 @@ spa_remove(spa_t *spa) list_destroy(&spa->spa_config_list); - nvlist_free(spa->spa_label_features); nvlist_free(spa->spa_load_info); spa_config_set(spa, NULL); @@ -1045,20 +1027,6 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) * ========================================================================== */ -void -spa_activate_mos_feature(spa_t *spa, const char *feature) -{ - (void) nvlist_add_boolean(spa->spa_label_features, feature); - vdev_config_dirty(spa->spa_root_vdev); -} - -void -spa_deactivate_mos_feature(spa_t *spa, const char *feature) -{ - (void) nvlist_remove_all(spa->spa_label_features, feature); - vdev_config_dirty(spa->spa_root_vdev); -} - /* * Rename a spa_t. 
*/ @@ -1209,22 +1177,12 @@ spa_generate_guid(spa_t *spa) void sprintf_blkptr(char *buf, const blkptr_t *bp) { - char type[256]; + char *type = NULL; char *checksum = NULL; char *compress = NULL; if (bp != NULL) { - if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { - dmu_object_byteswap_t bswap = - DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); - (void) snprintf(type, sizeof (type), "bswap %s %s", - DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ? - "metadata" : "data", - dmu_ot_byteswap[bswap].ob_name); - } else { - (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, - sizeof (type)); - } + type = dmu_ot[BP_GET_TYPE(bp)].ot_name; checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; } @@ -1306,12 +1264,6 @@ spa_get_dsl(spa_t *spa) return (spa->spa_dsl_pool); } -boolean_t -spa_is_initializing(spa_t *spa) -{ - return (spa->spa_is_initializing); -} - blkptr_t * spa_get_rootblkptr(spa_t *spa) { @@ -1351,24 +1303,13 @@ spa_guid(spa_t *spa) /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root - * vdev. We stash the original pool guid in 'spa_config_guid' to handle + * vdev. We stash the original pool guid in 'spa_load_guid' to handle * this case. */ if (spa->spa_root_vdev != NULL) return (spa->spa_root_vdev->vdev_guid); else - return (spa->spa_config_guid); -} - -uint64_t -spa_load_guid(spa_t *spa) -{ - /* - * This is a GUID that exists solely as a reference for the - * purposes of the arc. It is generated at load time, and - * is never written to persistent storage. 
- */ - return (spa->spa_load_guid); + return (spa->spa_load_guid); } uint64_t @@ -1595,7 +1536,6 @@ spa_init(int mode) vdev_cache_stat_init(); zfs_prop_init(); zpool_prop_init(); - zpool_feature_init(); spa_config_load(); l2arc_start(); } @@ -1730,9 +1670,3 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) return (0); } - -boolean_t -spa_debug_enabled(spa_t *spa) -{ - return (spa->spa_debug); -} diff --git a/uts/common/fs/zfs/sys/bptree.h b/uts/common/fs/zfs/sys/bptree.h deleted file mode 100644 index 971507211875..000000000000 --- a/uts/common/fs/zfs/sys/bptree.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. 
- */ - -#ifndef _SYS_BPTREE_H -#define _SYS_BPTREE_H - -#include <sys/spa.h> -#include <sys/zio.h> - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct bptree_phys { - uint64_t bt_begin; - uint64_t bt_end; - uint64_t bt_bytes; - uint64_t bt_comp; - uint64_t bt_uncomp; -} bptree_phys_t; - -typedef struct bptree_entry_phys { - blkptr_t be_bp; - uint64_t be_birth_txg; /* only delete blocks born after this txg */ - zbookmark_t be_zb; /* holds traversal resume point if needed */ -} bptree_entry_phys_t; - -typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx); - -uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx); -int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx); - -void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, - uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx); - -int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, - bptree_itor_t func, void *arg, dmu_tx_t *tx); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_BPTREE_H */ diff --git a/uts/common/fs/zfs/sys/dmu.h b/uts/common/fs/zfs/sys/dmu.h index d60483575574..07f5949ebfea 100644 --- a/uts/common/fs/zfs/sys/dmu.h +++ b/uts/common/fs/zfs/sys/dmu.h @@ -18,12 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -44,7 +40,6 @@ #include <sys/param.h> #include <sys/cred.h> #include <sys/time.h> -#include <sys/fs/zfs.h> #ifdef __cplusplus extern "C" { @@ -75,53 +70,6 @@ typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; typedef struct dsl_dir dsl_dir_t; -typedef enum dmu_object_byteswap { - DMU_BSWAP_UINT8, - DMU_BSWAP_UINT16, - DMU_BSWAP_UINT32, - DMU_BSWAP_UINT64, - DMU_BSWAP_ZAP, - DMU_BSWAP_DNODE, - DMU_BSWAP_OBJSET, - DMU_BSWAP_ZNODE, - DMU_BSWAP_OLDACL, - DMU_BSWAP_ACL, - /* - * Allocating a new byteswap type number makes the on-disk format - * incompatible with any other format that uses the same number. - * - * Data can usually be structured to work with one of the - * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types. - */ - DMU_BSWAP_NUMFUNCS -} dmu_object_byteswap_t; - -#define DMU_OT_NEWTYPE 0x80 -#define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f - -/* - * Defines a uint8_t object type. Object types specify if the data - * in the object is metadata (boolean) and how to byteswap the data - * (dmu_object_byteswap_t). - */ -#define DMU_OT(byteswap, metadata) \ - (DMU_OT_NEWTYPE | \ - ((metadata) ? DMU_OT_METADATA : 0) | \ - ((byteswap) & DMU_OT_BYTESWAP_MASK)) - -#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ - (ot) < DMU_OT_NUMTYPES) - -#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - ((ot) & DMU_OT_METADATA) : \ - dmu_ot[(ot)].ot_metadata) - -#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - ((ot) & DMU_OT_BYTESWAP_MASK) : \ - dmu_ot[(ot)].ot_byteswap) - typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ @@ -186,37 +134,19 @@ typedef enum dmu_object_type { DMU_OT_DEADLIST_HDR, /* UINT64 */ DMU_OT_DSL_CLONES, /* ZAP */ DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ - /* - * Do not allocate new object types here. 
Doing so makes the on-disk - * format incompatible with any other format that uses the same object - * type number. - * - * When creating an object which does not have one of the above types - * use the DMU_OTN_* type with the correct byteswap and metadata - * values. - * - * The DMU_OTN_* types do not have entries in the dmu_ot table, - * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead - * of indexing into dmu_ot directly (this works for both DMU_OT_* types - * and DMU_OTN_* types). - */ - DMU_OT_NUMTYPES, - - /* - * Names for valid types declared with DMU_OT(). - */ - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + DMU_OT_NUMTYPES } dmu_object_type_t; +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! 
*/ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); void byteswap_uint16_array(void *buf, size_t size); @@ -261,11 +191,9 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, - struct nvlist *errlist); -int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *); -int dmu_objset_snapshot_one(const char *fsname, const char *snapname); -int dmu_objset_snapshot_tmp(const char *, const char *, int); +int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); +int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); int dmu_objset_rename(const char *name, const char *newname, boolean_t recursive); int dmu_objset_find(char *name, int func(const char *, void *), void *arg, @@ -286,9 +214,6 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); */ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" -#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" -#define DMU_POOL_FEATURES_FOR_READ "features_for_read" -#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPOBJ "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" @@ -304,7 +229,6 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); #define DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" #define DMU_POOL_FREE_BPOBJ "free_bpobj" -#define DMU_POOL_BPTREE_OBJ "bptree_obj" /* * Allocate an object from this objset. 
The range of object numbers @@ -565,7 +489,7 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, /* * Free up the data blocks for a defined range of a file. If size is - * -1, the range from offset to end-of-file is freed. + * zero, the range from offset to end-of-file is freed. */ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); @@ -635,18 +559,12 @@ typedef struct dmu_object_info { typedef void arc_byteswap_func_t(void *buf, size_t size); typedef struct dmu_object_type_info { - dmu_object_byteswap_t ot_byteswap; + arc_byteswap_func_t *ot_byteswap; boolean_t ot_metadata; char *ot_name; } dmu_object_type_info_t; -typedef struct dmu_object_byteswap_info { - arc_byteswap_func_t *ob_func; - char *ob_name; -} dmu_object_byteswap_info_t; - extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES]; -extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS]; /* * Get information on a DMU object. @@ -782,9 +700,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_send(objset_t *tosnap, objset_t *fromsnap, - int outfd, struct vnode *vp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep); +int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + struct vnode *vp, offset_t *off); typedef struct dmu_recv_cookie { /* @@ -801,7 +718,6 @@ typedef struct dmu_recv_cookie { char *drc_top_ds; boolean_t drc_newfs; boolean_t drc_force; - struct avl_tree *drc_guid_to_ds_map; } dmu_recv_cookie_t; int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, diff --git a/uts/common/fs/zfs/sys/dmu_impl.h b/uts/common/fs/zfs/sys/dmu_impl.h index defcdb29ca60..22f9f5f8c88c 100644 --- a/uts/common/fs/zfs/sys/dmu_impl.h +++ b/uts/common/fs/zfs/sys/dmu_impl.h @@ -21,7 +21,6 @@ /* * 
Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -31,7 +30,6 @@ #include <sys/zio.h> #include <sys/dnode.h> #include <sys/zfs_context.h> -#include <sys/zfs_ioctl.h> #ifdef __cplusplus extern "C" { @@ -266,32 +264,6 @@ static xuio_stats_t xuio_stats = { atomic_add_64(&xuio_stats.stat.value.ui64, (val)) #define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1) -/* - * The list of data whose inclusion in a send stream can be pending from - * one call to backup_cb to another. Multiple calls to dump_free() and - * dump_freeobjects() can be aggregated into a single DRR_FREE or - * DRR_FREEOBJECTS replay record. - */ -typedef enum { - PENDING_NONE, - PENDING_FREE, - PENDING_FREEOBJECTS -} dmu_pendop_t; - -typedef struct dmu_sendarg { - list_node_t dsa_link; - dmu_replay_record_t *dsa_drr; - vnode_t *dsa_vp; - int dsa_outfd; - struct proc *dsa_proc; - offset_t *dsa_off; - objset_t *dsa_os; - zio_cksum_t dsa_zc; - uint64_t dsa_toguid; - int dsa_err; - dmu_pendop_t dsa_pending_op; -} dmu_sendarg_t; - #ifdef __cplusplus } diff --git a/uts/common/fs/zfs/sys/dmu_objset.h b/uts/common/fs/zfs/sys/dmu_objset.h index 9439993ace78..c6d202e2e81a 100644 --- a/uts/common/fs/zfs/sys/dmu_objset.h +++ b/uts/common/fs/zfs/sys/dmu_objset.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -138,14 +137,24 @@ void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); +int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, + void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); +int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, + uint64_t flags); +int dmu_objset_destroy(const char *name, boolean_t defer); +int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, + struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); void dmu_objset_stats(objset_t *os, nvlist_t *nv); void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dmu_objset_fsid_guid(objset_t *os); +int dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags); int dmu_objset_find_spa(spa_t *spa, const char *name, int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); int dmu_objset_prefetch(const char *name, void *arg); +void dmu_objset_byteswap(void *buf, size_t size); int dmu_objset_evict_dbufs(objset_t *os); timestruc_t dmu_objset_snap_cmtime(objset_t *os); diff --git a/uts/common/fs/zfs/sys/dmu_traverse.h b/uts/common/fs/zfs/sys/dmu_traverse.h index 3cbf42f56a60..5b326cd99c09 100644 --- a/uts/common/fs/zfs/sys/dmu_traverse.h +++ b/uts/common/fs/zfs/sys/dmu_traverse.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_DMU_TRAVERSE_H @@ -55,9 +54,6 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); -int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, - uint64_t txg_start, zbookmark_t *resume, int flags, - blkptr_cb_t func, void *arg); int traverse_pool(spa_t *spa, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); diff --git a/uts/common/fs/zfs/sys/dsl_dataset.h b/uts/common/fs/zfs/sys/dsl_dataset.h index 6c43d97fd9d6..22733d070e8b 100644 --- a/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/uts/common/fs/zfs/sys/dsl_dataset.h @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -86,12 +84,7 @@ typedef struct dsl_dataset_phys { uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */ - /* - * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes - * include all blocks referenced by this dataset, including those - * shared with any other datasets. 
- */ - uint64_t ds_referenced_bytes; + uint64_t ds_used_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ @@ -156,9 +149,6 @@ typedef struct dsl_dataset { uint64_t ds_reserved; /* cached refreservation */ uint64_t ds_quota; /* cached refquota */ - kmutex_t ds_sendstream_lock; - list_t ds_sendstreams; - /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; @@ -180,7 +170,7 @@ struct dsl_ds_destroyarg { struct dsl_ds_holdarg { dsl_sync_task_group_t *dstg; - const char *htag; + char *htag; char *snapname; boolean_t recursive; boolean_t gotone; @@ -215,11 +205,12 @@ uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); +int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); dsl_checkfunc_t dsl_dataset_destroy_check; dsl_syncfunc_t dsl_dataset_destroy_sync; +dsl_checkfunc_t dsl_dataset_snapshot_check; +dsl_syncfunc_t dsl_dataset_snapshot_sync; dsl_syncfunc_t dsl_dataset_user_hold_sync; -int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); -void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, @@ -258,10 +249,6 @@ void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); -int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, - uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); -int 
dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, - uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); diff --git a/uts/common/fs/zfs/sys/dsl_deleg.h b/uts/common/fs/zfs/sys/dsl_deleg.h index 5842639aafba..73c43bd23879 100644 --- a/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/uts/common/fs/zfs/sys/dsl_deleg.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DELEG_H diff --git a/uts/common/fs/zfs/sys/dsl_pool.h b/uts/common/fs/zfs/sys/dsl_pool.h index 9ff414888cb0..7d25bd7c020d 100644 --- a/uts/common/fs/zfs/sys/dsl_pool.h +++ b/uts/common/fs/zfs/sys/dsl_pool.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_POOL_H @@ -35,7 +34,6 @@ #include <sys/ddt.h> #include <sys/arc.h> #include <sys/bpobj.h> -#include <sys/bptree.h> #ifdef __cplusplus extern "C" { @@ -50,8 +48,7 @@ struct dsl_scan; /* These macros are for indexing into the zfs_all_blkstats_t. 
*/ #define DMU_OT_DEFERRED DMU_OT_NONE -#define DMU_OT_OTHER DMU_OT_NUMTYPES /* place holder for DMU_OT() types */ -#define DMU_OT_TOTAL (DMU_OT_NUMTYPES + 1) +#define DMU_OT_TOTAL DMU_OT_NUMTYPES typedef struct zfs_blkstat { uint64_t zb_count; @@ -88,7 +85,6 @@ typedef struct dsl_pool { uint64_t dp_write_limit; uint64_t dp_tmp_userrefs_obj; bpobj_t dp_free_bpobj; - uint64_t dp_bptree_obj; struct dsl_scan *dp_scan; @@ -114,8 +110,7 @@ typedef struct dsl_pool { zfs_all_blkstats_t *dp_blkstats; } dsl_pool_t; -int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); -int dsl_pool_open(dsl_pool_t *dp); +int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); void dsl_pool_close(dsl_pool_t *dp); dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); diff --git a/uts/common/fs/zfs/sys/dsl_prop.h b/uts/common/fs/zfs/sys/dsl_prop.h index b0d9a52cdfd7..a636ad35096b 100644 --- a/uts/common/fs/zfs/sys/dsl_prop.h +++ b/uts/common/fs/zfs/sys/dsl_prop.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_PROP_H @@ -90,6 +89,8 @@ dsl_syncfunc_t dsl_props_set_sync; int dsl_prop_set(const char *ddname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf); int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); +void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, + dmu_tx_t *tx); void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, zprop_source_t source, uint64_t *value); diff --git a/uts/common/fs/zfs/sys/dsl_scan.h b/uts/common/fs/zfs/sys/dsl_scan.h index 5691f4d14d93..c79666e67de0 100644 --- a/uts/common/fs/zfs/sys/dsl_scan.h +++ b/uts/common/fs/zfs/sys/dsl_scan.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_SCAN_H @@ -80,9 +79,6 @@ typedef struct dsl_scan { uint64_t scn_sync_start_time; zio_t *scn_zio_root; - /* for freeing blocks */ - boolean_t scn_is_bptree; - /* for debugging / information */ uint64_t scn_visited_this_txg; diff --git a/uts/common/fs/zfs/sys/metaslab.h b/uts/common/fs/zfs/sys/metaslab.h index 2cf4d2b489bd..583d6303bd5a 100644 --- a/uts/common/fs/zfs/sys/metaslab.h +++ b/uts/common/fs/zfs/sys/metaslab.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -48,8 +47,6 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg); #define METASLAB_HINTBP_FAVOR 0x0 #define METASLAB_HINTBP_AVOID 0x1 #define METASLAB_GANG_HEADER 0x2 -#define METASLAB_GANG_CHILD 0x4 -#define METASLAB_GANG_AVOID 0x8 extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); diff --git a/uts/common/fs/zfs/sys/metaslab_impl.h b/uts/common/fs/zfs/sys/metaslab_impl.h index 6c670a1624ab..07988dd51a73 100644 --- a/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/uts/common/fs/zfs/sys/metaslab_impl.h @@ -21,7 +21,6 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -53,7 +52,6 @@ struct metaslab_group { avl_tree_t mg_metaslab_tree; uint64_t mg_aliquot; uint64_t mg_bonus_area; - uint64_t mg_alloc_failures; int64_t mg_bias; int64_t mg_activation_count; metaslab_class_t *mg_class; diff --git a/uts/common/fs/zfs/sys/rrwlock.h b/uts/common/fs/zfs/sys/rrwlock.h index 239268bd58e7..19a43c97fc3c 100644 --- a/uts/common/fs/zfs/sys/rrwlock.h +++ b/uts/common/fs/zfs/sys/rrwlock.h @@ -22,13 +22,12 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ #ifndef _SYS_RR_RW_LOCK_H #define _SYS_RR_RW_LOCK_H +#pragma ident "%Z%%M% %I% %E% SMI" + #ifdef __cplusplus extern "C" { #endif @@ -70,7 +69,6 @@ void rrw_destroy(rrwlock_t *rrl); void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); void rrw_exit(rrwlock_t *rrl, void *tag); boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); -void rrw_tsd_destroy(void *arg); #define RRW_READ_HELD(x) rrw_held(x, RW_READER) #define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h index 1043f4038a30..456ec06dc456 100644 --- a/uts/common/fs/zfs/sys/spa.h +++ b/uts/common/fs/zfs/sys/spa.h @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SPA_H @@ -52,7 +50,6 @@ typedef struct spa_aux_vdev spa_aux_vdev_t; typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; -struct dsl_dataset; /* * General-purpose 32-bit and 64-bit bitfield encodings. @@ -95,7 +92,7 @@ struct dsl_dataset; /* * Size of block to hold the configuration data (a packed nvlist) */ -#define SPA_CONFIG_BLOCKSIZE (1ULL << 14) +#define SPA_CONFIG_BLOCKSIZE (1 << 14) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. @@ -263,7 +260,7 @@ typedef struct blkptr { DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define BP_GET_UCSIZE(bp) \ - ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \ + ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) #define BP_GET_NDVAS(bp) \ @@ -404,8 +401,8 @@ typedef struct blkptr { #include <sys/dmu.h> #define BP_GET_BUFC_TYPE(bp) \ - (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? 
\ - ARC_BUFC_METADATA : ARC_BUFC_DATA) + (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \ + ARC_BUFC_METADATA : ARC_BUFC_DATA); typedef enum spa_import_type { SPA_IMPORT_EXISTING, @@ -416,10 +413,10 @@ typedef enum spa_import_type { extern int spa_open(const char *pool, spa_t **, void *tag); extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); -extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, - size_t buflen); +extern int spa_get_stats(const char *pool, nvlist_t **config, + char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - nvlist_t *zplprops); + const char *history_str, nvlist_t *zplprops); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); @@ -574,14 +571,12 @@ extern void spa_claim_notify(zio_t *zio); /* Accessor functions */ extern boolean_t spa_shutting_down(spa_t *spa); extern struct dsl_pool *spa_get_dsl(spa_t *spa); -extern boolean_t spa_is_initializing(spa_t *spa); extern blkptr_t *spa_get_rootblkptr(spa_t *spa); extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp); extern void spa_altroot(spa_t *, char *, size_t); extern int spa_sync_pass(spa_t *spa); extern char *spa_name(spa_t *spa); extern uint64_t spa_guid(spa_t *spa); -extern uint64_t spa_load_guid(spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); @@ -606,8 +601,6 @@ extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); /* Miscellaneous support routines */ -extern void spa_activate_mos_feature(spa_t *spa, const char *feature); -extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature); extern int spa_rename(const char *oldname, const char *newname); extern spa_t *spa_by_guid(uint64_t 
pool_guid, uint64_t device_guid); extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); @@ -617,7 +610,6 @@ extern uint64_t spa_get_random(uint64_t range); extern uint64_t spa_generate_guid(spa_t *spa); extern void sprintf_blkptr(char *buf, const blkptr_t *bp); extern void spa_freeze(spa_t *spa); -extern int spa_change_guid(spa_t *spa); extern void spa_upgrade(spa_t *spa, uint64_t version); extern void spa_evict_all(void); extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid, @@ -633,20 +625,31 @@ extern boolean_t spa_writeable(spa_t *spa); extern int spa_mode(spa_t *spa); extern uint64_t strtonum(const char *str, char **nptr); +/* history logging */ +typedef enum history_log_type { + LOG_CMD_POOL_CREATE, + LOG_CMD_NORMAL, + LOG_INTERNAL +} history_log_type_t; + +typedef struct history_arg { + char *ha_history_str; + history_log_type_t ha_log_type; + history_internal_events_t ha_event; + char *ha_zone; + uid_t ha_uid; +} history_arg_t; + extern char *spa_his_ievent_table[]; extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, char *his_buf); -extern int spa_history_log(spa_t *spa, const char *his_buf); -extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl); -extern void spa_history_log_version(spa_t *spa, const char *operation); -extern void spa_history_log_internal(spa_t *spa, const char *operation, - dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, - dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, - dmu_tx_t *tx, const char *fmt, ...); +extern int spa_history_log(spa_t *spa, const char *his_buf, + history_log_type_t what); +extern void spa_history_log_internal(history_internal_events_t event, + spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_version(spa_t *spa, 
history_internal_events_t evt); /* error handling */ struct zbookmark; @@ -694,13 +697,6 @@ _NOTE(CONSTCOND) } while (0) #define dprintf_bp(bp, fmt, ...) #endif -extern boolean_t spa_debug_enabled(spa_t *spa); -#define spa_dbgmsg(spa, ...) \ -{ \ - if (spa_debug_enabled(spa)) \ - zfs_dbgmsg(__VA_ARGS__); \ -} - extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */ #ifdef __cplusplus diff --git a/uts/common/fs/zfs/sys/spa_impl.h b/uts/common/fs/zfs/sys/spa_impl.h index 5118954b0016..c965ffbbef87 100644 --- a/uts/common/fs/zfs/sys/spa_impl.h +++ b/uts/common/fs/zfs/sys/spa_impl.h @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SPA_IMPL_H @@ -112,7 +110,6 @@ struct spa { * Fields protected by spa_namespace_lock. */ char spa_name[MAXNAMELEN]; /* pool name */ - char *spa_comment; /* comment */ avl_node_t spa_avl; /* node in spa_namespace_avl */ nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ @@ -127,7 +124,6 @@ struct spa { uint64_t spa_import_flags; /* import specific flags */ taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; - boolean_t spa_is_initializing; /* true while opening pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ uint64_t spa_first_txg; /* first txg after spa_open() */ @@ -139,13 +135,11 @@ struct spa { objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ vdev_t *spa_root_vdev; /* top-level vdev container */ - uint64_t spa_config_guid; /* config pool guid */ - uint64_t spa_load_guid; /* spa_load initialized guid */ + uint64_t spa_load_guid; /* initial guid for spa_load */ list_t spa_config_dirty_list; /* vdevs with dirty config */ 
list_t spa_state_dirty_list; /* vdevs with dirty state */ spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ - nvlist_t *spa_label_features; /* Features for reading MOS */ uint64_t spa_config_object; /* MOS object for pool config */ uint64_t spa_config_generation; /* config generation number */ uint64_t spa_syncing_txg; /* txg currently syncing */ @@ -202,7 +196,6 @@ struct spa { kcondvar_t spa_suspend_cv; /* notification of resume */ uint8_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ - boolean_t spa_debug; /* debug enabled? */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ int spa_mode; /* FREAD | FWRITE */ @@ -222,10 +215,7 @@ struct spa { boolean_t spa_autoreplace; /* autoreplace set in open */ int spa_vdev_locks; /* locks grabbed */ uint64_t spa_creation_version; /* version at pool creation */ - uint64_t spa_prev_software_version; /* See ub_software_version */ - uint64_t spa_feat_for_write_obj; /* required to write to pool */ - uint64_t spa_feat_for_read_obj; /* required to read from pool */ - uint64_t spa_feat_desc_obj; /* Feature descriptions */ + uint64_t spa_prev_software_version; /* * spa_refcnt & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. diff --git a/uts/common/fs/zfs/sys/vdev.h b/uts/common/fs/zfs/sys/vdev.h index 2329d5b85c68..941f234dc68f 100644 --- a/uts/common/fs/zfs/sys/vdev.h +++ b/uts/common/fs/zfs/sys/vdev.h @@ -18,10 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_VDEV_H @@ -50,7 +48,7 @@ extern boolean_t zfs_nocacheflush; extern int vdev_open(vdev_t *); extern void vdev_open_children(vdev_t *); extern boolean_t vdev_uses_zvols(vdev_t *); -extern int vdev_validate(vdev_t *, boolean_t); +extern int vdev_validate(vdev_t *); extern void vdev_close(vdev_t *); extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace); extern void vdev_reopen(vdev_t *); @@ -142,8 +140,8 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, struct uberblock; extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); extern int vdev_label_number(uint64_t psise, uint64_t offset); -extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label); -extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); +extern nvlist_t *vdev_label_read_config(vdev_t *vd); +extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ diff --git a/uts/common/fs/zfs/sys/vdev_impl.h b/uts/common/fs/zfs/sys/vdev_impl.h index 6d2e962fdd37..161bd21f05a6 100644 --- a/uts/common/fs/zfs/sys/vdev_impl.h +++ b/uts/common/fs/zfs/sys/vdev_impl.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_VDEV_IMPL_H @@ -56,8 +55,7 @@ typedef struct vdev_cache_entry vdev_cache_entry_t; /* * Virtual device operations */ -typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, - uint64_t *ashift); +typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); typedef int vdev_io_start_func_t(zio_t *zio); @@ -120,7 +118,6 @@ struct vdev { uint64_t vdev_orig_guid; /* orig. 
guid prior to remove */ uint64_t vdev_asize; /* allocatable device capacity */ uint64_t vdev_min_asize; /* min acceptable asize */ - uint64_t vdev_max_asize; /* max acceptable asize */ uint64_t vdev_ashift; /* block alignment shift */ uint64_t vdev_state; /* see VDEV_STATE_* #defines */ uint64_t vdev_prevstate; /* used when reopening a vdev */ @@ -202,7 +199,7 @@ struct vdev { * For DTrace to work in userland (libzpool) context, these fields must * remain at the end of the structure. DTrace will use the kernel's * CTF definition for 'struct vdev', and since the size of a kmutex_t is - * larger in userland, the offsets for the rest of the fields would be + * larger in userland, the offsets for the rest fields would be * incorrect. */ kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */ @@ -257,7 +254,6 @@ typedef struct vdev_label { #define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) #define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) #define VDEV_LABELS 4 -#define VDEV_BEST_LABEL VDEV_LABELS #define VDEV_ALLOC_LOAD 0 #define VDEV_ALLOC_ADD 1 @@ -265,7 +261,6 @@ typedef struct vdev_label { #define VDEV_ALLOC_L2CACHE 3 #define VDEV_ALLOC_ROOTPOOL 4 #define VDEV_ALLOC_SPLIT 5 -#define VDEV_ALLOC_ATTACH 6 /* * Allocate or free a vdev diff --git a/uts/common/fs/zfs/sys/zap.h b/uts/common/fs/zfs/sys/zap.h index 4d7b315597c5..a1130bbbaaae 100644 --- a/uts/common/fs/zfs/sys/zap.h +++ b/uts/common/fs/zfs/sys/zap.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_ZAP_H @@ -133,8 +132,6 @@ uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot, uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); -uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot, - uint64_t parent_obj, const char *name, dmu_tx_t *tx); /* * Create a new zapobj with no attributes from the given (unallocated) @@ -303,6 +300,12 @@ int zap_add_int_key(objset_t *os, uint64_t obj, int zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep); +/* + * The name is a stringified version of key; increment its value by + * delta. Zero values will be zap_remove()-ed. + */ +int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, + dmu_tx_t *tx); int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta, dmu_tx_t *tx); diff --git a/uts/common/fs/zfs/sys/zfeature.h b/uts/common/fs/zfs/sys/zfeature.h deleted file mode 100644 index 9ff1c93df7f4..000000000000 --- a/uts/common/fs/zfs/sys/zfeature.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#ifndef _SYS_ZFEATURE_H -#define _SYS_ZFEATURE_H - -#include <sys/dmu.h> -#include <sys/nvpair.h> -#include "zfeature_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern boolean_t feature_is_supported(objset_t *os, uint64_t obj, - uint64_t desc_obj, nvlist_t *unsup_feat); - -struct spa; -extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *); -extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *); -extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_ZFEATURE_H */ diff --git a/uts/common/fs/zfs/sys/zfs_acl.h b/uts/common/fs/zfs/sys/zfs_acl.h index d1a64180d5d0..c1a0aeebdce4 100644 --- a/uts/common/fs/zfs/sys/zfs_acl.h +++ b/uts/common/fs/zfs/sys/zfs_acl.h @@ -218,7 +218,7 @@ int zfs_fastaccesschk_execute(struct znode *, cred_t *); extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *); extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *); extern int zfs_acl_access(struct znode *, int, cred_t *); -int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t); +void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t); int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *); int zfs_zaccess_rename(struct znode *, struct znode *, struct znode *, struct znode *, cred_t *cr); diff --git a/uts/common/fs/zfs/sys/zfs_context.h b/uts/common/fs/zfs/sys/zfs_context.h index 
fdd0412feefd..558e9e1884e3 100644 --- a/uts/common/fs/zfs/sys/zfs_context.h +++ b/uts/common/fs/zfs/sys/zfs_context.h @@ -22,9 +22,6 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - */ #ifndef _SYS_ZFS_CONTEXT_H #define _SYS_ZFS_CONTEXT_H @@ -42,7 +39,6 @@ extern "C" { #include <sys/cmn_err.h> #include <sys/kmem.h> #include <sys/taskq.h> -#include <sys/taskq_impl.h> #include <sys/buf.h> #include <sys/param.h> #include <sys/systm.h> diff --git a/uts/common/fs/zfs/sys/zfs_ioctl.h b/uts/common/fs/zfs/sys/zfs_ioctl.h index 4d781ad2a46c..84bf794fe5f0 100644 --- a/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H @@ -42,15 +41,6 @@ extern "C" { #endif /* - * The structures in this file are passed between userland and the - * kernel. Userland may be running a 32-bit process, while the kernel - * is 64-bit. Therefore, these structures need to compile the same in - * 32-bit and 64-bit. This means not using type "long", and adding - * explicit padding so that the 32-bit structure will not be packed more - * tightly than the 64-bit structure (which requires 64-bit alignment). - */ - -/* * Property values for snapdir */ #define ZFS_SNAPDIR_HIDDEN 0 @@ -266,29 +256,22 @@ typedef enum zfs_case { } zfs_case_t; typedef struct zfs_cmd { - char zc_name[MAXPATHLEN]; /* name of pool or dataset */ - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ - int zc_pad2; - - /* - * The following members are for legacy ioctls which haven't been - * converted to the new method. 
- */ - uint64_t zc_history; /* really (char *) */ + char zc_name[MAXPATHLEN]; char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; uint64_t zc_objset_type; uint64_t zc_perm_action; - uint64_t zc_history_len; + uint64_t zc_history; /* really (char *) */ + uint64_t zc_history_len; uint64_t zc_history_offset; uint64_t zc_obj; uint64_t zc_iflags; /* internal to zfs(7fs) */ diff --git a/uts/common/fs/zfs/sys/zfs_vfsops.h b/uts/common/fs/zfs/sys/zfs_vfsops.h index 9af5cef05863..38c87df4300f 100644 --- a/uts/common/fs/zfs/sys/zfs_vfsops.h +++ b/uts/common/fs/zfs/sys/zfs_vfsops.h @@ -57,7 +57,6 @@ struct zfsvfs { boolean_t z_fuid_dirty; /* need to sync fuid table ? */ struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */ zilog_t *z_log; /* intent log pointer */ - uint_t z_acl_mode; /* acl chmod/mode behavior */ uint_t z_acl_inherit; /* acl inheritance behavior */ zfs_case_t z_case; /* case-sense */ boolean_t z_utf8; /* utf8-only */ diff --git a/uts/common/fs/zfs/sys/zio.h b/uts/common/fs/zfs/sys/zio.h index 9e475b4fcecd..97d8ec74d2e9 100644 --- a/uts/common/fs/zfs/sys/zio.h +++ b/uts/common/fs/zfs/sys/zio.h @@ -22,10 +22,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -/* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
- */ #ifndef _ZIO_H #define _ZIO_H @@ -273,14 +269,6 @@ typedef struct zbookmark { #define ZB_ZIL_OBJECT (0ULL) #define ZB_ZIL_LEVEL (-2LL) -#define ZB_IS_ZERO(zb) \ - ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \ - (zb)->zb_level == 0 && (zb)->zb_blkid == 0) -#define ZB_IS_ROOT(zb) \ - ((zb)->zb_object == ZB_ROOT_OBJECT && \ - (zb)->zb_level == ZB_ROOT_LEVEL && \ - (zb)->zb_blkid == ZB_ROOT_BLKID) - typedef struct zio_prop { enum zio_checksum zp_checksum; enum zio_compress zp_compress; @@ -298,7 +286,6 @@ typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, typedef void zio_cksum_free_f(void *cbdata, size_t size); struct zio_bad_cksum; /* defined in zio_checksum.h */ -struct dnode_phys; struct zio_cksum_report { struct zio_cksum_report *zcr_next; @@ -430,9 +417,6 @@ struct zio { /* FMA state */ zio_cksum_report_t *io_cksum_report; uint64_t io_ena; - - /* Taskq dispatching state */ - taskq_ent_t io_tqent; }; extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, @@ -568,10 +552,6 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, /* Called from spa_sync(), but primarily an injection handler */ extern void spa_handle_ignored_writes(spa_t *spa); -/* zbookmark functions */ -boolean_t zbookmark_is_before(const struct dnode_phys *dnp, - const zbookmark_t *zb1, const zbookmark_t *zb2); - #ifdef __cplusplus } #endif diff --git a/uts/common/fs/zfs/txg.c b/uts/common/fs/zfs/txg.c index 55b1f3884bf3..9b308ca4e71a 100644 --- a/uts/common/fs/zfs/txg.c +++ b/uts/common/fs/zfs/txg.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Portions Copyright 2011 Martin Matuska */ #include <sys/zfs_context.h> @@ -480,7 +479,7 @@ void txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) { tx_state_t *tx = &dp->dp_tx; - clock_t timeout = ddi_get_lbolt() + ticks; + int timeout = ddi_get_lbolt() + ticks; /* don't delay if this txg could transition to quiesing immediately */ if (tx->tx_open_txg > txg || diff --git a/uts/common/fs/zfs/vdev.c b/uts/common/fs/zfs/vdev.c index 6fbaf7b7ecca..bac3e86054d6 100644 --- a/uts/common/fs/zfs/vdev.c +++ b/uts/common/fs/zfs/vdev.c @@ -21,8 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -108,7 +106,7 @@ vdev_get_min_asize(vdev_t *vd) vdev_t *pvd = vd->vdev_parent; /* - * If our parent is NULL (inactive spare or cache) or is the root, + * The our parent is NULL (inactive spare or cache) or is the root, * just return our own asize. 
*/ if (pvd == NULL) @@ -288,7 +286,6 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) if (spa->spa_root_vdev == NULL) { ASSERT(ops == &vdev_root_ops); spa->spa_root_vdev = vd; - spa->spa_load_guid = spa_generate_guid(NULL); } if (guid == 0 && ops != &vdev_hole_ops) { @@ -488,7 +485,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_removing); } - if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) { + if (parent && !parent->vdev_parent) { ASSERT(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_ADD || alloctype == VDEV_ALLOC_SPLIT || @@ -664,8 +661,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) svd->vdev_ms_shift = 0; svd->vdev_ms_count = 0; - if (tvd->vdev_mg) - ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); tvd->vdev_mg = svd->vdev_mg; tvd->vdev_ms = svd->vdev_ms; @@ -737,7 +732,6 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) mvd->vdev_asize = cvd->vdev_asize; mvd->vdev_min_asize = cvd->vdev_min_asize; - mvd->vdev_max_asize = cvd->vdev_max_asize; mvd->vdev_ashift = cvd->vdev_ashift; mvd->vdev_state = cvd->vdev_state; mvd->vdev_crtxg = cvd->vdev_crtxg; @@ -1109,8 +1103,7 @@ vdev_open(vdev_t *vd) spa_t *spa = vd->vdev_spa; int error; uint64_t osize = 0; - uint64_t max_osize = 0; - uint64_t asize, max_asize, psize; + uint64_t asize, psize; uint64_t ashift = 0; ASSERT(vd->vdev_open_thread == curthread || @@ -1141,7 +1134,7 @@ vdev_open(vdev_t *vd) return (ENXIO); } - error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); + error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); /* * Reset the vdev_reopening flag so that we actually close @@ -1199,7 +1192,6 @@ vdev_open(vdev_t *vd) } osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); - max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t)); if (vd->vdev_children == 0) { if (osize < SPA_MINDEVSIZE) { @@ -1209,8 +1201,6 @@ vdev_open(vdev_t *vd) } psize = osize; asize = osize - (VDEV_LABEL_START_SIZE 
+ VDEV_LABEL_END_SIZE); - max_asize = max_osize - (VDEV_LABEL_START_SIZE + - VDEV_LABEL_END_SIZE); } else { if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { @@ -1220,7 +1210,6 @@ vdev_open(vdev_t *vd) } psize = 0; asize = osize; - max_asize = max_osize; } vd->vdev_psize = psize; @@ -1240,22 +1229,16 @@ vdev_open(vdev_t *vd) * For testing purposes, a higher ashift can be requested. */ vd->vdev_asize = asize; - vd->vdev_max_asize = max_asize; vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); } else { /* - * Detect if the alignment requirement has increased. - * We don't want to make the pool unavailable, just - * issue a warning instead. + * Make sure the alignment requirement hasn't increased. */ - if (ashift > vd->vdev_top->vdev_ashift && - vd->vdev_ops->vdev_op_leaf) { - cmn_err(CE_WARN, - "Disk, '%s', has a block alignment that is " - "larger than the pool's alignment\n", - vd->vdev_path); + if (ashift > vd->vdev_top->vdev_ashift) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (EINVAL); } - vd->vdev_max_asize = max_asize; } /* @@ -1297,18 +1280,13 @@ vdev_open(vdev_t *vd) * contents. This needs to be done before vdev_load() so that we don't * inadvertently do repair I/Os to the wrong device. * - * If 'strict' is false ignore the spa guid check. This is necessary because - * if the machine crashed during a re-guid the new guid might have been written - * to all of the vdev labels, but not the cached config. The strict check - * will be performed when the pool is opened again using the mos config. - * * This function will only return failure if one of the vdevs indicates that it * has since been destroyed or exported. This is only possible if * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state * will be updated but the function will return 0. 
*/ int -vdev_validate(vdev_t *vd, boolean_t strict) +vdev_validate(vdev_t *vd) { spa_t *spa = vd->vdev_spa; nvlist_t *label; @@ -1316,7 +1294,7 @@ vdev_validate(vdev_t *vd, boolean_t strict) uint64_t state; for (int c = 0; c < vd->vdev_children; c++) - if (vdev_validate(vd->vdev_child[c], strict) != 0) + if (vdev_validate(vd->vdev_child[c]) != 0) return (EBADF); /* @@ -1328,8 +1306,7 @@ vdev_validate(vdev_t *vd, boolean_t strict) uint64_t aux_guid = 0; nvlist_t *nvl; - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == - NULL) { + if ((label = vdev_label_read_config(vd)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); return (0); @@ -1347,9 +1324,8 @@ vdev_validate(vdev_t *vd, boolean_t strict) return (0); } - if (strict && (nvlist_lookup_uint64(label, - ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || - guid != spa_guid(spa))) { + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, + &guid) != 0 || guid != spa_guid(spa)) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); nvlist_free(label); @@ -1511,7 +1487,7 @@ vdev_reopen(vdev_t *vd) !l2arc_vdev_present(vd)) l2arc_add_vdev(spa, vd); } else { - (void) vdev_validate(vd, B_TRUE); + (void) vdev_validate(vd); } /* @@ -1970,14 +1946,14 @@ vdev_validate_aux(vdev_t *vd) if (!vdev_readable(vd)) return (0); - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) { + if ((label = vdev_label_read_config(vd)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); return (-1); } if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || - !SPA_VERSION_IS_SUPPORTED(version) || + version > SPA_VERSION || nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || guid != vd->vdev_guid || nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { @@ -2480,7 +2456,6 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) vs->vs_rsize = vdev_get_min_asize(vd); if (vd->vdev_ops->vdev_op_leaf) vs->vs_rsize += 
VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; - vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; mutex_exit(&vd->vdev_stat_lock); /* diff --git a/uts/common/fs/zfs/vdev_cache.c b/uts/common/fs/zfs/vdev_cache.c index 77f8116effe6..688d541344cb 100644 --- a/uts/common/fs/zfs/vdev_cache.c +++ b/uts/common/fs/zfs/vdev_cache.c @@ -71,16 +71,9 @@ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software * track buffer). At most zfs_vdev_cache_size bytes will be kept in each * vdev's vdev_cache. - * - * TODO: Note that with the current ZFS code, it turns out that the - * vdev cache is not helpful, and in some cases actually harmful. It - * is better if we disable this. Once some time has passed, we should - * actually remove this to simplify the code. For now we just disable - * it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11 - * has made these same changes. */ int zfs_vdev_cache_max = 1<<14; /* 16KB */ -int zfs_vdev_cache_size = 0; +int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */ int zfs_vdev_cache_bshift = 16; #define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */ diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c index 759f0f84f1c8..d7417736b4ee 100644 --- a/uts/common/fs/zfs/vdev_disk.c +++ b/uts/common/fs/zfs/vdev_disk.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/zfs_context.h> @@ -31,7 +30,6 @@ #include <sys/fs/zfs.h> #include <sys/zio.h> #include <sys/sunldi.h> -#include <sys/efi_partition.h> #include <sys/fm/fs/zfs.h> /* @@ -104,39 +102,8 @@ vdev_disk_rele(vdev_t *vd) } } -static uint64_t -vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz) -{ - ASSERT(vd->vdev_wholedisk); - - vdev_disk_t *dvd = vd->vdev_tsd; - dk_efi_t dk_ioc; - efi_gpt_t *efi; - uint64_t avail_space = 0; - int efisize = EFI_LABEL_SIZE * 2; - - dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP); - dk_ioc.dki_lba = 1; - dk_ioc.dki_length = efisize; - dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data; - efi = dk_ioc.dki_data; - - if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc, - FKIOCTL, kcred, NULL) == 0) { - uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA); - - zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu", - vd->vdev_path, capacity, efi_altern_lba); - if (capacity > efi_altern_lba) - avail_space = (capacity - efi_altern_lba) * blksz; - } - kmem_free(dk_ioc.dki_data, efisize); - return (avail_space); -} - static int -vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) +vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd; @@ -307,6 +274,16 @@ skip_open: } /* + * If we own the whole disk, try to enable disk write caching. + * We ignore errors because it's OK if we can't do it. + */ + if (vd->vdev_wholedisk == 1) { + int wce = 1; + (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, + FKIOCTL, kcred, NULL); + } + + /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. 
*/ @@ -316,25 +293,6 @@ skip_open: *ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1; - if (vd->vdev_wholedisk == 1) { - uint64_t capacity = dkmext.dki_capacity - 1; - uint64_t blksz = dkmext.dki_lbsize; - int wce = 1; - - /* - * If we own the whole disk, try to enable disk write caching. - * We ignore errors because it's OK if we can't do it. - */ - (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, - FKIOCTL, kcred, NULL); - - *max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz); - zfs_dbgmsg("capacity change: vdev %s, psize %llu, " - "max_psize %llu", vd->vdev_path, *psize, *max_psize); - } else { - *max_psize = *psize; - } - /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. diff --git a/uts/common/fs/zfs/vdev_file.c b/uts/common/fs/zfs/vdev_file.c index 043fa51294c2..8c22aa5316a1 100644 --- a/uts/common/fs/zfs/vdev_file.c +++ b/uts/common/fs/zfs/vdev_file.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -48,8 +47,7 @@ vdev_file_rele(vdev_t *vd) } static int -vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) +vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { vdev_file_t *vf; vnode_t *vp; @@ -114,7 +112,7 @@ skip_open: return (error); } - *max_psize = *psize = vattr.va_size; + *psize = vattr.va_size; *ashift = SPA_MINBLOCKSHIFT; return (0); diff --git a/uts/common/fs/zfs/vdev_label.c b/uts/common/fs/zfs/vdev_label.c index b9436472495d..c08ed8ba0467 100644 --- a/uts/common/fs/zfs/vdev_label.c +++ b/uts/common/fs/zfs/vdev_label.c @@ -18,10 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* @@ -123,8 +121,6 @@ * txg Transaction group in which this label was written * pool_guid Unique identifier for this pool * vdev_tree An nvlist describing vdev tree. - * features_for_read - * An nvlist of the features necessary for reading the MOS. * * Each leaf device label also contains the following: * @@ -432,13 +428,8 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config) kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); } -/* - * Returns the configuration from the label of the given vdev. If 'label' is - * VDEV_BEST_LABEL, each label of the vdev will be read until a valid - * configuration is found; otherwise, only the specified label will be read. - */ nvlist_t * -vdev_label_read_config(vdev_t *vd, int label) +vdev_label_read_config(vdev_t *vd) { spa_t *spa = vd->vdev_spa; nvlist_t *config = NULL; @@ -456,8 +447,6 @@ vdev_label_read_config(vdev_t *vd, int label) retry: for (int l = 0; l < VDEV_LABELS; l++) { - if (label >= 0 && label < VDEV_LABELS && label != l) - continue; zio = zio_root(spa, NULL, NULL, flags); @@ -507,7 +496,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, /* * Read the label, if any, and perform some basic sanity checks. */ - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) + if ((label = vdev_label_read_config(vd)) == NULL) return (B_FALSE); (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, @@ -844,7 +833,7 @@ retry: * come back up, we fail to see the uberblock for txg + 1 because, say, * it was on a mirrored device and the replica to which we wrote txg + 1 * is now offline. If we then make some changes and sync txg + 1, and then - * the missing replica comes back, then for a few seconds we'll have two + * the missing replica comes back, then for a new seconds we'll have two * conflicting uberblocks on disk with the same txg. The solution is simple: * among uberblocks with equal txg, choose the one with the latest timestamp. 
*/ @@ -864,50 +853,46 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) return (0); } -struct ubl_cbdata { - uberblock_t *ubl_ubbest; /* Best uberblock */ - vdev_t *ubl_vd; /* vdev associated with the above */ - int ubl_label; /* Label associated with the above */ -}; - static void vdev_uberblock_load_done(zio_t *zio) { - vdev_t *vd = zio->io_vd; spa_t *spa = zio->io_spa; zio_t *rio = zio->io_private; uberblock_t *ub = zio->io_data; - struct ubl_cbdata *cbp = rio->io_private; + uberblock_t *ubbest = rio->io_private; - ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd)); + ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd)); if (zio->io_error == 0 && uberblock_verify(ub) == 0) { mutex_enter(&rio->io_lock); if (ub->ub_txg <= spa->spa_load_max_txg && - vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) { - /* - * Keep track of the vdev and label in which this - * uberblock was found. We will use this information - * later to obtain the config nvlist associated with - * this uberblock. 
- */ - *cbp->ubl_ubbest = *ub; - cbp->ubl_vd = vd; - cbp->ubl_label = vdev_label_number(vd->vdev_psize, - zio->io_offset); - } + vdev_uberblock_compare(ub, ubbest) > 0) + *ubbest = *ub; mutex_exit(&rio->io_lock); } zio_buf_free(zio->io_data, zio->io_size); } -static void -vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, - struct ubl_cbdata *cbp) +void +vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) { + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; + + if (vd == rvd) { + ASSERT(zio == NULL); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + zio = zio_root(spa, NULL, ubbest, flags); + bzero(ubbest, sizeof (uberblock_t)); + } + + ASSERT(zio != NULL); + for (int c = 0; c < vd->vdev_children; c++) - vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp); + vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { for (int l = 0; l < VDEV_LABELS; l++) { @@ -920,45 +905,11 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, } } } -} -/* - * Reads the 'best' uberblock from disk along with its associated - * configuration. First, we read the uberblock array of each label of each - * vdev, keeping track of the uberblock with the highest txg in each array. - * Then, we read the configuration from the same label as the best uberblock. 
- */ -void -vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) -{ - int i; - zio_t *zio; - spa_t *spa = rvd->vdev_spa; - struct ubl_cbdata cb; - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; - - ASSERT(ub); - ASSERT(config); - - bzero(ub, sizeof (uberblock_t)); - *config = NULL; - - cb.ubl_ubbest = ub; - cb.ubl_vd = NULL; - - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); - zio = zio_root(spa, NULL, &cb, flags); - vdev_uberblock_load_impl(zio, rvd, flags, &cb); - (void) zio_wait(zio); - if (cb.ubl_vd != NULL) { - for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) { - *config = vdev_label_read_config(cb.ubl_vd, i); - if (*config != NULL) - break; - } + if (vd == rvd) { + (void) zio_wait(zio); + spa_config_exit(spa, SCL_ALL, FTAG); } - spa_config_exit(spa, SCL_ALL, FTAG); } /* diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c index a28ca3e3965b..698c0275d34e 100644 --- a/uts/common/fs/zfs/vdev_mirror.c +++ b/uts/common/fs/zfs/vdev_mirror.c @@ -23,10 +23,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. 
- */ - #include <sys/zfs_context.h> #include <sys/spa.h> #include <sys/vdev_impl.h> @@ -131,8 +127,7 @@ vdev_mirror_map_alloc(zio_t *zio) } static int -vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) +vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) { int numerrors = 0; int lasterror = 0; @@ -154,7 +149,6 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; - *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *ashift = MAX(*ashift, cvd->vdev_ashift); } diff --git a/uts/common/fs/zfs/vdev_missing.c b/uts/common/fs/zfs/vdev_missing.c index 3bd8c90e04c7..6a5588d59213 100644 --- a/uts/common/fs/zfs/vdev_missing.c +++ b/uts/common/fs/zfs/vdev_missing.c @@ -24,10 +24,6 @@ */ /* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -/* * The 'missing' vdev is a special vdev type used only during import. It * signifies a placeholder in the root vdev for some vdev that we know is * missing. We pass it down to the kernel to allow the rest of the @@ -44,8 +40,7 @@ /* ARGSUSED */ static int -vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) +vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { /* * Really this should just fail. But then the root vdev will be in the @@ -54,7 +49,6 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * will fail the GUID sum check before ever trying to open the pool. */ *psize = 0; - *max_psize = 0; *ashift = 0; return (0); } diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c index 030ea4293002..4b0f5602c1d4 100644 --- a/uts/common/fs/zfs/vdev_raidz.c +++ b/uts/common/fs/zfs/vdev_raidz.c @@ -21,7 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include <sys/zfs_context.h> @@ -1442,8 +1441,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) } static int -vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) +vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) { vdev_t *cvd; uint64_t nparity = vd->vdev_nparity; @@ -1471,12 +1469,10 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; - *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *ashift = MAX(*ashift, cvd->vdev_ashift); } *asize *= vd->vdev_children; - *max_asize *= vd->vdev_children; if (numerrors > nparity) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; diff --git a/uts/common/fs/zfs/vdev_root.c b/uts/common/fs/zfs/vdev_root.c index 1abc79d330bb..879f78f3a5b3 100644 --- a/uts/common/fs/zfs/vdev_root.c +++ b/uts/common/fs/zfs/vdev_root.c @@ -23,10 +23,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - #include <sys/zfs_context.h> #include <sys/spa.h> #include <sys/vdev_impl.h> @@ -54,8 +50,7 @@ too_many_errors(vdev_t *vd, int numerrors) } static int -vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) +vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) { int lasterror = 0; int numerrors = 0; @@ -82,7 +77,6 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, } *asize = 0; - *max_asize = 0; *ashift = 0; return (0); diff --git a/uts/common/fs/zfs/zap.c b/uts/common/fs/zfs/zap.c index fa1d99fec957..288a4d99ab25 100644 --- a/uts/common/fs/zfs/zap.c +++ b/uts/common/fs/zfs/zap.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -947,19 +946,6 @@ fzap_prefetch(zap_name_t *zn) * Helper functions for consumers. 
*/ -uint64_t -zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj, - const char *name, dmu_tx_t *tx) -{ - uint64_t new_obj; - - VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0); - VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj, - tx) == 0); - - return (new_obj); -} - int zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, char *name) diff --git a/uts/common/fs/zfs/zap_micro.c b/uts/common/fs/zfs/zap_micro.c index 3e80fb9c5d80..2d89c20c47d7 100644 --- a/uts/common/fs/zfs/zap_micro.c +++ b/uts/common/fs/zfs/zap_micro.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zio.h> @@ -461,7 +460,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); - ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); + ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif @@ -585,7 +584,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, { dmu_object_info_t doi; dmu_object_info_from_db(db, &doi); - ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); + ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); } #endif @@ -1404,7 +1403,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, } /* - * We lock the zap with adding == FALSE. Because, if we pass + * We lock the zap with adding == FALSE. Because, if we pass * the actual value of add, it could trigger a mzap_upgrade(). * At present we are just evaluating the possibility of this operation * and hence we donot want to trigger an upgrade. 
diff --git a/uts/common/fs/zfs/zfeature.c b/uts/common/fs/zfs/zfeature.c deleted file mode 100644 index ba722088a40f..000000000000 --- a/uts/common/fs/zfs/zfeature.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> -#include <sys/zfeature.h> -#include <sys/dmu.h> -#include <sys/nvpair.h> -#include <sys/zap.h> -#include <sys/dmu_tx.h> -#include "zfeature_common.h" -#include <sys/spa_impl.h> - -/* - * ZFS Feature Flags - * ----------------- - * - * ZFS feature flags are used to provide fine-grained versioning to the ZFS - * on-disk format. Once enabled on a pool feature flags replace the old - * spa_version() number. - * - * Each new on-disk format change will be given a uniquely identifying string - * guid rather than a version number. This avoids the problem of different - * organizations creating new on-disk formats with the same version number. To - * keep feature guids unique they should consist of the reverse dns name of the - * organization which implemented the feature and a short name for the feature, - * separated by a colon (e.g. 
com.delphix:async_destroy). - * - * Reference Counts - * ---------------- - * - * Within each pool features can be in one of three states: disabled, enabled, - * or active. These states are differentiated by a reference count stored on - * disk for each feature: - * - * 1) If there is no reference count stored on disk the feature is disabled. - * 2) If the reference count is 0 a system administrator has enabled the - * feature, but the feature has not been used yet, so no on-disk - * format changes have been made. - * 3) If the reference count is greater than 0 the feature is active. - * The format changes required by the feature are currently on disk. - * Note that if the feature's format changes are reversed the feature - * may choose to set its reference count back to 0. - * - * Feature flags makes no differentiation between non-zero reference counts - * for an active feature (e.g. a reference count of 1 means the same thing as a - * reference count of 27834721), but feature implementations may choose to use - * the reference count to store meaningful information. For example, a new RAID - * implementation might set the reference count to the number of vdevs using - * it. If all those disks are removed from the pool the feature goes back to - * having a reference count of 0. - * - * It is the responsibility of the individual features to maintain a non-zero - * reference count as long as the feature's format changes are present on disk. - * - * Dependencies - * ------------ - * - * Each feature may depend on other features. The only effect of this - * relationship is that when a feature is enabled all of its dependencies are - * automatically enabled as well. Any future work to support disabling of - * features would need to ensure that features cannot be disabled if other - * enabled features depend on them. - * - * On-disk Format - * -------------- - * - * When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES - * (5000). 
In order for this to work the pool is automatically upgraded to - * SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature flags on disk - * format changes will be in use. - * - * Information about features is stored in 3 ZAP objects in the pool's MOS. - * These objects are linked to by the following names in the pool directory - * object: - * - * 1) features_for_read: feature guid -> reference count - * Features needed to open the pool for reading. - * 2) features_for_write: feature guid -> reference count - * Features needed to open the pool for writing. - * 3) feature_descriptions: feature guid -> descriptive string - * A human readable string. - * - * All enabled features appear in either features_for_read or - * features_for_write, but not both. - * - * To open a pool in read-only mode only the features listed in - * features_for_read need to be supported. - * - * To open the pool in read-write mode features in both features_for_read and - * features_for_write need to be supported. - * - * Some features may be required to read the ZAP objects containing feature - * information. To allow software to check for compatibility with these features - * before the pool is opened their names must be stored in the label in a - * new "features_for_read" entry (note that features that are only required - * to write to a pool never need to be stored in the label since the - * features_for_write ZAP object can be read before the pool is written to). - * To save space in the label features must be explicitly marked as needing to - * be written to the label. Also, reference counts are not stored in the label, - * instead any feature whose reference count drops to 0 is removed from the - * label. - * - * Adding New Features - * ------------------- - * - * Features must be registered in zpool_feature_init() function in - * zfeature_common.c using the zfeature_register() function. 
This function - * has arguments to specify if the feature should be stored in the - * features_for_read or features_for_write ZAP object and if it needs to be - * written to the label when active. - * - * Once a feature is registered it will appear as a "feature@<feature name>" - * property which can be set by an administrator. Feature implementors should - * use the spa_feature_is_enabled() and spa_feature_is_active() functions to - * query the state of a feature and the spa_feature_incr() and - * spa_feature_decr() functions to change an enabled feature's reference count. - * Reference counts may only be updated in the syncing context. - * - * Features may not perform enable-time initialization. Instead, any such - * initialization should occur when the feature is first used. This design - * enforces that on-disk changes be made only when features are used. Code - * should only check if a feature is enabled using spa_feature_is_enabled(), - * not by relying on any feature specific metadata existing. If a feature is - * enabled, but the feature's metadata is not on disk yet then it should be - * created as needed. - * - * As an example, consider the com.delphix:async_destroy feature. This feature - * relies on the existence of a bptree in the MOS that store blocks for - * asynchronous freeing. This bptree is not created when async_destroy is - * enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is - * called to check if async_destroy is enabled. If it is and the bptree object - * does not exist yet, the bptree object is created as part of the dataset - * destroy and async_destroy's reference count is incremented to indicate it - * has made an on-disk format change. Later, after the destroyed dataset's - * blocks have all been asynchronously freed there is no longer any use for the - * bptree object, so it is destroyed and async_destroy's reference count is - * decremented back to 0 to indicate that it has undone its on-disk format - * changes. 
- */ - -typedef enum { - FEATURE_ACTION_ENABLE, - FEATURE_ACTION_INCR, - FEATURE_ACTION_DECR, -} feature_action_t; - -/* - * Checks that the features active in the specified object are supported by - * this software. Adds each unsupported feature (name -> description) to - * the supplied nvlist. - */ -boolean_t -feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj, - nvlist_t *unsup_feat) -{ - boolean_t supported; - zap_cursor_t zc; - zap_attribute_t za; - - supported = B_TRUE; - for (zap_cursor_init(&zc, os, obj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - ASSERT(za.za_integer_length == sizeof (uint64_t) && - za.za_num_integers == 1); - - if (za.za_first_integer != 0 && - !zfeature_is_supported(za.za_name)) { - supported = B_FALSE; - - if (unsup_feat != NULL) { - char *desc = ""; - char buf[MAXPATHLEN]; - - if (zap_lookup(os, desc_obj, za.za_name, - 1, sizeof (buf), buf) == 0) - desc = buf; - - VERIFY(nvlist_add_string(unsup_feat, za.za_name, - desc) == 0); - } - } - } - zap_cursor_fini(&zc); - - return (supported); -} - -static int -feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj, - zfeature_info_t *feature, uint64_t *res) -{ - int err; - uint64_t refcount; - uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj; - - ASSERT(0 != zapobj); - - err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1, - &refcount); - if (err != 0) { - if (err == ENOENT) - return (ENOTSUP); - else - return (err); - } - *res = refcount; - return (0); -} - -static int -feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj, - uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action, - dmu_tx_t *tx) -{ - int error; - uint64_t refcount; - uint64_t zapobj = feature->fi_can_readonly ? 
write_obj : read_obj; - - ASSERT(0 != zapobj); - ASSERT(zfeature_is_valid_guid(feature->fi_guid)); - - error = zap_lookup(os, zapobj, feature->fi_guid, - sizeof (uint64_t), 1, &refcount); - - /* - * If we can't ascertain the status of the specified feature, an I/O - * error occurred. - */ - if (error != 0 && error != ENOENT) - return (error); - - switch (action) { - case FEATURE_ACTION_ENABLE: - /* - * If the feature is already enabled, ignore the request. - */ - if (error == 0) - return (0); - refcount = 0; - break; - case FEATURE_ACTION_INCR: - if (error == ENOENT) - return (ENOTSUP); - if (refcount == UINT64_MAX) - return (EOVERFLOW); - refcount++; - break; - case FEATURE_ACTION_DECR: - if (error == ENOENT) - return (ENOTSUP); - if (refcount == 0) - return (EOVERFLOW); - refcount--; - break; - default: - ASSERT(0); - break; - } - - if (action == FEATURE_ACTION_ENABLE) { - int i; - - for (i = 0; feature->fi_depends[i] != NULL; i++) { - zfeature_info_t *dep = feature->fi_depends[i]; - - error = feature_do_action(os, read_obj, write_obj, - desc_obj, dep, FEATURE_ACTION_ENABLE, tx); - if (error != 0) - return (error); - } - } - - error = zap_update(os, zapobj, feature->fi_guid, - sizeof (uint64_t), 1, &refcount, tx); - if (error != 0) - return (error); - - if (action == FEATURE_ACTION_ENABLE) { - error = zap_update(os, desc_obj, - feature->fi_guid, 1, strlen(feature->fi_desc) + 1, - feature->fi_desc, tx); - if (error != 0) - return (error); - } - - if (action == FEATURE_ACTION_INCR && refcount == 1 && feature->fi_mos) { - spa_activate_mos_feature(dmu_objset_spa(os), feature->fi_guid); - } - - if (action == FEATURE_ACTION_DECR && refcount == 0) { - spa_deactivate_mos_feature(dmu_objset_spa(os), - feature->fi_guid); - } - - return (0); -} - -void -spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx) -{ - /* - * We create feature flags ZAP objects in two instances: during pool - * creation and during pool upgrade. 
- */ - ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && - tx->tx_txg == TXG_INITIAL)); - - spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, - DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_FEATURES_FOR_READ, tx); - spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset, - DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_FEATURES_FOR_WRITE, tx); - spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset, - DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_FEATURE_DESCRIPTIONS, tx); -} - -/* - * Enable any required dependencies, then enable the requested feature. - */ -void -spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) -{ - ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); - VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj, - spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx)); -} - -/* - * If the specified feature has not yet been enabled, this function returns - * ENOTSUP; otherwise, this function increments the feature's refcount (or - * returns EOVERFLOW if the refcount cannot be incremented). This function must - * be called from syncing context. - */ -void -spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) -{ - ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); - VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj, - spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx)); -} - -/* - * If the specified feature has not yet been enabled, this function returns - * ENOTSUP; otherwise, this function decrements the feature's refcount (or - * returns EOVERFLOW if the refcount is already 0). This function must - * be called from syncing context. 
- */ -void -spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) -{ - ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); - VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj, - spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx)); -} - -boolean_t -spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature) -{ - int err; - uint64_t refcount; - - if (spa_version(spa) < SPA_VERSION_FEATURES) - return (B_FALSE); - - err = feature_get_refcount(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj, - feature, &refcount); - ASSERT(err == 0 || err == ENOTSUP); - return (err == 0); -} - -boolean_t -spa_feature_is_active(spa_t *spa, zfeature_info_t *feature) -{ - int err; - uint64_t refcount; - - if (spa_version(spa) < SPA_VERSION_FEATURES) - return (B_FALSE); - - err = feature_get_refcount(spa->spa_meta_objset, - spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj, - feature, &refcount); - ASSERT(err == 0 || err == ENOTSUP); - return (err == 0 && refcount > 0); -} diff --git a/uts/common/fs/zfs/zfs_acl.c b/uts/common/fs/zfs/zfs_acl.c index 2b93fc8329f9..843b5ff06ef4 100644 --- a/uts/common/fs/zfs/zfs_acl.c +++ b/uts/common/fs/zfs/zfs_acl.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> @@ -1331,8 +1330,75 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); } +/* + * Update access mask for prepended ACE + * + * This applies the "groupmask" value for aclmode property. 
+ */ +static void +zfs_acl_prepend_fixup(zfs_acl_t *aclp, void *acep, void *origacep, + mode_t mode, uint64_t owner) +{ + int rmask, wmask, xmask; + int user_ace; + uint16_t aceflags; + uint32_t origmask, acepmask; + uint64_t fuid; + + aceflags = aclp->z_ops.ace_flags_get(acep); + fuid = aclp->z_ops.ace_who_get(acep); + origmask = aclp->z_ops.ace_mask_get(origacep); + acepmask = aclp->z_ops.ace_mask_get(acep); + + user_ace = (!(aceflags & + (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP))); + + if (user_ace && (fuid == owner)) { + rmask = S_IRUSR; + wmask = S_IWUSR; + xmask = S_IXUSR; + } else { + rmask = S_IRGRP; + wmask = S_IWGRP; + xmask = S_IXGRP; + } + + if (origmask & ACE_READ_DATA) { + if (mode & rmask) { + acepmask &= ~ACE_READ_DATA; + } else { + acepmask |= ACE_READ_DATA; + } + } + + if (origmask & ACE_WRITE_DATA) { + if (mode & wmask) { + acepmask &= ~ACE_WRITE_DATA; + } else { + acepmask |= ACE_WRITE_DATA; + } + } + + if (origmask & ACE_APPEND_DATA) { + if (mode & wmask) { + acepmask &= ~ACE_APPEND_DATA; + } else { + acepmask |= ACE_APPEND_DATA; + } + } + + if (origmask & ACE_EXECUTE) { + if (mode & xmask) { + acepmask &= ~ACE_EXECUTE; + } else { + acepmask |= ACE_EXECUTE; + } + } + aclp->z_ops.ace_mask_set(acep, acepmask); +} + static void -zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) +zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) { void *acep = NULL; uint64_t who; @@ -1344,31 +1410,30 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) zfs_acl_node_t *newnode; size_t abstract_size = aclp->z_ops.ace_abstract_size(); void *zacep; - boolean_t isdir; - trivial_acl_t masks; + uint32_t owner, group, everyone; + uint32_t deny1, deny2, allow0; new_count = new_bytes = 0; - isdir = (vtype == VDIR); - - acl_trivial_access_masks((mode_t)mode, isdir, &masks); + acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2, + &owner, &group, &everyone); newnode = 
zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes); zacep = newnode->z_acldata; - if (masks.allow0) { - zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER); + if (allow0) { + zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER); zacep = (void *)((uintptr_t)zacep + abstract_size); new_count++; new_bytes += abstract_size; - } if (masks.deny1) { - zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER); + } if (deny1) { + zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER); zacep = (void *)((uintptr_t)zacep + abstract_size); new_count++; new_bytes += abstract_size; } - if (masks.deny2) { - zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP); + if (deny2) { + zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP); zacep = (void *)((uintptr_t)zacep + abstract_size); new_count++; new_bytes += abstract_size; @@ -1387,17 +1452,10 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) continue; } - /* - * If this ACL has any inheritable ACEs, mark that in - * the hints (which are later masked into the pflags) - * so create knows to do inheritance. - */ - if (isdir && (inherit_flags & - (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) - aclp->z_hints |= ZFS_INHERIT_ACE; - if ((type != ALLOW && type != DENY) || (inherit_flags & ACE_INHERIT_ONLY_ACE)) { + if (inherit_flags) + aclp->z_hints |= ZFS_INHERIT_ACE; switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: @@ -1410,13 +1468,20 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) /* * Limit permissions to be no greater than - * group permissions. - * The "aclinherit" and "aclmode" properties - * affect policy for create and chmod(2), - * respectively. 
+ * group permissions */ - if ((type == ALLOW) && trim) - access_mask &= masks.group; + if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) { + if (!(mode & S_IRGRP)) + access_mask &= ~ACE_READ_DATA; + if (!(mode & S_IWGRP)) + access_mask &= + ~(ACE_WRITE_DATA|ACE_APPEND_DATA); + if (!(mode & S_IXGRP)) + access_mask &= ~ACE_EXECUTE; + access_mask &= + ~(ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS); + } } zfs_set_ace(aclp, zacep, access_mask, type, who, iflags); ace_size = aclp->z_ops.ace_size(acep); @@ -1424,11 +1489,11 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) new_count++; new_bytes += ace_size; } - zfs_set_ace(aclp, zacep, masks.owner, 0, -1, ACE_OWNER); + zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER); zacep = (void *)((uintptr_t)zacep + abstract_size); - zfs_set_ace(aclp, zacep, masks.group, 0, -1, OWNING_GROUP); + zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP); zacep = (void *)((uintptr_t)zacep + abstract_size); - zfs_set_ace(aclp, zacep, masks.everyone, 0, -1, ACE_EVERYONE); + zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE); new_count += 3; new_bytes += abstract_size * 3; @@ -1440,27 +1505,17 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) list_insert_tail(&aclp->z_acl, newnode); } -int +void zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) { - int error = 0; - mutex_enter(&zp->z_acl_lock); mutex_enter(&zp->z_lock); - if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD) - *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); - else - error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE); - - if (error == 0) { - (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; - zfs_acl_chmod(ZTOV(zp)->v_type, mode, - (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp); - } + *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); + (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; + zfs_acl_chmod(zp->z_zfsvfs, mode, *aclp); mutex_exit(&zp->z_lock); 
mutex_exit(&zp->z_acl_lock); - - return (error); + ASSERT(*aclp); } /* @@ -1708,8 +1763,8 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (acl_ids->z_aclp == NULL) { mutex_enter(&dzp->z_acl_lock); mutex_enter(&dzp->z_lock); - if (!(flag & IS_ROOT_NODE) && - (dzp->z_pflags & ZFS_INHERIT_ACE) && + if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR && + (dzp->z_pflags & ZFS_INHERIT_ACE)) && !(dzp->z_pflags & ZFS_XATTR)) { VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, &paclp, B_FALSE)); @@ -1726,9 +1781,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (need_chmod) { acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ? ZFS_ACL_AUTO_INHERIT : 0; - zfs_acl_chmod(vap->va_type, acl_ids->z_mode, - (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED), - acl_ids->z_aclp); + zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp); } } diff --git a/uts/common/fs/zfs/zfs_ctldir.c b/uts/common/fs/zfs/zfs_ctldir.c index d902ff637c38..815f8895e702 100644 --- a/uts/common/fs/zfs/zfs_ctldir.c +++ b/uts/common/fs/zfs/zfs_ctldir.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -750,7 +749,8 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, return (err); if (err == 0) { - err = dmu_objset_snapshot_one(name, dirname); + err = dmu_objset_snapshot(name, dirname, NULL, NULL, + B_FALSE, B_FALSE, -1); if (err) return (err); err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); diff --git a/uts/common/fs/zfs/zfs_fm.c b/uts/common/fs/zfs/zfs_fm.c index fa5903a432dd..0b4812666442 100644 --- a/uts/common/fs/zfs/zfs_fm.c +++ b/uts/common/fs/zfs/zfs_fm.c @@ -23,10 +23,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. 
- */ - #include <sys/spa.h> #include <sys/spa_impl.h> #include <sys/vdev.h> @@ -713,10 +709,6 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, if (report->zcr_ereport == NULL) { report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); - if (report->zcr_ckinfo != NULL) { - kmem_free(report->zcr_ckinfo, - sizeof (*report->zcr_ckinfo)); - } kmem_free(report, sizeof (*report)); return; } diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c index 213142740162..1b63c9bf45ef 100644 --- a/uts/common/fs/zfs/zfs_ioctl.c +++ b/uts/common/fs/zfs/zfs_ioctl.c @@ -18,114 +18,8 @@ * * CDDL HEADER END */ - /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright 2011 Martin Matuska - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -/* - * ZFS ioctls. - * - * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage - * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. - * - * There are two ways that we handle ioctls: the legacy way where almost - * all of the logic is in the ioctl callback, and the new way where most - * of the marshalling is handled in the common entry point, zfsdev_ioctl(). - * - * Non-legacy ioctls should be registered by calling - * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked - * from userland by lzc_ioctl(). - * - * The registration arguments are as follows: - * - * const char *name - * The name of the ioctl. This is used for history logging. If the - * ioctl returns successfully (the callback returns 0), and allow_log - * is true, then a history log entry will be recorded with the input & - * output nvlists. The log entry can be printed with "zpool history -i". - * - * zfs_ioc_t ioc - * The ioctl request number, which userland will pass to ioctl(2). 
- * The ioctl numbers can change from release to release, because - * the caller (libzfs) must be matched to the kernel. - * - * zfs_secpolicy_func_t *secpolicy - * This function will be called before the zfs_ioc_func_t, to - * determine if this operation is permitted. It should return EPERM - * on failure, and 0 on success. Checks include determining if the - * dataset is visible in this zone, and if the user has either all - * zfs privileges in the zone (SYS_MOUNT), or has been granted permission - * to do this operation on this dataset with "zfs allow". - * - * zfs_ioc_namecheck_t namecheck - * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool - * name, a dataset name, or nothing. If the name is not well-formed, - * the ioctl will fail and the callback will not be called. - * Therefore, the callback can assume that the name is well-formed - * (e.g. is null-terminated, doesn't have more than one '@' character, - * doesn't have invalid characters). - * - * zfs_ioc_poolcheck_t pool_check - * This specifies requirements on the pool state. If the pool does - * not meet them (is suspended or is readonly), the ioctl will fail - * and the callback will not be called. If any checks are specified - * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. - * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | - * POOL_CHECK_READONLY). - * - * boolean_t smush_outnvlist - * If smush_outnvlist is true, then the output is presumed to be a - * list of errors, and it will be "smushed" down to fit into the - * caller's buffer, by removing some entries and replacing them with a - * single "N_MORE_ERRORS" entry indicating how many were removed. See - * nvlist_smush() for details. If smush_outnvlist is false, and the - * outnvlist does not fit into the userland-provided buffer, then the - * ioctl will fail with ENOMEM. - * - * zfs_ioc_func_t *func - * The callback function that will perform the operation. 
- * - * The callback should return 0 on success, or an error number on - * failure. If the function fails, the userland ioctl will return -1, - * and errno will be set to the callback's return value. The callback - * will be called with the following arguments: - * - * const char *name - * The name of the pool or dataset to operate on, from - * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the - * expected type (pool, dataset, or none). - * - * nvlist_t *innvl - * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or - * NULL if no input nvlist was provided. Changes to this nvlist are - * ignored. If the input nvlist could not be deserialized, the - * ioctl will fail and the callback will not be called. - * - * nvlist_t *outnvl - * The output nvlist, initially empty. The callback can fill it in, - * and it will be returned to userland by serializing it into - * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization - * fails (e.g. because the caller didn't supply a large enough - * buffer), then the overall ioctl will fail. See the - * 'smush_nvlist' argument above for additional behaviors. - * - * There are two typical uses of the output nvlist: - * - To return state, e.g. property values. In this case, - * smush_outnvlist should be false. If the buffer was not large - * enough, the caller will reallocate a larger buffer and try - * the ioctl again. - * - * - To return multiple errors from an ioctl which makes on-disk - * changes. In this case, smush_outnvlist should be true. - * Ioctls which make on-disk modifications should generally not - * use the outnvl if they succeed, because the caller can not - * distinguish between the operation failing, and - * deserialization failing. 
*/ #include <sys/types.h> @@ -154,7 +48,6 @@ #include <sys/dsl_prop.h> #include <sys/dsl_deleg.h> #include <sys/dmu_objset.h> -#include <sys/dmu_impl.h> #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/sunldi.h> @@ -186,13 +79,8 @@ extern void zfs_fini(void); ldi_ident_t zfs_li = NULL; dev_info_t *zfs_dip; -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; -static uint_t zfs_allow_log_key; - -typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); -typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); +typedef int zfs_ioc_func_t(zfs_cmd_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); typedef enum { NO_NAME, @@ -203,18 +91,15 @@ typedef enum { typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2, + POOL_CHECK_READONLY = 1 << 2 } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { - zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_allow_log; + boolean_t zvec_his_log; zfs_ioc_poolcheck_t zvec_pool_check; - boolean_t zvec_smush_outnvlist; - const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ @@ -232,8 +117,7 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); -static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ void @@ -371,7 +255,7 @@ zfs_log_history(zfs_cmd_t *zc) if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf); + (void) 
spa_history_log(spa, buf, LOG_CMD_NORMAL); spa_close(spa, FTAG); } history_str_free(buf); @@ -383,7 +267,7 @@ zfs_log_history(zfs_cmd_t *zc) */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) { return (0); } @@ -394,7 +278,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) @@ -463,28 +347,21 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -static int +int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; - dsl_dataset_t *ds; - error = dsl_dataset_hold(name, FTAG, &ds); - if (error != 0) - return (error); - - error = zfs_dozonecheck_ds(name, ds, cr); + error = zfs_dozonecheck(name, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_deleg_access(name, perm, cr); } - - dsl_dataset_rele(ds, FTAG); return (error); } -static int +int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { @@ -648,9 +525,8 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -/* ARGSUSED */ -static int -zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +int +zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) { int error; @@ -665,17 +541,15 @@ zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (0); } -/* ARGSUSED */ -static int -zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +int +zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, 
ZFS_DELEG_PERM_ROLLBACK, cr)); } -/* ARGSUSED */ -static int -zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +int +zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; @@ -711,17 +585,8 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (error); } -/* ARGSUSED */ static int -zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) -{ - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_SEND, cr)); -} - -/* ARGSUSED */ -static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) { vnode_t *vp; int error; @@ -745,7 +610,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) } int -zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) { if (!INGLOBALZONE(curproc)) return (EPERM); @@ -753,12 +618,12 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, innvl, cr)); + return (zfs_secpolicy_deleg_share(zc, cr)); } } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) { if (!INGLOBALZONE(curproc)) return (EPERM); @@ -766,7 +631,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, innvl, cr)); + return (zfs_secpolicy_deleg_share(zc, cr)); } } @@ -804,55 +669,34 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * 
descendant mount and destroy permissions. + * descendent mount and destroy permissions. + * Reassemble the full filesystem@snap name so dsl_deleg_access() + * can do the correct permission check. + * + * Since this routine is used when doing a recursive destroy of snapshots + * and destroying snapshots requires descendent permissions, a successfull + * check of the top level snapshot applies to snapshots of all descendent + * datasets as well. */ -/* ARGSUSED */ static int -zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) { - nvlist_t *snaps; - nvpair_t *pair, *nextpair; - int error = 0; + int error; + char *dsname; - if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) - return (EINVAL); - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nextpair) { - dsl_dataset_t *ds; + dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); - nextpair = nvlist_next_nvpair(snaps, pair); - error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); - if (error == 0) { - dsl_dataset_rele(ds, FTAG); - } else if (error == ENOENT) { - /* - * Ignore any snapshots that don't exist (we consider - * them "already destroyed"). Remove the name from the - * nvl here in case the snapshot is created between - * now and when we try to destroy it (in which case - * we don't want to destroy it since we haven't - * checked for permission). 
- */ - fnvlist_remove_nvpair(snaps, pair); - error = 0; - continue; - } else { - break; - } - error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); - if (error != 0) - break; - } + error = zfs_secpolicy_destroy_perms(dsname, cr); + strfree(dsname); return (error); } @@ -885,16 +729,14 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) return (error); } -/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; @@ -934,9 +776,8 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (error); } -/* ARGSUSED */ static int -zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) { int error; @@ -959,72 +800,49 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ZFS_DELEG_PERM_SNAPSHOT, cr)); } -/* - * Check for permission to create each snapshot in the nvlist. 
- */ -/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) { - nvlist_t *snaps; - int error; - nvpair_t *pair; - - if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) - return (EINVAL); - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - char *name = nvpair_name(pair); - char *atp = strchr(name, '@'); - if (atp == NULL) { - error = EINVAL; - break; - } - *atp = '\0'; - error = zfs_secpolicy_snapshot_perms(name, cr); - *atp = '@'; - if (error != 0) - break; - } - return (error); + return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) -{ - /* - * Even root must have a proper TSD so that we know what pool - * to log to. - */ - if (tsd_get(zfs_allow_log_key) == NULL) - return (EPERM); - return (0); -} - -static int -zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) { char parentname[MAXNAMELEN]; int error; - char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && - (error = zfs_secpolicy_write_perms(origin, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); + if (zc->zc_value[0] != '\0') { + if ((error = zfs_secpolicy_write_perms(zc->zc_value, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); + } if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - return (zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr)); + error = zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr); + + return (error); +} + +static int +zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) +{ + int error; + + error = secpolicy_fs_unmount(cr, NULL); + if (error) { + error = 
dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); + } + return (error); } /* @@ -1033,7 +851,7 @@ zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); @@ -1046,7 +864,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) { int error; @@ -1062,14 +880,13 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) { return (secpolicy_zinject(cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); @@ -1085,9 +902,9 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) { - int err = zfs_secpolicy_read(zc, innvl, cr); + int err = zfs_secpolicy_read(zc, cr); if (err) return (err); @@ -1114,9 +931,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) { - int err = zfs_secpolicy_read(zc, innvl, cr); + int err = zfs_secpolicy_read(zc, cr); if (err) return (err); @@ -1127,25 +944,22 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) userquota_perms[zc->zc_objset_type], cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, 
nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } -/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); @@ -1155,7 +969,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, @@ -1168,13 +982,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); + error = zfs_secpolicy_snapshot(zc, cr); if (!error) - error = zfs_secpolicy_hold(zc, innvl, cr); + error = zfs_secpolicy_hold(zc, cr); if (!error) - error = zfs_secpolicy_release(zc, innvl, cr); + error = zfs_secpolicy_release(zc, cr); if (!error) - error = zfs_secpolicy_destroy(zc, innvl, cr); + error = zfs_secpolicy_destroy(zc, cr); return (error); } @@ -1213,40 +1027,36 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) return (0); } -/* - * Reduce the size of this nvlist until it can be serialized in 'max' bytes. - * Entries will be removed from the end of the nvlist, and one int32 entry - * named "N_MORE_ERRORS" will be added indicating how many entries were - * removed. 
- */ static int -nvlist_smush(nvlist_t *errors, size_t max) +fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) { size_t size; - size = fnvlist_size(errors); + VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - if (size > max) { + if (size > zc->zc_nvlist_dst_size) { nvpair_t *more_errors; int n = 0; - if (max < 1024) + if (zc->zc_nvlist_dst_size < 1024) return (ENOMEM); - fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); - more_errors = nvlist_prev_nvpair(errors, NULL); + VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); + more_errors = nvlist_prev_nvpair(*errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(errors, + nvpair_t *pair = nvlist_prev_nvpair(*errors, more_errors); - fnvlist_remove_nvpair(errors, pair); + VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); n++; - size = fnvlist_size(errors); - } while (size > max); + VERIFY(nvlist_size(*errors, &size, + NV_ENCODE_NATIVE) == 0); + } while (size > zc->zc_nvlist_dst_size); - fnvlist_remove_nvpair(errors, more_errors); - fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); - ASSERT3U(fnvlist_size(errors), <=, max); + VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); + VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); + ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + ASSERT(size <= zc->zc_nvlist_dst_size); } return (0); @@ -1259,20 +1069,21 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) int error = 0; size_t size; - size = fnvlist_size(nvl); + VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = fnvlist_pack(nvl, &size); + packed = kmem_alloc(size, KM_SLEEP); + VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, + KM_SLEEP) == 0); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; - fnvlist_pack_free(packed, size); + kmem_free(packed, size); } zc->zc_nvlist_dst_size = size; - zc->zc_nvlist_dst_filled = B_TRUE; return 
(error); } @@ -1305,8 +1116,6 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) /* * Find a zfsvfs_t for a mounted filesystem, or create our own, in which * case its z_vfs will be NULL, and it will be opened as the owner. - * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER, - * which prevents all vnode ops from running. */ static int zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) @@ -1351,6 +1160,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; + char *buf; if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) @@ -1369,7 +1179,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); - if (!SPA_VERSION_IS_SUPPORTED(version)) { + if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) { error = EINVAL; goto pool_props_bad; } @@ -1390,7 +1200,9 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - error = spa_create(zc->zc_name, config, props, zplprops); + buf = history_str_get(zc); + + error = spa_create(zc->zc_name, config, props, buf, zplprops); /* * Set the remaining root properties @@ -1399,6 +1211,9 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); + if (buf != NULL) + history_str_free(buf); + pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -1488,15 +1303,6 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc) return (error); } -/* - * inputs: - * zc_name name of the pool - * - * outputs: - * zc_cookie real errno - * zc_nvlist_dst config nvlist - * zc_nvlist_dst_size size of config nvlist - */ static int zfs_ioc_pool_stats(zfs_cmd_t *zc) { @@ -1598,8 +1404,7 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc) if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); - if (zc->zc_cookie < spa_version(spa) || - !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) { + 
if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) { spa_close(spa, FTAG); return (EINVAL); } @@ -1643,20 +1448,6 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc) } static int -zfs_ioc_pool_reguid(zfs_cmd_t *zc) -{ - spa_t *spa; - int error; - - error = spa_open(zc->zc_name, &spa, FTAG); - if (error == 0) { - error = spa_change_guid(spa); - spa_close(spa, FTAG); - } - return (error); -} - -static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { int error; @@ -1953,12 +1744,9 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) * inconsistent. So this is a bit of a workaround... * XXX reading with out owning */ - if (!zc->zc_objset_stats.dds_inconsistent && - dmu_objset_type(os) == DMU_OST_ZVOL) { - error = zvol_get_stats(os, nv); - if (error == EIO) - return (error); - VERIFY3S(error, ==, 0); + if (!zc->zc_objset_stats.dds_inconsistent) { + if (dmu_objset_type(os) == DMU_OST_ZVOL) + VERIFY(zvol_get_stats(os, nv) == 0); } error = put_nvlist(zc, nv); nvlist_free(nv); @@ -2155,10 +1943,8 @@ top: uint64_t cookie = 0; int len = sizeof (zc->zc_name) - (p - zc->zc_name); - while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { - if (!dataset_name_hidden(zc->zc_name)) - (void) dmu_objset_prefetch(zc->zc_name, NULL); - } + while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) + (void) dmu_objset_prefetch(p, NULL); } do { @@ -2167,7 +1953,8 @@ top: NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; - } while (error == 0 && dataset_name_hidden(zc->zc_name)); + } while (error == 0 && dataset_name_hidden(zc->zc_name) && + !(zc->zc_iflags & FKIOCTL)); dmu_objset_rele(os, FTAG); /* @@ -2393,25 +2180,31 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the last error - * encountered. 
If the caller provides a non-NULL errlist, it will be filled in - * with the list of names of all the properties that failed along with the - * corresponding error numbers. + * it continues to set as many as it can and returns the first error + * encountered. If the caller provides a non-NULL errlist, it also gives the + * complete list of names of all the properties it failed to set along with the + * corresponding error numbers. The caller is responsible for freeing the + * returned errlist. * - * If every property is set successfully, zero is returned and errlist is not - * modified. + * If every property is set successfully, zero is returned and the list pointed + * at by errlist is NULL. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t *errlist) + nvlist_t **errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl = fnvlist_alloc(); - nvlist_t *retrynvl = fnvlist_alloc(); + nvlist_t *genericnvl; + nvlist_t *errors; + nvlist_t *retrynvl; + + VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); retry: pair = NULL; @@ -2424,7 +2217,7 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - attrs = fnvpair_value_nvlist(pair); + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; @@ -2439,8 +2232,6 @@ retry: if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = EINVAL; - } else { - err = EINVAL; } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { @@ -2449,7 +2240,8 @@ retry: } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - intval = fnvpair_value_uint64(propval); + VERIFY(nvpair_value_uint64(propval, + &intval) == 0); switch (zfs_prop_get_type(prop)) { case 
PROP_TYPE_NUMBER: @@ -2493,11 +2285,8 @@ retry: } } - if (err != 0) { - if (errlist != NULL) - fnvlist_add_int32(errlist, propname, err); - rv = err; - } + if (err != 0) + VERIFY(nvlist_add_int32(errors, propname, err) == 0); } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2519,33 +2308,44 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - attrs = fnvpair_value_nvlist(pair); - propval = fnvlist_lookup_nvpair(attrs, - ZPROP_VALUE); + VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, + &propval) == 0); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - strval = fnvpair_value_string(propval); + VERIFY(nvpair_value_string(propval, + &strval) == 0); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { - intval = fnvpair_value_uint64(propval); + VERIFY(nvpair_value_uint64(propval, + &intval) == 0); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { - if (errlist != NULL) { - fnvlist_add_int32(errlist, propname, - err); - } - rv = err; + VERIFY(nvlist_add_int32(errors, propname, + err) == 0); } } } nvlist_free(genericnvl); nvlist_free(retrynvl); + if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { + nvlist_free(errors); + errors = NULL; + } else { + VERIFY(nvpair_value_int32(pair, &rv) == 0); + } + + if (errlist == NULL) + nvlist_free(errors); + else + *errlist = errors; + return (rv); } @@ -2553,7 +2353,7 @@ retry: * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(const char *fsname, nvlist_t *nvl) +zfs_check_userprops(char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; @@ -2633,7 +2433,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? 
ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors; + nvlist_t *errors = NULL; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2656,8 +2456,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) } } - errors = fnvlist_alloc(); - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); if (zc->zc_nvlist_dst != NULL && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2739,7 +2538,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) return (EINVAL); } - /* property name has been validated by zfs_secpolicy_inherit_prop() */ + /* the property name has been validated by zfs_secpolicy_inherit() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } @@ -3082,30 +2881,26 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, } /* - * innvl: { - * "type" -> dmu_objset_type_t (int32) - * (optional) "props" -> { prop -> value } - * } + * inputs: + * zc_objset_type type of objset to create (fs vs zvol) + * zc_name name of new objset + * zc_value name of snapshot to clone from (may be empty) + * zc_nvlist_src{_size} nvlist of properties to apply * - * outnvl: propname -> error code (int32) + * outputs: none */ static int -zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_create(zfs_cmd_t *zc) { + objset_t *clone; int error = 0; - zfs_creat_t zct = { 0 }; + zfs_creat_t zct; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - int32_t type32; - dmu_objset_type_t type; - boolean_t is_insensitive = B_FALSE; - - if (nvlist_lookup_int32(innvl, "type", &type32) != 0) - return (EINVAL); - type = type32; - (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + dmu_objset_type_t type = zc->zc_objset_type; switch (type) { + case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; @@ -3118,290 +2913,210 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) cbfunc = NULL; break; } - if 
(strchr(fsname, '@') || - strchr(fsname, '%')) + if (strchr(zc->zc_name, '@') || + strchr(zc->zc_name, '%')) return (EINVAL); - zct.zct_props = nvprops; - - if (cbfunc == NULL) - return (EINVAL); - - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvprops)) != 0) + return (error); - if (nvprops == NULL) - return (EINVAL); - if (nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) - return (EINVAL); + zct.zct_zplprops = NULL; + zct.zct_props = nvprops; - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) + if (zc->zc_value[0] != '\0') { + /* + * We're creating a clone of an existing snapshot. + */ + zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { + nvlist_free(nvprops); return (EINVAL); + } - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); - - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) + error = dmu_objset_hold(zc->zc_value, FTAG, &clone); + if (error) { + nvlist_free(nvprops); return (error); - } else if (type == DMU_OST_ZFS) { - int error; + } - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. 
- */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(fsname, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(zct.zct_zplprops); + error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); + dmu_objset_rele(clone, FTAG); + if (error) { + nvlist_free(nvprops); return (error); } - } + } else { + boolean_t is_insensitive = B_FALSE; - error = dmu_objset_create(fsname, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); + if (cbfunc == NULL) { + nvlist_free(nvprops); + return (EINVAL); + } - /* - * It would be nice to do this atomically. - */ - if (error == 0) { - error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, - nvprops, outnvl); - if (error != 0) - (void) dmu_objset_destroy(fsname, B_FALSE); - } - return (error); -} + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; -/* - * innvl: { - * "origin" -> name of origin snapshot - * (optional) "props" -> { prop -> value } - * } - * - * outnvl: propname -> error code (int32) - */ -static int -zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) -{ - int error = 0; - nvlist_t *nvprops = NULL; - char *origin_name; - dsl_dataset_t *origin; - - if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) - return (EINVAL); - (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + if (nvprops == NULL || + nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), + &volsize) != 0) { + nvlist_free(nvprops); + return (EINVAL); + } - if (strchr(fsname, '@') || - strchr(fsname, '%')) - return (EINVAL); + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) { + nvlist_free(nvprops); + return (EINVAL); + } - if (dataset_namecheck(origin_name, NULL, NULL) != 0) - return (EINVAL); + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - error = 
dsl_dataset_hold(origin_name, FTAG, &origin); - if (error) - return (error); + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) { + nvlist_free(nvprops); + return (error); + } + } else if (type == DMU_OST_ZFS) { + int error; - error = dmu_objset_clone(fsname, origin, 0); - dsl_dataset_rele(origin, FTAG); - if (error) - return (error); + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(zc->zc_name, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(nvprops); + nvlist_free(zct.zct_zplprops); + return (error); + } + } + error = dmu_objset_create(zc->zc_name, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + } /* * It would be nice to do this atomically. */ if (error == 0) { - error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, - nvprops, outnvl); + error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, + nvprops, NULL); if (error != 0) - (void) dmu_objset_destroy(fsname, B_FALSE); + (void) dmu_objset_destroy(zc->zc_name, B_FALSE); } + nvlist_free(nvprops); return (error); } /* - * innvl: { - * "snaps" -> { snapshot1, snapshot2 } - * (optional) "props" -> { prop -> value (string) } - * } - * - * outnvl: snapshot -> error code (int32) + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * zc_cookie recursive flag + * zc_nvlist_src[_size] property list * + * outputs: + * zc_value short snapname (i.e. 
part after the '@') */ static int -zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_snapshot(zfs_cmd_t *zc) { - nvlist_t *snaps; - nvlist_t *props = NULL; - int error, poollen; - nvpair_t *pair; - - (void) nvlist_lookup_nvlist(innvl, "props", &props); - if ((error = zfs_check_userprops(poolname, props)) != 0) - return (error); - - if (!nvlist_empty(props) && - zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) - return (ENOTSUP); + nvlist_t *nvprops = NULL; + int error; + boolean_t recursive = zc->zc_cookie; - if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); - poollen = strlen(poolname); - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - const char *name = nvpair_name(pair); - const char *cp = strchr(name, '@'); - - /* - * The snap name must contain an @, and the part after it must - * contain only valid characters. - */ - if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) - return (EINVAL); - - /* - * The snap must be in the specified pool. - */ - if (strncmp(name, poolname, poollen) != 0 || - (name[poollen] != '/' && name[poollen] != '@')) - return (EXDEV); - - /* This must be the only snap of this fs. */ - for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); - pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { - if (strncmp(name, nvpair_name(pair2), cp - name + 1) - == 0) { - return (EXDEV); - } - } - } - - error = dmu_objset_snapshot(snaps, props, outnvl); - return (error); -} - -/* - * innvl: "message" -> string - */ -/* ARGSUSED */ -static int -zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) -{ - char *message; - spa_t *spa; - int error; - char *poolname; - /* - * The poolname in the ioctl is not set, we get it from the TSD, - * which was set at the end of the last successful ioctl that allows - * logging. 
The secpolicy func already checked that it is set. - * Only one log ioctl is allowed after each successful ioctl, so - * we clear the TSD here. - */ - poolname = tsd_get(zfs_allow_log_key); - (void) tsd_set(zfs_allow_log_key, NULL); - error = spa_open(poolname, &spa, FTAG); - strfree(poolname); - if (error != 0) + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &nvprops)) != 0) return (error); - if (nvlist_lookup_string(innvl, "message", &message) != 0) { - spa_close(spa, FTAG); - return (EINVAL); - } + error = zfs_check_userprops(zc->zc_name, nvprops); + if (error) + goto out; - if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { - spa_close(spa, FTAG); - return (ENOTSUP); + if (!nvlist_empty(nvprops) && + zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { + error = ENOTSUP; + goto out; } - error = spa_history_log(spa, message); - spa_close(spa, FTAG); + error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, + nvprops, recursive, B_FALSE, -1); + +out: + nvlist_free(nvprops); return (error); } -/* ARGSUSED */ int zfs_unmount_snap(const char *name, void *arg) { - vfs_t *vfsp; - int err; + vfs_t *vfsp = NULL; - if (strchr(name, '@') == NULL) - return (0); + if (arg) { + char *snapname = arg; + char *fullname = kmem_asprintf("%s@%s", name, snapname); + vfsp = zfs_get_vfs(fullname); + strfree(fullname); + } else if (strchr(name, '@')) { + vfsp = zfs_get_vfs(name); + } - vfsp = zfs_get_vfs(name); - if (vfsp == NULL) - return (0); + if (vfsp) { + /* + * Always force the unmount for snapshots. + */ + int flag = MS_FORCE; + int err; - if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { + if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { + VFS_RELE(vfsp); + return (err); + } VFS_RELE(vfsp); - return (err); + if ((err = dounmount(vfsp, flag, kcred)) != 0) + return (err); } - VFS_RELE(vfsp); - - /* - * Always force the unmount for snapshots. 
- */ - return (dounmount(vfsp, MS_FORCE, kcred)); + return (0); } /* - * innvl: { - * "snaps" -> { snapshot1, snapshot2 } - * (optional boolean) "defer" - * } - * - * outnvl: snapshot -> error code (int32) + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * zc_defer_destroy mark for deferred destroy * + * outputs: none */ static int -zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_destroy_snaps(zfs_cmd_t *zc) { - int poollen; - nvlist_t *snaps; - nvpair_t *pair; - boolean_t defer; + int err; - if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); - defer = nvlist_exists(innvl, "defer"); - - poollen = strlen(poolname); - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - const char *name = nvpair_name(pair); - - /* - * The snap must be in the specified pool. - */ - if (strncmp(name, poolname, poollen) != 0 || - (name[poollen] != '/' && name[poollen] != '@')) - return (EXDEV); - - /* - * Ignore failures to unmount; dmu_snapshots_destroy_nvl() - * will deal with this gracefully (by filling in outnvl). 
- */ - (void) zfs_unmount_snap(name, NULL); - } - - return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); + err = dmu_objset_find(zc->zc_name, + zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN); + if (err) + return (err); + return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value, + zc->zc_defer_destroy)); } /* @@ -3705,7 +3420,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { + (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); @@ -3913,6 +3628,8 @@ zfs_ioc_recv(zfs_cmd_t *zc) * dmu_recv_begin() succeeds. */ if (props) { + nvlist_t *errlist; + if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= @@ -3931,12 +3648,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, errors); + props, &errlist); + (void) nvlist_merge(errors, errlist, 0); + nvlist_free(errlist); } - if (zc->zc_nvlist_dst_size != 0 && - (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || - put_nvlist(zc, errors) != 0)) { + if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. @@ -4042,8 +3759,6 @@ out: * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) - * zc_guid if set, estimate size of stream only. zc_cookie is ignored. - * output size in zc_objset_type. 
* * outputs: none */ @@ -4052,13 +3767,13 @@ zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; + file_t *fp; int error; offset_t off; dsl_dataset_t *ds; dsl_dataset_t *dsfrom = NULL; spa_t *spa; dsl_pool_t *dp; - boolean_t estimate = (zc->zc_guid != 0); error = spa_open(zc->zc_name, &spa, FTAG); if (error) @@ -4068,13 +3783,15 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) + if (error) { + spa_close(spa, FTAG); return (error); + } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); + spa_close(spa, FTAG); return (error); } @@ -4082,6 +3799,7 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); + spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); @@ -4092,104 +3810,30 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(ds, FTAG); return (error); } + } else { + spa_close(spa, FTAG); } - if (zc->zc_obj) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (fromsnap != NULL) { + fp = getf(zc->zc_cookie); + if (fp == NULL) { + dsl_dataset_rele(ds, FTAG); + if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); - rw_exit(&dp->dp_config_rwlock); - if (error) { - dsl_dataset_rele(ds, FTAG); - return (error); - } - error = dmu_objset_from_ds(dsfrom, &fromsnap); - if (error) { - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); - return (error); - } - } + return (EBADF); } - if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, - &zc->zc_objset_type); - } else { - file_t *fp = getf(zc->zc_cookie); 
- if (fp == NULL) { - dsl_dataset_rele(ds, FTAG); - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - return (EBADF); - } - - off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, - zc->zc_cookie, fp->f_vnode, &off); + off = fp->f_offset; + error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - releasef(zc->zc_cookie); - } + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + releasef(zc->zc_cookie); if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } -/* - * inputs: - * zc_name name of snapshot on which to report progress - * zc_cookie file descriptor of send stream - * - * outputs: - * zc_cookie number of bytes written in send stream thus far - */ -static int -zfs_ioc_send_progress(zfs_cmd_t *zc) -{ - dsl_dataset_t *ds; - dmu_sendarg_t *dsp = NULL; - int error; - - if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) - return (error); - - mutex_enter(&ds->ds_sendstream_lock); - - /* - * Iterate over all the send streams currently active on this dataset. - * If there's one which matches the specified file descriptor _and_ the - * stream was started by the current process, return the progress of - * that stream. 
- */ - for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; - dsp = list_next(&ds->ds_sendstreams, dsp)) { - if (dsp->dsa_outfd == zc->zc_cookie && - dsp->dsa_proc == curproc) - break; - } - - if (dsp != NULL) - zc->zc_cookie = *(dsp->dsa_off); - else - error = ENOENT; - - mutex_exit(&ds->ds_sendstream_lock); - dsl_dataset_rele(ds, FTAG); - return (error); -} - static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { @@ -4324,22 +3968,6 @@ zfs_ioc_clear(zfs_cmd_t *zc) return (error); } -static int -zfs_ioc_pool_reopen(zfs_cmd_t *zc) -{ - spa_t *spa; - int error; - - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) - return (error); - - spa_vdev_state_enter(spa, SCL_NONE); - vdev_reopen(spa->spa_root_vdev); - (void) spa_vdev_state_exit(spa, NULL, 0); - spa_close(spa, FTAG); - return (0); -} /* * inputs: * zc_name name of filesystem @@ -4648,7 +4276,6 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: - * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) @@ -4656,21 +4283,22 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) char *snap_name; int error; - snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, + snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); - if (strlen(snap_name) >= MAXPATHLEN) { + if (strlen(snap_name) >= MAXNAMELEN) { strfree(snap_name); return (E2BIG); } - error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); + error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, + NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } - (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); + (void) strcpy(zc->zc_value, snap_name); strfree(snap_name); return (0); } @@ -4994,457 +4622,128 @@ zfs_ioc_get_holds(zfs_cmd_t *zc) } /* - * inputs: - * zc_name name of new filesystem or snapshot - * zc_value full name of old snapshot - * - * outputs: - * 
zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes - */ -static int -zfs_ioc_space_written(zfs_cmd_t *zc) -{ - int error; - dsl_dataset_t *new, *old; - - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); - if (error != 0) - return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); - if (error != 0) { - dsl_dataset_rele(new, FTAG); - return (error); - } - - error = dsl_dataset_space_written(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); - dsl_dataset_rele(old, FTAG); - dsl_dataset_rele(new, FTAG); - return (error); -} -/* - * innvl: { - * "firstsnap" -> snapshot name - * } - * - * outnvl: { - * "used" -> space in bytes - * "compressed" -> compressed space in bytes - * "uncompressed" -> uncompressed space in bytes - * } - */ -static int -zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) -{ - int error; - dsl_dataset_t *new, *old; - char *firstsnap; - uint64_t used, comp, uncomp; - - if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) - return (EINVAL); - - error = dsl_dataset_hold(lastsnap, FTAG, &new); - if (error != 0) - return (error); - error = dsl_dataset_hold(firstsnap, FTAG, &old); - if (error != 0) { - dsl_dataset_rele(new, FTAG); - return (error); - } - - error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); - dsl_dataset_rele(old, FTAG); - dsl_dataset_rele(new, FTAG); - fnvlist_add_uint64(outnvl, "used", used); - fnvlist_add_uint64(outnvl, "compressed", comp); - fnvlist_add_uint64(outnvl, "uncompressed", uncomp); - return (error); -} - -/* - * innvl: { - * "fd" -> file descriptor to write stream to (int32) - * (optional) "fromsnap" -> full snap name to send an incremental from - * } - * - * outnvl is unused + * pool create, destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export + * do the logging of those commands. 
*/ -/* ARGSUSED */ -static int -zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) -{ - objset_t *fromsnap = NULL; - objset_t *tosnap; - int error; - offset_t off; - char *fromname; - int fd; - - error = nvlist_lookup_int32(innvl, "fd", &fd); - if (error != 0) - return (EINVAL); - - error = dmu_objset_hold(snapname, FTAG, &tosnap); - if (error) - return (error); - - error = nvlist_lookup_string(innvl, "fromsnap", &fromname); - if (error == 0) { - error = dmu_objset_hold(fromname, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); - return (error); - } - } - - file_t *fp = getf(fd); - if (fp == NULL) { - dmu_objset_rele(tosnap, FTAG); - if (fromsnap != NULL) - dmu_objset_rele(fromsnap, FTAG); - return (EBADF); - } - - off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); - - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - releasef(fd); - if (fromsnap != NULL) - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); - return (error); -} - -/* - * Determine approximately how large a zfs send stream will be -- the number - * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). 
- * - * innvl: { - * (optional) "fromsnap" -> full snap name to send an incremental from - * } - * - * outnvl: { - * "space" -> bytes of space (uint64) - * } - */ -static int -zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) -{ - objset_t *fromsnap = NULL; - objset_t *tosnap; - int error; - char *fromname; - uint64_t space; - - error = dmu_objset_hold(snapname, FTAG, &tosnap); - if (error) - return (error); - - error = nvlist_lookup_string(innvl, "fromsnap", &fromname); - if (error == 0) { - error = dmu_objset_hold(fromname, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); - return (error); - } - } - - error = dmu_send_estimate(tosnap, fromsnap, &space); - fnvlist_add_uint64(outnvl, "space", space); - - if (fromsnap != NULL) - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); - return (error); -} - - -static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; - -static void -zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, - boolean_t log_history, zfs_ioc_poolcheck_t pool_check) -{ - zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; - - ASSERT3U(ioc, >=, ZFS_IOC_FIRST); - ASSERT3U(ioc, <, ZFS_IOC_LAST); - ASSERT3P(vec->zvec_legacy_func, ==, NULL); - ASSERT3P(vec->zvec_func, ==, NULL); - - vec->zvec_legacy_func = func; - vec->zvec_secpolicy = secpolicy; - vec->zvec_namecheck = namecheck; - vec->zvec_allow_log = log_history; - vec->zvec_pool_check = pool_check; -} - -/* - * See the block comment at the beginning of this file for details on - * each argument to this function. 
- */ -static void -zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, - zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, - zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, - boolean_t allow_log) -{ - zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; - - ASSERT3U(ioc, >=, ZFS_IOC_FIRST); - ASSERT3U(ioc, <, ZFS_IOC_LAST); - ASSERT3P(vec->zvec_legacy_func, ==, NULL); - ASSERT3P(vec->zvec_func, ==, NULL); - - /* if we are logging, the name must be valid */ - ASSERT(!allow_log || namecheck != NO_NAME); - - vec->zvec_name = name; - vec->zvec_func = func; - vec->zvec_secpolicy = secpolicy; - vec->zvec_namecheck = namecheck; - vec->zvec_pool_check = pool_check; - vec->zvec_smush_outnvlist = smush_outnvlist; - vec->zvec_allow_log = allow_log; -} - -static void -zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy, boolean_t log_history, - zfs_ioc_poolcheck_t pool_check) -{ - zfs_ioctl_register_legacy(ioc, func, secpolicy, - POOL_NAME, log_history, pool_check); -} - -static void -zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) -{ - zfs_ioctl_register_legacy(ioc, func, secpolicy, - DATASET_NAME, B_FALSE, pool_check); -} - -static void -zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) -{ - zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, - POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); -} - -static void -zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy) -{ - zfs_ioctl_register_legacy(ioc, func, secpolicy, - NO_NAME, B_FALSE, POOL_CHECK_NONE); -} - -static void -zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, - zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) -{ - zfs_ioctl_register_legacy(ioc, func, secpolicy, - DATASET_NAME, B_FALSE, 
POOL_CHECK_SUSPENDED); -} - -static void -zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) -{ - zfs_ioctl_register_dataset_read_secpolicy(ioc, func, - zfs_secpolicy_read); -} - -static void -zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy) -{ - zfs_ioctl_register_legacy(ioc, func, secpolicy, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); -} - -static void -zfs_ioctl_init(void) -{ - zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, - zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); - - zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, - zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); - - zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, - zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, - POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); - - zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, - zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, - POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); - - zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, - zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, - POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); - - zfs_ioctl_register("create", ZFS_IOC_CREATE, - zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); - - zfs_ioctl_register("clone", ZFS_IOC_CLONE, - zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); - - zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, - zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); - - /* IOCTLS that use the legacy function signature */ - - zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, - zfs_secpolicy_config, 
NO_NAME, B_FALSE, POOL_CHECK_READONLY); - - zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, - zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); - zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, - zfs_ioc_pool_scan); - zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, - zfs_ioc_pool_upgrade); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, - zfs_ioc_vdev_add); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, - zfs_ioc_vdev_remove); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, - zfs_ioc_vdev_set_state); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, - zfs_ioc_vdev_attach); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, - zfs_ioc_vdev_detach); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, - zfs_ioc_vdev_setpath); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, - zfs_ioc_vdev_setfru); - zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, - zfs_ioc_pool_set_props); - zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, - zfs_ioc_vdev_split); - zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, - zfs_ioc_pool_reguid); - - zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, - zfs_ioc_pool_configs, zfs_secpolicy_none); - zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, - zfs_ioc_pool_tryimport, zfs_secpolicy_config); - zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, - zfs_ioc_inject_fault, zfs_secpolicy_inject); - zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, - zfs_ioc_clear_fault, zfs_secpolicy_inject); - zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, - zfs_ioc_inject_list_next, zfs_secpolicy_inject); - - /* - * pool destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_export - * does the logging of those commands. 
- */ - zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, - zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); - zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, - zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); - - zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, - zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); - zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, - zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); - - zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, - zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); - zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, - zfs_ioc_dsobj_to_dsname, - zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); - zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, - zfs_ioc_pool_get_history, - zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); - - zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, - zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); - - zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, - zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); - zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, - zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); - - zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, - zfs_ioc_space_written); - zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, - zfs_ioc_get_holds); - zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, - zfs_ioc_objset_recvd_props); - zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, - zfs_ioc_next_obj); - zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, - zfs_ioc_get_fsacl); - zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, - zfs_ioc_objset_stats); - zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, - zfs_ioc_objset_zplprops); - zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, - zfs_ioc_dataset_list_next); - zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, - zfs_ioc_snapshot_list_next); - 
zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, - zfs_ioc_send_progress); - - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, - zfs_ioc_diff, zfs_secpolicy_diff); - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, - zfs_ioc_obj_to_stats, zfs_secpolicy_diff); - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, - zfs_ioc_obj_to_path, zfs_secpolicy_diff); - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, - zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, - zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); - zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, - zfs_ioc_send, zfs_secpolicy_send); - - zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, - zfs_secpolicy_none); - zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, - zfs_secpolicy_destroy); - zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, - zfs_secpolicy_rollback); - zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, - zfs_secpolicy_rename); - zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, - zfs_secpolicy_recv); - zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, - zfs_secpolicy_promote); - zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, - zfs_secpolicy_hold); - zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, - zfs_secpolicy_release); - zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, - zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); - zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, - zfs_secpolicy_set_fsacl); - - zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, - zfs_secpolicy_share, POOL_CHECK_NONE); - zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, - zfs_secpolicy_smb_acl, POOL_CHECK_NONE); - zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, - 
zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); - zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, - zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); -} +static zfs_ioc_vec_t zfs_ioc_vec[] = { + { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, + POOL_CHECK_READONLY }, + { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_SUSPENDED | 
POOL_CHECK_READONLY }, + { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_NONE }, + { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME, + B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_snapshot, 
zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, + B_FALSE, POOL_CHECK_NONE }, + { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, + B_FALSE, POOL_CHECK_NONE }, + { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED }, + { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE }, + { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + 
POOL_CHECK_NONE }, + { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, + B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, + { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, + POOL_CHECK_SUSPENDED } +}; int pool_status_check(const char *name, zfs_ioc_namecheck_t type, @@ -5581,145 +4880,67 @@ static int zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) { zfs_cmd_t *zc; - uint_t vecnum; - int error, rc, len; + uint_t vec; + int error, rc; minor_t minor = getminor(dev); - const zfs_ioc_vec_t *vec; - char *saved_poolname = NULL; - nvlist_t *innvl = NULL; if (minor != 0 && zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL) return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); - vecnum = cmd - ZFS_IOC_FIRST; + vec = cmd - ZFS_IOC; ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); - if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (EINVAL); - vec = &zfs_ioc_vec[vecnum]; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); - if (error != 0) { + if (error != 0) error = EFAULT; - goto out; - } - zc->zc_iflags = flag & FKIOCTL; - if (zc->zc_nvlist_src_size != 0) { - error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &innvl); - if (error != 0) - goto out; - } + if ((error == 0) && !(flag & FKIOCTL)) + error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. 
*/ - zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; - switch (vec->zvec_namecheck) { - case POOL_NAME: - if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; - else + if (error == 0) { + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + zc->zc_iflags = flag & FKIOCTL; + switch (zfs_ioc_vec[vec].zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; error = pool_status_check(zc->zc_name, - vec->zvec_namecheck, vec->zvec_pool_check); - break; + zfs_ioc_vec[vec].zvec_namecheck, + zfs_ioc_vec[vec].zvec_pool_check); + break; - case DATASET_NAME: - if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; - else + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; error = pool_status_check(zc->zc_name, - vec->zvec_namecheck, vec->zvec_pool_check); - break; - - case NO_NAME: - break; - } - - - if (error == 0 && !(flag & FKIOCTL)) - error = vec->zvec_secpolicy(zc, innvl, cr); - - if (error != 0) - goto out; - - /* legacy ioctls can modify zc_name */ - len = strcspn(zc->zc_name, "/@") + 1; - saved_poolname = kmem_alloc(len, KM_SLEEP); - (void) strlcpy(saved_poolname, zc->zc_name, len); - - if (vec->zvec_func != NULL) { - nvlist_t *outnvl; - int puterror = 0; - spa_t *spa; - nvlist_t *lognv = NULL; - - ASSERT(vec->zvec_legacy_func == NULL); - - /* - * Add the innvl to the lognv before calling the func, - * in case the func changes the innvl. 
- */ - if (vec->zvec_allow_log) { - lognv = fnvlist_alloc(); - fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, - vec->zvec_name); - if (!nvlist_empty(innvl)) { - fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, - innvl); - } - } - - outnvl = fnvlist_alloc(); - error = vec->zvec_func(zc->zc_name, innvl, outnvl); - - if (error == 0 && vec->zvec_allow_log && - spa_open(zc->zc_name, &spa, FTAG) == 0) { - if (!nvlist_empty(outnvl)) { - fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, - outnvl); - } - (void) spa_history_log_nvl(spa, lognv); - spa_close(spa, FTAG); - } - fnvlist_free(lognv); + zfs_ioc_vec[vec].zvec_namecheck, + zfs_ioc_vec[vec].zvec_pool_check); + break; - if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { - int smusherror = 0; - if (vec->zvec_smush_outnvlist) { - smusherror = nvlist_smush(outnvl, - zc->zc_nvlist_dst_size); - } - if (smusherror == 0) - puterror = put_nvlist(zc, outnvl); + case NO_NAME: + break; } - - if (puterror != 0) - error = puterror; - - nvlist_free(outnvl); - } else { - error = vec->zvec_legacy_func(zc); } -out: - nvlist_free(innvl); + if (error == 0) + error = zfs_ioc_vec[vec].zvec_func(zc); + rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); - if (error == 0 && rc != 0) - error = EFAULT; - if (error == 0 && vec->zvec_allow_log) { - char *s = tsd_get(zfs_allow_log_key); - if (s != NULL) - strfree(s); - (void) tsd_set(zfs_allow_log_key, saved_poolname); - } else { - if (saved_poolname != NULL) - strfree(saved_poolname); + if (error == 0) { + if (rc != 0) + error = EFAULT; + if (zfs_ioc_vec[vec].zvec_his_log) + zfs_log_history(zc); } kmem_free(zc, sizeof (zfs_cmd_t)); @@ -5835,12 +5056,9 @@ static struct modlinkage modlinkage = { NULL }; -static void -zfs_allow_log_destroy(void *arg) -{ - char *poolname = arg; - strfree(poolname); -} + +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; int _init(void) @@ -5850,7 +5068,6 @@ _init(void) spa_init(FREAD | FWRITE); zfs_init(); zvol_init(); - zfs_ioctl_init(); if ((error = 
mod_install(&modlinkage)) != 0) { zvol_fini(); @@ -5860,8 +5077,7 @@ _init(void) } tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, rrw_tsd_destroy); - tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); + tsd_create(&rrw_tsd_key, NULL); error = ldi_ident_from_mod(&modlinkage, &zfs_li); ASSERT(error == 0); diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c index 21ac731c1eb9..4970552d0cb7 100644 --- a/uts/common/fs/zfs/zfs_vfsops.c +++ b/uts/common/fs/zfs/zfs_vfsops.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -385,14 +384,6 @@ vscan_changed_cb(void *arg, uint64_t newval) } static void -acl_mode_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_acl_mode = newval; -} - -static void acl_inherit_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; @@ -523,8 +514,6 @@ zfs_register_callbacks(vfs_t *vfsp) error = error ? error : dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "aclmode", acl_mode_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); error = error ? 
error : dsl_prop_register(ds, "vscan", vscan_changed_cb, zfsvfs); @@ -565,7 +554,6 @@ unregister: (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs); (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); @@ -1248,9 +1236,6 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs) VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs) == 0); - VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, - zfsvfs) == 0); - VERIFY(dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zfsvfs) == 0); @@ -2249,8 +2234,9 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) sa_register_update_callback(os, zfs_sa_upgrade); } - spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, - "from %llu to %llu", zfsvfs->z_version, newvers); + spa_history_log_internal(LOG_DS_UPGRADE, + dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", + zfsvfs->z_version, newvers, dmu_objset_id(os)); dmu_tx_commit(tx); diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c index 0c39274caf18..a0720079cf46 100644 --- a/uts/common/fs/zfs/zfs_vnops.c +++ b/uts/common/fs/zfs/zfs_vnops.c @@ -2975,8 +2975,7 @@ top: uint64_t acl_obj; new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); - if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) - goto out; + zfs_acl_chmod_setattr(zp, &aclp, new_mode); mutex_enter(&zp->z_lock); if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { @@ -4194,14 +4193,6 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, ZFS_VERIFY_ZP(zp); /* - * There's nothing to do if no data is cached. 
- */ - if (!vn_has_cached_data(vp)) { - ZFS_EXIT(zfsvfs); - return (0); - } - - /* * Align this request to the file block size in case we kluster. * XXX - this can result in pretty aggresive locking, which can * impact simultanious read/write access. One option might be diff --git a/uts/common/fs/zfs/zil.c b/uts/common/fs/zfs/zil.c index 081242cece5d..c66313ff6f85 100644 --- a/uts/common/fs/zfs/zil.c +++ b/uts/common/fs/zfs/zil.c @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -561,7 +560,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); - VERIFY(!keep_first); + ASSERT(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) @@ -1662,9 +1661,20 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) void zil_free(zilog_t *zilog) { + lwb_t *head_lwb; + zilog->zl_stop_sync = 1; - ASSERT(list_is_empty(&zilog->zl_lwb_list)); + /* + * After zil_close() there should only be one lwb with a buffer. 
+ */ + head_lwb = list_head(&zilog->zl_lwb_list); + if (head_lwb) { + ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list)); + list_remove(&zilog->zl_lwb_list, head_lwb); + zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz); + kmem_cache_free(zil_lwb_cache, head_lwb); + } list_destroy(&zilog->zl_lwb_list); avl_destroy(&zilog->zl_vdev_tree); @@ -1704,10 +1714,6 @@ zil_open(objset_t *os, zil_get_data_t *get_data) { zilog_t *zilog = dmu_objset_zil(os); - ASSERT(zilog->zl_clean_taskq == NULL); - ASSERT(zilog->zl_get_data == NULL); - ASSERT(list_is_empty(&zilog->zl_lwb_list)); - zilog->zl_get_data = get_data; zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, 2, 2, TASKQ_PREPOPULATE); @@ -1721,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data) void zil_close(zilog_t *zilog) { - lwb_t *lwb; + lwb_t *tail_lwb; uint64_t txg = 0; zil_commit(zilog, 0); /* commit all itx */ @@ -1733,9 +1739,9 @@ zil_close(zilog_t *zilog) * destroy the zl_clean_taskq. */ mutex_enter(&zilog->zl_lock); - lwb = list_tail(&zilog->zl_lwb_list); - if (lwb != NULL) - txg = lwb->lwb_max_txg; + tail_lwb = list_tail(&zilog->zl_lwb_list); + if (tail_lwb != NULL) + txg = tail_lwb->lwb_max_txg; mutex_exit(&zilog->zl_lock); if (txg) txg_wait_synced(zilog->zl_dmu_pool, txg); @@ -1743,19 +1749,6 @@ zil_close(zilog_t *zilog) taskq_destroy(zilog->zl_clean_taskq); zilog->zl_clean_taskq = NULL; zilog->zl_get_data = NULL; - - /* - * We should have only one LWB left on the list; remove it now. 
- */ - mutex_enter(&zilog->zl_lock); - lwb = list_head(&zilog->zl_lwb_list); - if (lwb != NULL) { - ASSERT(lwb == list_tail(&zilog->zl_lwb_list)); - list_remove(&zilog->zl_lwb_list, lwb); - zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); - kmem_cache_free(zil_lwb_cache, lwb); - } - mutex_exit(&zilog->zl_lock); } /* diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index cfb5733f2bd7..eb509c5911f7 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/zfs_context.h> @@ -80,7 +78,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; #endif -extern int zfs_mg_alloc_failures; /* * An allocating zio is one that either currently has the DVA allocate @@ -161,12 +158,6 @@ zio_init(void) zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; } - /* - * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs - * to fail 3 times per txg or 8 failures, whichever is greater. 
- */ - zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); - zio_inject_init(); } @@ -619,7 +610,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && zp->zp_compress >= ZIO_COMPRESS_OFF && zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && - DMU_OT_IS_VALID(zp->zp_type) && + zp->zp_type < DMU_OT_NUMTYPES && zp->zp_level < 32 && zp->zp_copies > 0 && zp->zp_copies <= spa_max_replication(spa) && @@ -903,7 +894,7 @@ zio_read_bp_init(zio_t *zio) zio_push_transform(zio, cbuf, psize, psize, zio_decompress); } - if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0) + if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) zio->io_flags |= ZIO_FLAG_DONT_CACHE; if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) @@ -1062,7 +1053,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) { spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; - int flags = (cutinline ? TQ_FRONT : 0); + int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); /* * If we're a config writer or a probe, the normal issue and @@ -1086,15 +1077,8 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) q++; ASSERT3U(q, <, ZIO_TASKQ_TYPES); - - /* - * NB: We are assuming that the zio can only be dispatched - * to a single taskq at a time. It would be a grievous error - * to dispatch the zio to another taskq at the same time. 
- */ - ASSERT(zio->io_tqent.tqent_next == NULL); - taskq_dispatch_ent(spa->spa_zio_taskq[t][q], - (task_func_t *)zio_execute, zio, flags, &zio->io_tqent); + (void) taskq_dispatch(spa->spa_zio_taskq[t][q], + (task_func_t *)zio_execute, zio, flags); } static boolean_t @@ -2130,7 +2114,6 @@ zio_dva_allocate(zio_t *zio) metaslab_class_t *mc = spa_normal_class(spa); blkptr_t *bp = zio->io_bp; int error; - int flags = 0; if (zio->io_gang_leader == NULL) { ASSERT(zio->io_child_type > ZIO_CHILD_GANG); @@ -2143,21 +2126,10 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); - /* - * The dump device does not support gang blocks so allocation on - * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid - * the "fast" gang feature. - */ - flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; - flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? - METASLAB_GANG_CHILD : 0; error = metaslab_alloc(spa, mc, zio->io_size, bp, - zio->io_prop.zp_copies, zio->io_txg, NULL, flags); + zio->io_prop.zp_copies, zio->io_txg, NULL, 0); if (error) { - spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " - "size %llu, error %d", spa_name(spa), zio, zio->io_size, - error); if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) return (zio_write_gang_block(zio)); zio->io_error = error; @@ -2219,22 +2191,13 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, ASSERT(txg > spa_syncing_txg(spa)); - /* - * ZIL blocks are always contiguous (i.e. not gang blocks) so we - * set the METASLAB_GANG_AVOID flag so that they don't "fast gang" - * when allocating them. 
- */ - if (use_slog) { + if (use_slog) error = metaslab_alloc(spa, spa_log_class(spa), size, - new_bp, 1, txg, old_bp, - METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); - } + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); - if (error) { + if (error) error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, old_bp, - METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); - } + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); if (error == 0) { BP_SET_LSIZE(new_bp, size); @@ -2906,11 +2869,9 @@ zio_done(zio_t *zio) * Reexecution is potentially a huge amount of work. * Hand it off to the otherwise-unused claim taskq. */ - ASSERT(zio->io_tqent.tqent_next == NULL); - (void) taskq_dispatch_ent( + (void) taskq_dispatch( spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], - (task_func_t *)zio_reexecute, zio, 0, - &zio->io_tqent); + (task_func_t *)zio_reexecute, zio, TQ_SLEEP); } return (ZIO_PIPELINE_STOP); } @@ -2989,45 +2950,3 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_checksum_verify, zio_done }; - -/* dnp is the dnode for zb1->zb_object */ -boolean_t -zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, - const zbookmark_t *zb2) -{ - uint64_t zb1nextL0, zb2thisobj; - - ASSERT(zb1->zb_objset == zb2->zb_objset); - ASSERT(zb2->zb_level == 0); - - /* - * A bookmark in the deadlist is considered to be after - * everything else. - */ - if (zb2->zb_object == DMU_DEADLIST_OBJECT) - return (B_TRUE); - - /* The objset_phys_t isn't before anything. */ - if (dnp == NULL) - return (B_FALSE); - - zb1nextL0 = (zb1->zb_blkid + 1) << - ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); - - zb2thisobj = zb2->zb_object ? 
zb2->zb_object : - zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); - - if (zb1->zb_object == DMU_META_DNODE_OBJECT) { - uint64_t nextobj = zb1nextL0 * - (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; - return (nextobj <= zb2thisobj); - } - - if (zb1->zb_object < zb2thisobj) - return (B_TRUE); - if (zb1->zb_object > zb2thisobj) - return (B_FALSE); - if (zb2->zb_object == DMU_META_DNODE_OBJECT) - return (B_FALSE); - return (zb1nextL0 <= zb2->zb_blkid); -} diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c index edf574e3c9ba..47b6c5a87a52 100644 --- a/uts/common/fs/zfs/zvol.c +++ b/uts/common/fs/zfs/zvol.c @@ -20,13 +20,10 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Portions Copyright 2010 Robert Milkowski - * - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. */ +/* Portions Copyright 2010 Robert Milkowski */ + /* * ZFS volume emulation driver. * @@ -136,7 +133,7 @@ typedef struct zvol_state { int zvol_maxphys = DMU_MAX_ACCESS/2; extern int zfs_set_prop_nvlist(const char *, zprop_source_t, - nvlist_t *, nvlist_t *); + nvlist_t *, nvlist_t **); static int zvol_remove_zv(zvol_state_t *); static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); static int zvol_dumpify(zvol_state_t *zv); @@ -345,24 +342,6 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) } /* - * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we - * implement DKIOCFREE/free-long-range. 
- */ -static int -zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) -{ - uint64_t offset, length; - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - offset = lr->lr_offset; - length = lr->lr_length; - - return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); -} - -/* * Replay a TX_WRITE ZIL transaction that didn't get committed * after a system failure */ @@ -412,7 +391,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) /* * Callback vectors for replaying records. - * Only TX_WRITE and TX_TRUNCATE are needed for zvol. + * Only TX_WRITE is needed for zvol. */ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* 0 no such transaction type */ @@ -425,7 +404,7 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_LINK */ zvol_replay_err, /* TX_RENAME */ zvol_replay_write, /* TX_WRITE */ - zvol_replay_truncate, /* TX_TRUNCATE */ + zvol_replay_err, /* TX_TRUNCATE */ zvol_replay_err, /* TX_SETATTR */ zvol_replay_err, /* TX_ACL */ zvol_replay_err, /* TX_CREATE_ACL */ @@ -1533,32 +1512,7 @@ zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, */ /* - * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. - */ -static void -zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, - boolean_t sync) -{ - itx_t *itx; - lr_truncate_t *lr; - zilog_t *zilog = zv->zv_zilog; - - if (zil_replaying(zilog, tx)) - return; - - itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); - lr = (lr_truncate_t *)&itx->itx_lr; - lr->lr_foid = ZVOL_OBJ; - lr->lr_offset = off; - lr->lr_length = len; - - itx->itx_sync = sync; - zil_itx_assign(zilog, itx, tx); -} - -/* * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). - * Also a dirtbag dkio ioctl for unmap/free-block functionality. 
*/ /*ARGSUSED*/ int @@ -1677,65 +1631,6 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) zfs_range_unlock(rl); break; - case DKIOCFREE: - { - dkioc_free_t df; - dmu_tx_t *tx; - - if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { - error = EFAULT; - break; - } - - /* - * Apply Postel's Law to length-checking. If they overshoot, - * just blank out until the end, if there's a need to blank - * out anything. - */ - if (df.df_start >= zv->zv_volsize) - break; /* No need to do anything... */ - if (df.df_start + df.df_length > zv->zv_volsize) - df.df_length = DMU_OBJECT_END; - - rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, - RL_WRITER); - tx = dmu_tx_create(zv->zv_objset); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error != 0) { - dmu_tx_abort(tx); - } else { - zvol_log_truncate(zv, tx, df.df_start, - df.df_length, B_TRUE); - dmu_tx_commit(tx); - error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, - df.df_start, df.df_length); - } - - zfs_range_unlock(rl); - - if (error == 0) { - /* - * If the write-cache is disabled or 'sync' property - * is set to 'always' then treat this as a synchronous - * operation (i.e. commit to zil). - */ - if (!(zv->zv_flags & ZVOL_WCE) || - (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) - zil_commit(zv->zv_zilog, ZVOL_OBJ); - - /* - * If the caller really wants synchronous writes, and - * can't wait for them, don't return until the write - * is done. - */ - if (df.df_flags & DF_WAIT_SYNC) { - txg_wait_synced( - dmu_objset_pool(zv->zv_objset), 0); - } - } - break; - } - default: error = ENOTTY; break; @@ -1886,7 +1781,7 @@ zvol_dumpify(zvol_state_t *zv) if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { - boolean_t resize = (dumpsize > 0); + boolean_t resize = (dumpsize > 0) ? 
B_TRUE : B_FALSE; if ((error = zvol_dump_init(zv, resize)) != 0) { (void) zvol_dump_fini(zv); diff --git a/uts/common/os/fm.c b/uts/common/os/fm.c index eff91aee5e64..4efcff4f464a 100644 --- a/uts/common/os/fm.c +++ b/uts/common/os/fm.c @@ -79,7 +79,7 @@ * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These * values must be kept in sync with the FMA source code in usr/src/cmd/fm. */ -static const char *fm_url = "http://illumos.org/msg"; +static const char *fm_url = "http://www.sun.com/msg"; static const char *fm_msgid = "SUNOS-8000-0G"; static char *volatile fm_panicstr = NULL; diff --git a/uts/common/sys/ccompile.h b/uts/common/sys/ccompile.h index 690bb7afb73a..c9857b086575 100644 --- a/uts/common/sys/ccompile.h +++ b/uts/common/sys/ccompile.h @@ -27,6 +27,8 @@ #ifndef _SYS_CCOMPILE_H #define _SYS_CCOMPILE_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * This file contains definitions designed to enable different compilers * to be used harmoniously on Solaris systems. @@ -77,27 +79,6 @@ extern "C" { */ #define __sun_attr___noreturn__ __attribute__((__noreturn__)) -/* - * The function is 'extern inline' and expects GNU C89 behaviour, not C99 - * behaviour. - * - * Should only be used on 'extern inline' definitions for GCC. 
- */ -#if __GNUC_VERSION >= 40200 -#define __sun_attr___gnu_inline__ __attribute__((__gnu_inline__)) -#else -#define __sun_attr___gnu_inline__ -#endif - -/* - * The function has control flow such that it may return multiple times (in - * the manner of setjmp or vfork) - */ -#if __GNUC_VERSION >= 40100 -#define __sun_attr___returns_twice__ __attribute__((__returns_twice__)) -#else -#define __sun_attr___returns_twice__ -#endif /* * This is an appropriate label for functions that do not @@ -135,11 +116,10 @@ extern "C" { #define __KPRINTFLIKE(__n) __sun_attr__((__KPRINTFLIKE__(__n))) #define __KVPRINTFLIKE(__n) __sun_attr__((__KVPRINTFLIKE__(__n))) #define __NORETURN __sun_attr__((__noreturn__)) -#define __GNU_INLINE __inline__ __sun_attr__((__gnu_inline__)) -#define __RETURNS_TWICE __sun_attr__((__returns_twice__)) #define __CONST __sun_attr__((__const__)) #define __PURE __sun_attr__((__pure__)) + #ifdef __cplusplus } #endif diff --git a/uts/common/sys/cmn_err.h b/uts/common/sys/cmn_err.h index 736c77b9dcf8..e710d8e5c30b 100644 --- a/uts/common/sys/cmn_err.h +++ b/uts/common/sys/cmn_err.h @@ -26,19 +26,17 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_CMN_ERR_H #define _SYS_CMN_ERR_H +#pragma ident "%Z%%M% %I% %E% SMI" + #if defined(_KERNEL) && !defined(_ASM) #include <sys/va_list.h> #endif -#include <sys/dditypes.h> - #ifdef __cplusplus extern "C" { #endif @@ -58,40 +56,47 @@ extern "C" { /*PRINTFLIKE2*/ extern void cmn_err(int, const char *, ...) __KPRINTFLIKE(2); +#pragma rarely_called(cmn_err) extern void vzcmn_err(zoneid_t, int, const char *, __va_list) __KVPRINTFLIKE(3); - -extern void dev_err(dev_info_t *, int, char *, ...) 
- __KPRINTFLIKE(3); +#pragma rarely_called(vzcmn_err) extern void vcmn_err(int, const char *, __va_list) __KVPRINTFLIKE(2); +#pragma rarely_called(vcmn_err) /*PRINTFLIKE3*/ extern void zcmn_err(zoneid_t, int, const char *, ...) __KPRINTFLIKE(3); +#pragma rarely_called(zcmn_err) /*PRINTFLIKE1*/ extern void printf(const char *, ...) __KPRINTFLIKE(1); +#pragma rarely_called(printf) extern void vzprintf(zoneid_t, const char *, __va_list) __KVPRINTFLIKE(2); +#pragma rarely_called(vzprintf) /*PRINTFLIKE2*/ extern void zprintf(zoneid_t, const char *, ...) __KPRINTFLIKE(2); +#pragma rarely_called(zprintf) extern void vprintf(const char *, __va_list) __KVPRINTFLIKE(1); +#pragma rarely_called(vprintf) /*PRINTFLIKE1*/ extern void uprintf(const char *, ...) __KPRINTFLIKE(1); +#pragma rarely_called(uprintf) extern void vuprintf(const char *, __va_list) __KVPRINTFLIKE(1); +#pragma rarely_called(vuprintf) /*PRINTFLIKE3*/ extern size_t snprintf(char *, size_t, const char *, ...) @@ -107,9 +112,11 @@ extern char *vsprintf(char *, const char *, __va_list) /*PRINTFLIKE1*/ extern void panic(const char *, ...) __KPRINTFLIKE(1) __NORETURN; +#pragma rarely_called(panic) extern void vpanic(const char *, __va_list) __KVPRINTFLIKE(1) __NORETURN; +#pragma rarely_called(vpanic) #endif /* _KERNEL */ #endif /* !_ASM */ diff --git a/uts/common/sys/dtrace.h b/uts/common/sys/dtrace.h index c15799a4e4a2..007502d7d856 100644 --- a/uts/common/sys/dtrace.h +++ b/uts/common/sys/dtrace.h @@ -24,10 +24,6 @@ * Use is subject to license terms. */ -/* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
- */ - #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H @@ -206,7 +202,6 @@ typedef enum dtrace_probespec { #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ -#define DIF_VAR_VMREGS 0x0003 /* virtual machine registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ @@ -285,10 +280,8 @@ typedef enum dtrace_probespec { #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 -#define DIF_SUBR_TOUPPER 44 -#define DIF_SUBR_TOLOWER 45 -#define DIF_SUBR_MAX 45 /* max subroutine value */ +#define DIF_SUBR_MAX 43 /* max subroutine value */ typedef uint32_t dif_instr_t; @@ -397,8 +390,6 @@ typedef struct dtrace_difv { #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ -#define DTRACEACT_TRACEMEM 6 /* tracemem() action */ -#define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) @@ -464,7 +455,6 @@ typedef struct dtrace_difv { #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) -#define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) @@ -499,31 +489,6 @@ typedef struct dtrace_difv { (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) -#define DTRACE_LLQUANTIZE_FACTORSHIFT 48 -#define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) -#define DTRACE_LLQUANTIZE_LOWSHIFT 32 -#define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) -#define DTRACE_LLQUANTIZE_HIGHSHIFT 16 -#define DTRACE_LLQUANTIZE_HIGHMASK 
((uint64_t)UINT16_MAX << 16) -#define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 -#define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX - -#define DTRACE_LLQUANTIZE_FACTOR(x) \ - (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ - DTRACE_LLQUANTIZE_FACTORSHIFT) - -#define DTRACE_LLQUANTIZE_LOW(x) \ - (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ - DTRACE_LLQUANTIZE_LOWSHIFT) - -#define DTRACE_LLQUANTIZE_HIGH(x) \ - (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ - DTRACE_LLQUANTIZE_HIGHSHIFT) - -#define DTRACE_LLQUANTIZE_NSTEP(x) \ - (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ - DTRACE_LLQUANTIZE_NSTEPSHIFT) - #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ @@ -1356,7 +1321,7 @@ typedef struct dof_helper { * dtps_resume() <-- Resume specified probe * dtps_getargdesc() <-- Get the argument description for args[X] * dtps_getargval() <-- Get the value for an argX or args[X] variable - * dtps_mode() <-- Return the mode of the fired probe + * dtps_usermode() <-- Find out if the probe was fired in user mode * dtps_destroy() <-- Destroy all state associated with this probe * * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) @@ -1605,32 +1570,24 @@ typedef struct dof_helper { * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * - * 1.10 int dtps_mode(void *arg, dtrace_id_t id, void *parg) + * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) * * 1.10.1 Overview * - * Called to determine the mode of a fired probe. + * Called to determine if the probe was fired in a user context. * * 1.10.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The - * second argument is the identifier of the current probe. The third + * second argument is the identifier of the current probe. 
The third * argument is the probe argument as passed to dtrace_probe_create(). This * entry point must not be left NULL for providers whose probes allow for - * mixed mode tracing, that is to say those unanchored probes that can fire - * during kernel- or user-mode execution. + * mixed mode tracing, that is to say those probes that can fire during + * kernel- _or_ user-mode execution * * 1.10.3 Return value * - * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL - * or DTRACE_MODE_USER) and the policy when the privilege of the enabling - * is insufficient for that mode (either DTRACE_MODE_NOPRIV_DROP or - * DTRACE_MODE_NOPRIV_RESTRICT). If the policy is DTRACE_MODE_NOPRIV_DROP, - * insufficient privilege will result in the probe firing being silently - * ignored for the enabling; if the policy is DTRACE_NODE_NOPRIV_RESTRICT, - * insufficient privilege will not prevent probe processing for the - * enabling, but restrictions will be in place that induce a UPRIV fault - * upon attempt to examine probe arguments or current process state. + * A boolean value. * * 1.10.4 Caller's context * @@ -2021,15 +1978,10 @@ typedef struct dtrace_pops { dtrace_argdesc_t *desc); uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, int argno, int aframes); - int (*dtps_mode)(void *arg, dtrace_id_t id, void *parg); + int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); } dtrace_pops_t; -#define DTRACE_MODE_KERNEL 0x01 -#define DTRACE_MODE_USER 0x02 -#define DTRACE_MODE_NOPRIV_DROP 0x10 -#define DTRACE_MODE_NOPRIV_RESTRICT 0x20 - typedef uintptr_t dtrace_provider_id_t; extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, diff --git a/uts/common/sys/dtrace_impl.h b/uts/common/sys/dtrace_impl.h index 3bebd0cb30b0..fed537e18ba0 100644 --- a/uts/common/sys/dtrace_impl.h +++ b/uts/common/sys/dtrace_impl.h @@ -24,13 +24,11 @@ * Use is subject to license terms. 
*/ -/* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. - */ - #ifndef _SYS_DTRACE_IMPL_H #define _SYS_DTRACE_IMPL_H +#pragma ident "%Z%%M% %I% %E% SMI" + #ifdef __cplusplus extern "C" { #endif @@ -421,11 +419,8 @@ typedef struct dtrace_buffer { uint32_t dtb_errors; /* number of errors */ uint32_t dtb_xamot_errors; /* errors in inactive buffer */ #ifndef _LP64 - uint64_t dtb_pad1; /* pad out to 64 bytes */ + uint64_t dtb_pad1; #endif - uint64_t dtb_switched; /* time of last switch */ - uint64_t dtb_interval; /* observed switch interval */ - uint64_t dtb_pad2[6]; /* pad to avoid false sharing */ } dtrace_buffer_t; /* @@ -929,8 +924,7 @@ typedef struct dtrace_mstate { * Access flag used by dtrace_mstate.dtms_access. */ #define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */ -#define DTRACE_ACCESS_PROC 0x2 /* the priv for proc state */ -#define DTRACE_ACCESS_ARGS 0x4 /* the priv to examine args */ + /* * DTrace Activity @@ -1145,7 +1139,7 @@ struct dtrace_provider { dtrace_pops_t dtpv_pops; /* provider operations */ char *dtpv_name; /* provider name */ void *dtpv_arg; /* provider argument */ - hrtime_t dtpv_defunct; /* when made defunct */ + uint_t dtpv_defunct; /* boolean: defunct provider */ struct dtrace_provider *dtpv_next; /* next provider */ }; @@ -1252,7 +1246,6 @@ extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); extern ulong_t dtrace_getreg(struct regs *, uint_t); -extern uint64_t dtrace_getvmreg(uint_t, volatile uint16_t *); extern int dtrace_getstackdepth(int); extern void dtrace_getupcstack(uint64_t *, int); extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); diff --git a/uts/common/sys/feature_tests.h b/uts/common/sys/feature_tests.h index e6ababd3d409..43339a83cd7f 100644 --- a/uts/common/sys/feature_tests.h +++ b/uts/common/sys/feature_tests.h @@ -27,6 +27,8 @@ #ifndef _SYS_FEATURE_TESTS_H #define _SYS_FEATURE_TESTS_H +#pragma ident 
"%Z%%M% %I% %E% SMI" + #include <sys/ccompile.h> #include <sys/isa_defs.h> @@ -363,7 +365,7 @@ extern "C" { * compiler is used. This allows for the use of single prototype * declarations regardless of compiler version. */ -#if (defined(__STDC__) && defined(_STDC_C99)) && !defined(__cplusplus) +#if (defined(__STDC__) && defined(_STDC_C99)) #define _RESTRICT_KYWD restrict #else #define _RESTRICT_KYWD diff --git a/uts/common/sys/fs/zfs.h b/uts/common/sys/fs/zfs.h index 511fa9589817..da0b12bab4a9 100644 --- a/uts/common/sys/fs/zfs.h +++ b/uts/common/sys/fs/zfs.h @@ -21,9 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -52,16 +49,6 @@ typedef enum { ZFS_TYPE_POOL = 0x8 } zfs_type_t; -typedef enum dmu_objset_type { - DMU_OST_NONE, - DMU_OST_META, - DMU_OST_ZFS, - DMU_OST_ZVOL, - DMU_OST_OTHER, /* For testing only! */ - DMU_OST_ANY, /* Be careful! */ - DMU_OST_NUMTYPES -} dmu_objset_type_t; - #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) @@ -100,7 +87,7 @@ typedef enum { ZFS_PROP_READONLY, ZFS_PROP_ZONED, ZFS_PROP_SNAPDIR, - ZFS_PROP_ACLMODE, + ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ ZFS_PROP_ACLINHERIT, ZFS_PROP_CREATETXG, /* not exposed to the user */ ZFS_PROP_NAME, /* not exposed to the user */ @@ -135,9 +122,6 @@ typedef enum { ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, - ZFS_PROP_REFRATIO, - ZFS_PROP_WRITTEN, - ZFS_PROP_CLONES, ZFS_NUM_PROPS } zfs_prop_t; @@ -177,15 +161,9 @@ typedef enum { ZPOOL_PROP_FREE, ZPOOL_PROP_ALLOCATED, ZPOOL_PROP_READONLY, - ZPOOL_PROP_COMMENT, - ZPOOL_PROP_EXPANDSZ, - ZPOOL_PROP_FREEING, ZPOOL_NUM_PROPS } zpool_prop_t; -/* Small enough to not hog a whole line of printout in zpool(1M). 
*/ -#define ZPROP_MAX_COMMENT 32 - #define ZPROP_CONT -2 #define ZPROP_INVAL -1 @@ -240,7 +218,6 @@ const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); boolean_t zfs_prop_userquota(const char *); -boolean_t zfs_prop_written(const char *); int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); @@ -254,8 +231,6 @@ const char *zpool_prop_to_name(zpool_prop_t); const char *zpool_prop_default_string(zpool_prop_t); uint64_t zpool_prop_default_numeric(zpool_prop_t); boolean_t zpool_prop_readonly(zpool_prop_t); -boolean_t zpool_prop_feature(const char *); -boolean_t zpool_prop_unsupported(const char *name); int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); @@ -363,7 +338,6 @@ typedef enum { #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL -#define SPA_VERSION_5000 5000ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk @@ -371,8 +345,8 @@ typedef enum { * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ -#define SPA_VERSION SPA_VERSION_5000 -#define SPA_VERSION_STRING "5000" +#define SPA_VERSION SPA_VERSION_28 +#define SPA_VERSION_STRING "28" /* * Symbolic names for the changes that caused a SPA_VERSION switch. 
@@ -423,12 +397,6 @@ typedef enum { #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 -#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28 -#define SPA_VERSION_FEATURES SPA_VERSION_5000 - -#define SPA_VERSION_IS_SUPPORTED(v) \ - (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ - ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * ZPL version - rev'd whenever an incompatible on-disk format change @@ -520,17 +488,11 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVERING "resilvering" -#define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ #define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ -#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */ -#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */ -#define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ -#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" -#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. 
This is because a device can be in multiple states, such @@ -609,7 +571,6 @@ typedef enum vdev_aux { VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ - VDEV_AUX_UNSUP_FEAT, /* unsupported features */ VDEV_AUX_SPARED, /* hot spare used in another pool */ VDEV_AUX_ERR_EXCEEDED, /* too many errors */ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ @@ -700,7 +661,6 @@ typedef struct vdev_stat { uint64_t vs_space; /* total capacity */ uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ - uint64_t vs_esize; /* expandable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ @@ -754,10 +714,10 @@ typedef struct ddt_histogram { /* * /dev/zfs ioctl numbers. */ +#define ZFS_IOC ('Z' << 8) + typedef enum zfs_ioc { - ZFS_IOC_FIRST = ('Z' << 8), - ZFS_IOC = ZFS_IOC_FIRST, - ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST, + ZFS_IOC_POOL_CREATE = ZFS_IOC, ZFS_IOC_POOL_DESTROY, ZFS_IOC_POOL_IMPORT, ZFS_IOC_POOL_EXPORT, @@ -792,6 +752,7 @@ typedef enum zfs_ioc { ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, ZFS_IOC_PROMOTE, + ZFS_IOC_DESTROY_SNAPS, ZFS_IOC_SNAPSHOT, ZFS_IOC_DSOBJ_TO_DSNAME, ZFS_IOC_OBJ_TO_PATH, @@ -813,18 +774,7 @@ typedef enum zfs_ioc { ZFS_IOC_NEXT_OBJ, ZFS_IOC_DIFF, ZFS_IOC_TMP_SNAPSHOT, - ZFS_IOC_OBJ_TO_STATS, - ZFS_IOC_SPACE_WRITTEN, - ZFS_IOC_SPACE_SNAPS, - ZFS_IOC_DESTROY_SNAPS, - ZFS_IOC_POOL_REGUID, - ZFS_IOC_POOL_REOPEN, - ZFS_IOC_SEND_PROGRESS, - ZFS_IOC_LOG_HISTORY, - ZFS_IOC_SEND_NEW, - ZFS_IOC_SEND_SPACE, - ZFS_IOC_CLONE, - ZFS_IOC_LAST + ZFS_IOC_OBJ_TO_STATS } zfs_ioc_t; /* @@ -861,12 +811,6 @@ typedef enum { #define ZPOOL_HIST_TXG "history txg" #define ZPOOL_HIST_INT_EVENT "history internal event" #define ZPOOL_HIST_INT_STR "history internal str" -#define ZPOOL_HIST_INT_NAME "internal_name" -#define 
ZPOOL_HIST_IOCTL "ioctl" -#define ZPOOL_HIST_INPUT_NVL "in_nvl" -#define ZPOOL_HIST_OUTPUT_NVL "out_nvl" -#define ZPOOL_HIST_DSNAME "dsname" -#define ZPOOL_HIST_DSID "dsid" /* * Flags for ZFS_IOC_VDEV_SET_STATE @@ -893,7 +837,6 @@ typedef enum { * ESC_ZFS_RESILVER_START * ESC_ZFS_RESILVER_END * ESC_ZFS_POOL_DESTROY - * ESC_ZFS_POOL_REGUID * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 @@ -912,6 +855,56 @@ typedef enum { #define ZFS_EV_VDEV_PATH "vdev_path" #define ZFS_EV_VDEV_GUID "vdev_guid" +/* + * Note: This is encoded on-disk, so new events must be added to the + * end, and unused events can not be removed. Be sure to edit + * libzfs_pool.c: hist_event_table[]. + */ +typedef enum history_internal_events { + LOG_NO_EVENT = 0, + LOG_POOL_CREATE, + LOG_POOL_VDEV_ADD, + LOG_POOL_REMOVE, + LOG_POOL_DESTROY, + LOG_POOL_EXPORT, + LOG_POOL_IMPORT, + LOG_POOL_VDEV_ATTACH, + LOG_POOL_VDEV_REPLACE, + LOG_POOL_VDEV_DETACH, + LOG_POOL_VDEV_ONLINE, + LOG_POOL_VDEV_OFFLINE, + LOG_POOL_UPGRADE, + LOG_POOL_CLEAR, + LOG_POOL_SCAN, + LOG_POOL_PROPSET, + LOG_DS_CREATE, + LOG_DS_CLONE, + LOG_DS_DESTROY, + LOG_DS_DESTROY_BEGIN, + LOG_DS_INHERIT, + LOG_DS_PROPSET, + LOG_DS_QUOTA, + LOG_DS_PERM_UPDATE, + LOG_DS_PERM_REMOVE, + LOG_DS_PERM_WHO_REMOVE, + LOG_DS_PROMOTE, + LOG_DS_RECEIVE, + LOG_DS_RENAME, + LOG_DS_RESERVATION, + LOG_DS_REPLAY_INC_SYNC, + LOG_DS_REPLAY_FULL_SYNC, + LOG_DS_ROLLBACK, + LOG_DS_SNAPSHOT, + LOG_DS_UPGRADE, + LOG_DS_REFQUOTA, + LOG_DS_REFRESERV, + LOG_POOL_SCAN_DONE, + LOG_DS_USER_HOLD, + LOG_DS_USER_RELEASE, + LOG_POOL_SPLIT, + LOG_END +} history_internal_events_t; + #ifdef __cplusplus } #endif diff --git a/uts/common/sys/nvpair.h b/uts/common/sys/nvpair.h index ad25effc2994..30ff4e0667b3 100644 --- a/uts/common/sys/nvpair.h +++ b/uts/common/sys/nvpair.h @@ -20,14 +20,12 @@ */ /* * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_NVPAIR_H #define _SYS_NVPAIR_H #include <sys/types.h> -#include <sys/time.h> #include <sys/errno.h> #include <sys/va_list.h> @@ -276,73 +274,6 @@ int nvpair_value_hrtime(nvpair_t *, hrtime_t *); int nvpair_value_double(nvpair_t *, double *); #endif -nvlist_t *fnvlist_alloc(void); -void fnvlist_free(nvlist_t *); -size_t fnvlist_size(nvlist_t *); -char *fnvlist_pack(nvlist_t *, size_t *); -void fnvlist_pack_free(char *, size_t); -nvlist_t *fnvlist_unpack(char *, size_t); -nvlist_t *fnvlist_dup(nvlist_t *); -void fnvlist_merge(nvlist_t *, nvlist_t *); - -void fnvlist_add_boolean(nvlist_t *, const char *); -void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); -void fnvlist_add_byte(nvlist_t *, const char *, uchar_t); -void fnvlist_add_int8(nvlist_t *, const char *, int8_t); -void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t); -void fnvlist_add_int16(nvlist_t *, const char *, int16_t); -void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t); -void fnvlist_add_int32(nvlist_t *, const char *, int32_t); -void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t); -void fnvlist_add_int64(nvlist_t *, const char *, int64_t); -void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t); -void fnvlist_add_string(nvlist_t *, const char *, const char *); -void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); -void fnvlist_add_nvpair(nvlist_t *, nvpair_t *); -void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t); -void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t); -void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t); -void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t); -void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t); -void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t); -void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t); -void fnvlist_add_uint32_array(nvlist_t *, 
const char *, uint32_t *, uint_t); -void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t); -void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t); -void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t); -void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t); - -void fnvlist_remove(nvlist_t *, const char *); -void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *); - -nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name); -boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name); -boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name); -uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name); -int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name); -int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name); -int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name); -int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name); -uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name); -uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name); -uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name); -uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name); -char *fnvlist_lookup_string(nvlist_t *nvl, const char *name); -nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name); - -boolean_t fnvpair_value_boolean_value(nvpair_t *nvp); -uchar_t fnvpair_value_byte(nvpair_t *nvp); -int8_t fnvpair_value_int8(nvpair_t *nvp); -int16_t fnvpair_value_int16(nvpair_t *nvp); -int32_t fnvpair_value_int32(nvpair_t *nvp); -int64_t fnvpair_value_int64(nvpair_t *nvp); -uint8_t fnvpair_value_uint8_t(nvpair_t *nvp); -uint16_t fnvpair_value_uint16(nvpair_t *nvp); -uint32_t fnvpair_value_uint32(nvpair_t *nvp); -uint64_t fnvpair_value_uint64(nvpair_t *nvp); -char *fnvpair_value_string(nvpair_t *nvp); -nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp); - #ifdef __cplusplus } #endif diff --git 
a/uts/common/sys/sysevent/eventdefs.h b/uts/common/sys/sysevent/eventdefs.h index 5a75c5d84460..3ed9bb298018 100644 --- a/uts/common/sys/sysevent/eventdefs.h +++ b/uts/common/sys/sysevent/eventdefs.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SYSEVENT_EVENTDEFS_H @@ -257,7 +256,6 @@ extern "C" { #define ESC_ZFS_SCRUB_FINISH "ESC_ZFS_scrub_finish" #define ESC_ZFS_VDEV_SPARE "ESC_ZFS_vdev_spare" #define ESC_ZFS_BOOTFS_VDEV_ATTACH "ESC_ZFS_bootfs_vdev_attach" -#define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid" /* * datalink subclass definitions. diff --git a/uts/common/sys/sysmacros.h b/uts/common/sys/sysmacros.h index 71042eba85ae..89a672db2f8c 100644 --- a/uts/common/sys/sysmacros.h +++ b/uts/common/sys/sysmacros.h @@ -25,8 +25,6 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * - * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SYSMACROS_H @@ -366,18 +364,12 @@ extern unsigned char bcd_to_byte[256]; #error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined #endif /* _BIT_FIELDS_LTOH */ +#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) + /* avoid any possibility of clashing with <stddef.h> version */ -#if defined(_KERNEL) && !defined(_KMEMUSER) -#if !defined(offsetof) #define offsetof(s, m) ((size_t)(&(((s *)0)->m))) -#endif /* !offsetof */ - -#define container_of(m, s, name) \ - (void *)((uintptr_t)(m) - (uintptr_t)offsetof(s, name)) - -#define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0])) -#endif /* _KERNEL, !_KMEMUSER */ +#endif #ifdef __cplusplus } diff --git a/uts/common/zmod/crc32.c b/uts/common/zmod/crc32.c new file mode 100644 index 000000000000..61ad581ef562 --- /dev/null +++ b/uts/common/zmod/crc32.c @@ -0,0 +1,428 @@ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + */ + +#ifdef MAKECRCH +# include <stdio.h> +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +#define local static + +/* Find a four-byte integer type for crc32_little() and crc32_big(). */ +#ifndef NOBYFOUR +# ifdef STDC /* need ANSI C limits.h to determine sizes */ +# include <limits.h> +# define BYFOUR +# if (UINT_MAX == 0xffffffffUL) + typedef unsigned int u4; +# else +# if (ULONG_MAX == 0xffffffffUL) + typedef unsigned long u4; +# else +# if (USHRT_MAX == 0xffffffffUL) + typedef unsigned short u4; +# else +# undef BYFOUR /* can't find a four-byte integer type! */ +# endif +# endif +# endif +# endif /* STDC */ +#endif /* !NOBYFOUR */ + +/* Definitions for doing the crc four data bytes at a time. 
*/ +#ifdef BYFOUR +# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ + (((w)&0xff00)<<8)+(((w)&0xff)<<24)) + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, unsigned)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, unsigned)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local unsigned long FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const unsigned long FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. 
+ + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + unsigned long c; + int n, k; + unsigned long poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0UL; + for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) + poly |= 1UL << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (unsigned long)n; + for (k = 0; k < 8; k++) + c = c & 1 ? 
poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = REV(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = REV(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const unsigned long FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const unsigned long FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). 
+ */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const unsigned long FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const unsigned long FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + u4 endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +#ifdef BYFOUR + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = (u4)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = 
crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *++buf4; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = REV((u4)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4--; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf4++; + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(REV(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; 
+ + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case */ + if (len2 == 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} |