diff options
Diffstat (limited to 'common')
-rw-r--r-- | common/acl/acl_common.c | 1755 | ||||
-rw-r--r-- | common/acl/acl_common.h | 59 | ||||
-rw-r--r-- | common/atomic/amd64/atomic.s | 573 | ||||
-rw-r--r-- | common/atomic/i386/atomic.s | 720 | ||||
-rw-r--r-- | common/atomic/sparc/atomic.s | 801 | ||||
-rw-r--r-- | common/list/list.c | 251 | ||||
-rw-r--r-- | common/nvpair/nvpair.c | 3297 | ||||
-rw-r--r-- | common/nvpair/nvpair_alloc_fixed.c | 120 | ||||
-rw-r--r-- | common/unicode/u8_textprep.c | 2132 | ||||
-rw-r--r-- | common/zfs/zfs_comutil.c | 202 | ||||
-rw-r--r-- | common/zfs/zfs_comutil.h | 46 | ||||
-rw-r--r-- | common/zfs/zfs_deleg.c | 237 | ||||
-rw-r--r-- | common/zfs/zfs_deleg.h | 85 | ||||
-rw-r--r-- | common/zfs/zfs_fletcher.c | 246 | ||||
-rw-r--r-- | common/zfs/zfs_fletcher.h | 53 | ||||
-rw-r--r-- | common/zfs/zfs_namecheck.c | 345 | ||||
-rw-r--r-- | common/zfs/zfs_namecheck.h | 58 | ||||
-rw-r--r-- | common/zfs/zfs_prop.c | 595 | ||||
-rw-r--r-- | common/zfs/zfs_prop.h | 129 | ||||
-rw-r--r-- | common/zfs/zpool_prop.c | 202 | ||||
-rw-r--r-- | common/zfs/zprop_common.c | 426 |
21 files changed, 12332 insertions, 0 deletions
diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c new file mode 100644 index 000000000000..eafc47d10f2d --- /dev/null +++ b/common/acl/acl_common.c @@ -0,0 +1,1755 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/avl.h> +#if defined(_KERNEL) +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <acl/acl_common.h> +#else +#include <errno.h> +#include <stdlib.h> +#include <stddef.h> +#include <strings.h> +#include <unistd.h> +#include <assert.h> +#include <grp.h> +#include <pwd.h> +#include <acl_common.h> +#define ASSERT assert +#endif + +#define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \ + ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \ + ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL) + + +#define ACL_SYNCHRONIZE_SET_DENY 0x0000001 +#define ACL_SYNCHRONIZE_SET_ALLOW 0x0000002 +#define ACL_SYNCHRONIZE_ERR_DENY 0x0000004 +#define ACL_SYNCHRONIZE_ERR_ALLOW 0x0000008 + +#define ACL_WRITE_OWNER_SET_DENY 0x0000010 +#define ACL_WRITE_OWNER_SET_ALLOW 0x0000020 +#define ACL_WRITE_OWNER_ERR_DENY 0x0000040 +#define ACL_WRITE_OWNER_ERR_ALLOW 0x0000080 + +#define ACL_DELETE_SET_DENY 0x0000100 +#define ACL_DELETE_SET_ALLOW 0x0000200 +#define ACL_DELETE_ERR_DENY 0x0000400 +#define ACL_DELETE_ERR_ALLOW 0x0000800 + +#define ACL_WRITE_ATTRS_OWNER_SET_DENY 0x0001000 +#define ACL_WRITE_ATTRS_OWNER_SET_ALLOW 0x0002000 +#define ACL_WRITE_ATTRS_OWNER_ERR_DENY 0x0004000 +#define ACL_WRITE_ATTRS_OWNER_ERR_ALLOW 0x0008000 + +#define ACL_WRITE_ATTRS_WRITER_SET_DENY 0x0010000 +#define ACL_WRITE_ATTRS_WRITER_SET_ALLOW 0x0020000 +#define ACL_WRITE_ATTRS_WRITER_ERR_DENY 0x0040000 +#define ACL_WRITE_ATTRS_WRITER_ERR_ALLOW 0x0080000 + +#define ACL_WRITE_NAMED_WRITER_SET_DENY 0x0100000 +#define ACL_WRITE_NAMED_WRITER_SET_ALLOW 0x0200000 +#define ACL_WRITE_NAMED_WRITER_ERR_DENY 0x0400000 +#define ACL_WRITE_NAMED_WRITER_ERR_ALLOW 0x0800000 + +#define ACL_READ_NAMED_READER_SET_DENY 0x1000000 +#define ACL_READ_NAMED_READER_SET_ALLOW 0x2000000 +#define ACL_READ_NAMED_READER_ERR_DENY 0x4000000 +#define ACL_READ_NAMED_READER_ERR_ALLOW 0x8000000 + + +#define ACE_VALID_MASK_BITS (\ + ACE_READ_DATA | \ + ACE_LIST_DIRECTORY | \ + ACE_WRITE_DATA | \ + ACE_ADD_FILE | \ + ACE_APPEND_DATA | \ + ACE_ADD_SUBDIRECTORY | \ + ACE_READ_NAMED_ATTRS | \ + ACE_WRITE_NAMED_ATTRS | \ + ACE_EXECUTE | \ + ACE_DELETE_CHILD | \ + ACE_READ_ATTRIBUTES | \ + ACE_WRITE_ATTRIBUTES | \ + ACE_DELETE | \ + ACE_READ_ACL | \ + ACE_WRITE_ACL | \ + ACE_WRITE_OWNER | \ + ACE_SYNCHRONIZE) + +#define ACE_MASK_UNDEFINED 0x80000000 + +#define ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \ + ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \ + ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \ + ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE) + +/* + * ACL conversion helpers + */ + +typedef enum { + ace_unused, + ace_user_obj, + ace_user, + ace_group, /* includes GROUP and GROUP_OBJ */ + ace_other_obj +} ace_to_aent_state_t; + +typedef struct acevals { + uid_t key; + avl_node_t avl; + uint32_t mask; + uint32_t allowed; + uint32_t denied; + int aent_type; +} acevals_t; + +typedef struct ace_list { + acevals_t user_obj; + avl_tree_t user; + int numusers; + acevals_t group_obj; + avl_tree_t group; + int numgroups; + acevals_t other_obj; + uint32_t acl_mask; + int hasmask; + int dfacl_flag; + ace_to_aent_state_t state; + int seen; /* bitmask of all aclent_t a_type values seen */ +} ace_list_t; + +/* + * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified. + * v = Ptr to array/vector of objs + * n = # objs in the array + * s = size of each obj (must be multiples of a word size) + * f = ptr to function to compare two objs + * returns (-1 = less than, 0 = equal, 1 = greater than + */ +void +ksort(caddr_t v, int n, int s, int (*f)()) +{ + int g, i, j, ii; + unsigned int *p1, *p2; + unsigned int tmp; + + /* No work to do */ + if (v == NULL || n <= 1) + return; + + /* Sanity check on arguments */ + ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0); + ASSERT(s > 0); + for (g = n / 2; g > 0; g /= 2) { + for (i = g; i < n; i++) { + for (j = i - g; j >= 0 && + (*f)(v + j * s, v + (j + g) * s) == 1; + j -= g) { + p1 = (void *)(v + j * s); + p2 = (void *)(v + (j + g) * s); + for (ii = 0; ii < s / 4; ii++) { + tmp = *p1; + *p1++ = *p2; + *p2++ = tmp; + } + } + } + } +} + +/* + * Compare two acls, all fields. Returns: + * -1 (less than) + * 0 (equal) + * +1 (greater than) + */ +int +cmp2acls(void *a, void *b) +{ + aclent_t *x = (aclent_t *)a; + aclent_t *y = (aclent_t *)b; + + /* Compare types */ + if (x->a_type < y->a_type) + return (-1); + if (x->a_type > y->a_type) + return (1); + /* Equal types; compare id's */ + if (x->a_id < y->a_id) + return (-1); + if (x->a_id > y->a_id) + return (1); + /* Equal ids; compare perms */ + if (x->a_perm < y->a_perm) + return (-1); + if (x->a_perm > y->a_perm) + return (1); + /* Totally equal */ + return (0); +} + +/*ARGSUSED*/ +static void * +cacl_realloc(void *ptr, size_t size, size_t new_size) +{ +#if defined(_KERNEL) + void *tmp; + + tmp = kmem_alloc(new_size, KM_SLEEP); + (void) memcpy(tmp, ptr, (size < new_size) ? size : new_size); + kmem_free(ptr, size); + return (tmp); +#else + return (realloc(ptr, new_size)); +#endif +} + +static int +cacl_malloc(void **ptr, size_t size) +{ +#if defined(_KERNEL) + *ptr = kmem_zalloc(size, KM_SLEEP); + return (0); +#else + *ptr = calloc(1, size); + if (*ptr == NULL) + return (errno); + + return (0); +#endif +} + +/*ARGSUSED*/ +static void +cacl_free(void *ptr, size_t size) +{ +#if defined(_KERNEL) + kmem_free(ptr, size); +#else + free(ptr); +#endif +} + +acl_t * +acl_alloc(enum acl_type type) +{ + acl_t *aclp; + + if (cacl_malloc((void **)&aclp, sizeof (acl_t)) != 0) + return (NULL); + + aclp->acl_aclp = NULL; + aclp->acl_cnt = 0; + + switch (type) { + case ACE_T: + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + break; + case ACLENT_T: + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + break; + default: + acl_free(aclp); + aclp = NULL; + } + return (aclp); +} + +/* + * Free acl_t structure + */ +void +acl_free(acl_t *aclp) +{ + int acl_size; + + if (aclp == NULL) + return; + + if (aclp->acl_aclp) { + acl_size = aclp->acl_cnt * aclp->acl_entry_size; + cacl_free(aclp->acl_aclp, acl_size); + } + + cacl_free(aclp, sizeof (acl_t)); +} + +static uint32_t +access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow) +{ + uint32_t access_mask = 0; + int acl_produce; + int synchronize_set = 0, write_owner_set = 0; + int delete_set = 0, write_attrs_set = 0; + int read_named_set = 0, write_named_set = 0; + + acl_produce = (ACL_SYNCHRONIZE_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_WRITER_SET_DENY); + + if (isallow) { + synchronize_set = ACL_SYNCHRONIZE_SET_ALLOW; + write_owner_set = ACL_WRITE_OWNER_SET_ALLOW; + delete_set = ACL_DELETE_SET_ALLOW; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_ALLOW; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + else if (haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + } else { + + synchronize_set = ACL_SYNCHRONIZE_SET_DENY; + write_owner_set = ACL_WRITE_OWNER_SET_DENY; + delete_set = ACL_DELETE_SET_DENY; + if (hasreadperm) + read_named_set = ACL_READ_NAMED_READER_SET_DENY; + if (haswriteperm) + write_named_set = ACL_WRITE_NAMED_WRITER_SET_DENY; + if (isowner) + write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_DENY; + else if (haswriteperm) + write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_DENY; + else + /* + * If the entity is not the owner and does not + * have write permissions ACE_WRITE_ATTRIBUTES will + * always go in the DENY ACE. + */ + access_mask |= ACE_WRITE_ATTRIBUTES; + } + + if (acl_produce & synchronize_set) + access_mask |= ACE_SYNCHRONIZE; + if (acl_produce & write_owner_set) + access_mask |= ACE_WRITE_OWNER; + if (acl_produce & delete_set) + access_mask |= ACE_DELETE; + if (acl_produce & write_attrs_set) + access_mask |= ACE_WRITE_ATTRIBUTES; + if (acl_produce & read_named_set) + access_mask |= ACE_READ_NAMED_ATTRS; + if (acl_produce & write_named_set) + access_mask |= ACE_WRITE_NAMED_ATTRS; + + return (access_mask); +} + +/* + * Given an mode_t, convert it into an access_mask as used + * by nfsace, assuming aclent_t -> nfsace semantics. + */ +static uint32_t +mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow) +{ + uint32_t access = 0; + int haswriteperm = 0; + int hasreadperm = 0; + + if (isallow) { + haswriteperm = (mode & S_IWOTH); + hasreadperm = (mode & S_IROTH); + } else { + haswriteperm = !(mode & S_IWOTH); + hasreadperm = !(mode & S_IROTH); + } + + /* + * The following call takes care of correctly setting the following + * mask bits in the access_mask: + * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE, + * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS + */ + access = access_mask_set(haswriteperm, hasreadperm, isowner, isallow); + + if (isallow) { + access |= ACE_READ_ACL | ACE_READ_ATTRIBUTES; + if (isowner) + access |= ACE_WRITE_ACL; + } else { + if (! isowner) + access |= ACE_WRITE_ACL; + } + + /* read */ + if (mode & S_IROTH) { + access |= ACE_READ_DATA; + } + /* write */ + if (mode & S_IWOTH) { + access |= ACE_WRITE_DATA | + ACE_APPEND_DATA; + if (isdir) + access |= ACE_DELETE_CHILD; + } + /* exec */ + if (mode & 01) { + access |= ACE_EXECUTE; + } + + return (access); +} + +/* + * Given an nfsace (presumably an ALLOW entry), make a + * corresponding DENY entry at the address given. + */ +static void +ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner) +{ + (void) memcpy(deny, allow, sizeof (ace_t)); + + deny->a_who = allow->a_who; + + deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS; + if (isdir) + deny->a_access_mask ^= ACE_DELETE_CHILD; + + deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER | + ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS | + ACE_WRITE_NAMED_ATTRS); + deny->a_access_mask |= access_mask_set((allow->a_access_mask & + ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner, + B_FALSE); +} +/* + * Make an initial pass over an array of aclent_t's. Gather + * information such as an ACL_MASK (if any), number of users, + * number of groups, and whether the array needs to be sorted. + */ +static int +ln_aent_preprocess(aclent_t *aclent, int n, + int *hasmask, mode_t *mask, + int *numuser, int *numgroup, int *needsort) +{ + int error = 0; + int i; + int curtype = 0; + + *hasmask = 0; + *mask = 07; + *needsort = 0; + *numuser = 0; + *numgroup = 0; + + for (i = 0; i < n; i++) { + if (aclent[i].a_type < curtype) + *needsort = 1; + else if (aclent[i].a_type > curtype) + curtype = aclent[i].a_type; + if (aclent[i].a_type & USER) + (*numuser)++; + if (aclent[i].a_type & (GROUP | GROUP_OBJ)) + (*numgroup)++; + if (aclent[i].a_type & CLASS_OBJ) { + if (*hasmask) { + error = EINVAL; + goto out; + } else { + *hasmask = 1; + *mask = aclent[i].a_perm; + } + } + } + + if ((! *hasmask) && (*numuser + *numgroup > 1)) { + error = EINVAL; + goto out; + } + +out: + return (error); +} + +/* + * Convert an array of aclent_t into an array of nfsace entries, + * following POSIX draft -> nfsv4 conversion semantics as outlined in + * the IETF draft. + */ +static int +ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir) +{ + int error = 0; + mode_t mask; + int numuser, numgroup, needsort; + int resultsize = 0; + int i, groupi = 0, skip; + ace_t *acep, *result = NULL; + int hasmask; + + error = ln_aent_preprocess(aclent, n, &hasmask, &mask, + &numuser, &numgroup, &needsort); + if (error != 0) + goto out; + + /* allow + deny for each aclent */ + resultsize = n * 2; + if (hasmask) { + /* + * stick extra deny on the group_obj and on each + * user|group for the mask (the group_obj was added + * into the count for numgroup) + */ + resultsize += numuser + numgroup; + /* ... and don't count the mask itself */ + resultsize -= 2; + } + + /* sort the source if necessary */ + if (needsort) + ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls); + + if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0) + goto out; + + acep = result; + + for (i = 0; i < n; i++) { + /* + * don't process CLASS_OBJ (mask); mask was grabbed in + * ln_aent_preprocess() + */ + if (aclent[i].a_type & CLASS_OBJ) + continue; + + /* If we need an ACL_MASK emulator, prepend it now */ + if ((hasmask) && + (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) { + acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE; + acep->a_flags = 0; + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= + (ACE_IDENTIFIER_GROUP|ACE_GROUP); + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + } else { + acep->a_who = aclent[i].a_id; + acep->a_flags |= ACE_IDENTIFIER_GROUP; + } + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + /* + * Set the access mask for the prepended deny + * ace. To do this, we invert the mask (found + * in ln_aent_preprocess()) then convert it to an + * DENY ace access_mask. + */ + acep->a_access_mask = mode_to_ace_access((mask ^ 07), + isdir, 0, 0); + acep += 1; + } + + /* handle a_perm -> access_mask */ + acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm, + isdir, aclent[i].a_type & USER_OBJ, 1); + + /* emulate a default aclent */ + if (aclent[i].a_type & ACL_DEFAULT) { + acep->a_flags |= ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE; + } + + /* + * handle a_perm and a_id + * + * this must be done last, since it involves the + * corresponding deny aces, which are handled + * differently for each different a_type. + */ + if (aclent[i].a_type & USER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_OWNER; + ace_make_deny(acep, acep + 1, isdir, B_TRUE); + acep += 2; + } else if (aclent[i].a_type & USER) { + acep->a_who = aclent[i].a_id; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) { + if (aclent[i].a_type & GROUP_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_GROUP; + } else { + acep->a_who = aclent[i].a_id; + } + acep->a_flags |= ACE_IDENTIFIER_GROUP; + /* + * Set the corresponding deny for the group ace. + * + * The deny aces go after all of the groups, unlike + * everything else, where they immediately follow + * the allow ace. + * + * We calculate "skip", the number of slots to + * skip ahead for the deny ace, here. + * + * The pattern is: + * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3 + * thus, skip is + * (2 * numgroup) - 1 - groupi + * (2 * numgroup) to account for MD + A + * - 1 to account for the fact that we're on the + * access (A), not the mask (MD) + * - groupi to account for the fact that we have + * passed up groupi number of MD's. + */ + skip = (2 * numgroup) - 1 - groupi; + ace_make_deny(acep, acep + skip, isdir, B_FALSE); + /* + * If we just did the last group, skip acep past + * all of the denies; else, just move ahead one. + */ + if (++groupi >= numgroup) + acep += numgroup + 1; + else + acep += 1; + } else if (aclent[i].a_type & OTHER_OBJ) { + acep->a_who = (uid_t)-1; + acep->a_flags |= ACE_EVERYONE; + ace_make_deny(acep, acep + 1, isdir, B_FALSE); + acep += 2; + } else { + error = EINVAL; + goto out; + } + } + + *acepp = result; + *rescount = resultsize; + +out: + if (error != 0) { + if ((result != NULL) && (resultsize > 0)) { + cacl_free(result, resultsize * sizeof (ace_t)); + } + } + + return (error); +} + +static int +convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir, + ace_t **retacep, int *retacecnt) +{ + ace_t *acep; + ace_t *dfacep; + int acecnt = 0; + int dfacecnt = 0; + int dfaclstart = 0; + int dfaclcnt = 0; + aclent_t *aclp; + int i; + int error; + int acesz, dfacesz; + + ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls); + + for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) { + if (aclp->a_type & ACL_DEFAULT) + break; + } + + if (i < aclcnt) { + dfaclstart = i; + dfaclcnt = aclcnt - i; + } + + if (dfaclcnt && isdir == 0) { + return (EINVAL); + } + + error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir); + if (error) + return (error); + + if (dfaclcnt) { + error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt, + &dfacep, &dfacecnt, isdir); + if (error) { + if (acep) { + cacl_free(acep, acecnt * sizeof (ace_t)); + } + return (error); + } + } + + if (dfacecnt != 0) { + acesz = sizeof (ace_t) * acecnt; + dfacesz = sizeof (ace_t) * dfacecnt; + acep = cacl_realloc(acep, acesz, acesz + dfacesz); + if (acep == NULL) + return (ENOMEM); + if (dfaclcnt) { + (void) memcpy(acep + acecnt, dfacep, dfacesz); + } + } + if (dfaclcnt) + cacl_free(dfacep, dfacecnt * sizeof (ace_t)); + + *retacecnt = acecnt + dfacecnt; + *retacep = acep; + return (0); +} + +static int +ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir) +{ + int error = 0; + o_mode_t mode = 0; + uint32_t bits, wantbits; + + /* read */ + if (mask & ACE_READ_DATA) + mode |= S_IROTH; + + /* write */ + wantbits = (ACE_WRITE_DATA | ACE_APPEND_DATA); + if (isdir) + wantbits |= ACE_DELETE_CHILD; + bits = mask & wantbits; + if (bits != 0) { + if (bits != wantbits) { + error = ENOTSUP; + goto out; + } + mode |= S_IWOTH; + } + + /* exec */ + if (mask & ACE_EXECUTE) { + mode |= S_IXOTH; + } + + *modep = mode; + +out: + return (error); +} + +static void +acevals_init(acevals_t *vals, uid_t key) +{ + bzero(vals, sizeof (*vals)); + vals->allowed = ACE_MASK_UNDEFINED; + vals->denied = ACE_MASK_UNDEFINED; + vals->mask = ACE_MASK_UNDEFINED; + vals->key = key; +} + +static void +ace_list_init(ace_list_t *al, int dfacl_flag) +{ + acevals_init(&al->user_obj, NULL); + acevals_init(&al->group_obj, NULL); + acevals_init(&al->other_obj, NULL); + al->numusers = 0; + al->numgroups = 0; + al->acl_mask = 0; + al->hasmask = 0; + al->state = ace_unused; + al->seen = 0; + al->dfacl_flag = dfacl_flag; +} + +/* + * Find or create an acevals holder for a given id and avl tree. + * + * Note that only one thread will ever touch these avl trees, so + * there is no need for locking. + */ +static acevals_t * +acevals_find(ace_t *ace, avl_tree_t *avl, int *num) +{ + acevals_t key, *rc; + avl_index_t where; + + key.key = ace->a_who; + rc = avl_find(avl, &key, &where); + if (rc != NULL) + return (rc); + + /* this memory is freed by ln_ace_to_aent()->ace_list_free() */ + if (cacl_malloc((void **)&rc, sizeof (acevals_t)) != 0) + return (NULL); + + acevals_init(rc, ace->a_who); + avl_insert(avl, rc, where); + (*num)++; + + return (rc); +} + +static int +access_mask_check(ace_t *acep, int mask_bit, int isowner) +{ + int set_deny, err_deny; + int set_allow, err_allow; + int acl_consume; + int haswriteperm, hasreadperm; + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1; + } else { + haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 1 : 0; + hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0; + } + + acl_consume = (ACL_SYNCHRONIZE_ERR_DENY | + ACL_DELETE_ERR_DENY | + ACL_WRITE_OWNER_ERR_DENY | + ACL_WRITE_OWNER_ERR_ALLOW | + ACL_WRITE_ATTRS_OWNER_SET_ALLOW | + ACL_WRITE_ATTRS_OWNER_ERR_DENY | + ACL_WRITE_ATTRS_WRITER_SET_DENY | + ACL_WRITE_ATTRS_WRITER_ERR_ALLOW | + ACL_WRITE_NAMED_WRITER_ERR_DENY | + ACL_READ_NAMED_READER_ERR_DENY); + + if (mask_bit == ACE_SYNCHRONIZE) { + set_deny = ACL_SYNCHRONIZE_SET_DENY; + err_deny = ACL_SYNCHRONIZE_ERR_DENY; + set_allow = ACL_SYNCHRONIZE_SET_ALLOW; + err_allow = ACL_SYNCHRONIZE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_OWNER) { + set_deny = ACL_WRITE_OWNER_SET_DENY; + err_deny = ACL_WRITE_OWNER_ERR_DENY; + set_allow = ACL_WRITE_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_OWNER_ERR_ALLOW; + } else if (mask_bit == ACE_DELETE) { + set_deny = ACL_DELETE_SET_DENY; + err_deny = ACL_DELETE_ERR_DENY; + set_allow = ACL_DELETE_SET_ALLOW; + err_allow = ACL_DELETE_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_ATTRIBUTES) { + if (isowner) { + set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_OWNER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW; + } else if (haswriteperm) { + set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY; + err_deny = ACL_WRITE_ATTRS_WRITER_ERR_DENY; + set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW; + } else { + if ((acep->a_access_mask & mask_bit) && + (acep->a_type & ACE_ACCESS_ALLOWED_ACE_TYPE)) { + return (ENOTSUP); + } + return (0); + } + } else if (mask_bit == ACE_READ_NAMED_ATTRS) { + if (!hasreadperm) + return (0); + + set_deny = ACL_READ_NAMED_READER_SET_DENY; + err_deny = ACL_READ_NAMED_READER_ERR_DENY; + set_allow = ACL_READ_NAMED_READER_SET_ALLOW; + err_allow = ACL_READ_NAMED_READER_ERR_ALLOW; + } else if (mask_bit == ACE_WRITE_NAMED_ATTRS) { + if (!haswriteperm) + return (0); + + set_deny = ACL_WRITE_NAMED_WRITER_SET_DENY; + err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY; + set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW; + err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW; + } else { + return (EINVAL); + } + + if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { + if (acl_consume & set_deny) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_deny) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } else { + /* ACE_ACCESS_ALLOWED_ACE_TYPE */ + if (acl_consume & set_allow) { + if (!(acep->a_access_mask & mask_bit)) { + return (ENOTSUP); + } + } else if (acl_consume & err_allow) { + if (acep->a_access_mask & mask_bit) { + return (ENOTSUP); + } + } + } + return (0); +} + +static int +ace_to_aent_legal(ace_t *acep) +{ + int error = 0; + int isowner; + + /* only ALLOW or DENY */ + if ((acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE) && + (acep->a_type != ACE_ACCESS_DENIED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid flags */ + if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) { + error = EINVAL; + goto out; + } + + /* some flags are illegal */ + if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG | + ACE_FAILED_ACCESS_ACE_FLAG | + ACE_NO_PROPAGATE_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + + /* check for invalid masks */ + if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) { + error = EINVAL; + goto out; + } + + if ((acep->a_flags & ACE_OWNER)) { + isowner = 1; + } else { + isowner = 0; + } + + error = access_mask_check(acep, ACE_SYNCHRONIZE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_OWNER, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_DELETE, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner); + if (error) + goto out; + + error = access_mask_check(acep, ACE_WRITE_NAMED_ATTRS, isowner); + if (error) + goto out; + + /* more detailed checking of masks */ + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + if (! (acep->a_access_mask & ACE_READ_ATTRIBUTES)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_access_mask & ACE_WRITE_DATA) && + (! (acep->a_access_mask & ACE_APPEND_DATA))) { + error = ENOTSUP; + goto out; + } + if ((! (acep->a_access_mask & ACE_WRITE_DATA)) && + (acep->a_access_mask & ACE_APPEND_DATA)) { + error = ENOTSUP; + goto out; + } + } + + /* ACL enforcement */ + if ((acep->a_access_mask & ACE_READ_ACL) && + (acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE)) { + error = ENOTSUP; + goto out; + } + if (acep->a_access_mask & ACE_WRITE_ACL) { + if ((acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) && + (isowner)) { + error = ENOTSUP; + goto out; + } + if ((acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) && + (! isowner)) { + error = ENOTSUP; + goto out; + } + } + +out: + return (error); +} + +static int +ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir) +{ + /* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */ + if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) != + (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) { + return (ENOTSUP); + } + + return (ace_mask_to_mode(mask, modep, isdir)); +} + +static int +acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list, + uid_t owner, gid_t group, int isdir) +{ + int error; + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + + if (isdir) + flips |= ACE_DELETE_CHILD; + if (vals->allowed != (vals->denied ^ flips)) { + error = ENOTSUP; + goto out; + } + if ((list->hasmask) && (list->acl_mask != vals->mask) && + (vals->aent_type & (USER | GROUP | GROUP_OBJ))) { + error = ENOTSUP; + goto out; + } + error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir); + if (error != 0) + goto out; + dest->a_type = vals->aent_type; + if (dest->a_type & (USER | GROUP)) { + dest->a_id = vals->key; + } else if (dest->a_type & USER_OBJ) { + dest->a_id = owner; + } else if (dest->a_type & GROUP_OBJ) { + dest->a_id = group; + } else if (dest->a_type & OTHER_OBJ) { + dest->a_id = 0; + } else { + error = EINVAL; + goto out; + } + +out: + return (error); +} + + +static int +ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt, + uid_t owner, gid_t group, int isdir) +{ + int error = 0; + aclent_t *aent, *result = NULL; + acevals_t *vals; + int resultcount; + + if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) != + (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) { + error = ENOTSUP; + goto out; + } + if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) { + error = ENOTSUP; + goto out; + } + + resultcount = 3 + list->numusers + list->numgroups; + /* + * This must be the same condition as below, when we add the CLASS_OBJ + * (aka ACL mask) + */ + if ((list->hasmask) || (! list->dfacl_flag)) + resultcount += 1; + + if (cacl_malloc((void **)&result, + resultcount * sizeof (aclent_t)) != 0) { + error = ENOMEM; + goto out; + } + aent = result; + + /* USER_OBJ */ + if (!(list->user_obj.aent_type & USER_OBJ)) { + error = EINVAL; + goto out; + } + + error = acevals_to_aent(&list->user_obj, aent, list, owner, group, + isdir); + + if (error != 0) + goto out; + ++aent; + /* USER */ + vals = NULL; + for (vals = avl_first(&list->user); vals != NULL; + vals = AVL_NEXT(&list->user, vals)) { + if (!(vals->aent_type & USER)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + } + /* GROUP_OBJ */ + if (!(list->group_obj.aent_type & GROUP_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->group_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + /* GROUP */ + vals = NULL; + for (vals = avl_first(&list->group); vals != NULL; + vals = AVL_NEXT(&list->group, vals)) { + if (!(vals->aent_type & GROUP)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(vals, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + } + /* + * CLASS_OBJ (aka ACL_MASK) + * + * An ACL_MASK is not fabricated if the ACL is a default ACL. + * This is to follow UFS's behavior. + */ + if ((list->hasmask) || (! list->dfacl_flag)) { + if (list->hasmask) { + uint32_t flips = ACE_POSIX_SUPPORTED_BITS; + if (isdir) + flips |= ACE_DELETE_CHILD; + error = ace_mask_to_mode(list->acl_mask ^ flips, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } else { + /* fabricate the ACL_MASK from the group permissions */ + error = ace_mask_to_mode(list->group_obj.allowed, + &aent->a_perm, isdir); + if (error != 0) + goto out; + } + aent->a_id = 0; + aent->a_type = CLASS_OBJ | list->dfacl_flag; + ++aent; + } + /* OTHER_OBJ */ + if (!(list->other_obj.aent_type & OTHER_OBJ)) { + error = EINVAL; + goto out; + } + error = acevals_to_aent(&list->other_obj, aent, list, owner, group, + isdir); + if (error != 0) + goto out; + ++aent; + + *aclentp = result; + *aclcnt = resultcount; + +out: + if (error != 0) { + if (result != NULL) + cacl_free(result, resultcount * sizeof (aclent_t)); + } + + return (error); +} + + +/* + * free all data associated with an ace_list + */ +static void +ace_list_free(ace_list_t *al) +{ + acevals_t *node; + void *cookie; + + if (al == NULL) + return; + + cookie = NULL; + while ((node = avl_destroy_nodes(&al->user, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + cookie = NULL; + while ((node = avl_destroy_nodes(&al->group, &cookie)) != NULL) + cacl_free(node, sizeof (acevals_t)); + + avl_destroy(&al->user); + avl_destroy(&al->group); + + /* free the container itself */ + cacl_free(al, sizeof (ace_list_t)); +} + +static int +acevals_compare(const void *va, const void *vb) +{ + const acevals_t *a = va, *b = vb; + + if (a->key == b->key) + return (0); + + if (a->key > b->key) + return (1); + + else + return (-1); +} + +/* + * Convert a list of ace_t entries to equivalent regular and default + * aclent_t lists. Return error (ENOTSUP) when conversion is not possible. + */ +static int +ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group, + aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt, + int isdir) +{ + int error = 0; + ace_t *acep; + uint32_t bits; + int i; + ace_list_t *normacl = NULL, *dfacl = NULL, *acl; + acevals_t *vals; + + *aclentp = NULL; + *aclcnt = 0; + *dfaclentp = NULL; + *dfaclcnt = 0; + + /* we need at least user_obj, group_obj, and other_obj */ + if (n < 6) { + error = ENOTSUP; + goto out; + } + if (ace == NULL) { + error = EINVAL; + goto out; + } + + error = cacl_malloc((void **)&normacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&normacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&normacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + + ace_list_init(normacl, 0); + + error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t)); + if (error != 0) + goto out; + + avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t), + offsetof(acevals_t, avl)); + ace_list_init(dfacl, ACL_DEFAULT); + + /* process every ace_t... */ + for (i = 0; i < n; i++) { + acep = &ace[i]; + + /* rule out certain cases quickly */ + error = ace_to_aent_legal(acep); + if (error != 0) + goto out; + + /* + * Turn off these bits in order to not have to worry about + * them when doing the checks for compliments. + */ + acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE | + ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES | + ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS); + + /* see if this should be a regular or default acl */ + bits = acep->a_flags & + (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE); + if (bits != 0) { + /* all or nothing on these inherit bits */ + if (bits != (ACE_INHERIT_ONLY_ACE | + ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE)) { + error = ENOTSUP; + goto out; + } + acl = dfacl; + } else { + acl = normacl; + } + + if ((acep->a_flags & ACE_OWNER)) { + if (acl->state > ace_user_obj) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user_obj; + acl->seen |= USER_OBJ; + vals = &acl->user_obj; + vals->aent_type = USER_OBJ | acl->dfacl_flag; + } else if ((acep->a_flags & ACE_EVERYONE)) { + acl->state = ace_other_obj; + acl->seen |= OTHER_OBJ; + vals = &acl->other_obj; + vals->aent_type = OTHER_OBJ | acl->dfacl_flag; + } else if (acep->a_flags & ACE_IDENTIFIER_GROUP) { + if (acl->state > ace_group) { + error = ENOTSUP; + goto out; + } + if ((acep->a_flags & ACE_GROUP)) { + acl->seen |= GROUP_OBJ; + vals = &acl->group_obj; + vals->aent_type = GROUP_OBJ | acl->dfacl_flag; + } else { + acl->seen |= GROUP; + vals = acevals_find(acep, &acl->group, + &acl->numgroups); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = GROUP | acl->dfacl_flag; + } + acl->state = ace_group; + } else { + if (acl->state > ace_user) { + error = ENOTSUP; + goto out; + } + acl->state = ace_user; + acl->seen |= USER; + vals = acevals_find(acep, &acl->user, + &acl->numusers); + if (vals == NULL) { + error = ENOMEM; + goto out; + } + vals->aent_type = USER | acl->dfacl_flag; + } + + if (!(acl->state > ace_unused)) { + error = EINVAL; + goto out; + } + + if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { + /* no more than one allowed per aclent_t */ + if (vals->allowed != ACE_MASK_UNDEFINED) { + error = ENOTSUP; + goto out; + } + vals->allowed = acep->a_access_mask; + } else { + /* + * it's a DENY; if there was a previous DENY, it + * must have been an ACL_MASK. + */ + if (vals->denied != ACE_MASK_UNDEFINED) { + /* ACL_MASK is for USER and GROUP only */ + if ((acl->state != ace_user) && + (acl->state != ace_group)) { + error = ENOTSUP; + goto out; + } + + if (! acl->hasmask) { + acl->hasmask = 1; + acl->acl_mask = vals->denied; + /* check for mismatched ACL_MASK emulations */ + } else if (acl->acl_mask != vals->denied) { + error = ENOTSUP; + goto out; + } + vals->mask = vals->denied; + } + vals->denied = acep->a_access_mask; + } + } + + /* done collating; produce the aclent_t lists */ + if (normacl->state != ace_unused) { + error = ace_list_to_aent(normacl, aclentp, aclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + if (dfacl->state != ace_unused) { + error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt, + owner, group, isdir); + if (error != 0) { + goto out; + } + } + +out: + if (normacl != NULL) + ace_list_free(normacl); + if (dfacl != NULL) + ace_list_free(dfacl); + + return (error); +} + +static int +convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir, + uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt) +{ + int error = 0; + aclent_t *aclentp, *dfaclentp; + int aclcnt, dfaclcnt; + int aclsz, dfaclsz; + + error = ln_ace_to_aent(acebufp, acecnt, owner, group, + &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir); + + if (error) + return (error); + + + if (dfaclcnt != 0) { + /* + * Slap aclentp and dfaclentp into a single array. + */ + aclsz = sizeof (aclent_t) * aclcnt; + dfaclsz = sizeof (aclent_t) * dfaclcnt; + aclentp = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz); + if (aclentp != NULL) { + (void) memcpy(aclentp + aclcnt, dfaclentp, dfaclsz); + } else { + error = ENOMEM; + } + } + + if (aclentp) { + *retaclentp = aclentp; + *retaclcnt = aclcnt + dfaclcnt; + } + + if (dfaclentp) + cacl_free(dfaclentp, dfaclsz); + + return (error); +} + + +int +acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner, + gid_t group) +{ + int aclcnt; + void *acldata; + int error; + + /* + * See if we need to translate + */ + if ((target_flavor == _ACL_ACE_ENABLED && aclp->acl_type == ACE_T) || + (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACLENT_T)) + return (0); + + if (target_flavor == -1) { + error = EINVAL; + goto out; + } + + if (target_flavor == _ACL_ACE_ENABLED && + aclp->acl_type == ACLENT_T) { + error = convert_aent_to_ace(aclp->acl_aclp, + aclp->acl_cnt, isdir, (ace_t **)&acldata, &aclcnt); + if (error) + goto out; + + } else if (target_flavor == _ACL_ACLENT_ENABLED && + aclp->acl_type == ACE_T) { + error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt, + isdir, owner, group, (aclent_t **)&acldata, &aclcnt); + if (error) + goto out; + } else { + error = ENOTSUP; + goto out; + } + + /* + * replace old acl with newly translated acl + */ + cacl_free(aclp->acl_aclp, aclp->acl_cnt * aclp->acl_entry_size); + aclp->acl_aclp = acldata; + aclp->acl_cnt = aclcnt; + if (target_flavor == _ACL_ACE_ENABLED) { + aclp->acl_type = ACE_T; + aclp->acl_entry_size = sizeof (ace_t); + } else { + aclp->acl_type = ACLENT_T; + aclp->acl_entry_size = sizeof (aclent_t); + } + return (0); + +out: + +#if !defined(_KERNEL) + errno = error; + return (-1); +#else + return (error); +#endif +} + +#define SET_ACE(acl, index, who, mask, type, flags) { \ + acl[0][index].a_who = (uint32_t)who; \ + acl[0][index].a_type = type; \ + acl[0][index].a_flags = flags; \ + acl[0][index++].a_access_mask = mask; \ +} + +void +acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) +{ + *deny1 = *deny2 = *allow0 = *group = 0; + + if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) + *deny1 |= ACE_READ_DATA; + if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) + *deny1 |= ACE_WRITE_DATA; + if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) + *deny1 |= ACE_EXECUTE; + + if (!(mode & S_IRGRP) && (mode & S_IROTH)) + *deny2 = ACE_READ_DATA; + if (!(mode & S_IWGRP) && (mode & S_IWOTH)) + *deny2 |= ACE_WRITE_DATA; + if (!(mode & S_IXGRP) && (mode & S_IXOTH)) + *deny2 |= ACE_EXECUTE; + + if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) + *allow0 |= ACE_READ_DATA; + if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) + *allow0 |= ACE_WRITE_DATA; + if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) + *allow0 |= ACE_EXECUTE; + + *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; + if (mode & S_IRUSR) + *owner |= ACE_READ_DATA; + if (mode & S_IWUSR) + *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXUSR) + *owner |= ACE_EXECUTE; + + *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IRGRP) + *group |= ACE_READ_DATA; + if (mode & S_IWGRP) + *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXGRP) + *group |= ACE_EXECUTE; + + *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IROTH) + *everyone |= ACE_READ_DATA; + if (mode & S_IWOTH) + *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXOTH) + *everyone |= ACE_EXECUTE; +} + +int +acl_trivial_create(mode_t mode, ace_t **acl, int *count) +{ + uint32_t deny1, deny2; + uint32_t allow0; + uint32_t owner, group, everyone; + int index = 0; + int error; + + *count = 3; + acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group, + &everyone); + + if (allow0) + (*count)++; + if (deny1) + (*count)++; + if (deny2) + (*count)++; + + if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0) + return (error); + + if (allow0) { + SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_OWNER); + } + if (deny1) { + SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_OWNER); + } + if (deny2) { + SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE, + ACE_GROUP|ACE_IDENTIFIER_GROUP); + } + + SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); + SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_IDENTIFIER_GROUP|ACE_GROUP); + SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, + ACE_EVERYONE); + + return (0); +} + +/* + * ace_trivial: + * determine whether an ace_t acl is trivial + * + * Trivialness implies that the acl is composed of only + * owner, group, everyone entries. ACL can't + * have read_acl denied, and write_owner/write_acl/write_attributes + * can only be owner@ entry. + */ +int +ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)) +{ + uint16_t flags; + uint32_t mask; + uint16_t type; + uint64_t cookie = 0; + + while (cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask)) { + switch (flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + case ACE_EVERYONE: + break; + default: + return (1); + + } + + if (flags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| + ACE_INHERIT_ONLY_ACE)) + return (1); + + /* + * Special check for some special bits + * + * Don't allow anybody to deny reading basic + * attributes or a files ACL. + */ + if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + (type == ACE_ACCESS_DENIED_ACE_TYPE)) + return (1); + + /* + * Delete permissions are never set by default + */ + if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) + return (1); + /* + * only allow owner@ to have + * write_acl/write_owner/write_attributes/write_xattr/ + */ + if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && + (!(flags & ACE_OWNER) && (mask & + (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| + ACE_WRITE_NAMED_ATTRS)))) + return (1); + + } + return (0); +} + +uint64_t +ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags, + uint16_t *type, uint32_t *mask) +{ + ace_t *acep = datap; + + if (cookie >= aclcnt) + return (0); + + *flags = acep[cookie].a_flags; + *type = acep[cookie].a_type; + *mask = acep[cookie++].a_access_mask; + + return (cookie); +} + +int +ace_trivial(ace_t *acep, int aclcnt) +{ + return (ace_trivial_common(acep, aclcnt, ace_walk)); +} diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h new file mode 100644 index 000000000000..f76cbd3b450f --- /dev/null +++ b/common/acl/acl_common.h @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ACL_COMMON_H +#define _ACL_COMMON_H + +#include <sys/types.h> +#include <sys/acl.h> +#include <sys/stat.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern ace_t trivial_acl[6]; + +extern int acltrivial(const char *); +extern void adjust_ace_pair(ace_t *pair, mode_t mode); +extern void adjust_ace_pair_common(void *, size_t, size_t, mode_t); +extern int ace_trivial(ace_t *acep, int aclcnt); +extern int ace_trivial_common(void *, int, + uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *, + uint32_t *mask)); +extern acl_t *acl_alloc(acl_type_t); +extern void acl_free(acl_t *aclp); +extern int acl_translate(acl_t *aclp, int target_flavor, + int isdir, uid_t owner, gid_t group); +void ksort(caddr_t v, int n, int s, int (*f)()); +int cmp2acls(void *a, void *b); +int acl_trivial_create(mode_t mode, ace_t **acl, int *count); +void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone); + +#ifdef __cplusplus +} +#endif + +#endif /* _ACL_COMMON_H */ diff --git a/common/atomic/amd64/atomic.s b/common/atomic/amd64/atomic.s new file mode 100644 index 000000000000..4b0d66e4db20 --- /dev/null +++ b/common/atomic/amd64/atomic.s @@ -0,0 +1,573 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). + */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) +#endif + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + lock + incb (%rdi) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + lock + incw (%rdi) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + lock + incl (%rdi) + ret + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_ulong) + lock + incq (%rdi) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + xorl %eax, %eax / clear upper bits of %eax return register + incb %al / %al = 1 + lock + xaddb %al, (%rdi) / %al = old value, (%rdi) = new value + incb %al / return new value + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort_nv) + xorl %eax, %eax / clear upper bits of %eax return register + incw %ax / %ax = 1 + lock + xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value + incw %ax / return new value + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + xorl %eax, %eax / %eax = 0 + incl %eax / %eax = 1 + lock + xaddl %eax, (%rdi) / %eax = old value, (%rdi) = new value + incl %eax / return new value + ret + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + ENTRY(atomic_inc_64_nv) + ALTENTRY(atomic_inc_ulong_nv) + xorq %rax, %rax / %rax = 0 + incq %rax / %rax = 1 + lock + xaddq %rax, (%rdi) / %rax = old value, (%rdi) = new value + incq %rax / return new value + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_64_nv) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + lock + decb (%rdi) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + lock + decw (%rdi) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + lock + decl (%rdi) + ret + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_ulong) + lock + decq (%rdi) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + xorl %eax, %eax / clear upper bits of %eax return register + decb %al / %al = -1 + lock + xaddb %al, (%rdi) / %al = old value, (%rdi) = new value + decb %al / return new value + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + xorl %eax, %eax / clear upper bits of %eax return register + decw %ax / %ax = -1 + lock + xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value + decw %ax / return new value + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + ENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint_nv) + xorl %eax, %eax / %eax = 0 + decl %eax / %eax = -1 + lock + xaddl %eax, (%rdi) / %eax = old value, (%rdi) = new value + decl %eax / return new value + ret + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_32_nv) + + ENTRY(atomic_dec_64_nv) + ALTENTRY(atomic_dec_ulong_nv) + xorq %rax, %rax / %rax = 0 + decq %rax / %rax = -1 + lock + xaddq %rax, (%rdi) / %rax = old value, (%rdi) = new value + decq %rax / return new value + ret + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_64_nv) + + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_char) + lock + addb %sil, (%rdi) + ret + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8) + + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_short) + lock + addw %si, (%rdi) + ret + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16) + + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_int) + lock + addl %esi, (%rdi) + ret + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32) + + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_long) + lock + addq %rsi, (%rdi) + ret + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_64) + + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_uchar) + lock + orb %sil, (%rdi) + ret + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8) + + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_ushort) + lock + orw %si, (%rdi) + ret + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16) + + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_uint) + lock + orl %esi, (%rdi) + ret + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32) + + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_ulong) + lock + orq %rsi, (%rdi) + ret + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_64) + + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_uchar) + lock + andb %sil, (%rdi) + ret + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8) + + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_ushort) + lock + andw %si, (%rdi) + ret + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16) + + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + lock + andl %esi, (%rdi) + ret + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_ulong) + lock + andq %rsi, (%rdi) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movzbl %sil, %eax / %al = delta addend, clear upper bits + lock + xaddb %sil, (%rdi) / %sil = old value, (%rdi) = sum + addb %sil, %al / new value = original value + delta + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movzwl %si, %eax / %ax = delta addend, clean upper bits + lock + xaddw %si, (%rdi) / %si = old value, (%rdi) = sum + addw %si, %ax / new value = original value + delta + ret + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + mov %esi, %eax / %eax = delta addend + lock + xaddl %esi, (%rdi) / %esi = old value, (%rdi) = sum + add %esi, %eax / new value = original value + delta + ret + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + ENTRY(atomic_add_64_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + mov %rsi, %rax / %rax = delta addend + lock + xaddq %rsi, (%rdi) / %rsi = old value, (%rdi) = sum + addq %rsi, %rax / new value = original value + delta + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_64_nv) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movb (%rdi), %al / %al = old value +1: + movb %sil, %cl + andb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%rdi) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movw (%rdi), %ax / %ax = old value +1: + movw %si, %cx + andw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%rdi) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + andl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + ENTRY(atomic_and_64_nv) + ALTENTRY(atomic_and_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + andq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_64_nv) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movb (%rdi), %al / %al = old value +1: + movb %sil, %cl + orb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%rdi) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movw (%rdi), %ax / %ax = old value +1: + movw %si, %cx + orw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%rdi) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + movl (%rdi), %eax +1: + movl %esi, %ecx + orl %eax, %ecx + lock + cmpxchgl %ecx, (%rdi) + jne 1b + movl %ecx, %eax + ret + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + ENTRY(atomic_or_64_nv) + ALTENTRY(atomic_or_ulong_nv) + movq (%rdi), %rax +1: + movq %rsi, %rcx + orq %rax, %rcx + lock + cmpxchgq %rcx, (%rdi) + jne 1b + movq %rcx, %rax + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_64_nv) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movzbl %sil, %eax + lock + cmpxchgb %dl, (%rdi) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movzwl %si, %eax + lock + cmpxchgw %dx, (%rdi) + ret + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + movl %esi, %eax + lock + cmpxchgl %edx, (%rdi) + ret + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movq %rsi, %rax + lock + cmpxchgq %rdx, (%rdi) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movzbl %sil, %eax + lock + xchgb %al, (%rdi) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movzwl %si, %eax + lock + xchgw %ax, (%rdi) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + movl %esi, %eax + lock + xchgl %eax, (%rdi) + ret + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + ALTENTRY(atomic_swap_ulong) + ALTENTRY(atomic_swap_ptr) + movq %rsi, %rax + lock + xchgq %rax, (%rdi) + ret + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + xorl %eax, %eax + lock + btsq %rsi, (%rdi) + jnc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + xorl %eax, %eax + lock + btrq %rsi, (%rdi) + jc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * NOTE: membar_enter, and membar_exit are identical routines. + * We define them separately, instead of using an ALTENTRY + * definitions to alias them together, so that DTrace and + * debuggers will see a unique address for them, allowing + * more accurate tracing. + */ + + ENTRY(membar_enter) + mfence + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + mfence + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + sfence + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lfence + ret + SET_SIZE(membar_consumer) + +#endif /* !_KERNEL */ diff --git a/common/atomic/i386/atomic.s b/common/atomic/i386/atomic.s new file mode 100644 index 000000000000..4fa525ba20af --- /dev/null +++ b/common/atomic/i386/atomic.s @@ -0,0 +1,720 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). + */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) +#endif + + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_uchar) + movl 4(%esp), %eax + lock + incb (%eax) + ret + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8) + + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_ushort) + movl 4(%esp), %eax + lock + incw (%eax) + ret + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16) + + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_uint) + ALTENTRY(atomic_inc_ulong) + movl 4(%esp), %eax + lock + incl (%eax) + ret + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32) + + ENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / clear upper bits of %eax + incb %al / %al = 1 + lock + xaddb %al, (%edx) / %al = old value, inc (%edx) + incb %al / return new value + ret + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_8_nv) + + ENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / clear upper bits of %eax + incw %ax / %ax = 1 + lock + xaddw %ax, (%edx) / %ax = old value, inc (%edx) + incw %ax / return new value + ret + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_16_nv) + + ENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint_nv) + ALTENTRY(atomic_inc_ulong_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / %eax = 0 + incl %eax / %eax = 1 + lock + xaddl %eax, (%edx) / %eax = old value, inc (%edx) + incl %eax / return new value + ret + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_32_nv) + + /* + * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever + * separated, you need to also edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_64_nv. + */ + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + incl %ebx / %ecx:%ebx = 1 + addl %eax, %ebx + adcl %edx, %ecx / add in the carry from inc + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_inc_64_nv) + SET_SIZE(atomic_inc_64) + + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_uchar) + movl 4(%esp), %eax + lock + decb (%eax) + ret + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8) + + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_ushort) + movl 4(%esp), %eax + lock + decw (%eax) + ret + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16) + + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_uint) + ALTENTRY(atomic_dec_ulong) + movl 4(%esp), %eax + lock + decl (%eax) + ret + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32) + + ENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / zero upper bits of %eax + decb %al / %al = -1 + lock + xaddb %al, (%edx) / %al = old value, dec (%edx) + decb %al / return new value + ret + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_8_nv) + + ENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / zero upper bits of %eax + decw %ax / %ax = -1 + lock + xaddw %ax, (%edx) / %ax = old value, dec (%edx) + decw %ax / return new value + ret + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_16_nv) + + ENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint_nv) + ALTENTRY(atomic_dec_ulong_nv) + movl 4(%esp), %edx / %edx = target address + xorl %eax, %eax / %eax = 0 + decl %eax / %eax = -1 + lock + xaddl %eax, (%edx) / %eax = old value, dec (%edx) + decl %eax / return new value + ret + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_32_nv) + + /* + * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_64_nv. + */ + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + xorl %ebx, %ebx + xorl %ecx, %ecx + not %ecx + not %ebx / %ecx:%ebx = -1 + addl %eax, %ebx + adcl %edx, %ecx / add in the carry from inc + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_dec_64_nv) + SET_SIZE(atomic_dec_64) + + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_char) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addb %cl, (%eax) + ret + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8) + + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_short) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addw %cx, (%eax) + ret + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16) + + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_int) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_long) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + addl %ecx, (%eax) + ret + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32) + + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + orb %cl, (%eax) + ret + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8) + + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + orw %cx, (%eax) + ret + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16) + + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_uint) + ALTENTRY(atomic_or_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + orl %ecx, (%eax) + ret + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32) + + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_uchar) + movl 4(%esp), %eax + movb 8(%esp), %cl + lock + andb %cl, (%eax) + ret + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8) + + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_ushort) + movl 4(%esp), %eax + movw 8(%esp), %cx + lock + andw %cx, (%eax) + ret + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16) + + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_uint) + ALTENTRY(atomic_and_ulong) + movl 4(%esp), %eax + movl 8(%esp), %ecx + lock + andl %ecx, (%eax) + ret + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32) + + ENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char_nv) + movl 4(%esp), %edx / %edx = target address + movb 8(%esp), %cl / %cl = delta + movzbl %cl, %eax / %al = delta, zero extended + lock + xaddb %cl, (%edx) / %cl = old value, (%edx) = sum + addb %cl, %al / return old value plus delta + ret + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_8_nv) + + ENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short_nv) + movl 4(%esp), %edx / %edx = target address + movw 8(%esp), %cx / %cx = delta + movzwl %cx, %eax / %ax = delta, zero extended + lock + xaddw %cx, (%edx) / %cx = old value, (%edx) = sum + addw %cx, %ax / return old value plus delta + ret + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_16_nv) + + ENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int_nv) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long_nv) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %eax / %eax = delta + movl %eax, %ecx / %ecx = delta + lock + xaddl %eax, (%edx) / %eax = old value, (%edx) = sum + addl %ecx, %eax / return old value plus delta + ret + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_32_nv) + + /* + * NOTE: If atomic_add_64 and atomic_add_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_64_nv. + */ + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + addl %eax, %ebx + adcl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_add_64_nv) + SET_SIZE(atomic_add_64) + + ENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar_nv) + movl 4(%esp), %edx / %edx = target address + movb (%edx), %al / %al = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%edx) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_8_nv) + + ENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort_nv) + movl 4(%esp), %edx / %edx = target address + movw (%edx), %ax / %ax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%edx) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_16_nv) + + ENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint_nv) + ALTENTRY(atomic_or_ulong_nv) + movl 4(%esp), %edx / %edx = target address + movl (%edx), %eax / %eax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + orl %eax, %ecx / %ecx = new value + lock + cmpxchgl %ecx, (%edx) / try to stick it in + jne 1b + movl %ecx, %eax / return new value + ret + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_32_nv) + + /* + * NOTE: If atomic_or_64 and atomic_or_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_64_nv. + */ + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + orl %eax, %ebx + orl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_or_64_nv) + SET_SIZE(atomic_or_64) + + ENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar_nv) + movl 4(%esp), %edx / %edx = target address + movb (%edx), %al / %al = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andb %al, %cl / %cl = new value + lock + cmpxchgb %cl, (%edx) / try to stick it in + jne 1b + movzbl %cl, %eax / return new value + ret + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_8_nv) + + ENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort_nv) + movl 4(%esp), %edx / %edx = target address + movw (%edx), %ax / %ax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andw %ax, %cx / %cx = new value + lock + cmpxchgw %cx, (%edx) / try to stick it in + jne 1b + movzwl %cx, %eax / return new value + ret + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_16_nv) + + ENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint_nv) + ALTENTRY(atomic_and_ulong_nv) + movl 4(%esp), %edx / %edx = target address + movl (%edx), %eax / %eax = old value +1: + movl 8(%esp), %ecx / %ecx = delta + andl %eax, %ecx / %ecx = new value + lock + cmpxchgl %ecx, (%edx) / try to stick it in + jne 1b + movl %ecx, %eax / return new value + ret + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_32_nv) + + /* + * NOTE: If atomic_and_64 and atomic_and_64_nv are ever + * separated, it is important to edit the libc i386 platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_64_nv. + */ + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_64_nv) + pushl %edi + pushl %ebx + movl 12(%esp), %edi / %edi = target address + movl (%edi), %eax + movl 4(%edi), %edx / %edx:%eax = old value +1: + movl 16(%esp), %ebx + movl 20(%esp), %ecx / %ecx:%ebx = delta + andl %eax, %ebx + andl %edx, %ecx / %ecx:%ebx = new value + lock + cmpxchg8b (%edi) / try to stick it in + jne 1b + movl %ebx, %eax + movl %ecx, %edx / return new value + popl %ebx + popl %edi + ret + SET_SIZE(atomic_and_64_nv) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + movb 12(%esp), %cl + lock + cmpxchgb %cl, (%edx) + ret + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + movw 12(%esp), %cx + lock + cmpxchgw %cx, (%edx) + ret + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + ALTENTRY(atomic_cas_ulong) + ALTENTRY(atomic_cas_ptr) + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + lock + cmpxchgl %ecx, (%edx) + ret + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + pushl %ebx + pushl %esi + movl 12(%esp), %esi + movl 16(%esp), %eax + movl 20(%esp), %edx + movl 24(%esp), %ebx + movl 28(%esp), %ecx + lock + cmpxchg8b (%esi) + popl %esi + popl %ebx + ret + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + movl 4(%esp), %edx + movzbl 8(%esp), %eax + lock + xchgb %al, (%edx) + ret + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + movl 4(%esp), %edx + movzwl 8(%esp), %eax + lock + xchgw %ax, (%edx) + ret + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + ALTENTRY(atomic_swap_ptr) + ALTENTRY(atomic_swap_ulong) + movl 4(%esp), %edx + movl 8(%esp), %eax + lock + xchgl %eax, (%edx) + ret + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + pushl %esi + pushl %ebx + movl 12(%esp), %esi + movl 16(%esp), %ebx + movl 20(%esp), %ecx + movl (%esi), %eax + movl 4(%esi), %edx / %edx:%eax = old value +1: + lock + cmpxchg8b (%esi) + jne 1b + popl %ebx + popl %esi + ret + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %ecx / %ecx = bit id + xorl %eax, %eax + lock + btsl %ecx, (%edx) + jnc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + movl 4(%esp), %edx / %edx = target address + movl 8(%esp), %ecx / %ecx = bit id + xorl %eax, %eax + lock + btrl %ecx, (%edx) + jc 1f + decl %eax / return -1 +1: + ret + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * NOTE: membar_enter, membar_exit, membar_producer, and + * membar_consumer are all identical routines. We define them + * separately, instead of using ALTENTRY definitions to alias them + * together, so that DTrace and debuggers will see a unique address + * for them, allowing more accurate tracing. + */ + + + ENTRY(membar_enter) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lock + xorl $0, (%esp) + ret + SET_SIZE(membar_consumer) + +#endif /* !_KERNEL */ diff --git a/common/atomic/sparc/atomic.s b/common/atomic/sparc/atomic.s new file mode 100644 index 000000000000..8aa240efa297 --- /dev/null +++ b/common/atomic/sparc/atomic.s @@ -0,0 +1,801 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atomic.s" + +#include <sys/asm_linkage.h> + +#if defined(_KERNEL) + /* + * Legacy kernel interfaces; they will go away (eventually). + */ + ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function) + ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function) + ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function) + ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function) + ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function) + ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function) + ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function) + ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function) +#endif + + /* + * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_8_nv. + */ + ENTRY(atomic_inc_8) + ALTENTRY(atomic_inc_8_nv) + ALTENTRY(atomic_inc_uchar) + ALTENTRY(atomic_inc_uchar_nv) + ba add_8 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_uchar_nv) + SET_SIZE(atomic_inc_uchar) + SET_SIZE(atomic_inc_8_nv) + SET_SIZE(atomic_inc_8) + + /* + * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_8_nv. + */ + ENTRY(atomic_dec_8) + ALTENTRY(atomic_dec_8_nv) + ALTENTRY(atomic_dec_uchar) + ALTENTRY(atomic_dec_uchar_nv) + ba add_8 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_uchar_nv) + SET_SIZE(atomic_dec_uchar) + SET_SIZE(atomic_dec_8_nv) + SET_SIZE(atomic_dec_8) + + /* + * NOTE: If atomic_add_8 and atomic_add_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_8_nv. + */ + ENTRY(atomic_add_8) + ALTENTRY(atomic_add_8_nv) + ALTENTRY(atomic_add_char) + ALTENTRY(atomic_add_char_nv) +add_8: + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + add %o2, %o1, %o5 ! add value to the old value + and %o5, %o3, %o5 ! clear other bits + andn %o2, %o3, %o4 ! clear target bits + or %o4, %o5, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + add %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_add_char_nv) + SET_SIZE(atomic_add_char) + SET_SIZE(atomic_add_8_nv) + SET_SIZE(atomic_add_8) + + /* + * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_16_nv. + */ + ENTRY(atomic_inc_16) + ALTENTRY(atomic_inc_16_nv) + ALTENTRY(atomic_inc_ushort) + ALTENTRY(atomic_inc_ushort_nv) + ba add_16 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_ushort_nv) + SET_SIZE(atomic_inc_ushort) + SET_SIZE(atomic_inc_16_nv) + SET_SIZE(atomic_inc_16) + + /* + * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_16_nv. + */ + ENTRY(atomic_dec_16) + ALTENTRY(atomic_dec_16_nv) + ALTENTRY(atomic_dec_ushort) + ALTENTRY(atomic_dec_ushort_nv) + ba add_16 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_ushort_nv) + SET_SIZE(atomic_dec_ushort) + SET_SIZE(atomic_dec_16_nv) + SET_SIZE(atomic_dec_16) + + /* + * NOTE: If atomic_add_16 and atomic_add_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_16_nv. + */ + ENTRY(atomic_add_16) + ALTENTRY(atomic_add_16_nv) + ALTENTRY(atomic_add_short) + ALTENTRY(atomic_add_short_nv) +add_16: + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + add %o1, %o2, %o5 ! add value to the old value + and %o5, %o3, %o5 ! clear other bits + andn %o2, %o3, %o4 ! clear target bits + or %o4, %o5, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + add %o1, %o2, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_add_short_nv) + SET_SIZE(atomic_add_short) + SET_SIZE(atomic_add_16_nv) + SET_SIZE(atomic_add_16) + + /* + * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_32_nv. + */ + ENTRY(atomic_inc_32) + ALTENTRY(atomic_inc_32_nv) + ALTENTRY(atomic_inc_uint) + ALTENTRY(atomic_inc_uint_nv) + ALTENTRY(atomic_inc_ulong) + ALTENTRY(atomic_inc_ulong_nv) + ba add_32 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_ulong_nv) + SET_SIZE(atomic_inc_ulong) + SET_SIZE(atomic_inc_uint_nv) + SET_SIZE(atomic_inc_uint) + SET_SIZE(atomic_inc_32_nv) + SET_SIZE(atomic_inc_32) + + /* + * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_32_nv. + */ + ENTRY(atomic_dec_32) + ALTENTRY(atomic_dec_32_nv) + ALTENTRY(atomic_dec_uint) + ALTENTRY(atomic_dec_uint_nv) + ALTENTRY(atomic_dec_ulong) + ALTENTRY(atomic_dec_ulong_nv) + ba add_32 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_ulong_nv) + SET_SIZE(atomic_dec_ulong) + SET_SIZE(atomic_dec_uint_nv) + SET_SIZE(atomic_dec_uint) + SET_SIZE(atomic_dec_32_nv) + SET_SIZE(atomic_dec_32) + + /* + * NOTE: If atomic_add_32 and atomic_add_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_32_nv. + */ + ENTRY(atomic_add_32) + ALTENTRY(atomic_add_32_nv) + ALTENTRY(atomic_add_int) + ALTENTRY(atomic_add_int_nv) + ALTENTRY(atomic_add_ptr) + ALTENTRY(atomic_add_ptr_nv) + ALTENTRY(atomic_add_long) + ALTENTRY(atomic_add_long_nv) +add_32: + ld [%o0], %o2 +1: + add %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + add %o2, %o1, %o0 ! return new value + SET_SIZE(atomic_add_long_nv) + SET_SIZE(atomic_add_long) + SET_SIZE(atomic_add_ptr_nv) + SET_SIZE(atomic_add_ptr) + SET_SIZE(atomic_add_int_nv) + SET_SIZE(atomic_add_int) + SET_SIZE(atomic_add_32_nv) + SET_SIZE(atomic_add_32) + + /* + * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_inc_64_nv. + */ + ENTRY(atomic_inc_64) + ALTENTRY(atomic_inc_64_nv) + ba add_64 + add %g0, 1, %o1 + SET_SIZE(atomic_inc_64_nv) + SET_SIZE(atomic_inc_64) + + /* + * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_dec_64_nv. + */ + ENTRY(atomic_dec_64) + ALTENTRY(atomic_dec_64_nv) + ba add_64 + sub %g0, 1, %o1 + SET_SIZE(atomic_dec_64_nv) + SET_SIZE(atomic_dec_64) + + /* + * NOTE: If atomic_add_64 and atomic_add_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_add_64_nv. + */ + ENTRY(atomic_add_64) + ALTENTRY(atomic_add_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit +add_64: + ldx [%o0], %o2 +1: + add %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + add %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_add_64_nv) + SET_SIZE(atomic_add_64) + + /* + * NOTE: If atomic_or_8 and atomic_or_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_8_nv. + */ + ENTRY(atomic_or_8) + ALTENTRY(atomic_or_8_nv) + ALTENTRY(atomic_or_uchar) + ALTENTRY(atomic_or_uchar_nv) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + or %o2, %o1, %o5 ! or in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + or %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_or_uchar_nv) + SET_SIZE(atomic_or_uchar) + SET_SIZE(atomic_or_8_nv) + SET_SIZE(atomic_or_8) + + /* + * NOTE: If atomic_or_16 and atomic_or_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_16_nv. + */ + ENTRY(atomic_or_16) + ALTENTRY(atomic_or_16_nv) + ALTENTRY(atomic_or_ushort) + ALTENTRY(atomic_or_ushort_nv) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + or %o2, %o1, %o5 ! or in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + or %o2, %o1, %o5 ! or in the new value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_or_ushort_nv) + SET_SIZE(atomic_or_ushort) + SET_SIZE(atomic_or_16_nv) + SET_SIZE(atomic_or_16) + + /* + * NOTE: If atomic_or_32 and atomic_or_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_32_nv. + */ + ENTRY(atomic_or_32) + ALTENTRY(atomic_or_32_nv) + ALTENTRY(atomic_or_uint) + ALTENTRY(atomic_or_uint_nv) + ALTENTRY(atomic_or_ulong) + ALTENTRY(atomic_or_ulong_nv) + ld [%o0], %o2 +1: + or %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + or %o2, %o1, %o0 ! return new value + SET_SIZE(atomic_or_ulong_nv) + SET_SIZE(atomic_or_ulong) + SET_SIZE(atomic_or_uint_nv) + SET_SIZE(atomic_or_uint) + SET_SIZE(atomic_or_32_nv) + SET_SIZE(atomic_or_32) + + /* + * NOTE: If atomic_or_64 and atomic_or_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_or_64_nv. + */ + ENTRY(atomic_or_64) + ALTENTRY(atomic_or_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + or %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + or %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_or_64_nv) + SET_SIZE(atomic_or_64) + + /* + * NOTE: If atomic_and_8 and atomic_and_8_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_8_nv. + */ + ENTRY(atomic_and_8) + ALTENTRY(atomic_and_8_nv) + ALTENTRY(atomic_and_uchar) + ALTENTRY(atomic_and_uchar_nv) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + orn %o1, %o3, %o1 ! all ones in other bytes + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + and %o2, %o1, %o5 ! and in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_and_uchar_nv) + SET_SIZE(atomic_and_uchar) + SET_SIZE(atomic_and_8_nv) + SET_SIZE(atomic_and_8) + + /* + * NOTE: If atomic_and_16 and atomic_and_16_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_16_nv. + */ + ENTRY(atomic_and_16) + ALTENTRY(atomic_and_16_nv) + ALTENTRY(atomic_and_ushort) + ALTENTRY(atomic_and_ushort_nv) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + orn %o1, %o3, %o1 ! all ones in the other half + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + and %o2, %o1, %o5 ! and in the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o2, %o1, %o5 + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = new value + SET_SIZE(atomic_and_ushort_nv) + SET_SIZE(atomic_and_ushort) + SET_SIZE(atomic_and_16_nv) + SET_SIZE(atomic_and_16) + + /* + * NOTE: If atomic_and_32 and atomic_and_32_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_32_nv. + */ + ENTRY(atomic_and_32) + ALTENTRY(atomic_and_32_nv) + ALTENTRY(atomic_and_uint) + ALTENTRY(atomic_and_uint_nv) + ALTENTRY(atomic_and_ulong) + ALTENTRY(atomic_and_ulong_nv) + ld [%o0], %o2 +1: + and %o2, %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + and %o2, %o1, %o0 ! return new value + SET_SIZE(atomic_and_ulong_nv) + SET_SIZE(atomic_and_ulong) + SET_SIZE(atomic_and_uint_nv) + SET_SIZE(atomic_and_uint) + SET_SIZE(atomic_and_32_nv) + SET_SIZE(atomic_and_32) + + /* + * NOTE: If atomic_and_64 and atomic_and_64_nv are ever + * separated, you need to also edit the libc sparc platform + * specific mapfile and remove the NODYNSORT attribute + * from atomic_and_64_nv. + */ + ENTRY(atomic_and_64) + ALTENTRY(atomic_and_64_nv) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + and %o2, %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + and %o2, %o1, %o1 ! return lower 32-bits in %o1 + retl + srlx %o1, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_and_64_nv) + SET_SIZE(atomic_and_64) + + ENTRY(atomic_cas_8) + ALTENTRY(atomic_cas_uchar) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + sll %o2, %g1, %o2 ! %o2 = shifted to bit offset + and %o2, %o3, %o2 ! %o2 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o4 ! read old value +1: + andn %o4, %o3, %o4 ! clear target bits + or %o4, %o2, %o5 ! insert the new value + or %o4, %o1, %o4 ! insert the comparison value + cas [%o0], %o4, %o5 + cmp %o4, %o5 ! did we succeed? + be,pt %icc, 2f + and %o5, %o3, %o4 ! isolate the old value + cmp %o1, %o4 ! should we have succeeded? + be,a,pt %icc, 1b ! yes, try again + mov %o5, %o4 ! %o4 = old value +2: + retl + srl %o4, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_cas_uchar) + SET_SIZE(atomic_cas_8) + + ENTRY(atomic_cas_16) + ALTENTRY(atomic_cas_ushort) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + sll %o2, %g1, %o2 ! %o2 = shifted to bit offset + and %o2, %o3, %o2 ! %o2 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o4 ! read old value +1: + andn %o4, %o3, %o4 ! clear target bits + or %o4, %o2, %o5 ! insert the new value + or %o4, %o1, %o4 ! insert the comparison value + cas [%o0], %o4, %o5 + cmp %o4, %o5 ! did we succeed? + be,pt %icc, 2f + and %o5, %o3, %o4 ! isolate the old value + cmp %o1, %o4 ! should we have succeeded? + be,a,pt %icc, 1b ! yes, try again + mov %o5, %o4 ! %o4 = old value +2: + retl + srl %o4, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_cas_ushort) + SET_SIZE(atomic_cas_16) + + ENTRY(atomic_cas_32) + ALTENTRY(atomic_cas_uint) + ALTENTRY(atomic_cas_ptr) + ALTENTRY(atomic_cas_ulong) + cas [%o0], %o1, %o2 + retl + mov %o2, %o0 + SET_SIZE(atomic_cas_ulong) + SET_SIZE(atomic_cas_ptr) + SET_SIZE(atomic_cas_uint) + SET_SIZE(atomic_cas_32) + + ENTRY(atomic_cas_64) + sllx %o1, 32, %o1 ! cmp's upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 ! convert 2 32-bit args into 1 64-bit + add %o1, %o2, %o1 + sllx %o3, 32, %o2 ! newval upper 32 in %o3, lower in %o4 + srl %o4, 0, %o4 ! setup %o2 to have newval + add %o2, %o4, %o2 + casx [%o0], %o1, %o2 + srl %o2, 0, %o1 ! return lower 32-bits in %o1 + retl + srlx %o2, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_cas_64) + + ENTRY(atomic_swap_8) + ALTENTRY(atomic_swap_uchar) + and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + set 0xff, %o3 ! %o3 = mask + sll %o3, %g1, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single byte value + andn %o0, 0x3, %o0 ! %o0 = word address + ld [%o0], %o2 ! read old value +1: + andn %o2, %o3, %o5 ! clear target bits + or %o5, %o1, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_swap_uchar) + SET_SIZE(atomic_swap_8) + + ENTRY(atomic_swap_16) + ALTENTRY(atomic_swap_ushort) + and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right + xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left + sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right + sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left + sethi %hi(0xffff0000), %o3 ! %o3 = mask + srl %o3, %o4, %o3 ! %o3 = shifted to bit offset + sll %o1, %g1, %o1 ! %o1 = shifted to bit offset + and %o1, %o3, %o1 ! %o1 = single short value + andn %o0, 0x2, %o0 ! %o0 = word address + ! if low-order bit is 1, we will properly get an alignment fault here + ld [%o0], %o2 ! read old value +1: + andn %o2, %o3, %o5 ! clear target bits + or %o5, %o1, %o5 ! insert the new value + cas [%o0], %o2, %o5 + cmp %o2, %o5 + bne,a,pn %icc, 1b + mov %o5, %o2 ! %o2 = old value + and %o5, %o3, %o5 + retl + srl %o5, %g1, %o0 ! %o0 = old value + SET_SIZE(atomic_swap_ushort) + SET_SIZE(atomic_swap_16) + + ENTRY(atomic_swap_32) + ALTENTRY(atomic_swap_uint) + ALTENTRY(atomic_swap_ptr) + ALTENTRY(atomic_swap_ulong) + ld [%o0], %o2 +1: + mov %o1, %o3 + cas [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %icc, 1b + mov %o3, %o2 + retl + mov %o3, %o0 + SET_SIZE(atomic_swap_ulong) + SET_SIZE(atomic_swap_ptr) + SET_SIZE(atomic_swap_uint) + SET_SIZE(atomic_swap_32) + + ENTRY(atomic_swap_64) + sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2 + srl %o2, 0, %o2 + add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit + ldx [%o0], %o2 +1: + mov %o1, %o3 + casx [%o0], %o2, %o3 + cmp %o2, %o3 + bne,a,pn %xcc, 1b + mov %o3, %o2 + srl %o3, 0, %o1 ! return lower 32-bits in %o1 + retl + srlx %o3, 32, %o0 ! return upper 32-bits in %o0 + SET_SIZE(atomic_swap_64) + + ENTRY(atomic_set_long_excl) + mov 1, %o3 + slln %o3, %o1, %o3 + ldn [%o0], %o2 +1: + andcc %o2, %o3, %g0 ! test if the bit is set + bnz,a,pn %ncc, 2f ! if so, then fail out + mov -1, %o0 + or %o2, %o3, %o4 ! set the bit, and try to commit it + casn [%o0], %o2, %o4 + cmp %o2, %o4 + bne,a,pn %ncc, 1b ! failed to commit, try again + mov %o4, %o2 + mov %g0, %o0 +2: + retl + nop + SET_SIZE(atomic_set_long_excl) + + ENTRY(atomic_clear_long_excl) + mov 1, %o3 + slln %o3, %o1, %o3 + ldn [%o0], %o2 +1: + andncc %o3, %o2, %g0 ! test if the bit is clear + bnz,a,pn %ncc, 2f ! if so, then fail out + mov -1, %o0 + andn %o2, %o3, %o4 ! clear the bit, and try to commit it + casn [%o0], %o2, %o4 + cmp %o2, %o4 + bne,a,pn %ncc, 1b ! failed to commit, try again + mov %o4, %o2 + mov %g0, %o0 +2: + retl + nop + SET_SIZE(atomic_clear_long_excl) + +#if !defined(_KERNEL) + + /* + * Spitfires and Blackbirds have a problem with membars in the + * delay slot (SF_ERRATA_51). For safety's sake, we assume + * that the whole world needs the workaround. + */ + ENTRY(membar_enter) + membar #StoreLoad|#StoreStore + retl + nop + SET_SIZE(membar_enter) + + ENTRY(membar_exit) + membar #LoadStore|#StoreStore + retl + nop + SET_SIZE(membar_exit) + + ENTRY(membar_producer) + membar #StoreStore + retl + nop + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + membar #LoadLoad + retl + nop + SET_SIZE(membar_consumer) + +#endif /* !_KERNEL */ diff --git a/common/list/list.c b/common/list/list.c new file mode 100644 index 000000000000..94f7782a87d2 --- /dev/null +++ b/common/list/list.c @@ -0,0 +1,251 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Generic doubly-linked list implementation + */ + +#include <sys/list.h> +#include <sys/list_impl.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#ifdef _KERNEL +#include <sys/debug.h> +#else +#include <assert.h> +#define ASSERT(a) assert(a) +#endif + +#ifdef lint +extern list_node_t *list_d2l(list_t *list, void *obj); +#else +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#endif +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = (node); \ + lnew->list_next = (node)->list_next; \ + (node)->list_next->list_prev = lnew; \ + (node)->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = (node); \ + lnew->list_prev = (node)->list_prev; \ + (node)->list_prev->list_next = lnew; \ + (node)->list_prev = lnew; \ +} + +#define list_remove_node(node) \ + (node)->list_prev->list_next = (node)->list_next; \ + (node)->list_next->list_prev = (node)->list_prev; \ + (node)->list_next = (node)->list_prev = NULL + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); + ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + list_remove_node(lold); +} + +void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + list_remove_node(head); + return (list_object(list, head)); +} + +void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == &list->list_head) + return (NULL); + list_remove_node(tail); + return (list_object(list, tail)); +} + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. + */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +void +list_link_replace(list_node_t *lold, list_node_t *lnew) +{ + ASSERT(list_link_active(lold)); + ASSERT(!list_link_active(lnew)); + + lnew->list_next = lold->list_next; + lnew->list_prev = lold->list_prev; + lold->list_prev->list_next = lnew; + lold->list_next->list_prev = lnew; + lold->list_next = lold->list_prev = NULL; +} + +void +list_link_init(list_node_t *link) +{ + link->list_next = NULL; + link->list_prev = NULL; +} + +int +list_link_active(list_node_t *link) +{ + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/common/nvpair/nvpair.c b/common/nvpair/nvpair.c new file mode 100644 index 000000000000..00d44263ccda --- /dev/null +++ b/common/nvpair/nvpair.c @@ -0,0 +1,3297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/stropts.h> +#include <sys/debug.h> +#include <sys/isa_defs.h> +#include <sys/int_limits.h> +#include <sys/nvpair.h> +#include <sys/nvpair_impl.h> +#include <rpc/types.h> +#include <rpc/xdr.h> + +#if defined(_KERNEL) && !defined(_BOOT) +#include <sys/varargs.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#else +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#endif + +#ifndef offsetof +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif +#define skip_whitespace(p) while ((*(p) == ' ') || (*(p) == '\t')) p++ + +/* + * nvpair.c - Provides kernel & userland interfaces for manipulating + * name-value pairs. + * + * Overview Diagram + * + * +--------------+ + * | nvlist_t | + * |--------------| + * | nvl_version | + * | nvl_nvflag | + * | nvl_priv -+-+ + * | nvl_flag | | + * | nvl_pad | | + * +--------------+ | + * V + * +--------------+ last i_nvp in list + * | nvpriv_t | +---------------------> + * |--------------| | + * +--+- nvp_list | | +------------+ + * | | nvp_last -+--+ + nv_alloc_t | + * | | nvp_curr | |------------| + * | | nvp_nva -+----> | nva_ops | + * | | nvp_stat | | nva_arg | + * | +--------------+ +------------+ + * | + * +-------+ + * V + * +---------------------+ +-------------------+ + * | i_nvp_t | +-->| i_nvp_t | +--> + * |---------------------| | |-------------------| | + * | nvi_next -+--+ | nvi_next -+--+ + * | nvi_prev (NULL) | <----+ nvi_prev | + * | . . . . . . . . . . | | . . . . . . . . . | + * | nvp (nvpair_t) | | nvp (nvpair_t) | + * | - nvp_size | | - nvp_size | + * | - nvp_name_sz | | - nvp_name_sz | + * | - nvp_value_elem | | - nvp_value_elem | + * | - nvp_type | | - nvp_type | + * | - data ... | | - data ... | + * +---------------------+ +-------------------+ + * + * + * + * +---------------------+ +---------------------+ + * | i_nvp_t | +--> +-->| i_nvp_t (last) | + * |---------------------| | | |---------------------| + * | nvi_next -+--+ ... --+ | nvi_next (NULL) | + * <-+- nvi_prev |<-- ... <----+ nvi_prev | + * | . . . . . . . . . | | . . . . . . . . . | + * | nvp (nvpair_t) | | nvp (nvpair_t) | + * | - nvp_size | | - nvp_size | + * | - nvp_name_sz | | - nvp_name_sz | + * | - nvp_value_elem | | - nvp_value_elem | + * | - DATA_TYPE_NVLIST | | - nvp_type | + * | - data (embedded) | | - data ... | + * | nvlist name | +---------------------+ + * | +--------------+ | + * | | nvlist_t | | + * | |--------------| | + * | | nvl_version | | + * | | nvl_nvflag | | + * | | nvl_priv --+---+----> + * | | nvl_flag | | + * | | nvl_pad | | + * | +--------------+ | + * +---------------------+ + * + * + * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will + * allow value to be aligned on 8 byte boundary + * + * name_len is the length of the name string including the null terminator + * so it must be >= 1 + */ +#define NVP_SIZE_CALC(name_len, data_len) \ + (NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len)) + +static int i_get_value_size(data_type_t type, const void *data, uint_t nelem); +static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, + uint_t nelem, const void *data); + +#define NV_STAT_EMBEDDED 0x1 +#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp)) +#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp)) + +#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz)) +#define NVPAIR2I_NVP(nvp) \ + ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp))) + + +int +nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...) +{ + va_list valist; + int err = 0; + + nva->nva_ops = nvo; + nva->nva_arg = NULL; + + va_start(valist, nvo); + if (nva->nva_ops->nv_ao_init != NULL) + err = nva->nva_ops->nv_ao_init(nva, valist); + va_end(valist); + + return (err); +} + +void +nv_alloc_reset(nv_alloc_t *nva) +{ + if (nva->nva_ops->nv_ao_reset != NULL) + nva->nva_ops->nv_ao_reset(nva); +} + +void +nv_alloc_fini(nv_alloc_t *nva) +{ + if (nva->nva_ops->nv_ao_fini != NULL) + nva->nva_ops->nv_ao_fini(nva); +} + +nv_alloc_t * +nvlist_lookup_nv_alloc(nvlist_t *nvl) +{ + nvpriv_t *priv; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + return (priv->nvp_nva); +} + +static void * +nv_mem_zalloc(nvpriv_t *nvp, size_t size) +{ + nv_alloc_t *nva = nvp->nvp_nva; + void *buf; + + if ((buf = nva->nva_ops->nv_ao_alloc(nva, size)) != NULL) + bzero(buf, size); + + return (buf); +} + +static void +nv_mem_free(nvpriv_t *nvp, void *buf, size_t size) +{ + nv_alloc_t *nva = nvp->nvp_nva; + + nva->nva_ops->nv_ao_free(nva, buf, size); +} + +static void +nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat) +{ + bzero(priv, sizeof (nvpriv_t)); + + priv->nvp_nva = nva; + priv->nvp_stat = stat; +} + +static nvpriv_t * +nv_priv_alloc(nv_alloc_t *nva) +{ + nvpriv_t *priv; + + /* + * nv_mem_alloc() cannot called here because it needs the priv + * argument. + */ + if ((priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t))) == NULL) + return (NULL); + + nv_priv_init(priv, nva, 0); + + return (priv); +} + +/* + * Embedded lists need their own nvpriv_t's. We create a new + * nvpriv_t using the parameters and allocator from the parent + * list's nvpriv_t. + */ +static nvpriv_t * +nv_priv_alloc_embedded(nvpriv_t *priv) +{ + nvpriv_t *emb_priv; + + if ((emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t))) == NULL) + return (NULL); + + nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED); + + return (emb_priv); +} + +static void +nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv) +{ + nvl->nvl_version = NV_VERSION; + nvl->nvl_nvflag = nvflag & (NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE); + nvl->nvl_priv = (uint64_t)(uintptr_t)priv; + nvl->nvl_flag = 0; + nvl->nvl_pad = 0; +} + +uint_t +nvlist_nvflag(nvlist_t *nvl) +{ + return (nvl->nvl_nvflag); +} + +/* + * nvlist_alloc - Allocate nvlist. + */ +/*ARGSUSED1*/ +int +nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xalloc(nvlp, nvflag, + (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva) +{ + nvpriv_t *priv; + + if (nvlp == NULL || nva == NULL) + return (EINVAL); + + if ((priv = nv_priv_alloc(nva)) == NULL) + return (ENOMEM); + + if ((*nvlp = nv_mem_zalloc(priv, + NV_ALIGN(sizeof (nvlist_t)))) == NULL) { + nv_mem_free(priv, priv, sizeof (nvpriv_t)); + return (ENOMEM); + } + + nvlist_init(*nvlp, nvflag, priv); + + return (0); +} + +/* + * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair. + */ +static nvpair_t * +nvp_buf_alloc(nvlist_t *nvl, size_t len) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *buf; + nvpair_t *nvp; + size_t nvsize; + + /* + * Allocate the buffer + */ + nvsize = len + offsetof(i_nvp_t, nvi_nvp); + + if ((buf = nv_mem_zalloc(priv, nvsize)) == NULL) + return (NULL); + + nvp = &buf->nvi_nvp; + nvp->nvp_size = len; + + return (nvp); +} + +/* + * nvp_buf_free - de-Allocate an i_nvp_t. + */ +static void +nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + size_t nvsize = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp); + + nv_mem_free(priv, NVPAIR2I_NVP(nvp), nvsize); +} + +/* + * nvp_buf_link - link a new nv pair into the nvlist. + */ +static void +nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr = NVPAIR2I_NVP(nvp); + + /* Put element at end of nvlist */ + if (priv->nvp_list == NULL) { + priv->nvp_list = priv->nvp_last = curr; + } else { + curr->nvi_prev = priv->nvp_last; + priv->nvp_last->nvi_next = curr; + priv->nvp_last = curr; + } +} + +/* + * nvp_buf_unlink - unlink an removed nvpair out of the nvlist. + */ +static void +nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr = NVPAIR2I_NVP(nvp); + + /* + * protect nvlist_next_nvpair() against walking on freed memory. + */ + if (priv->nvp_curr == curr) + priv->nvp_curr = curr->nvi_next; + + if (curr == priv->nvp_list) + priv->nvp_list = curr->nvi_next; + else + curr->nvi_prev->nvi_next = curr->nvi_next; + + if (curr == priv->nvp_last) + priv->nvp_last = curr->nvi_prev; + else + curr->nvi_next->nvi_prev = curr->nvi_prev; +} + +/* + * take a nvpair type and number of elements and make sure the are valid + */ +static int +i_validate_type_nelem(data_type_t type, uint_t nelem) +{ + switch (type) { + case DATA_TYPE_BOOLEAN: + if (nelem != 0) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_STRING: + case DATA_TYPE_HRTIME: + case DATA_TYPE_NVLIST: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + if (nelem != 1) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + /* we allow arrays with 0 elements */ + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * Verify nvp_name_sz and check the name string length. + */ +static int +i_validate_nvpair_name(nvpair_t *nvp) +{ + if ((nvp->nvp_name_sz <= 0) || + (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))) + return (EFAULT); + + /* verify the name string, make sure its terminated */ + if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0') + return (EFAULT); + + return (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1 ? 0 : EFAULT); +} + +static int +i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data) +{ + switch (type) { + case DATA_TYPE_BOOLEAN_VALUE: + if (*(boolean_t *)data != B_TRUE && + *(boolean_t *)data != B_FALSE) + return (EINVAL); + break; + case DATA_TYPE_BOOLEAN_ARRAY: { + int i; + + for (i = 0; i < nelem; i++) + if (((boolean_t *)data)[i] != B_TRUE && + ((boolean_t *)data)[i] != B_FALSE) + return (EINVAL); + break; + } + default: + break; + } + + return (0); +} + +/* + * This function takes a pointer to what should be a nvpair and it's size + * and then verifies that all the nvpair fields make sense and can be + * trusted. This function is used when decoding packed nvpairs. + */ +static int +i_validate_nvpair(nvpair_t *nvp) +{ + data_type_t type = NVP_TYPE(nvp); + int size1, size2; + + /* verify nvp_name_sz, check the name string length */ + if (i_validate_nvpair_name(nvp) != 0) + return (EFAULT); + + if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0) + return (EFAULT); + + /* + * verify nvp_type, nvp_value_elem, and also possibly + * verify string values and get the value size. + */ + size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp)); + size1 = nvp->nvp_size - NVP_VALOFF(nvp); + if (size2 < 0 || size1 != NV_ALIGN(size2)) + return (EFAULT); + + return (0); +} + +static int +nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if ((priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv) == NULL) + return (EINVAL); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvpair_t *nvp = &curr->nvi_nvp; + int err; + + if ((err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp), + NVP_NELEM(nvp), NVP_VALUE(nvp))) != 0) + return (err); + } + + return (0); +} + +/* + * Frees all memory allocated for an nvpair (like embedded lists) with + * the exception of the nvpair buffer itself. + */ +static void +nvpair_free(nvpair_t *nvp) +{ + switch (NVP_TYPE(nvp)) { + case DATA_TYPE_NVLIST: + nvlist_free(EMBEDDED_NVL(nvp)); + break; + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + int i; + + for (i = 0; i < NVP_NELEM(nvp); i++) + if (nvlp[i] != NULL) + nvlist_free(nvlp[i]); + break; + } + default: + break; + } +} + +/* + * nvlist_free - free an unpacked nvlist + */ +void +nvlist_free(nvlist_t *nvl) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return; + + /* + * Unpacked nvlist are linked through i_nvp_t + */ + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + curr = curr->nvi_next; + + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + } + + if (!(priv->nvp_stat & NV_STAT_EMBEDDED)) + nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t))); + else + nvl->nvl_priv = 0; + + nv_mem_free(priv, priv, sizeof (nvpriv_t)); +} + +static int +nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + + if (nvp == NULL) + return (0); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) + if (&curr->nvi_nvp == nvp) + return (1); + + return (0); +} + +/* + * Make a copy of nvlist + */ +/*ARGSUSED1*/ +int +nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xdup(nvl, nvlp, + (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xdup(nvl, nvlp, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva) +{ + int err; + nvlist_t *ret; + + if (nvl == NULL || nvlp == NULL) + return (EINVAL); + + if ((err = nvlist_xalloc(&ret, nvl->nvl_nvflag, nva)) != 0) + return (err); + + if ((err = nvlist_copy_pairs(nvl, ret)) != 0) + nvlist_free(ret); + else + *nvlp = ret; + + return (err); +} + +/* + * Remove all with matching name + */ +int +nvlist_remove_all(nvlist_t *nvl, const char *name) +{ + nvpriv_t *priv; + i_nvp_t *curr; + int error = ENOENT; + + if (nvl == NULL || name == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + + curr = curr->nvi_next; + if (strcmp(name, NVP_NAME(nvp)) != 0) + continue; + + nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + + error = 0; + } + + return (error); +} + +/* + * Remove first one with matching name and type + */ +int +nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || name == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + curr = priv->nvp_list; + while (curr != NULL) { + nvpair_t *nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) { + nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + + return (0); + } + curr = curr->nvi_next; + } + + return (ENOENT); +} + +int +nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + if (nvl == NULL || nvp == NULL) + return (EINVAL); + + nvp_buf_unlink(nvl, nvp); + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (0); +} + +/* + * This function calculates the size of an nvpair value. + * + * The data argument controls the behavior in case of the data types + * DATA_TYPE_STRING and + * DATA_TYPE_STRING_ARRAY + * Is data == NULL then the size of the string(s) is excluded. + */ +static int +i_get_value_size(data_type_t type, const void *data, uint_t nelem) +{ + uint64_t value_sz; + + if (i_validate_type_nelem(type, nelem) != 0) + return (-1); + + /* Calculate required size for holding value */ + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + value_sz = sizeof (boolean_t); + break; + case DATA_TYPE_BYTE: + value_sz = sizeof (uchar_t); + break; + case DATA_TYPE_INT8: + value_sz = sizeof (int8_t); + break; + case DATA_TYPE_UINT8: + value_sz = sizeof (uint8_t); + break; + case DATA_TYPE_INT16: + value_sz = sizeof (int16_t); + break; + case DATA_TYPE_UINT16: + value_sz = sizeof (uint16_t); + break; + case DATA_TYPE_INT32: + value_sz = sizeof (int32_t); + break; + case DATA_TYPE_UINT32: + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_INT64: + value_sz = sizeof (int64_t); + break; + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: + value_sz = sizeof (double); + break; +#endif + case DATA_TYPE_STRING: + if (data == NULL) + value_sz = 0; + else + value_sz = strlen(data) + 1; + break; + case DATA_TYPE_BOOLEAN_ARRAY: + value_sz = (uint64_t)nelem * sizeof (boolean_t); + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uchar_t); + break; + case DATA_TYPE_INT8_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int8_t); + break; + case DATA_TYPE_UINT8_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint8_t); + break; + case DATA_TYPE_INT16_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int16_t); + break; + case DATA_TYPE_UINT16_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint16_t); + break; + case DATA_TYPE_INT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int32_t); + break; + case DATA_TYPE_UINT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int64_t); + break; + case DATA_TYPE_UINT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + + if (data != NULL) { + char *const *strs = data; + uint_t i; + + /* no alignment requirement for strings */ + for (i = 0; i < nelem; i++) { + if (strs[i] == NULL) + return (-1); + value_sz += strlen(strs[i]) + 1; + } + } + break; + case DATA_TYPE_HRTIME: + value_sz = sizeof (hrtime_t); + break; + case DATA_TYPE_NVLIST: + value_sz = NV_ALIGN(sizeof (nvlist_t)); + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t) + + (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t)); + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +static int +nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl) +{ + nvpriv_t *priv; + int err; + + if ((priv = nv_priv_alloc_embedded((nvpriv_t *)(uintptr_t) + nvl->nvl_priv)) == NULL) + return (ENOMEM); + + nvlist_init(emb_nvl, onvl->nvl_nvflag, priv); + + if ((err = nvlist_copy_pairs(onvl, emb_nvl)) != 0) { + nvlist_free(emb_nvl); + emb_nvl->nvl_priv = 0; + } + + return (err); +} + +/* + * nvlist_add_common - Add new <name,value> pair to nvlist + */ +static int +nvlist_add_common(nvlist_t *nvl, const char *name, + data_type_t type, uint_t nelem, const void *data) +{ + nvpair_t *nvp; + uint_t i; + + int nvp_sz, name_sz, value_sz; + int err = 0; + + if (name == NULL || nvl == NULL || nvl->nvl_priv == 0) + return (EINVAL); + + if (nelem != 0 && data == NULL) + return (EINVAL); + + /* + * Verify type and nelem and get the value size. + * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) included. + */ + if ((value_sz = i_get_value_size(type, data, nelem)) < 0) + return (EINVAL); + + if (i_validate_nvpair_value(type, nelem, data) != 0) + return (EINVAL); + + /* + * If we're adding an nvlist or nvlist array, ensure that we are not + * adding the input nvlist to itself, which would cause recursion, + * and ensure that no NULL nvlist pointers are present. + */ + switch (type) { + case DATA_TYPE_NVLIST: + if (data == nvl || data == NULL) + return (EINVAL); + break; + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **onvlp = (nvlist_t **)data; + for (i = 0; i < nelem; i++) { + if (onvlp[i] == nvl || onvlp[i] == NULL) + return (EINVAL); + } + break; + } + default: + break; + } + + /* calculate sizes of the nvpair elements and the nvpair itself */ + name_sz = strlen(name) + 1; + + nvp_sz = NVP_SIZE_CALC(name_sz, value_sz); + + if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL) + return (ENOMEM); + + ASSERT(nvp->nvp_size == nvp_sz); + nvp->nvp_name_sz = name_sz; + nvp->nvp_value_elem = nelem; + nvp->nvp_type = type; + bcopy(name, NVP_NAME(nvp), name_sz); + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + case DATA_TYPE_STRING_ARRAY: { + char *const *strs = data; + char *buf = NVP_VALUE(nvp); + char **cstrs = (void *)buf; + + /* skip pre-allocated space for pointer array */ + buf += nelem * sizeof (uint64_t); + for (i = 0; i < nelem; i++) { + int slen = strlen(strs[i]) + 1; + bcopy(strs[i], buf, slen); + cstrs[i] = buf; + buf += slen; + } + break; + } + case DATA_TYPE_NVLIST: { + nvlist_t *nnvl = EMBEDDED_NVL(nvp); + nvlist_t *onvl = (nvlist_t *)data; + + if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) { + nvp_buf_free(nvl, nvp); + return (err); + } + break; + } + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **onvlp = (nvlist_t **)data; + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + nvlist_t *embedded = (nvlist_t *) + ((uintptr_t)nvlp + nelem * sizeof (uint64_t)); + + for (i = 0; i < nelem; i++) { + if ((err = nvlist_copy_embedded(nvl, + onvlp[i], embedded)) != 0) { + /* + * Free any successfully created lists + */ + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (err); + } + + nvlp[i] = embedded++; + } + break; + } + default: + bcopy(data, NVP_VALUE(nvp), value_sz); + } + + /* if unique name, remove before add */ + if (nvl->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove_all(nvl, name); + else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE) + (void) nvlist_remove(nvl, name, type); + + nvp_buf_link(nvl, nvp); + + return (0); +} + +int +nvlist_add_boolean(nvlist_t *nvl, const char *name) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL)); +} + +int +nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val)); +} + +int +nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val)); +} + +int +nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val)); +} + +int +nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val)); +} + +int +nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val)); +} + +int +nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val)); +} + +int +nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val)); +} + +int +nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val)); +} + +int +nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val)); +} + +int +nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val)); +} + +#if !defined(_KERNEL) +int +nvlist_add_double(nvlist_t *nvl, const char *name, double val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val)); +} +#endif + +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val)); +} + +int +nvlist_add_boolean_array(nvlist_t *nvl, const char *name, + boolean_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int +nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); +} + +int +nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_add_string_array(nvlist_t *nvl, const char *name, + char *const *a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val)); +} + +int +nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); +} + +int +nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +/* reading name-value pairs */ +nvpair_t * +nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + curr = NVPAIR2I_NVP(nvp); + + /* + * Ensure that nvp is a valid nvpair on this nvlist. + * NB: nvp_curr is used only as a hint so that we don't always + * have to walk the list to determine if nvp is still on the list. + */ + if (nvp == NULL) + curr = priv->nvp_list; + else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) + curr = curr->nvi_next; + else + curr = NULL; + + priv->nvp_curr = curr; + + return (curr != NULL ? &curr->nvi_nvp : NULL); +} + +nvpair_t * +nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + nvpriv_t *priv; + i_nvp_t *curr; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (NULL); + + curr = NVPAIR2I_NVP(nvp); + + if (nvp == NULL) + curr = priv->nvp_last; + else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) + curr = curr->nvi_prev; + else + curr = NULL; + + priv->nvp_curr = curr; + + return (curr != NULL ? &curr->nvi_nvp : NULL); +} + +boolean_t +nvlist_empty(nvlist_t *nvl) +{ + nvpriv_t *priv; + + if (nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (B_TRUE); + + return (priv->nvp_list == NULL); +} + +char * +nvpair_name(nvpair_t *nvp) +{ + return (NVP_NAME(nvp)); +} + +data_type_t +nvpair_type(nvpair_t *nvp) +{ + return (NVP_TYPE(nvp)); +} + +int +nvpair_type_is_array(nvpair_t *nvp) +{ + data_type_t type = NVP_TYPE(nvp); + + if ((type == DATA_TYPE_BYTE_ARRAY) || + (type == DATA_TYPE_UINT8_ARRAY) || + (type == DATA_TYPE_INT16_ARRAY) || + (type == DATA_TYPE_UINT16_ARRAY) || + (type == DATA_TYPE_INT32_ARRAY) || + (type == DATA_TYPE_UINT32_ARRAY) || + (type == DATA_TYPE_INT64_ARRAY) || + (type == DATA_TYPE_UINT64_ARRAY) || + (type == DATA_TYPE_BOOLEAN_ARRAY) || + (type == DATA_TYPE_STRING_ARRAY) || + (type == DATA_TYPE_NVLIST_ARRAY)) + return (1); + return (0); + +} + +static int +nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data) +{ + if (nvp == NULL || nvpair_type(nvp) != type) + return (EINVAL); + + /* + * For non-array types, we copy the data. + * For array types (including string), we set a pointer. + */ + switch (type) { + case DATA_TYPE_BOOLEAN: + if (nelem != NULL) + *nelem = 0; + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + if (data == NULL) + return (EINVAL); + bcopy(NVP_VALUE(nvp), data, + (size_t)i_get_value_size(type, NULL, 1)); + if (nelem != NULL) + *nelem = 1; + break; + + case DATA_TYPE_NVLIST: + case DATA_TYPE_STRING: + if (data == NULL) + return (EINVAL); + *(void **)data = (void *)NVP_VALUE(nvp); + if (nelem != NULL) + *nelem = 1; + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + if (nelem == NULL || data == NULL) + return (EINVAL); + if ((*nelem = NVP_NELEM(nvp)) != 0) + *(void **)data = (void *)NVP_VALUE(nvp); + else + *(void **)data = NULL; + break; + + default: + return (ENOTSUP); + } + + return (0); +} + +static int +nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type, + uint_t *nelem, void *data) +{ + nvpriv_t *priv; + nvpair_t *nvp; + i_nvp_t *curr; + + if (name == NULL || nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE))) + return (ENOTSUP); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) + return (nvpair_value_common(nvp, type, nelem, data)); + } + + return (ENOENT); +} + +int +nvlist_lookup_boolean(nvlist_t *nvl, const char *name) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL)); +} + +int +nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val) +{ + return (nvlist_lookup_common(nvl, name, + DATA_TYPE_BOOLEAN_VALUE, NULL, val)); +} + +int +nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val)); +} + +int +nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val)); +} + +int +nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val)); +} + +int +nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val)); +} + +int +nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val)); +} + +int +nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val)); +} + +int +nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val)); +} + +int +nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val)); +} + +int +nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val)); +} + +#if !defined(_KERNEL) +int +nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val)); +} +#endif + +int +nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val)); +} + +int +nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val)); +} + +int +nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name, + boolean_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, + DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_lookup_byte_array(nvlist_t *nvl, const char *name, + uchar_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name, + uint8_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_lookup_int16_array(nvlist_t *nvl, const char *name, + int16_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name, + uint16_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_lookup_int32_array(nvlist_t *nvl, const char *name, + int32_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name, + uint32_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int +nvlist_lookup_int64_array(nvlist_t *nvl, const char *name, + int64_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); +} + +int +nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name, + uint64_t **a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_lookup_string_array(nvlist_t *nvl, const char *name, + char ***a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name, + nvlist_t ***a, uint_t *n) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +int +nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val) +{ + return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val)); +} + +int +nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) +{ + va_list ap; + char *name; + int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0); + int ret = 0; + + va_start(ap, flag); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + data_type_t type; + void *val; + uint_t *nelem; + + switch (type = va_arg(ap, data_type_t)) { + case DATA_TYPE_BOOLEAN: + ret = nvlist_lookup_common(nvl, name, type, NULL, NULL); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: + case DATA_TYPE_STRING: + case DATA_TYPE_NVLIST: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + val = va_arg(ap, void *); + ret = nvlist_lookup_common(nvl, name, type, NULL, val); + break; + + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_NVLIST_ARRAY: + val = va_arg(ap, void *); + nelem = va_arg(ap, uint_t *); + ret = nvlist_lookup_common(nvl, name, type, nelem, val); + break; + + default: + ret = EINVAL; + } + + if (ret == ENOENT && noentok) + ret = 0; + } + va_end(ap); + + return (ret); +} + +/* + * Find the 'name'ed nvpair in the nvlist 'nvl'. If 'name' found, the function + * returns zero and a pointer to the matching nvpair is returned in '*ret' + * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate + * multiple levels of embedded nvlists, with 'sep' as the separator. As an + * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or + * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, + * code also supports "a.d[3]e[1]" syntax). + * + * If 'ip' is non-NULL and the last name component is an array, return the + * value of the "...[index]" array index in *ip. For an array reference that + * is not indexed, *ip will be returned as -1. If there is a syntax error in + * 'name', and 'ep' is non-NULL then *ep will be set to point to the location + * inside the 'name' string where the syntax error was detected. + */ +static int +nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep, + nvpair_t **ret, int *ip, char **ep) +{ + nvpair_t *nvp; + const char *np; + char *sepp; + char *idxp, *idxep; + nvlist_t **nva; + long idx; + int n; + + if (ip) + *ip = -1; /* not indexed */ + if (ep) + *ep = NULL; + + if ((nvl == NULL) || (name == NULL)) + return (EINVAL); + + /* step through components of name */ + for (np = name; np && *np; np = sepp) { + /* ensure unique names */ + if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME)) + return (ENOTSUP); + + /* skip white space */ + skip_whitespace(np); + if (*np == 0) + break; + + /* set 'sepp' to end of current component 'np' */ + if (sep) + sepp = strchr(np, sep); + else + sepp = NULL; + + /* find start of next "[ index ]..." */ + idxp = strchr(np, '['); + + /* if sepp comes first, set idxp to NULL */ + if (sepp && idxp && (sepp < idxp)) + idxp = NULL; + + /* + * At this point 'idxp' is set if there is an index + * expected for the current component. + */ + if (idxp) { + /* set 'n' to length of current 'np' name component */ + n = idxp++ - np; + + /* keep sepp up to date for *ep use as we advance */ + skip_whitespace(idxp); + sepp = idxp; + + /* determine the index value */ +#if defined(_KERNEL) && !defined(_BOOT) + if (ddi_strtol(idxp, &idxep, 0, &idx)) + goto fail; +#else + idx = strtol(idxp, &idxep, 0); +#endif + if (idxep == idxp) + goto fail; + + /* keep sepp up to date for *ep use as we advance */ + sepp = idxep; + + /* skip white space index value and check for ']' */ + skip_whitespace(sepp); + if (*sepp++ != ']') + goto fail; + + /* for embedded arrays, support C syntax: "a[1].b" */ + skip_whitespace(sepp); + if (sep && (*sepp == sep)) + sepp++; + } else if (sepp) { + n = sepp++ - np; + } else { + n = strlen(np); + } + + /* trim trailing whitespace by reducing length of 'np' */ + if (n == 0) + goto fail; + for (n--; (np[n] == ' ') || (np[n] == '\t'); n--) + ; + n++; + + /* skip whitespace, and set sepp to NULL if complete */ + if (sepp) { + skip_whitespace(sepp); + if (*sepp == 0) + sepp = NULL; + } + + /* + * At this point: + * o 'n' is the length of current 'np' component. + * o 'idxp' is set if there was an index, and value 'idx'. + * o 'sepp' is set to the beginning of the next component, + * and set to NULL if we have no more components. + * + * Search for nvpair with matching component name. + */ + for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + + /* continue if no match on name */ + if (strncmp(np, nvpair_name(nvp), n) || + (strlen(nvpair_name(nvp)) != n)) + continue; + + /* if indexed, verify type is array oriented */ + if (idxp && !nvpair_type_is_array(nvp)) + goto fail; + + /* + * Full match found, return nvp and idx if this + * was the last component. + */ + if (sepp == NULL) { + if (ret) + *ret = nvp; + if (ip && idxp) + *ip = (int)idx; /* return index */ + return (0); /* found */ + } + + /* + * More components: current match must be + * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY + * to support going deeper. + */ + if (nvpair_type(nvp) == DATA_TYPE_NVLIST) { + nvl = EMBEDDED_NVL(nvp); + break; + } else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) { + (void) nvpair_value_nvlist_array(nvp, + &nva, (uint_t *)&n); + if ((n < 0) || (idx >= n)) + goto fail; + nvl = nva[idx]; + break; + } + + /* type does not support more levels */ + goto fail; + } + if (nvp == NULL) + goto fail; /* 'name' not found */ + + /* search for match of next component in embedded 'nvl' list */ + } + +fail: if (ep && sepp) + *ep = sepp; + return (EINVAL); +} + +/* + * Return pointer to nvpair with specified 'name'. + */ +int +nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret) +{ + return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL)); +} + +/* + * Determine if named nvpair exists in nvlist (use embedded separator of '.' + * and return array index). See nvlist_lookup_nvpair_ei_sep for more detailed + * description. + */ +int nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl, + const char *name, nvpair_t **ret, int *ip, char **ep) +{ + return (nvlist_lookup_nvpair_ei_sep(nvl, name, '.', ret, ip, ep)); +} + +boolean_t +nvlist_exists(nvlist_t *nvl, const char *name) +{ + nvpriv_t *priv; + nvpair_t *nvp; + i_nvp_t *curr; + + if (name == NULL || nvl == NULL || + (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (B_FALSE); + + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + nvp = &curr->nvi_nvp; + + if (strcmp(name, NVP_NAME(nvp)) == 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +int +nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val)); +} + +int +nvpair_value_byte(nvpair_t *nvp, uchar_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val)); +} + +int +nvpair_value_int8(nvpair_t *nvp, int8_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val)); +} + +int +nvpair_value_uint8(nvpair_t *nvp, uint8_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val)); +} + +int +nvpair_value_int16(nvpair_t *nvp, int16_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val)); +} + +int +nvpair_value_uint16(nvpair_t *nvp, uint16_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val)); +} + +int +nvpair_value_int32(nvpair_t *nvp, int32_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val)); +} + +int +nvpair_value_uint32(nvpair_t *nvp, uint32_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val)); +} + +int +nvpair_value_int64(nvpair_t *nvp, int64_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val)); +} + +int +nvpair_value_uint64(nvpair_t *nvp, uint64_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val)); +} + +#if !defined(_KERNEL) +int +nvpair_value_double(nvpair_t *nvp, double *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val)); +} +#endif + +int +nvpair_value_string(nvpair_t *nvp, char **val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val)); +} + +int +nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val)); +} + +int +nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val)); +} + +int +nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val)); +} + +int +nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val)); +} + +int +nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val)); +} + +int +nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val)); +} + +int +nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val)); +} + +int +nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val)); +} + +int +nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val)); +} + +int +nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val)); +} + +int +nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val)); +} + +int +nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val)); +} + +int +nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem) +{ + return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val)); +} + +int +nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val) +{ + return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val)); +} + +/* + * Add specified pair to the list. + */ +int +nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + if (nvl == NULL || nvp == NULL) + return (EINVAL); + + return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp), + NVP_NELEM(nvp), NVP_VALUE(nvp))); +} + +/* + * Merge the supplied nvlists and put the result in dst. + * The merged list will contain all names specified in both lists, + * the values are taken from nvl in the case of duplicates. + * Return 0 on success. + */ +/*ARGSUSED*/ +int +nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag) +{ + if (nvl == NULL || dst == NULL) + return (EINVAL); + + if (dst != nvl) + return (nvlist_copy_pairs(nvl, dst)); + + return (0); +} + +/* + * Encoding related routines + */ +#define NVS_OP_ENCODE 0 +#define NVS_OP_DECODE 1 +#define NVS_OP_GETSIZE 2 + +typedef struct nvs_ops nvs_ops_t; + +typedef struct { + int nvs_op; + const nvs_ops_t *nvs_ops; + void *nvs_private; + nvpriv_t *nvs_priv; +} nvstream_t; + +/* + * nvs operations are: + * - nvs_nvlist + * encoding / decoding of a nvlist header (nvlist_t) + * calculates the size used for header and end detection + * + * - nvs_nvpair + * responsible for the first part of encoding / decoding of an nvpair + * calculates the decoded size of an nvpair + * + * - nvs_nvp_op + * second part of encoding / decoding of an nvpair + * + * - nvs_nvp_size + * calculates the encoding size of an nvpair + * + * - nvs_nvl_fini + * encodes the end detection mark (zeros). + */ +struct nvs_ops { + int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *); + int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *); + int (*nvs_nvp_op)(nvstream_t *, nvpair_t *); + int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *); + int (*nvs_nvl_fini)(nvstream_t *); +}; + +typedef struct { + char nvh_encoding; /* nvs encoding method */ + char nvh_endian; /* nvs endian */ + char nvh_reserved1; /* reserved for future use */ + char nvh_reserved2; /* reserved for future use */ +} nvs_header_t; + +static int +nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + + /* + * Walk nvpair in list and encode each nvpair + */ + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) + if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0) + return (EFAULT); + + return (nvs->nvs_ops->nvs_nvl_fini(nvs)); +} + +static int +nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl) +{ + nvpair_t *nvp; + size_t nvsize; + int err; + + /* + * Get decoded size of next pair in stream, alloc + * memory for nvpair_t, then decode the nvpair + */ + while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) { + if (nvsize == 0) /* end of list */ + break; + + /* make sure len makes sense */ + if (nvsize < NVP_SIZE_CALC(1, 0)) + return (EFAULT); + + if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL) + return (ENOMEM); + + if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) { + nvp_buf_free(nvl, nvp); + return (err); + } + + if (i_validate_nvpair(nvp) != 0) { + nvpair_free(nvp); + nvp_buf_free(nvl, nvp); + return (EFAULT); + } + + nvp_buf_link(nvl, nvp); + } + return (err); +} + +static int +nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) +{ + nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; + i_nvp_t *curr; + uint64_t nvsize = *buflen; + size_t size; + + /* + * Get encoded size of nvpairs in nvlist + */ + for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { + if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0) + return (EINVAL); + + if ((nvsize += size) > INT32_MAX) + return (EINVAL); + } + + *buflen = nvsize; + return (0); +} + +static int +nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) +{ + int err; + + if (nvl->nvl_priv == 0) + return (EFAULT); + + /* + * Perform the operation, starting with header, then each nvpair + */ + if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0) + return (err); + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + err = nvs_encode_pairs(nvs, nvl); + break; + + case NVS_OP_DECODE: + err = nvs_decode_pairs(nvs, nvl); + break; + + case NVS_OP_GETSIZE: + err = nvs_getsize_pairs(nvs, nvl, buflen); + break; + + default: + err = EINVAL; + } + + return (err); +} + +static int +nvs_embedded(nvstream_t *nvs, nvlist_t *embedded) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + return (nvs_operation(nvs, embedded, NULL)); + + case NVS_OP_DECODE: { + nvpriv_t *priv; + int err; + + if (embedded->nvl_version != NV_VERSION) + return (ENOTSUP); + + if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL) + return (ENOMEM); + + nvlist_init(embedded, embedded->nvl_nvflag, priv); + + if ((err = nvs_operation(nvs, embedded, NULL)) != 0) + nvlist_free(embedded); + return (err); + } + default: + break; + } + + return (EINVAL); +} + +static int +nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + size_t nelem = NVP_NELEM(nvp); + nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); + int i; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + for (i = 0; i < nelem; i++) + if (nvs_embedded(nvs, nvlp[i]) != 0) + return (EFAULT); + break; + + case NVS_OP_DECODE: { + size_t len = nelem * sizeof (uint64_t); + nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len); + + bzero(nvlp, len); /* don't trust packed data */ + for (i = 0; i < nelem; i++) { + if (nvs_embedded(nvs, embedded) != 0) { + nvpair_free(nvp); + return (EFAULT); + } + + nvlp[i] = embedded++; + } + break; + } + case NVS_OP_GETSIZE: { + uint64_t nvsize = 0; + + for (i = 0; i < nelem; i++) { + size_t nvp_sz = 0; + + if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0) + return (EINVAL); + + if ((nvsize += nvp_sz) > INT32_MAX) + return (EINVAL); + } + + *size = nvsize; + break; + } + default: + return (EINVAL); + } + + return (0); +} + +static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *); +static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *); + +/* + * Common routine for nvlist operations: + * encode, decode, getsize (encoded size). + */ +static int +nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, + int nvs_op) +{ + int err = 0; + nvstream_t nvs; + int nvl_endian; +#ifdef _LITTLE_ENDIAN + int host_endian = 1; +#else + int host_endian = 0; +#endif /* _LITTLE_ENDIAN */ + nvs_header_t *nvh = (void *)buf; + + if (buflen == NULL || nvl == NULL || + (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) + return (EINVAL); + + nvs.nvs_op = nvs_op; + + /* + * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and + * a buffer is allocated. The first 4 bytes in the buffer are + * used for encoding method and host endian. + */ + switch (nvs_op) { + case NVS_OP_ENCODE: + if (buf == NULL || *buflen < sizeof (nvs_header_t)) + return (EINVAL); + + nvh->nvh_encoding = encoding; + nvh->nvh_endian = nvl_endian = host_endian; + nvh->nvh_reserved1 = 0; + nvh->nvh_reserved2 = 0; + break; + + case NVS_OP_DECODE: + if (buf == NULL || *buflen < sizeof (nvs_header_t)) + return (EINVAL); + + /* get method of encoding from first byte */ + encoding = nvh->nvh_encoding; + nvl_endian = nvh->nvh_endian; + break; + + case NVS_OP_GETSIZE: + nvl_endian = host_endian; + + /* + * add the size for encoding + */ + *buflen = sizeof (nvs_header_t); + break; + + default: + return (ENOTSUP); + } + + /* + * Create an nvstream with proper encoding method + */ + switch (encoding) { + case NV_ENCODE_NATIVE: + /* + * check endianness, in case we are unpacking + * from a file + */ + if (nvl_endian != host_endian) + return (ENOTSUP); + err = nvs_native(&nvs, nvl, buf, buflen); + break; + case NV_ENCODE_XDR: + err = nvs_xdr(&nvs, nvl, buf, buflen); + break; + default: + err = ENOTSUP; + break; + } + + return (err); +} + +int +nvlist_size(nvlist_t *nvl, size_t *size, int encoding) +{ + return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE)); +} + +/* + * Pack nvlist into contiguous memory + */ +/*ARGSUSED1*/ +int +nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, + int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xpack(nvl, bufp, buflen, encoding, + (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, + nv_alloc_t *nva) +{ + nvpriv_t nvpriv; + size_t alloc_size; + char *buf; + int err; + + if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL) + return (EINVAL); + + if (*bufp != NULL) + return (nvlist_common(nvl, *bufp, buflen, encoding, + NVS_OP_ENCODE)); + + /* + * Here is a difficult situation: + * 1. The nvlist has fixed allocator properties. + * All other nvlist routines (like nvlist_add_*, ...) use + * these properties. + * 2. When using nvlist_pack() the user can specify his own + * allocator properties (e.g. by using KM_NOSLEEP). + * + * We use the user specified properties (2). A clearer solution + * will be to remove the kmflag from nvlist_pack(), but we will + * not change the interface. + */ + nv_priv_init(&nvpriv, nva, 0); + + if (err = nvlist_size(nvl, &alloc_size, encoding)) + return (err); + + if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL) + return (ENOMEM); + + if ((err = nvlist_common(nvl, buf, &alloc_size, encoding, + NVS_OP_ENCODE)) != 0) { + nv_mem_free(&nvpriv, buf, alloc_size); + } else { + *buflen = alloc_size; + *bufp = buf; + } + + return (err); +} + +/* + * Unpack buf into an nvlist_t + */ +/*ARGSUSED1*/ +int +nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag) +{ +#if defined(_KERNEL) && !defined(_BOOT) + return (nvlist_xunpack(buf, buflen, nvlp, + (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); +#else + return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep)); +#endif +} + +int +nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva) +{ + nvlist_t *nvl; + int err; + + if (nvlp == NULL) + return (EINVAL); + + if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0) + return (err); + + if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0) + nvlist_free(nvl); + else + *nvlp = nvl; + + return (err); +} + +/* + * Native encoding functions + */ +typedef struct { + /* + * This structure is used when decoding a packed nvpair in + * the native format. n_base points to a buffer containing the + * packed nvpair. n_end is a pointer to the end of the buffer. + * (n_end actually points to the first byte past the end of the + * buffer.) n_curr is a pointer that lies between n_base and n_end. + * It points to the current data that we are decoding. + * The amount of data left in the buffer is equal to n_end - n_curr. + * n_flag is used to recognize a packed embedded list. + */ + caddr_t n_base; + caddr_t n_end; + caddr_t n_curr; + uint_t n_flag; +} nvs_native_t; + +static int +nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf, + size_t buflen) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + nvs->nvs_private = native; + native->n_curr = native->n_base = buf; + native->n_end = buf + buflen; + native->n_flag = 0; + return (0); + + case NVS_OP_GETSIZE: + nvs->nvs_private = native; + native->n_curr = native->n_base = native->n_end = NULL; + native->n_flag = 0; + return (0); + default: + return (EINVAL); + } +} + +/*ARGSUSED*/ +static void +nvs_native_destroy(nvstream_t *nvs) +{ +} + +static int +native_cp(nvstream_t *nvs, void *buf, size_t size) +{ + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + + if (native->n_curr + size > native->n_end) + return (EFAULT); + + /* + * The bcopy() below eliminates alignment requirement + * on the buffer (stream) and is preferred over direct access. + */ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + bcopy(buf, native->n_curr, size); + break; + case NVS_OP_DECODE: + bcopy(native->n_curr, buf, size); + break; + default: + return (EINVAL); + } + + native->n_curr += size; + return (0); +} + +/* + * operate on nvlist_t header + */ +static int +nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) +{ + nvs_native_t *native = nvs->nvs_private; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + if (native->n_flag) + return (0); /* packed embedded list */ + + native->n_flag = 1; + + /* copy version and nvflag of the nvlist_t */ + if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 || + native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0) + return (EFAULT); + + return (0); + + case NVS_OP_GETSIZE: + /* + * if calculate for packed embedded list + * 4 for end of the embedded list + * else + * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag + * and 4 for end of the entire list + */ + if (native->n_flag) { + *size += 4; + } else { + native->n_flag = 1; + *size += 2 * sizeof (int32_t) + 4; + } + + return (0); + + default: + return (EINVAL); + } +} + +static int +nvs_native_nvl_fini(nvstream_t *nvs) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + /* + * Add 4 zero bytes at end of nvlist. They are used + * for end detection by the decode routine. + */ + if (native->n_curr + sizeof (int) > native->n_end) + return (EFAULT); + + bzero(native->n_curr, sizeof (int)); + native->n_curr += sizeof (int); + } + + return (0); +} + +static int +nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + nvlist_t *packed = (void *) + (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); + /* + * Null out the pointer that is meaningless in the packed + * structure. The address may not be aligned, so we have + * to use bzero. + */ + bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); + } + + return (nvs_embedded(nvs, EMBEDDED_NVL(nvp))); +} + +static int +nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp); + size_t len = NVP_NELEM(nvp) * sizeof (uint64_t); + nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len); + int i; + /* + * Null out pointers that are meaningless in the packed + * structure. The addresses may not be aligned, so we have + * to use bzero. + */ + bzero(value, len); + + for (i = 0; i < NVP_NELEM(nvp); i++, packed++) + /* + * Null out the pointer that is meaningless in the + * packed structure. The address may not be aligned, + * so we have to use bzero. + */ + bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); + } + + return (nvs_embedded_nvl_array(nvs, nvp, NULL)); +} + +static void +nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + uint64_t *strp = (void *) + (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); + /* + * Null out pointers that are meaningless in the packed + * structure. The addresses may not be aligned, so we have + * to use bzero. + */ + bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t)); + break; + } + case NVS_OP_DECODE: { + char **strp = (void *)NVP_VALUE(nvp); + char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t)); + int i; + + for (i = 0; i < NVP_NELEM(nvp); i++) { + strp[i] = buf; + buf += strlen(buf) + 1; + } + break; + } + } +} + +static int +nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp) +{ + data_type_t type; + int value_sz; + int ret = 0; + + /* + * We do the initial bcopy of the data before we look at + * the nvpair type, because when we're decoding, we won't + * have the correct values for the pair until we do the bcopy. + */ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + if (native_cp(nvs, nvp, nvp->nvp_size) != 0) + return (EFAULT); + break; + default: + return (EINVAL); + } + + /* verify nvp_name_sz, check the name string length */ + if (i_validate_nvpair_name(nvp) != 0) + return (EFAULT); + + type = NVP_TYPE(nvp); + + /* + * Verify type and nelem and get the value size. + * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) excluded. + */ + if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0) + return (EFAULT); + + if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size) + return (EFAULT); + + switch (type) { + case DATA_TYPE_NVLIST: + ret = nvpair_native_embedded(nvs, nvp); + break; + case DATA_TYPE_NVLIST_ARRAY: + ret = nvpair_native_embedded_array(nvs, nvp); + break; + case DATA_TYPE_STRING_ARRAY: + nvpair_native_string_array(nvs, nvp); + break; + default: + break; + } + + return (ret); +} + +static int +nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + uint64_t nvp_sz = nvp->nvp_size; + + switch (NVP_TYPE(nvp)) { + case DATA_TYPE_NVLIST: { + size_t nvsize = 0; + + if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + case DATA_TYPE_NVLIST_ARRAY: { + size_t nvsize; + + if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + default: + break; + } + + if (nvp_sz > INT32_MAX) + return (EINVAL); + + *size = nvp_sz; + + return (0); +} + +static int +nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + return (nvs_native_nvp_op(nvs, nvp)); + + case NVS_OP_DECODE: { + nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; + int32_t decode_len; + + /* try to read the size value from the stream */ + if (native->n_curr + sizeof (int32_t) > native->n_end) + return (EFAULT); + bcopy(native->n_curr, &decode_len, sizeof (int32_t)); + + /* sanity check the size value */ + if (decode_len < 0 || + decode_len > native->n_end - native->n_curr) + return (EFAULT); + + *size = decode_len; + + /* + * If at the end of the stream then move the cursor + * forward, otherwise nvpair_native_op() will read + * the entire nvpair at the same cursor position. + */ + if (*size == 0) + native->n_curr += sizeof (int32_t); + break; + } + + default: + return (EINVAL); + } + + return (0); +} + +static const nvs_ops_t nvs_native_ops = { + nvs_native_nvlist, + nvs_native_nvpair, + nvs_native_nvp_op, + nvs_native_nvp_size, + nvs_native_nvl_fini +}; + +static int +nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) +{ + nvs_native_t native; + int err; + + nvs->nvs_ops = &nvs_native_ops; + + if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t), + *buflen - sizeof (nvs_header_t))) != 0) + return (err); + + err = nvs_operation(nvs, nvl, buflen); + + nvs_native_destroy(nvs); + + return (err); +} + +/* + * XDR encoding functions + * + * An xdr packed nvlist is encoded as: + * + * - encoding methode and host endian (4 bytes) + * - nvl_version (4 bytes) + * - nvl_nvflag (4 bytes) + * + * - encoded nvpairs, the format of one xdr encoded nvpair is: + * - encoded size of the nvpair (4 bytes) + * - decoded size of the nvpair (4 bytes) + * - name string, (4 + sizeof(NV_ALIGN4(string)) + * a string is coded as size (4 bytes) and data + * - data type (4 bytes) + * - number of elements in the nvpair (4 bytes) + * - data + * + * - 2 zero's for end of the entire list (8 bytes) + */ +static int +nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen) +{ + /* xdr data must be 4 byte aligned */ + if ((ulong_t)buf % 4 != 0) + return (EFAULT); + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE); + nvs->nvs_private = xdr; + return (0); + case NVS_OP_DECODE: + xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE); + nvs->nvs_private = xdr; + return (0); + case NVS_OP_GETSIZE: + nvs->nvs_private = NULL; + return (0); + default: + return (EINVAL); + } +} + +static void +nvs_xdr_destroy(nvstream_t *nvs) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: + xdr_destroy((XDR *)nvs->nvs_private); + break; + default: + break; + } +} + +static int +nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) +{ + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: + case NVS_OP_DECODE: { + XDR *xdr = nvs->nvs_private; + + if (!xdr_int(xdr, &nvl->nvl_version) || + !xdr_u_int(xdr, &nvl->nvl_nvflag)) + return (EFAULT); + break; + } + case NVS_OP_GETSIZE: { + /* + * 2 * 4 for nvl_version + nvl_nvflag + * and 8 for end of the entire list + */ + *size += 2 * 4 + 8; + break; + } + default: + return (EINVAL); + } + return (0); +} + +static int +nvs_xdr_nvl_fini(nvstream_t *nvs) +{ + if (nvs->nvs_op == NVS_OP_ENCODE) { + XDR *xdr = nvs->nvs_private; + int zero = 0; + + if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero)) + return (EFAULT); + } + + return (0); +} + +/* + * The format of xdr encoded nvpair is: + * encode_size, decode_size, name string, data type, nelem, data + */ +static int +nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp) +{ + data_type_t type; + char *buf; + char *buf_end = (char *)nvp + nvp->nvp_size; + int value_sz; + uint_t nelem, buflen; + bool_t ret = FALSE; + XDR *xdr = nvs->nvs_private; + + ASSERT(xdr != NULL && nvp != NULL); + + /* name string */ + if ((buf = NVP_NAME(nvp)) >= buf_end) + return (EFAULT); + buflen = buf_end - buf; + + if (!xdr_string(xdr, &buf, buflen - 1)) + return (EFAULT); + nvp->nvp_name_sz = strlen(buf) + 1; + + /* type and nelem */ + if (!xdr_int(xdr, (int *)&nvp->nvp_type) || + !xdr_int(xdr, &nvp->nvp_value_elem)) + return (EFAULT); + + type = NVP_TYPE(nvp); + nelem = nvp->nvp_value_elem; + + /* + * Verify type and nelem and get the value size. + * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY + * is the size of the string(s) excluded. + */ + if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0) + return (EFAULT); + + /* if there is no data to extract then return */ + if (nelem == 0) + return (0); + + /* value */ + if ((buf = NVP_VALUE(nvp)) >= buf_end) + return (EFAULT); + buflen = buf_end - buf; + + if (buflen < value_sz) + return (EFAULT); + + switch (type) { + case DATA_TYPE_NVLIST: + if (nvs_embedded(nvs, (void *)buf) == 0) + return (0); + break; + + case DATA_TYPE_NVLIST_ARRAY: + if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0) + return (0); + break; + + case DATA_TYPE_BOOLEAN: + ret = TRUE; + break; + + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + ret = xdr_char(xdr, buf); + break; + + case DATA_TYPE_INT16: + ret = xdr_short(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT16: + ret = xdr_u_short(xdr, (void *)buf); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_INT32: + ret = xdr_int(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT32: + ret = xdr_u_int(xdr, (void *)buf); + break; + + case DATA_TYPE_INT64: + ret = xdr_longlong_t(xdr, (void *)buf); + break; + + case DATA_TYPE_UINT64: + ret = xdr_u_longlong_t(xdr, (void *)buf); + break; + + case DATA_TYPE_HRTIME: + /* + * NOTE: must expose the definition of hrtime_t here + */ + ret = xdr_longlong_t(xdr, (void *)buf); + break; +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: + ret = xdr_double(xdr, (void *)buf); + break; +#endif + case DATA_TYPE_STRING: + ret = xdr_string(xdr, &buf, buflen - 1); + break; + + case DATA_TYPE_BYTE_ARRAY: + ret = xdr_opaque(xdr, buf, nelem); + break; + + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t), + (xdrproc_t)xdr_char); + break; + + case DATA_TYPE_INT16_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t), + sizeof (int16_t), (xdrproc_t)xdr_short); + break; + + case DATA_TYPE_UINT16_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t), + sizeof (uint16_t), (xdrproc_t)xdr_u_short); + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT32_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t), + sizeof (int32_t), (xdrproc_t)xdr_int); + break; + + case DATA_TYPE_UINT32_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t), + sizeof (uint32_t), (xdrproc_t)xdr_u_int); + break; + + case DATA_TYPE_INT64_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t), + sizeof (int64_t), (xdrproc_t)xdr_longlong_t); + break; + + case DATA_TYPE_UINT64_ARRAY: + ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t), + sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t); + break; + + case DATA_TYPE_STRING_ARRAY: { + size_t len = nelem * sizeof (uint64_t); + char **strp = (void *)buf; + int i; + + if (nvs->nvs_op == NVS_OP_DECODE) + bzero(buf, len); /* don't trust packed data */ + + for (i = 0; i < nelem; i++) { + if (buflen <= len) + return (EFAULT); + + buf += len; + buflen -= len; + + if (xdr_string(xdr, &buf, buflen - 1) != TRUE) + return (EFAULT); + + if (nvs->nvs_op == NVS_OP_DECODE) + strp[i] = buf; + len = strlen(buf) + 1; + } + ret = TRUE; + break; + } + default: + break; + } + + return (ret == TRUE ? 0 : EFAULT); +} + +static int +nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + data_type_t type = NVP_TYPE(nvp); + /* + * encode_size + decode_size + name string size + data type + nelem + * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + */ + uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4; + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + nvp_sz += 4; /* 4 is the minimum xdr unit */ + break; + + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + case DATA_TYPE_HRTIME: +#if !defined(_KERNEL) + case DATA_TYPE_DOUBLE: +#endif + nvp_sz += 8; + break; + + case DATA_TYPE_STRING: + nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp))); + break; + + case DATA_TYPE_BYTE_ARRAY: + nvp_sz += NV_ALIGN4(NVP_NELEM(nvp)); + break; + + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp); + break; + + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp); + break; + + case DATA_TYPE_STRING_ARRAY: { + int i; + char **strs = (void *)NVP_VALUE(nvp); + + for (i = 0; i < NVP_NELEM(nvp); i++) + nvp_sz += 4 + NV_ALIGN4(strlen(strs[i])); + + break; + } + + case DATA_TYPE_NVLIST: + case DATA_TYPE_NVLIST_ARRAY: { + size_t nvsize = 0; + int old_nvs_op = nvs->nvs_op; + int err; + + nvs->nvs_op = NVS_OP_GETSIZE; + if (type == DATA_TYPE_NVLIST) + err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize); + else + err = nvs_embedded_nvl_array(nvs, nvp, &nvsize); + nvs->nvs_op = old_nvs_op; + + if (err != 0) + return (EINVAL); + + nvp_sz += nvsize; + break; + } + + default: + return (EINVAL); + } + + if (nvp_sz > INT32_MAX) + return (EINVAL); + + *size = nvp_sz; + + return (0); +} + + +/* + * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates + * the largest nvpair that could be encoded in the buffer. + * + * See comments above nvpair_xdr_op() for the format of xdr encoding. + * The size of a xdr packed nvpair without any data is 5 words. + * + * Using the size of the data directly as an estimate would be ok + * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY + * then the actual nvpair has space for an array of pointers to index + * the strings. These pointers are not encoded into the packed xdr buffer. + * + * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are + * of length 0, then each string is endcoded in xdr format as a single word. + * Therefore when expanded to an nvpair there will be 2.25 word used for + * each string. (a int64_t allocated for pointer usage, and a single char + * for the null termination.) + * + * This is the calculation performed by the NVS_XDR_MAX_LEN macro. + */ +#define NVS_XDR_HDR_LEN ((size_t)(5 * 4)) +#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ? \ + 0 : ((size_t)(y) - NVS_XDR_HDR_LEN)) +#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \ + (NVS_XDR_DATA_LEN(x) * 2) + \ + NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4))) + +static int +nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) +{ + XDR *xdr = nvs->nvs_private; + int32_t encode_len, decode_len; + + switch (nvs->nvs_op) { + case NVS_OP_ENCODE: { + size_t nvsize; + + if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0) + return (EFAULT); + + decode_len = nvp->nvp_size; + encode_len = nvsize; + if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) + return (EFAULT); + + return (nvs_xdr_nvp_op(nvs, nvp)); + } + case NVS_OP_DECODE: { + struct xdr_bytesrec bytesrec; + + /* get the encode and decode size */ + if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) + return (EFAULT); + *size = decode_len; + + /* are we at the end of the stream? */ + if (*size == 0) + return (0); + + /* sanity check the size parameter */ + if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec)) + return (EFAULT); + + if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail)) + return (EFAULT); + break; + } + + default: + return (EINVAL); + } + return (0); +} + +static const struct nvs_ops nvs_xdr_ops = { + nvs_xdr_nvlist, + nvs_xdr_nvpair, + nvs_xdr_nvp_op, + nvs_xdr_nvp_size, + nvs_xdr_nvl_fini +}; + +static int +nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) +{ + XDR xdr; + int err; + + nvs->nvs_ops = &nvs_xdr_ops; + + if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t), + *buflen - sizeof (nvs_header_t))) != 0) + return (err); + + err = nvs_operation(nvs, nvl, buflen); + + nvs_xdr_destroy(nvs); + + return (err); +} diff --git a/common/nvpair/nvpair_alloc_fixed.c b/common/nvpair/nvpair_alloc_fixed.c new file mode 100644 index 000000000000..b1128eeb9bc3 --- /dev/null +++ b/common/nvpair/nvpair_alloc_fixed.c @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/stropts.h> +#include <sys/isa_defs.h> +#include <sys/nvpair.h> +#include <sys/sysmacros.h> +#if defined(_KERNEL) && !defined(_BOOT) +#include <sys/varargs.h> +#else +#include <stdarg.h> +#include <strings.h> +#endif + +/* + * This allocator is very simple. + * - it uses a pre-allocated buffer for memory allocations. + * - it does _not_ free memory in the pre-allocated buffer. + * + * The reason for the selected implemention is simplicity. + * This allocator is designed for the usage in interrupt context when + * the caller may not wait for free memory. + */ + +/* pre-allocated buffer for memory allocations */ +typedef struct nvbuf { + uintptr_t nvb_buf; /* address of pre-allocated buffer */ + uintptr_t nvb_lim; /* limit address in the buffer */ + uintptr_t nvb_cur; /* current address in the buffer */ +} nvbuf_t; + +/* + * Initialize the pre-allocated buffer allocator. The caller needs to supply + * + * buf address of pre-allocated buffer + * bufsz size of pre-allocated buffer + * + * nv_fixed_init() calculates the remaining members of nvbuf_t. + */ +static int +nv_fixed_init(nv_alloc_t *nva, va_list valist) +{ + uintptr_t base = va_arg(valist, uintptr_t); + uintptr_t lim = base + va_arg(valist, size_t); + nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t)); + + if (base == 0 || (uintptr_t)&nvb[1] > lim) + return (EINVAL); + + nvb->nvb_buf = (uintptr_t)&nvb[0]; + nvb->nvb_cur = (uintptr_t)&nvb[1]; + nvb->nvb_lim = lim; + nva->nva_arg = nvb; + + return (0); +} + +static void * +nv_fixed_alloc(nv_alloc_t *nva, size_t size) +{ + nvbuf_t *nvb = nva->nva_arg; + uintptr_t new = nvb->nvb_cur; + + if (size == 0 || new + size > nvb->nvb_lim) + return (NULL); + + nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t)); + + return ((void *)new); +} + +/*ARGSUSED*/ +static void +nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size) +{ + /* don't free memory in the pre-allocated buffer */ +} + +static void +nv_fixed_reset(nv_alloc_t *nva) +{ + nvbuf_t *nvb = nva->nva_arg; + + nvb->nvb_cur = (uintptr_t)&nvb[1]; +} + +const nv_alloc_ops_t nv_fixed_ops_def = { + nv_fixed_init, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_fixed_alloc, /* nv_ao_alloc() */ + nv_fixed_free, /* nv_ao_free() */ + nv_fixed_reset /* nv_ao_reset() */ +}; + +const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def; diff --git a/common/unicode/u8_textprep.c b/common/unicode/u8_textprep.c new file mode 100644 index 000000000000..8faf1a97e47e --- /dev/null +++ b/common/unicode/u8_textprep.c @@ -0,0 +1,2132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +/* + * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458). + * + * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F), + * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also + * the section 3C man pages. + * Interface stability: Committed. + */ + +#include <sys/types.h> +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/debug.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#else +#include <sys/u8_textprep.h> +#include <strings.h> +#endif /* _KERNEL */ +#include <sys/byteorder.h> +#include <sys/errno.h> +#include <sys/u8_textprep_data.h> + + +/* The maximum possible number of bytes in a UTF-8 character. */ +#define U8_MB_CUR_MAX (4) + +/* + * The maximum number of bytes needed for a UTF-8 character to cover + * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2. + */ +#define U8_MAX_BYTES_UCS2 (3) + +/* The maximum possible number of bytes in a Stream-Safe Text. */ +#define U8_STREAM_SAFE_TEXT_MAX (128) + +/* + * The maximum number of characters in a combining/conjoining sequence and + * the actual upperbound limit of a combining/conjoining sequence. + */ +#define U8_MAX_CHARS_A_SEQ (32) +#define U8_UPPER_LIMIT_IN_A_SEQ (31) + +/* The combining class value for Starter. */ +#define U8_COMBINING_CLASS_STARTER (0) + +/* + * Some Hangul related macros at below. + * + * The first and the last of Hangul syllables, Hangul Jamo Leading consonants, + * Vowels, and optional Trailing consonants in Unicode scalar values. + * + * Please be noted that the U8_HANGUL_JAMO_T_FIRST is 0x11A7 at below not + * the actual U+11A8. This is due to that the trailing consonant is optional + * and thus we are doing a pre-calculation of subtracting one. + * + * Each of 19 modern leading consonants has total 588 possible syllables since + * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for + * no trailing consonant case, i.e., 21 x 28 = 588. + * + * We also have bunch of Hangul related macros at below. Please bear in mind + * that the U8_HANGUL_JAMO_1ST_BYTE can be used to check whether it is + * a Hangul Jamo or not but the value does not guarantee that it is a Hangul + * Jamo; it just guarantee that it will be most likely. + */ +#define U8_HANGUL_SYL_FIRST (0xAC00U) +#define U8_HANGUL_SYL_LAST (0xD7A3U) + +#define U8_HANGUL_JAMO_L_FIRST (0x1100U) +#define U8_HANGUL_JAMO_L_LAST (0x1112U) +#define U8_HANGUL_JAMO_V_FIRST (0x1161U) +#define U8_HANGUL_JAMO_V_LAST (0x1175U) +#define U8_HANGUL_JAMO_T_FIRST (0x11A7U) +#define U8_HANGUL_JAMO_T_LAST (0x11C2U) + +#define U8_HANGUL_V_COUNT (21) +#define U8_HANGUL_VT_COUNT (588) +#define U8_HANGUL_T_COUNT (28) + +#define U8_HANGUL_JAMO_1ST_BYTE (0xE1U) + +#define U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \ + (s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \ + (s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \ + (s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU)); + +#define U8_HANGUL_JAMO_L(u) \ + ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST) + +#define U8_HANGUL_JAMO_V(u) \ + ((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST) + +#define U8_HANGUL_JAMO_T(u) \ + ((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) + +#define U8_HANGUL_JAMO(u) \ + ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) + +#define U8_HANGUL_SYLLABLE(u) \ + ((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST) + +#define U8_HANGUL_COMPOSABLE_L_V(s, u) \ + ((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u))) + +#define U8_HANGUL_COMPOSABLE_LV_T(s, u) \ + ((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u))) + +/* The types of decomposition mappings. */ +#define U8_DECOMP_BOTH (0xF5U) +#define U8_DECOMP_CANONICAL (0xF6U) + +/* The indicator for 16-bit table. */ +#define U8_16BIT_TABLE_INDICATOR (0x8000U) + +/* The following are some convenience macros. */ +#define U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \ + (u) = ((uint32_t)(b1) & 0x0F) << 12 | ((uint32_t)(b2) & 0x3F) << 6 | \ + (uint32_t)(b3) & 0x3F; + +#define U8_SIMPLE_SWAP(a, b, t) \ + (t) = (a); \ + (a) = (b); \ + (b) = (t); + +#define U8_ASCII_TOUPPER(c) \ + (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c)) + +#define U8_ASCII_TOLOWER(c) \ + (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c)) + +#define U8_ISASCII(c) (((uchar_t)(c)) < 0x80U) +/* + * The following macro assumes that the two characters that are to be + * swapped are adjacent to each other and 'a' comes before 'b'. + * + * If the assumptions are not met, then, the macro will fail. + */ +#define U8_SWAP_COMB_MARKS(a, b) \ + for (k = 0; k < disp[(a)]; k++) \ + u8t[k] = u8s[start[(a)] + k]; \ + for (k = 0; k < disp[(b)]; k++) \ + u8s[start[(a)] + k] = u8s[start[(b)] + k]; \ + start[(b)] = start[(a)] + disp[(b)]; \ + for (k = 0; k < disp[(a)]; k++) \ + u8s[start[(b)] + k] = u8t[k]; \ + U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \ + U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc); + +/* The possible states during normalization. */ +typedef enum { + U8_STATE_START = 0, + U8_STATE_HANGUL_L = 1, + U8_STATE_HANGUL_LV = 2, + U8_STATE_HANGUL_LVT = 3, + U8_STATE_HANGUL_V = 4, + U8_STATE_HANGUL_T = 5, + U8_STATE_COMBINING_MARK = 6 +} u8_normalization_states_t; + +/* + * The three vectors at below are used to check bytes of a given UTF-8 + * character are valid and not containing any malformed byte values. + * + * We used to have a quite relaxed UTF-8 binary representation but then there + * was some security related issues and so the Unicode Consortium defined + * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it + * one more time at the Unicode 3.2. The following three tables are based on + * that. + */ + +#define U8_ILLEGAL_NEXT_BYTE_COMMON(c) ((c) < 0x80 || (c) > 0xBF) + +#define I_ U8_ILLEGAL_CHAR +#define O_ U8_OUT_OF_RANGE_CHAR + +const int8_t u8_number_of_bytes[0x100] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +/* 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF */ + I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, + +/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ + I_, I_, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + +/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + +/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ + 4, 4, 4, 4, 4, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, +}; + +#undef I_ +#undef O_ + +const uint8_t u8_valid_min_2nd_byte[0x100] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* C0 C1 C2 C3 C4 C5 C6 C7 */ + 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* C8 C9 CA CB CC CD CE CF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* D0 D1 D2 D3 D4 D5 D6 D7 */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* D8 D9 DA DB DC DD DE DF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* E0 E1 E2 E3 E4 E5 E6 E7 */ + 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* E8 E9 EA EB EC ED EE EF */ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +/* F0 F1 F2 F3 F4 F5 F6 F7 */ + 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const uint8_t u8_valid_max_2nd_byte[0x100] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* C0 C1 C2 C3 C4 C5 C6 C7 */ + 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* C8 C9 CA CB CC CD CE CF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* D0 D1 D2 D3 D4 D5 D6 D7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* D8 D9 DA DB DC DD DE DF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* E0 E1 E2 E3 E4 E5 E6 E7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, +/* E8 E9 EA EB EC ED EE EF */ + 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, +/* F0 F1 F2 F3 F4 F5 F6 F7 */ + 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +/* + * The u8_validate() validates on the given UTF-8 character string and + * calculate the byte length. It is quite similar to mblen(3C) except that + * this will validate against the list of characters if required and + * specific to UTF-8 and Unicode. + */ +int +u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum) +{ + uchar_t *ib; + uchar_t *ibtail; + uchar_t **p; + uchar_t *s1; + uchar_t *s2; + uchar_t f; + int sz; + size_t i; + int ret_val; + boolean_t second; + boolean_t no_need_to_validate_entire; + boolean_t check_additional; + boolean_t validate_ucs2_range_only; + + if (! u8str) + return (0); + + ib = (uchar_t *)u8str; + ibtail = ib + n; + + ret_val = 0; + + no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE); + check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL; + validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE; + + while (ib < ibtail) { + /* + * The first byte of a UTF-8 character tells how many + * bytes will follow for the character. If the first byte + * is an illegal byte value or out of range value, we just + * return -1 with an appropriate error number. + */ + sz = u8_number_of_bytes[*ib]; + if (sz == U8_ILLEGAL_CHAR) { + *errnum = EILSEQ; + return (-1); + } + + if (sz == U8_OUT_OF_RANGE_CHAR || + (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) { + *errnum = ERANGE; + return (-1); + } + + /* + * If we don't have enough bytes to check on, that's also + * an error. As you can see, we give illegal byte sequence + * checking higher priority then EINVAL cases. + */ + if ((ibtail - ib) < sz) { + *errnum = EINVAL; + return (-1); + } + + if (sz == 1) { + ib++; + ret_val++; + } else { + /* + * Check on the multi-byte UTF-8 character. For more + * details on this, see comment added for the used + * data structures at the beginning of the file. + */ + f = *ib++; + ret_val++; + second = B_TRUE; + for (i = 1; i < sz; i++) { + if (second) { + if (*ib < u8_valid_min_2nd_byte[f] || + *ib > u8_valid_max_2nd_byte[f]) { + *errnum = EILSEQ; + return (-1); + } + second = B_FALSE; + } else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) { + *errnum = EILSEQ; + return (-1); + } + ib++; + ret_val++; + } + } + + if (check_additional) { + for (p = (uchar_t **)list, i = 0; p[i]; i++) { + s1 = ib - sz; + s2 = p[i]; + while (s1 < ib) { + if (*s1 != *s2 || *s2 == '\0') + break; + s1++; + s2++; + } + + if (s1 >= ib && *s2 == '\0') { + *errnum = EBADF; + return (-1); + } + } + } + + if (no_need_to_validate_entire) + break; + } + + return (ret_val); +} + +/* + * The do_case_conv() looks at the mapping tables and returns found + * bytes if any. If not found, the input bytes are returned. The function + * always terminate the return bytes with a null character assuming that + * there are plenty of room to do so. + * + * The case conversions are simple case conversions mapping a character to + * another character as specified in the Unicode data. The byte size of + * the mapped character could be different from that of the input character. + * + * The return value is the byte length of the returned character excluding + * the terminating null byte. + */ +static size_t +do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) +{ + size_t i; + uint16_t b1 = 0; + uint16_t b2 = 0; + uint16_t b3 = 0; + uint16_t b3_tbl; + uint16_t b3_base; + uint16_t b4 = 0; + size_t start_id; + size_t end_id; + + /* + * At this point, the only possible values for sz are 2, 3, and 4. + * The u8s should point to a vector that is well beyond the size of + * 5 bytes. + */ + if (sz == 2) { + b3 = u8s[0] = s[0]; + b4 = u8s[1] = s[1]; + } else if (sz == 3) { + b2 = u8s[0] = s[0]; + b3 = u8s[1] = s[1]; + b4 = u8s[2] = s[2]; + } else if (sz == 4) { + b1 = u8s[0] = s[0]; + b2 = u8s[1] = s[1]; + b3 = u8s[2] = s[2]; + b4 = u8s[3] = s[3]; + } else { + /* This is not possible but just in case as a fallback. */ + if (is_it_toupper) + *u8s = U8_ASCII_TOUPPER(*s); + else + *u8s = U8_ASCII_TOLOWER(*s); + u8s[1] = '\0'; + + return (1); + } + u8s[sz] = '\0'; + + /* + * Let's find out if we have a corresponding character. + */ + b1 = u8_common_b1_tbl[uv][b1]; + if (b1 == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + b2 = u8_case_common_b2_tbl[uv][b1][b2]; + if (b2 == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + if (is_it_toupper) { + b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id; + if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4]; + end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1]; + + /* Either there is no match or an error at the table. */ + if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX) + return ((size_t)sz); + + b3_base = u8_toupper_b3_tbl[uv][b2][b3].base; + + for (i = 0; start_id < end_id; start_id++) + u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id]; + } else { + b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id; + if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4]; + end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1]; + + if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX) + return ((size_t)sz); + + b3_base = u8_tolower_b3_tbl[uv][b2][b3].base; + + for (i = 0; start_id < end_id; start_id++) + u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id]; + } + + /* + * If i is still zero, that means there is no corresponding character. + */ + if (i == 0) + return ((size_t)sz); + + u8s[i] = '\0'; + + return (i); +} + +/* + * The do_case_compare() function compares the two input strings, s1 and s2, + * one character at a time doing case conversions if applicable and return + * the comparison result as like strcmp(). + * + * Since, in empirical sense, most of text data are 7-bit ASCII characters, + * we treat the 7-bit ASCII characters as a special case trying to yield + * faster processing time. + */ +static int +do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, + size_t n2, boolean_t is_it_toupper, int *errnum) +{ + int f; + int sz1; + int sz2; + size_t j; + size_t i1; + size_t i2; + uchar_t u8s1[U8_MB_CUR_MAX + 1]; + uchar_t u8s2[U8_MB_CUR_MAX + 1]; + + i1 = i2 = 0; + while (i1 < n1 && i2 < n2) { + /* + * Find out what would be the byte length for this UTF-8 + * character at string s1 and also find out if this is + * an illegal start byte or not and if so, issue a proper + * error number and yet treat this byte as a character. + */ + sz1 = u8_number_of_bytes[*s1]; + if (sz1 < 0) { + *errnum = EILSEQ; + sz1 = 1; + } + + /* + * For 7-bit ASCII characters mainly, we do a quick case + * conversion right at here. + * + * If we don't have enough bytes for this character, issue + * an EINVAL error and use what are available. + * + * If we have enough bytes, find out if there is + * a corresponding uppercase character and if so, copy over + * the bytes for a comparison later. If there is no + * corresponding uppercase character, then, use what we have + * for the comparison. + */ + if (sz1 == 1) { + if (is_it_toupper) + u8s1[0] = U8_ASCII_TOUPPER(*s1); + else + u8s1[0] = U8_ASCII_TOLOWER(*s1); + s1++; + u8s1[1] = '\0'; + } else if ((i1 + sz1) > n1) { + *errnum = EINVAL; + for (j = 0; (i1 + j) < n1; ) + u8s1[j++] = *s1++; + u8s1[j] = '\0'; + } else { + (void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper); + s1 += sz1; + } + + /* Do the same for the string s2. */ + sz2 = u8_number_of_bytes[*s2]; + if (sz2 < 0) { + *errnum = EILSEQ; + sz2 = 1; + } + + if (sz2 == 1) { + if (is_it_toupper) + u8s2[0] = U8_ASCII_TOUPPER(*s2); + else + u8s2[0] = U8_ASCII_TOLOWER(*s2); + s2++; + u8s2[1] = '\0'; + } else if ((i2 + sz2) > n2) { + *errnum = EINVAL; + for (j = 0; (i2 + j) < n2; ) + u8s2[j++] = *s2++; + u8s2[j] = '\0'; + } else { + (void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper); + s2 += sz2; + } + + /* Now compare the two characters. */ + if (sz1 == 1 && sz2 == 1) { + if (*u8s1 > *u8s2) + return (1); + if (*u8s1 < *u8s2) + return (-1); + } else { + f = strcmp((const char *)u8s1, (const char *)u8s2); + if (f != 0) + return (f); + } + + /* + * They were the same. Let's move on to the next + * characters then. + */ + i1 += sz1; + i2 += sz2; + } + + /* + * We compared until the end of either or both strings. + * + * If we reached to or went over the ends for the both, that means + * they are the same. + * + * If we reached only one of the two ends, that means the other string + * has something which then the fact can be used to determine + * the return value. + */ + if (i1 >= n1) { + if (i2 >= n2) + return (0); + return (-1); + } + return (1); +} + +/* + * The combining_class() function checks on the given bytes and find out + * the corresponding Unicode combining class value. The return value 0 means + * it is a Starter. Any illegal UTF-8 character will also be treated as + * a Starter. + */ +static uchar_t +combining_class(size_t uv, uchar_t *s, size_t sz) +{ + uint16_t b1 = 0; + uint16_t b2 = 0; + uint16_t b3 = 0; + uint16_t b4 = 0; + + if (sz == 1 || sz > 4) + return (0); + + if (sz == 2) { + b3 = s[0]; + b4 = s[1]; + } else if (sz == 3) { + b2 = s[0]; + b3 = s[1]; + b4 = s[2]; + } else if (sz == 4) { + b1 = s[0]; + b2 = s[1]; + b3 = s[2]; + b4 = s[3]; + } + + b1 = u8_common_b1_tbl[uv][b1]; + if (b1 == U8_TBL_ELEMENT_NOT_DEF) + return (0); + + b2 = u8_combining_class_b2_tbl[uv][b1][b2]; + if (b2 == U8_TBL_ELEMENT_NOT_DEF) + return (0); + + b3 = u8_combining_class_b3_tbl[uv][b2][b3]; + if (b3 == U8_TBL_ELEMENT_NOT_DEF) + return (0); + + return (u8_combining_class_b4_tbl[uv][b3][b4]); +} + +/* + * The do_decomp() function finds out a matching decomposition if any + * and return. If there is no match, the input bytes are copied and returned. + * The function also checks if there is a Hangul, decomposes it if necessary + * and returns. + * + * To save time, a single byte 7-bit ASCII character should be handled by + * the caller. + * + * The function returns the number of bytes returned sans always terminating + * the null byte. It will also return a state that will tell if there was + * a Hangul character decomposed which then will be used by the caller. + */ +static size_t +do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz, + boolean_t canonical_decomposition, u8_normalization_states_t *state) +{ + uint16_t b1 = 0; + uint16_t b2 = 0; + uint16_t b3 = 0; + uint16_t b3_tbl; + uint16_t b3_base; + uint16_t b4 = 0; + size_t start_id; + size_t end_id; + size_t i; + uint32_t u1; + + if (sz == 2) { + b3 = u8s[0] = s[0]; + b4 = u8s[1] = s[1]; + u8s[2] = '\0'; + } else if (sz == 3) { + /* Convert it to a Unicode scalar value. */ + U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]); + + /* + * If this is a Hangul syllable, we decompose it into + * a leading consonant, a vowel, and an optional trailing + * consonant and then return. + */ + if (U8_HANGUL_SYLLABLE(u1)) { + u1 -= U8_HANGUL_SYL_FIRST; + + b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT; + b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT) + / U8_HANGUL_T_COUNT; + b3 = u1 % U8_HANGUL_T_COUNT; + + U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1); + U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2); + if (b3) { + b3 += U8_HANGUL_JAMO_T_FIRST; + U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3); + + u8s[9] = '\0'; + *state = U8_STATE_HANGUL_LVT; + return (9); + } + + u8s[6] = '\0'; + *state = U8_STATE_HANGUL_LV; + return (6); + } + + b2 = u8s[0] = s[0]; + b3 = u8s[1] = s[1]; + b4 = u8s[2] = s[2]; + u8s[3] = '\0'; + + /* + * If this is a Hangul Jamo, we know there is nothing + * further that we can decompose. + */ + if (U8_HANGUL_JAMO_L(u1)) { + *state = U8_STATE_HANGUL_L; + return (3); + } + + if (U8_HANGUL_JAMO_V(u1)) { + if (*state == U8_STATE_HANGUL_L) + *state = U8_STATE_HANGUL_LV; + else + *state = U8_STATE_HANGUL_V; + return (3); + } + + if (U8_HANGUL_JAMO_T(u1)) { + if (*state == U8_STATE_HANGUL_LV) + *state = U8_STATE_HANGUL_LVT; + else + *state = U8_STATE_HANGUL_T; + return (3); + } + } else if (sz == 4) { + b1 = u8s[0] = s[0]; + b2 = u8s[1] = s[1]; + b3 = u8s[2] = s[2]; + b4 = u8s[3] = s[3]; + u8s[4] = '\0'; + } else { + /* + * This is a fallback and should not happen if the function + * was called properly. + */ + u8s[0] = s[0]; + u8s[1] = '\0'; + *state = U8_STATE_START; + return (1); + } + + /* + * At this point, this rountine does not know what it would get. + * The caller should sort it out if the state isn't a Hangul one. + */ + *state = U8_STATE_START; + + /* Try to find matching decomposition mapping byte sequence. */ + b1 = u8_common_b1_tbl[uv][b1]; + if (b1 == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + b2 = u8_decomp_b2_tbl[uv][b1][b2]; + if (b2 == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id; + if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) + return ((size_t)sz); + + /* + * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR + * which is 0x8000, this means we couldn't fit the mappings into + * the cardinality of a unsigned byte. + */ + if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) { + b3_tbl -= U8_16BIT_TABLE_INDICATOR; + start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4]; + end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1]; + } else { + start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4]; + end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1]; + } + + /* This also means there wasn't any matching decomposition. */ + if (start_id >= end_id) + return ((size_t)sz); + + /* + * The final table for decomposition mappings has three types of + * byte sequences depending on whether a mapping is for compatibility + * decomposition, canonical decomposition, or both like the following: + * + * (1) Compatibility decomposition mappings: + * + * +---+---+-...-+---+ + * | B0| B1| ... | Bm| + * +---+---+-...-+---+ + * + * The first byte, B0, is always less then 0xF5 (U8_DECOMP_BOTH). + * + * (2) Canonical decomposition mappings: + * + * +---+---+---+-...-+---+ + * | T | b0| b1| ... | bn| + * +---+---+---+-...-+---+ + * + * where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL). + * + * (3) Both mappings: + * + * +---+---+---+---+-...-+---+---+---+-...-+---+ + * | T | D | b0| b1| ... | bn| B0| B1| ... | Bm| + * +---+---+---+---+-...-+---+---+---+-...-+---+ + * + * where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement + * byte, b0 to bn are canonical mapping bytes and B0 to Bm are + * compatibility mapping bytes. + * + * Note that compatibility decomposition means doing recursive + * decompositions using both compatibility decomposition mappings and + * canonical decomposition mappings. On the other hand, canonical + * decomposition means doing recursive decompositions using only + * canonical decomposition mappings. Since the table we have has gone + * through the recursions already, we do not need to do so during + * runtime, i.e., the table has been completely flattened out + * already. + */ + + b3_base = u8_decomp_b3_tbl[uv][b2][b3].base; + + /* Get the type, T, of the byte sequence. */ + b1 = u8_decomp_final_tbl[uv][b3_base + start_id]; + + /* + * If necessary, adjust start_id, end_id, or both. Note that if + * this is compatibility decomposition mapping, there is no + * adjustment. + */ + if (canonical_decomposition) { + /* Is the mapping only for compatibility decomposition? */ + if (b1 < U8_DECOMP_BOTH) + return ((size_t)sz); + + start_id++; + + if (b1 == U8_DECOMP_BOTH) { + end_id = start_id + + u8_decomp_final_tbl[uv][b3_base + start_id]; + start_id++; + } + } else { + /* + * Unless this is a compatibility decomposition mapping, + * we adjust the start_id. + */ + if (b1 == U8_DECOMP_BOTH) { + start_id++; + start_id += u8_decomp_final_tbl[uv][b3_base + start_id]; + } else if (b1 == U8_DECOMP_CANONICAL) { + start_id++; + } + } + + for (i = 0; start_id < end_id; start_id++) + u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id]; + u8s[i] = '\0'; + + return (i); +} + +/* + * The find_composition_start() function uses the character bytes given and + * find out the matching composition mappings if any and return the address + * to the composition mappings as explained in the do_composition(). + */ +static uchar_t * +find_composition_start(size_t uv, uchar_t *s, size_t sz) +{ + uint16_t b1 = 0; + uint16_t b2 = 0; + uint16_t b3 = 0; + uint16_t b3_tbl; + uint16_t b3_base; + uint16_t b4 = 0; + size_t start_id; + size_t end_id; + + if (sz == 1) { + b4 = s[0]; + } else if (sz == 2) { + b3 = s[0]; + b4 = s[1]; + } else if (sz == 3) { + b2 = s[0]; + b3 = s[1]; + b4 = s[2]; + } else if (sz == 4) { + b1 = s[0]; + b2 = s[1]; + b3 = s[2]; + b4 = s[3]; + } else { + /* + * This is a fallback and should not happen if the function + * was called properly. + */ + return (NULL); + } + + b1 = u8_composition_b1_tbl[uv][b1]; + if (b1 == U8_TBL_ELEMENT_NOT_DEF) + return (NULL); + + b2 = u8_composition_b2_tbl[uv][b1][b2]; + if (b2 == U8_TBL_ELEMENT_NOT_DEF) + return (NULL); + + b3_tbl = u8_composition_b3_tbl[uv][b2][b3].tbl_id; + if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) + return (NULL); + + if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) { + b3_tbl -= U8_16BIT_TABLE_INDICATOR; + start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4]; + end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1]; + } else { + start_id = u8_composition_b4_tbl[uv][b3_tbl][b4]; + end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1]; + } + + if (start_id >= end_id) + return (NULL); + + b3_base = u8_composition_b3_tbl[uv][b2][b3].base; + + return ((uchar_t *)&(u8_composition_final_tbl[uv][b3_base + start_id])); +} + +/* + * The blocked() function checks on the combining class values of previous + * characters in this sequence and return whether it is blocked or not. + */ +static boolean_t +blocked(uchar_t *comb_class, size_t last) +{ + uchar_t my_comb_class; + size_t i; + + my_comb_class = comb_class[last]; + for (i = 1; i < last; i++) + if (comb_class[i] >= my_comb_class || + comb_class[i] == U8_COMBINING_CLASS_STARTER) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * The do_composition() reads the character string pointed by 's' and + * do necessary canonical composition and then copy over the result back to + * the 's'. + * + * The input argument 's' cannot contain more than 32 characters. + */ +static size_t +do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start, + uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast) +{ + uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t tc[U8_MB_CUR_MAX]; + uint8_t saved_marks[U8_MAX_CHARS_A_SEQ]; + size_t saved_marks_count; + uchar_t *p; + uchar_t *saved_p; + uchar_t *q; + size_t i; + size_t saved_i; + size_t j; + size_t k; + size_t l; + size_t C; + size_t saved_l; + size_t size; + uint32_t u1; + uint32_t u2; + boolean_t match_not_found = B_TRUE; + + /* + * This should never happen unless the callers are doing some strange + * and unexpected things. + * + * The "last" is the index pointing to the last character not last + 1. + */ + if (last >= U8_MAX_CHARS_A_SEQ) + last = U8_UPPER_LIMIT_IN_A_SEQ; + + for (i = l = 0; i <= last; i++) { + /* + * The last or any non-Starters at the beginning, we don't + * have any chance to do composition and so we just copy them + * to the temporary buffer. + */ + if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) { +SAVE_THE_CHAR: + p = s + start[i]; + size = disp[i]; + for (k = 0; k < size; k++) + t[l++] = *p++; + continue; + } + + /* + * If this could be a start of Hangul Jamos, then, we try to + * conjoin them. + */ + if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) { + U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]], + s[start[i] + 1], s[start[i] + 2]); + U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3], + s[start[i] + 4], s[start[i] + 5]); + + if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) { + u1 -= U8_HANGUL_JAMO_L_FIRST; + u2 -= U8_HANGUL_JAMO_V_FIRST; + u1 = U8_HANGUL_SYL_FIRST + + (u1 * U8_HANGUL_V_COUNT + u2) * + U8_HANGUL_T_COUNT; + + i += 2; + if (i <= last) { + U8_PUT_3BYTES_INTO_UTF32(u2, + s[start[i]], s[start[i] + 1], + s[start[i] + 2]); + + if (U8_HANGUL_JAMO_T(u2)) { + u1 += u2 - + U8_HANGUL_JAMO_T_FIRST; + i++; + } + } + + U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1); + i--; + l += 3; + continue; + } + } + + /* + * Let's then find out if this Starter has composition + * mapping. + */ + p = find_composition_start(uv, s + start[i], disp[i]); + if (p == NULL) + goto SAVE_THE_CHAR; + + /* + * We have a Starter with composition mapping and the next + * character is a non-Starter. Let's try to find out if + * we can do composition. + */ + + saved_p = p; + saved_i = i; + saved_l = l; + saved_marks_count = 0; + +TRY_THE_NEXT_MARK: + q = s + start[++i]; + size = disp[i]; + + /* + * The next for() loop compares the non-Starter pointed by + * 'q' with the possible (joinable) characters pointed by 'p'. + * + * The composition final table entry pointed by the 'p' + * looks like the following: + * + * +---+---+---+-...-+---+---+---+---+-...-+---+---+ + * | C | b0| b2| ... | bn| F | B0| B1| ... | Bm| F | + * +---+---+---+-...-+---+---+---+---+-...-+---+---+ + * + * where C is the count byte indicating the number of + * mapping pairs where each pair would be look like + * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second + * character of a canonical decomposition and the B0-Bm are + * the bytes of a matching composite character. The F is + * a filler byte after each character as the separator. + */ + + match_not_found = B_TRUE; + + for (C = *p++; C > 0; C--) { + for (k = 0; k < size; p++, k++) + if (*p != q[k]) + break; + + /* Have we found it? */ + if (k >= size && *p == U8_TBL_ELEMENT_FILLER) { + match_not_found = B_FALSE; + + l = saved_l; + + while (*++p != U8_TBL_ELEMENT_FILLER) + t[l++] = *p; + + break; + } + + /* We didn't find; skip to the next pair. */ + if (*p != U8_TBL_ELEMENT_FILLER) + while (*++p != U8_TBL_ELEMENT_FILLER) + ; + while (*++p != U8_TBL_ELEMENT_FILLER) + ; + p++; + } + + /* + * If there was no match, we will need to save the combining + * mark for later appending. After that, if the next one + * is a non-Starter and not blocked, then, we try once + * again to do composition with the next non-Starter. + * + * If there was no match and this was a Starter, then, + * this is a new start. + * + * If there was a match and a composition done and we have + * more to check on, then, we retrieve a new composition final + * table entry for the composite and then try to do the + * composition again. + */ + + if (match_not_found) { + if (comb_class[i] == U8_COMBINING_CLASS_STARTER) { + i--; + goto SAVE_THE_CHAR; + } + + saved_marks[saved_marks_count++] = i; + } + + if (saved_l == l) { + while (i < last) { + if (blocked(comb_class, i + 1)) + saved_marks[saved_marks_count++] = ++i; + else + break; + } + if (i < last) { + p = saved_p; + goto TRY_THE_NEXT_MARK; + } + } else if (i < last) { + p = find_composition_start(uv, t + saved_l, + l - saved_l); + if (p != NULL) { + saved_p = p; + goto TRY_THE_NEXT_MARK; + } + } + + /* + * There is no more composition possible. + * + * If there was no composition what so ever then we copy + * over the original Starter and then append any non-Starters + * remaining at the target string sequentially after that. + */ + + if (saved_l == l) { + p = s + start[saved_i]; + size = disp[saved_i]; + for (j = 0; j < size; j++) + t[l++] = *p++; + } + + for (k = 0; k < saved_marks_count; k++) { + p = s + start[saved_marks[k]]; + size = disp[saved_marks[k]]; + for (j = 0; j < size; j++) + t[l++] = *p++; + } + } + + /* + * If the last character is a Starter and if we have a character + * (possibly another Starter) that can be turned into a composite, + * we do so and we do so until there is no more of composition + * possible. + */ + if (comb_class[last] == U8_COMBINING_CLASS_STARTER) { + p = *os; + saved_l = l - disp[last]; + + while (p < oslast) { + size = u8_number_of_bytes[*p]; + if (size <= 1 || (p + size) > oslast) + break; + + saved_p = p; + + for (i = 0; i < size; i++) + tc[i] = *p++; + + q = find_composition_start(uv, t + saved_l, + l - saved_l); + if (q == NULL) { + p = saved_p; + break; + } + + match_not_found = B_TRUE; + + for (C = *q++; C > 0; C--) { + for (k = 0; k < size; q++, k++) + if (*q != tc[k]) + break; + + if (k >= size && *q == U8_TBL_ELEMENT_FILLER) { + match_not_found = B_FALSE; + + l = saved_l; + + while (*++q != U8_TBL_ELEMENT_FILLER) { + /* + * This is practically + * impossible but we don't + * want to take any chances. + */ + if (l >= + U8_STREAM_SAFE_TEXT_MAX) { + p = saved_p; + goto SAFE_RETURN; + } + t[l++] = *q; + } + + break; + } + + if (*q != U8_TBL_ELEMENT_FILLER) + while (*++q != U8_TBL_ELEMENT_FILLER) + ; + while (*++q != U8_TBL_ELEMENT_FILLER) + ; + q++; + } + + if (match_not_found) { + p = saved_p; + break; + } + } +SAFE_RETURN: + *os = p; + } + + /* + * Now we copy over the temporary string to the target string. + * Since composition always reduces the number of characters or + * the number of characters stay, we don't need to worry about + * the buffer overflow here. + */ + for (i = 0; i < l; i++) + s[i] = t[i]; + s[l] = '\0'; + + return (l); +} + +/* + * The collect_a_seq() function checks on the given string s, collect + * a sequence of characters at u8s, and return the sequence. While it collects + * a sequence, it also applies case conversion, canonical or compatibility + * decomposition, canonical decomposition, or some or all of them and + * in that order. + * + * The collected sequence cannot be bigger than 32 characters since if + * it is having more than 31 characters, the sequence will be terminated + * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into + * a Stream-Safe Text. The collected sequence is always terminated with + * a null byte and the return value is the byte length of the sequence + * including 0. The return value does not include the terminating + * null byte. + */ +static size_t +collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast, + boolean_t is_it_toupper, + boolean_t is_it_tolower, + boolean_t canonical_decomposition, + boolean_t compatibility_decomposition, + boolean_t canonical_composition, + int *errnum, u8_normalization_states_t *state) +{ + uchar_t *s; + int sz; + int saved_sz; + size_t i; + size_t j; + size_t k; + size_t l; + uchar_t comb_class[U8_MAX_CHARS_A_SEQ]; + uchar_t disp[U8_MAX_CHARS_A_SEQ]; + uchar_t start[U8_MAX_CHARS_A_SEQ]; + uchar_t u8t[U8_MB_CUR_MAX]; + uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t tc; + size_t last; + size_t saved_last; + uint32_t u1; + + /* + * Save the source string pointer which we will return a changed + * pointer if we do processing. + */ + s = *source; + + /* + * The following is a fallback for just in case callers are not + * checking the string boundaries before the calling. + */ + if (s >= slast) { + u8s[0] = '\0'; + + return (0); + } + + /* + * As the first thing, let's collect a character and do case + * conversion if necessary. + */ + + sz = u8_number_of_bytes[*s]; + + if (sz < 0) { + *errnum = EILSEQ; + + u8s[0] = *s++; + u8s[1] = '\0'; + + *source = s; + + return (1); + } + + if (sz == 1) { + if (is_it_toupper) + u8s[0] = U8_ASCII_TOUPPER(*s); + else if (is_it_tolower) + u8s[0] = U8_ASCII_TOLOWER(*s); + else + u8s[0] = *s; + s++; + u8s[1] = '\0'; + } else if ((s + sz) > slast) { + *errnum = EINVAL; + + for (i = 0; s < slast; ) + u8s[i++] = *s++; + u8s[i] = '\0'; + + *source = s; + + return (i); + } else { + if (is_it_toupper || is_it_tolower) { + i = do_case_conv(uv, u8s, s, sz, is_it_toupper); + s += sz; + sz = i; + } else { + for (i = 0; i < sz; ) + u8s[i++] = *s++; + u8s[i] = '\0'; + } + } + + /* + * And then canonical/compatibility decomposition followed by + * an optional canonical composition. Please be noted that + * canonical composition is done only when a decomposition is + * done. + */ + if (canonical_decomposition || compatibility_decomposition) { + if (sz == 1) { + *state = U8_STATE_START; + + saved_sz = 1; + + comb_class[0] = 0; + start[0] = 0; + disp[0] = 1; + + last = 1; + } else { + saved_sz = do_decomp(uv, u8s, u8s, sz, + canonical_decomposition, state); + + last = 0; + + for (i = 0; i < saved_sz; ) { + sz = u8_number_of_bytes[u8s[i]]; + + comb_class[last] = combining_class(uv, + u8s + i, sz); + start[last] = i; + disp[last] = sz; + + last++; + i += sz; + } + + /* + * Decomposition yields various Hangul related + * states but not on combining marks. We need to + * find out at here by checking on the last + * character. + */ + if (*state == U8_STATE_START) { + if (comb_class[last - 1]) + *state = U8_STATE_COMBINING_MARK; + } + } + + saved_last = last; + + while (s < slast) { + sz = u8_number_of_bytes[*s]; + + /* + * If this is an illegal character, an incomplete + * character, or an 7-bit ASCII Starter character, + * then we have collected a sequence; break and let + * the next call deal with the two cases. + * + * Note that this is okay only if you are using this + * function with a fixed length string, not on + * a buffer with multiple calls of one chunk at a time. + */ + if (sz <= 1) { + break; + } else if ((s + sz) > slast) { + break; + } else { + /* + * If the previous character was a Hangul Jamo + * and this character is a Hangul Jamo that + * can be conjoined, we collect the Jamo. + */ + if (*s == U8_HANGUL_JAMO_1ST_BYTE) { + U8_PUT_3BYTES_INTO_UTF32(u1, + *s, *(s + 1), *(s + 2)); + + if (U8_HANGUL_COMPOSABLE_L_V(*state, + u1)) { + i = 0; + *state = U8_STATE_HANGUL_LV; + goto COLLECT_A_HANGUL; + } + + if (U8_HANGUL_COMPOSABLE_LV_T(*state, + u1)) { + i = 0; + *state = U8_STATE_HANGUL_LVT; + goto COLLECT_A_HANGUL; + } + } + + /* + * Regardless of whatever it was, if this is + * a Starter, we don't collect the character + * since that's a new start and we will deal + * with it at the next time. + */ + i = combining_class(uv, s, sz); + if (i == U8_COMBINING_CLASS_STARTER) + break; + + /* + * We know the current character is a combining + * mark. If the previous character wasn't + * a Starter (not Hangul) or a combining mark, + * then, we don't collect this combining mark. + */ + if (*state != U8_STATE_START && + *state != U8_STATE_COMBINING_MARK) + break; + + *state = U8_STATE_COMBINING_MARK; +COLLECT_A_HANGUL: + /* + * If we collected a Starter and combining + * marks up to 30, i.e., total 31 characters, + * then, we terminate this degenerately long + * combining sequence with a U+034F COMBINING + * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in + * UTF-8 and turn this into a Stream-Safe + * Text. This will be extremely rare but + * possible. + * + * The following will also guarantee that + * we are not writing more than 32 characters + * plus a NULL at u8s[]. + */ + if (last >= U8_UPPER_LIMIT_IN_A_SEQ) { +TURN_STREAM_SAFE: + *state = U8_STATE_START; + comb_class[last] = 0; + start[last] = saved_sz; + disp[last] = 2; + last++; + + u8s[saved_sz++] = 0xCD; + u8s[saved_sz++] = 0x8F; + + break; + } + + /* + * Some combining marks also do decompose into + * another combining mark or marks. + */ + if (*state == U8_STATE_COMBINING_MARK) { + k = last; + l = sz; + i = do_decomp(uv, uts, s, sz, + canonical_decomposition, state); + for (j = 0; j < i; ) { + sz = u8_number_of_bytes[uts[j]]; + + comb_class[last] = + combining_class(uv, + uts + j, sz); + start[last] = saved_sz + j; + disp[last] = sz; + + last++; + if (last >= + U8_UPPER_LIMIT_IN_A_SEQ) { + last = k; + goto TURN_STREAM_SAFE; + } + j += sz; + } + + *state = U8_STATE_COMBINING_MARK; + sz = i; + s += l; + + for (i = 0; i < sz; i++) + u8s[saved_sz++] = uts[i]; + } else { + comb_class[last] = i; + start[last] = saved_sz; + disp[last] = sz; + last++; + + for (i = 0; i < sz; i++) + u8s[saved_sz++] = *s++; + } + + /* + * If this is U+0345 COMBINING GREEK + * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a., + * iota subscript, and need to be converted to + * uppercase letter, convert it to U+0399 GREEK + * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8), + * i.e., convert to capital adscript form as + * specified in the Unicode standard. + * + * This is the only special case of (ambiguous) + * case conversion at combining marks and + * probably the standard will never have + * anything similar like this in future. + */ + if (is_it_toupper && sz >= 2 && + u8s[saved_sz - 2] == 0xCD && + u8s[saved_sz - 1] == 0x85) { + u8s[saved_sz - 2] = 0xCE; + u8s[saved_sz - 1] = 0x99; + } + } + } + + /* + * Let's try to ensure a canonical ordering for the collected + * combining marks. We do this only if we have collected + * at least one more non-Starter. (The decomposition mapping + * data tables have fully (and recursively) expanded and + * canonically ordered decompositions.) + * + * The U8_SWAP_COMB_MARKS() convenience macro has some + * assumptions and we are meeting the assumptions. + */ + last--; + if (last >= saved_last) { + for (i = 0; i < last; i++) + for (j = last; j > i; j--) + if (comb_class[j] && + comb_class[j - 1] > comb_class[j]) { + U8_SWAP_COMB_MARKS(j - 1, j); + } + } + + *source = s; + + if (! canonical_composition) { + u8s[saved_sz] = '\0'; + return (saved_sz); + } + + /* + * Now do the canonical composition. Note that we do this + * only after a canonical or compatibility decomposition to + * finish up NFC or NFKC. + */ + sz = do_composition(uv, u8s, comb_class, start, disp, last, + &s, slast); + } + + *source = s; + + return ((size_t)sz); +} + +/* + * The do_norm_compare() function does string comparion based on Unicode + * simple case mappings and Unicode Normalization definitions. + * + * It does so by collecting a sequence of character at a time and comparing + * the collected sequences from the strings. + * + * The meanings on the return values are the same as the usual strcmp(). + */ +static int +do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2, + int flag, int *errnum) +{ + int result; + size_t sz1; + size_t sz2; + uchar_t u8s1[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t u8s2[U8_STREAM_SAFE_TEXT_MAX + 1]; + uchar_t *s1last; + uchar_t *s2last; + boolean_t is_it_toupper; + boolean_t is_it_tolower; + boolean_t canonical_decomposition; + boolean_t compatibility_decomposition; + boolean_t canonical_composition; + u8_normalization_states_t state; + + s1last = s1 + n1; + s2last = s2 + n2; + + is_it_toupper = flag & U8_TEXTPREP_TOUPPER; + is_it_tolower = flag & U8_TEXTPREP_TOLOWER; + canonical_decomposition = flag & U8_CANON_DECOMP; + compatibility_decomposition = flag & U8_COMPAT_DECOMP; + canonical_composition = flag & U8_CANON_COMP; + + while (s1 < s1last && s2 < s2last) { + /* + * If the current character is a 7-bit ASCII and the last + * character, or, if the current character and the next + * character are both some 7-bit ASCII characters then + * we treat the current character as a sequence. + * + * In any other cases, we need to call collect_a_seq(). + */ + + if (U8_ISASCII(*s1) && ((s1 + 1) >= s1last || + ((s1 + 1) < s1last && U8_ISASCII(*(s1 + 1))))) { + if (is_it_toupper) + u8s1[0] = U8_ASCII_TOUPPER(*s1); + else if (is_it_tolower) + u8s1[0] = U8_ASCII_TOLOWER(*s1); + else + u8s1[0] = *s1; + u8s1[1] = '\0'; + sz1 = 1; + s1++; + } else { + state = U8_STATE_START; + sz1 = collect_a_seq(uv, u8s1, &s1, s1last, + is_it_toupper, is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, errnum, &state); + } + + if (U8_ISASCII(*s2) && ((s2 + 1) >= s2last || + ((s2 + 1) < s2last && U8_ISASCII(*(s2 + 1))))) { + if (is_it_toupper) + u8s2[0] = U8_ASCII_TOUPPER(*s2); + else if (is_it_tolower) + u8s2[0] = U8_ASCII_TOLOWER(*s2); + else + u8s2[0] = *s2; + u8s2[1] = '\0'; + sz2 = 1; + s2++; + } else { + state = U8_STATE_START; + sz2 = collect_a_seq(uv, u8s2, &s2, s2last, + is_it_toupper, is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, errnum, &state); + } + + /* + * Now compare the two characters. If they are the same, + * we move on to the next character sequences. + */ + if (sz1 == 1 && sz2 == 1) { + if (*u8s1 > *u8s2) + return (1); + if (*u8s1 < *u8s2) + return (-1); + } else { + result = strcmp((const char *)u8s1, (const char *)u8s2); + if (result != 0) + return (result); + } + } + + /* + * We compared until the end of either or both strings. + * + * If we reached to or went over the ends for the both, that means + * they are the same. + * + * If we reached only one end, that means the other string has + * something which then can be used to determine the return value. + */ + if (s1 >= s1last) { + if (s2 >= s2last) + return (0); + return (-1); + } + return (1); +} + +/* + * The u8_strcmp() function compares two UTF-8 strings quite similar to + * the strcmp(). For the comparison, however, Unicode Normalization specific + * equivalency and Unicode simple case conversion mappings based equivalency + * can be requested and checked against. + */ +int +u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, + int *errnum) +{ + int f; + size_t n1; + size_t n2; + + *errnum = 0; + + /* + * Check on the requested Unicode version, case conversion, and + * normalization flag values. + */ + + if (uv > U8_UNICODE_LATEST) { + *errnum = ERANGE; + uv = U8_UNICODE_LATEST; + } + + if (flag == 0) { + flag = U8_STRCMP_CS; + } else { + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER | + U8_STRCMP_CI_LOWER); + if (f == 0) { + flag |= U8_STRCMP_CS; + } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && + f != U8_STRCMP_CI_LOWER) { + *errnum = EBADF; + flag = U8_STRCMP_CS; + } + + f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); + if (f && f != U8_STRCMP_NFD && f != U8_STRCMP_NFC && + f != U8_STRCMP_NFKD && f != U8_STRCMP_NFKC) { + *errnum = EBADF; + flag = U8_STRCMP_CS; + } + } + + if (flag == U8_STRCMP_CS) { + return (n == 0 ? strcmp(s1, s2) : strncmp(s1, s2, n)); + } + + n1 = strlen(s1); + n2 = strlen(s2); + if (n != 0) { + if (n < n1) + n1 = n; + if (n < n2) + n2 = n; + } + + /* + * Simple case conversion can be done much faster and so we do + * them separately here. + */ + if (flag == U8_STRCMP_CI_UPPER) { + return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, + n1, n2, B_TRUE, errnum)); + } else if (flag == U8_STRCMP_CI_LOWER) { + return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, + n1, n2, B_FALSE, errnum)); + } + + return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, + flag, errnum)); +} + +size_t +u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, + int flag, size_t unicode_version, int *errnum) +{ + int f; + int sz; + uchar_t *ib; + uchar_t *ibtail; + uchar_t *ob; + uchar_t *obtail; + boolean_t do_not_ignore_null; + boolean_t do_not_ignore_invalid; + boolean_t is_it_toupper; + boolean_t is_it_tolower; + boolean_t canonical_decomposition; + boolean_t compatibility_decomposition; + boolean_t canonical_composition; + size_t ret_val; + size_t i; + size_t j; + uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1]; + u8_normalization_states_t state; + + if (unicode_version > U8_UNICODE_LATEST) { + *errnum = ERANGE; + return ((size_t)-1); + } + + f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER); + if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) { + *errnum = EBADF; + return ((size_t)-1); + } + + f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); + if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC && + f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) { + *errnum = EBADF; + return ((size_t)-1); + } + + if (inarray == NULL || *inlen == 0) + return (0); + + if (outarray == NULL) { + *errnum = E2BIG; + return ((size_t)-1); + } + + ib = (uchar_t *)inarray; + ob = (uchar_t *)outarray; + ibtail = ib + *inlen; + obtail = ob + *outlen; + + do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL); + do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID); + is_it_toupper = flag & U8_TEXTPREP_TOUPPER; + is_it_tolower = flag & U8_TEXTPREP_TOLOWER; + + ret_val = 0; + + /* + * If we don't have a normalization flag set, we do the simple case + * conversion based text preparation separately below. Text + * preparation involving Normalization will be done in the false task + * block, again, separately since it will take much more time and + * resource than doing simple case conversions. + */ + if (f == 0) { + while (ib < ibtail) { + if (*ib == '\0' && do_not_ignore_null) + break; + + sz = u8_number_of_bytes[*ib]; + + if (sz < 0) { + if (do_not_ignore_invalid) { + *errnum = EILSEQ; + ret_val = (size_t)-1; + break; + } + + sz = 1; + ret_val++; + } + + if (sz == 1) { + if (ob >= obtail) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + if (is_it_toupper) + *ob = U8_ASCII_TOUPPER(*ib); + else if (is_it_tolower) + *ob = U8_ASCII_TOLOWER(*ib); + else + *ob = *ib; + ib++; + ob++; + } else if ((ib + sz) > ibtail) { + if (do_not_ignore_invalid) { + *errnum = EINVAL; + ret_val = (size_t)-1; + break; + } + + if ((obtail - ob) < (ibtail - ib)) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + /* + * We treat the remaining incomplete character + * bytes as a character. + */ + ret_val++; + + while (ib < ibtail) + *ob++ = *ib++; + } else { + if (is_it_toupper || is_it_tolower) { + i = do_case_conv(unicode_version, u8s, + ib, sz, is_it_toupper); + + if ((obtail - ob) < i) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + ib += sz; + + for (sz = 0; sz < i; sz++) + *ob++ = u8s[sz]; + } else { + if ((obtail - ob) < sz) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + for (i = 0; i < sz; i++) + *ob++ = *ib++; + } + } + } + } else { + canonical_decomposition = flag & U8_CANON_DECOMP; + compatibility_decomposition = flag & U8_COMPAT_DECOMP; + canonical_composition = flag & U8_CANON_COMP; + + while (ib < ibtail) { + if (*ib == '\0' && do_not_ignore_null) + break; + + /* + * If the current character is a 7-bit ASCII + * character and it is the last character, or, + * if the current character is a 7-bit ASCII + * character and the next character is also a 7-bit + * ASCII character, then, we copy over this + * character without going through collect_a_seq(). + * + * In any other cases, we need to look further with + * the collect_a_seq() function. + */ + if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail || + ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) { + if (ob >= obtail) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + if (is_it_toupper) + *ob = U8_ASCII_TOUPPER(*ib); + else if (is_it_tolower) + *ob = U8_ASCII_TOLOWER(*ib); + else + *ob = *ib; + ib++; + ob++; + } else { + *errnum = 0; + state = U8_STATE_START; + + j = collect_a_seq(unicode_version, u8s, + &ib, ibtail, + is_it_toupper, + is_it_tolower, + canonical_decomposition, + compatibility_decomposition, + canonical_composition, + errnum, &state); + + if (*errnum && do_not_ignore_invalid) { + ret_val = (size_t)-1; + break; + } + + if ((obtail - ob) < j) { + *errnum = E2BIG; + ret_val = (size_t)-1; + break; + } + + for (i = 0; i < j; i++) + *ob++ = u8s[i]; + } + } + } + + *inlen = ibtail - ib; + *outlen = obtail - ob; + + return (ret_val); +} diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c new file mode 100644 index 000000000000..ed9b67ea3bc9 --- /dev/null +++ b/common/zfs/zfs_comutil.c @@ -0,0 +1,202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * This file is intended for functions that ought to be common between user + * land (libzfs) and the kernel. When many common routines need to be shared + * then a separate file should to be created. + */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include <sys/types.h> +#include <sys/fs/zfs.h> +#include <sys/int_limits.h> +#include <sys/nvpair.h> +#include "zfs_comutil.h" + +/* + * Are there allocatable vdevs? + */ +boolean_t +zfs_allocatable_devs(nvlist_t *nv) +{ + uint64_t is_log; + uint_t c; + nvlist_t **child; + uint_t children; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) { + return (B_FALSE); + } + for (c = 0; c < children; c++) { + is_log = 0; + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (!is_log) + return (B_TRUE); + } + return (B_FALSE); +} + +void +zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp) +{ + nvlist_t *policy; + nvpair_t *elem; + char *nm; + + /* Defaults */ + zrpp->zrp_request = ZPOOL_NO_REWIND; + zrpp->zrp_maxmeta = 0; + zrpp->zrp_maxdata = UINT64_MAX; + zrpp->zrp_txg = UINT64_MAX; + + if (nvl == NULL) + return; + + elem = NULL; + while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + nm = nvpair_name(elem); + if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) { + if (nvpair_value_nvlist(elem, &policy) == 0) + zpool_get_rewind_policy(policy, zrpp); + return; + } else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) { + if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0) + if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES) + zrpp->zrp_request = ZPOOL_NO_REWIND; + } else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_txg); + } else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta); + } else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) { + (void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata); + } + } + if (zrpp->zrp_request == 0) + zrpp->zrp_request = ZPOOL_NO_REWIND; +} + +typedef struct zfs_version_spa_map { + int version_zpl; + int version_spa; +} zfs_version_spa_map_t; + +/* + * Keep this table in monotonically increasing version number order. + */ +static zfs_version_spa_map_t zfs_version_table[] = { + {ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL}, + {ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL}, + {ZPL_VERSION_FUID, SPA_VERSION_FUID}, + {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, + {ZPL_VERSION_SA, SPA_VERSION_SA}, + {0, 0} +}; + +/* + * Return the max zpl version for a corresponding spa version + * -1 is returned if no mapping exists. + */ +int +zfs_zpl_version_map(int spa_version) +{ + int i; + int version = -1; + + for (i = 0; zfs_version_table[i].version_spa; i++) { + if (spa_version >= zfs_version_table[i].version_spa) + version = zfs_version_table[i].version_zpl; + } + + return (version); +} + +/* + * Return the min spa version for a corresponding spa version + * -1 is returned if no mapping exists. + */ +int +zfs_spa_version_map(int zpl_version) +{ + int i; + int version = -1; + + for (i = 0; zfs_version_table[i].version_zpl; i++) { + if (zfs_version_table[i].version_zpl >= zpl_version) + return (zfs_version_table[i].version_spa); + } + + return (version); +} + +const char *zfs_history_event_names[LOG_END] = { + "invalid event", + "pool create", + "vdev add", + "pool remove", + "pool destroy", + "pool export", + "pool import", + "vdev attach", + "vdev replace", + "vdev detach", + "vdev online", + "vdev offline", + "vdev upgrade", + "pool clear", + "pool scrub", + "pool property set", + "create", + "clone", + "destroy", + "destroy_begin_sync", + "inherit", + "property set", + "quota set", + "permission update", + "permission remove", + "permission who remove", + "promote", + "receive", + "rename", + "reservation set", + "replay_inc_sync", + "replay_full_sync", + "rollback", + "snapshot", + "filesystem version upgrade", + "refquota set", + "refreservation set", + "pool scrub done", + "user hold", + "user release", + "pool split", +}; diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h new file mode 100644 index 000000000000..61327f9aa909 --- /dev/null +++ b/common/zfs/zfs_comutil.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ZFS_COMUTIL_H +#define _ZFS_COMUTIL_H + +#include <sys/fs/zfs.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern boolean_t zfs_allocatable_devs(nvlist_t *); +extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); + +extern int zfs_zpl_version_map(int spa_version); +extern int zfs_spa_version_map(int zpl_version); +extern const char *zfs_history_event_names[LOG_END]; + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_COMUTIL_H */ diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c new file mode 100644 index 000000000000..83d9edb21389 --- /dev/null +++ b/common/zfs/zfs_deleg.c @@ -0,0 +1,237 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#include <sys/sunddi.h> +#include <sys/ctype.h> +#else +#include <stdio.h> +#include <unistd.h> +#include <strings.h> +#include <libnvpair.h> +#include <ctype.h> +#endif +/* XXX includes zfs_context.h, so why bother with the above? */ +#include <sys/dsl_deleg.h> +#include "zfs_prop.h" +#include "zfs_deleg.h" +#include "zfs_namecheck.h" + +/* + * permission table + * + * Keep this table in sorted order + * + * This table is used for displaying all permissions for + * zfs allow + */ + +zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { + {ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW}, + {ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, + {ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, + {ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, + {ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, + {ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, + {ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, + {ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, + {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, + {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, + {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, + {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, + {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, + {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, + {ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, + {ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, + {ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, + {ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, + {ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, + {NULL, ZFS_DELEG_NOTE_NONE } +}; + +static int +zfs_valid_permission_name(const char *perm) +{ + if (zfs_deleg_canonicalize_perm(perm)) + return (0); + + return (permset_namecheck(perm, NULL, NULL)); +} + +const char * +zfs_deleg_canonicalize_perm(const char *perm) +{ + int i; + zfs_prop_t prop; + + for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) { + if (strcmp(perm, zfs_deleg_perm_tab[i].z_perm) == 0) + return (perm); + } + + prop = zfs_name_to_prop(perm); + if (prop != ZPROP_INVAL && zfs_prop_delegatable(prop)) + return (zfs_prop_to_name(prop)); + return (NULL); + +} + +static int +zfs_validate_who(char *who) +{ + char *p; + + if (who[2] != ZFS_DELEG_FIELD_SEP_CHR) + return (-1); + + switch (who[0]) { + case ZFS_DELEG_USER: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP_SETS: + if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) + return (-1); + for (p = &who[3]; *p; p++) + if (!isdigit(*p)) + return (-1); + break; + + case ZFS_DELEG_NAMED_SET: + case ZFS_DELEG_NAMED_SET_SETS: + if (who[1] != ZFS_DELEG_NA) + return (-1); + return (permset_namecheck(&who[3], NULL, NULL)); + + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + if (who[1] != ZFS_DELEG_NA) + return (-1); + if (who[3] != '\0') + return (-1); + break; + + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) + return (-1); + if (who[3] != '\0') + return (-1); + break; + + default: + return (-1); + } + + return (0); +} + +int +zfs_deleg_verify_nvlist(nvlist_t *nvp) +{ + nvpair_t *who, *perm_name; + nvlist_t *perms; + int error; + + if (nvp == NULL) + return (-1); + + who = nvlist_next_nvpair(nvp, NULL); + if (who == NULL) + return (-1); + + do { + if (zfs_validate_who(nvpair_name(who))) + return (-1); + + error = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms); + + if (error && error != ENOENT) + return (-1); + if (error == ENOENT) + continue; + + perm_name = nvlist_next_nvpair(perms, NULL); + if (perm_name == NULL) { + return (-1); + } + do { + error = zfs_valid_permission_name( + nvpair_name(perm_name)); + if (error) + return (-1); + } while (perm_name = nvlist_next_nvpair(perms, perm_name)); + } while (who = nvlist_next_nvpair(nvp, who)); + return (0); +} + +/* + * Construct the base attribute name. The base attribute names + * are the "key" to locate the jump objects which contain the actual + * permissions. The base attribute names are encoded based on + * type of entry and whether it is a local or descendent permission. + * + * Arguments: + * attr - attribute name return string, attribute is assumed to be + * ZFS_MAX_DELEG_NAME long. + * type - type of entry to construct + * inheritchr - inheritance type (local,descendent, or NA for create and + * permission set definitions + * data - is either a permission set name or a 64 bit uid/gid. + */ +void +zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, + char inheritchr, void *data) +{ + int len = ZFS_MAX_DELEG_NAME; + uint64_t *id = data; + + switch (type) { + case ZFS_DELEG_USER: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP_SETS: + (void) snprintf(attr, len, "%c%c%c%lld", type, inheritchr, + ZFS_DELEG_FIELD_SEP_CHR, (longlong_t)*id); + break; + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + (void) snprintf(attr, len, "%c-%c%s", type, + ZFS_DELEG_FIELD_SEP_CHR, (char *)data); + break; + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + (void) snprintf(attr, len, "%c-%c", type, + ZFS_DELEG_FIELD_SEP_CHR); + break; + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + (void) snprintf(attr, len, "%c%c%c", type, inheritchr, + ZFS_DELEG_FIELD_SEP_CHR); + break; + default: + ASSERT(!"bad zfs_deleg_who_type_t"); + } +} diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h new file mode 100644 index 000000000000..b4cb8e2b4e37 --- /dev/null +++ b/common/zfs/zfs_deleg.h @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ZFS_DELEG_H +#define _ZFS_DELEG_H + +#include <sys/fs/zfs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_DELEG_SET_NAME_CHR '@' /* set name lead char */ +#define ZFS_DELEG_FIELD_SEP_CHR '$' /* field separator */ + +/* + * Max name length for a delegation attribute + */ +#define ZFS_MAX_DELEG_NAME 128 + +#define ZFS_DELEG_LOCAL 'l' +#define ZFS_DELEG_DESCENDENT 'd' +#define ZFS_DELEG_NA '-' + +typedef enum { + ZFS_DELEG_NOTE_CREATE, + ZFS_DELEG_NOTE_DESTROY, + ZFS_DELEG_NOTE_SNAPSHOT, + ZFS_DELEG_NOTE_ROLLBACK, + ZFS_DELEG_NOTE_CLONE, + ZFS_DELEG_NOTE_PROMOTE, + ZFS_DELEG_NOTE_RENAME, + ZFS_DELEG_NOTE_RECEIVE, + ZFS_DELEG_NOTE_ALLOW, + ZFS_DELEG_NOTE_USERPROP, + ZFS_DELEG_NOTE_MOUNT, + ZFS_DELEG_NOTE_SHARE, + ZFS_DELEG_NOTE_USERQUOTA, + ZFS_DELEG_NOTE_GROUPQUOTA, + ZFS_DELEG_NOTE_USERUSED, + ZFS_DELEG_NOTE_GROUPUSED, + ZFS_DELEG_NOTE_HOLD, + ZFS_DELEG_NOTE_RELEASE, + ZFS_DELEG_NOTE_DIFF, + ZFS_DELEG_NOTE_NONE +} zfs_deleg_note_t; + +typedef struct zfs_deleg_perm_tab { + char *z_perm; + zfs_deleg_note_t z_note; +} zfs_deleg_perm_tab_t; + +extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[]; + +int zfs_deleg_verify_nvlist(nvlist_t *nvlist); +void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, + char checkflag, void *data); +const char *zfs_deleg_canonicalize_perm(const char *perm); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_DELEG_H */ diff --git a/common/zfs/zfs_fletcher.c b/common/zfs/zfs_fletcher.c new file mode 100644 index 000000000000..fa43ce6bdb5d --- /dev/null +++ b/common/zfs/zfs_fletcher.c @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Fletcher Checksums + * ------------------ + * + * ZFS's 2nd and 4th order Fletcher checksums are defined by the following + * recurrence relations: + * + * a = a + f + * i i-1 i-1 + * + * b = b + a + * i i-1 i + * + * c = c + b (fletcher-4 only) + * i i-1 i + * + * d = d + c (fletcher-4 only) + * i i-1 i + * + * Where + * a_0 = b_0 = c_0 = d_0 = 0 + * and + * f_0 .. f_(n-1) are the input data. + * + * Using standard techniques, these translate into the following series: + * + * __n_ __n_ + * \ | \ | + * a = > f b = > i * f + * n /___| n - i n /___| n - i + * i = 1 i = 1 + * + * + * __n_ __n_ + * \ | i*(i+1) \ | i*(i+1)*(i+2) + * c = > ------- f d = > ------------- f + * n /___| 2 n - i n /___| 6 n - i + * i = 1 i = 1 + * + * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. + * Since the additions are done mod (2^64), errors in the high bits may not + * be noticed. For this reason, fletcher-2 is deprecated. + * + * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. + * A conservative estimate of how big the buffer can get before we overflow + * can be estimated using f_i = 0xffffffff for all i: + * + * % bc + * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 + * 2264 + * quit + * % + * + * So blocks of up to 2k will not overflow. Our largest block size is + * 128k, which has 32k 4-byte words, so we can compute the largest possible + * accumulators, then divide by 2^64 to figure the max amount of overflow: + * + * % bc + * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } + * a/2^64;b/2^64;c/2^64;d/2^64 + * 0 + * 0 + * 1365 + * 11186858 + * quit + * % + * + * So a and b cannot overflow. To make sure each bit of input has some + * effect on the contents of c and d, we can look at what the factors of + * the coefficients in the equations for c_n and d_n are. The number of 2s + * in the factors determines the lowest set bit in the multiplier. Running + * through the cases for n*(n+1)/2 reveals that the highest power of 2 is + * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow + * the 64-bit accumulators, every bit of every f_i effects every accumulator, + * even for 128k blocks. + * + * If we wanted to make a stronger version of fletcher4 (fletcher4c?), + * we could do our calculations mod (2^32 - 1) by adding in the carries + * periodically, and store the number of carries in the top 32-bits. + * + * -------------------- + * Checksum Performance + * -------------------- + * + * There are two interesting components to checksum performance: cached and + * uncached performance. With cached data, fletcher-2 is about four times + * faster than fletcher-4. With uncached data, the performance difference is + * negligible, since the cost of a cache fill dominates the processing time. + * Even though fletcher-4 is slower than fletcher-2, it is still a pretty + * efficient pass over the data. + * + * In normal operation, the data which is being checksummed is in a buffer + * which has been filled either by: + * + * 1. a compression step, which will be mostly cached, or + * 2. a bcopy() or copyin(), which will be uncached (because the + * copy is cache-bypassing). + * + * For both cached and uncached data, both fletcher checksums are much faster + * than sha-256, and slower than 'off', which doesn't touch the data at all. + */ + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/byteorder.h> +#include <sys/zio.h> +#include <sys/spa.h> + +void +fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += ip[0]; + a1 += ip[1]; + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += BSWAP_64(ip[0]); + a1 += BSWAP_64(ip[1]); + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_native(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_byteswap(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} diff --git a/common/zfs/zfs_fletcher.h b/common/zfs/zfs_fletcher.h new file mode 100644 index 000000000000..b49df0cf4f0f --- /dev/null +++ b/common/zfs/zfs_fletcher.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_FLETCHER_H +#define _ZFS_FLETCHER_H + +#include <sys/types.h> +#include <sys/spa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * fletcher checksum functions + */ + +void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_incremental_native(const void *, uint64_t, + zio_cksum_t *); +void fletcher_4_incremental_byteswap(const void *, uint64_t, + zio_cksum_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_FLETCHER_H */ diff --git a/common/zfs/zfs_namecheck.c b/common/zfs/zfs_namecheck.c new file mode 100644 index 000000000000..5cfafea471b3 --- /dev/null +++ b/common/zfs/zfs_namecheck.c @@ -0,0 +1,345 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Common name validation routines for ZFS. These routines are shared by the + * userland code as well as the ioctl() layer to ensure that we don't + * inadvertently expose a hole through direct ioctl()s that never gets tested. + * In userland, however, we want significantly more information about _why_ the + * name is invalid. In the kernel, we only care whether it's valid or not. + * Each routine therefore takes a 'namecheck_err_t' which describes exactly why + * the name failed to validate. + * + * Each function returns 0 on success, -1 on error. + */ + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <string.h> +#endif + +#include <sys/param.h> +#include <sys/nvpair.h> +#include "zfs_namecheck.h" +#include "zfs_deleg.h" + +static int +valid_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == ':' || c == ' '); +} + +/* + * Snapshot names must be made up of alphanumeric characters plus the following + * characters: + * + * [-_.: ] + */ +int +snapshot_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + const char *loc; + + if (strlen(path) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + if (path[0] == '\0') { + if (why) + *why = NAME_ERR_EMPTY_COMPONENT; + return (-1); + } + + for (loc = path; *loc; loc++) { + if (!valid_char(*loc)) { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *loc; + } + return (-1); + } + } + return (0); +} + + +/* + * Permissions set name must start with the letter '@' followed by the + * same character restrictions as snapshot names, except that the name + * cannot exceed 64 characters. + */ +int +permset_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + if (strlen(path) >= ZFS_PERMSET_MAXLEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + if (path[0] != '@') { + if (why) { + *why = NAME_ERR_NO_AT; + *what = path[0]; + } + return (-1); + } + + return (snapshot_namecheck(&path[1], why, what)); +} + +/* + * Dataset names must be of the following form: + * + * [component][/]*[component][@component] + * + * Where each component is made up of alphanumeric characters plus the following + * characters: + * + * [-_.:%] + * + * We allow '%' here as we use that character internally to create unique + * names for temporary clones (for online recv). + */ +int +dataset_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + const char *loc, *end; + int found_snapshot; + + /* + * Make sure the name is not too long. + * + * ZFS_MAXNAMELEN is the maximum dataset length used in the userland + * which is the same as MAXNAMELEN used in the kernel. + * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all + * places using MAXNAMELEN. + */ + + if (strlen(path) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + /* Explicitly check for a leading slash. */ + if (path[0] == '/') { + if (why) + *why = NAME_ERR_LEADING_SLASH; + return (-1); + } + + if (path[0] == '\0') { + if (why) + *why = NAME_ERR_EMPTY_COMPONENT; + return (-1); + } + + loc = path; + found_snapshot = 0; + for (;;) { + /* Find the end of this component */ + end = loc; + while (*end != '/' && *end != '@' && *end != '\0') + end++; + + if (*end == '\0' && end[-1] == '/') { + /* trailing slashes are not allowed */ + if (why) + *why = NAME_ERR_TRAILING_SLASH; + return (-1); + } + + /* Zero-length components are not allowed */ + if (loc == end) { + if (why) { + /* + * Make sure this is really a zero-length + * component and not a '@@'. + */ + if (*end == '@' && found_snapshot) { + *why = NAME_ERR_MULTIPLE_AT; + } else { + *why = NAME_ERR_EMPTY_COMPONENT; + } + } + + return (-1); + } + + /* Validate the contents of this component */ + while (loc != end) { + if (!valid_char(*loc) && *loc != '%') { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *loc; + } + return (-1); + } + loc++; + } + + /* If we've reached the end of the string, we're OK */ + if (*end == '\0') + return (0); + + if (*end == '@') { + /* + * If we've found an @ symbol, indicate that we're in + * the snapshot component, and report a second '@' + * character as an error. + */ + if (found_snapshot) { + if (why) + *why = NAME_ERR_MULTIPLE_AT; + return (-1); + } + + found_snapshot = 1; + } + + /* + * If there is a '/' in a snapshot name + * then report an error + */ + if (*end == '/' && found_snapshot) { + if (why) + *why = NAME_ERR_TRAILING_SLASH; + return (-1); + } + + /* Update to the next component */ + loc = end + 1; + } +} + + +/* + * mountpoint names must be of the following form: + * + * /[component][/]*[component][/] + */ +int +mountpoint_namecheck(const char *path, namecheck_err_t *why) +{ + const char *start, *end; + + /* + * Make sure none of the mountpoint component names are too long. + * If a component name is too long then the mkdir of the mountpoint + * will fail but then the mountpoint property will be set to a value + * that can never be mounted. Better to fail before setting the prop. + * Extra slashes are OK, they will be tossed by the mountpoint mkdir. + */ + + if (path == NULL || *path != '/') { + if (why) + *why = NAME_ERR_LEADING_SLASH; + return (-1); + } + + /* Skip leading slash */ + start = &path[1]; + do { + end = start; + while (*end != '/' && *end != '\0') + end++; + + if (end - start >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + start = end + 1; + + } while (*end != '\0'); + + return (0); +} + +/* + * For pool names, we have the same set of valid characters as described in + * dataset names, with the additional restriction that the pool name must begin + * with a letter. The pool names 'raidz' and 'mirror' are also reserved names + * that cannot be used. + */ +int +pool_namecheck(const char *pool, namecheck_err_t *why, char *what) +{ + const char *c; + + /* + * Make sure the name is not too long. + * + * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland + * which is the same as MAXNAMELEN used in the kernel. + * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all + * places using MAXNAMELEN. + */ + if (strlen(pool) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + c = pool; + while (*c != '\0') { + if (!valid_char(*c)) { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *c; + } + return (-1); + } + c++; + } + + if (!(*pool >= 'a' && *pool <= 'z') && + !(*pool >= 'A' && *pool <= 'Z')) { + if (why) + *why = NAME_ERR_NOLETTER; + return (-1); + } + + if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) { + if (why) + *why = NAME_ERR_RESERVED; + return (-1); + } + + if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) { + if (why) + *why = NAME_ERR_DISKLIKE; + return (-1); + } + + return (0); +} diff --git a/common/zfs/zfs_namecheck.h b/common/zfs/zfs_namecheck.h new file mode 100644 index 000000000000..7711da099be9 --- /dev/null +++ b/common/zfs/zfs_namecheck.h @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_NAMECHECK_H +#define _ZFS_NAMECHECK_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + NAME_ERR_LEADING_SLASH, /* name begins with leading slash */ + NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */ + NAME_ERR_TRAILING_SLASH, /* name ends with a slash */ + NAME_ERR_INVALCHAR, /* invalid character found */ + NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */ + NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */ + NAME_ERR_RESERVED, /* entire name is reserved */ + NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ + NAME_ERR_TOOLONG, /* name is too long */ + NAME_ERR_NO_AT, /* permission set is missing '@' */ +} namecheck_err_t; + +#define ZFS_PERMSET_MAXLEN 64 + +int pool_namecheck(const char *, namecheck_err_t *, char *); +int dataset_namecheck(const char *, namecheck_err_t *, char *); +int mountpoint_namecheck(const char *, namecheck_err_t *); +int snapshot_namecheck(const char *, namecheck_err_t *, char *); +int permset_namecheck(const char *, namecheck_err_t *, char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_NAMECHECK_H */ diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c new file mode 100644 index 000000000000..f29bcf62718f --- /dev/null +++ b/common/zfs/zfs_prop.c @@ -0,0 +1,595 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/u8_textprep.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_znode.h> + +#include "zfs_prop.h" +#include "zfs_deleg.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; + +/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ +const char *zfs_userquota_prop_prefixes[] = { + "userused@", + "userquota@", + "groupused@", + "groupquota@" +}; + +zprop_desc_t * +zfs_prop_get_table(void) +{ + return (zfs_prop_table); +} + +void +zfs_prop_init(void) +{ + static zprop_index_t checksum_table[] = { + { "on", ZIO_CHECKSUM_ON }, + { "off", ZIO_CHECKSUM_OFF }, + { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, + { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, + { "sha256", ZIO_CHECKSUM_SHA256 }, + { NULL } + }; + + static zprop_index_t dedup_table[] = { + { "on", ZIO_CHECKSUM_ON }, + { "off", ZIO_CHECKSUM_OFF }, + { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, + { "sha256", ZIO_CHECKSUM_SHA256 }, + { "sha256,verify", + ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, + { NULL } + }; + + static zprop_index_t compress_table[] = { + { "on", ZIO_COMPRESS_ON }, + { "off", ZIO_COMPRESS_OFF }, + { "lzjb", ZIO_COMPRESS_LZJB }, + { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ + { "gzip-1", ZIO_COMPRESS_GZIP_1 }, + { "gzip-2", ZIO_COMPRESS_GZIP_2 }, + { "gzip-3", ZIO_COMPRESS_GZIP_3 }, + { "gzip-4", ZIO_COMPRESS_GZIP_4 }, + { "gzip-5", ZIO_COMPRESS_GZIP_5 }, + { "gzip-6", ZIO_COMPRESS_GZIP_6 }, + { "gzip-7", ZIO_COMPRESS_GZIP_7 }, + { "gzip-8", ZIO_COMPRESS_GZIP_8 }, + { "gzip-9", ZIO_COMPRESS_GZIP_9 }, + { "zle", ZIO_COMPRESS_ZLE }, + { NULL } + }; + + static zprop_index_t snapdir_table[] = { + { "hidden", ZFS_SNAPDIR_HIDDEN }, + { "visible", ZFS_SNAPDIR_VISIBLE }, + { NULL } + }; + + static zprop_index_t acl_inherit_table[] = { + { "discard", ZFS_ACL_DISCARD }, + { "noallow", ZFS_ACL_NOALLOW }, + { "restricted", ZFS_ACL_RESTRICTED }, + { "passthrough", ZFS_ACL_PASSTHROUGH }, + { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */ + { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, + { NULL } + }; + + static zprop_index_t case_table[] = { + { "sensitive", ZFS_CASE_SENSITIVE }, + { "insensitive", ZFS_CASE_INSENSITIVE }, + { "mixed", ZFS_CASE_MIXED }, + { NULL } + }; + + static zprop_index_t copies_table[] = { + { "1", 1 }, + { "2", 2 }, + { "3", 3 }, + { NULL } + }; + + /* + * Use the unique flags we have to send to u8_strcmp() and/or + * u8_textprep() to represent the various normalization property + * values. + */ + static zprop_index_t normalize_table[] = { + { "none", 0 }, + { "formD", U8_TEXTPREP_NFD }, + { "formKC", U8_TEXTPREP_NFKC }, + { "formC", U8_TEXTPREP_NFC }, + { "formKD", U8_TEXTPREP_NFKD }, + { NULL } + }; + + static zprop_index_t version_table[] = { + { "1", 1 }, + { "2", 2 }, + { "3", 3 }, + { "4", 4 }, + { "5", 5 }, + { "current", ZPL_VERSION }, + { NULL } + }; + + static zprop_index_t boolean_table[] = { + { "off", 0 }, + { "on", 1 }, + { NULL } + }; + + static zprop_index_t logbias_table[] = { + { "latency", ZFS_LOGBIAS_LATENCY }, + { "throughput", ZFS_LOGBIAS_THROUGHPUT }, + { NULL } + }; + + static zprop_index_t canmount_table[] = { + { "off", ZFS_CANMOUNT_OFF }, + { "on", ZFS_CANMOUNT_ON }, + { "noauto", ZFS_CANMOUNT_NOAUTO }, + { NULL } + }; + + static zprop_index_t cache_table[] = { + { "none", ZFS_CACHE_NONE }, + { "metadata", ZFS_CACHE_METADATA }, + { "all", ZFS_CACHE_ALL }, + { NULL } + }; + + static zprop_index_t sync_table[] = { + { "standard", ZFS_SYNC_STANDARD }, + { "always", ZFS_SYNC_ALWAYS }, + { "disabled", ZFS_SYNC_DISABLED }, + { NULL } + }; + + /* inherit index properties */ + zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "standard | always | disabled", "SYNC", + sync_table); + zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", + ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME, + "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", + checksum_table); + zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "on | off | verify | sha256[,verify]", "DEDUP", + dedup_table); + zprop_register_index(ZFS_PROP_COMPRESSION, "compression", + ZIO_COMPRESS_DEFAULT, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS", + compress_table); + zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "hidden | visible", "SNAPDIR", snapdir_table); + zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", + ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "discard | noallow | restricted | passthrough | passthrough-x", + "ACLINHERIT", acl_inherit_table); + zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "1 | 2 | 3", "COPIES", copies_table); + zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", + ZFS_CACHE_ALL, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, + "all | none | metadata", "PRIMARYCACHE", cache_table); + zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", + ZFS_CACHE_ALL, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, + "all | none | metadata", "SECONDARYCACHE", cache_table); + zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "latency | throughput", "LOGBIAS", logbias_table); + + /* inherit index (boolean) properties */ + zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); + zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", + boolean_table); + zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", + boolean_table); + zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", + boolean_table); + zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", + boolean_table); + zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table); + zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR", + boolean_table); + zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", + boolean_table); + zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", + boolean_table); + + /* default index properties */ + zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "1 | 2 | 3 | 4 | current", "VERSION", version_table); + zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", + "CANMOUNT", canmount_table); + + /* readonly index (boolean) properties */ + zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, + ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); + zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, + PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", + boolean_table); + + /* set once index properties */ + zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, + PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "none | formC | formD | formKC | formKD", "NORMALIZATION", + normalize_table); + zprop_register_index(ZFS_PROP_CASE, "casesensitivity", + ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_SNAPSHOT, + "sensitive | insensitive | mixed", "CASE", case_table); + + /* set once index (boolean) properties */ + zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, + "on | off", "UTF8ONLY", boolean_table); + + /* string properties */ + zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN"); + zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", + "MOUNTPOINT"); + zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", + "SHARENFS"); + zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, + ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); + zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "on | off | sharemgr(1M) options", "SHARESMB"); + zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", + ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, + "<sensitivity label>", "MLSLABEL"); + + /* readonly number properties */ + zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, + ZFS_TYPE_DATASET, "<size>", "USED"); + zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL"); + zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, + PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER"); + zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, + PROP_READONLY, ZFS_TYPE_DATASET, + "<1.00x or higher if compressed>", "RATIO"); + zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", + ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, + ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); + zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDSNAP"); + zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDDS"); + zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", + "USEDCHILD"); + zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, + PROP_READONLY, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV"); + zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, + ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS"); + + /* default number properties */ + zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA"); + zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<size> | none", "RESERV"); + zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, + ZFS_TYPE_VOLUME, "<size>", "VOLSIZE"); + zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, + ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA"); + zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<size> | none", "REFRESERV"); + + /* inherit number properties */ + zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", + SPA_MAXBLOCKSIZE, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); + + /* hidden properties */ + zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG"); + zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); + zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, + PROP_READONLY, ZFS_TYPE_DATASET, "NAME"); + zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", + PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); + zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", + PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, + "STMF_SBD_LU"); + zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); + zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, + "USERACCOUNTING"); + zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); + zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); + + /* + * Property to be removed once libbe is integrated + */ + zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM, + "PRIV_PROP"); + + /* oddball properties */ + zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, + NULL, PROP_READONLY, ZFS_TYPE_DATASET, + "<date>", "CREATION", B_FALSE, B_TRUE, NULL); +} + +boolean_t +zfs_prop_delegatable(zfs_prop_t prop) +{ + zprop_desc_t *pd = &zfs_prop_table[prop]; + + /* The mlslabel property is never delegatable. */ + if (prop == ZFS_PROP_MLSLABEL) + return (B_FALSE); + + return (pd->pd_attr != PROP_READONLY); +} + +/* + * Given a zfs dataset property name, returns the corresponding property ID. + */ +zfs_prop_t +zfs_name_to_prop(const char *propname) +{ + return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); +} + +/* + * For user property names, we allow all lowercase alphanumeric characters, plus + * a few useful punctuation characters. + */ +static int +valid_char(char c) +{ + return ((c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == ':'); +} + +/* + * Returns true if this is a valid user-defined property (one with a ':'). + */ +boolean_t +zfs_prop_user(const char *name) +{ + int i; + char c; + boolean_t foundsep = B_FALSE; + + for (i = 0; i < strlen(name); i++) { + c = name[i]; + if (!valid_char(c)) + return (B_FALSE); + if (c == ':') + foundsep = B_TRUE; + } + + if (!foundsep) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Returns true if this is a valid userspace-type property (one with a '@'). + * Note that after the @, any character is valid (eg, another @, for SID + * user@domain). + */ +boolean_t +zfs_prop_userquota(const char *name) +{ + zfs_userquota_prop_t prop; + + for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { + if (strncmp(name, zfs_userquota_prop_prefixes[prop], + strlen(zfs_userquota_prop_prefixes[prop])) == 0) { + return (B_TRUE); + } + } + + return (B_FALSE); +} + +/* + * Tables of index types, plus functions to convert between the user view + * (strings) and internal representation (uint64_t). + */ +int +zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) +{ + return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); +} + +int +zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) +{ + return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); +} + +uint64_t +zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) +{ + return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); +} + +/* + * Returns TRUE if the property applies to any of the given dataset types. + */ +boolean_t +zfs_prop_valid_for_type(int prop, zfs_type_t types) +{ + return (zprop_valid_for_type(prop, types)); +} + +zprop_type_t +zfs_prop_get_type(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_proptype); +} + +/* + * Returns TRUE if the property is readonly. + */ +boolean_t +zfs_prop_readonly(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_READONLY || + zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +/* + * Returns TRUE if the property is only allowed to be set once. + */ +boolean_t +zfs_prop_setonce(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +const char * +zfs_prop_default_string(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_strdefault); +} + +uint64_t +zfs_prop_default_numeric(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_numdefault); +} + +/* + * Given a dataset property ID, returns the corresponding name. + * Assuming the zfs dataset property ID is valid. + */ +const char * +zfs_prop_to_name(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_name); +} + +/* + * Returns TRUE if the property is inheritable. + */ +boolean_t +zfs_prop_inheritable(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || + zfs_prop_table[prop].pd_attr == PROP_ONETIME); +} + +#ifndef _KERNEL + +/* + * Returns a string describing the set of acceptable values for the given + * zfs property, or NULL if it cannot be set. + */ +const char * +zfs_prop_values(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_values); +} + +/* + * Returns TRUE if this property is a string type. Note that index types + * (compression, checksum) are treated as strings in userland, even though they + * are stored numerically on disk. + */ +int +zfs_prop_is_string(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || + zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); +} + +/* + * Returns the column header for the given property. Used only in + * 'zfs list -o', but centralized here with the other property information. + */ +const char * +zfs_prop_column_name(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_colname); +} + +/* + * Returns whether the given property should be displayed right-justified for + * 'zfs list'. + */ +boolean_t +zfs_prop_align_right(zfs_prop_t prop) +{ + return (zfs_prop_table[prop].pd_rightalign); +} + +#endif diff --git a/common/zfs/zfs_prop.h b/common/zfs/zfs_prop.h new file mode 100644 index 000000000000..a63262311b3d --- /dev/null +++ b/common/zfs/zfs_prop.h @@ -0,0 +1,129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_PROP_H +#define _ZFS_PROP_H + +#include <sys/fs/zfs.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * For index types (e.g. compression and checksum), we want the numeric value + * in the kernel, but the string value in userland. + */ +typedef enum { + PROP_TYPE_NUMBER, /* numeric value */ + PROP_TYPE_STRING, /* string value */ + PROP_TYPE_INDEX /* numeric value indexed by string */ +} zprop_type_t; + +typedef enum { + PROP_DEFAULT, + PROP_READONLY, + PROP_INHERIT, + /* + * ONETIME properties are a sort of conglomeration of READONLY + * and INHERIT. They can be set only during object creation, + * after that they are READONLY. If not explicitly set during + * creation, they can be inherited. + */ + PROP_ONETIME +} zprop_attr_t; + +typedef struct zfs_index { + const char *pi_name; + uint64_t pi_value; +} zprop_index_t; + +typedef struct { + const char *pd_name; /* human-readable property name */ + int pd_propnum; /* property number */ + zprop_type_t pd_proptype; /* string, boolean, index, number */ + const char *pd_strdefault; /* default for strings */ + uint64_t pd_numdefault; /* for boolean / index / number */ + zprop_attr_t pd_attr; /* default, readonly, inherit */ + int pd_types; /* bitfield of valid dataset types */ + /* fs | vol | snap; or pool */ + const char *pd_values; /* string telling acceptable values */ + const char *pd_colname; /* column header for "zfs list" */ + boolean_t pd_rightalign; /* column alignment for "zfs list" */ + boolean_t pd_visible; /* do we list this property with the */ + /* "zfs get" help message */ + const zprop_index_t *pd_table; /* for index properties, a table */ + /* defining the possible values */ + size_t pd_table_size; /* number of entries in pd_table[] */ +} zprop_desc_t; + +/* + * zfs dataset property functions + */ +void zfs_prop_init(void); +zprop_type_t zfs_prop_get_type(zfs_prop_t); +boolean_t zfs_prop_delegatable(zfs_prop_t prop); +zprop_desc_t *zfs_prop_get_table(void); + +/* + * zpool property functions + */ +void zpool_prop_init(void); +zprop_type_t zpool_prop_get_type(zpool_prop_t); +zprop_desc_t *zpool_prop_get_table(void); + +/* + * Common routines to initialize property tables + */ +void zprop_register_impl(int, const char *, zprop_type_t, uint64_t, + const char *, zprop_attr_t, int, const char *, const char *, + boolean_t, boolean_t, const zprop_index_t *); +void zprop_register_string(int, const char *, const char *, + zprop_attr_t attr, int, const char *, const char *); +void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int, + const char *, const char *); +void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int, + const char *, const char *, const zprop_index_t *); +void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t, + int, const char *); + +/* + * Common routines for zfs and zpool property management + */ +int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t); +int zprop_name_to_prop(const char *, zfs_type_t); +int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t); +int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t); +uint64_t zprop_random_value(int, uint64_t, zfs_type_t); +const char *zprop_values(int, zfs_type_t); +size_t zprop_width(int, boolean_t *, zfs_type_t); +boolean_t zprop_valid_for_type(int, zfs_type_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_PROP_H */ diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c new file mode 100644 index 000000000000..988d05de6e20 --- /dev/null +++ b/common/zfs/zpool_prop.c @@ -0,0 +1,202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/fs/zfs.h> + +#include "zfs_prop.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS]; + +zprop_desc_t * +zpool_prop_get_table(void) +{ + return (zpool_prop_table); +} + +void +zpool_prop_init(void) +{ + static zprop_index_t boolean_table[] = { + { "off", 0}, + { "on", 1}, + { NULL } + }; + + static zprop_index_t failuremode_table[] = { + { "wait", ZIO_FAILURE_MODE_WAIT }, + { "continue", ZIO_FAILURE_MODE_CONTINUE }, + { "panic", ZIO_FAILURE_MODE_PANIC }, + { NULL } + }; + + /* string properties */ + zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, "<path>", "ALTROOT"); + zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, "<filesystem>", "BOOTFS"); + zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, + PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE"); + + /* readonly number properties */ + zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "SIZE"); + zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "FREE"); + zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, + PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC"); + zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<size>", "CAP"); + zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<guid>", "GUID"); + zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY, + ZFS_TYPE_POOL, "<state>", "HEALTH"); + zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0, + PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", + "DEDUP"); + + /* default number properties */ + zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, + PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION"); + zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO"); + + /* default index (boolean) properties */ + zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION", + boolean_table); + zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table); + zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS", + boolean_table); + zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table); + zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table); + + /* default index properties */ + zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", + ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, + "wait | continue | panic", "FAILMODE", failuremode_table); + + /* hidden properties */ + zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, + PROP_READONLY, ZFS_TYPE_POOL, "NAME"); +} + +/* + * Given a property name and its type, returns the corresponding property ID. + */ +zpool_prop_t +zpool_name_to_prop(const char *propname) +{ + return (zprop_name_to_prop(propname, ZFS_TYPE_POOL)); +} + +/* + * Given a pool property ID, returns the corresponding name. + * Assuming the pool propety ID is valid. + */ +const char * +zpool_prop_to_name(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_name); +} + +zprop_type_t +zpool_prop_get_type(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_proptype); +} + +boolean_t +zpool_prop_readonly(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_attr == PROP_READONLY); +} + +const char * +zpool_prop_default_string(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_strdefault); +} + +uint64_t +zpool_prop_default_numeric(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_numdefault); +} + +int +zpool_prop_string_to_index(zpool_prop_t prop, const char *string, + uint64_t *index) +{ + return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL)); +} + +int +zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index, + const char **string) +{ + return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL)); +} + +uint64_t +zpool_prop_random_value(zpool_prop_t prop, uint64_t seed) +{ + return (zprop_random_value(prop, seed, ZFS_TYPE_POOL)); +} + +#ifndef _KERNEL + +const char * +zpool_prop_values(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_values); +} + +const char * +zpool_prop_column_name(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_colname); +} + +boolean_t +zpool_prop_align_right(zpool_prop_t prop) +{ + return (zpool_prop_table[prop].pd_rightalign); +} +#endif diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c new file mode 100644 index 000000000000..0bbf20d4f02c --- /dev/null +++ b/common/zfs/zprop_common.c @@ -0,0 +1,426 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Common routines used by zfs and zpool property management. + */ + +#include <sys/zio.h> +#include <sys/spa.h> +#include <sys/zfs_acl.h> +#include <sys/zfs_ioctl.h> +#include <sys/zfs_znode.h> +#include <sys/fs/zfs.h> + +#include "zfs_prop.h" +#include "zfs_deleg.h" + +#if defined(_KERNEL) +#include <sys/systm.h> +#include <util/qsort.h> +#else +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#endif + +static zprop_desc_t * +zprop_get_proptable(zfs_type_t type) +{ + if (type == ZFS_TYPE_POOL) + return (zpool_prop_get_table()); + else + return (zfs_prop_get_table()); +} + +static int +zprop_get_numprops(zfs_type_t type) +{ + if (type == ZFS_TYPE_POOL) + return (ZPOOL_NUM_PROPS); + else + return (ZFS_NUM_PROPS); +} + +void +zprop_register_impl(int prop, const char *name, zprop_type_t type, + uint64_t numdefault, const char *strdefault, zprop_attr_t attr, + int objset_types, const char *values, const char *colname, + boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl) +{ + zprop_desc_t *prop_tbl = zprop_get_proptable(objset_types); + zprop_desc_t *pd; + + pd = &prop_tbl[prop]; + + ASSERT(pd->pd_name == NULL || pd->pd_name == name); + ASSERT(name != NULL); + ASSERT(colname != NULL); + + pd->pd_name = name; + pd->pd_propnum = prop; + pd->pd_proptype = type; + pd->pd_numdefault = numdefault; + pd->pd_strdefault = strdefault; + pd->pd_attr = attr; + pd->pd_types = objset_types; + pd->pd_values = values; + pd->pd_colname = colname; + pd->pd_rightalign = rightalign; + pd->pd_visible = visible; + pd->pd_table = idx_tbl; + pd->pd_table_size = 0; + while (idx_tbl && (idx_tbl++)->pi_name != NULL) + pd->pd_table_size++; +} + +void +zprop_register_string(int prop, const char *name, const char *def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname) +{ + zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr, + objset_types, values, colname, B_FALSE, B_TRUE, NULL); + +} + +void +zprop_register_number(int prop, const char *name, uint64_t def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname) +{ + zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr, + objset_types, values, colname, B_TRUE, B_TRUE, NULL); +} + +void +zprop_register_index(int prop, const char *name, uint64_t def, + zprop_attr_t attr, int objset_types, const char *values, + const char *colname, const zprop_index_t *idx_tbl) +{ + zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr, + objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl); +} + +void +zprop_register_hidden(int prop, const char *name, zprop_type_t type, + zprop_attr_t attr, int objset_types, const char *colname) +{ + zprop_register_impl(prop, name, type, 0, NULL, attr, + objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); +} + + +/* + * A comparison function we can use to order indexes into property tables. + */ +static int +zprop_compare(const void *arg1, const void *arg2) +{ + const zprop_desc_t *p1 = *((zprop_desc_t **)arg1); + const zprop_desc_t *p2 = *((zprop_desc_t **)arg2); + boolean_t p1ro, p2ro; + + p1ro = (p1->pd_attr == PROP_READONLY); + p2ro = (p2->pd_attr == PROP_READONLY); + + if (p1ro == p2ro) + return (strcmp(p1->pd_name, p2->pd_name)); + + return (p1ro ? -1 : 1); +} + +/* + * Iterate over all properties in the given property table, calling back + * into the specified function for each property. We will continue to + * iterate until we either reach the end or the callback function returns + * something other than ZPROP_CONT. + */ +int +zprop_iter_common(zprop_func func, void *cb, boolean_t show_all, + boolean_t ordered, zfs_type_t type) +{ + int i, num_props, size, prop; + zprop_desc_t *prop_tbl; + zprop_desc_t **order; + + prop_tbl = zprop_get_proptable(type); + num_props = zprop_get_numprops(type); + size = num_props * sizeof (zprop_desc_t *); + +#if defined(_KERNEL) + order = kmem_alloc(size, KM_SLEEP); +#else + if ((order = malloc(size)) == NULL) + return (ZPROP_CONT); +#endif + + for (int j = 0; j < num_props; j++) + order[j] = &prop_tbl[j]; + + if (ordered) { + qsort((void *)order, num_props, sizeof (zprop_desc_t *), + zprop_compare); + } + + prop = ZPROP_CONT; + for (i = 0; i < num_props; i++) { + if ((order[i]->pd_visible || show_all) && + (func(order[i]->pd_propnum, cb) != ZPROP_CONT)) { + prop = order[i]->pd_propnum; + break; + } + } + +#if defined(_KERNEL) + kmem_free(order, size); +#else + free(order); +#endif + return (prop); +} + +static boolean_t +propname_match(const char *p, size_t len, zprop_desc_t *prop_entry) +{ + const char *propname = prop_entry->pd_name; +#ifndef _KERNEL + const char *colname = prop_entry->pd_colname; + int c; +#endif + + if (len == strlen(propname) && + strncmp(p, propname, len) == 0) + return (B_TRUE); + +#ifndef _KERNEL + if (colname == NULL || len != strlen(colname)) + return (B_FALSE); + + for (c = 0; c < len; c++) + if (p[c] != tolower(colname[c])) + break; + + return (colname[c] == '\0'); +#else + return (B_FALSE); +#endif +} + +typedef struct name_to_prop_cb { + const char *propname; + zprop_desc_t *prop_tbl; +} name_to_prop_cb_t; + +static int +zprop_name_to_prop_cb(int prop, void *cb_data) +{ + name_to_prop_cb_t *data = cb_data; + + if (propname_match(data->propname, strlen(data->propname), + &data->prop_tbl[prop])) + return (prop); + + return (ZPROP_CONT); +} + +int +zprop_name_to_prop(const char *propname, zfs_type_t type) +{ + int prop; + name_to_prop_cb_t cb_data; + + cb_data.propname = propname; + cb_data.prop_tbl = zprop_get_proptable(type); + + prop = zprop_iter_common(zprop_name_to_prop_cb, &cb_data, + B_TRUE, B_FALSE, type); + + return (prop == ZPROP_CONT ? ZPROP_INVAL : prop); +} + +int +zprop_string_to_index(int prop, const char *string, uint64_t *index, + zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + int i; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (-1); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) + return (-1); + + for (i = 0; idx_tbl[i].pi_name != NULL; i++) { + if (strcmp(string, idx_tbl[i].pi_name) == 0) { + *index = idx_tbl[i].pi_value; + return (0); + } + } + + return (-1); +} + +int +zprop_index_to_string(int prop, uint64_t index, const char **string, + zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + int i; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (-1); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) + return (-1); + + for (i = 0; idx_tbl[i].pi_name != NULL; i++) { + if (idx_tbl[i].pi_value == index) { + *string = idx_tbl[i].pi_name; + return (0); + } + } + + return (-1); +} + +/* + * Return a random valid property value. Used by ztest. + */ +uint64_t +zprop_random_value(int prop, uint64_t seed, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + const zprop_index_t *idx_tbl; + + ASSERT((uint_t)prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + idx_tbl = prop_tbl[prop].pd_table; + + if (idx_tbl == NULL) + return (seed); + + return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value); +} + +const char * +zprop_values(int prop, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + + ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); + ASSERT(prop < zprop_get_numprops(type)); + + prop_tbl = zprop_get_proptable(type); + + return (prop_tbl[prop].pd_values); +} + +/* + * Returns TRUE if the property applies to any of the given dataset types. + */ +boolean_t +zprop_valid_for_type(int prop, zfs_type_t type) +{ + zprop_desc_t *prop_tbl; + + if (prop == ZPROP_INVAL || prop == ZPROP_CONT) + return (B_FALSE); + + ASSERT(prop < zprop_get_numprops(type)); + prop_tbl = zprop_get_proptable(type); + return ((prop_tbl[prop].pd_types & type) != 0); +} + +#ifndef _KERNEL + +/* + * Determines the minimum width for the column, and indicates whether it's fixed + * or not. Only string columns are non-fixed. + */ +size_t +zprop_width(int prop, boolean_t *fixed, zfs_type_t type) +{ + zprop_desc_t *prop_tbl, *pd; + const zprop_index_t *idx; + size_t ret; + int i; + + ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); + ASSERT(prop < zprop_get_numprops(type)); + + prop_tbl = zprop_get_proptable(type); + pd = &prop_tbl[prop]; + + *fixed = B_TRUE; + + /* + * Start with the width of the column name. + */ + ret = strlen(pd->pd_colname); + + /* + * For fixed-width values, make sure the width is large enough to hold + * any possible value. + */ + switch (pd->pd_proptype) { + case PROP_TYPE_NUMBER: + /* + * The maximum length of a human-readable number is 5 characters + * ("20.4M", for example). + */ + if (ret < 5) + ret = 5; + /* + * 'creation' is handled specially because it's a number + * internally, but displayed as a date string. + */ + if (prop == ZFS_PROP_CREATION) + *fixed = B_FALSE; + break; + case PROP_TYPE_INDEX: + idx = prop_tbl[prop].pd_table; + for (i = 0; idx[i].pi_name != NULL; i++) { + if (strlen(idx[i].pi_name) > ret) + ret = strlen(idx[i].pi_name); + } + break; + + case PROP_TYPE_STRING: + *fixed = B_FALSE; + break; + } + + return (ret); +} + +#endif |