21 files changed, 12332 insertions, 0 deletions
diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c
new file mode 100644
index 000000000000..eafc47d10f2d
--- /dev/null
+++ b/common/acl/acl_common.c
@@ -0,0 +1,1755 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/avl.h>
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <acl/acl_common.h>
+#else
+#include <errno.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <grp.h>
+#include <pwd.h>
+#include <acl_common.h>
+#define	ASSERT	assert
+#endif
+/* Mask bits ace_make_deny() toggles to turn an ALLOW mask into a DENY mask. */
+#define	ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \
+    ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \
+    ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL)
+
+/* Policy bits: SET_x = bit belongs in x ACEs, ERR_x = bit is refused there. */
+#define	ACL_SYNCHRONIZE_SET_DENY		0x0000001
+#define	ACL_SYNCHRONIZE_SET_ALLOW		0x0000002
+#define	ACL_SYNCHRONIZE_ERR_DENY		0x0000004
+#define	ACL_SYNCHRONIZE_ERR_ALLOW		0x0000008
+
+#define	ACL_WRITE_OWNER_SET_DENY		0x0000010
+#define	ACL_WRITE_OWNER_SET_ALLOW		0x0000020
+#define	ACL_WRITE_OWNER_ERR_DENY		0x0000040
+#define	ACL_WRITE_OWNER_ERR_ALLOW		0x0000080
+
+#define	ACL_DELETE_SET_DENY			0x0000100
+#define	ACL_DELETE_SET_ALLOW			0x0000200
+#define	ACL_DELETE_ERR_DENY			0x0000400
+#define	ACL_DELETE_ERR_ALLOW			0x0000800
+
+#define	ACL_WRITE_ATTRS_OWNER_SET_DENY		0x0001000
+#define	ACL_WRITE_ATTRS_OWNER_SET_ALLOW		0x0002000
+#define	ACL_WRITE_ATTRS_OWNER_ERR_DENY		0x0004000
+#define	ACL_WRITE_ATTRS_OWNER_ERR_ALLOW		0x0008000
+
+#define	ACL_WRITE_ATTRS_WRITER_SET_DENY		0x0010000
+#define	ACL_WRITE_ATTRS_WRITER_SET_ALLOW	0x0020000
+#define	ACL_WRITE_ATTRS_WRITER_ERR_DENY		0x0040000
+#define	ACL_WRITE_ATTRS_WRITER_ERR_ALLOW	0x0080000
+
+#define	ACL_WRITE_NAMED_WRITER_SET_DENY		0x0100000
+#define	ACL_WRITE_NAMED_WRITER_SET_ALLOW	0x0200000
+#define	ACL_WRITE_NAMED_WRITER_ERR_DENY		0x0400000
+#define	ACL_WRITE_NAMED_WRITER_ERR_ALLOW	0x0800000
+
+#define	ACL_READ_NAMED_READER_SET_DENY		0x1000000
+#define	ACL_READ_NAMED_READER_SET_ALLOW		0x2000000
+#define	ACL_READ_NAMED_READER_ERR_DENY		0x4000000
+#define	ACL_READ_NAMED_READER_ERR_ALLOW		0x8000000
+
+/* Access-mask bits ace_to_aent_legal() accepts; any other bit is EINVAL. */
+#define	ACE_VALID_MASK_BITS (\
+    ACE_READ_DATA | \
+    ACE_LIST_DIRECTORY | \
+    ACE_WRITE_DATA | \
+    ACE_ADD_FILE | \
+    ACE_APPEND_DATA | \
+    ACE_ADD_SUBDIRECTORY | \
+    ACE_READ_NAMED_ATTRS | \
+    ACE_WRITE_NAMED_ATTRS | \
+    ACE_EXECUTE | \
+    ACE_DELETE_CHILD | \
+    ACE_READ_ATTRIBUTES | \
+    ACE_WRITE_ATTRIBUTES | \
+    ACE_DELETE | \
+    ACE_READ_ACL | \
+    ACE_WRITE_ACL | \
+    ACE_WRITE_OWNER | \
+    ACE_SYNCHRONIZE)
+/* Initial value acevals_init() stores in the allowed/denied/mask fields. */
+#define	ACE_MASK_UNDEFINED			0x80000000
+/* ACE flag bits ace_to_aent_legal() accepts; any other bit is EINVAL. */
+#define	ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \
+    ACE_DIRECTORY_INHERIT_ACE | \
+    ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \
+    ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \
+    ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE)
+
+/*
+ * ACL conversion helpers: working state for the ace_t -> aclent_t direction
+ */
+
+typedef enum {	/* ordered; ln_ace_to_aent() rejects steps backwards */
+	ace_unused,	/* initial state set by ace_list_init() */
+	ace_user_obj,
+	ace_user,
+	ace_group, /* includes GROUP and GROUP_OBJ */
+	ace_other_obj
+} ace_to_aent_state_t;
+
+typedef struct acevals {
+	uid_t key;		/* a_who of the ACEs gathered here; AVL sort key */
+	avl_node_t avl;		/* linkage used by the user/group AVL trees */
+	uint32_t mask;		/* compared with ace_list_t acl_mask on output */
+	uint32_t allowed;	/* ALLOW access mask; converted to a_perm */
+	uint32_t denied;	/* DENY access mask; must mirror allowed */
+	int aent_type;		/* a_type written to the resulting aclent_t */
+} acevals_t;
+
+typedef struct ace_list {
+	acevals_t user_obj;	/* holder for the ACE_OWNER entry */
+	avl_tree_t user;	/* acevals_t nodes keyed by a_who */
+	int numusers;		/* USER entries counted into the result size */
+	acevals_t group_obj;	/* holder for the ACE_GROUP entry */
+	avl_tree_t group;	/* acevals_t nodes keyed by a_who */
+	int numgroups;		/* GROUP entries counted into the result size */
+	acevals_t other_obj;	/* holder for the ACE_EVERYONE entry */
+	uint32_t acl_mask;	/* ACE-style mask; flipped before mode conversion */
+	int hasmask;		/* nonzero once acl_mask is meaningful */
+	int dfacl_flag;		/* ACL_DEFAULT for the default list, else 0 */
+	ace_to_aent_state_t state;	/* furthest entry class reached so far */
+	int seen; /* bitmask of all aclent_t a_type values seen */
+} ace_list_t;
+
+/*
+ * In-place Shell sort (after K&R 1st ed., p. 58) of a vector of
+ * fixed-size objects.
+ *	v = base of the vector; asserted to be 4-byte aligned
+ *	n = number of objects; nothing happens when v is NULL or n <= 1
+ *	s = object size in bytes; asserted to be a positive multiple of 4
+ *	f = comparator; a pair is swapped only when it returns exactly 1
+ */
+void
+ksort(caddr_t v, int n, int s, int (*f)())
+{
+	int gap, cur, lo, w, nwords;
+	unsigned int *lhs, *rhs, word;
+
+	/* Zero or one element is already sorted. */
+	if (v == NULL || n <= 1)
+		return;
+
+	/* Objects are exchanged one 32-bit word at a time. */
+	ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0);
+	ASSERT(s > 0);
+	nwords = s / 4;
+	for (gap = n / 2; gap > 0; gap /= 2) {
+		for (cur = gap; cur < n; cur++) {
+			for (lo = cur - gap; lo >= 0; lo -= gap) {
+				if ((*f)(v + lo * s, v + (lo + gap) * s) != 1)
+					break;
+				lhs = (void *)(v + lo * s);
+				rhs = (void *)(v + (lo + gap) * s);
+				for (w = 0; w < nwords; w++) {
+					word = lhs[w];
+					lhs[w] = rhs[w];
+					rhs[w] = word;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Comparator for aclent_t entries; ln_aent_to_ace() and
+ * convert_aent_to_ace() hand it to ksort().
+ * Entries are ordered by a_type, then a_id, then a_perm.
+ * Returns:
+ * -1 when *a sorts before *b
+ *  0 when all three fields match
+ * +1 when *a sorts after *b
+ */
+int
+cmp2acls(void *a, void *b)
+{
+	aclent_t *lhs = a;
+	aclent_t *rhs = b;
+
+	/* primary key: entry type */
+	if (lhs->a_type != rhs->a_type)
+		return ((lhs->a_type < rhs->a_type) ? -1 : 1);
+
+	/* secondary key: id */
+	if (lhs->a_id != rhs->a_id)
+		return ((lhs->a_id < rhs->a_id) ? -1 : 1);
+
+	/* last key: permission bits */
+	if (lhs->a_perm != rhs->a_perm)
+		return ((lhs->a_perm < rhs->a_perm) ? -1 : 1);
+
+	/* no field differs */
+	return (0);
+}
+
+/*ARGSUSED*/
+static void *
+cacl_realloc(void *ptr, size_t size, size_t new_size)
+{
+	/* Resize ptr from size to new_size bytes; returns the new buffer. */
+#if defined(_KERNEL)
+	size_t keep = (size < new_size) ? size : new_size;
+	void *grown = kmem_alloc(new_size, KM_SLEEP);
+	(void) memcpy(grown, ptr, keep);
+	kmem_free(ptr, size);
+	return (grown);
+#else
+	return (realloc(ptr, new_size));
+#endif
+}
+
+/* Store a zero-filled buffer of size bytes in *ptr; returns 0 or errno. */
+static int
+cacl_malloc(void **ptr, size_t size)
+{
+#if defined(_KERNEL)
+	*ptr = kmem_zalloc(size, KM_SLEEP);
+	return (0);
+#else
+	void *buf = calloc(1, size);
+
+	*ptr = buf;
+	return ((buf != NULL) ? 0 : errno);
+#endif
+}
+
+/*ARGSUSED*/	/* Release ptr, a buffer of size bytes. */
+static void
+cacl_free(void *ptr, size_t size)
+{
+#if defined(_KERNEL)	/* kernel: size is handed on to kmem_free() */
+	kmem_free(ptr, size);
+#else			/* userland: free() takes no size; it is unused */
+	free(ptr);
+#endif
+}
+
+/*
+ * Allocate an acl_t of the given type with no entries attached.
+ * Returns NULL if allocation fails or type is neither ACE_T nor ACLENT_T.
+ */
+acl_t *
+acl_alloc(enum acl_type type)
+{
+	acl_t *newacl;
+
+	if (cacl_malloc((void **)&newacl, sizeof (acl_t)) != 0)
+		return (NULL);
+
+	newacl->acl_aclp = NULL;
+	newacl->acl_cnt = 0;
+
+	if (type == ACE_T) {
+		newacl->acl_entry_size = sizeof (ace_t);
+	} else if (type == ACLENT_T) {
+		newacl->acl_entry_size = sizeof (aclent_t);
+	} else {
+		acl_free(newacl);
+		return (NULL);
+	}
+	newacl->acl_type = type;
+	return (newacl);
+}
+
+/*
+ * Release an acl_t and its entry array, if any.  NULL is a no-op.
+ */
+void
+acl_free(acl_t *aclp)
+{
+	int nbytes;
+
+	if (aclp != NULL) {
+		if (aclp->acl_aclp != NULL) {
+			/* array size is rebuilt from count * entry size */
+			nbytes = aclp->acl_cnt * aclp->acl_entry_size;
+			cacl_free(aclp->acl_aclp, nbytes);
+		}
+
+		cacl_free(aclp, sizeof (acl_t));
+	}
+}
+
+/*
+ * Build the policy-controlled part of an ACE access mask, i.e. some
+ * subset of ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE,
+ * ACE_WRITE_ATTRIBUTES, ACE_READ_NAMED_ATTRS and ACE_WRITE_NAMED_ATTRS.
+ * isallow picks the ALLOW or DENY flavor of each policy bit; the other
+ * arguments say whether the subject reads, writes, or owns the object.
+ */
+static uint32_t
+access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow)
+{
+	uint32_t result = 0;
+	int produce;
+	int sync_bit, owner_bit, delete_bit;
+	int attrs_bit = 0, rnamed_bit = 0, wnamed_bit = 0;
+
+	/* the only policy bits that cause a permission to be emitted */
+	produce = (ACL_SYNCHRONIZE_SET_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+	    ACL_WRITE_ATTRS_WRITER_SET_DENY);
+
+	sync_bit = isallow ?
+	    ACL_SYNCHRONIZE_SET_ALLOW : ACL_SYNCHRONIZE_SET_DENY;
+	owner_bit = isallow ?
+	    ACL_WRITE_OWNER_SET_ALLOW : ACL_WRITE_OWNER_SET_DENY;
+	delete_bit = isallow ? ACL_DELETE_SET_ALLOW : ACL_DELETE_SET_DENY;
+
+	if (hasreadperm)
+		rnamed_bit = isallow ? ACL_READ_NAMED_READER_SET_ALLOW :
+		    ACL_READ_NAMED_READER_SET_DENY;
+	if (haswriteperm)
+		wnamed_bit = isallow ? ACL_WRITE_NAMED_WRITER_SET_ALLOW :
+		    ACL_WRITE_NAMED_WRITER_SET_DENY;
+	if (isowner) {
+		attrs_bit = isallow ? ACL_WRITE_ATTRS_OWNER_SET_ALLOW :
+		    ACL_WRITE_ATTRS_OWNER_SET_DENY;
+	} else if (haswriteperm) {
+		attrs_bit = isallow ? ACL_WRITE_ATTRS_WRITER_SET_ALLOW :
+		    ACL_WRITE_ATTRS_WRITER_SET_DENY;
+	} else if (!isallow) {
+		/*
+		 * Neither owner nor writer: ACE_WRITE_ATTRIBUTES always
+		 * goes in the DENY ACE, whatever the policy says.
+		 */
+		result |= ACE_WRITE_ATTRIBUTES;
+	}
+
+	/* translate each selected policy bit into its permission bit */
+	if (produce & sync_bit)
+		result |= ACE_SYNCHRONIZE;
+	if (produce & owner_bit)
+		result |= ACE_WRITE_OWNER;
+	if (produce & delete_bit)
+		result |= ACE_DELETE;
+	if (produce & attrs_bit)
+		result |= ACE_WRITE_ATTRIBUTES;
+	if (produce & rnamed_bit)
+		result |= ACE_READ_NAMED_ATTRS;
+	if (produce & wnamed_bit)
+		result |= ACE_WRITE_NAMED_ATTRS;
+
+	return (result);
+}
+
+/*
+ * Translate the "other" permission bits of mode into an ACE access mask,
+ * assuming aclent_t -> nfsace semantics.
+ *	mode	- only S_IROTH, S_IWOTH and the execute bit (01) are examined
+ *	isdir	- nonzero adds ACE_DELETE_CHILD to write permission
+ *	isowner	- nonzero when the mask is for the owner's entry
+ *	isallow	- nonzero for an ALLOW mask, zero for a DENY mask
+ * Returns the computed access mask.
+ */
+static uint32_t
+mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow)
+{
+	int canread = ((mode & S_IROTH) != 0);
+	int canwrite = ((mode & S_IWOTH) != 0);
+	int rd_hint, wr_hint;
+	uint32_t bits;
+
+	/* for a DENY mask the hints describe the permissions that are absent */
+	rd_hint = isallow ? canread : !canread;
+	wr_hint = isallow ? canwrite : !canwrite;
+
+	/*
+	 * access_mask_set() decides the policy-controlled bits:
+	 * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE,
+	 * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS
+	 */
+	bits = access_mask_set(wr_hint, rd_hint, isowner, isallow);
+
+	/* ACL and attribute access */
+	if (isallow)
+		bits |= ACE_READ_ACL | ACE_READ_ATTRIBUTES;
+	/* ACE_WRITE_ACL: allowed to the owner, denied to everyone else */
+	if (!isallow == !isowner)
+		bits |= ACE_WRITE_ACL;
+
+	/* read */
+	if (canread) {
+		bits |= ACE_READ_DATA;
+	}
+
+	/* write; directories gain delete-child as well */
+	if (canwrite) {
+		bits |= ACE_WRITE_DATA | ACE_APPEND_DATA;
+		if (isdir)
+			bits |= ACE_DELETE_CHILD;
+	}
+
+	/* exec */
+	if (mode & 01) {
+		bits |= ACE_EXECUTE;
+	}
+
+	return (bits);
+}
+
+/*
+ * Fill *deny with the DENY counterpart of *allow (presumably an ALLOW
+ * nfsace): a copy whose type is DENY and whose access mask is rebuilt.
+ */
+static void
+ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner)
+{
+	uint32_t wanted = allow->a_access_mask;
+	uint32_t refused;
+
+	/* complement the POSIX-representable bits, then redo the policy bits */
+	refused = wanted ^ ACE_POSIX_SUPPORTED_BITS;
+	if (isdir)
+		refused ^= ACE_DELETE_CHILD;
+	refused &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER | ACE_DELETE |
+	    ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS);
+	refused |= access_mask_set((wanted & ACE_WRITE_DATA),
+	    (wanted & ACE_READ_DATA), isowner, B_FALSE);
+
+	(void) memcpy(deny, allow, sizeof (ace_t));
+	deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+	deny->a_access_mask = refused;
+}
+/*
+ * First pass over an array of n aclent_t's.  Reports whether a CLASS_OBJ
+ * (ACL_MASK) entry exists and its a_perm (07 when absent), the number of
+ * USER and of GROUP/GROUP_OBJ entries, and whether a_type ever decreases
+ * so that the array needs sorting.  Returns EINVAL for a second mask, or
+ * for more than one user/group entry without a mask; otherwise 0.
+ */
+static int
+ln_aent_preprocess(aclent_t *aclent, int n,
+    int *hasmask, mode_t *mask,
+    int *numuser, int *numgroup, int *needsort)
+{
+	aclent_t *ent;
+	int maxtype = 0;
+	int idx;
+
+	*numuser = 0;
+	*numgroup = 0;
+	*needsort = 0;
+	*hasmask = 0;
+	*mask = 07;
+
+	for (idx = 0; idx < n; idx++) {
+		ent = &aclent[idx];
+
+		/* a type lower than one already seen means out of order */
+		if (ent->a_type < maxtype)
+			*needsort = 1;
+		else if (ent->a_type > maxtype)
+			maxtype = ent->a_type;
+		if (ent->a_type & USER)
+			(*numuser)++;
+		if (ent->a_type & (GROUP | GROUP_OBJ))
+			(*numgroup)++;
+		if (ent->a_type & CLASS_OBJ) {
+			if (*hasmask)
+				return (EINVAL);
+			*hasmask = 1;
+			*mask = ent->a_perm;
+		}
+	}
+
+	/* several user/group entries are not accepted without a mask */
+	if ((! *hasmask) && (*numuser + *numgroup > 1))
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Convert an array of n aclent_t's into a newly allocated array of nfsace
+ * (ace_t) entries, following POSIX draft -> nfsv4 conversion semantics as
+ * outlined in the IETF draft.  The source array may be sorted in place.
+ * Returns 0 and stores the array and its length in *acepp and *rescount
+ * (the caller frees it); returns EINVAL for a malformed list or ENOMEM if
+ * the result cannot be allocated, leaving the outputs untouched.
+ */
+static int
+ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir)
+{
+	int error = 0;
+	mode_t mask;
+	int numuser, numgroup, needsort, hasmask;
+	int resultsize = 0;
+	int i, groupi = 0, skip;
+	ace_t *acep, *result = NULL;
+
+	error = ln_aent_preprocess(aclent, n, &hasmask, &mask,
+	    &numuser, &numgroup, &needsort);
+	if (error != 0)
+		goto out;
+
+	/* allow + deny for each aclent */
+	resultsize = n * 2;
+	if (hasmask) {
+		/*
+		 * stick extra deny on the group_obj and on each
+		 * user|group for the mask (the group_obj was added
+		 * into the count for numgroup)
+		 */
+		resultsize += numuser + numgroup;
+		/* ... and don't count the mask itself */
+		resultsize -= 2;
+	}
+
+	/* sort the source if necessary */
+	if (needsort)
+		ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls);
+
+	if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0) {
+		/* report the failure instead of returning 0 with no array */
+		error = ENOMEM;
+		goto out;
+	}
+	acep = result;
+
+	for (i = 0; i < n; i++) {
+		/*
+		 * don't process CLASS_OBJ (mask); mask was grabbed in
+		 * ln_aent_preprocess()
+		 */
+		if (aclent[i].a_type & CLASS_OBJ)
+			continue;
+
+		/* If we need an ACL_MASK emulator, prepend it now */
+		if ((hasmask) &&
+		    (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) {
+			acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+			acep->a_flags = 0;
+			if (aclent[i].a_type & GROUP_OBJ) {
+				acep->a_who = (uid_t)-1;
+				acep->a_flags |=
+				    (ACE_IDENTIFIER_GROUP|ACE_GROUP);
+			} else if (aclent[i].a_type & USER) {
+				acep->a_who = aclent[i].a_id;
+			} else {
+				acep->a_who = aclent[i].a_id;
+				acep->a_flags |= ACE_IDENTIFIER_GROUP;
+			}
+			if (aclent[i].a_type & ACL_DEFAULT) {
+				acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+				    ACE_FILE_INHERIT_ACE |
+				    ACE_DIRECTORY_INHERIT_ACE;
+			}
+			/*
+			 * Set the access mask for the prepended deny
+			 * ace.  To do this, we invert the mask (found
+			 * in ln_aent_preprocess()) then convert it to an
+			 * DENY ace access_mask.
+			 */
+			acep->a_access_mask = mode_to_ace_access((mask ^ 07),
+			    isdir, 0, 0);
+			acep += 1;
+		}
+
+		/* handle a_perm -> access_mask */
+		acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm,
+		    isdir, aclent[i].a_type & USER_OBJ, 1);
+
+		/* emulate a default aclent */
+		if (aclent[i].a_type & ACL_DEFAULT) {
+			acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+			    ACE_FILE_INHERIT_ACE |
+			    ACE_DIRECTORY_INHERIT_ACE;
+		}
+
+		/*
+		 * handle a_perm and a_id
+		 *
+		 * this must be done last, since it involves the
+		 * corresponding deny aces, which are handled
+		 * differently for each different a_type.
+		 */
+		if (aclent[i].a_type & USER_OBJ) {
+			acep->a_who = (uid_t)-1;
+			acep->a_flags |= ACE_OWNER;
+			ace_make_deny(acep, acep + 1, isdir, B_TRUE);
+			acep += 2;
+		} else if (aclent[i].a_type & USER) {
+			acep->a_who = aclent[i].a_id;
+			ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+			acep += 2;
+		} else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) {
+			if (aclent[i].a_type & GROUP_OBJ) {
+				acep->a_who = (uid_t)-1;
+				acep->a_flags |= ACE_GROUP;
+			} else {
+				acep->a_who = aclent[i].a_id;
+			}
+			acep->a_flags |= ACE_IDENTIFIER_GROUP;
+			/*
+			 * Set the corresponding deny for the group ace.
+			 *
+			 * The deny aces go after all of the groups, unlike
+			 * everything else, where they immediately follow
+			 * the allow ace.
+			 *
+			 * We calculate "skip", the number of slots to
+			 * skip ahead for the deny ace, here.
+			 *
+			 * The pattern is:
+			 * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3
+			 * thus, skip is
+			 * (2 * numgroup) - 1 - groupi
+			 * (2 * numgroup) to account for MD + A
+			 * - 1 to account for the fact that we're on the
+			 * access (A), not the mask (MD)
+			 * - groupi to account for the fact that we have
+			 * passed up groupi number of MD's.
+			 */
+			skip = (2 * numgroup) - 1 - groupi;
+			ace_make_deny(acep, acep + skip, isdir, B_FALSE);
+			/*
+			 * If we just did the last group, skip acep past
+			 * all of the denies; else, just move ahead one.
+			 */
+			if (++groupi >= numgroup)
+				acep += numgroup + 1;
+			else
+				acep += 1;
+		} else if (aclent[i].a_type & OTHER_OBJ) {
+			acep->a_who = (uid_t)-1;
+			acep->a_flags |= ACE_EVERYONE;
+			ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+			acep += 2;
+		} else {
+			error = EINVAL;
+			goto out;
+		}
+	}
+
+	*acepp = result;
+	*rescount = resultsize;
+
+out:
+	if ((error != 0) && (result != NULL) && (resultsize > 0))
+		cacl_free(result, resultsize * sizeof (ace_t));
+
+	return (error);
+}
+
+/*
+ * Convert aclcnt aclent_t's (sorted in place) into one new ace_t array:
+ * access entries, then (directories only) the ACL_DEFAULT entries.  Returns
+ * 0 with retacep and retacecnt filled in, or an errno with nothing leaked.
+ */
+static int
+convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
+    ace_t **retacep, int *retacecnt)
+{
+	ace_t *acep = NULL, *dfacep = NULL, *merged;
+	int acecnt = 0, dfacecnt = 0;
+	int dfaclcnt, i, error;
+	size_t acesz, dfacesz;
+
+	ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls);
+
+	/* everything from the first ACL_DEFAULT entry on is the default ACL */
+	for (i = 0; i < aclcnt; i++) {
+		if (aclentp[i].a_type & ACL_DEFAULT)
+			break;
+	}
+	dfaclcnt = (i < aclcnt) ? aclcnt - i : 0;
+
+	if (dfaclcnt && isdir == 0)
+		return (EINVAL);
+
+	error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir);
+	if (error)
+		return (error);
+
+	if (dfaclcnt) {
+		error = ln_aent_to_ace(&aclentp[i], dfaclcnt,
+		    &dfacep, &dfacecnt, isdir);
+		if (error)
+			goto fail;
+	}
+
+	if (dfacecnt != 0) {
+		acesz = sizeof (ace_t) * acecnt;
+		dfacesz = sizeof (ace_t) * dfacecnt;
+		/* resize via a temporary so a failure cannot leak acep */
+		merged = cacl_realloc(acep, acesz, acesz + dfacesz);
+		if (merged == NULL) {
+			error = ENOMEM;
+			goto fail;
+		}
+		acep = merged;
+		(void) memcpy(acep + acecnt, dfacep, dfacesz);
+	}
+	if (dfaclcnt)
+		cacl_free(dfacep, dfacecnt * sizeof (ace_t));
+
+	*retacecnt = acecnt + dfacecnt;
+	*retacep = acep;
+	return (0);
+
+fail:
+	if (acep != NULL)
+		cacl_free(acep, acecnt * sizeof (ace_t));
+	if (dfacep != NULL)
+		cacl_free(dfacep, dfacecnt * sizeof (ace_t));
+	return (error);
+}
+
+/*
+ * Reduce an ACE access mask to "other"-style rwx bits stored in *modep.
+ * Write permission requires ACE_WRITE_DATA and ACE_APPEND_DATA (plus
+ * ACE_DELETE_CHILD for directories) all together; a partial set yields
+ * ENOTSUP and leaves *modep untouched.  Returns 0 on success.
+ */
+static int
+ace_mask_to_mode(uint32_t  mask, o_mode_t *modep, int isdir)
+{
+	uint32_t need = ACE_WRITE_DATA | ACE_APPEND_DATA;
+	uint32_t have;
+	o_mode_t perm = 0;
+
+	if (isdir)
+		need |= ACE_DELETE_CHILD;
+	have = mask & need;
+
+	/* write: all-or-nothing */
+	if (have != 0 && have != need)
+		return (ENOTSUP);
+	if (have != 0)
+		perm |= S_IWOTH;
+
+	/* read */
+	if (mask & ACE_READ_DATA)
+		perm |= S_IROTH;
+
+	/* exec */
+	if (mask & ACE_EXECUTE)
+		perm |= S_IXOTH;
+
+	*modep = perm;
+	return (0);
+}
+
+static void
+acevals_init(acevals_t *vals, uid_t key)
+{
+	bzero(vals, sizeof (*vals));
+	vals->allowed = ACE_MASK_UNDEFINED;
+	vals->denied = ACE_MASK_UNDEFINED;
+	vals->mask = ACE_MASK_UNDEFINED;
+	vals->key = key;
+}
+
+/* Reset *al's scalar fields and *_obj holders; AVL trees are untouched. */
+static void
+ace_list_init(ace_list_t *al, int dfacl_flag)
+{
+	/* key is a uid_t, so pass integer 0 rather than the pointer NULL */
+	acevals_init(&al->user_obj, 0);
+	acevals_init(&al->group_obj, 0);
+	acevals_init(&al->other_obj, 0);
+	al->numusers = al->numgroups = 0;
+	al->acl_mask = 0;
+	al->hasmask = al->seen = 0;
+	al->state = ace_unused;
+	al->dfacl_flag = dfacl_flag;
+}
+
+/*
+ * Look up the acevals holder for ace->a_who in the given avl tree,
+ * creating and inserting a fresh one (and bumping *num) when absent.
+ * Returns NULL only if the new holder cannot be allocated.
+ *
+ * These trees are only ever touched by one thread, so no locking is done.
+ */
+static acevals_t *
+acevals_find(ace_t *ace, avl_tree_t *avl, int *num)
+{
+	acevals_t probe, *found;
+	avl_index_t slot;
+
+	probe.key = ace->a_who;
+	found = avl_find(avl, &probe, &slot);
+	if (found == NULL) {
+		/* released later by ln_ace_to_aent()->ace_list_free() */
+		if (cacl_malloc((void **)&found, sizeof (acevals_t)) != 0)
+			return (NULL);
+
+		acevals_init(found, ace->a_who);
+		avl_insert(avl, found, slot);
+		(*num)++;
+	}
+
+	return (found);
+}
+
+/*
+ * Check one policy-controlled permission (mask_bit) of *acep against the
+ * ACL_*_SET_* / ACL_*_ERR_* policy this file consumes.  Returns 0 if the
+ * bit's presence or absence is acceptable, ENOTSUP if the ACE cannot be
+ * expressed as an aclent_t, or EINVAL for an unrecognized mask_bit.
+ */
+static int
+access_mask_check(ace_t *acep, int mask_bit, int isowner)
+{
+	int set_deny, err_deny, set_allow, err_allow;
+	int acl_consume, haswriteperm, hasreadperm;
+
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1;
+	} else {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 1 : 0;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0;
+	}
+
+	acl_consume = (ACL_SYNCHRONIZE_ERR_DENY |
+	    ACL_DELETE_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_ERR_DENY |
+	    ACL_WRITE_ATTRS_WRITER_SET_DENY |
+	    ACL_WRITE_ATTRS_WRITER_ERR_ALLOW |
+	    ACL_WRITE_NAMED_WRITER_ERR_DENY |
+	    ACL_READ_NAMED_READER_ERR_DENY);
+
+	if (mask_bit == ACE_SYNCHRONIZE) {
+		set_deny = ACL_SYNCHRONIZE_SET_DENY;
+		err_deny =  ACL_SYNCHRONIZE_ERR_DENY;
+		set_allow = ACL_SYNCHRONIZE_SET_ALLOW;
+		err_allow = ACL_SYNCHRONIZE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_OWNER) {
+		set_deny = ACL_WRITE_OWNER_SET_DENY;
+		err_deny =  ACL_WRITE_OWNER_ERR_DENY;
+		set_allow = ACL_WRITE_OWNER_SET_ALLOW;
+		err_allow = ACL_WRITE_OWNER_ERR_ALLOW;
+	} else if (mask_bit == ACE_DELETE) {
+		set_deny = ACL_DELETE_SET_DENY;
+		err_deny =  ACL_DELETE_ERR_DENY;
+		set_allow = ACL_DELETE_SET_ALLOW;
+		err_allow = ACL_DELETE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_ATTRIBUTES) {
+		if (isowner) {
+			set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY;
+			err_deny =  ACL_WRITE_ATTRS_OWNER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW;
+		} else if (haswriteperm) {
+			set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY;
+			err_deny =  ACL_WRITE_ATTRS_WRITER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW;
+		} else {
+			/*
+			 * a_type is an enumerated value, so test it with ==, not &.
+			 */
+			if ((acep->a_access_mask & mask_bit) &&
+			    (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE))
+				return (ENOTSUP);
+			return (0);
+		}
+	} else if (mask_bit == ACE_READ_NAMED_ATTRS) {
+		if (!hasreadperm)
+			return (0);
+		set_deny = ACL_READ_NAMED_READER_SET_DENY;
+		err_deny = ACL_READ_NAMED_READER_ERR_DENY;
+		set_allow = ACL_READ_NAMED_READER_SET_ALLOW;
+		err_allow = ACL_READ_NAMED_READER_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_NAMED_ATTRS) {
+		if (!haswriteperm)
+			return (0);
+		set_deny = ACL_WRITE_NAMED_WRITER_SET_DENY;
+		err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY;
+		set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW;
+		err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW;
+	} else {
+		return (EINVAL);
+	}
+
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		if (acl_consume & set_deny) {
+			if (!(acep->a_access_mask & mask_bit))
+				return (ENOTSUP);
+		} else if (acl_consume & err_deny) {
+			if (acep->a_access_mask & mask_bit)
+				return (ENOTSUP);
+		}
+	} else {
+		/* ACE_ACCESS_ALLOWED_ACE_TYPE */
+		if (acl_consume & set_allow) {
+			if (!(acep->a_access_mask & mask_bit))
+				return (ENOTSUP);
+		} else if (acl_consume & err_allow) {
+			if (acep->a_access_mask & mask_bit)
+				return (ENOTSUP);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Decide whether a single ace_t can be carried over into an aclent_t.
+ *
+ * Returns 0 when the ACE is acceptable.  Returns EINVAL when a_flags or
+ * a_access_mask holds bits outside ACE_VALID_FLAG_BITS or
+ * ACE_VALID_MASK_BITS.  Returns ENOTSUP when the ACE is well formed but
+ * is rejected here: a type other than ALLOW/DENY, the successful-access,
+ * failed-access or no-propagate flags, or one of the permission
+ * combinations tested below.  Errors from access_mask_check() are passed
+ * through unchanged.
+ */
+static int
+ace_to_aent_legal(ace_t *acep)
+{
+	int isallow = (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE);
+	int isdeny = (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE);
+	int isowner = ((acep->a_flags & ACE_OWNER) != 0);
+	int rc;
+
+	/* only ALLOW or DENY */
+	if (!isallow && !isdeny) {
+		return (ENOTSUP);
+	}
+
+	/* check for invalid flags */
+	if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) {
+		return (EINVAL);
+	}
+
+	/* some flags are illegal */
+	if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG |
+	    ACE_FAILED_ACCESS_ACE_FLAG |
+	    ACE_NO_PROPAGATE_INHERIT_ACE)) {
+		return (ENOTSUP);
+	}
+
+	/* check for invalid masks */
+	if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) {
+		return (EINVAL);
+	}
+
+	/*
+	 * Each policy-controlled permission is checked in turn; the
+	 * first failing check decides the error.
+	 */
+	rc = access_mask_check(acep, ACE_SYNCHRONIZE, isowner);
+	if (rc != 0)
+		return (rc);
+
+	rc = access_mask_check(acep, ACE_WRITE_OWNER, isowner);
+	if (rc != 0)
+		return (rc);
+
+	rc = access_mask_check(acep, ACE_DELETE, isowner);
+	if (rc != 0)
+		return (rc);
+
+	rc = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner);
+	if (rc != 0)
+		return (rc);
+
+	rc = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner);
+	if (rc != 0)
+		return (rc);
+
+	rc = access_mask_check(acep, ACE_WRITE_NAMED_ATTRS, isowner);
+	if (rc != 0)
+		return (rc);
+
+	/* more detailed checking of masks */
+	if (isallow) {
+		/* an ALLOW entry must grant ACE_READ_ATTRIBUTES */
+		if (!(acep->a_access_mask & ACE_READ_ATTRIBUTES)) {
+			return (ENOTSUP);
+		}
+
+		/* ACE_WRITE_DATA and ACE_APPEND_DATA travel together */
+		if (!(acep->a_access_mask & ACE_WRITE_DATA) !=
+		    !(acep->a_access_mask & ACE_APPEND_DATA)) {
+			return (ENOTSUP);
+		}
+	}
+
+	/* ACL enforcement: ACE_READ_ACL may be allowed but never denied */
+	if ((acep->a_access_mask & ACE_READ_ACL) && !isallow) {
+		return (ENOTSUP);
+	}
+
+	/*
+	 * ACE_WRITE_ACL is accepted only when allowed to the owner
+	 * or denied to a non-owner.
+	 */
+	if (acep->a_access_mask & ACE_WRITE_ACL) {
+		if (isdeny && isowner) {
+			return (ENOTSUP);
+		}
+		if (isallow && !isowner) {
+			return (ENOTSUP);
+		}
+	}
+
+	return (0);
+}
+
+/* Convert an ALLOW access mask to rwx bits in *modep; 0 or ENOTSUP. */
+static int
+ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
+{
+	const uint32_t required = ACE_READ_ACL | ACE_READ_ATTRIBUTES;
+
+	/* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */
+	if ((mask & required) == required)
+		return (ace_mask_to_mode(mask, modep, isdir));
+	return (ENOTSUP);
+}
+
+/*
+ * Emit one aclent_t (*dest) from the masks gathered in *vals.  Returns 0,
+ * ENOTSUP when the masks cannot be expressed as a mode (see the checks
+ * below), or EINVAL when vals->aent_type names no known entry kind.
+ */
+static int
+acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list,
+    uid_t owner, gid_t group, int isdir)
+{
+	uint32_t complement = ACE_POSIX_SUPPORTED_BITS;
+	int kind = vals->aent_type;
+	int rc;
+
+	if (isdir)
+		complement |= ACE_DELETE_CHILD;
+	if ((vals->denied ^ complement) != vals->allowed)
+		return (ENOTSUP);
+
+	if ((list->hasmask) && (vals->mask != list->acl_mask) &&
+	    (kind & (USER | GROUP | GROUP_OBJ)))
+		return (ENOTSUP);
+	rc = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir);
+	if (rc != 0)
+		return (rc);
+	dest->a_type = kind;
+	if (kind & (USER | GROUP))
+		dest->a_id = vals->key;
+	else if (kind & USER_OBJ)
+		dest->a_id = owner;
+	else if (kind & GROUP_OBJ)
+		dest->a_id = group;
+	else if (kind & OTHER_OBJ)
+		dest->a_id = 0;
+	else
+		return (EINVAL);
+
+	return (0);
+}
+
+/* Flatten one gathered ace_list_t into a newly allocated aclent_t array. */
+static int
+ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt,
+    uid_t owner, gid_t group, int isdir)
+{
+	int error = 0;		/* 0, ENOTSUP, EINVAL or ENOMEM */
+	aclent_t *aent, *result = NULL;	/* aent: write cursor in result */
+	acevals_t *vals;
+	int resultcount;
+
+	if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) !=
+	    (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) {
+		error = ENOTSUP;	/* a required *_OBJ entry was not seen */
+		goto out;
+	}
+	if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) {
+		error = ENOTSUP;	/* users/groups present but no mask */
+		goto out;
+	}
+
+	resultcount = 3 + list->numusers + list->numgroups;
+	/*
+	 * The 3 above covers USER_OBJ, GROUP_OBJ and OTHER_OBJ.  The test
+	 * below must match the one guarding the CLASS_OBJ (ACL mask) entry.
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag))
+		resultcount += 1;
+
+	if (cacl_malloc((void **)&result,
+	    resultcount * sizeof (aclent_t)) != 0) {
+		error = ENOMEM;
+		goto out;
+	}
+	aent = result;
+
+	/* USER_OBJ: always the first entry */
+	if (!(list->user_obj.aent_type & USER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = acevals_to_aent(&list->user_obj, aent, list, owner, group,
+	    isdir);
+
+	if (error != 0)
+		goto out;
+	++aent;
+	/* USER: one entry per node of the user tree */
+	vals = NULL;
+	for (vals = avl_first(&list->user); vals != NULL;
+	    vals = AVL_NEXT(&list->user, vals)) {
+		if (!(vals->aent_type & USER)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/* GROUP_OBJ */
+	if (!(list->group_obj.aent_type & GROUP_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->group_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+	/* GROUP: one entry per node of the group tree */
+	vals = NULL;
+	for (vals = avl_first(&list->group); vals != NULL;
+	    vals = AVL_NEXT(&list->group, vals)) {
+		if (!(vals->aent_type & GROUP)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/*
+	 * CLASS_OBJ (aka ACL_MASK)
+	 *
+	 * An ACL_MASK is not fabricated if the ACL is a default ACL.
+	 * This is to follow UFS's behavior.
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag)) {
+		if (list->hasmask) {
+			/* acl_mask is flipped over the POSIX bits first */
+			uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
+			if (isdir)
+				flips |= ACE_DELETE_CHILD;
+			error = ace_mask_to_mode(list->acl_mask ^ flips,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		} else {
+			/* fabricate the ACL_MASK from the group permissions */
+			error = ace_mask_to_mode(list->group_obj.allowed,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		}
+		aent->a_id = 0;
+		aent->a_type = CLASS_OBJ | list->dfacl_flag;
+		++aent;
+	}
+	/* OTHER_OBJ: always the last entry */
+	if (!(list->other_obj.aent_type & OTHER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->other_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+
+	*aclentp = result;	/* outputs are written only on success */
+	*aclcnt = resultcount;
+
+out:
+	if (error != 0) {	/* on failure the partial array is released */
+		if (result != NULL)
+			cacl_free(result, resultcount * sizeof (aclent_t));
+	}
+
+	return (error);
+}
+
+
+/*
+ * Free an ace_list: the nodes of both AVL trees, the trees, then the list.
+ */
+static void
+ace_list_free(ace_list_t *al)
+{
+	avl_tree_t *trees[2];
+	acevals_t *ent;
+	void *walk;
+	int t;
+
+	if (al == NULL)
+		return;
+
+	trees[0] = &al->user;
+	trees[1] = &al->group;
+	for (t = 0; t < 2; t++) {
+		walk = NULL;
+		while ((ent = avl_destroy_nodes(trees[t], &walk)) != NULL)
+			cacl_free(ent, sizeof (acevals_t));
+	}
+	avl_destroy(&al->user);
+	avl_destroy(&al->group);
+	cacl_free(al, sizeof (ace_list_t));
+}
+
+/*
+ * Comparator given to avl_create() for acevals_t nodes: orders by key
+ * and returns -1, 0 or 1.
+ */
+static int
+acevals_compare(const void *va, const void *vb)
+{
+	const acevals_t *lhs = va;
+	const acevals_t *rhs = vb;
+
+	if (lhs->key < rhs->key)
+		return (-1);
+	return ((lhs->key > rhs->key) ? 1 : 0);
+}
+
+/*
+ * Convert a list of ace_t entries to equivalent regular and default
+ * aclent_t lists.
+ *
+ * ace, n	array of ACEs to convert and its length.  The access masks
+ *		of the entries are modified in place: bits that are not
+ *		considered during the conversion are cleared.
+ * owner, group	passed through to ace_list_to_aent().
+ * aclentp, aclcnt
+ *		receive the regular aclent_t array and its entry count.
+ * dfaclentp, dfaclcnt
+ *		receive the default aclent_t array and its entry count.
+ * isdir	passed through to ace_list_to_aent().
+ *
+ * Returns 0 on success.  Fails with ENOTSUP when fewer than six ACEs are
+ * supplied or the ACEs cannot be expressed as aclent_t entries, EINVAL
+ * when ace is NULL, ENOMEM when a per-user/per-group record cannot be
+ * obtained, or whatever error cacl_malloc(), ace_to_aent_legal() or
+ * ace_list_to_aent() report.  On failure no memory is handed back to the
+ * caller: both output arrays are NULL and both counts are 0.
+ */
+static int
+ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group,
+    aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt,
+    int isdir)
+{
+	int error = 0;
+	ace_t *acep;
+	uint32_t bits;
+	int i;
+	ace_list_t *normacl = NULL, *dfacl = NULL, *acl;
+	acevals_t *vals;
+
+	*aclentp = NULL;
+	*aclcnt = 0;
+	*dfaclentp = NULL;
+	*dfaclcnt = 0;
+
+	/* we need at least user_obj, group_obj, and other_obj */
+	if (n < 6) {
+		error = ENOTSUP;
+		goto out;
+	}
+	if (ace == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* working state for the regular (access) ACL */
+	error = cacl_malloc((void **)&normacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&normacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&normacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+
+	ace_list_init(normacl, 0);
+
+	/* working state for the default ACL */
+	error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	ace_list_init(dfacl, ACL_DEFAULT);
+
+	/* process every ace_t... */
+	for (i = 0; i < n; i++) {
+		acep = &ace[i];
+
+		/* rule out certain cases quickly */
+		error = ace_to_aent_legal(acep);
+		if (error != 0)
+			goto out;
+
+		/*
+		 * Turn off these bits in order to not have to worry about
+		 * them when doing the checks for complements.
+		 */
+		acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE |
+		    ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES |
+		    ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS);
+
+		/* see if this should be a regular or default acl */
+		bits = acep->a_flags &
+		    (ACE_INHERIT_ONLY_ACE |
+		    ACE_FILE_INHERIT_ACE |
+		    ACE_DIRECTORY_INHERIT_ACE);
+		if (bits != 0) {
+			/* all or nothing on these inherit bits */
+			if (bits != (ACE_INHERIT_ONLY_ACE |
+			    ACE_FILE_INHERIT_ACE |
+			    ACE_DIRECTORY_INHERIT_ACE)) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl = dfacl;
+		} else {
+			acl = normacl;
+		}
+
+		/*
+		 * Pick the record this ACE contributes to.  acl->state only
+		 * moves forward, so an entry class that shows up after a
+		 * later class has already been seen is rejected.
+		 */
+		if ((acep->a_flags & ACE_OWNER)) {
+			if (acl->state > ace_user_obj) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user_obj;
+			acl->seen |= USER_OBJ;
+			vals = &acl->user_obj;
+			vals->aent_type = USER_OBJ | acl->dfacl_flag;
+		} else if ((acep->a_flags & ACE_EVERYONE)) {
+			acl->state = ace_other_obj;
+			acl->seen |= OTHER_OBJ;
+			vals = &acl->other_obj;
+			vals->aent_type = OTHER_OBJ | acl->dfacl_flag;
+		} else if (acep->a_flags & ACE_IDENTIFIER_GROUP) {
+			if (acl->state > ace_group) {
+				error = ENOTSUP;
+				goto out;
+			}
+			if ((acep->a_flags & ACE_GROUP)) {
+				acl->seen |= GROUP_OBJ;
+				vals = &acl->group_obj;
+				vals->aent_type = GROUP_OBJ | acl->dfacl_flag;
+			} else {
+				acl->seen |= GROUP;
+				vals = acevals_find(acep, &acl->group,
+				    &acl->numgroups);
+				if (vals == NULL) {
+					error = ENOMEM;
+					goto out;
+				}
+				vals->aent_type = GROUP | acl->dfacl_flag;
+			}
+			acl->state = ace_group;
+		} else {
+			if (acl->state > ace_user) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user;
+			acl->seen |= USER;
+			vals = acevals_find(acep, &acl->user,
+			    &acl->numusers);
+			if (vals == NULL) {
+				error = ENOMEM;
+				goto out;
+			}
+			vals->aent_type = USER | acl->dfacl_flag;
+		}
+
+		if (!(acl->state > ace_unused)) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) {
+			/* no more than one allowed per aclent_t */
+			if (vals->allowed != ACE_MASK_UNDEFINED) {
+				error = ENOTSUP;
+				goto out;
+			}
+			vals->allowed = acep->a_access_mask;
+		} else {
+			/*
+			 * it's a DENY; if there was a previous DENY, it
+			 * must have been an ACL_MASK.
+			 */
+			if (vals->denied != ACE_MASK_UNDEFINED) {
+				/* ACL_MASK is for USER and GROUP only */
+				if ((acl->state != ace_user) &&
+				    (acl->state != ace_group)) {
+					error = ENOTSUP;
+					goto out;
+				}
+
+				if (! acl->hasmask) {
+					acl->hasmask = 1;
+					acl->acl_mask = vals->denied;
+				/* check for mismatched ACL_MASK emulations */
+				} else if (acl->acl_mask != vals->denied) {
+					error = ENOTSUP;
+					goto out;
+				}
+				vals->mask = vals->denied;
+			}
+			vals->denied = acep->a_access_mask;
+		}
+	}
+
+	/* done collating; produce the aclent_t lists */
+	if (normacl->state != ace_unused) {
+		error = ace_list_to_aent(normacl, aclentp, aclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+	if (dfacl->state != ace_unused) {
+		error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+
+out:
+	/*
+	 * The regular list may already have been produced when the default
+	 * list fails to convert.  The caller discards its outputs on error,
+	 * so release that array here instead of leaking it.
+	 */
+	if (error != 0 && *aclentp != NULL) {
+		cacl_free(*aclentp, *aclcnt * sizeof (aclent_t));
+		*aclentp = NULL;
+		*aclcnt = 0;
+	}
+
+	if (normacl != NULL)
+		ace_list_free(normacl);
+	if (dfacl != NULL)
+		ace_list_free(dfacl);
+
+	return (error);
+}
+
+/*
+ * Convert an array of ace_t entries into a single aclent_t array in which
+ * the regular entries come first and the default entries follow.
+ *
+ * acebufp, acecnt	ACEs to convert (see ln_ace_to_aent() for the
+ *			requirements placed on them).
+ * isdir, owner, group	passed through to ln_ace_to_aent().
+ * retaclentp		receives the combined array on success.
+ * retaclcnt		receives the total number of entries on success.
+ *
+ * Returns 0 on success, the error from ln_ace_to_aent(), or ENOMEM when
+ * the two lists cannot be merged.  The outputs are written only when a
+ * combined array is available.
+ */
+static int
+convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir,
+    uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt)
+{
+	int error = 0;
+	aclent_t *aclentp, *dfaclentp;
+	aclent_t *merged;
+	int aclcnt, dfaclcnt;
+	int aclsz;
+	int dfaclsz = 0;	/* stays 0 when there are no default entries */
+
+	error = ln_ace_to_aent(acebufp, acecnt, owner, group,
+	    &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir);
+
+	if (error)
+		return (error);
+
+
+	if (dfaclcnt != 0) {
+		/*
+		 * Slap aclentp and dfaclentp into a single array.
+		 */
+		aclsz = sizeof (aclent_t) * aclcnt;
+		dfaclsz = sizeof (aclent_t) * dfaclcnt;
+
+		/*
+		 * Grow through a temporary so the original array is not
+		 * lost when the allocation fails.
+		 * NOTE(review): this assumes cacl_realloc() follows
+		 * realloc() semantics and leaves the old buffer allocated
+		 * when it returns NULL -- confirm against its definition.
+		 */
+		merged = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz);
+		if (merged != NULL) {
+			(void) memcpy(merged + aclcnt, dfaclentp, dfaclsz);
+			aclentp = merged;
+		} else {
+			if (aclentp != NULL)
+				cacl_free(aclentp, aclsz);
+			aclentp = NULL;
+			error = ENOMEM;
+		}
+	}
+
+	if (aclentp) {
+		*retaclentp = aclentp;
+		*retaclcnt = aclcnt + dfaclcnt;
+	}
+
+	/* the default entries have been copied (or abandoned) by now */
+	if (dfaclentp)
+		cacl_free(dfaclentp, dfaclsz);
+
+	return (error);
+}
+
+
+/*
+ * Translate the entries held by aclp, in place, to the requested flavor.
+ *
+ * target_flavor is _ACL_ACE_ENABLED or _ACL_ACLENT_ENABLED.  When aclp is
+ * already of that flavor nothing is done.  isdir, owner and group are
+ * handed to the conversion routines.
+ *
+ * Returns 0 on success.  On failure aclp is left untouched; kernel builds
+ * return the error number, other builds store it in errno and return -1.
+ * A target_flavor of -1 yields EINVAL and any other unsupported
+ * combination yields ENOTSUP.
+ */
+int
+acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner,
+    gid_t group)
+{
+	void *newdata;
+	int newcnt;
+	int err;
+	int to_ace = (target_flavor == _ACL_ACE_ENABLED);
+	int to_aent = (target_flavor == _ACL_ACLENT_ENABLED);
+
+	/* nothing to do when the ACL already has the requested flavor */
+	if ((to_ace && aclp->acl_type == ACE_T) ||
+	    (to_aent && aclp->acl_type == ACLENT_T))
+		return (0);
+
+	if (target_flavor == -1) {
+		err = EINVAL;
+	} else if (to_ace && aclp->acl_type == ACLENT_T) {
+		err = convert_aent_to_ace(aclp->acl_aclp,
+		    aclp->acl_cnt, isdir, (ace_t **)&newdata, &newcnt);
+	} else if (to_aent && aclp->acl_type == ACE_T) {
+		err = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt,
+		    isdir, owner, group, (aclent_t **)&newdata, &newcnt);
+	} else {
+		err = ENOTSUP;
+	}
+
+	if (err == 0) {
+		/* swap the translated entries in for the old ones */
+		cacl_free(aclp->acl_aclp,
+		    aclp->acl_cnt * aclp->acl_entry_size);
+		aclp->acl_aclp = newdata;
+		aclp->acl_cnt = newcnt;
+		aclp->acl_type = to_ace ? ACE_T : ACLENT_T;
+		aclp->acl_entry_size = to_ace ?
+		    sizeof (ace_t) : sizeof (aclent_t);
+		return (0);
+	}
+
+#if !defined(_KERNEL)
+	errno = err;
+	return (-1);
+#else
+	return (err);
+#endif
+}
+
+#define	SET_ACE(acl, index, who, mask, type, flags) { \
+	acl[0][index].a_who = (uint32_t)who; \
+	acl[0][index].a_type = type; \
+	acl[0][index].a_flags = flags; \
+	acl[0][index++].a_access_mask = mask; \
+}
+
+/*
+ * Work out the access masks of the entries that make up the trivial ACL
+ * for the permission bits in mode.
+ *
+ * deny1	rights the owner lacks although group or other has them
+ * deny2	rights the group lacks although other has them
+ * allow0	rights the owner has that group lacks but other has
+ * owner, group, everyone
+ *		allow masks for the three classes
+ *
+ * A zero allow0, deny1 or deny2 means that entry is not needed.  Write
+ * permission maps to ACE_WRITE_DATA|ACE_APPEND_DATA in the class masks
+ * and to ACE_WRITE_DATA alone in allow0/deny1/deny2.
+ */
+void
+acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+    uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone)
+{
+	/* rights every class is granted regardless of mode */
+	const uint32_t common = ACE_READ_ACL | ACE_READ_ATTRIBUTES |
+	    ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE;
+
+	const int usr_r = (mode & S_IRUSR) != 0;
+	const int usr_w = (mode & S_IWUSR) != 0;
+	const int usr_x = (mode & S_IXUSR) != 0;
+	const int grp_r = (mode & S_IRGRP) != 0;
+	const int grp_w = (mode & S_IWGRP) != 0;
+	const int grp_x = (mode & S_IXGRP) != 0;
+	const int oth_r = (mode & S_IROTH) != 0;
+	const int oth_w = (mode & S_IWOTH) != 0;
+	const int oth_x = (mode & S_IXOTH) != 0;
+
+	/* a right that other holds while group does not */
+	const int gap_r = !grp_r && oth_r;
+	const int gap_w = !grp_w && oth_w;
+	const int gap_x = !grp_x && oth_x;
+
+	*group = 0;
+	*allow0 = 0;
+	*deny2 = 0;
+	*deny1 = 0;
+
+	/* owner@ deny */
+	if (!usr_r && (grp_r || oth_r))
+		*deny1 |= ACE_READ_DATA;
+	if (!usr_w && (grp_w || oth_w))
+		*deny1 |= ACE_WRITE_DATA;
+	if (!usr_x && (grp_x || oth_x))
+		*deny1 |= ACE_EXECUTE;
+
+	/* group@ deny */
+	if (gap_r)
+		*deny2 = ACE_READ_DATA;
+	if (gap_w)
+		*deny2 |= ACE_WRITE_DATA;
+	if (gap_x)
+		*deny2 |= ACE_EXECUTE;
+
+	/* leading owner@ allow */
+	if (usr_r && gap_r)
+		*allow0 |= ACE_READ_DATA;
+	if (usr_w && gap_w)
+		*allow0 |= ACE_WRITE_DATA;
+	if (usr_x && gap_x)
+		*allow0 |= ACE_EXECUTE;
+
+	/* only the owner is additionally given the write-metadata rights */
+	*owner = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | ACE_WRITE_ACL |
+	    ACE_WRITE_NAMED_ATTRS | common;
+	if (usr_r)
+		*owner |= ACE_READ_DATA;
+	if (usr_w)
+		*owner |= ACE_WRITE_DATA | ACE_APPEND_DATA;
+	if (usr_x)
+		*owner |= ACE_EXECUTE;
+
+	*group = common;
+	if (grp_r)
+		*group |= ACE_READ_DATA;
+	if (grp_w)
+		*group |= ACE_WRITE_DATA | ACE_APPEND_DATA;
+	if (grp_x)
+		*group |= ACE_EXECUTE;
+
+	*everyone = common;
+	if (oth_r)
+		*everyone |= ACE_READ_DATA;
+	if (oth_w)
+		*everyone |= ACE_WRITE_DATA | ACE_APPEND_DATA;
+	if (oth_x)
+		*everyone |= ACE_EXECUTE;
+}
+
+/*
+ * Build the trivial ace_t ACL that corresponds to mode.
+ *
+ * The array is obtained with cacl_malloc() and stored in *acl; *count is
+ * set to the number of entries, which is 3 (owner@, group@, everyone@
+ * allow entries) plus one for each non-zero allow0/deny1/deny2 mask from
+ * acl_trivial_access_masks().  The optional entries come first, in the
+ * order allow0, deny1, deny2.
+ *
+ * Returns 0 on success or the error reported by cacl_malloc(); *count is
+ * already set when the allocation fails.
+ */
+int
+acl_trivial_create(mode_t mode, ace_t **acl, int *count)
+{
+	uint32_t	allow0, deny1, deny2;
+	uint32_t	owner, group, everyone;
+	int		nace = 3;
+	int		slot = 0;
+	int		err;
+
+	acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group,
+	    &everyone);
+
+	/* one extra entry for every optional mask that is in use */
+	nace += (allow0 != 0);
+	nace += (deny1 != 0);
+	nace += (deny2 != 0);
+	*count = nace;
+
+	err = cacl_malloc((void **)acl, nace * sizeof (ace_t));
+	if (err != 0)
+		return (err);
+
+	if (allow0 != 0) {
+		SET_ACE(acl, slot, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE,
+		    ACE_OWNER);
+	}
+	if (deny1 != 0) {
+		SET_ACE(acl, slot, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE,
+		    ACE_OWNER);
+	}
+	if (deny2 != 0) {
+		SET_ACE(acl, slot, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE,
+		    ACE_GROUP|ACE_IDENTIFIER_GROUP);
+	}
+
+	/* the three entries that are always present */
+	SET_ACE(acl, slot, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
+	SET_ACE(acl, slot, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_IDENTIFIER_GROUP|ACE_GROUP);
+	SET_ACE(acl, slot, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_EVERYONE);
+
+	return (0);
+}
+
+/*
+ * ace_trivial_common:
+ * determine whether an ACL, visited through the walk callback, is trivial.
+ *
+ * walk is called with the previous cookie (0 to start) and fills in the
+ * flags, type and access mask of the next entry; it returns the cookie
+ * for the following call, or 0 once there are no more entries.
+ *
+ * An ACL is trivial when it consists only of owner@, group@ and
+ * everyone@ entries without inheritance flags, no entry denies read_acl
+ * or read_attributes, no entry mentions delete or delete_child, and
+ * write_owner/write_acl/write_attributes/write_xattr are allowed only on
+ * owner@ entries.
+ *
+ * Returns 0 when the ACL is trivial and 1 when it is not.
+ */
+int
+ace_trivial_common(void *acep, int aclcnt,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt,
+    uint16_t *, uint16_t *, uint32_t *))
+{
+	uint64_t pos = 0;
+	uint16_t aflags;
+	uint16_t atype;
+	uint16_t who;
+	uint32_t amask;
+
+	for (;;) {
+		pos = walk(acep, pos, aclcnt, &aflags, &atype, &amask);
+		if (pos == 0)
+			break;
+
+		/* only the three abstract principals are acceptable */
+		who = aflags & ACE_TYPE_FLAGS;
+		if (who != ACE_OWNER &&
+		    who != (ACE_GROUP|ACE_IDENTIFIER_GROUP) &&
+		    who != ACE_EVERYONE)
+			return (1);
+
+		/* any inheritance flag makes the ACL non-trivial */
+		if ((aflags & (ACE_FILE_INHERIT_ACE|
+		    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|
+		    ACE_INHERIT_ONLY_ACE)) != 0)
+			return (1);
+
+		/*
+		 * Nobody may be denied reading the basic attributes or
+		 * the file's ACL.
+		 */
+		if (atype == ACE_ACCESS_DENIED_ACE_TYPE &&
+		    (amask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) != 0)
+			return (1);
+
+		/* delete permissions are never set by default */
+		if ((amask & (ACE_DELETE|ACE_DELETE_CHILD)) != 0)
+			return (1);
+
+		/*
+		 * write_acl/write_owner/write_attributes/write_xattr may
+		 * be allowed on owner@ entries only.
+		 */
+		if (atype == ACE_ACCESS_ALLOWED_ACE_TYPE &&
+		    (aflags & ACE_OWNER) == 0 &&
+		    (amask & (ACE_WRITE_OWNER|ACE_WRITE_ACL|
+		    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)) != 0)
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Walk callback for ace_trivial_common() over a plain ace_t array.
+ *
+ * datap	the ace_t array
+ * cookie	index of the entry to report (0 for the first one)
+ * aclcnt	number of entries in the array
+ * flags, type, mask
+ *		receive a_flags, a_type and a_access_mask of that entry
+ *
+ * Returns the cookie for the next call (cookie + 1), or 0 when cookie is
+ * past the last entry; the outputs are not written in that case.
+ */
+uint64_t
+ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags,
+    uint16_t *type, uint32_t *mask)
+{
+	ace_t *acep = datap;
+
+	/*
+	 * Test the sign explicitly: comparing the unsigned cookie with a
+	 * negative count would convert the count to a huge unsigned value
+	 * and let the walk run off the end of the array.
+	 */
+	if (aclcnt <= 0 || cookie >= (uint64_t)aclcnt)
+		return (0);
+
+	*flags = acep[cookie].a_flags;
+	*type = acep[cookie].a_type;
+	*mask = acep[cookie++].a_access_mask;
+
+	return (cookie);
+}
+
+/*
+ * Check an ace_t array of aclcnt entries for trivialness by running
+ * ace_trivial_common() with ace_walk() as the iterator.  Returns 0 when
+ * the ACL is trivial and 1 when it is not.
+ */
+int
+ace_trivial(ace_t *acep, int aclcnt)
+{
+	int nontrivial;
+
+	nontrivial = ace_trivial_common(acep, aclcnt, ace_walk);
+
+	return (nontrivial);
+}
diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h
new file mode 100644
index 000000000000..f76cbd3b450f
--- /dev/null
+++ b/common/acl/acl_common.h
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_ACL_COMMON_H
+#define	_ACL_COMMON_H
+
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* NOTE(review): defined outside acl_common.c; contents not visible here. */
+extern ace_t trivial_acl[6];
+
+/*
+ * NOTE(review): acltrivial(), adjust_ace_pair() and
+ * adjust_ace_pair_common() are not defined in acl_common.c; see their
+ * implementations for the exact contracts.
+ */
+extern int acltrivial(const char *);
+extern void adjust_ace_pair(ace_t *pair, mode_t mode);
+extern void adjust_ace_pair_common(void *, size_t, size_t, mode_t);
+
+/*
+ * Trivialness checks.  Both return 0 when the ACL consists solely of
+ * plain owner@/group@/everyone@ entries (see ace_trivial_common() in
+ * acl_common.c for the precise rules) and 1 otherwise.  The walk callback
+ * yields one entry per call and returns the cookie for the next call, or
+ * 0 when the entries are exhausted.
+ */
+extern int ace_trivial(ace_t *acep, int aclcnt);
+extern int ace_trivial_common(void *, int,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *,
+    uint32_t *mask));
+
+/* NOTE(review): acl_alloc()/acl_free() are implemented elsewhere. */
+extern acl_t *acl_alloc(acl_type_t);
+extern void acl_free(acl_t *aclp);
+
+/*
+ * Convert aclp in place to target_flavor (_ACL_ACE_ENABLED or
+ * _ACL_ACLENT_ENABLED).  Returns 0 on success; on failure kernel builds
+ * return an error number while other builds set errno and return -1.
+ */
+extern int acl_translate(acl_t *aclp, int target_flavor,
+    int isdir, uid_t owner, gid_t group);
+
+/* NOTE(review): ksort() and cmp2acls() are implemented elsewhere. */
+void ksort(caddr_t v, int n, int s, int (*f)());
+int cmp2acls(void *a, void *b);
+
+/*
+ * acl_trivial_create() allocates the trivial ace_t ACL for mode and
+ * stores the array in *acl and its length (3 to 6) in *count; it returns
+ * 0 or the allocation error.  acl_trivial_access_masks() computes the
+ * access masks that ACL is built from; an allow0, deny1 or deny2 of zero
+ * means the corresponding entry is not needed.
+ */
+int acl_trivial_create(mode_t mode, ace_t **acl, int *count);
+void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+    uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _ACL_COMMON_H */
diff --git a/common/atomic/amd64/atomic.s b/common/atomic/amd64/atomic.s
new file mode 100644
index 000000000000..4b0d66e4db20
--- /dev/null
+++ b/common/atomic/amd64/atomic.s
@@ -0,0 +1,573 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+	.file	"atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+	/*
+	 * Legacy kernel interfaces; they will go away (eventually).
+	 *
+	 * Each line pairs an old kernel name (first argument) with one of
+	 * the atomic_* routines defined later in this file (second
+	 * argument).
+	 * NOTE(review): ANSI_PRAGMA_WEAK2 comes from <sys/asm_linkage.h>;
+	 * presumably it makes the first name a weak alias of the second --
+	 * confirm against that header.
+	 */
+	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+#endif
+
+	/*
+	 * atomic_inc_{8,16,32,64} and their uchar/ushort/uint/ulong
+	 * alternate entry points: add 1 to the 8/16/32/64-bit value at the
+	 * address in %rdi.  The lock prefix makes the read-modify-write
+	 * atomic.  No return value is produced.
+	 */
+	ENTRY(atomic_inc_8)
+	ALTENTRY(atomic_inc_uchar)
+	lock
+	incb	(%rdi)
+	ret
+	SET_SIZE(atomic_inc_uchar)
+	SET_SIZE(atomic_inc_8)
+
+	ENTRY(atomic_inc_16)
+	ALTENTRY(atomic_inc_ushort)
+	lock
+	incw	(%rdi)
+	ret
+	SET_SIZE(atomic_inc_ushort)
+	SET_SIZE(atomic_inc_16)
+
+	ENTRY(atomic_inc_32)
+	ALTENTRY(atomic_inc_uint)
+	lock
+	incl	(%rdi)
+	ret
+	SET_SIZE(atomic_inc_uint)
+	SET_SIZE(atomic_inc_32)
+
+	ENTRY(atomic_inc_64)
+	ALTENTRY(atomic_inc_ulong)
+	lock
+	incq	(%rdi)
+	ret
+	SET_SIZE(atomic_inc_ulong)
+	SET_SIZE(atomic_inc_64)
+
+	/*
+	 * atomic_inc_{8,16,32,64}_nv: add 1 to the value at the address in
+	 * %rdi and return the new value.  The accumulator is loaded with 1,
+	 * a locked xadd swaps it with the old memory contents while storing
+	 * the sum, and the final inc turns that old value into the new one.
+	 * The 8- and 16-bit variants clear all of %eax first so the upper
+	 * bits of the result are zero.
+	 */
+	ENTRY(atomic_inc_8_nv)
+	ALTENTRY(atomic_inc_uchar_nv)
+	xorl	%eax, %eax	/ clear upper bits of %eax return register
+	incb	%al		/ %al = 1
+	lock
+	  xaddb	%al, (%rdi)	/ %al = old value, (%rdi) = new value
+	incb	%al		/ return new value
+	ret
+	SET_SIZE(atomic_inc_uchar_nv)
+	SET_SIZE(atomic_inc_8_nv)
+
+	ENTRY(atomic_inc_16_nv)
+	ALTENTRY(atomic_inc_ushort_nv)
+	xorl	%eax, %eax	/ clear upper bits of %eax return register
+	incw	%ax		/ %ax = 1
+	lock
+	  xaddw	%ax, (%rdi)	/ %ax = old value, (%rdi) = new value
+	incw	%ax		/ return new value
+	ret
+	SET_SIZE(atomic_inc_ushort_nv)
+	SET_SIZE(atomic_inc_16_nv)
+
+	ENTRY(atomic_inc_32_nv)
+	ALTENTRY(atomic_inc_uint_nv)
+	xorl	%eax, %eax	/ %eax = 0
+	incl	%eax		/ %eax = 1
+	lock
+	  xaddl	%eax, (%rdi)	/ %eax = old value, (%rdi) = new value
+	incl	%eax		/ return new value
+	ret
+	SET_SIZE(atomic_inc_uint_nv)
+	SET_SIZE(atomic_inc_32_nv)
+
+	ENTRY(atomic_inc_64_nv)
+	ALTENTRY(atomic_inc_ulong_nv)
+	xorq	%rax, %rax	/ %rax = 0
+	incq	%rax		/ %rax = 1
+	lock
+	  xaddq	%rax, (%rdi)	/ %rax = old value, (%rdi) = new value
+	incq	%rax		/ return new value
+	ret
+	SET_SIZE(atomic_inc_ulong_nv)
+	SET_SIZE(atomic_inc_64_nv)
+
+	/*
+	 * atomic_dec_{8,16,32,64} and their uchar/ushort/uint/ulong
+	 * alternate entry points: subtract 1 from the 8/16/32/64-bit value
+	 * at the address in %rdi using a locked dec.  No return value is
+	 * produced.
+	 */
+	ENTRY(atomic_dec_8)
+	ALTENTRY(atomic_dec_uchar)
+	lock
+	decb	(%rdi)
+	ret
+	SET_SIZE(atomic_dec_uchar)
+	SET_SIZE(atomic_dec_8)
+
+	ENTRY(atomic_dec_16)
+	ALTENTRY(atomic_dec_ushort)
+	lock
+	decw	(%rdi)
+	ret
+	SET_SIZE(atomic_dec_ushort)
+	SET_SIZE(atomic_dec_16)
+
+	ENTRY(atomic_dec_32)
+	ALTENTRY(atomic_dec_uint)
+	lock
+	decl	(%rdi)
+	ret
+	SET_SIZE(atomic_dec_uint)
+	SET_SIZE(atomic_dec_32)
+
+	ENTRY(atomic_dec_64)
+	ALTENTRY(atomic_dec_ulong)
+	lock
+	decq	(%rdi)
+	ret
+	SET_SIZE(atomic_dec_ulong)
+	SET_SIZE(atomic_dec_64)
+
+	/*
+	 * atomic_dec_{8,16,32,64}_nv: subtract 1 from the value at the
+	 * address in %rdi and return the new value.  The accumulator is
+	 * loaded with -1, a locked xadd swaps it with the old memory
+	 * contents while storing the sum, and the final dec turns that old
+	 * value into the new one.  The 8- and 16-bit variants clear all of
+	 * %eax first so the upper bits of the result are zero.
+	 */
+	ENTRY(atomic_dec_8_nv)
+	ALTENTRY(atomic_dec_uchar_nv)
+	xorl	%eax, %eax	/ clear upper bits of %eax return register
+	decb	%al		/ %al = -1
+	lock
+	  xaddb	%al, (%rdi)	/ %al = old value, (%rdi) = new value
+	decb	%al		/ return new value
+	ret
+	SET_SIZE(atomic_dec_uchar_nv)
+	SET_SIZE(atomic_dec_8_nv)
+
+	ENTRY(atomic_dec_16_nv)
+	ALTENTRY(atomic_dec_ushort_nv)
+	xorl	%eax, %eax	/ clear upper bits of %eax return register
+	decw	%ax		/ %ax = -1
+	lock
+	  xaddw	%ax, (%rdi)	/ %ax = old value, (%rdi) = new value
+	decw	%ax		/ return new value
+	ret
+	SET_SIZE(atomic_dec_ushort_nv)
+	SET_SIZE(atomic_dec_16_nv)
+
+	ENTRY(atomic_dec_32_nv)
+	ALTENTRY(atomic_dec_uint_nv)
+	xorl	%eax, %eax	/ %eax = 0
+	decl	%eax		/ %eax = -1
+	lock
+	  xaddl	%eax, (%rdi)	/ %eax = old value, (%rdi) = new value
+	decl	%eax		/ return new value
+	ret
+	SET_SIZE(atomic_dec_uint_nv)
+	SET_SIZE(atomic_dec_32_nv)
+
+	ENTRY(atomic_dec_64_nv)
+	ALTENTRY(atomic_dec_ulong_nv)
+	xorq	%rax, %rax	/ %rax = 0
+	decq	%rax		/ %rax = -1
+	lock
+	  xaddq	%rax, (%rdi)	/ %rax = old value, (%rdi) = new value
+	decq	%rax		/ return new value
+	ret
+	SET_SIZE(atomic_dec_ulong_nv)
+	SET_SIZE(atomic_dec_64_nv)
+
+	/*
+	 * atomic_add_{8,16,32,64}: add the delta held in the low
+	 * 8/16/32/64 bits of %rsi to the value at the address in %rdi with
+	 * a locked add.  No return value is produced.  atomic_add_ptr and
+	 * atomic_add_long share the 64-bit implementation.
+	 */
+	ENTRY(atomic_add_8)
+	ALTENTRY(atomic_add_char)
+	lock
+	addb	%sil, (%rdi)
+	ret
+	SET_SIZE(atomic_add_char)
+	SET_SIZE(atomic_add_8)
+
+	ENTRY(atomic_add_16)
+	ALTENTRY(atomic_add_short)
+	lock
+	addw	%si, (%rdi)
+	ret
+	SET_SIZE(atomic_add_short)
+	SET_SIZE(atomic_add_16)
+
+	ENTRY(atomic_add_32)
+	ALTENTRY(atomic_add_int)
+	lock
+	addl	%esi, (%rdi)
+	ret
+	SET_SIZE(atomic_add_int)
+	SET_SIZE(atomic_add_32)
+
+	ENTRY(atomic_add_64)
+	ALTENTRY(atomic_add_ptr)
+	ALTENTRY(atomic_add_long)
+	lock
+	addq	%rsi, (%rdi)
+	ret
+	SET_SIZE(atomic_add_long)
+	SET_SIZE(atomic_add_ptr)
+	SET_SIZE(atomic_add_64)
+
+	/*
+	 * atomic_or_{8,16,32,64} and their uchar/ushort/uint/ulong
+	 * alternate entry points: OR the bits held in the low 8/16/32/64
+	 * bits of %rsi into the value at the address in %rdi with a locked
+	 * or.  No return value is produced.
+	 */
+	ENTRY(atomic_or_8)
+	ALTENTRY(atomic_or_uchar)
+	lock
+	orb	%sil, (%rdi)
+	ret
+	SET_SIZE(atomic_or_uchar)
+	SET_SIZE(atomic_or_8)
+
+	ENTRY(atomic_or_16)
+	ALTENTRY(atomic_or_ushort)
+	lock
+	orw	%si, (%rdi)
+	ret
+	SET_SIZE(atomic_or_ushort)
+	SET_SIZE(atomic_or_16)
+
+	ENTRY(atomic_or_32)
+	ALTENTRY(atomic_or_uint)
+	lock
+	orl	%esi, (%rdi)
+	ret
+	SET_SIZE(atomic_or_uint)
+	SET_SIZE(atomic_or_32)
+
+	ENTRY(atomic_or_64)
+	ALTENTRY(atomic_or_ulong)
+	lock
+	orq	%rsi, (%rdi)
+	ret
+	SET_SIZE(atomic_or_ulong)
+	SET_SIZE(atomic_or_64)
+
+	/*
+	 * atomic_and_{8,16,32,64} and their uchar/ushort/uint/ulong
+	 * alternate entry points: AND the value at the address in %rdi with
+	 * the mask held in the low 8/16/32/64 bits of %rsi using a locked
+	 * and.  No return value is produced.
+	 */
+	ENTRY(atomic_and_8)
+	ALTENTRY(atomic_and_uchar)
+	lock
+	andb	%sil, (%rdi)
+	ret
+	SET_SIZE(atomic_and_uchar)
+	SET_SIZE(atomic_and_8)
+
+	ENTRY(atomic_and_16)
+	ALTENTRY(atomic_and_ushort)
+	lock
+	andw	%si, (%rdi)
+	ret
+	SET_SIZE(atomic_and_ushort)
+	SET_SIZE(atomic_and_16)
+
+	ENTRY(atomic_and_32)
+	ALTENTRY(atomic_and_uint)
+	lock
+	andl	%esi, (%rdi)
+	ret
+	SET_SIZE(atomic_and_uint)
+	SET_SIZE(atomic_and_32)
+
+	ENTRY(atomic_and_64)
+	ALTENTRY(atomic_and_ulong)
+	lock
+	andq	%rsi, (%rdi)
+	ret
+	SET_SIZE(atomic_and_ulong)
+	SET_SIZE(atomic_and_64)
+
+	/*
+	 * atomic_add_{8,16,32,64}_nv: add the delta in %rsi to the value at
+	 * the address in %rdi and return the new value.  The delta is
+	 * copied to the accumulator, a locked xadd stores the sum and
+	 * leaves the old memory contents in the %rsi register, and adding
+	 * that old value to the saved delta rebuilds the sum for the
+	 * caller.  atomic_add_ptr_nv and atomic_add_long_nv share the
+	 * 64-bit implementation.
+	 */
+	ENTRY(atomic_add_8_nv)
+	ALTENTRY(atomic_add_char_nv)
+	movzbl	%sil, %eax		/ %al = delta addend, clear upper bits
+	lock
+	  xaddb	%sil, (%rdi)		/ %sil = old value, (%rdi) = sum
+	addb	%sil, %al		/ new value = original value + delta
+	ret
+	SET_SIZE(atomic_add_char_nv)
+	SET_SIZE(atomic_add_8_nv)
+
+	ENTRY(atomic_add_16_nv)
+	ALTENTRY(atomic_add_short_nv)
+	movzwl	%si, %eax		/ %ax = delta addend, clean upper bits
+	lock
+	  xaddw	%si, (%rdi)		/ %si = old value, (%rdi) = sum
+	addw	%si, %ax		/ new value = original value + delta
+	ret
+	SET_SIZE(atomic_add_short_nv)
+	SET_SIZE(atomic_add_16_nv)
+
+	ENTRY(atomic_add_32_nv)
+	ALTENTRY(atomic_add_int_nv)
+	mov	%esi, %eax		/ %eax = delta addend
+	lock
+	  xaddl	%esi, (%rdi)		/ %esi = old value, (%rdi) = sum
+	add	%esi, %eax		/ new value = original value + delta
+	ret
+	SET_SIZE(atomic_add_int_nv)
+	SET_SIZE(atomic_add_32_nv)
+
+	ENTRY(atomic_add_64_nv)
+	ALTENTRY(atomic_add_ptr_nv)
+	ALTENTRY(atomic_add_long_nv)
+	mov	%rsi, %rax		/ %rax = delta addend
+	lock
+	  xaddq	%rsi, (%rdi)		/ %rsi = old value, (%rdi) = sum
+	addq	%rsi, %rax		/ new value = original value + delta
+	ret
+	SET_SIZE(atomic_add_long_nv)
+	SET_SIZE(atomic_add_ptr_nv)
+	SET_SIZE(atomic_add_64_nv)
+
+	/*
+	 * atomic_and_{8,16,32,64}_nv: AND the value at the address in %rdi
+	 * with the mask in %rsi and return the new value.
+	 *
+	 * Implemented as a compare-and-swap loop: the accumulator holds the
+	 * value last seen in memory, %rcx the candidate result.  If the
+	 * locked cmpxchg finds memory unchanged it stores the candidate;
+	 * otherwise it reloads the accumulator with the current contents
+	 * and the loop recomputes the candidate.
+	 */
+	ENTRY(atomic_and_8_nv)
+	ALTENTRY(atomic_and_uchar_nv)
+	movb	(%rdi), %al	/ %al = old value
+1:
+	movb	%sil, %cl
+	andb	%al, %cl	/ %cl = new value
+	lock
+	cmpxchgb %cl, (%rdi)	/ try to stick it in
+	jne	1b
+	movzbl	%cl, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_uchar_nv)
+	SET_SIZE(atomic_and_8_nv)
+
+	ENTRY(atomic_and_16_nv)
+	ALTENTRY(atomic_and_ushort_nv)
+	movw	(%rdi), %ax	/ %ax = old value
+1:
+	movw	%si, %cx
+	andw	%ax, %cx	/ %cx = new value
+	lock
+	cmpxchgw %cx, (%rdi)	/ try to stick it in
+	jne	1b
+	movzwl	%cx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_ushort_nv)
+	SET_SIZE(atomic_and_16_nv)
+
+	ENTRY(atomic_and_32_nv)
+	ALTENTRY(atomic_and_uint_nv)
+	movl	(%rdi), %eax	/ %eax = old value
+1:
+	movl	%esi, %ecx
+	andl	%eax, %ecx	/ %ecx = new value
+	lock
+	cmpxchgl %ecx, (%rdi)	/ try to stick it in
+	jne	1b
+	movl	%ecx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_uint_nv)
+	SET_SIZE(atomic_and_32_nv)
+
+	ENTRY(atomic_and_64_nv)
+	ALTENTRY(atomic_and_ulong_nv)
+	movq	(%rdi), %rax	/ %rax = old value
+1:
+	movq	%rsi, %rcx
+	andq	%rax, %rcx	/ %rcx = new value
+	lock
+	cmpxchgq %rcx, (%rdi)	/ try to stick it in
+	jne	1b
+	movq	%rcx, %rax	/ return new value
+	ret
+	SET_SIZE(atomic_and_ulong_nv)
+	SET_SIZE(atomic_and_64_nv)
+
+	/*
+	 * atomic_or_{8,16,32,64}_nv: OR the bits in %rsi into the value at
+	 * the address in %rdi and return the new value.
+	 *
+	 * Implemented as a compare-and-swap loop: the accumulator holds the
+	 * value last seen in memory, %rcx the candidate result.  If the
+	 * locked cmpxchg finds memory unchanged it stores the candidate;
+	 * otherwise it reloads the accumulator with the current contents
+	 * and the loop recomputes the candidate.
+	 */
+	ENTRY(atomic_or_8_nv)
+	ALTENTRY(atomic_or_uchar_nv)
+	movb	(%rdi), %al	/ %al = old value
+1:
+	movb	%sil, %cl
+	orb	%al, %cl	/ %cl = new value
+	lock
+	cmpxchgb %cl, (%rdi)	/ try to stick it in
+	jne	1b
+	movzbl	%cl, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_uchar_nv)
+	SET_SIZE(atomic_or_8_nv)
+
+	ENTRY(atomic_or_16_nv)
+	ALTENTRY(atomic_or_ushort_nv)
+	movw	(%rdi), %ax	/ %ax = old value
+1:
+	movw	%si, %cx
+	orw	%ax, %cx	/ %cx = new value
+	lock
+	cmpxchgw %cx, (%rdi)	/ try to stick it in
+	jne	1b
+	movzwl	%cx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_ushort_nv)
+	SET_SIZE(atomic_or_16_nv)
+
+	ENTRY(atomic_or_32_nv)
+	ALTENTRY(atomic_or_uint_nv)
+	movl	(%rdi), %eax	/ %eax = old value
+1:
+	movl	%esi, %ecx
+	orl	%eax, %ecx	/ %ecx = new value
+	lock
+	cmpxchgl %ecx, (%rdi)	/ try to stick it in
+	jne	1b
+	movl	%ecx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_uint_nv)
+	SET_SIZE(atomic_or_32_nv)
+
+	ENTRY(atomic_or_64_nv)
+	ALTENTRY(atomic_or_ulong_nv)
+	movq	(%rdi), %rax	/ %rax = old value
+1:
+	movq	%rsi, %rcx
+	orq	%rax, %rcx	/ %rcx = new value
+	lock
+	cmpxchgq %rcx, (%rdi)	/ try to stick it in
+	jne	1b
+	movq	%rcx, %rax	/ return new value
+	ret
+	SET_SIZE(atomic_or_ulong_nv)
+	SET_SIZE(atomic_or_64_nv)
+
+	/*
+	 * atomic_cas_{8,16,32,64}: compare-and-swap.  %rdi holds the target
+	 * address, %rsi the value to compare against and %rdx the
+	 * replacement.  The comparand is moved into the accumulator; the
+	 * locked cmpxchg stores the replacement when memory matches it and
+	 * otherwise loads the current memory contents into the accumulator.
+	 * Either way the value returned is what was in memory before the
+	 * operation.  The 8- and 16-bit variants zero-extend the comparand
+	 * so the upper bits of %eax are clear.  atomic_cas_ulong and
+	 * atomic_cas_ptr share the 64-bit implementation.
+	 */
+	ENTRY(atomic_cas_8)
+	ALTENTRY(atomic_cas_uchar)
+	movzbl	%sil, %eax
+	lock
+	cmpxchgb %dl, (%rdi)
+	ret
+	SET_SIZE(atomic_cas_uchar)
+	SET_SIZE(atomic_cas_8)
+
+	ENTRY(atomic_cas_16)
+	ALTENTRY(atomic_cas_ushort)
+	movzwl	%si, %eax
+	lock
+	cmpxchgw %dx, (%rdi)
+	ret
+	SET_SIZE(atomic_cas_ushort)
+	SET_SIZE(atomic_cas_16)
+
+	ENTRY(atomic_cas_32)
+	ALTENTRY(atomic_cas_uint)
+	movl	%esi, %eax
+	lock
+	cmpxchgl %edx, (%rdi)
+	ret
+	SET_SIZE(atomic_cas_uint)
+	SET_SIZE(atomic_cas_32)
+
+	ENTRY(atomic_cas_64)
+	ALTENTRY(atomic_cas_ulong)
+	ALTENTRY(atomic_cas_ptr)
+	movq	%rsi, %rax
+	lock
+	cmpxchgq %rdx, (%rdi)
+	ret
+	SET_SIZE(atomic_cas_ptr)
+	SET_SIZE(atomic_cas_ulong)
+	SET_SIZE(atomic_cas_64)
+
+	/*
+	 * atomic_swap_{8,16,32,64}: store the value in %rsi at the address
+	 * in %rdi and return the previous contents.  The new value is moved
+	 * into the accumulator and exchanged with memory, which leaves the
+	 * old value in the return register.  The 8- and 16-bit variants
+	 * zero-extend the new value so the upper bits of %eax are clear.
+	 * atomic_swap_ulong and atomic_swap_ptr share the 64-bit
+	 * implementation.
+	 */
+	ENTRY(atomic_swap_8)
+	ALTENTRY(atomic_swap_uchar)
+	movzbl	%sil, %eax
+	lock
+	xchgb %al, (%rdi)
+	ret
+	SET_SIZE(atomic_swap_uchar)
+	SET_SIZE(atomic_swap_8)
+
+	ENTRY(atomic_swap_16)
+	ALTENTRY(atomic_swap_ushort)
+	movzwl	%si, %eax
+	lock
+	xchgw %ax, (%rdi)
+	ret
+	SET_SIZE(atomic_swap_ushort)
+	SET_SIZE(atomic_swap_16)
+
+	ENTRY(atomic_swap_32)
+	ALTENTRY(atomic_swap_uint)
+	movl	%esi, %eax
+	lock
+	xchgl %eax, (%rdi)
+	ret
+	SET_SIZE(atomic_swap_uint)
+	SET_SIZE(atomic_swap_32)
+
+	ENTRY(atomic_swap_64)
+	ALTENTRY(atomic_swap_ulong)
+	ALTENTRY(atomic_swap_ptr)
+	movq	%rsi, %rax
+	lock
+	xchgq %rax, (%rdi)
+	ret
+	SET_SIZE(atomic_swap_ptr)
+	SET_SIZE(atomic_swap_ulong)
+	SET_SIZE(atomic_swap_64)
+
+	/*
+	 * atomic_set_long_excl: atomically set bit number %rsi of the long
+	 * at the address in %rdi.  The locked bts leaves the previous state
+	 * of the bit in the carry flag: returns 0 when the bit was clear
+	 * (this call set it) and -1 when it was already set.
+	 */
+	ENTRY(atomic_set_long_excl)
+	xorl	%eax, %eax
+	lock
+	btsq	%rsi, (%rdi)
+	jnc	1f
+	decl	%eax			/ return -1
+1:
+	ret
+	SET_SIZE(atomic_set_long_excl)
+
+	/*
+	 * atomic_clear_long_excl: atomically clear bit number %rsi of the
+	 * long at the address in %rdi.  The locked btr leaves the previous
+	 * state of the bit in the carry flag: returns 0 when the bit was
+	 * set (this call cleared it) and -1 when it was already clear.
+	 */
+	ENTRY(atomic_clear_long_excl)
+	xorl	%eax, %eax
+	lock
+	btrq	%rsi, (%rdi)
+	jc	1f
+	decl	%eax			/ return -1
+1:
+	ret
+	SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+	/*
+	 * NOTE: membar_enter and membar_exit are identical routines.
+	 * We define them separately, instead of using an ALTENTRY
+	 * definition to alias them together, so that DTrace and
+	 * debuggers will see a unique address for each of them, allowing
+	 * more accurate tracing.
+	 */
+
+	/* membar_enter: full fence (mfence) covering loads and stores */
+	ENTRY(membar_enter)
+	mfence
+	ret
+	SET_SIZE(membar_enter)
+
+	/* membar_exit: full fence (mfence) covering loads and stores */
+	ENTRY(membar_exit)
+	mfence
+	ret
+	SET_SIZE(membar_exit)
+
+	/* membar_producer: store fence (sfence) */
+	ENTRY(membar_producer)
+	sfence
+	ret
+	SET_SIZE(membar_producer)
+
+	/* membar_consumer: load fence (lfence) */
+	ENTRY(membar_consumer)
+	lfence
+	ret
+	SET_SIZE(membar_consumer)
+
+#endif	/* !_KERNEL */
diff --git a/common/atomic/i386/atomic.s b/common/atomic/i386/atomic.s
new file mode 100644
index 000000000000..4fa525ba20af
--- /dev/null
+++ b/common/atomic/i386/atomic.s
@@ -0,0 +1,720 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+	/*
+	 * Legacy kernel interfaces; they will go away (eventually).
+	 */
+	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+#endif	/* _KERNEL */
+
+	ENTRY(atomic_inc_8)	/* atomically add 1 to the byte at arg 1 */
+	ALTENTRY(atomic_inc_uchar)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	incb	(%eax)
+	ret
+	SET_SIZE(atomic_inc_uchar)
+	SET_SIZE(atomic_inc_8)
+
+	ENTRY(atomic_inc_16)	/* atomically add 1 to the 16-bit value at arg 1 */
+	ALTENTRY(atomic_inc_ushort)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	incw	(%eax)
+	ret
+	SET_SIZE(atomic_inc_ushort)
+	SET_SIZE(atomic_inc_16)
+
+	ENTRY(atomic_inc_32)	/* atomically add 1 to the 32-bit value at arg 1 */
+	ALTENTRY(atomic_inc_uint)
+	ALTENTRY(atomic_inc_ulong)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	incl	(%eax)
+	ret
+	SET_SIZE(atomic_inc_ulong)
+	SET_SIZE(atomic_inc_uint)
+	SET_SIZE(atomic_inc_32)
+
+	ENTRY(atomic_inc_8_nv)	/* byte increment; new value left in %eax */
+	ALTENTRY(atomic_inc_uchar_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ clear upper bits of %eax
+	incb	%al		/ %al = 1
+	lock
+	  xaddb	%al, (%edx)	/ %al = old value, inc (%edx)
+	incb	%al	/ return new value
+	ret
+	SET_SIZE(atomic_inc_uchar_nv)
+	SET_SIZE(atomic_inc_8_nv)
+
+	ENTRY(atomic_inc_16_nv)	/* 16-bit increment; new value left in %eax */
+	ALTENTRY(atomic_inc_ushort_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ clear upper bits of %eax
+	incw	%ax		/ %ax = 1
+	lock
+	  xaddw	%ax, (%edx)	/ %ax = old value, inc (%edx)
+	incw	%ax		/ return new value
+	ret
+	SET_SIZE(atomic_inc_ushort_nv)
+	SET_SIZE(atomic_inc_16_nv)
+
+	ENTRY(atomic_inc_32_nv)	/* 32-bit increment; new value left in %eax */
+	ALTENTRY(atomic_inc_uint_nv)
+	ALTENTRY(atomic_inc_ulong_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ %eax = 0
+	incl	%eax		/ %eax = 1
+	lock
+	  xaddl	%eax, (%edx)	/ %eax = old value, inc (%edx)
+	incl	%eax		/ return new value
+	ret
+	SET_SIZE(atomic_inc_ulong_nv)
+	SET_SIZE(atomic_inc_uint_nv)
+	SET_SIZE(atomic_inc_32_nv)
+
+	/*
+	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
+	 * separated, you need to also edit the libc i386 platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_inc_64_nv.
+	 */
+	ENTRY(atomic_inc_64)	/* 64-bit increment via a cmpxchg8b retry loop */
+	ALTENTRY(atomic_inc_64_nv)
+	pushl	%edi		/* %edi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %edi	/ %edi = target address
+	movl	(%edi), %eax
+	movl	4(%edi), %edx	/ %edx:%eax = old value
+1:
+	xorl	%ebx, %ebx
+	xorl	%ecx, %ecx
+	incl	%ebx		/ %ecx:%ebx = 1
+	addl	%eax, %ebx
+	adcl	%edx, %ecx	/ add in the carry from inc
+	lock
+	cmpxchg8b (%edi)	/ try to stick it in
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	movl	%ebx, %eax
+	movl	%ecx, %edx	/ return new value
+	popl	%ebx
+	popl	%edi
+	ret
+	SET_SIZE(atomic_inc_64_nv)
+	SET_SIZE(atomic_inc_64)
+
+	ENTRY(atomic_dec_8)	/* atomically subtract 1 from the byte at arg 1 */
+	ALTENTRY(atomic_dec_uchar)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	decb	(%eax)
+	ret
+	SET_SIZE(atomic_dec_uchar)
+	SET_SIZE(atomic_dec_8)
+
+	ENTRY(atomic_dec_16)	/* atomically subtract 1 from the 16-bit value at arg 1 */
+	ALTENTRY(atomic_dec_ushort)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	decw	(%eax)
+	ret
+	SET_SIZE(atomic_dec_ushort)
+	SET_SIZE(atomic_dec_16)
+
+	ENTRY(atomic_dec_32)	/* atomically subtract 1 from the 32-bit value at arg 1 */
+	ALTENTRY(atomic_dec_uint)
+	ALTENTRY(atomic_dec_ulong)
+	movl	4(%esp), %eax	/* %eax = target address */
+	lock
+	decl	(%eax)
+	ret
+	SET_SIZE(atomic_dec_ulong)
+	SET_SIZE(atomic_dec_uint)
+	SET_SIZE(atomic_dec_32)
+
+	ENTRY(atomic_dec_8_nv)	/* byte decrement; new value left in %eax */
+	ALTENTRY(atomic_dec_uchar_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ zero upper bits of %eax
+	decb	%al		/ %al = -1
+	lock
+	  xaddb	%al, (%edx)	/ %al = old value, dec (%edx)
+	decb	%al		/ return new value
+	ret
+	SET_SIZE(atomic_dec_uchar_nv)
+	SET_SIZE(atomic_dec_8_nv)
+
+	ENTRY(atomic_dec_16_nv)	/* 16-bit decrement; new value left in %eax */
+	ALTENTRY(atomic_dec_ushort_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ zero upper bits of %eax
+	decw	%ax		/ %ax = -1
+	lock
+	  xaddw	%ax, (%edx)	/ %ax = old value, dec (%edx)
+	decw	%ax		/ return new value
+	ret
+	SET_SIZE(atomic_dec_ushort_nv)
+	SET_SIZE(atomic_dec_16_nv)
+
+	ENTRY(atomic_dec_32_nv)	/* 32-bit decrement; new value left in %eax */
+	ALTENTRY(atomic_dec_uint_nv)
+	ALTENTRY(atomic_dec_ulong_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	xorl	%eax, %eax	/ %eax = 0
+	decl	%eax		/ %eax = -1
+	lock
+	  xaddl	%eax, (%edx)	/ %eax = old value, dec (%edx)
+	decl	%eax		/ return new value
+	ret
+	SET_SIZE(atomic_dec_ulong_nv)
+	SET_SIZE(atomic_dec_uint_nv)
+	SET_SIZE(atomic_dec_32_nv)
+
+	/*
+	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
+	 * separated, it is important to edit the libc i386 platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_dec_64_nv.
+	 */
+	ENTRY(atomic_dec_64)	/* 64-bit decrement via a cmpxchg8b retry loop */
+	ALTENTRY(atomic_dec_64_nv)
+	pushl	%edi		/* %edi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %edi	/ %edi = target address
+	movl	(%edi), %eax
+	movl	4(%edi), %edx	/ %edx:%eax = old value
+1:
+	xorl	%ebx, %ebx
+	xorl	%ecx, %ecx
+	not	%ecx
+	not	%ebx		/ %ecx:%ebx = -1
+	addl	%eax, %ebx
+	adcl	%edx, %ecx	/ add in the carry from the low word
+	lock
+	cmpxchg8b (%edi)	/ try to stick it in
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	movl	%ebx, %eax
+	movl	%ecx, %edx	/ return new value
+	popl	%ebx
+	popl	%edi
+	ret
+	SET_SIZE(atomic_dec_64_nv)
+	SET_SIZE(atomic_dec_64)
+
+	ENTRY(atomic_add_8)	/* atomically add arg 2 to the byte at arg 1 */
+	ALTENTRY(atomic_add_char)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movl	8(%esp), %ecx	/* %ecx = delta; only %cl is used */
+	lock
+	addb	%cl, (%eax)
+	ret
+	SET_SIZE(atomic_add_char)
+	SET_SIZE(atomic_add_8)
+
+	ENTRY(atomic_add_16)	/* atomically add arg 2 to the 16-bit value at arg 1 */
+	ALTENTRY(atomic_add_short)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movl	8(%esp), %ecx	/* %ecx = delta; only %cx is used */
+	lock
+	addw	%cx, (%eax)
+	ret
+	SET_SIZE(atomic_add_short)
+	SET_SIZE(atomic_add_16)
+
+	ENTRY(atomic_add_32)	/* atomically add arg 2 to the 32-bit value at arg 1 */
+	ALTENTRY(atomic_add_int)
+	ALTENTRY(atomic_add_ptr)
+	ALTENTRY(atomic_add_long)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movl	8(%esp), %ecx	/* %ecx = delta */
+	lock
+	addl	%ecx, (%eax)
+	ret
+	SET_SIZE(atomic_add_long)
+	SET_SIZE(atomic_add_ptr)
+	SET_SIZE(atomic_add_int)
+	SET_SIZE(atomic_add_32)
+
+	ENTRY(atomic_or_8)	/* atomically OR arg 2 into the byte at arg 1 */
+	ALTENTRY(atomic_or_uchar)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movb	8(%esp), %cl	/* %cl = bits to set */
+	lock
+	orb	%cl, (%eax)
+	ret
+	SET_SIZE(atomic_or_uchar)
+	SET_SIZE(atomic_or_8)
+
+	ENTRY(atomic_or_16)	/* atomically OR arg 2 into the 16-bit value at arg 1 */
+	ALTENTRY(atomic_or_ushort)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movw	8(%esp), %cx	/* %cx = bits to set */
+	lock
+	orw	%cx, (%eax)
+	ret
+	SET_SIZE(atomic_or_ushort)
+	SET_SIZE(atomic_or_16)
+
+	ENTRY(atomic_or_32)	/* atomically OR arg 2 into the 32-bit value at arg 1 */
+	ALTENTRY(atomic_or_uint)
+	ALTENTRY(atomic_or_ulong)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movl	8(%esp), %ecx	/* %ecx = bits to set */
+	lock
+	orl	%ecx, (%eax)
+	ret
+	SET_SIZE(atomic_or_ulong)
+	SET_SIZE(atomic_or_uint)
+	SET_SIZE(atomic_or_32)
+
+	ENTRY(atomic_and_8)	/* atomically AND arg 2 into the byte at arg 1 */
+	ALTENTRY(atomic_and_uchar)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movb	8(%esp), %cl	/* %cl = mask */
+	lock
+	andb	%cl, (%eax)
+	ret
+	SET_SIZE(atomic_and_uchar)
+	SET_SIZE(atomic_and_8)
+
+	ENTRY(atomic_and_16)	/* atomically AND arg 2 into the 16-bit value at arg 1 */
+	ALTENTRY(atomic_and_ushort)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movw	8(%esp), %cx	/* %cx = mask */
+	lock
+	andw	%cx, (%eax)
+	ret
+	SET_SIZE(atomic_and_ushort)
+	SET_SIZE(atomic_and_16)
+
+	ENTRY(atomic_and_32)	/* atomically AND arg 2 into the 32-bit value at arg 1 */
+	ALTENTRY(atomic_and_uint)
+	ALTENTRY(atomic_and_ulong)
+	movl	4(%esp), %eax	/* %eax = target address */
+	movl	8(%esp), %ecx	/* %ecx = mask */
+	lock
+	andl	%ecx, (%eax)
+	ret
+	SET_SIZE(atomic_and_ulong)
+	SET_SIZE(atomic_and_uint)
+	SET_SIZE(atomic_and_32)
+
+	ENTRY(atomic_add_8_nv)	/* byte add; new value left in %eax */
+	ALTENTRY(atomic_add_char_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movb	8(%esp), %cl	/ %cl = delta
+	movzbl	%cl, %eax	/ %al = delta, zero extended
+	lock
+	  xaddb	%cl, (%edx)	/ %cl = old value, (%edx) = sum
+	addb	%cl, %al	/ return old value plus delta
+	ret
+	SET_SIZE(atomic_add_char_nv)
+	SET_SIZE(atomic_add_8_nv)
+
+	ENTRY(atomic_add_16_nv)	/* 16-bit add; new value left in %eax */
+	ALTENTRY(atomic_add_short_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movw	8(%esp), %cx	/ %cx = delta
+	movzwl	%cx, %eax	/ %ax = delta, zero extended
+	lock
+	  xaddw	%cx, (%edx)	/ %cx = old value, (%edx) = sum
+	addw	%cx, %ax	/ return old value plus delta
+	ret
+	SET_SIZE(atomic_add_short_nv)
+	SET_SIZE(atomic_add_16_nv)
+
+	ENTRY(atomic_add_32_nv)	/* 32-bit add; new value left in %eax */
+	ALTENTRY(atomic_add_int_nv)
+	ALTENTRY(atomic_add_ptr_nv)
+	ALTENTRY(atomic_add_long_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movl	8(%esp), %eax	/ %eax = delta
+	movl	%eax, %ecx	/ %ecx = delta
+	lock
+	  xaddl	%eax, (%edx)	/ %eax = old value, (%edx) = sum
+	addl	%ecx, %eax	/ return old value plus delta
+	ret
+	SET_SIZE(atomic_add_long_nv)
+	SET_SIZE(atomic_add_ptr_nv)
+	SET_SIZE(atomic_add_int_nv)
+	SET_SIZE(atomic_add_32_nv)
+
+	/*
+	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
+	 * separated, it is important to edit the libc i386 platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_add_64_nv.
+	 */
+	ENTRY(atomic_add_64)	/* 64-bit add via a cmpxchg8b retry loop */
+	ALTENTRY(atomic_add_64_nv)
+	pushl	%edi		/* %edi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %edi	/ %edi = target address
+	movl	(%edi), %eax
+	movl	4(%edi), %edx	/ %edx:%eax = old value
+1:
+	movl	16(%esp), %ebx
+	movl	20(%esp), %ecx	/ %ecx:%ebx = delta
+	addl	%eax, %ebx
+	adcl	%edx, %ecx	/ %ecx:%ebx = new value
+	lock
+	cmpxchg8b (%edi)	/ try to stick it in
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	movl	%ebx, %eax
+	movl	%ecx, %edx	/ return new value
+	popl	%ebx
+	popl	%edi
+	ret
+	SET_SIZE(atomic_add_64_nv)
+	SET_SIZE(atomic_add_64)
+
+	ENTRY(atomic_or_8_nv)	/* byte OR via a cmpxchg retry loop */
+	ALTENTRY(atomic_or_uchar_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movb	(%edx), %al	/ %al = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	orb	%al, %cl	/ %cl = new value
+	lock
+	cmpxchgb %cl, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %al = current value, retry */
+	movzbl	%cl, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_uchar_nv)
+	SET_SIZE(atomic_or_8_nv)
+
+	ENTRY(atomic_or_16_nv)	/* 16-bit OR via a cmpxchg retry loop */
+	ALTENTRY(atomic_or_ushort_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movw	(%edx), %ax	/ %ax = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	orw	%ax, %cx	/ %cx = new value
+	lock
+	cmpxchgw %cx, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %ax = current value, retry */
+	movzwl	%cx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_ushort_nv)
+	SET_SIZE(atomic_or_16_nv)
+
+	ENTRY(atomic_or_32_nv)	/* 32-bit OR via a cmpxchg retry loop */
+	ALTENTRY(atomic_or_uint_nv)
+	ALTENTRY(atomic_or_ulong_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movl	(%edx), %eax	/ %eax = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	orl	%eax, %ecx	/ %ecx = new value
+	lock
+	cmpxchgl %ecx, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %eax = current value, retry */
+	movl	%ecx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_or_ulong_nv)
+	SET_SIZE(atomic_or_uint_nv)
+	SET_SIZE(atomic_or_32_nv)
+
+	/*
+	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
+	 * separated, it is important to edit the libc i386 platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_or_64_nv.
+	 */
+	ENTRY(atomic_or_64)	/* 64-bit OR via a cmpxchg8b retry loop */
+	ALTENTRY(atomic_or_64_nv)
+	pushl	%edi		/* %edi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %edi	/ %edi = target address
+	movl	(%edi), %eax
+	movl	4(%edi), %edx	/ %edx:%eax = old value
+1:
+	movl	16(%esp), %ebx
+	movl	20(%esp), %ecx	/ %ecx:%ebx = delta
+	orl	%eax, %ebx
+	orl	%edx, %ecx	/ %ecx:%ebx = new value
+	lock
+	cmpxchg8b (%edi)	/ try to stick it in
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	movl	%ebx, %eax
+	movl	%ecx, %edx	/ return new value
+	popl	%ebx
+	popl	%edi
+	ret
+	SET_SIZE(atomic_or_64_nv)
+	SET_SIZE(atomic_or_64)
+
+	ENTRY(atomic_and_8_nv)	/* byte AND via a cmpxchg retry loop */
+	ALTENTRY(atomic_and_uchar_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movb	(%edx), %al	/ %al = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	andb	%al, %cl	/ %cl = new value
+	lock
+	cmpxchgb %cl, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %al = current value, retry */
+	movzbl	%cl, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_uchar_nv)
+	SET_SIZE(atomic_and_8_nv)
+
+	ENTRY(atomic_and_16_nv)	/* 16-bit AND via a cmpxchg retry loop */
+	ALTENTRY(atomic_and_ushort_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movw	(%edx), %ax	/ %ax = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	andw	%ax, %cx	/ %cx = new value
+	lock
+	cmpxchgw %cx, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %ax = current value, retry */
+	movzwl	%cx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_ushort_nv)
+	SET_SIZE(atomic_and_16_nv)
+
+	ENTRY(atomic_and_32_nv)	/* 32-bit AND via a cmpxchg retry loop */
+	ALTENTRY(atomic_and_uint_nv)
+	ALTENTRY(atomic_and_ulong_nv)
+	movl	4(%esp), %edx	/ %edx = target address
+	movl	(%edx), %eax	/ %eax = old value
+1:
+	movl	8(%esp), %ecx	/ %ecx = delta
+	andl	%eax, %ecx	/ %ecx = new value
+	lock
+	cmpxchgl %ecx, (%edx)	/ try to stick it in
+	jne	1b		/* failed: %eax = current value, retry */
+	movl	%ecx, %eax	/ return new value
+	ret
+	SET_SIZE(atomic_and_ulong_nv)
+	SET_SIZE(atomic_and_uint_nv)
+	SET_SIZE(atomic_and_32_nv)
+
+	/*
+	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
+	 * separated, it is important to edit the libc i386 platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_and_64_nv.
+	 */
+	ENTRY(atomic_and_64)	/* 64-bit AND via a cmpxchg8b retry loop */
+	ALTENTRY(atomic_and_64_nv)
+	pushl	%edi		/* %edi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %edi	/ %edi = target address
+	movl	(%edi), %eax
+	movl	4(%edi), %edx	/ %edx:%eax = old value
+1:
+	movl	16(%esp), %ebx
+	movl	20(%esp), %ecx	/ %ecx:%ebx = delta
+	andl	%eax, %ebx
+	andl	%edx, %ecx	/ %ecx:%ebx = new value
+	lock
+	cmpxchg8b (%edi)	/ try to stick it in
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	movl	%ebx, %eax
+	movl	%ecx, %edx	/ return new value
+	popl	%ebx
+	popl	%edi
+	ret
+	SET_SIZE(atomic_and_64_nv)
+	SET_SIZE(atomic_and_64)
+
+	ENTRY(atomic_cas_8)	/* byte compare-and-swap; old value left in %eax */
+	ALTENTRY(atomic_cas_uchar)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movzbl	8(%esp), %eax	/* %eax = compare value, zero-extended */
+	movb	12(%esp), %cl	/* %cl = replacement value */
+	lock
+	cmpxchgb %cl, (%edx)	/* equal: store %cl; else %al = memory */
+	ret
+	SET_SIZE(atomic_cas_uchar)
+	SET_SIZE(atomic_cas_8)
+
+	ENTRY(atomic_cas_16)	/* 16-bit compare-and-swap; old value left in %eax */
+	ALTENTRY(atomic_cas_ushort)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movzwl	8(%esp), %eax	/* %eax = compare value, zero-extended */
+	movw	12(%esp), %cx	/* %cx = replacement value */
+	lock
+	cmpxchgw %cx, (%edx)	/* equal: store %cx; else %ax = memory */
+	ret
+	SET_SIZE(atomic_cas_ushort)
+	SET_SIZE(atomic_cas_16)
+
+	ENTRY(atomic_cas_32)	/* 32-bit compare-and-swap; old value left in %eax */
+	ALTENTRY(atomic_cas_uint)
+	ALTENTRY(atomic_cas_ulong)
+	ALTENTRY(atomic_cas_ptr)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movl	8(%esp), %eax	/* %eax = compare value */
+	movl	12(%esp), %ecx	/* %ecx = replacement value */
+	lock
+	cmpxchgl %ecx, (%edx)	/* equal: store %ecx; else %eax = memory */
+	ret
+	SET_SIZE(atomic_cas_ptr)
+	SET_SIZE(atomic_cas_ulong)
+	SET_SIZE(atomic_cas_uint)
+	SET_SIZE(atomic_cas_32)
+
+	ENTRY(atomic_cas_64)	/* 64-bit compare-and-swap using cmpxchg8b */
+	pushl	%ebx		/* %ebx and %esi are restored before ret */
+	pushl	%esi
+	movl	12(%esp), %esi	/* %esi = target address */
+	movl	16(%esp), %eax
+	movl	20(%esp), %edx	/* %edx:%eax = compare value */
+	movl	24(%esp), %ebx
+	movl	28(%esp), %ecx	/* %ecx:%ebx = replacement value */
+	lock
+	cmpxchg8b (%esi)	/* equal: store; else %edx:%eax = memory */
+	popl	%esi
+	popl	%ebx
+	ret			/* %edx:%eax = value found in memory */
+	SET_SIZE(atomic_cas_64)
+
+	ENTRY(atomic_swap_8)	/* exchange the byte at arg 1 with arg 2 */
+	ALTENTRY(atomic_swap_uchar)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movzbl	8(%esp), %eax	/* %eax = new value, zero-extended */
+	lock
+	xchgb	%al, (%edx)	/* %al = previous byte, memory = new byte */
+	ret
+	SET_SIZE(atomic_swap_uchar)
+	SET_SIZE(atomic_swap_8)
+
+	ENTRY(atomic_swap_16)	/* exchange the 16-bit value at arg 1 with arg 2 */
+	ALTENTRY(atomic_swap_ushort)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movzwl	8(%esp), %eax	/* %eax = new value, zero-extended */
+	lock
+	xchgw	%ax, (%edx)	/* %ax = previous value, memory = new value */
+	ret
+	SET_SIZE(atomic_swap_ushort)
+	SET_SIZE(atomic_swap_16)
+
+	ENTRY(atomic_swap_32)	/* exchange the 32-bit value at arg 1 with arg 2 */
+	ALTENTRY(atomic_swap_uint)
+	ALTENTRY(atomic_swap_ptr)
+	ALTENTRY(atomic_swap_ulong)
+	movl	4(%esp), %edx	/* %edx = target address */
+	movl	8(%esp), %eax	/* %eax = new value */
+	lock
+	xchgl	%eax, (%edx)	/* %eax = previous value, memory = new value */
+	ret
+	SET_SIZE(atomic_swap_ulong)
+	SET_SIZE(atomic_swap_ptr)
+	SET_SIZE(atomic_swap_uint)
+	SET_SIZE(atomic_swap_32)
+
+	ENTRY(atomic_swap_64)	/* 64-bit exchange via a cmpxchg8b retry loop */
+	pushl	%esi		/* %esi and %ebx are restored before ret */
+	pushl	%ebx
+	movl	12(%esp), %esi	/* %esi = target address */
+	movl	16(%esp), %ebx
+	movl	20(%esp), %ecx	/* %ecx:%ebx = new value */
+	movl	(%esi), %eax
+	movl	4(%esi), %edx	/ %edx:%eax = old value
+1:
+	lock
+	cmpxchg8b (%esi)	/* store new value if memory == %edx:%eax */
+	jne	1b		/* failed: %edx:%eax = current value, retry */
+	popl	%ebx
+	popl	%esi
+	ret			/* %edx:%eax = value that was replaced */
+	SET_SIZE(atomic_swap_64)
+
+	ENTRY(atomic_set_long_excl)	/* set a bit; 0 if it was clear, else -1 */
+	movl	4(%esp), %edx	/ %edx = target address
+	movl	8(%esp), %ecx	/ %ecx = bit id
+	xorl	%eax, %eax	/* start with a return value of 0 */
+	lock
+	btsl	%ecx, (%edx)	/* CF = previous bit; the bit is now set */
+	jnc	1f		/* bit was clear: keep the 0 */
+	decl	%eax		/ return -1
+1:
+	ret
+	SET_SIZE(atomic_set_long_excl)
+
+	ENTRY(atomic_clear_long_excl)	/* clear a bit; 0 if it was set, else -1 */
+	movl	4(%esp), %edx	/ %edx = target address
+	movl	8(%esp), %ecx	/ %ecx = bit id
+	xorl	%eax, %eax	/* start with a return value of 0 */
+	lock
+	btrl	%ecx, (%edx)	/* CF = previous bit; the bit is now clear */
+	jc	1f		/* bit was set: keep the 0 */
+	decl	%eax		/ return -1
+1:
+	ret
+	SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+	/*
+	 * NOTE: membar_enter, membar_exit, membar_producer, and
+	 * membar_consumer are all identical routines. We define them
+	 * separately, instead of using ALTENTRY definitions to alias them
+	 * together, so that DTrace and debuggers will see a unique address
+	 * for each of them, allowing more accurate tracing.
+	 */
+
+
+	ENTRY(membar_enter)	/* barrier built from a locked no-op RMW */
+	lock
+	xorl	$0, (%esp)	/* xor with 0 leaves the stack word unchanged */
+	ret
+	SET_SIZE(membar_enter)
+
+	ENTRY(membar_exit)	/* barrier built from a locked no-op RMW */
+	lock
+	xorl	$0, (%esp)	/* xor with 0 leaves the stack word unchanged */
+	ret
+	SET_SIZE(membar_exit)
+
+	ENTRY(membar_producer)	/* barrier built from a locked no-op RMW */
+	lock
+	xorl	$0, (%esp)	/* xor with 0 leaves the stack word unchanged */
+	ret
+	SET_SIZE(membar_producer)
+
+	ENTRY(membar_consumer)	/* barrier built from a locked no-op RMW */
+	lock
+	xorl	$0, (%esp)	/* xor with 0 leaves the stack word unchanged */
+	ret
+	SET_SIZE(membar_consumer)
+
+#endif	/* !_KERNEL */
diff --git a/common/atomic/sparc/atomic.s b/common/atomic/sparc/atomic.s
new file mode 100644
index 000000000000..8aa240efa297
--- /dev/null
+++ b/common/atomic/sparc/atomic.s
@@ -0,0 +1,801 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+	/*
+	 * Legacy kernel interfaces; they will go away (eventually).
+	 */
+	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
+#endif	/* _KERNEL */
+
+	/*
+	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_inc_8_nv.
+	 */
+	ENTRY(atomic_inc_8)	/* byte increment: add_8 with a delta of 1 */
+	ALTENTRY(atomic_inc_8_nv)
+	ALTENTRY(atomic_inc_uchar)
+	ALTENTRY(atomic_inc_uchar_nv)
+	ba	add_8		/* continue in the shared byte-add loop */
+	  add	%g0, 1, %o1	/* delay slot: %o1 = 1 */
+	SET_SIZE(atomic_inc_uchar_nv)
+	SET_SIZE(atomic_inc_uchar)
+	SET_SIZE(atomic_inc_8_nv)
+	SET_SIZE(atomic_inc_8)
+
+	/*
+	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_dec_8_nv.
+	 */
+	ENTRY(atomic_dec_8)	/* byte decrement: add_8 with a delta of -1 */
+	ALTENTRY(atomic_dec_8_nv)
+	ALTENTRY(atomic_dec_uchar)
+	ALTENTRY(atomic_dec_uchar_nv)
+	ba	add_8		/* continue in the shared byte-add loop */
+	  sub	%g0, 1, %o1	/* delay slot: %o1 = -1 */
+	SET_SIZE(atomic_dec_uchar_nv)
+	SET_SIZE(atomic_dec_uchar)
+	SET_SIZE(atomic_dec_8_nv)
+	SET_SIZE(atomic_dec_8)
+
+	/*
+	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_add_8_nv.
+	 */
+	ENTRY(atomic_add_8)	/* byte add done with a word-wide cas loop */
+	ALTENTRY(atomic_add_8_nv)
+	ALTENTRY(atomic_add_char)
+	ALTENTRY(atomic_add_char_nv)
+add_8:
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o2		! read old value
+1:
+	add	%o2, %o1, %o5		! add value to the old value
+	and	%o5, %o3, %o5		! clear other bits
+	andn	%o2, %o3, %o4		! clear target bits
+	or	%o4, %o5, %o5		! insert the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	add	%o2, %o1, %o5	/* recompute the new byte for the return */
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_add_char_nv)
+	SET_SIZE(atomic_add_char)
+	SET_SIZE(atomic_add_8_nv)
+	SET_SIZE(atomic_add_8)
+
+	/*
+	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_inc_16_nv.
+	 */
+	ENTRY(atomic_inc_16)	/* 16-bit increment: add_16 with a delta of 1 */
+	ALTENTRY(atomic_inc_16_nv)
+	ALTENTRY(atomic_inc_ushort)
+	ALTENTRY(atomic_inc_ushort_nv)
+	ba	add_16		/* continue in the shared 16-bit add loop */
+	  add	%g0, 1, %o1	/* delay slot: %o1 = 1 */
+	SET_SIZE(atomic_inc_ushort_nv)
+	SET_SIZE(atomic_inc_ushort)
+	SET_SIZE(atomic_inc_16_nv)
+	SET_SIZE(atomic_inc_16)
+
+	/*
+	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_dec_16_nv.
+	 */
+	ENTRY(atomic_dec_16)	/* 16-bit decrement: add_16 with a delta of -1 */
+	ALTENTRY(atomic_dec_16_nv)
+	ALTENTRY(atomic_dec_ushort)
+	ALTENTRY(atomic_dec_ushort_nv)
+	ba	add_16		/* continue in the shared 16-bit add loop */
+	  sub	%g0, 1, %o1	/* delay slot: %o1 = -1 */
+	SET_SIZE(atomic_dec_ushort_nv)
+	SET_SIZE(atomic_dec_ushort)
+	SET_SIZE(atomic_dec_16_nv)
+	SET_SIZE(atomic_dec_16)
+
+	/*
+	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_add_16_nv.
+	 */
+	ENTRY(atomic_add_16)	/* 16-bit add done with a word-wide cas loop */
+	ALTENTRY(atomic_add_16_nv)
+	ALTENTRY(atomic_add_short)
+	ALTENTRY(atomic_add_short_nv)
+add_16:
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o2		! read old value
+1:
+	add	%o1, %o2, %o5		! add value to the old value
+	and	%o5, %o3, %o5		! clear other bits
+	andn	%o2, %o3, %o4		! clear target bits
+	or	%o4, %o5, %o5		! insert the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	add	%o1, %o2, %o5	/* recompute the new value for the return */
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_add_short_nv)
+	SET_SIZE(atomic_add_short)
+	SET_SIZE(atomic_add_16_nv)
+	SET_SIZE(atomic_add_16)
+
+	/*
+	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_inc_32_nv.
+	 */
+	ENTRY(atomic_inc_32)	/* 32-bit increment: add_32 with a delta of 1 */
+	ALTENTRY(atomic_inc_32_nv)
+	ALTENTRY(atomic_inc_uint)
+	ALTENTRY(atomic_inc_uint_nv)
+	ALTENTRY(atomic_inc_ulong)
+	ALTENTRY(atomic_inc_ulong_nv)
+	ba	add_32		/* continue in the shared 32-bit add loop */
+	  add	%g0, 1, %o1	/* delay slot: %o1 = 1 */
+	SET_SIZE(atomic_inc_ulong_nv)
+	SET_SIZE(atomic_inc_ulong)
+	SET_SIZE(atomic_inc_uint_nv)
+	SET_SIZE(atomic_inc_uint)
+	SET_SIZE(atomic_inc_32_nv)
+	SET_SIZE(atomic_inc_32)
+
+	/*
+	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_dec_32_nv.
+	 */
+	ENTRY(atomic_dec_32)	/* 32-bit decrement: add_32 with a delta of -1 */
+	ALTENTRY(atomic_dec_32_nv)
+	ALTENTRY(atomic_dec_uint)
+	ALTENTRY(atomic_dec_uint_nv)
+	ALTENTRY(atomic_dec_ulong)
+	ALTENTRY(atomic_dec_ulong_nv)
+	ba	add_32		/* continue in the shared 32-bit add loop */
+	  sub	%g0, 1, %o1	/* delay slot: %o1 = -1 */
+	SET_SIZE(atomic_dec_ulong_nv)
+	SET_SIZE(atomic_dec_ulong)
+	SET_SIZE(atomic_dec_uint_nv)
+	SET_SIZE(atomic_dec_uint)
+	SET_SIZE(atomic_dec_32_nv)
+	SET_SIZE(atomic_dec_32)
+
+	/*
+	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_add_32_nv.
+	 */
+	ENTRY(atomic_add_32)	/* 32-bit add: [%o0] += %o1 via a cas loop */
+	ALTENTRY(atomic_add_32_nv)
+	ALTENTRY(atomic_add_int)
+	ALTENTRY(atomic_add_int_nv)
+	ALTENTRY(atomic_add_ptr)
+	ALTENTRY(atomic_add_ptr_nv)
+	ALTENTRY(atomic_add_long)
+	ALTENTRY(atomic_add_long_nv)
+add_32:
+	ld	[%o0], %o2	/* %o2 = old value */
+1:
+	add	%o2, %o1, %o3	/* %o3 = proposed new value */
+	cas	[%o0], %o2, %o3	/* %o3 = word found at [%o0] */
+	cmp	%o2, %o3	/* equal means the cas stored our value */
+	bne,a,pn %icc, 1b
+	  mov	%o3, %o2	/* runs only when retrying: refresh old value */
+	retl
+	add	%o2, %o1, %o0		! return new value
+	SET_SIZE(atomic_add_long_nv)
+	SET_SIZE(atomic_add_long)
+	SET_SIZE(atomic_add_ptr_nv)
+	SET_SIZE(atomic_add_ptr)
+	SET_SIZE(atomic_add_int_nv)
+	SET_SIZE(atomic_add_int)
+	SET_SIZE(atomic_add_32_nv)
+	SET_SIZE(atomic_add_32)
+
+	/*
+	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_inc_64_nv.
+	 */
+	ENTRY(atomic_inc_64)	/* 64-bit increment: add_64 with a delta of 1 */
+	ALTENTRY(atomic_inc_64_nv)
+	ba	add_64		/* enters after the two-register arg merge */
+	  add	%g0, 1, %o1	/* delay slot: %o1 = 1 */
+	SET_SIZE(atomic_inc_64_nv)
+	SET_SIZE(atomic_inc_64)
+
+	/*
+	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_dec_64_nv.
+	 */
+	ENTRY(atomic_dec_64)	/* 64-bit decrement: add_64 with a delta of -1 */
+	ALTENTRY(atomic_dec_64_nv)
+	ba	add_64		/* enters after the two-register arg merge */
+	  sub	%g0, 1, %o1	/* delay slot: %o1 = -1 */
+	SET_SIZE(atomic_dec_64_nv)
+	SET_SIZE(atomic_dec_64)
+
+	/*
+	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_add_64_nv.
+	 */
+	ENTRY(atomic_add_64)	/* 64-bit add via a casx retry loop */
+	ALTENTRY(atomic_add_64_nv)
+	sllx	%o1, 32, %o1		! upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2	/* zero-extend the lower half */
+	add	%o1, %o2, %o1		! convert 2 32-bit args into 1 64-bit
+add_64:
+	ldx	[%o0], %o2	/* %o2 = old 64-bit value */
+1:
+	add	%o2, %o1, %o3	/* %o3 = proposed new value */
+	casx	[%o0], %o2, %o3	/* %o3 = value found at [%o0] */
+	cmp	%o2, %o3	/* equal means the casx stored our value */
+	bne,a,pn %xcc, 1b
+	  mov	%o3, %o2	/* runs only when retrying: refresh old value */
+	add	%o2, %o1, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o1, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_add_64_nv)
+	SET_SIZE(atomic_add_64)
+
+	/*
+	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_or_8_nv.
+	 */
+	ENTRY(atomic_or_8)	/* byte OR done with a word-wide cas loop */
+	ALTENTRY(atomic_or_8_nv)
+	ALTENTRY(atomic_or_uchar)
+	ALTENTRY(atomic_or_uchar_nv)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o2		! read old value
+1:
+	or	%o2, %o1, %o5		! or in the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	or	%o2, %o1, %o5	/* recompute the new byte for the return */
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_or_uchar_nv)
+	SET_SIZE(atomic_or_uchar)
+	SET_SIZE(atomic_or_8_nv)
+	SET_SIZE(atomic_or_8)
+
+	/*
+	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_or_16_nv.
+	 */
+	ENTRY(atomic_or_16)	/* 16-bit OR done with a word-wide cas loop */
+	ALTENTRY(atomic_or_16_nv)
+	ALTENTRY(atomic_or_ushort)
+	ALTENTRY(atomic_or_ushort_nv)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o2		! read old value
+1:
+	or	%o2, %o1, %o5		! or in the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	or	%o2, %o1, %o5		! or in the new value
+	and	%o5, %o3, %o5	/* keep only the target halfword */
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_or_ushort_nv)
+	SET_SIZE(atomic_or_ushort)
+	SET_SIZE(atomic_or_16_nv)
+	SET_SIZE(atomic_or_16)
+
+	/*
+	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_or_32_nv.
+	 */
+	ENTRY(atomic_or_32)	/* 32-bit OR: [%o0] |= %o1 via a cas loop */
+	ALTENTRY(atomic_or_32_nv)
+	ALTENTRY(atomic_or_uint)
+	ALTENTRY(atomic_or_uint_nv)
+	ALTENTRY(atomic_or_ulong)
+	ALTENTRY(atomic_or_ulong_nv)
+	ld	[%o0], %o2	/* %o2 = old value */
+1:
+	or	%o2, %o1, %o3	/* %o3 = proposed new value */
+	cas	[%o0], %o2, %o3	/* %o3 = word found at [%o0] */
+	cmp	%o2, %o3	/* equal means the cas stored our value */
+	bne,a,pn %icc, 1b
+	  mov	%o3, %o2	/* runs only when retrying: refresh old value */
+	retl
+	or	%o2, %o1, %o0		! return new value
+	SET_SIZE(atomic_or_ulong_nv)
+	SET_SIZE(atomic_or_ulong)
+	SET_SIZE(atomic_or_uint_nv)
+	SET_SIZE(atomic_or_uint)
+	SET_SIZE(atomic_or_32_nv)
+	SET_SIZE(atomic_or_32)
+
+	/*
+	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_or_64_nv.
+	 */
+	ENTRY(atomic_or_64)	/* 64-bit OR via a casx retry loop */
+	ALTENTRY(atomic_or_64_nv)
+	sllx	%o1, 32, %o1		! upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2	/* zero-extend the lower half */
+	add	%o1, %o2, %o1		! convert 2 32-bit args into 1 64-bit
+	ldx	[%o0], %o2	/* %o2 = old 64-bit value */
+1:
+	or	%o2, %o1, %o3	/* %o3 = proposed new value */
+	casx	[%o0], %o2, %o3	/* %o3 = value found at [%o0] */
+	cmp	%o2, %o3	/* equal means the casx stored our value */
+	bne,a,pn %xcc, 1b
+	  mov	%o3, %o2	/* runs only when retrying: refresh old value */
+	or	%o2, %o1, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o1, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_or_64_nv)
+	SET_SIZE(atomic_or_64)
+
+	/*
+	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_and_8_nv.
+	 */
+	ENTRY(atomic_and_8)	/* byte AND done with a word-wide cas loop */
+	ALTENTRY(atomic_and_8_nv)
+	ALTENTRY(atomic_and_uchar)
+	ALTENTRY(atomic_and_uchar_nv)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	orn	%o1, %o3, %o1		! all ones in other bytes
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o2		! read old value
+1:
+	and	%o2, %o1, %o5		! and in the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o2, %o1, %o5	/* recompute the new byte for the return */
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_and_uchar_nv)
+	SET_SIZE(atomic_and_uchar)
+	SET_SIZE(atomic_and_8_nv)
+	SET_SIZE(atomic_and_8)
+
+	/*
+	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_and_16_nv.
+	 */
+	ENTRY(atomic_and_16)	/* 16-bit AND done with a word-wide cas loop */
+	ALTENTRY(atomic_and_16_nv)
+	ALTENTRY(atomic_and_ushort)
+	ALTENTRY(atomic_and_ushort_nv)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	orn	%o1, %o3, %o1		! all ones in the other half
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o2		! read old value
+1:
+	and	%o2, %o1, %o5		! and in the new value
+	cas	[%o0], %o2, %o5	/* %o5 = word found at [%o0] */
+	cmp	%o2, %o5	/* equal means the cas stored our word */
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o2, %o1, %o5	/* recompute the new value for the return */
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = new value
+	SET_SIZE(atomic_and_ushort_nv)
+	SET_SIZE(atomic_and_ushort)
+	SET_SIZE(atomic_and_16_nv)
+	SET_SIZE(atomic_and_16)
+
+	/*
+	 * atomic_and_32, atomic_and_32_nv, atomic_and_uint,
+	 * atomic_and_uint_nv, atomic_and_ulong and atomic_and_ulong_nv
+	 * share this code: atomically AND the 32-bit word at address %o0
+	 * with the value in %o1 and return the new value in %o0.
+	 *
+	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_and_32_nv.
+	 */
+	ENTRY(atomic_and_32)
+	ALTENTRY(atomic_and_32_nv)
+	ALTENTRY(atomic_and_uint)
+	ALTENTRY(atomic_and_uint_nv)
+	ALTENTRY(atomic_and_ulong)
+	ALTENTRY(atomic_and_ulong_nv)
+	ld	[%o0], %o2
+1:
+	! retry loop: %o2 = expected old value, %o3 = proposed new value
+	and	%o2, %o1, %o3
+	cas	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %icc, 1b
+	  mov	%o3, %o2
+	retl
+	and	%o2, %o1, %o0		! return new value
+	SET_SIZE(atomic_and_ulong_nv)
+	SET_SIZE(atomic_and_ulong)
+	SET_SIZE(atomic_and_uint_nv)
+	SET_SIZE(atomic_and_uint)
+	SET_SIZE(atomic_and_32_nv)
+	SET_SIZE(atomic_and_32)
+
+	/*
+	 * atomic_and_64 and atomic_and_64_nv share this code: atomically
+	 * AND the 64-bit value at address %o0 with a 64-bit operand and
+	 * return the new value.  The operand arrives as two 32-bit
+	 * halves (upper in %o1, lower in %o2) and the result is returned
+	 * the same way (upper in %o0, lower in %o1).
+	 *
+	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
+	 * separated, you need to also edit the libc sparc platform
+	 * specific mapfile and remove the NODYNSORT attribute
+	 * from atomic_and_64_nv.
+	 */
+	ENTRY(atomic_and_64)
+	ALTENTRY(atomic_and_64_nv)
+	sllx	%o1, 32, %o1		! upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2
+	add	%o1, %o2, %o1		! convert 2 32-bit args into 1 64-bit
+	ldx	[%o0], %o2
+1:
+	! retry loop: %o2 = expected old value, %o3 = proposed new value
+	and	%o2, %o1, %o3
+	casx	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %xcc, 1b
+	  mov	%o3, %o2
+	and	%o2, %o1, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o1, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_and_64_nv)
+	SET_SIZE(atomic_and_64)
+
+	/*
+	 * atomic_cas_8 / atomic_cas_uchar: compare-and-swap on the byte
+	 * at address %o0.  %o1 is the comparison value and %o2 the new
+	 * value; the byte value observed in memory is returned in %o0.
+	 *
+	 * The operation is built from a 32-bit cas on the containing
+	 * word.  If the word cas fails but the target byte still equals
+	 * the comparison value (i.e. only the other bytes of the word
+	 * differed), the loop retries using the freshly observed word.
+	 */
+	ENTRY(atomic_cas_8)
+	ALTENTRY(atomic_cas_uchar)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
+	and	%o2, %o3, %o2		! %o2 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o4		! read old value
+1:
+	andn	%o4, %o3, %o4		! clear target bits
+	or	%o4, %o2, %o5		! insert the new value
+	or	%o4, %o1, %o4		! insert the comparison value
+	cas	[%o0], %o4, %o5
+	cmp	%o4, %o5		! did we succeed?
+	be,pt	%icc, 2f
+	  and	%o5, %o3, %o4		! isolate the old value
+	cmp	%o1, %o4		! should we have succeeded?
+	be,a,pt	%icc, 1b		! yes, try again
+	  mov	%o5, %o4		! %o4 = old value
+2:
+	retl
+	srl	%o4, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_cas_uchar)
+	SET_SIZE(atomic_cas_8)
+
+	/*
+	 * atomic_cas_16 / atomic_cas_ushort: compare-and-swap on the
+	 * 16-bit value at address %o0.  %o1 is the comparison value and
+	 * %o2 the new value; the 16-bit value observed in memory is
+	 * returned in %o0.
+	 *
+	 * The operation is built from a 32-bit cas on the containing
+	 * word.  If the word cas fails but the target half still equals
+	 * the comparison value, the loop retries using the freshly
+	 * observed word.
+	 */
+	ENTRY(atomic_cas_16)
+	ALTENTRY(atomic_cas_ushort)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
+	and	%o2, %o3, %o2		! %o2 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o4		! read old value
+1:
+	andn	%o4, %o3, %o4		! clear target bits
+	or	%o4, %o2, %o5		! insert the new value
+	or	%o4, %o1, %o4		! insert the comparison value
+	cas	[%o0], %o4, %o5
+	cmp	%o4, %o5		! did we succeed?
+	be,pt	%icc, 2f
+	  and	%o5, %o3, %o4		! isolate the old value
+	cmp	%o1, %o4		! should we have succeeded?
+	be,a,pt	%icc, 1b		! yes, try again
+	  mov	%o5, %o4		! %o4 = old value
+2:
+	retl
+	srl	%o4, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_cas_ushort)
+	SET_SIZE(atomic_cas_16)
+
+	/*
+	 * atomic_cas_32, atomic_cas_uint, atomic_cas_ptr and
+	 * atomic_cas_ulong share this code: a single 32-bit cas on the
+	 * word at address %o0 with comparison value %o1 and new value
+	 * %o2.  The value the cas leaves in %o2 is returned in %o0.
+	 */
+	ENTRY(atomic_cas_32)
+	ALTENTRY(atomic_cas_uint)
+	ALTENTRY(atomic_cas_ptr)
+	ALTENTRY(atomic_cas_ulong)
+	cas	[%o0], %o1, %o2
+	retl
+	mov	%o2, %o0
+	SET_SIZE(atomic_cas_ulong)
+	SET_SIZE(atomic_cas_ptr)
+	SET_SIZE(atomic_cas_uint)
+	SET_SIZE(atomic_cas_32)
+
+	/*
+	 * atomic_cas_64: 64-bit compare-and-swap on the value at address
+	 * %o0.  Both 64-bit arguments arrive as pairs of 32-bit halves
+	 * (comparison value in %o1:%o2, new value in %o3:%o4) and are
+	 * first merged into single 64-bit registers.  The value left in
+	 * %o2 by casx is returned split across %o0 (upper) and %o1 (lower).
+	 */
+	ENTRY(atomic_cas_64)
+	sllx	%o1, 32, %o1		! cmp's upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2		! convert 2 32-bit args into 1 64-bit
+	add	%o1, %o2, %o1
+	sllx	%o3, 32, %o2		! newval upper 32 in %o3, lower in %o4
+	srl	%o4, 0, %o4		! setup %o2 to have newval
+	add	%o2, %o4, %o2
+	casx	[%o0], %o1, %o2
+	srl	%o2, 0, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o2, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_cas_64)
+
+	/*
+	 * atomic_swap_8 / atomic_swap_uchar: atomically store the byte
+	 * in %o1 at address %o0 and return the previous byte in %o0.
+	 *
+	 * Implemented as a 32-bit cas retry loop on the containing word:
+	 * each iteration replaces only the target byte of the most
+	 * recently observed word value.
+	 */
+	ENTRY(atomic_swap_8)
+	ALTENTRY(atomic_swap_uchar)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o2		! read old value
+1:
+	andn	%o2, %o3, %o5		! clear target bits
+	or	%o5, %o1, %o5		! insert the new value
+	cas	[%o0], %o2, %o5
+	cmp	%o2, %o5
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_swap_uchar)
+	SET_SIZE(atomic_swap_8)
+
+	/*
+	 * atomic_swap_16 / atomic_swap_ushort: atomically store the
+	 * 16-bit value in %o1 at address %o0 and return the previous
+	 * 16-bit value in %o0.
+	 *
+	 * Implemented as a 32-bit cas retry loop on the containing word:
+	 * each iteration replaces only the target half of the most
+	 * recently observed word value.
+	 */
+	ENTRY(atomic_swap_16)
+	ALTENTRY(atomic_swap_ushort)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o2		! read old value
+1:
+	andn	%o2, %o3, %o5		! clear target bits
+	or	%o5, %o1, %o5		! insert the new value
+	cas	[%o0], %o2, %o5
+	cmp	%o2, %o5
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_swap_ushort)
+	SET_SIZE(atomic_swap_16)
+
+	/*
+	 * atomic_swap_32, atomic_swap_uint, atomic_swap_ptr and
+	 * atomic_swap_ulong share this code: atomically store %o1 into
+	 * the 32-bit word at address %o0 and return the previous word
+	 * value in %o0.  Implemented as a cas retry loop.
+	 */
+	ENTRY(atomic_swap_32)
+	ALTENTRY(atomic_swap_uint)
+	ALTENTRY(atomic_swap_ptr)
+	ALTENTRY(atomic_swap_ulong)
+	ld	[%o0], %o2
+1:
+	! %o2 = expected old value; %o3 is reloaded with the new value
+	! each time because cas overwrites it with the memory contents
+	mov	%o1, %o3
+	cas	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %icc, 1b
+	  mov	%o3, %o2
+	retl
+	mov	%o3, %o0
+	SET_SIZE(atomic_swap_ulong)
+	SET_SIZE(atomic_swap_ptr)
+	SET_SIZE(atomic_swap_uint)
+	SET_SIZE(atomic_swap_32)
+
+	/*
+	 * atomic_swap_64: atomically store a 64-bit value at address %o0
+	 * and return the previous value.  The new value arrives as two
+	 * 32-bit halves (upper in %o1, lower in %o2); the old value is
+	 * returned the same way (upper in %o0, lower in %o1).
+	 * Implemented as a casx retry loop.
+	 */
+	ENTRY(atomic_swap_64)
+	sllx	%o1, 32, %o1		! upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2
+	add	%o1, %o2, %o1		! convert 2 32-bit args into 1 64-bit
+	ldx	[%o0], %o2
+1:
+	! %o2 = expected old value; %o3 is reloaded with the new value
+	! each time because casx overwrites it with the memory contents
+	mov	%o1, %o3
+	casx	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %xcc, 1b
+	  mov	%o3, %o2
+	srl	%o3, 0, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o3, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_swap_64)
+
+	/*
+	 * atomic_set_long_excl: atomically set bit number %o1 in the
+	 * long at address %o0, but only if that bit is currently clear.
+	 * Returns 0 in %o0 if this call set the bit, or -1 if the bit
+	 * was already set.
+	 *
+	 * slln, ldn, casn and %ncc are not defined in this part of the
+	 * file; presumably they select the native long-sized forms of
+	 * the instructions -- confirm against the header that defines them.
+	 */
+	ENTRY(atomic_set_long_excl)
+	mov	1, %o3
+	slln	%o3, %o1, %o3
+	ldn	[%o0], %o2
+1:
+	andcc	%o2, %o3, %g0		! test if the bit is set
+	bnz,a,pn %ncc, 2f		! if so, then fail out
+	  mov	-1, %o0
+	! the annulled delay slot above only runs when the branch is taken,
+	! so %o0 still holds the address on the fall-through path
+	or	%o2, %o3, %o4		! set the bit, and try to commit it
+	casn	[%o0], %o2, %o4
+	cmp	%o2, %o4
+	bne,a,pn %ncc, 1b		! failed to commit, try again
+	  mov	%o4, %o2
+	mov	%g0, %o0
+2:
+	retl
+	nop
+	SET_SIZE(atomic_set_long_excl)
+
+	/*
+	 * atomic_clear_long_excl: atomically clear bit number %o1 in the
+	 * long at address %o0, but only if that bit is currently set.
+	 * Returns 0 in %o0 if this call cleared the bit, or -1 if the
+	 * bit was already clear.
+	 *
+	 * slln, ldn, casn and %ncc are not defined in this part of the
+	 * file; presumably they select the native long-sized forms of
+	 * the instructions -- confirm against the header that defines them.
+	 */
+	ENTRY(atomic_clear_long_excl)
+	mov	1, %o3
+	slln	%o3, %o1, %o3
+	ldn	[%o0], %o2
+1:
+	andncc	%o3, %o2, %g0		! test if the bit is clear
+	bnz,a,pn %ncc, 2f		! if so, then fail out
+	  mov	-1, %o0
+	! the annulled delay slot above only runs when the branch is taken,
+	! so %o0 still holds the address on the fall-through path
+	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
+	casn	[%o0], %o2, %o4
+	cmp	%o2, %o4
+	bne,a,pn %ncc, 1b		! failed to commit, try again
+	  mov	%o4, %o2
+	mov	%g0, %o0
+2:
+	retl
+	nop
+	SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+	/*
+	 * Spitfires and Blackbirds have a problem with membars in the
+	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
+	 * that the whole world needs the workaround.
+	 *
+	 * Consequently each routine below issues its membar before the
+	 * retl and fills the delay slot with a nop.
+	 */
+
+	/*
+	 * membar_enter: issue a membar with the #StoreLoad and
+	 * #StoreStore ordering constraints, then return.
+	 */
+	ENTRY(membar_enter)
+	membar	#StoreLoad|#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_enter)
+
+	/*
+	 * membar_exit: issue a membar with the #LoadStore and
+	 * #StoreStore ordering constraints, then return.  The membar is
+	 * kept out of the retl delay slot, which holds a nop instead.
+	 */
+	ENTRY(membar_exit)
+	membar	#LoadStore|#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_exit)
+
+	/*
+	 * membar_producer: issue a membar with the #StoreStore ordering
+	 * constraint, then return.  The membar is kept out of the retl
+	 * delay slot, which holds a nop instead.
+	 */
+	ENTRY(membar_producer)
+	membar	#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_producer)
+
+	/*
+	 * membar_consumer: issue a membar with the #LoadLoad ordering
+	 * constraint, then return.  The membar is kept out of the retl
+	 * delay slot, which holds a nop instead.
+	 */
+	ENTRY(membar_consumer)
+	membar	#LoadLoad
+	retl
+	nop
+	SET_SIZE(membar_consumer)
+
+#endif	/* !_KERNEL */
diff --git a/common/list/list.c b/common/list/list.c
new file mode 100644
index 000000000000..94f7782a87d2
--- /dev/null
+++ b/common/list/list.c
@@ -0,0 +1,251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#ifdef _KERNEL
+#include <sys/debug.h>
+#else
+#include <assert.h>
+#define	ASSERT(a)	assert(a)
+#endif
+
+/*
+ * list_d2l maps a caller's object to the list_node_t embedded in it at byte
+ * offset (a)->list_offset; list_object performs the reverse mapping.  Under
+ * lint, list_d2l is declared as a function instead of being expanded as a
+ * macro.  list_empty is true when the head node's list_next points back at
+ * the head node itself.
+ *
+ * Every macro argument is parenthesized in the expansion so that arbitrary
+ * expressions (not just simple identifiers) can be passed safely.
+ */
+#ifdef lint
+extern list_node_t *list_d2l(list_t *list, void *obj);
+#else
+#define	list_d2l(a, obj) ((list_node_t *)(((char *)(obj)) + (a)->list_offset))
+#endif
+#define	list_object(a, node) ((void *)(((char *)(node)) - (a)->list_offset))
+#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+#define	list_insert_after_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_prev = (node);			\
+	lnew->list_next = (node)->list_next;		\
+	(node)->list_next->list_prev = lnew;		\
+	(node)->list_next = lnew;			\
+}
+
+#define	list_insert_before_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_next = (node);			\
+	lnew->list_prev = (node)->list_prev;		\
+	(node)->list_prev->list_next = lnew;		\
+	(node)->list_prev = lnew;			\
+}
+
+#define	list_remove_node(node)					\
+	(node)->list_prev->list_next = (node)->list_next;	\
+	(node)->list_next->list_prev = (node)->list_prev;	\
+	(node)->list_next = (node)->list_prev = NULL
+
+/*
+ * Initialize `list' as an empty list.  `size' and `offset' are recorded in
+ * list_size and list_offset; `offset' is the byte offset of the embedded
+ * list_node_t within each object, and must leave room for that node inside
+ * `size' bytes.
+ */
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+	list_node_t *sentinel;
+
+	ASSERT(list);
+	ASSERT(size > 0);
+	ASSERT(size >= offset + sizeof (list_node_t));
+
+	list->list_size = size;
+	list->list_offset = offset;
+
+	/* An empty list is a head node linked to itself in both directions. */
+	sentinel = &list->list_head;
+	sentinel->list_prev = sentinel;
+	sentinel->list_next = sentinel;
+}
+
+/*
+ * Tear down `list'.  The list is asserted to be empty (head node linked
+ * only to itself); its head links are then cleared to NULL.
+ */
+void
+list_destroy(list_t *list)
+{
+	list_node_t *node = &list->list_head;
+
+	ASSERT(list);
+	ASSERT(list->list_head.list_next == node);
+	ASSERT(list->list_head.list_prev == node);
+
+	node->list_prev = NULL;
+	node->list_next = NULL;
+}
+
+/*
+ * Insert `nobject' immediately after `object'.  When `object' is NULL the
+ * new element is inserted at the head of the list instead.
+ */
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+	list_node_t *anchor;
+
+	if (object == NULL) {
+		list_insert_head(list, nobject);
+		return;
+	}
+
+	anchor = list_d2l(list, object);
+	list_insert_after_node(list, anchor, nobject);
+}
+
+/*
+ * Insert `nobject' immediately before `object'.  When `object' is NULL the
+ * new element is inserted at the tail of the list instead.
+ */
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+	list_node_t *anchor;
+
+	if (object == NULL) {
+		list_insert_tail(list, nobject);
+		return;
+	}
+
+	anchor = list_d2l(list, object);
+	list_insert_before_node(list, anchor, nobject);
+}
+
+/*
+ * Insert `object' as the first element: it is linked directly after the
+ * list's head node.
+ */
+void
+list_insert_head(list_t *list, void *object)
+{
+	list_insert_after_node(list, &list->list_head, object);
+}
+
+/*
+ * Insert `object' as the last element: it is linked directly before the
+ * list's head node.
+ */
+void
+list_insert_tail(list_t *list, void *object)
+{
+	list_insert_before_node(list, &list->list_head, object);
+}
+
+/*
+ * Remove `object' from `list'.  The list must not be empty and the object's
+ * node must be linked (non-NULL list_next); both are asserted.  After
+ * removal the node's link pointers are NULL.
+ */
+void
+list_remove(list_t *list, void *object)
+{
+	list_node_t *lold = list_d2l(list, object);
+
+	ASSERT(!list_empty(list));
+	ASSERT(lold->list_next != NULL);
+
+	/* splice the neighbors together, then mark the node inactive */
+	lold->list_prev->list_next = lold->list_next;
+	lold->list_next->list_prev = lold->list_prev;
+	lold->list_prev = NULL;
+	lold->list_next = NULL;
+}
+
+/*
+ * Unlink and return the first object on `list', or return NULL if the
+ * list is empty.
+ */
+void *
+list_remove_head(list_t *list)
+{
+	list_node_t *sentinel = &list->list_head;
+	list_node_t *first = sentinel->list_next;
+
+	if (first == sentinel)
+		return (NULL);
+
+	list_remove_node(first);
+	return (list_object(list, first));
+}
+
+/*
+ * Unlink and return the last object on `list', or return NULL if the
+ * list is empty.
+ */
+void *
+list_remove_tail(list_t *list)
+{
+	list_node_t *sentinel = &list->list_head;
+	list_node_t *last = sentinel->list_prev;
+
+	if (last == sentinel)
+		return (NULL);
+
+	list_remove_node(last);
+	return (list_object(list, last));
+}
+
+/*
+ * Return the first object on `list' without removing it, or NULL if the
+ * list is empty.
+ */
+void *
+list_head(list_t *list)
+{
+	list_node_t *first = list->list_head.list_next;
+
+	if (first == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, first));
+}
+
+/*
+ * Return the last object on `list' without removing it, or NULL if the
+ * list is empty.
+ */
+void *
+list_tail(list_t *list)
+{
+	if (!list_empty(list))
+		return (list_object(list, list->list_head.list_prev));
+
+	return (NULL);
+}
+
+/*
+ * Return the object that follows `object' on `list', or NULL when `object'
+ * is the last element (its successor is the list's head node).
+ */
+void *
+list_next(list_t *list, void *object)
+{
+	list_node_t *succ = list_d2l(list, object)->list_next;
+
+	if (succ == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, succ));
+}
+
+/*
+ * Return the object that precedes `object' on `list', or NULL when `object'
+ * is the first element (its predecessor is the list's head node).
+ */
+void *
+list_prev(list_t *list, void *object)
+{
+	list_node_t *pred = list_d2l(list, object)->list_prev;
+
+	if (pred == &list->list_head)
+		return (NULL);
+
+	return (list_object(list, pred));
+}
+
+/*
+ * Append every element of `src' to the end of `dst', preserving order, and
+ * leave `src' empty.  Both lists must have been created with the same
+ * object size and node offset (asserted).  Nothing happens if `src' is
+ * already empty.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+	list_node_t *dstnode = &dst->list_head;
+	list_node_t *srcnode = &src->list_head;
+	list_node_t *src_first, *src_last, *dst_last;
+
+	ASSERT(dst->list_size == src->list_size);
+	ASSERT(dst->list_offset == src->list_offset);
+
+	if (list_empty(src))
+		return;
+
+	src_first = srcnode->list_next;
+	src_last = srcnode->list_prev;
+	dst_last = dstnode->list_prev;
+
+	/* join the old tail of dst to the first element of src */
+	dst_last->list_next = src_first;
+	src_first->list_prev = dst_last;
+
+	/* the last element of src becomes the new tail of dst */
+	dstnode->list_prev = src_last;
+	src_last->list_next = dstnode;
+
+	/* empty src list */
+	srcnode->list_prev = srcnode;
+	srcnode->list_next = srcnode;
+}
+
+/*
+ * Put `lnew' into the list position currently occupied by `lold'.  `lold'
+ * must be linked and `lnew' must not be (both asserted); afterwards `lold'
+ * has NULL link pointers.
+ */
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+	list_node_t *succ;
+	list_node_t *pred;
+
+	ASSERT(list_link_active(lold));
+	ASSERT(!list_link_active(lnew));
+
+	succ = lold->list_next;
+	pred = lold->list_prev;
+
+	lnew->list_next = succ;
+	lnew->list_prev = pred;
+	pred->list_next = lnew;
+	succ->list_prev = lnew;
+
+	lold->list_prev = NULL;
+	lold->list_next = NULL;
+}
+
+/*
+ * Mark `link' as not being on any list by clearing both link pointers.
+ */
+void
+list_link_init(list_node_t *link)
+{
+	link->list_next = link->list_prev = NULL;
+}
+
+/*
+ * Return 1 if `link' has a non-NULL list_next pointer (i.e. it is
+ * currently linked), otherwise 0.
+ */
+int
+list_link_active(list_node_t *link)
+{
+	if (link->list_next == NULL)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Return non-zero if `list' has no elements, i.e. its head node's
+ * list_next points back at the head node.
+ */
+int
+list_is_empty(list_t *list)
+{
+	return (list->list_head.list_next == &list->list_head);
+}
diff --git a/common/nvpair/nvpair.c b/common/nvpair/nvpair.c
new file mode 100644
index 000000000000..00d44263ccda
--- /dev/null
+++ b/common/nvpair/nvpair.c
@@ -0,0 +1,3297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/stropts.h>
+#include <sys/debug.h>
+#include <sys/isa_defs.h>
+#include <sys/int_limits.h>
+#include <sys/nvpair.h>
+#include <sys/nvpair_impl.h>
+#include <rpc/types.h>
+#include <rpc/xdr.h>
+
+#if defined(_KERNEL) && !defined(_BOOT)
+#include <sys/varargs.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#else
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#endif
+
+#ifndef	offsetof
+#define	offsetof(s, m)		((size_t)(&(((s *)0)->m)))
+#endif
+/* Advance p past any run of spaces and tabs; p is evaluated repeatedly. */
+#define	skip_whitespace(p)	while ((*(p) == ' ') || (*(p) == '\t')) (p)++
+
+/*
+ * nvpair.c - Provides kernel & userland interfaces for manipulating
+ *	name-value pairs.
+ *
+ * Overview Diagram
+ *
+ *  +--------------+
+ *  |  nvlist_t    |
+ *  |--------------|
+ *  | nvl_version  |
+ *  | nvl_nvflag   |
+ *  | nvl_priv    -+-+
+ *  | nvl_flag     | |
+ *  | nvl_pad      | |
+ *  +--------------+ |
+ *                   V
+ *      +--------------+      last i_nvp in list
+ *      | nvpriv_t     |  +--------------------->
+ *      |--------------|  |
+ *   +--+- nvp_list    |  |   +------------+
+ *   |  |  nvp_last   -+--+   + nv_alloc_t |
+ *   |  |  nvp_curr    |      |------------|
+ *   |  |  nvp_nva    -+----> | nva_ops    |
+ *   |  |  nvp_stat    |      | nva_arg    |
+ *   |  +--------------+      +------------+
+ *   |
+ *   +-------+
+ *           V
+ *   +---------------------+      +-------------------+
+ *   |  i_nvp_t            |  +-->|  i_nvp_t          |  +-->
+ *   |---------------------|  |   |-------------------|  |
+ *   | nvi_next           -+--+   | nvi_next         -+--+
+ *   | nvi_prev (NULL)     | <----+ nvi_prev          |
+ *   | . . . . . . . . . . |      | . . . . . . . . . |
+ *   | nvp (nvpair_t)      |      | nvp (nvpair_t)    |
+ *   |  - nvp_size         |      |  - nvp_size       |
+ *   |  - nvp_name_sz      |      |  - nvp_name_sz    |
+ *   |  - nvp_value_elem   |      |  - nvp_value_elem |
+ *   |  - nvp_type         |      |  - nvp_type       |
+ *   |  - data ...         |      |  - data ...       |
+ *   +---------------------+      +-------------------+
+ *
+ *
+ *
+ *   +---------------------+              +---------------------+
+ *   |  i_nvp_t            |  +-->    +-->|  i_nvp_t (last)     |
+ *   |---------------------|  |       |   |---------------------|
+ *   |  nvi_next          -+--+ ... --+   | nvi_next (NULL)     |
+ * <-+- nvi_prev           |<-- ...  <----+ nvi_prev            |
+ *   | . . . . . . . . .   |              | . . . . . . . . .   |
+ *   | nvp (nvpair_t)      |              | nvp (nvpair_t)      |
+ *   |  - nvp_size         |              |  - nvp_size         |
+ *   |  - nvp_name_sz      |              |  - nvp_name_sz      |
+ *   |  - nvp_value_elem   |              |  - nvp_value_elem   |
+ *   |  - DATA_TYPE_NVLIST |              |  - nvp_type         |
+ *   |  - data (embedded)  |              |  - data ...         |
+ *   |    nvlist name      |              +---------------------+
+ *   |  +--------------+   |
+ *   |  |  nvlist_t    |   |
+ *   |  |--------------|   |
+ *   |  | nvl_version  |   |
+ *   |  | nvl_nvflag   |   |
+ *   |  | nvl_priv   --+---+---->
+ *   |  | nvl_flag     |   |
+ *   |  | nvl_pad      |   |
+ *   |  +--------------+   |
+ *   +---------------------+
+ *
+ *
+ * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will
+ * allow value to be aligned on 8 byte boundary
+ *
+ * name_len is the length of the name string including the null terminator
+ * so it must be >= 1
+ */
+/*
+ * Size of an nvpair: the nvpair_t header plus the name, rounded up with
+ * NV_ALIGN, followed by the value, also rounded up with NV_ALIGN.  The
+ * name_len argument is parenthesized so that an expression can be passed.
+ */
+#define	NVP_SIZE_CALC(name_len, data_len) \
+	(NV_ALIGN((sizeof (nvpair_t)) + (name_len)) + NV_ALIGN(data_len))
+
+static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);
+static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,
+    uint_t nelem, const void *data);
+
+/* nvp_stat flag: this nvpriv_t belongs to an nvlist embedded in an nvpair */
+#define	NV_STAT_EMBEDDED	0x1
+/* View an nvpair's value area as an embedded nvlist_t / nvlist_t * array */
+#define	EMBEDDED_NVL(nvp)	((nvlist_t *)(void *)NVP_VALUE(nvp))
+#define	EMBEDDED_NVL_ARRAY(nvp)	((nvlist_t **)(void *)NVP_VALUE(nvp))
+
+/* Byte offset of the value within an nvpair: aligned header plus name */
+#define	NVP_VALOFF(nvp)	(NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz))
+/* Recover the enclosing i_nvp_t from a pointer to its nvi_nvp member */
+#define	NVPAIR2I_NVP(nvp) \
+	((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp)))
+
+
+/*
+ * Bind allocator handle `nva' to the operations vector `nvo' and clear its
+ * nva_arg.  If the vector supplies an nv_ao_init hook it is called with the
+ * remaining variable arguments and its result is returned; otherwise 0 is
+ * returned.
+ */
+int
+nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...)
+{
+	va_list valist;
+	int err;
+
+	nva->nva_ops = nvo;
+	nva->nva_arg = NULL;
+
+	va_start(valist, nvo);
+	if (nva->nva_ops->nv_ao_init == NULL)
+		err = 0;
+	else
+		err = nva->nva_ops->nv_ao_init(nva, valist);
+	va_end(valist);
+
+	return (err);
+}
+
+/*
+ * Invoke the allocator's nv_ao_reset hook, if it provides one.
+ */
+void
+nv_alloc_reset(nv_alloc_t *nva)
+{
+	if (nva->nva_ops->nv_ao_reset == NULL)
+		return;
+
+	nva->nva_ops->nv_ao_reset(nva);
+}
+
+/*
+ * Invoke the allocator's nv_ao_fini hook, if it provides one.
+ */
+void
+nv_alloc_fini(nv_alloc_t *nva)
+{
+	if (nva->nva_ops->nv_ao_fini == NULL)
+		return;
+
+	nva->nva_ops->nv_ao_fini(nva);
+}
+
+/*
+ * Return the allocator recorded in the private data of `nvl', or NULL if
+ * `nvl' is NULL or has no private data.
+ */
+nv_alloc_t *
+nvlist_lookup_nv_alloc(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+
+	if (nvl == NULL)
+		return (NULL);
+
+	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	if (priv == NULL)
+		return (NULL);
+
+	return (priv->nvp_nva);
+}
+
+/*
+ * Allocate `size' bytes through the allocator recorded in `nvp' and zero
+ * them.  Returns NULL if the allocator returns NULL.
+ */
+static void *
+nv_mem_zalloc(nvpriv_t *nvp, size_t size)
+{
+	nv_alloc_t *nva = nvp->nvp_nva;
+	void *buf = nva->nva_ops->nv_ao_alloc(nva, size);
+
+	if (buf == NULL)
+		return (NULL);
+
+	bzero(buf, size);
+	return (buf);
+}
+
+/*
+ * Hand `buf' (of `size' bytes) back to the allocator recorded in `nvp'.
+ */
+static void
+nv_mem_free(nvpriv_t *nvp, void *buf, size_t size)
+{
+	nv_alloc_t *allocator = nvp->nvp_nva;
+
+	allocator->nva_ops->nv_ao_free(allocator, buf, size);
+}
+
+/*
+ * Zero `priv' and then record the allocator and status flags in it.
+ */
+static void
+nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat)
+{
+	bzero(priv, sizeof (*priv));
+
+	priv->nvp_stat = stat;
+	priv->nvp_nva = nva;
+}
+
+/*
+ * Allocate and initialize an nvpriv_t (status 0) using allocator `nva'.
+ * Returns NULL if the allocation fails.
+ */
+static nvpriv_t *
+nv_priv_alloc(nv_alloc_t *nva)
+{
+	nvpriv_t *priv;
+
+	/*
+	 * nv_mem_zalloc() cannot be called here because it needs an
+	 * existing nvpriv_t as its argument; call the allocator directly.
+	 */
+	priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t));
+	if (priv != NULL)
+		nv_priv_init(priv, nva, 0);
+
+	return (priv);
+}
+
+/*
+ * Embedded lists need their own nvpriv_t.  Allocate one through the parent
+ * list's nvpriv_t, inheriting the parent's allocator, and mark it with
+ * NV_STAT_EMBEDDED.  Returns NULL if the allocation fails.
+ */
+static nvpriv_t *
+nv_priv_alloc_embedded(nvpriv_t *priv)
+{
+	nvpriv_t *emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t));
+
+	if (emb_priv != NULL)
+		nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED);
+
+	return (emb_priv);
+}
+
+/*
+ * Fill in the header of `nvl': current NV_VERSION, the private-data pointer
+ * `priv', and `nvflag' restricted to the NV_UNIQUE_NAME and
+ * NV_UNIQUE_NAME_TYPE bits.  nvl_flag and nvl_pad are cleared.
+ */
+static void
+nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv)
+{
+	const uint32_t allowed = NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE;
+
+	nvl->nvl_version = NV_VERSION;
+	nvl->nvl_nvflag = nvflag & allowed;
+	nvl->nvl_priv = (uint64_t)(uintptr_t)priv;
+	nvl->nvl_flag = 0;
+	nvl->nvl_pad = 0;
+}
+
+/*
+ * Return the nvl_nvflag field of `nvl'.
+ */
+uint_t
+nvlist_nvflag(nvlist_t *nvl)
+{
+	uint_t flags = nvl->nvl_nvflag;
+
+	return (flags);
+}
+
+/*
+ * nvlist_alloc - Allocate nvlist.
+ *
+ * Thin wrapper around nvlist_xalloc().  In the kernel (outside _BOOT) a
+ * kmflag of KM_SLEEP selects nv_alloc_sleep; in every other case
+ * nv_alloc_nosleep is used and kmflag is ignored.
+ */
+/*ARGSUSED1*/
+int
+nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag)
+{
+	nv_alloc_t *nva = nv_alloc_nosleep;
+
+#if defined(_KERNEL) && !defined(_BOOT)
+	if (kmflag == KM_SLEEP)
+		nva = nv_alloc_sleep;
+#endif
+	return (nvlist_xalloc(nvlp, nvflag, nva));
+}
+
+/*
+ * Allocate an empty nvlist using allocator `nva' and store it in *nvlp.
+ * Returns EINVAL if either pointer argument is NULL, ENOMEM if the private
+ * data or the nvlist itself cannot be allocated (in the latter case *nvlp
+ * is set to NULL), and 0 on success.
+ */
+int
+nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva)
+{
+	nvpriv_t *priv;
+	nvlist_t *nvl;
+
+	if (nvlp == NULL || nva == NULL)
+		return (EINVAL);
+
+	priv = nv_priv_alloc(nva);
+	if (priv == NULL)
+		return (ENOMEM);
+
+	nvl = nv_mem_zalloc(priv, NV_ALIGN(sizeof (nvlist_t)));
+	*nvlp = nvl;
+	if (nvl == NULL) {
+		/* undo the private-data allocation */
+		nv_mem_free(priv, priv, sizeof (nvpriv_t));
+		return (ENOMEM);
+	}
+
+	nvlist_init(nvl, nvflag, priv);
+
+	return (0);
+}
+
+/*
+ * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair.
+ *
+ * `len' is the size of the nvpair_t portion; the bytes of i_nvp_t that
+ * precede its nvi_nvp member are added on top.  The buffer is zeroed and
+ * nvp_size is set to `len'.  Returns a pointer to the embedded nvpair_t,
+ * or NULL if the allocation fails.
+ */
+static nvpair_t *
+nvp_buf_alloc(nvlist_t *nvl, size_t len)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	size_t nvsize = len + offsetof(i_nvp_t, nvi_nvp);
+	i_nvp_t *buf = nv_mem_zalloc(priv, nvsize);
+
+	if (buf == NULL)
+		return (NULL);
+
+	buf->nvi_nvp.nvp_size = len;
+
+	return (&buf->nvi_nvp);
+}
+
+/*
+ * nvp_buf_free - de-Allocate an i_nvp_t.
+ *
+ * The size handed back to the allocator is the pair's nvp_size plus the
+ * bytes of i_nvp_t that precede its nvi_nvp member.
+ */
+static void
+nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *buf = NVPAIR2I_NVP(nvp);
+
+	nv_mem_free(priv, buf, nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp));
+}
+
+/*
+ * nvp_buf_link - link a new nv pair into the nvlist.
+ *
+ * The pair is always appended at the end of the list.
+ */
+static void
+nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr = NVPAIR2I_NVP(nvp);
+
+	if (priv->nvp_list != NULL) {
+		/* hook the new element behind the current last one */
+		curr->nvi_prev = priv->nvp_last;
+		priv->nvp_last->nvi_next = curr;
+	} else {
+		/* first element of an empty list */
+		priv->nvp_list = curr;
+	}
+	priv->nvp_last = curr;
+}
+
+/*
+ * nvp_buf_unlink - unlink a removed nvpair from the nvlist.
+ *
+ * Only the list linkage is updated; the pair's buffer is not freed here.
+ */
+static void
+nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr = NVPAIR2I_NVP(nvp);
+	i_nvp_t *succ = curr->nvi_next;
+	i_nvp_t *pred = curr->nvi_prev;
+
+	/*
+	 * If the iteration cursor sits on this element, move it on so that
+	 * nvlist_next_nvpair() does not walk on freed memory.
+	 */
+	if (priv->nvp_curr == curr)
+		priv->nvp_curr = succ;
+
+	if (priv->nvp_list == curr)
+		priv->nvp_list = succ;
+	else
+		pred->nvi_next = succ;
+
+	if (priv->nvp_last == curr)
+		priv->nvp_last = pred;
+	else
+		succ->nvi_prev = pred;
+}
+
+/*
+ * Check that `nelem' is a legal element count for data type `type':
+ * exactly 0 for DATA_TYPE_BOOLEAN, exactly 1 for the scalar types, and any
+ * count (including 0) for the array types.  Returns 0 if the combination
+ * is valid and EINVAL otherwise, including for unknown types.
+ */
+static int
+i_validate_type_nelem(data_type_t type, uint_t nelem)
+{
+	int err;
+
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		err = (nelem == 0) ? 0 : EINVAL;
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+	case DATA_TYPE_INT16:
+	case DATA_TYPE_UINT16:
+	case DATA_TYPE_INT32:
+	case DATA_TYPE_UINT32:
+	case DATA_TYPE_INT64:
+	case DATA_TYPE_UINT64:
+	case DATA_TYPE_STRING:
+	case DATA_TYPE_HRTIME:
+	case DATA_TYPE_NVLIST:
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+#endif
+		err = (nelem == 1) ? 0 : EINVAL;
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+	case DATA_TYPE_STRING_ARRAY:
+	case DATA_TYPE_NVLIST_ARRAY:
+		/* we allow arrays with 0 elements */
+		err = 0;
+		break;
+
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	return (err);
+}
+
+/*
+ * Verify nvp_name_sz and check the name string length.
+ *
+ * The name size must be positive and fit within nvp_size, the byte at
+ * index nvp_name_sz - 1 must be the terminator, and the string must not
+ * contain an earlier terminator.  Returns 0 if all checks pass, EFAULT
+ * otherwise.
+ */
+static int
+i_validate_nvpair_name(nvpair_t *nvp)
+{
+	if (nvp->nvp_name_sz <= 0)
+		return (EFAULT);
+
+	if (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))
+		return (EFAULT);
+
+	/* verify the name string, make sure its terminated */
+	if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0')
+		return (EFAULT);
+
+	if (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1)
+		return (0);
+
+	return (EFAULT);
+}
+
+/*
+ * Check that the payload of a boolean-typed pair holds only B_TRUE or
+ * B_FALSE.  `data' points at the value: a single boolean_t for
+ * DATA_TYPE_BOOLEAN_VALUE, or `nelem' of them for DATA_TYPE_BOOLEAN_ARRAY.
+ * All other types are accepted without inspection.
+ *
+ * Returns 0 if the value is acceptable and EINVAL otherwise.
+ */
+static int
+i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data)
+{
+	const boolean_t *bools = data;
+	uint_t i;
+
+	switch (type) {
+	case DATA_TYPE_BOOLEAN_VALUE:
+		if (*bools != B_TRUE && *bools != B_FALSE)
+			return (EINVAL);
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY:
+		/*
+		 * The index has the same unsigned type as nelem, so the
+		 * comparison involves no signed/unsigned conversion and the
+		 * index cannot overflow for large element counts.
+		 */
+		for (i = 0; i < nelem; i++) {
+			if (bools[i] != B_TRUE && bools[i] != B_FALSE)
+				return (EINVAL);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * This function takes a pointer to what should be an nvpair and verifies
+ * that all the nvpair fields make sense and can be trusted.  This function
+ * is used when decoding packed nvpairs.
+ *
+ * Returns 0 if the pair is consistent and EFAULT otherwise.
+ */
+static int
+i_validate_nvpair(nvpair_t *nvp)
+{
+	data_type_t type = NVP_TYPE(nvp);
+	int size1, size2;
+
+	/* verify nvp_name_sz, check the name string length */
+	if (i_validate_nvpair_name(nvp) != 0)
+		return (EFAULT);
+
+	/*
+	 * verify nvp_type, nvp_value_elem, and also possibly
+	 * verify string values and get the value size.
+	 *
+	 * This is done before the value contents are inspected so that
+	 * boolean payloads are only read once the type, the element count
+	 * and nvp_size have been found to be mutually consistent.
+	 */
+	size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));
+	size1 = nvp->nvp_size - NVP_VALOFF(nvp);
+	if (size2 < 0 || size1 != NV_ALIGN(size2))
+		return (EFAULT);
+
+	if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Add a copy of every pair in `snvl' to `dnvl', in list order, using
+ * nvlist_add_common().  Returns EINVAL if `snvl' has no private data, the
+ * first non-zero error from nvlist_add_common() (copying stops there), or
+ * 0 when every pair was added.
+ */
+static int
+nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv;
+	i_nvp_t *curr;
+	int err = 0;
+
+	if (priv == NULL)
+		return (EINVAL);
+
+	curr = priv->nvp_list;
+	while (curr != NULL && err == 0) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp),
+		    NVP_NELEM(nvp), NVP_VALUE(nvp));
+		curr = curr->nvi_next;
+	}
+
+	return (err);
+}
+
+/*
+ * Release everything an nvpair owns beyond its own buffer: the embedded
+ * nvlist of a DATA_TYPE_NVLIST pair, or each non-NULL nvlist of a
+ * DATA_TYPE_NVLIST_ARRAY pair.  Other types own nothing extra.  The nvpair
+ * buffer itself is not freed here.
+ */
+static void
+nvpair_free(nvpair_t *nvp)
+{
+	data_type_t type = NVP_TYPE(nvp);
+
+	if (type == DATA_TYPE_NVLIST) {
+		nvlist_free(EMBEDDED_NVL(nvp));
+	} else if (type == DATA_TYPE_NVLIST_ARRAY) {
+		nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+		int i;
+
+		for (i = 0; i < NVP_NELEM(nvp); i++) {
+			if (nvlp[i] != NULL)
+				nvlist_free(nvlp[i]);
+		}
+	}
+}
+
+/*
+ * nvlist_free - free an unpacked nvlist
+ *
+ * Frees every pair (and anything the pairs own), then the nvlist_t itself
+ * unless it is embedded in a pair, and finally the private data.  For an
+ * embedded list nvl_priv is reset to 0 instead of freeing the nvlist_t.
+ * A NULL list, or one without private data, is ignored.
+ */
+void
+nvlist_free(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+	i_nvp_t *next;
+
+	if (nvl == NULL)
+		return;
+
+	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	if (priv == NULL)
+		return;
+
+	/*
+	 * Unpacked nvlist are linked through i_nvp_t.  The successor is
+	 * fetched before the current element's buffer is released.
+	 */
+	for (curr = priv->nvp_list; curr != NULL; curr = next) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		next = curr->nvi_next;
+		nvpair_free(nvp);
+		nvp_buf_free(nvl, nvp);
+	}
+
+	if (priv->nvp_stat & NV_STAT_EMBEDDED)
+		nvl->nvl_priv = 0;
+	else
+		nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t)));
+
+	/* priv supplies the allocator, so it must be released last */
+	nv_mem_free(priv, priv, sizeof (nvpriv_t));
+}
+
+/*
+ * Return 1 if `nvp' is one of the pairs currently linked into `nvl'
+ * (compared by address), otherwise 0.  A NULL `nvp' yields 0.
+ */
+static int
+nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr;
+
+	if (nvp == NULL)
+		return (0);
+
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		if (&curr->nvi_nvp == nvp)
+			return (1);
+		curr = curr->nvi_next;
+	}
+
+	return (0);
+}
+
+/*
+ * Make a copy of nvlist
+ *
+ * Thin wrapper around nvlist_xdup().  In the kernel (outside _BOOT) a
+ * kmflag of KM_SLEEP selects nv_alloc_sleep; in every other case
+ * nv_alloc_nosleep is used and kmflag is ignored.
+ */
+/*ARGSUSED1*/
+int
+nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag)
+{
+	nv_alloc_t *nva = nv_alloc_nosleep;
+
+#if defined(_KERNEL) && !defined(_BOOT)
+	if (kmflag == KM_SLEEP)
+		nva = nv_alloc_sleep;
+#endif
+	return (nvlist_xdup(nvl, nvlp, nva));
+}
+
+/*
+ * Duplicate `nvl' using allocator `nva' and store the copy in *nvlp.
+ * Returns EINVAL if `nvl' or `nvlp' is NULL, otherwise the error from
+ * nvlist_xalloc() or from copying the pairs, or 0 on success.  On failure
+ * the partially built copy is freed and *nvlp is left untouched.
+ */
+int
+nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva)
+{
+	nvlist_t *copy;
+	int err;
+
+	if (nvl == NULL || nvlp == NULL)
+		return (EINVAL);
+
+	err = nvlist_xalloc(&copy, nvl->nvl_nvflag, nva);
+	if (err != 0)
+		return (err);
+
+	err = nvlist_copy_pairs(nvl, copy);
+	if (err != 0) {
+		nvlist_free(copy);
+		return (err);
+	}
+
+	*nvlp = copy;
+	return (err);
+}
+
+/*
+ * Remove every pair whose name matches `name', regardless of type.
+ * Returns EINVAL for a NULL argument or a list without private data,
+ * ENOENT if no pair matched, and 0 if at least one pair was removed.
+ */
+int
+nvlist_remove_all(nvlist_t *nvl, const char *name)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+	i_nvp_t *next;
+	int error = ENOENT;
+
+	if (nvl == NULL || name == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	/* the successor is saved first because a match frees `curr' */
+	for (curr = priv->nvp_list; curr != NULL; curr = next) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		next = curr->nvi_next;
+		if (strcmp(name, NVP_NAME(nvp)) == 0) {
+			nvp_buf_unlink(nvl, nvp);
+			nvpair_free(nvp);
+			nvp_buf_free(nvl, nvp);
+
+			error = 0;
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Remove the first pair whose name and type both match.  Returns EINVAL
+ * for a NULL argument or a list without private data, ENOENT if no pair
+ * matched, and 0 once a pair has been removed.
+ */
+int
+nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL || name == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		if (strcmp(name, NVP_NAME(nvp)) != 0 || NVP_TYPE(nvp) != type)
+			continue;
+
+		nvp_buf_unlink(nvl, nvp);
+		nvpair_free(nvp);
+		nvp_buf_free(nvl, nvp);
+
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Unlink 'nvp' from 'nvl' and release it.
+ *
+ * Returns EINVAL if either argument is NULL, otherwise 0.  No check is
+ * made here that 'nvp' actually belongs to 'nvl'.
+ */
+int
+nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	if (nvl == NULL)
+		return (EINVAL);
+	if (nvp == NULL)
+		return (EINVAL);
+
+	nvp_buf_unlink(nvl, nvp);
+	nvpair_free(nvp);
+	nvp_buf_free(nvl, nvp);
+
+	return (0);
+}
+
+/*
+ * This function calculates the size, in bytes, of an nvpair value of the
+ * given 'type' holding 'nelem' elements.
+ *
+ * The data argument controls the behavior in case of the data types
+ *	DATA_TYPE_STRING	and
+ *	DATA_TYPE_STRING_ARRAY
+ * If data == NULL then the size of the string(s) is excluded; otherwise
+ * the length of each string plus its terminating NUL is included.
+ *
+ * Returns -1 if i_validate_type_nelem() rejects the type/nelem pair, if
+ * the type is not handled, if a string array contains a NULL entry, or if
+ * the computed size exceeds INT32_MAX.
+ */
+static int
+i_get_value_size(data_type_t type, const void *data, uint_t nelem)
+{
+	/* 64-bit accumulator so that nelem * size is range-checked at the end */
+	uint64_t value_sz;
+
+	if (i_validate_type_nelem(type, nelem) != 0)
+		return (-1);
+
+	/* Calculate required size for holding value */
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		value_sz = 0;
+		break;
+	case DATA_TYPE_BOOLEAN_VALUE:
+		value_sz = sizeof (boolean_t);
+		break;
+	case DATA_TYPE_BYTE:
+		value_sz = sizeof (uchar_t);
+		break;
+	case DATA_TYPE_INT8:
+		value_sz = sizeof (int8_t);
+		break;
+	case DATA_TYPE_UINT8:
+		value_sz = sizeof (uint8_t);
+		break;
+	case DATA_TYPE_INT16:
+		value_sz = sizeof (int16_t);
+		break;
+	case DATA_TYPE_UINT16:
+		value_sz = sizeof (uint16_t);
+		break;
+	case DATA_TYPE_INT32:
+		value_sz = sizeof (int32_t);
+		break;
+	case DATA_TYPE_UINT32:
+		value_sz = sizeof (uint32_t);
+		break;
+	case DATA_TYPE_INT64:
+		value_sz = sizeof (int64_t);
+		break;
+	case DATA_TYPE_UINT64:
+		value_sz = sizeof (uint64_t);
+		break;
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+		value_sz = sizeof (double);
+		break;
+#endif
+	case DATA_TYPE_STRING:
+		if (data == NULL)
+			value_sz = 0;
+		else
+			value_sz = strlen(data) + 1;
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (boolean_t);
+		break;
+	case DATA_TYPE_BYTE_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uchar_t);
+		break;
+	case DATA_TYPE_INT8_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int8_t);
+		break;
+	case DATA_TYPE_UINT8_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint8_t);
+		break;
+	case DATA_TYPE_INT16_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int16_t);
+		break;
+	case DATA_TYPE_UINT16_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint16_t);
+		break;
+	case DATA_TYPE_INT32_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int32_t);
+		break;
+	case DATA_TYPE_UINT32_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint32_t);
+		break;
+	case DATA_TYPE_INT64_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int64_t);
+		break;
+	case DATA_TYPE_UINT64_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint64_t);
+		break;
+	case DATA_TYPE_STRING_ARRAY:
+		/* one 64-bit slot per element for the pointer array */
+		value_sz = (uint64_t)nelem * sizeof (uint64_t);
+
+		if (data != NULL) {
+			char *const *strs = data;
+			uint_t i;
+
+			/* no alignment requirement for strings */
+			for (i = 0; i < nelem; i++) {
+				if (strs[i] == NULL)
+					return (-1);
+				value_sz += strlen(strs[i]) + 1;
+			}
+		}
+		break;
+	case DATA_TYPE_HRTIME:
+		value_sz = sizeof (hrtime_t);
+		break;
+	case DATA_TYPE_NVLIST:
+		value_sz = NV_ALIGN(sizeof (nvlist_t));
+		break;
+	case DATA_TYPE_NVLIST_ARRAY:
+		/* pointer slots followed by the aligned nvlist_t headers */
+		value_sz = (uint64_t)nelem * sizeof (uint64_t) +
+		    (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t));
+		break;
+	default:
+		return (-1);
+	}
+
+	/* the result must fit in the int return value */
+	return (value_sz > INT32_MAX ? -1 : (int)value_sz);
+}
+
+/*
+ * Initialize 'emb_nvl' as an embedded list belonging to 'nvl' and copy all
+ * pairs of 'onvl' into it.
+ *
+ * Returns ENOMEM if the embedded private data cannot be allocated, or the
+ * error from nvlist_copy_pairs().  On a copy failure the embedded list is
+ * freed and its nvl_priv field is cleared.
+ */
+static int
+nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl)
+{
+	nvpriv_t *parent = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	nvpriv_t *priv = nv_priv_alloc_embedded(parent);
+	int err;
+
+	if (priv == NULL)
+		return (ENOMEM);
+
+	nvlist_init(emb_nvl, onvl->nvl_nvflag, priv);
+
+	err = nvlist_copy_pairs(onvl, emb_nvl);
+	if (err == 0)
+		return (0);
+
+	nvlist_free(emb_nvl);
+	emb_nvl->nvl_priv = 0;
+
+	return (err);
+}
+
+/*
+ * nvlist_add_common - Add new <name,value> pair to nvlist
+ *
+ * 'type' and 'nelem' describe the value pointed to by 'data'.  The value
+ * is copied into a freshly allocated nvpair; strings, string arrays and
+ * embedded nvlists are deep-copied.
+ *
+ * Returns:
+ *	EINVAL	- NULL list or name, list without private data, NULL data
+ *		  with a non-zero nelem, invalid type/nelem/value, a name
+ *		  too long to be recorded in the pair, or an attempt to add
+ *		  a list to itself (or a NULL list pointer)
+ *	ENOMEM	- the nvpair buffer could not be allocated
+ *	other	- error returned while copying an embedded nvlist
+ *	0	- success
+ *
+ * When the list has NV_UNIQUE_NAME or NV_UNIQUE_NAME_TYPE set, existing
+ * pairs that collide with the new one are removed before it is linked in.
+ */
+static int
+nvlist_add_common(nvlist_t *nvl, const char *name,
+    data_type_t type, uint_t nelem, const void *data)
+{
+	nvpair_t *nvp;
+	uint_t i;
+	size_t name_len;
+
+	int nvp_sz, name_sz, value_sz;
+	int err = 0;
+
+	if (name == NULL || nvl == NULL || nvl->nvl_priv == 0)
+		return (EINVAL);
+
+	if (nelem != 0 && data == NULL)
+		return (EINVAL);
+
+	/*
+	 * Verify type and nelem and get the value size.
+	 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+	 * the size of the string(s) is included.
+	 */
+	if ((value_sz = i_get_value_size(type, data, nelem)) < 0)
+		return (EINVAL);
+
+	if (i_validate_nvpair_value(type, nelem, data) != 0)
+		return (EINVAL);
+
+	/*
+	 * If we're adding an nvlist or nvlist array, ensure that we are not
+	 * adding the input nvlist to itself, which would cause recursion,
+	 * and ensure that no NULL nvlist pointers are present.
+	 */
+	switch (type) {
+	case DATA_TYPE_NVLIST:
+		if (data == nvl || data == NULL)
+			return (EINVAL);
+		break;
+	case DATA_TYPE_NVLIST_ARRAY: {
+		nvlist_t **onvlp = (nvlist_t **)data;
+		for (i = 0; i < nelem; i++) {
+			if (onvlp[i] == nvl || onvlp[i] == NULL)
+				return (EINVAL);
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	/*
+	 * Calculate sizes of the nvpair elements and the nvpair itself.
+	 *
+	 * The name length (including the terminating NUL) is stored in
+	 * nvp_name_sz, so reject names that would not fit there instead of
+	 * silently truncating the stored length.  sizeof is unevaluated, so
+	 * using the not-yet-allocated 'nvp' here is safe.
+	 * NOTE(review): the limit assumes nvp_name_sz is a signed field --
+	 * confirm against the nvpair_t declaration.
+	 */
+	name_len = strlen(name) + 1;
+	if (name_len >= (1ULL << (sizeof (nvp->nvp_name_sz) * 8 - 1)))
+		return (EINVAL);
+	name_sz = (int)name_len;
+
+	nvp_sz = NVP_SIZE_CALC(name_sz, value_sz);
+
+	if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL)
+		return (ENOMEM);
+
+	ASSERT(nvp->nvp_size == nvp_sz);
+	nvp->nvp_name_sz = name_sz;
+	nvp->nvp_value_elem = nelem;
+	nvp->nvp_type = type;
+	bcopy(name, NVP_NAME(nvp), name_sz);
+
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		/* no value to store */
+		break;
+	case DATA_TYPE_STRING_ARRAY: {
+		char *const *strs = data;
+		char *buf = NVP_VALUE(nvp);
+		char **cstrs = (void *)buf;
+
+		/* skip pre-allocated space for pointer array */
+		buf += nelem * sizeof (uint64_t);
+		for (i = 0; i < nelem; i++) {
+			int slen = strlen(strs[i]) + 1;
+			bcopy(strs[i], buf, slen);
+			cstrs[i] = buf;
+			buf += slen;
+		}
+		break;
+	}
+	case DATA_TYPE_NVLIST: {
+		nvlist_t *nnvl = EMBEDDED_NVL(nvp);
+		nvlist_t *onvl = (nvlist_t *)data;
+
+		if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) {
+			nvp_buf_free(nvl, nvp);
+			return (err);
+		}
+		break;
+	}
+	case DATA_TYPE_NVLIST_ARRAY: {
+		nvlist_t **onvlp = (nvlist_t **)data;
+		nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+		/* the nvlist_t headers live right after the pointer slots */
+		nvlist_t *embedded = (nvlist_t *)
+		    ((uintptr_t)nvlp + nelem * sizeof (uint64_t));
+
+		for (i = 0; i < nelem; i++) {
+			if ((err = nvlist_copy_embedded(nvl,
+			    onvlp[i], embedded)) != 0) {
+				/*
+				 * Free any successfully created lists
+				 */
+				nvpair_free(nvp);
+				nvp_buf_free(nvl, nvp);
+				return (err);
+			}
+
+			nvlp[i] = embedded++;
+		}
+		break;
+	}
+	default:
+		bcopy(data, NVP_VALUE(nvp), value_sz);
+	}
+
+	/* if unique name, remove before add */
+	if (nvl->nvl_nvflag & NV_UNIQUE_NAME)
+		(void) nvlist_remove_all(nvl, name);
+	else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE)
+		(void) nvlist_remove(nvl, name, type);
+
+	nvp_buf_link(nvl, nvp);
+
+	return (0);
+}
+
+/*
+ * Add a value-less DATA_TYPE_BOOLEAN pair named 'name' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_boolean(nvlist_t *nvl, const char *name)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL));
+}
+
+/*
+ * Add a DATA_TYPE_BOOLEAN_VALUE pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_BYTE pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_INT8 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_UINT8 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_INT16 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_UINT16 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_INT32 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_UINT32 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_INT64 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_UINT64 pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val));
+}
+
+#if !defined(_KERNEL)
+/*
+ * Add a DATA_TYPE_DOUBLE pair named 'name' holding 'val' to 'nvl'.
+ * Only compiled when _KERNEL is not defined.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_double(nvlist_t *nvl, const char *name, double val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val));
+}
+#endif
+
+/*
+ * Add a DATA_TYPE_STRING pair named 'name' holding a copy of the string
+ * 'val' to 'nvl'.  A NULL 'val' is rejected by nvlist_add_common() with
+ * EINVAL because nelem is 1.
+ */
+int
+nvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val));
+}
+
+/*
+ * Add a DATA_TYPE_BOOLEAN_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_boolean_array(nvlist_t *nvl, const char *name,
+    boolean_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_BYTE_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_INT8_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_UINT8_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_INT16_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_UINT16_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_INT32_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_UINT32_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_INT64_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_UINT64_ARRAY pair named 'name' holding the 'n' elements
+ * of 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_STRING_ARRAY pair named 'name' holding copies of the 'n'
+ * strings in 'a' to 'nvl'.  Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_string_array(nvlist_t *nvl, const char *name,
+    char *const *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
+}
+
+/*
+ * Add a DATA_TYPE_HRTIME pair named 'name' holding 'val' to 'nvl'.
+ * Returns the result of nvlist_add_common().
+ */
+int
+nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val));
+}
+
+/*
+ * Add a DATA_TYPE_NVLIST pair named 'name' to 'nvl'; the pairs of 'val'
+ * are copied into an embedded list.  nvlist_add_common() rejects a NULL
+ * 'val' and 'val' == 'nvl' with EINVAL.
+ */
+int
+nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val));
+}
+
+/*
+ * Add a DATA_TYPE_NVLIST_ARRAY pair named 'name' to 'nvl'; each of the 'n'
+ * lists in 'a' is copied into an embedded list.  nvlist_add_common()
+ * rejects NULL entries and entries equal to 'nvl' with EINVAL.
+ */
+int
+nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
+}
+
+/*
+ * reading name-value pairs
+ *
+ * Return the pair that follows 'nvp' on 'nvl', or the first pair when
+ * 'nvp' is NULL.  Returns NULL for a NULL list, a list without private
+ * data, the end of the list, or an 'nvp' that is not on this list.
+ */
+nvpair_t *
+nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (NULL);
+
+	if (nvp == NULL) {
+		/* start of iteration */
+		curr = priv->nvp_list;
+	} else {
+		curr = NVPAIR2I_NVP(nvp);
+
+		/*
+		 * Ensure that nvp is a valid nvpair on this nvlist.
+		 * NB: nvp_curr is used only as a hint so that we don't
+		 * always have to walk the list to determine if nvp is
+		 * still on the list.
+		 */
+		if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
+			curr = curr->nvi_next;
+		else
+			curr = NULL;
+	}
+
+	priv->nvp_curr = curr;
+
+	if (curr == NULL)
+		return (NULL);
+
+	return (&curr->nvi_nvp);
+}
+
+/*
+ * Return the pair that precedes 'nvp' on 'nvl', or the last pair when
+ * 'nvp' is NULL.  Returns NULL for a NULL list, a list without private
+ * data, the start of the list, or an 'nvp' that is not on this list.
+ */
+nvpair_t *
+nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (NULL);
+
+	if (nvp == NULL) {
+		/* start of reverse iteration */
+		curr = priv->nvp_last;
+	} else {
+		curr = NVPAIR2I_NVP(nvp);
+
+		/* nvp_curr is a hint that avoids walking the list */
+		if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
+			curr = curr->nvi_prev;
+		else
+			curr = NULL;
+	}
+
+	priv->nvp_curr = curr;
+
+	if (curr == NULL)
+		return (NULL);
+
+	return (&curr->nvi_nvp);
+}
+
+/*
+ * Report whether 'nvl' holds no pairs.  A NULL list and a list without
+ * private data are both reported as empty (B_TRUE).
+ */
+boolean_t
+nvlist_empty(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+
+	if (nvl == NULL)
+		return (B_TRUE);
+
+	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	if (priv == NULL)
+		return (B_TRUE);
+
+	return (priv->nvp_list == NULL);
+}
+
+/*
+ * Return the name of 'nvp' as given by NVP_NAME().  'nvp' is not checked
+ * for NULL.
+ */
+char *
+nvpair_name(nvpair_t *nvp)
+{
+	return (NVP_NAME(nvp));
+}
+
+/*
+ * Return the data type of 'nvp' as given by NVP_TYPE().  'nvp' is not
+ * checked for NULL.
+ */
+data_type_t
+nvpair_type(nvpair_t *nvp)
+{
+	return (NVP_TYPE(nvp));
+}
+
+/*
+ * Return 1 if the data type of 'nvp' is one of the array types, 0
+ * otherwise.  'nvp' is not checked for NULL.
+ *
+ * Every DATA_TYPE_*_ARRAY handled elsewhere in this file is listed,
+ * including DATA_TYPE_INT8_ARRAY.
+ */
+int
+nvpair_type_is_array(nvpair_t *nvp)
+{
+	switch (NVP_TYPE(nvp)) {
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+	case DATA_TYPE_STRING_ARRAY:
+	case DATA_TYPE_NVLIST_ARRAY:
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Retrieve the value of 'nvp', which must be of the given 'type'.
+ *
+ * Scalar values are copied into the buffer 'data' points to.  For strings,
+ * nvlists and all array types, '*data' is set to point at the value stored
+ * inside the pair (no copy is made); for an empty array '*data' is set to
+ * NULL.  '*nelem' receives the element count when 'nelem' is non-NULL; it
+ * is mandatory for array types.
+ *
+ * Returns EINVAL if 'nvp' is NULL, the type does not match, or a required
+ * output pointer is NULL; ENOTSUP for an unhandled type; 0 on success.
+ */
+static int
+nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data)
+{
+	if (nvp == NULL || nvpair_type(nvp) != type)
+		return (EINVAL);
+
+	/*
+	 * For non-array types, we copy the data.
+	 * For array types (including string), we set a pointer.
+	 */
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		/* presence is the value; nothing to hand back via 'data' */
+		if (nelem != NULL)
+			*nelem = 0;
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+	case DATA_TYPE_INT16:
+	case DATA_TYPE_UINT16:
+	case DATA_TYPE_INT32:
+	case DATA_TYPE_UINT32:
+	case DATA_TYPE_INT64:
+	case DATA_TYPE_UINT64:
+	case DATA_TYPE_HRTIME:
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+#endif
+		if (data == NULL)
+			return (EINVAL);
+		/* copy exactly the size of one element of this type */
+		bcopy(NVP_VALUE(nvp), data,
+		    (size_t)i_get_value_size(type, NULL, 1));
+		if (nelem != NULL)
+			*nelem = 1;
+		break;
+
+	case DATA_TYPE_NVLIST:
+	case DATA_TYPE_STRING:
+		if (data == NULL)
+			return (EINVAL);
+		/* hand back a pointer into the pair itself */
+		*(void **)data = (void *)NVP_VALUE(nvp);
+		if (nelem != NULL)
+			*nelem = 1;
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+	case DATA_TYPE_STRING_ARRAY:
+	case DATA_TYPE_NVLIST_ARRAY:
+		if (nelem == NULL || data == NULL)
+			return (EINVAL);
+		/* an empty array is reported as a NULL pointer */
+		if ((*nelem = NVP_NELEM(nvp)) != 0)
+			*(void **)data = (void *)NVP_VALUE(nvp);
+		else
+			*(void **)data = NULL;
+		break;
+
+	default:
+		return (ENOTSUP);
+	}
+
+	return (0);
+}
+
+/*
+ * Find the first pair on 'nvl' matching both 'name' and 'type' and return
+ * its value through 'nelem' / 'data' via nvpair_value_common().
+ *
+ * Returns EINVAL for a NULL name, NULL list or a list without private
+ * data; ENOTSUP if the list has neither NV_UNIQUE_NAME nor
+ * NV_UNIQUE_NAME_TYPE set; ENOENT if no pair matches.
+ */
+static int
+nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type,
+    uint_t *nelem, void *data)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (name == NULL || nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	if ((nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE)) == 0)
+		return (ENOTSUP);
+
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		nvpair_t *candidate = &curr->nvi_nvp;
+
+		if (strcmp(name, NVP_NAME(candidate)) == 0 &&
+		    NVP_TYPE(candidate) == type) {
+			return (nvpair_value_common(candidate, type,
+			    nelem, data));
+		}
+
+		curr = curr->nvi_next;
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Check for a DATA_TYPE_BOOLEAN pair named 'name' on 'nvl'.  No value is
+ * returned; the result of nvlist_lookup_common() (0 when found) is the
+ * answer.
+ */
+int
+nvlist_lookup_boolean(nvlist_t *nvl, const char *name)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL));
+}
+
+/*
+ * Look up the DATA_TYPE_BOOLEAN_VALUE pair named 'name' on 'nvl' and copy
+ * its value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val)
+{
+	return (nvlist_lookup_common(nvl, name,
+	    DATA_TYPE_BOOLEAN_VALUE, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_BYTE pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_INT8 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT8 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_INT16 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT16 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_INT32 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT32 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_INT64 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT64 pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val));
+}
+
+#if !defined(_KERNEL)
+/*
+ * Look up the DATA_TYPE_DOUBLE pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Only compiled when _KERNEL is not defined.
+ * Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val));
+}
+#endif
+
+/*
+ * Look up the DATA_TYPE_STRING pair named 'name' on 'nvl' and set '*val'
+ * to point at the string stored inside the pair (no copy is made).
+ * Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_NVLIST pair named 'name' on 'nvl' and set '*val'
+ * to point at the embedded list stored inside the pair (no copy is made).
+ * Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val));
+}
+
+/*
+ * Look up the DATA_TYPE_BOOLEAN_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name,
+    boolean_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name,
+	    DATA_TYPE_BOOLEAN_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_BYTE_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_byte_array(nvlist_t *nvl, const char *name,
+    uchar_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_INT8_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT8_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name,
+    uint8_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_INT16_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int16_array(nvlist_t *nvl, const char *name,
+    int16_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT16_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name,
+    uint16_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_INT32_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int32_array(nvlist_t *nvl, const char *name,
+    int32_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT32_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name,
+    uint32_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_INT64_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_int64_array(nvlist_t *nvl, const char *name,
+    int64_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_UINT64_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the array stored inside the pair (NULL if it is empty) and '*n'
+ * to its element count.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name,
+    uint64_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_STRING_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the pointer array stored inside the pair (NULL if it is empty)
+ * and '*n' to its element count.  Returns the result of
+ * nvlist_lookup_common().
+ */
+int
+nvlist_lookup_string_array(nvlist_t *nvl, const char *name,
+    char ***a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_NVLIST_ARRAY pair named 'name' on 'nvl'.  '*a' is
+ * set to the pointer array stored inside the pair (NULL if it is empty)
+ * and '*n' to its element count.  Returns the result of
+ * nvlist_lookup_common().
+ */
+int
+nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name,
+    nvlist_t ***a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
+}
+
+/*
+ * Look up the DATA_TYPE_HRTIME pair named 'name' on 'nvl' and copy its
+ * value to '*val'.  Returns the result of nvlist_lookup_common().
+ */
+int
+nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val));
+}
+
+/*
+ * Look up several pairs on 'nvl' in one call.
+ *
+ * The variable arguments are a sequence of groups terminated by a NULL
+ * name.  Each group is:
+ *	char *name, data_type_t type			(DATA_TYPE_BOOLEAN)
+ *	char *name, data_type_t type, void *val		(scalar, string,
+ *							nvlist)
+ *	char *name, data_type_t type, void *val, uint_t *nelem	(arrays)
+ *
+ * Processing stops at the first lookup that fails.  If 'flag' contains
+ * NV_FLAG_NOENTOK, a missing pair (ENOENT) is not treated as a failure.
+ * An unrecognized type yields EINVAL.  Returns 0 if every lookup succeeded.
+ */
+int
+nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...)
+{
+	va_list ap;
+	char *name;
+	int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0);
+	int ret = 0;
+
+	va_start(ap, flag);
+	/* 'ret' is tested first, so no further argument is read after a failure */
+	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+		data_type_t type;
+		void *val;
+		uint_t *nelem;
+
+		switch (type = va_arg(ap, data_type_t)) {
+		case DATA_TYPE_BOOLEAN:
+			/* no value arguments follow for a plain boolean */
+			ret = nvlist_lookup_common(nvl, name, type, NULL, NULL);
+			break;
+
+		case DATA_TYPE_BOOLEAN_VALUE:
+		case DATA_TYPE_BYTE:
+		case DATA_TYPE_INT8:
+		case DATA_TYPE_UINT8:
+		case DATA_TYPE_INT16:
+		case DATA_TYPE_UINT16:
+		case DATA_TYPE_INT32:
+		case DATA_TYPE_UINT32:
+		case DATA_TYPE_INT64:
+		case DATA_TYPE_UINT64:
+		case DATA_TYPE_HRTIME:
+		case DATA_TYPE_STRING:
+		case DATA_TYPE_NVLIST:
+#if !defined(_KERNEL)
+		case DATA_TYPE_DOUBLE:
+#endif
+			val = va_arg(ap, void *);
+			ret = nvlist_lookup_common(nvl, name, type, NULL, val);
+			break;
+
+		case DATA_TYPE_BYTE_ARRAY:
+		case DATA_TYPE_BOOLEAN_ARRAY:
+		case DATA_TYPE_INT8_ARRAY:
+		case DATA_TYPE_UINT8_ARRAY:
+		case DATA_TYPE_INT16_ARRAY:
+		case DATA_TYPE_UINT16_ARRAY:
+		case DATA_TYPE_INT32_ARRAY:
+		case DATA_TYPE_UINT32_ARRAY:
+		case DATA_TYPE_INT64_ARRAY:
+		case DATA_TYPE_UINT64_ARRAY:
+		case DATA_TYPE_STRING_ARRAY:
+		case DATA_TYPE_NVLIST_ARRAY:
+			/* arrays take the value pointer first, then the count */
+			val = va_arg(ap, void *);
+			nelem = va_arg(ap, uint_t *);
+			ret = nvlist_lookup_common(nvl, name, type, nelem, val);
+			break;
+
+		default:
+			ret = EINVAL;
+		}
+
+		if (ret == ENOENT && noentok)
+			ret = 0;
+	}
+	va_end(ap);
+
+	return (ret);
+}
+
+/*
+ * Find the 'name'ed nvpair in the nvlist 'nvl'. If 'name' is found, the
+ * function returns zero and a pointer to the matching nvpair is returned in
+ * '*ret' (given 'ret' is non-NULL). If 'sep' is specified then 'name' will
+ * penetrate multiple levels of embedded nvlists, with 'sep' as the separator.
+ * As an example, if sep is '.', name might look like: "a" or "a.b" or
+ * "a.c[3]" or "a.d[3].e[1]".  This matches the C syntax for array embed (for
+ * convenience, code also supports "a.d[3]e[1]" syntax).
+ *
+ * If 'ip' is non-NULL and the last name component is an array, return the
+ * value of the "...[index]" array index in *ip. For an array reference that
+ * is not indexed, *ip will be returned as -1. If there is a syntax error in
+ * 'name', and 'ep' is non-NULL then *ep will be set to point to the location
+ * inside the 'name' string where the syntax error was detected.
+ *
+ * Descending through a DATA_TYPE_NVLIST_ARRAY component requires an explicit
+ * index within [0, nelem); anything else is treated as an error.
+ *
+ * Returns EINVAL for NULL 'nvl' or 'name', an empty name, a syntax error, or
+ * a component that cannot be resolved; ENOTSUP if a list being searched does
+ * not have NV_UNIQUE_NAME set.
+ */
+static int
+nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep,
+    nvpair_t **ret, int *ip, char **ep)
+{
+	nvpair_t	*nvp;
+	const char	*np;
+	char		*sepp = NULL;	/* NULL until a component is parsed */
+	char		*idxp, *idxep;
+	nvlist_t	**nva;
+	long		idx = 0;	/* only meaningful when idxp is set */
+	int		n;
+
+	if (ip)
+		*ip = -1;			/* not indexed */
+	if (ep)
+		*ep = NULL;
+
+	if ((nvl == NULL) || (name == NULL))
+		return (EINVAL);
+
+	/* step through components of name */
+	for (np = name; np && *np; np = sepp) {
+		/* ensure unique names */
+		if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME))
+			return (ENOTSUP);
+
+		/* skip white space */
+		skip_whitespace(np);
+		if (*np == 0)
+			break;
+
+		/* set 'sepp' to end of current component 'np' */
+		if (sep)
+			sepp = strchr(np, sep);
+		else
+			sepp = NULL;
+
+		/* find start of next "[ index ]..." */
+		idxp = strchr(np, '[');
+
+		/* if sepp comes first, set idxp to NULL */
+		if (sepp && idxp && (sepp < idxp))
+			idxp = NULL;
+
+		/*
+		 * At this point 'idxp' is set if there is an index
+		 * expected for the current component.
+		 */
+		if (idxp) {
+			/* set 'n' to length of current 'np' name component */
+			n = idxp++ - np;
+
+			/* keep sepp up to date for *ep use as we advance */
+			skip_whitespace(idxp);
+			sepp = idxp;
+
+			/* determine the index value */
+#if defined(_KERNEL) && !defined(_BOOT)
+			if (ddi_strtol(idxp, &idxep, 0, &idx))
+				goto fail;
+#else
+			idx = strtol(idxp, &idxep, 0);
+#endif
+			if (idxep == idxp)
+				goto fail;
+
+			/* keep sepp up to date for *ep use as we advance */
+			sepp = idxep;
+
+			/* skip white space index value and check for ']' */
+			skip_whitespace(sepp);
+			if (*sepp++ != ']')
+				goto fail;
+
+			/* for embedded arrays, support C syntax: "a[1].b" */
+			skip_whitespace(sepp);
+			if (sep && (*sepp == sep))
+				sepp++;
+		} else if (sepp) {
+			n = sepp++ - np;
+		} else {
+			n = strlen(np);
+		}
+
+		/* trim trailing whitespace by reducing length of 'np' */
+		if (n == 0)
+			goto fail;
+		for (n--; (np[n] == ' ') || (np[n] == '\t'); n--)
+			;
+		n++;
+
+		/* skip whitespace, and set sepp to NULL if complete */
+		if (sepp) {
+			skip_whitespace(sepp);
+			if (*sepp == 0)
+				sepp = NULL;
+		}
+
+		/*
+		 * At this point:
+		 * o  'n' is the length of current 'np' component.
+		 * o  'idxp' is set if there was an index, and value 'idx'.
+		 * o  'sepp' is set to the beginning of the next component,
+		 *    and set to NULL if we have no more components.
+		 *
+		 * Search for nvpair with matching component name.
+		 */
+		for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL;
+		    nvp = nvlist_next_nvpair(nvl, nvp)) {
+
+			/* continue if no match on name */
+			if (strncmp(np, nvpair_name(nvp), n) ||
+			    (strlen(nvpair_name(nvp)) != n))
+				continue;
+
+			/* if indexed, verify type is array oriented */
+			if (idxp && !nvpair_type_is_array(nvp))
+				goto fail;
+
+			/*
+			 * Full match found, return nvp and idx if this
+			 * was the last component.
+			 */
+			if (sepp == NULL) {
+				if (ret)
+					*ret = nvp;
+				if (ip && idxp)
+					*ip = (int)idx;	/* return index */
+				return (0);		/* found */
+			}
+
+			/*
+			 * More components: current match must be
+			 * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY
+			 * to support going deeper.
+			 */
+			if (nvpair_type(nvp) == DATA_TYPE_NVLIST) {
+				nvl = EMBEDDED_NVL(nvp);
+				break;
+			} else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) {
+				/*
+				 * An element must be selected explicitly:
+				 * without "[index]" there is no valid 'idx'
+				 * to descend through.
+				 */
+				if (idxp == NULL)
+					goto fail;
+
+				(void) nvpair_value_nvlist_array(nvp,
+				    &nva, (uint_t *)&n);
+
+				/* reject negative and out-of-range indexes */
+				if ((n < 0) || (idx < 0) || (idx >= n))
+					goto fail;
+				nvl = nva[idx];
+				break;
+			}
+
+			/* type does not support more levels */
+			goto fail;
+		}
+		if (nvp == NULL)
+			goto fail;		/* 'name' not found */
+
+		/* search for match of next component in embedded 'nvl' list */
+	}
+
+fail:	if (ep && sepp)
+		*ep = sepp;
+	return (EINVAL);
+}
+
+/*
+ * Return pointer to nvpair with specified 'name'.
+ *
+ * No separator is passed to nvlist_lookup_nvpair_ei_sep(), so 'name' is
+ * not split into components at a separator character, and neither an
+ * index nor an error position is reported back.
+ */
+int
+nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret)
+{
+	return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL));
+}
+
+/*
+ * Look up the nvpair called 'name' in 'nvl', using '.' as the separator
+ * between levels of embedded nvlists.  The matching pair is returned in
+ * '*ret', the array index of the last component in '*ip', and the position
+ * of a syntax error in '*ep'.  See nvlist_lookup_nvpair_ei_sep for the
+ * detailed description.
+ */
+int
+nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl, const char *name,
+    nvpair_t **ret, int *ip, char **ep)
+{
+	const char separator = '.';
+
+	return (nvlist_lookup_nvpair_ei_sep(nvl, name, separator, ret, ip, ep));
+}
+
+/*
+ * Report whether 'nvl' contains at least one pair named 'name', of any
+ * type.  Returns B_FALSE for a NULL name, a NULL list or a list without
+ * private data.
+ */
+boolean_t
+nvlist_exists(nvlist_t *nvl, const char *name)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (name == NULL || nvl == NULL)
+		return (B_FALSE);
+
+	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	if (priv == NULL)
+		return (B_FALSE);
+
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		if (strcmp(name, NVP_NAME(&curr->nvi_nvp)) == 0)
+			return (B_TRUE);
+		curr = curr->nvi_next;
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Copy the value of the DATA_TYPE_BOOLEAN_VALUE pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_BYTE pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_byte(nvpair_t *nvp, uchar_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_INT8 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int8(nvpair_t *nvp, int8_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_UINT8 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint8(nvpair_t *nvp, uint8_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_INT16 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int16(nvpair_t *nvp, int16_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_UINT16 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint16(nvpair_t *nvp, uint16_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_INT32 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int32(nvpair_t *nvp, int32_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_UINT32 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint32(nvpair_t *nvp, uint32_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_INT64 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int64(nvpair_t *nvp, int64_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_UINT64 pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint64(nvpair_t *nvp, uint64_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val));
+}
+
+#if !defined(_KERNEL)
+/*
+ * Copy the value of the DATA_TYPE_DOUBLE pair 'nvp' to '*val'.  Only
+ * compiled when _KERNEL is not defined.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_double(nvpair_t *nvp, double *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val));
+}
+#endif
+
+/*
+ * Set '*val' to point at the string stored inside the DATA_TYPE_STRING
+ * pair 'nvp' (no copy is made).
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_string(nvpair_t *nvp, char **val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val));
+}
+
+/*
+ * Set '*val' to point at the embedded list stored inside the
+ * DATA_TYPE_NVLIST pair 'nvp' (no copy is made).
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val));
+}
+
+/*
+ * For the DATA_TYPE_BOOLEAN_ARRAY pair 'nvp', set '*val' to the array
+ * stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_BYTE_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_INT8_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_UINT8_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_INT16_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_UINT16_ARRAY pair 'nvp', set '*val' to the array
+ * stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_INT32_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_UINT32_ARRAY pair 'nvp', set '*val' to the array
+ * stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_INT64_ARRAY pair 'nvp', set '*val' to the array stored
+ * inside the pair (NULL if it is empty) and '*nelem' to its element
+ * count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_UINT64_ARRAY pair 'nvp', set '*val' to the array
+ * stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_STRING_ARRAY pair 'nvp', set '*val' to the pointer
+ * array stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val));
+}
+
+/*
+ * For the DATA_TYPE_NVLIST_ARRAY pair 'nvp', set '*val' to the pointer
+ * array stored inside the pair (NULL if it is empty) and '*nelem' to its
+ * element count.  Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val));
+}
+
+/*
+ * Copy the value of the DATA_TYPE_HRTIME pair 'nvp' to '*val'.
+ * Returns the result of nvpair_value_common().
+ */
+int
+nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val));
+}
+
+/*
+ * Add a copy of the pair 'nvp' to the list 'nvl'.
+ *
+ * The name, type, element count and value of 'nvp' are handed to
+ * nvlist_add_common().  Returns EINVAL if either argument is NULL,
+ * otherwise the result of nvlist_add_common().
+ */
+int
+nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	if (nvl == NULL)
+		return (EINVAL);
+	if (nvp == NULL)
+		return (EINVAL);
+
+	return (nvlist_add_common(nvl,
+	    NVP_NAME(nvp),
+	    NVP_TYPE(nvp),
+	    NVP_NELEM(nvp),
+	    NVP_VALUE(nvp)));
+}
+
+/*
+ * Merge the pairs of 'nvl' into 'dst'.
+ * The merged list will contain all names specified in both lists,
+ * the values are taken from nvl in the case of duplicates.
+ * Merging a list with itself is a no-op.  'flag' is not used.
+ * Returns EINVAL if either list is NULL, otherwise 0 on success or the
+ * error from nvlist_copy_pairs().
+ */
+/*ARGSUSED*/
+int
+nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag)
+{
+	if (nvl == NULL || dst == NULL)
+		return (EINVAL);
+
+	/* nothing to do when source and destination are the same list */
+	if (dst == nvl)
+		return (0);
+
+	return (nvlist_copy_pairs(nvl, dst));
+}
+
+/*
+ * Encoding related routines
+ *
+ * Stream operation codes.
+ * NOTE(review): presumably these are the values stored in
+ * nvstream_t.nvs_op -- confirm against the code that sets that field.
+ */
+#define	NVS_OP_ENCODE	0
+#define	NVS_OP_DECODE	1
+#define	NVS_OP_GETSIZE	2
+
+/* Forward declaration: nvstream_t refers to the ops vector defined below. */
+typedef struct nvs_ops nvs_ops_t;
+
+/*
+ * State for one pass over an nvlist stream.
+ */
+typedef struct {
+	int		nvs_op;		/* operation code (presumably NVS_OP_*) */
+	const nvs_ops_t	*nvs_ops;	/* callbacks implementing the stream */
+	void		*nvs_private;	/* opaque data for the callbacks */
+	nvpriv_t	*nvs_priv;	/* nvlist private data in use */
+} nvstream_t;
+
+/*
+ * nvs operations are:
+ *   - nvs_nvlist
+ *     encoding / decoding of a nvlist header (nvlist_t)
+ *     calculates the size used for header and end detection
+ *
+ *   - nvs_nvpair
+ *     responsible for the first part of encoding / decoding of an nvpair
+ *     calculates the decoded size of an nvpair
+ *
+ *   - nvs_nvp_op
+ *     second part of encoding / decoding of an nvpair
+ *
+ *   - nvs_nvp_size
+ *     calculates the encoding size of an nvpair
+ *
+ *   - nvs_nvl_fini
+ *     encodes the end detection mark (zeros).
+ *
+ * Every callback returns an int; callers in this file treat a non-zero
+ * return as failure.
+ */
+struct nvs_ops {
+	/* nvlist header; size reported through the size_t pointer */
+	int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *);
+	/* first part of an nvpair; decoded size through the size_t pointer */
+	int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *);
+	/* second part of an nvpair */
+	int (*nvs_nvp_op)(nvstream_t *, nvpair_t *);
+	/* encoded size of an nvpair through the size_t pointer */
+	int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *);
+	/* end-of-list mark */
+	int (*nvs_nvl_fini)(nvstream_t *);
+};
+/* Leading bytes of every packed buffer; see their use in nvlist_common(). */
+typedef struct {
+	char	nvh_encoding;	/* nvs encoding method */
+	char	nvh_endian;	/* nvs endian */
+	char	nvh_reserved1;	/* reserved for future use */
+	char	nvh_reserved2;	/* reserved for future use */
+} nvs_header_t;
+/* Encode every pair of nvl in list order, then write the end mark. */
+static int
+nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl)
+{
+	const nvs_ops_t *ops = nvs->nvs_ops;
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *node = priv->nvp_list;
+
+	while (node != NULL) {
+		/* any per-pair failure is reported as EFAULT */
+		if (ops->nvs_nvpair(nvs, &node->nvi_nvp, NULL) != 0)
+			return (EFAULT);
+		node = node->nvi_next;
+	}
+	return (ops->nvs_nvl_fini(nvs));
+}
+/* Decode pairs from the stream into nvl until the end mark or an error. */
+static int
+nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl)
+{
+	nvpair_t *nvp;
+	size_t nvsize;		/* decoded size of the next pair */
+	int err;
+
+	/*
+	 * Get decoded size of next pair in stream, alloc
+	 * memory for nvpair_t, then decode the nvpair
+	 */
+	while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) {
+		if (nvsize == 0) /* end of list */
+			break;
+
+		/* a size below NVP_SIZE_CALC(1, 0) cannot be a valid pair */
+		if (nvsize < NVP_SIZE_CALC(1, 0))
+			return (EFAULT);
+
+		if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL)
+			return (ENOMEM);
+
+		if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) {
+			nvp_buf_free(nvl, nvp);
+			return (err);
+		}
+
+		if (i_validate_nvpair(nvp) != 0) {
+			nvpair_free(nvp);
+			nvp_buf_free(nvl, nvp);
+			return (EFAULT);
+		}
+
+		nvp_buf_link(nvl, nvp);	/* only validated pairs are linked */
+	}
+	return (err);
+}
+/* Add the encoded size of every pair in nvl to *buflen (GETSIZE pass). */
+static int
+nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	uint64_t total = *buflen;
+	i_nvp_t *np;
+
+	/* accumulate in 64 bits; a total beyond INT32_MAX is rejected */
+	for (np = priv->nvp_list; np != NULL; np = np->nvi_next) {
+		size_t sz;
+
+		if (nvs->nvs_ops->nvs_nvp_size(nvs, &np->nvi_nvp, &sz) != 0)
+			return (EINVAL);
+
+		total += sz;
+		if (total > INT32_MAX)
+			return (EINVAL);
+	}
+
+	*buflen = total;
+	return (0);
+}
+/* Run the current nvs_op on nvl: list header first, then its pairs. */
+static int
+nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
+{
+	int rc;
+
+	/* a list without private data cannot be processed */
+	if (nvl->nvl_priv == 0)
+		return (EFAULT);
+
+	/*
+	 * Perform the header step first; the per-pair pass that follows
+	 * depends on the operation.
+	 */
+	rc = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen);
+	if (rc != 0)
+		return (rc);
+
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		return (nvs_encode_pairs(nvs, nvl));
+
+	case NVS_OP_DECODE:
+		return (nvs_decode_pairs(nvs, nvl));
+
+	case NVS_OP_GETSIZE:
+		return (nvs_getsize_pairs(nvs, nvl, buflen));
+
+	default:
+		break;
+	}
+
+	return (EINVAL);
+}
+/* Encode or decode one embedded nvlist; any other operation is EINVAL. */
+static int
+nvs_embedded(nvstream_t *nvs, nvlist_t *embedded)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		return (nvs_operation(nvs, embedded, NULL));
+
+	case NVS_OP_DECODE: {
+		nvpriv_t *priv;
+		int err;
+
+		if (embedded->nvl_version != NV_VERSION)
+			return (ENOTSUP);
+
+		if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL)
+			return (ENOMEM);
+
+		/* re-initialise the header: same nvflag, freshly allocated priv */
+		nvlist_init(embedded, embedded->nvl_nvflag, priv);
+
+		if ((err = nvs_operation(nvs, embedded, NULL)) != 0)
+			nvlist_free(embedded);	/* undo a partial decode */
+		return (err);
+	}
+	default:
+		break;
+	}
+
+	return (EINVAL);
+}
+/* Apply the current operation to every list of an NVLIST_ARRAY pair. */
+static int
+nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+	size_t nelem = NVP_NELEM(nvp);
+	nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+	int i;
+
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		for (i = 0; i < nelem; i++)
+			if (nvs_embedded(nvs, nvlp[i]) != 0)
+				return (EFAULT);
+		break;
+
+	case NVS_OP_DECODE: {
+		size_t len = nelem * sizeof (uint64_t);	/* pointer slots */
+		nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len);
+
+		bzero(nvlp, len);	/* don't trust packed data */
+		for (i = 0; i < nelem; i++) {
+			if (nvs_embedded(nvs, embedded) != 0) {
+				nvpair_free(nvp);
+				return (EFAULT);
+			}
+
+			nvlp[i] = embedded++;
+		}
+		break;
+	}
+	case NVS_OP_GETSIZE: {
+		uint64_t nvsize = 0;
+
+		for (i = 0; i < nelem; i++) {
+			size_t nvp_sz = 0;
+
+			if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0)
+				return (EINVAL);
+
+			if ((nvsize += nvp_sz) > INT32_MAX)
+				return (EINVAL);
+		}
+
+		*size = nvsize;	/* size is only dereferenced for getsize */
+		break;
+	}
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+/* Per-encoding entry points used by nvlist_common(); defined further down. */
+static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *);
+static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *);
+
+/*
+ * Common routine for nvlist operations: encode, decode and getsize
+ * (encoded size).  Handles the nvs_header_t, then picks the encoding.
+ */
+static int
+nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding,
+    int nvs_op)
+{
+	int err = 0;
+	nvstream_t nvs;
+	int nvl_endian;		/* endianness recorded for the packed data */
+#ifdef	_LITTLE_ENDIAN
+	int host_endian = 1;
+#else
+	int host_endian = 0;
+#endif	/* _LITTLE_ENDIAN */
+	nvs_header_t *nvh = (void *)buf;
+
+	if (buflen == NULL || nvl == NULL ||
+	    (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	nvs.nvs_op = nvs_op;
+
+	/*
+	 * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and
+	 * a buffer is allocated.  The first 4 bytes in the buffer are
+	 * used for encoding method and host endian.
+	 */
+	switch (nvs_op) {
+	case NVS_OP_ENCODE:
+		if (buf == NULL || *buflen < sizeof (nvs_header_t))
+			return (EINVAL);
+
+		nvh->nvh_encoding = encoding;
+		nvh->nvh_endian = nvl_endian = host_endian;
+		nvh->nvh_reserved1 = 0;
+		nvh->nvh_reserved2 = 0;
+		break;
+
+	case NVS_OP_DECODE:
+		if (buf == NULL || *buflen < sizeof (nvs_header_t))
+			return (EINVAL);
+
+		/* the header, not the caller's argument, selects the encoding */
+		encoding = nvh->nvh_encoding;
+		nvl_endian = nvh->nvh_endian;
+		break;
+
+	case NVS_OP_GETSIZE:
+		nvl_endian = host_endian;
+
+		/*
+		 * add the size for encoding
+		 */
+		*buflen = sizeof (nvs_header_t);
+		break;
+
+	default:
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Create an nvstream with proper encoding method
+	 */
+	switch (encoding) {
+	case NV_ENCODE_NATIVE:
+		/*
+		 * check endianness, in case we are unpacking
+		 * from a file
+		 */
+		if (nvl_endian != host_endian)
+			return (ENOTSUP);
+		err = nvs_native(&nvs, nvl, buf, buflen);
+		break;
+	case NV_ENCODE_XDR:
+		err = nvs_xdr(&nvs, nvl, buf, buflen);
+		break;
+	default:
+		err = ENOTSUP;
+		break;
+	}
+
+	return (err);
+}
+/* Store in *size the packed size of nvl for the given encoding. */
+int
+nvlist_size(nvlist_t *nvl, size_t *size, int encoding)
+{
+	return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE));
+}
+
+/*
+ * Pack nvlist into contiguous memory; kmflag only picks the kernel allocator.
+ */
+/*ARGSUSED1*/
+int
+nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
+    int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+	return (nvlist_xpack(nvl, bufp, buflen, encoding,
+	    (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+	return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep));
+#endif
+}
+/* Pack nvl into *bufp, allocating the buffer with nva when *bufp is NULL. */
+int
+nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
+    nv_alloc_t *nva)
+{
+	nvpriv_t nvpriv;
+	size_t alloc_size;
+	char *buf;
+	int err;
+
+	if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL)
+		return (EINVAL);
+	/* caller-supplied buffer: encode in place, *buflen is its size */
+	if (*bufp != NULL)
+		return (nvlist_common(nvl, *bufp, buflen, encoding,
+		    NVS_OP_ENCODE));
+
+	/*
+	 * Here is a difficult situation:
+	 * 1. The nvlist has fixed allocator properties.
+	 *    All other nvlist routines (like nvlist_add_*, ...) use
+	 *    these properties.
+	 * 2. When using nvlist_pack() the user can specify his own
+	 *    allocator properties (e.g. by using KM_NOSLEEP).
+	 *
+	 * We use the user specified properties (2). A clearer solution
+	 * will be to remove the kmflag from nvlist_pack(), but we will
+	 * not change the interface.
+	 */
+	nv_priv_init(&nvpriv, nva, 0);
+
+	if ((err = nvlist_size(nvl, &alloc_size, encoding)) != 0)
+		return (err);
+
+	if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL)
+		return (ENOMEM);
+
+	if ((err = nvlist_common(nvl, buf, &alloc_size, encoding,
+	    NVS_OP_ENCODE)) != 0) {
+		nv_mem_free(&nvpriv, buf, alloc_size);
+	} else {
+		*buflen = alloc_size;
+		*bufp = buf;
+	}
+
+	return (err);
+}
+
+/*
+ * Unpack buf into an nvlist_t; kmflag only picks the kernel allocator.
+ */
+/*ARGSUSED1*/
+int
+nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+	return (nvlist_xunpack(buf, buflen, nvlp,
+	    (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+	return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep));
+#endif
+}
+
+int
+nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva)
+{
+	nvlist_t *nvl;
+	int err;
+
+	if (nvlp == NULL)
+		return (EINVAL);
+
+	if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0)
+		return (err);
+
+	if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0)
+		nvlist_free(nvl);
+	else
+		*nvlp = nvl;
+
+	return (err);
+}
+
+/*
+ * Native encoding functions
+ */
+typedef struct {
+	/*
+	 * This structure is used when decoding a packed nvpair in
+	 * the native format.  n_base points to a buffer containing the
+	 * packed nvpair.  n_end is a pointer to the end of the buffer.
+	 * (n_end actually points to the first byte past the end of the
+	 * buffer.)  n_curr is a pointer that lies between n_base and n_end.
+	 * It points to the current data that we are decoding.
+	 * The amount of data left in the buffer is equal to n_end - n_curr.
+	 * n_flag is used to recognize a packed embedded list.
+	 */
+	caddr_t n_base;		/* start of the buffer */
+	caddr_t n_end;		/* first byte past the buffer */
+	caddr_t n_curr;		/* current position in the buffer */
+	uint_t  n_flag;		/* set once the outermost header is done */
+} nvs_native_t;
+/* Attach native to nvs; encode/decode also get the [buf, buf + buflen) window. */
+static int
+nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf,
+    size_t buflen)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+	case NVS_OP_DECODE:
+		nvs->nvs_private = native;
+		native->n_curr = native->n_base = buf;
+		native->n_end = buf + buflen;
+		native->n_flag = 0;
+		return (0);
+
+	case NVS_OP_GETSIZE:
+		nvs->nvs_private = native;
+		native->n_curr = native->n_base = native->n_end = NULL;
+		native->n_flag = 0;	/* still needed to tell outer from embedded */
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+/* Counterpart of nvs_native_create(); there is nothing to release. */
+/*ARGSUSED*/
+static void
+nvs_native_destroy(nvstream_t *nvs)
+{
+}
+/* Copy size bytes between buf and the stream cursor, then advance it. */
+static int
+native_cp(nvstream_t *nvs, void *buf, size_t size)
+{
+	nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+
+	if (size > (size_t)(native->n_end - native->n_curr))
+		return (EFAULT);
+
+	/*
+	 * The check above compares size with the bytes left, so it cannot
+	 * wrap the way "n_curr + size" could.  bcopy() is used because it
+	 * puts no alignment requirement on the buffer (stream).
+	 */
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		bcopy(buf, native->n_curr, size);
+		break;
+	case NVS_OP_DECODE:
+		bcopy(native->n_curr, buf, size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	native->n_curr += size;
+	return (0);
+}
+
+/*
+ * Operate on the nvlist_t header (nvl_version and nvl_nvflag).
+ */
+static int
+nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
+{
+	nvs_native_t *native = nvs->nvs_private;
+
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+	case NVS_OP_DECODE:
+		if (native->n_flag)
+			return (0);	/* packed embedded list */
+
+		native->n_flag = 1;	/* later calls are for embedded lists */
+
+		/* copy version and nvflag of the nvlist_t */
+		if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 ||
+		    native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0)
+			return (EFAULT);
+
+		return (0);
+
+	case NVS_OP_GETSIZE:
+		/*
+		 * if calculating for a packed embedded list
+		 *	4 for end of the embedded list
+		 * else
+		 *	2 * sizeof (int32_t) for nvl_version and nvl_nvflag
+		 *	and 4 for end of the entire list
+		 */
+		if (native->n_flag) {
+			*size += 4;
+		} else {
+			native->n_flag = 1;
+			*size += 2 * sizeof (int32_t) + 4;
+		}
+
+		return (0);
+
+	default:
+		return (EINVAL);
+	}
+}
+/* When encoding, append the 4-byte zero end mark; otherwise do nothing. */
+static int
+nvs_native_nvl_fini(nvstream_t *nvs)
+{
+	if (nvs->nvs_op == NVS_OP_ENCODE) {
+		nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+		/*
+		 * Add 4 zero bytes at end of nvlist. The decode routine reads
+		 * them back as an int32_t size of 0 (end detection).
+		 */
+		if (native->n_curr + sizeof (int32_t) > native->n_end)
+			return (EFAULT);
+
+		bzero(native->n_curr, sizeof (int32_t));
+		native->n_curr += sizeof (int32_t);
+	}
+
+	return (0);
+}
+/* Native second-stage handling of a DATA_TYPE_NVLIST pair. */
+static int
+nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp)
+{
+	if (nvs->nvs_op == NVS_OP_ENCODE) {
+		nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+		nvlist_t *packed = (void *)
+		    (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
+		/*
+		 * packed is the value inside the copy of the pair that was
+		 * just written to the stream.  Null out its pointer, which is
+		 * meaningless there; it may be unaligned, so use bzero.
+		 */
+		bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
+	}
+
+	return (nvs_embedded(nvs, EMBEDDED_NVL(nvp)));
+}
+/* Native second-stage handling of a DATA_TYPE_NVLIST_ARRAY pair. */
+static int
+nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp)
+{
+	if (nvs->nvs_op == NVS_OP_ENCODE) {
+		nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+		char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp);
+		size_t len = NVP_NELEM(nvp) * sizeof (uint64_t);
+		nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len);
+		int i;
+		/*
+		 * value is the pair's value inside the stream copy: first
+		 * the pointer slots (len bytes), then the nvlist_t headers.
+		 * Zero the slots; they are meaningless in the packed form.
+		 */
+		bzero(value, len);
+
+		for (i = 0; i < NVP_NELEM(nvp); i++, packed++)
+			/*
+			 * Null out the pointer that is meaningless in the
+			 * packed structure. The address may not be aligned,
+			 * so we have to use bzero.
+			 */
+			bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
+	}
+
+	return (nvs_embedded_nvl_array(nvs, nvp, NULL));
+}
+/* Native fix-up of the pointer slots of a DATA_TYPE_STRING_ARRAY pair. */
+static void
+nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE: {
+		nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+		uint64_t *strp = (void *)
+		    (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
+		/*
+		 * Null out pointers that are meaningless in the packed
+		 * structure. The addresses may not be aligned, so we have
+		 * to use bzero.
+		 */
+		bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t));
+		break;
+	}
+	case NVS_OP_DECODE: {
+		char **strp = (void *)NVP_VALUE(nvp);
+		char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t));
+		int i;
+		/* rebuild the slots: the strings follow them back to back */
+		for (i = 0; i < NVP_NELEM(nvp); i++) {
+			strp[i] = buf;
+			buf += strlen(buf) + 1;	/* NOTE(review): unbounded */
+		}
+		break;
+	}
+	}
+}
+/* Native encode/decode of one pair: raw copy, checks, per-type fix-ups. */
+static int
+nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
+{
+	data_type_t type;
+	int value_sz;
+	int ret = 0;
+
+	/*
+	 * We do the initial bcopy of the data before we look at
+	 * the nvpair type, because when we're decoding, we won't
+	 * have the correct values for the pair until we do the bcopy.
+	 */
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+	case NVS_OP_DECODE:
+		if (native_cp(nvs, nvp, nvp->nvp_size) != 0)
+			return (EFAULT);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* verify nvp_name_sz, check the name string length */
+	if (i_validate_nvpair_name(nvp) != 0)
+		return (EFAULT);
+
+	type = NVP_TYPE(nvp);
+
+	/*
+	 * Verify type and nelem and get the value size.
+	 * For the data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+	 * the size of the string(s) is excluded.
+	 */
+	if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)
+		return (EFAULT);
+
+	if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)
+		return (EFAULT);
+
+	switch (type) {
+	case DATA_TYPE_NVLIST:
+		ret = nvpair_native_embedded(nvs, nvp);
+		break;
+	case DATA_TYPE_NVLIST_ARRAY:
+		ret = nvpair_native_embedded_array(nvs, nvp);
+		break;
+	case DATA_TYPE_STRING_ARRAY:
+		nvpair_native_string_array(nvs, nvp);
+		break;
+	default:
+		break;	/* all other types need no fix-up after the copy */
+	}
+
+	return (ret);
+}
+/* Native encoded size of nvp: nvp_size plus the size of embedded lists. */
+static int
+nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+	uint64_t nvp_sz = nvp->nvp_size;
+
+	switch (NVP_TYPE(nvp)) {
+	case DATA_TYPE_NVLIST: {
+		size_t nvsize = 0;
+
+		if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0)
+			return (EINVAL);
+
+		nvp_sz += nvsize;
+		break;
+	}
+	case DATA_TYPE_NVLIST_ARRAY: {
+		size_t nvsize;	/* written by the callee on success */
+
+		if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0)
+			return (EINVAL);
+
+		nvp_sz += nvsize;
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (nvp_sz > INT32_MAX)
+		return (EINVAL);
+
+	*size = nvp_sz;
+
+	return (0);
+}
+/* Encode a whole pair, or (decode) peek at the size of the next pair. */
+static int
+nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		return (nvs_native_nvp_op(nvs, nvp));
+
+	case NVS_OP_DECODE: {
+		nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+		int32_t decode_len;
+
+		/* try to read the size value from the stream */
+		if (native->n_curr + sizeof (int32_t) > native->n_end)
+			return (EFAULT);
+		bcopy(native->n_curr, &decode_len, sizeof (int32_t));
+
+		/* sanity check the size value */
+		if (decode_len < 0 ||
+		    decode_len > native->n_end - native->n_curr)
+			return (EFAULT);
+
+		*size = decode_len;
+
+		/*
+		 * If at the end of the stream then move the cursor
+		 * forward, otherwise nvs_native_nvp_op() will read
+		 * the entire nvpair at the same cursor position.
+		 */
+		if (*size == 0)
+			native->n_curr += sizeof (int32_t);
+		break;
+	}
+
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+/* Operation vector of the native encoding, in struct nvs_ops order. */
+static const nvs_ops_t nvs_native_ops = {
+	nvs_native_nvlist,	/* nvs_nvlist */
+	nvs_native_nvpair,	/* nvs_nvpair */
+	nvs_native_nvp_op,	/* nvs_nvp_op */
+	nvs_native_nvp_size,	/* nvs_nvp_size */
+	nvs_native_nvl_fini	/* nvs_nvl_fini */
+};
+/* Run the requested operation on nvl using the native encoding. */
+static int
+nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
+{
+	/* the stream payload starts right after the nvs_header_t */
+	char *payload = buf + sizeof (nvs_header_t);
+	size_t paylen = *buflen - sizeof (nvs_header_t);
+	nvs_native_t state;
+	int rc;
+
+	nvs->nvs_ops = &nvs_native_ops;
+	rc = nvs_native_create(nvs, &state, payload, paylen);
+	if (rc != 0)
+		return (rc);
+
+	rc = nvs_operation(nvs, nvl, buflen);
+	nvs_native_destroy(nvs);
+	return (rc);
+}
+
+/*
+ * XDR encoding functions
+ *
+ * An xdr packed nvlist is encoded as:
+ *
+ *  - encoding method and host endian (4 bytes)
+ *  - nvl_version (4 bytes)
+ *  - nvl_nvflag (4 bytes)
+ *
+ *  - encoded nvpairs, the format of one xdr encoded nvpair is:
+ *	- encoded size of the nvpair (4 bytes)
+ *	- decoded size of the nvpair (4 bytes)
+ *	- name string, 4 + NV_ALIGN4(strlen(string)) bytes;
+ *	  a string is coded as size (4 bytes) and data
+ *	- data type (4 bytes)
+ *	- number of elements in the nvpair (4 bytes)
+ *	- data
+ *
+ *  - 2 zeros for end of the entire list (8 bytes)
+ */
+static int
+nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen)
+{
+	/* xdr data must be 4 byte aligned; checked for every operation */
+	if ((ulong_t)buf % 4 != 0)
+		return (EFAULT);
+
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+		xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE);
+		nvs->nvs_private = xdr;
+		return (0);
+	case NVS_OP_DECODE:
+		xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE);
+		nvs->nvs_private = xdr;
+		return (0);
+	case NVS_OP_GETSIZE:
+		nvs->nvs_private = NULL;	/* no XDR stream is set up */
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+/* Destroy the XDR stream that nvs_xdr_create() set up for encode/decode. */
+static void
+nvs_xdr_destroy(nvstream_t *nvs)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+	case NVS_OP_DECODE:
+		xdr_destroy((XDR *)nvs->nvs_private);
+		break;
+	default:
+		break;	/* getsize: nvs_private is NULL */
+	}
+}
+/* XDR handling of the nvlist_t header (nvl_version and nvl_nvflag). */
+static int
+nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
+{
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE:
+	case NVS_OP_DECODE: {
+		XDR 	*xdr = nvs->nvs_private;
+
+		if (!xdr_int(xdr, &nvl->nvl_version) ||
+		    !xdr_u_int(xdr, &nvl->nvl_nvflag))
+			return (EFAULT);
+		break;
+	}
+	case NVS_OP_GETSIZE: {
+		/*
+		 * 2 * 4 for nvl_version + nvl_nvflag
+		 * and 8 for end of the entire list
+		 */
+		*size += 2 * 4 + 8;
+		break;
+	}
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+/* When encoding, terminate the list with two zero words (8 bytes). */
+static int
+nvs_xdr_nvl_fini(nvstream_t *nvs)
+{
+	XDR *xdr = nvs->nvs_private;
+	int zero = 0;
+
+	if (nvs->nvs_op != NVS_OP_ENCODE)
+		return (0);
+
+	if (xdr_int(xdr, &zero) && xdr_int(xdr, &zero))
+		return (0);
+	return (EFAULT);
+}
+
+/*
+ * The format of an xdr encoded nvpair is: encode_size, decode_size, name
+ * string, data type, nelem, data.  This handles the name string onward.
+ */
+static int
+nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
+{
+	data_type_t type;
+	char	*buf;
+	char	*buf_end = (char *)nvp + nvp->nvp_size;
+	int	value_sz;
+	uint_t	nelem, buflen;
+	bool_t	ret = FALSE;	/* result of the xdr primitive used below */
+	XDR	*xdr = nvs->nvs_private;
+
+	ASSERT(xdr != NULL && nvp != NULL);
+
+	/* name string */
+	if ((buf = NVP_NAME(nvp)) >= buf_end)
+		return (EFAULT);
+	buflen = buf_end - buf;
+
+	if (!xdr_string(xdr, &buf, buflen - 1))
+		return (EFAULT);
+	nvp->nvp_name_sz = strlen(buf) + 1;
+
+	/* type and nelem */
+	if (!xdr_int(xdr, (int *)&nvp->nvp_type) ||
+	    !xdr_int(xdr, &nvp->nvp_value_elem))
+		return (EFAULT);
+
+	type = NVP_TYPE(nvp);
+	nelem = nvp->nvp_value_elem;
+
+	/*
+	 * Verify type and nelem and get the value size.
+	 * For the data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+	 * the size of the string(s) is excluded.
+	 */
+	if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)
+		return (EFAULT);
+
+	/* if there is no data to extract then return */
+	if (nelem == 0)
+		return (0);
+
+	/* value */
+	if ((buf = NVP_VALUE(nvp)) >= buf_end)
+		return (EFAULT);
+	buflen = buf_end - buf;
+
+	if (buflen < value_sz)
+		return (EFAULT);
+
+	switch (type) {
+	case DATA_TYPE_NVLIST:
+		if (nvs_embedded(nvs, (void *)buf) == 0)
+			return (0);
+		break;
+
+	case DATA_TYPE_NVLIST_ARRAY:
+		if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0)
+			return (0);
+		break;
+
+	case DATA_TYPE_BOOLEAN:
+		ret = TRUE;	/* no value to transfer */
+		break;
+
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+		ret = xdr_char(xdr, buf);
+		break;
+
+	case DATA_TYPE_INT16:
+		ret = xdr_short(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_UINT16:
+		ret = xdr_u_short(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_INT32:
+		ret = xdr_int(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_UINT32:
+		ret = xdr_u_int(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_INT64:
+		ret = xdr_longlong_t(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_UINT64:
+		ret = xdr_u_longlong_t(xdr, (void *)buf);
+		break;
+
+	case DATA_TYPE_HRTIME:
+		/*
+		 * NOTE: must expose the definition of hrtime_t here
+		 */
+		ret = xdr_longlong_t(xdr, (void *)buf);
+		break;
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+		ret = xdr_double(xdr, (void *)buf);
+		break;
+#endif
+	case DATA_TYPE_STRING:
+		ret = xdr_string(xdr, &buf, buflen - 1);
+		break;
+
+	case DATA_TYPE_BYTE_ARRAY:
+		ret = xdr_opaque(xdr, buf, nelem);
+		break;
+
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t),
+		    (xdrproc_t)xdr_char);
+		break;
+
+	case DATA_TYPE_INT16_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t),
+		    sizeof (int16_t), (xdrproc_t)xdr_short);
+		break;
+
+	case DATA_TYPE_UINT16_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t),
+		    sizeof (uint16_t), (xdrproc_t)xdr_u_short);
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t),
+		    sizeof (int32_t), (xdrproc_t)xdr_int);
+		break;
+
+	case DATA_TYPE_UINT32_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t),
+		    sizeof (uint32_t), (xdrproc_t)xdr_u_int);
+		break;
+
+	case DATA_TYPE_INT64_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t),
+		    sizeof (int64_t), (xdrproc_t)xdr_longlong_t);
+		break;
+
+	case DATA_TYPE_UINT64_ARRAY:
+		ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t),
+		    sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t);
+		break;
+
+	case DATA_TYPE_STRING_ARRAY: {
+		size_t len = nelem * sizeof (uint64_t);	/* pointer slots */
+		char **strp = (void *)buf;
+		int i;
+
+		if (nvs->nvs_op == NVS_OP_DECODE)
+			bzero(buf, len);	/* don't trust packed data */
+
+		for (i = 0; i < nelem; i++) {
+			if (buflen <= len)
+				return (EFAULT);
+
+			buf += len;	/* skip the slots, then each string */
+			buflen -= len;
+
+			if (xdr_string(xdr, &buf, buflen - 1) != TRUE)
+				return (EFAULT);
+
+			if (nvs->nvs_op == NVS_OP_DECODE)
+				strp[i] = buf;
+			len = strlen(buf) + 1;
+		}
+		ret = TRUE;
+		break;
+	}
+	default:
+		break;
+	}
+
+	return (ret == TRUE ? 0 : EFAULT);
+}
+/* Compute the XDR encoded size of nvp, including its two size words. */
+static int
+nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+	data_type_t type = NVP_TYPE(nvp);
+	/*
+	 * encode_size + decode_size + name string size + data type + nelem
+	 * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp)))
+	 */
+	uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4;
+
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+	case DATA_TYPE_INT16:
+	case DATA_TYPE_UINT16:
+	case DATA_TYPE_INT32:
+	case DATA_TYPE_UINT32:
+		nvp_sz += 4;	/* 4 is the minimum xdr unit */
+		break;
+
+	case DATA_TYPE_INT64:
+	case DATA_TYPE_UINT64:
+	case DATA_TYPE_HRTIME:
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+#endif
+		nvp_sz += 8;
+		break;
+
+	case DATA_TYPE_STRING:
+		nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp)));
+		break;
+
+	case DATA_TYPE_BYTE_ARRAY:
+		nvp_sz += NV_ALIGN4(NVP_NELEM(nvp));
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+		nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp);
+		break;
+
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+		nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp);
+		break;
+
+	case DATA_TYPE_STRING_ARRAY: {
+		int i;
+		char **strs = (void *)NVP_VALUE(nvp);
+
+		for (i = 0; i < NVP_NELEM(nvp); i++)
+			nvp_sz += 4 + NV_ALIGN4(strlen(strs[i]));
+
+		break;
+	}
+
+	case DATA_TYPE_NVLIST:
+	case DATA_TYPE_NVLIST_ARRAY: {
+		size_t nvsize = 0;
+		int old_nvs_op = nvs->nvs_op;
+		int err;
+
+		/* this is also reached while encoding: switch op temporarily */
+		nvs->nvs_op = NVS_OP_GETSIZE;
+		if (type == DATA_TYPE_NVLIST)
+			err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize);
+		else
+			err = nvs_embedded_nvl_array(nvs, nvp, &nvsize);
+		nvs->nvs_op = old_nvs_op;
+
+		if (err != 0)
+			return (EINVAL);
+
+		nvp_sz += nvsize;
+		break;
+	}
+
+	default:
+		return (EINVAL);
+	}
+
+	if (nvp_sz > INT32_MAX)
+		return (EINVAL);
+
+	*size = nvp_sz;
+
+	return (0);
+}
+
+
+/*
+ * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates
+ * the largest nvpair that could be encoded in the buffer.
+ *
+ * See comments above nvs_xdr_nvp_op() for the format of xdr encoding.
+ * The size of a xdr packed nvpair without any data is 5 words.
+ *
+ * Using the size of the data directly as an estimate would be ok
+ * in all cases except one.  If the data type is of DATA_TYPE_STRING_ARRAY
+ * then the actual nvpair has space for an array of pointers to index
+ * the strings.  These pointers are not encoded into the packed xdr buffer.
+ *
+ * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are
+ * of length 0, then each string is encoded in xdr format as a single word.
+ * Therefore when expanded to an nvpair there will be 2.25 words used for
+ * each string.  (an int64_t allocated for pointer usage, and a single char
+ * for the null termination.)
+ *
+ * This is the calculation performed by the NVS_XDR_MAX_LEN macro.
+ */
+#define	NVS_XDR_HDR_LEN		((size_t)(5 * 4))
+#define	NVS_XDR_DATA_LEN(y)	(((size_t)(y) <= NVS_XDR_HDR_LEN) ? \
+					0 : ((size_t)(y) - NVS_XDR_HDR_LEN))
+#define	NVS_XDR_MAX_LEN(x)	(NVP_SIZE_CALC(1, 0) + \
+					(NVS_XDR_DATA_LEN(x) * 2) + \
+					NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4)))
+/* Handle the two leading size words of a pair; encode also does the rest. */
+static int
+nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+	XDR 	*xdr = nvs->nvs_private;
+	int32_t	encode_len, decode_len;
+
+	switch (nvs->nvs_op) {
+	case NVS_OP_ENCODE: {
+		size_t nvsize;
+
+		if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0)
+			return (EFAULT);
+
+		decode_len = nvp->nvp_size;
+		encode_len = nvsize;
+		if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
+			return (EFAULT);
+
+		return (nvs_xdr_nvp_op(nvs, nvp));
+	}
+	case NVS_OP_DECODE: {
+		struct xdr_bytesrec bytesrec;
+
+		/* get the encode and decode size */
+		if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
+			return (EFAULT);
+		*size = decode_len;
+
+		/* are we at the end of the stream? */
+		if (*size == 0)
+			return (0);
+
+		/* sanity check the size parameter */
+		if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec))
+			return (EFAULT);
+
+		if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail))
+			return (EFAULT);
+		break;
+	}
+
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+/* Operation vector of the XDR encoding, in struct nvs_ops order. */
+static const struct nvs_ops nvs_xdr_ops = {
+	nvs_xdr_nvlist,		/* nvs_nvlist */
+	nvs_xdr_nvpair,		/* nvs_nvpair */
+	nvs_xdr_nvp_op,		/* nvs_nvp_op */
+	nvs_xdr_nvp_size,	/* nvs_nvp_size */
+	nvs_xdr_nvl_fini	/* nvs_nvl_fini */
+};
+/* Run the requested operation on nvl using the XDR encoding. */
+static int
+nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
+{
+	/* the XDR payload starts right after the nvs_header_t */
+	char *payload = buf + sizeof (nvs_header_t);
+	size_t paylen = *buflen - sizeof (nvs_header_t);
+	XDR stream;
+	int rc;
+
+	nvs->nvs_ops = &nvs_xdr_ops;
+	rc = nvs_xdr_create(nvs, &stream, payload, paylen);
+	if (rc != 0)
+		return (rc);
+
+	rc = nvs_operation(nvs, nvl, buflen);
+	nvs_xdr_destroy(nvs);
+	return (rc);
+}
diff --git a/common/nvpair/nvpair_alloc_fixed.c b/common/nvpair/nvpair_alloc_fixed.c
new file mode 100644
index 000000000000..b1128eeb9bc3
--- /dev/null
+++ b/common/nvpair/nvpair_alloc_fixed.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/stropts.h>
+#include <sys/isa_defs.h>
+#include <sys/nvpair.h>
+#include <sys/sysmacros.h>
+#if defined(_KERNEL) && !defined(_BOOT)
+#include <sys/varargs.h>
+#else
+#include <stdarg.h>
+#include <strings.h>
+#endif
+
+/*
+ * This allocator is very simple.
+ *  - it uses a pre-allocated buffer for memory allocations.
+ *  - it does _not_ free memory in the pre-allocated buffer.
+ *
+ * The reason for the selected implementation is simplicity.
+ * This allocator is designed for the usage in interrupt context when
+ * the caller may not wait for free memory.
+ */
+
+/* bookkeeping header kept at the aligned start of the pre-allocated buffer */
+typedef struct nvbuf {
+	uintptr_t	nvb_buf;	/* address of this header in the buffer */
+	uintptr_t 	nvb_lim;	/* limit address: buffer base + size */
+	uintptr_t	nvb_cur;	/* address of the next allocation */
+} nvbuf_t;
+
+/*
+ * Initialize the pre-allocated buffer allocator. The caller needs to supply
+ *   buf	address of pre-allocated buffer
+ *   bufsz	size of pre-allocated buffer
+ *
+ * The nvbuf_t is placed at the pointer-aligned start of that buffer and
+ * its members are calculated here; EINVAL if buf is 0 or too small for it.
+ */
+static int
+nv_fixed_init(nv_alloc_t *nva, va_list valist)
+{
+	uintptr_t base = va_arg(valist, uintptr_t);
+	uintptr_t lim = base + va_arg(valist, size_t);
+	nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t));
+
+	if (base == 0 || (uintptr_t)&nvb[1] > lim)
+		return (EINVAL);
+
+	nvb->nvb_buf = (uintptr_t)&nvb[0];
+	nvb->nvb_cur = (uintptr_t)&nvb[1];	/* first byte after the header */
+	nvb->nvb_lim = lim;
+	nva->nva_arg = nvb;
+
+	return (0);
+}
+/* Carve size bytes from the fixed buffer; NULL when they do not fit. */
+static void *
+nv_fixed_alloc(nv_alloc_t *nva, size_t size)
+{
+	nvbuf_t *nvb = nva->nva_arg;
+	uintptr_t new = nvb->nvb_cur;
+	/* compare with the space left so that "new + size" cannot wrap */
+	if (size == 0 || new > nvb->nvb_lim || size > nvb->nvb_lim - new)
+		return (NULL);
+	/* round the cursor up so the next allocation is pointer-aligned */
+	nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t));
+
+	return ((void *)new);
+}
+/* Freeing is a no-op; only nv_fixed_reset() makes the space reusable. */
+/*ARGSUSED*/
+static void
+nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size)
+{
+	/* don't free memory in the pre-allocated buffer */
+}
+/* Discard all allocations by moving the cursor back behind the nvbuf_t. */
+static void
+nv_fixed_reset(nv_alloc_t *nva)
+{
+	nvbuf_t *nvb = nva->nva_arg;
+
+	nvb->nvb_cur = (uintptr_t)&nvb[1];
+}
+/* Operation vector of the fixed-buffer allocator; it has no fini routine. */
+const nv_alloc_ops_t nv_fixed_ops_def = {
+	nv_fixed_init,	/* nv_ao_init() */
+	NULL,		/* nv_ao_fini() */
+	nv_fixed_alloc,	/* nv_ao_alloc() */
+	nv_fixed_free,	/* nv_ao_free() */
+	nv_fixed_reset	/* nv_ao_reset() */
+};
+/* pointer through which the vector above is exported */
+const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def;
diff --git a/common/unicode/u8_textprep.c b/common/unicode/u8_textprep.c
new file mode 100644
index 000000000000..8faf1a97e47e
--- /dev/null
+++ b/common/unicode/u8_textprep.c
@@ -0,0 +1,2132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+/*
+ * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458).
+ *
+ * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F),
+ * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also
+ * the section 3C man pages.
+ * Interface stability: Committed.
+ */
+
+#include <sys/types.h>
+#ifdef	_KERNEL
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#else
+#include <sys/u8_textprep.h>
+#include <strings.h>
+#endif	/* _KERNEL */
+#include <sys/byteorder.h>
+#include <sys/errno.h>
+#include <sys/u8_textprep_data.h>
+
+
+/* The maximum possible number of bytes in a UTF-8 character. */
+#define	U8_MB_CUR_MAX			(4)
+
+/*
+ * The maximum number of bytes needed for a UTF-8 character to cover
+ * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2.
+ */
+#define	U8_MAX_BYTES_UCS2		(3)
+
+/* The maximum possible number of bytes in a Stream-Safe Text. */
+#define	U8_STREAM_SAFE_TEXT_MAX		(128)
+
+/*
+ * The maximum number of characters in a combining/conjoining sequence and
+ * the highest usable index into such a sequence (the maximum minus one).
+ */
+#define	U8_MAX_CHARS_A_SEQ		(32)
+#define	U8_UPPER_LIMIT_IN_A_SEQ		(31)
+
+/* The combining class value for Starter. */
+#define	U8_COMBINING_CLASS_STARTER	(0)
+
+/*
+ * Some Hangul related macros are defined below.
+ *
+ * The first and the last of Hangul syllables, Hangul Jamo Leading consonants,
+ * Vowels, and optional Trailing consonants in Unicode scalar values.
+ *
+ * Please note that U8_HANGUL_JAMO_T_FIRST below is 0x11A7, not the actual
+ * U+11A8. This is because the trailing consonant is optional and thus we
+ * pre-subtract one; U8_HANGUL_JAMO_T() accordingly uses '>' and not '>='.
+ *
+ * Each of 19 modern leading consonants has total 588 possible syllables since
+ * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for
+ * no trailing consonant case, i.e., 21 x 28 = 588.
+ *
+ * We also have a bunch of other Hangul related macros below. Please bear in
+ * mind that U8_HANGUL_JAMO_1ST_BYTE can be used as a quick check on whether
+ * a character may be a Hangul Jamo, but a match does not guarantee that it
+ * is one; it only means that it most likely is.
+ */
+#define	U8_HANGUL_SYL_FIRST		(0xAC00U)
+#define	U8_HANGUL_SYL_LAST		(0xD7A3U)
+
+#define	U8_HANGUL_JAMO_L_FIRST		(0x1100U)
+#define	U8_HANGUL_JAMO_L_LAST		(0x1112U)
+#define	U8_HANGUL_JAMO_V_FIRST		(0x1161U)
+#define	U8_HANGUL_JAMO_V_LAST		(0x1175U)
+#define	U8_HANGUL_JAMO_T_FIRST		(0x11A7U)
+#define	U8_HANGUL_JAMO_T_LAST		(0x11C2U)
+
+#define	U8_HANGUL_V_COUNT		(21)
+#define	U8_HANGUL_VT_COUNT		(588)
+#define	U8_HANGUL_T_COUNT		(28)
+
+#define	U8_HANGUL_JAMO_1ST_BYTE		(0xE1U)
+
+#define	U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \
+	(s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \
+	(s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \
+	(s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU));
+
+#define	U8_HANGUL_JAMO_L(u) \
+	((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST)
+
+#define	U8_HANGUL_JAMO_V(u) \
+	((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST)
+
+#define	U8_HANGUL_JAMO_T(u) \
+	((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define	U8_HANGUL_JAMO(u) \
+	((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define	U8_HANGUL_SYLLABLE(u) \
+	((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST)
+
+#define	U8_HANGUL_COMPOSABLE_L_V(s, u) \
+	((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u)))
+
+#define	U8_HANGUL_COMPOSABLE_LV_T(s, u) \
+	((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u)))
+
+/* The type byte, T, that may lead a decomposition mapping sequence. */
+#define	U8_DECOMP_BOTH			(0xF5U)
+#define	U8_DECOMP_CANONICAL		(0xF6U)
+
+/* A table id at or above this value refers to a 16-bit b4 table. */
+#define	U8_16BIT_TABLE_INDICATOR	(0x8000U)
+
+/* Convenience macros; the multi-statement ones end in their own ';'. */
+#define	U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \
+	(u) = ((uint32_t)(b1) & 0x0F) << 12 | ((uint32_t)(b2) & 0x3F) << 6 | \
+		(uint32_t)(b3) & 0x3F;
+
+#define	U8_SIMPLE_SWAP(a, b, t) \
+	(t) = (a); \
+	(a) = (b); \
+	(b) = (t);
+
+#define	U8_ASCII_TOUPPER(c) \
+	(((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c))
+
+#define	U8_ASCII_TOLOWER(c) \
+	(((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c))
+
+#define	U8_ISASCII(c)			(((uchar_t)(c)) < 0x80U)
+/*
+ * The following macro assumes that the two characters that are to be
+ * swapped are adjacent to each other and 'a' comes before 'b'; otherwise
+ * the result is wrong. It is not self-contained: it expands to several
+ * statements and uses k, tc, u8s, u8t, start, disp and comb_class in scope.
+ */
+#define	U8_SWAP_COMB_MARKS(a, b) \
+	for (k = 0; k < disp[(a)]; k++) \
+		u8t[k] = u8s[start[(a)] + k]; \
+	for (k = 0; k < disp[(b)]; k++) \
+		u8s[start[(a)] + k] = u8s[start[(b)] + k]; \
+	start[(b)] = start[(a)] + disp[(b)]; \
+	for (k = 0; k < disp[(a)]; k++) \
+		u8s[start[(b)] + k] = u8t[k]; \
+	U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \
+	U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc);
+
+/* The possible states during normalization (as set by do_decomp()). */
+typedef enum {
+	U8_STATE_START = 0,		/* anything that is not Hangul */
+	U8_STATE_HANGUL_L = 1,		/* leading consonant */
+	U8_STATE_HANGUL_LV = 2,		/* L then V, or an LV syllable */
+	U8_STATE_HANGUL_LVT = 3,	/* LV then T, or an LVT syllable */
+	U8_STATE_HANGUL_V = 4,		/* vowel not preceded by L */
+	U8_STATE_HANGUL_T = 5,		/* trailing cons. not after LV */
+	U8_STATE_COMBINING_MARK = 6
+} u8_normalization_states_t;
+
+/*
+ * The three vectors below are used to check that the bytes of a given
+ * UTF-8 character are valid and do not contain any malformed byte values.
+ *
+ * We used to have a quite relaxed UTF-8 binary representation but then there
+ * were some security related issues and so the Unicode Consortium defined
+ * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it
+ * one more time at the Unicode 3.2. The following three tables are based on
+ * that.
+ */
+
+#define	U8_ILLEGAL_NEXT_BYTE_COMMON(c)	((c) < 0x80 || (c) > 0xBF)
+
+#define	I_				U8_ILLEGAL_CHAR
+#define	O_				U8_OUT_OF_RANGE_CHAR
+
+const int8_t u8_number_of_bytes[0x100] = {
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+
+/*	80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF  */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/*	C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF  */
+	I_, I_, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/*	D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF  */
+	2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/*	E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF  */
+	3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+
+/*	F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF  */
+	4,  4,  4,  4,  4,  O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_,
+};
+
+#undef	I_
+#undef	O_
+
+const uint8_t u8_valid_min_2nd_byte[0x100] = {
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 00 - BF follow */
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/*	C0    C1    C2    C3    C4    C5    C6    C7    */
+	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	C8    C9    CA    CB    CC    CD    CE    CF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	D0    D1    D2    D3    D4    D5    D6    D7    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	D8    D9    DA    DB    DC    DD    DE    DF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	E0    E1    E2    E3    E4    E5    E6    E7    */
+	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	E8    E9    EA    EB    EC    ED    EE    EF    */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/*	F0    F1    F2    F3    F4    F5    F6    F7    */
+	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,	/* F8 - FF */
+};
+
+const uint8_t u8_valid_max_2nd_byte[0x100] = {
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 00 - BF follow */
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/*	C0    C1    C2    C3    C4    C5    C6    C7    */
+	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	C8    C9    CA    CB    CC    CD    CE    CF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	D0    D1    D2    D3    D4    D5    D6    D7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	D8    D9    DA    DB    DC    DD    DE    DF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	E0    E1    E2    E3    E4    E5    E6    E7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/*	E8    E9    EA    EB    EC    ED    EE    EF    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
+/*	F0    F1    F2    F3    F4    F5    F6    F7    */
+	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,	/* F8 - FF */
+};
+
+
+/*
+ * u8_validate() validates the given UTF-8 string and returns the number of
+ * bytes validated (only the first character unless U8_VALIDATE_ENTIRE),
+ * or -1 with *errnum set to EILSEQ, ERANGE, EINVAL, or EBADF (character
+ * found in 'list'; a NULL 'list' skips that check). Cf. mblen(3C).
+ */
+int
+u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum)
+{
+	uchar_t *ib;
+	uchar_t *ibtail;
+	uchar_t **p;
+	uchar_t *s1;
+	uchar_t *s2;
+	uchar_t f;
+	int sz;
+	size_t i;
+	int ret_val;
+	boolean_t second;
+	boolean_t no_need_to_validate_entire;
+	boolean_t check_additional;
+	boolean_t validate_ucs2_range_only;
+
+	if (! u8str)
+		return (0);
+
+	ib = (uchar_t *)u8str;
+	ibtail = ib + n;
+
+	ret_val = 0;
+
+	no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE);
+	check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL;
+	validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE;
+
+	while (ib < ibtail) {
+		/*
+		 * The first byte of a UTF-8 character tells how many
+		 * bytes will follow for the character. If the first byte
+		 * is an illegal byte value or out of range value, we just
+		 * return -1 with an appropriate error number.
+		 */
+		sz = u8_number_of_bytes[*ib];
+		if (sz == U8_ILLEGAL_CHAR) {
+			*errnum = EILSEQ;
+			return (-1);
+		}
+
+		if (sz == U8_OUT_OF_RANGE_CHAR ||
+		    (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) {
+			*errnum = ERANGE;
+			return (-1);
+		}
+
+		/*
+		 * If we don't have enough bytes to check on, that's also
+		 * an error. As you can see, we give illegal byte sequence
+		 * checking higher priority than EINVAL cases.
+		 */
+		if ((ibtail - ib) < sz) {
+			*errnum = EINVAL;
+			return (-1);
+		}
+
+		if (sz == 1) {
+			ib++;
+			ret_val++;
+		} else {
+			/*
+			 * Check on the multi-byte UTF-8 character. For more
+			 * details on this, see comment added for the used
+			 * data structures at the beginning of the file.
+			 */
+			f = *ib++;
+			ret_val++;
+			second = B_TRUE;
+			for (i = 1; i < sz; i++) {
+				if (second) {
+					if (*ib < u8_valid_min_2nd_byte[f] ||
+					    *ib > u8_valid_max_2nd_byte[f]) {
+						*errnum = EILSEQ;
+						return (-1);
+					}
+					second = B_FALSE;
+				} else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) {
+					*errnum = EILSEQ;
+					return (-1);
+				}
+				ib++;
+				ret_val++;
+			}
+		}
+		/* A NULL list has nothing to match, so never dereference it. */
+		if (check_additional && list != NULL) {
+			for (p = (uchar_t **)list, i = 0; p[i]; i++) {
+				s1 = ib - sz;
+				s2 = p[i];
+				while (s1 < ib) {
+					if (*s1 != *s2 || *s2 == '\0')
+						break;
+					s1++;
+					s2++;
+				}
+				/* Exact match of the whole character. */
+				if (s1 >= ib && *s2 == '\0') {
+					*errnum = EBADF;
+					return (-1);
+				}
+			}
+		}
+		/* Without U8_VALIDATE_ENTIRE, stop after one character. */
+		if (no_need_to_validate_entire)
+			break;
+	}
+
+	return (ret_val);
+}
+
+/*
+ * The do_case_conv() looks at the mapping tables and returns found
+ * bytes if any. If not found, the input bytes are returned. The function
+ * always terminates the returned bytes with a null character, so 'u8s'
+ * must have room for at least U8_MB_CUR_MAX + 1 bytes.
+ *
+ * The case conversions are simple case conversions mapping a character to
+ * another character as specified in the Unicode data. The byte size of
+ * the mapped character could be different from that of the input character.
+ *
+ * The return value is the byte length of the returned character excluding
+ * the terminating null byte.
+ */
+static size_t
+do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
+{
+	size_t i;
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+
+	/*
+	 * At this point, the only possible values for sz are 2, 3, and 4.
+	 * The u8s must point to a vector of at least U8_MB_CUR_MAX + 1,
+	 * i.e., 5, bytes.
+	 */
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+	} else if (sz == 3) {
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+	} else {
+		/* This is not possible but just in case as a fallback. */
+		if (is_it_toupper)
+			*u8s = U8_ASCII_TOUPPER(*s);
+		else
+			*u8s = U8_ASCII_TOLOWER(*s);
+		u8s[1] = '\0';
+
+		return (1);
+	}
+	u8s[sz] = '\0';
+
+	/*
+	 * Let's find out if we have a corresponding character.
+	 */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_case_common_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	if (is_it_toupper) {
+		b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1];
+
+		/* Either there is no match or an error at the table. */
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_toupper_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id];
+	} else {
+		b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1];
+		/* Same "no match or bad table" test as the toupper case. */
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_tolower_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id];
+	}
+
+	/*
+	 * If i is still zero, that means there is no corresponding character.
+	 */
+	if (i == 0)
+		return ((size_t)sz);
+
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * The do_case_compare() function compares the two input strings, s1 and s2,
+ * one character at a time doing case conversions if applicable and returns
+ * the comparison result like strcmp(). On an illegal or truncated character
+ * *errnum is set to EILSEQ or EINVAL and the comparison still continues.
+ * Since, in empirical sense, most of text data are 7-bit ASCII characters,
+ * we treat the 7-bit ASCII characters as a special case trying to yield
+ * faster processing time.
+ */
+static int
+do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
+	size_t n2, boolean_t is_it_toupper, int *errnum)
+{
+	int f;
+	int sz1;
+	int sz2;
+	size_t j;
+	size_t i1;
+	size_t i2;
+	uchar_t u8s1[U8_MB_CUR_MAX + 1];
+	uchar_t u8s2[U8_MB_CUR_MAX + 1];
+
+	i1 = i2 = 0;
+	while (i1 < n1 && i2 < n2) {
+		/*
+		 * Find out what would be the byte length for this UTF-8
+		 * character at string s1 and also find out if this is
+		 * an illegal start byte or not and if so, issue a proper
+		 * error number and yet treat this byte as a character.
+		 */
+		sz1 = u8_number_of_bytes[*s1];
+		if (sz1 < 0) {
+			*errnum = EILSEQ;
+			sz1 = 1;
+		}
+
+		/*
+		 * For 7-bit ASCII characters mainly, we do a quick case
+		 * conversion right at here.
+		 *
+		 * If we don't have enough bytes for this character, issue
+		 * an EINVAL error and use what are available.
+		 *
+		 * If we have enough bytes, find out if there is
+		 * a corresponding case-converted character and if so, copy
+		 * over the bytes for a comparison later. If there is no
+		 * corresponding character, then, use what we have
+		 * for the comparison.
+		 */
+		if (sz1 == 1) {
+			if (is_it_toupper)
+				u8s1[0] = U8_ASCII_TOUPPER(*s1);
+			else
+				u8s1[0] = U8_ASCII_TOLOWER(*s1);
+			s1++;
+			u8s1[1] = '\0';
+		} else if ((i1 + sz1) > n1) {
+			*errnum = EINVAL;
+			for (j = 0; (i1 + j) < n1; )
+				u8s1[j++] = *s1++;
+			u8s1[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper);
+			s1 += sz1;
+		}
+
+		/* Do the same for the string s2. */
+		sz2 = u8_number_of_bytes[*s2];
+		if (sz2 < 0) {
+			*errnum = EILSEQ;
+			sz2 = 1;
+		}
+
+		if (sz2 == 1) {
+			if (is_it_toupper)
+				u8s2[0] = U8_ASCII_TOUPPER(*s2);
+			else
+				u8s2[0] = U8_ASCII_TOLOWER(*s2);
+			s2++;
+			u8s2[1] = '\0';
+		} else if ((i2 + sz2) > n2) {
+			*errnum = EINVAL;
+			for (j = 0; (i2 + j) < n2; )
+				u8s2[j++] = *s2++;
+			u8s2[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper);
+			s2 += sz2;
+		}
+
+		/* Now compare the two characters. */
+		if (sz1 == 1 && sz2 == 1) {
+			if (*u8s1 > *u8s2)
+				return (1);
+			if (*u8s1 < *u8s2)
+				return (-1);
+		} else {
+			f = strcmp((const char *)u8s1, (const char *)u8s2);
+			if (f != 0)
+				return (f);
+		}
+
+		/*
+		 * They were the same. Let's move on to the next
+		 * characters then.
+		 */
+		i1 += sz1;
+		i2 += sz2;
+	}
+
+	/*
+	 * We compared until the end of either or both strings.
+	 *
+	 * If we reached to or went over the ends for the both, that means
+	 * they are the same.
+	 *
+	 * If we reached only one of the two ends, that means the other string
+	 * still has something left, and that fact is used to determine
+	 * the return value.
+	 */
+	if (i1 >= n1) {
+		if (i2 >= n2)
+			return (0);
+		return (-1);
+	}
+	return (1);
+}
+
+/*
+ * The combining_class() function checks on the given bytes and finds out
+ * the corresponding Unicode combining class value. The return value 0 means
+ * it is a Starter. A single byte character, a size above 4, or a character
+ * with no table entry is also reported as a Starter.
+ */
+static uchar_t
+combining_class(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b4 = 0;
+
+	if (sz == 1 || sz > 4)
+		return (0);
+	/* The bytes are right-aligned: the last byte always goes to b4. */
+	if (sz == 2) {
+		b3 = s[0];
+		b4 = s[1];
+	} else if (sz == 3) {
+		b2 = s[0];
+		b3 = s[1];
+		b4 = s[2];
+	} else if (sz == 4) {
+		b1 = s[0];
+		b2 = s[1];
+		b3 = s[2];
+		b4 = s[3];
+	}
+	/* Walk the per-byte tables; an undefined step means Starter (0). */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b2 = u8_combining_class_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b3 = u8_combining_class_b3_tbl[uv][b2][b3];
+	if (b3 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	return (u8_combining_class_b4_tbl[uv][b3][b4]);
+}
+
+/*
+ * The do_decomp() function finds out a matching decomposition if any
+ * and returns it. If there is no match, the input bytes are copied and
+ * returned. The function also checks if there is a Hangul syllable,
+ * decomposes it if necessary and returns.
+ *
+ * To save time, a single byte 7-bit ASCII character should be handled by
+ * the caller.
+ *
+ * The function returns the number of bytes stored in 'u8s', not counting
+ * the null byte that is always appended. It also sets '*state' to tell
+ * whether a Hangul character was seen, which is then used by the caller.
+ */
+static size_t
+do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
+	boolean_t canonical_decomposition, u8_normalization_states_t *state)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+	size_t i;
+	uint32_t u1;
+
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+		u8s[2] = '\0';
+	} else if (sz == 3) {
+		/* Convert it to a Unicode scalar value. */
+		U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]);
+
+		/*
+		 * If this is a Hangul syllable, we decompose it into
+		 * a leading consonant, a vowel, and an optional trailing
+		 * consonant and then return.
+		 */
+		if (U8_HANGUL_SYLLABLE(u1)) {
+			u1 -= U8_HANGUL_SYL_FIRST;
+
+			b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT;
+			b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT)
+			    / U8_HANGUL_T_COUNT;
+			b3 = u1 % U8_HANGUL_T_COUNT;
+
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1);
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2);
+			if (b3) {
+				b3 += U8_HANGUL_JAMO_T_FIRST;
+				U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3);
+
+				u8s[9] = '\0';
+				*state = U8_STATE_HANGUL_LVT;
+				return (9);
+			}
+
+			u8s[6] = '\0';
+			*state = U8_STATE_HANGUL_LV;
+			return (6);
+		}
+
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+		u8s[3] = '\0';
+
+		/*
+		 * If this is a Hangul Jamo, we know there is nothing
+		 * further that we can decompose.
+		 */
+		if (U8_HANGUL_JAMO_L(u1)) {
+			*state = U8_STATE_HANGUL_L;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_V(u1)) {
+			if (*state == U8_STATE_HANGUL_L)
+				*state = U8_STATE_HANGUL_LV;
+			else
+				*state = U8_STATE_HANGUL_V;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_T(u1)) {
+			if (*state == U8_STATE_HANGUL_LV)
+				*state = U8_STATE_HANGUL_LVT;
+			else
+				*state = U8_STATE_HANGUL_T;
+			return (3);
+		}
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+		u8s[4] = '\0';
+	} else {
+		/*
+		 * This is a fallback and should not happen if the function
+		 * was called properly.
+		 */
+		u8s[0] = s[0];
+		u8s[1] = '\0';
+		*state = U8_STATE_START;
+		return (1);
+	}
+
+	/*
+	 * At this point, this routine does not know what it would get.
+	 * The caller should sort it out if the state isn't a Hangul one.
+	 */
+	*state = U8_STATE_START;
+
+	/* Try to find matching decomposition mapping byte sequence. */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_decomp_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	/*
+	 * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR
+	 * which is 0x8000, this means we couldn't fit the mappings into
+	 * the cardinality of an unsigned byte.
+	 */
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	/* This also means there wasn't any matching decomposition. */
+	if (start_id >= end_id)
+		return ((size_t)sz);
+
+	/*
+	 * The final table for decomposition mappings has three types of
+	 * byte sequences depending on whether a mapping is for compatibility
+	 * decomposition, canonical decomposition, or both like the following:
+	 *
+	 * (1) Compatibility decomposition mappings:
+	 *
+	 *	+---+---+-...-+---+
+	 *	| B0| B1| ... | Bm|
+	 *	+---+---+-...-+---+
+	 *
+	 *	The first byte, B0, is always less than 0xF5 (U8_DECOMP_BOTH).
+	 *
+	 * (2) Canonical decomposition mappings:
+	 *
+	 *	+---+---+---+-...-+---+
+	 *	| T | b0| b1| ... | bn|
+	 *	+---+---+---+-...-+---+
+	 *
+	 *	where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL).
+	 *
+	 * (3) Both mappings:
+	 *
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *	| T | D | b0| b1| ... | bn| B0| B1| ... | Bm|
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *
+	 *	where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement
+	 *	byte, b0 to bn are canonical mapping bytes and B0 to Bm are
+	 *	compatibility mapping bytes.
+	 *
+	 * Note that compatibility decomposition means doing recursive
+	 * decompositions using both compatibility decomposition mappings and
+	 * canonical decomposition mappings. On the other hand, canonical
+	 * decomposition means doing recursive decompositions using only
+	 * canonical decomposition mappings. Since the table we have has gone
+	 * through the recursions already, we do not need to do so during
+	 * runtime, i.e., the table has been completely flattened out
+	 * already.
+	 */
+
+	b3_base = u8_decomp_b3_tbl[uv][b2][b3].base;
+
+	/* Get the type, T, of the byte sequence. */
+	b1 = u8_decomp_final_tbl[uv][b3_base + start_id];
+
+	/*
+	 * If necessary, adjust start_id, end_id, or both. Note that if
+	 * this is compatibility decomposition mapping, there is no
+	 * adjustment.
+	 */
+	if (canonical_decomposition) {
+		/* Is the mapping only for compatibility decomposition? */
+		if (b1 < U8_DECOMP_BOTH)
+			return ((size_t)sz);
+
+		start_id++;
+
+		if (b1 == U8_DECOMP_BOTH) {
+			end_id = start_id +
+			    u8_decomp_final_tbl[uv][b3_base + start_id];
+			start_id++;
+		}
+	} else {
+		/*
+		 * Unless this is a compatibility decomposition mapping,
+		 * we adjust the start_id.
+		 */
+		if (b1 == U8_DECOMP_BOTH) {
+			start_id++;
+			start_id += u8_decomp_final_tbl[uv][b3_base + start_id];
+		} else if (b1 == U8_DECOMP_CANONICAL) {
+			start_id++;
+		}
+	}
+
+	for (i = 0; start_id < end_id; start_id++)
+		u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id];
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * find_composition_start() looks up the composition mappings that begin
+ * with the 'sz'-byte character at 's'. It returns the address of those
+ * mappings inside u8_composition_final_tbl, as explained in the
+ * do_composition(), or NULL when 'sz' is not 1..4 or nothing is mapped.
+ */
+static uchar_t *
+find_composition_start(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t k1 = 0;
+	uint16_t k2 = 0;
+	uint16_t k3 = 0;
+	uint16_t k4 = 0;
+	uint16_t tbl;
+	size_t lo;
+	size_t hi;
+
+	/*
+	 * Only 1 to 4 byte characters are possible; anything else is a
+	 * fallback that should not happen if the function was called
+	 * properly.
+	 */
+	if (sz == 0 || sz > 4)
+		return (NULL);
+
+	/*
+	 * Right-align the character bytes into the four table keys so that
+	 * the last byte always lands in k4 and unused leading keys stay 0.
+	 */
+	k4 = s[sz - 1];
+	if (sz >= 2)
+		k3 = s[sz - 2];
+	if (sz >= 3)
+		k2 = s[sz - 3];
+	if (sz == 4)
+		k1 = s[0];
+
+	/* Descend the lookup tables one byte at a time. */
+	k1 = u8_composition_b1_tbl[uv][k1];
+	if (k1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	k2 = u8_composition_b2_tbl[uv][k1][k2];
+	if (k2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	tbl = u8_composition_b3_tbl[uv][k2][k3].tbl_id;
+	if (tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	/* Ids at or above the indicator select the 16-bit offset table. */
+	if (tbl >= U8_16BIT_TABLE_INDICATOR) {
+		tbl -= U8_16BIT_TABLE_INDICATOR;
+		lo = u8_composition_b4_16bit_tbl[uv][tbl][k4];
+		hi = u8_composition_b4_16bit_tbl[uv][tbl][k4 + 1];
+	} else {
+		lo = u8_composition_b4_tbl[uv][tbl][k4];
+		hi = u8_composition_b4_tbl[uv][tbl][k4 + 1];
+	}
+
+	/* An empty [lo, hi) range means there is no mapping. */
+	if (lo >= hi)
+		return (NULL);
+
+	lo += u8_composition_b3_tbl[uv][k2][k3].base;
+	return ((uchar_t *)&(u8_composition_final_tbl[uv][lo]));
+}
+
+/*
+ * blocked() reports whether the character at index 'last' is blocked, i.e.
+ * whether any character strictly between index 0 and 'last' is a Starter
+ * or has a combining class that is not lower than that of 'last'.
+ */
+static boolean_t
+blocked(uchar_t *comb_class, size_t last)
+{
+	const uchar_t target = comb_class[last];
+	size_t idx = 1;
+
+	while (idx < last) {
+		uchar_t cc = comb_class[idx++];
+		if (cc == U8_COMBINING_CLASS_STARTER || cc >= target)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * The do_composition() function reads the character sequence pointed to by
+ * 's', does any necessary canonical composition, and then copies the result
+ * back to 's'. It returns the byte length of the resulting sequence.
+ *
+ * The input argument 's' cannot contain more than 32 characters.
+ */
+static size_t
+do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
+	uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
+{
+	uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];	/* result, copied back to s */
+	uchar_t tc[U8_MB_CUR_MAX];		/* one character read at *os */
+	uint8_t saved_marks[U8_MAX_CHARS_A_SEQ]; /* skipped mark indices */
+	size_t saved_marks_count;
+	uchar_t *p;
+	uchar_t *saved_p;
+	uchar_t *q;
+	size_t i;
+	size_t saved_i;
+	size_t j;
+	size_t k;
+	size_t l;				/* bytes written to t so far */
+	size_t C;
+	size_t saved_l;
+	size_t size;
+	uint32_t u1;
+	uint32_t u2;
+	boolean_t match_not_found = B_TRUE;
+
+	/*
+	 * This should never happen unless the callers are doing some strange
+	 * and unexpected things; if it does, "last" is clamped.
+	 *
+	 * The "last" is the index of the last character, not last + 1.
+	 */
+	if (last >= U8_MAX_CHARS_A_SEQ)
+		last = U8_UPPER_LIMIT_IN_A_SEQ;
+
+	for (i = l = 0; i <= last; i++) {
+		/*
+		 * For the last character or any non-Starter reached here,
+		 * we don't have any chance to do composition and so we just
+		 * copy it to the temporary buffer.
+		 */
+		if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) {
+SAVE_THE_CHAR:
+			p = s + start[i];
+			size = disp[i];
+			for (k = 0; k < size; k++)
+				t[l++] = *p++;
+			continue;
+		}
+
+		/*
+		 * If this could be a start of Hangul Jamos, then, we try to
+		 * conjoin them.
+		 */
+		if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) {
+			U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]],
+			    s[start[i] + 1], s[start[i] + 2]);
+			U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3],
+			    s[start[i] + 4], s[start[i] + 5]);
+
+			if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) {
+				u1 -= U8_HANGUL_JAMO_L_FIRST;
+				u2 -= U8_HANGUL_JAMO_V_FIRST;
+				u1 = U8_HANGUL_SYL_FIRST +
+				    (u1 * U8_HANGUL_V_COUNT + u2) *
+				    U8_HANGUL_T_COUNT;
+
+				i += 2;
+				if (i <= last) {
+					U8_PUT_3BYTES_INTO_UTF32(u2,
+					    s[start[i]], s[start[i] + 1],
+					    s[start[i] + 2]);
+
+					if (U8_HANGUL_JAMO_T(u2)) {
+						u1 += u2 -
+						    U8_HANGUL_JAMO_T_FIRST;
+						i++;
+					}
+				}
+
+				U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1);
+				i--;
+				l += 3;
+				continue;
+			}
+		}
+
+		/*
+		 * Let's then find out if this Starter has composition
+		 * mapping.
+		 */
+		p = find_composition_start(uv, s + start[i], disp[i]);
+		if (p == NULL)
+			goto SAVE_THE_CHAR;
+
+		/*
+		 * We have a Starter with composition mapping and the next
+		 * character is a non-Starter. Let's try to find out if
+		 * we can do composition.
+		 */
+
+		saved_p = p;
+		saved_i = i;
+		saved_l = l;
+		saved_marks_count = 0;
+
+TRY_THE_NEXT_MARK:
+		q = s + start[++i];
+		size = disp[i];
+
+		/*
+		 * The next for() loop compares the non-Starter pointed by
+		 * 'q' with the possible (joinable) characters pointed by 'p'.
+		 *
+		 * The composition final table entry pointed by the 'p'
+		 * looks like the following:
+		 *
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 * | C | b0| b1| ... | bn| F | B0| B1| ... | Bm| F |
+		 * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+		 *
+		 * where C is the count byte indicating the number of
+		 * mapping pairs where each pair looks like
+		 * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second
+		 * character of a canonical decomposition and the B0-Bm are
+		 * the bytes of a matching composite character. The F is
+		 * a filler byte after each character as the separator.
+		 */
+
+		match_not_found = B_TRUE;
+
+		for (C = *p++; C > 0; C--) {
+			for (k = 0; k < size; p++, k++)
+				if (*p != q[k])
+					break;
+
+			/* Have we found it? */
+			if (k >= size && *p == U8_TBL_ELEMENT_FILLER) {
+				match_not_found = B_FALSE;
+
+				l = saved_l;
+
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					t[l++] = *p;
+
+				break;
+			}
+
+			/* We didn't find it; skip to the next pair. */
+			if (*p != U8_TBL_ELEMENT_FILLER)
+				while (*++p != U8_TBL_ELEMENT_FILLER)
+					;
+			while (*++p != U8_TBL_ELEMENT_FILLER)
+				;
+			p++;
+		}
+
+		/*
+		 * If there was no match, we will need to save the combining
+		 * mark for later appending. After that, if the next one
+		 * is a non-Starter and not blocked, then, we try once
+		 * again to do composition with the next non-Starter.
+		 *
+		 * If there was no match and this was a Starter, then,
+		 * this is a new start.
+		 *
+		 * If there was a match and a composition done and we have
+		 * more to check on, then, we retrieve a new composition final
+		 * table entry for the composite and then try to do the
+		 * composition again.
+		 */
+
+		if (match_not_found) {
+			if (comb_class[i] == U8_COMBINING_CLASS_STARTER) {
+				i--;
+				goto SAVE_THE_CHAR;
+			}
+
+			saved_marks[saved_marks_count++] = i;
+		}
+
+		if (saved_l == l) {
+			while (i < last) {
+				if (blocked(comb_class, i + 1))
+					saved_marks[saved_marks_count++] = ++i;
+				else
+					break;
+			}
+			if (i < last) {
+				p = saved_p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		} else if (i < last) {
+			p = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (p != NULL) {
+				saved_p = p;
+				goto TRY_THE_NEXT_MARK;
+			}
+		}
+
+		/*
+		 * There is no more composition possible.
+		 *
+		 * If there was no composition whatsoever then we copy
+		 * over the original Starter and then append any non-Starters
+		 * remaining at the target string sequentially after that.
+		 */
+
+		if (saved_l == l) {
+			p = s + start[saved_i];
+			size = disp[saved_i];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+
+		for (k = 0; k < saved_marks_count; k++) {
+			p = s + start[saved_marks[k]];
+			size = disp[saved_marks[k]];
+			for (j = 0; j < size; j++)
+				t[l++] = *p++;
+		}
+	}
+
+	/*
+	 * If the last character is a Starter and if we have a character
+	 * (possibly another Starter) that can be turned into a composite,
+	 * we do so and we keep doing so until no more composition is
+	 * possible. On the way out, *os is updated to the new position.
+	 */
+	if (comb_class[last] == U8_COMBINING_CLASS_STARTER) {
+		p = *os;
+		saved_l = l - disp[last];
+
+		while (p < oslast) {
+			size = u8_number_of_bytes[*p];
+			if (size <= 1 || (p + size) > oslast)
+				break;
+
+			saved_p = p;
+
+			for (i = 0; i < size; i++)
+				tc[i] = *p++;
+
+			q = find_composition_start(uv, t + saved_l,
+			    l - saved_l);
+			if (q == NULL) {
+				p = saved_p;
+				break;
+			}
+
+			match_not_found = B_TRUE;
+
+			for (C = *q++; C > 0; C--) {
+				for (k = 0; k < size; q++, k++)
+					if (*q != tc[k])
+						break;
+
+				if (k >= size && *q == U8_TBL_ELEMENT_FILLER) {
+					match_not_found = B_FALSE;
+
+					l = saved_l;
+
+					while (*++q != U8_TBL_ELEMENT_FILLER) {
+						/*
+						 * This is practically
+						 * impossible but we don't
+						 * want to take any chances.
+						 */
+						if (l >=
+						    U8_STREAM_SAFE_TEXT_MAX) {
+							p = saved_p;
+							goto SAFE_RETURN;
+						}
+						t[l++] = *q;
+					}
+
+					break;
+				}
+
+				if (*q != U8_TBL_ELEMENT_FILLER)
+					while (*++q != U8_TBL_ELEMENT_FILLER)
+						;
+				while (*++q != U8_TBL_ELEMENT_FILLER)
+					;
+				q++;
+			}
+
+			if (match_not_found) {
+				p = saved_p;
+				break;
+			}
+		}
+SAFE_RETURN:
+		*os = p;
+	}
+
+	/*
+	 * Now we copy over the temporary string to the target string.
+	 * Since composition always reduces the number of characters or
+	 * leaves it unchanged, we don't need to worry about
+	 * the buffer overflow here.
+	 */
+	for (i = 0; i < l; i++)
+		s[i] = t[i];
+	s[l] = '\0';
+
+	return (l);
+}
+
+/*
+ * The collect_a_seq() function checks on the given string *source, collects
+ * a sequence of characters at u8s, and returns its byte length. While it
+ * collects a sequence, it also applies case conversion, canonical or
+ * compatibility decomposition, canonical composition, or some or all of
+ * them and in that order. *source is advanced past the consumed bytes.
+ *
+ * The collected sequence cannot be bigger than 32 characters since if
+ * it is having more than 31 characters, the sequence will be terminated
+ * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into
+ * a Stream-Safe Text. The collected sequence is always terminated with
+ * a null byte and the return value is the byte length of the sequence,
+ * which can be 0. The return value does not include the terminating
+ * null byte.
+ */
+static size_t
+collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
+	boolean_t is_it_toupper,
+	boolean_t is_it_tolower,
+	boolean_t canonical_decomposition,
+	boolean_t compatibility_decomposition,
+	boolean_t canonical_composition,
+	int *errnum, u8_normalization_states_t *state)
+{
+	uchar_t *s;
+	int sz;
+	int saved_sz;
+	size_t i;
+	size_t j;
+	size_t k;
+	size_t l;
+	uchar_t comb_class[U8_MAX_CHARS_A_SEQ];	/* class of each character */
+	uchar_t disp[U8_MAX_CHARS_A_SEQ];	/* byte length of each one */
+	uchar_t start[U8_MAX_CHARS_A_SEQ];	/* byte offset of each in u8s */
+	uchar_t u8t[U8_MB_CUR_MAX];
+	uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc;
+	size_t last;
+	size_t saved_last;
+	uint32_t u1;
+
+	/*
+	 * Save the source string pointer which we will return a changed
+	 * pointer if we do processing.
+	 */
+	s = *source;
+
+	/*
+	 * The following is a fallback for just in case callers are not
+	 * checking the string boundaries before the calling.
+	 */
+	if (s >= slast) {
+		u8s[0] = '\0';
+
+		return (0);
+	}
+
+	/*
+	 * As the first thing, let's collect a character and do case
+	 * conversion if necessary.
+	 */
+
+	sz = u8_number_of_bytes[*s];
+
+	if (sz < 0) {
+		*errnum = EILSEQ;
+
+		u8s[0] = *s++;
+		u8s[1] = '\0';
+
+		*source = s;
+
+		return (1);
+	}
+
+	if (sz == 1) {
+		if (is_it_toupper)
+			u8s[0] = U8_ASCII_TOUPPER(*s);
+		else if (is_it_tolower)
+			u8s[0] = U8_ASCII_TOLOWER(*s);
+		else
+			u8s[0] = *s;
+		s++;
+		u8s[1] = '\0';
+	} else if ((s + sz) > slast) {
+		*errnum = EINVAL;
+
+		for (i = 0; s < slast; )
+			u8s[i++] = *s++;
+		u8s[i] = '\0';
+
+		*source = s;
+
+		return (i);
+	} else {
+		if (is_it_toupper || is_it_tolower) {
+			i = do_case_conv(uv, u8s, s, sz, is_it_toupper);
+			s += sz;
+			sz = i;
+		} else {
+			for (i = 0; i < sz; )
+				u8s[i++] = *s++;
+			u8s[i] = '\0';
+		}
+	}
+
+	/*
+	 * And then canonical/compatibility decomposition followed by
+	 * an optional canonical composition. Please note that
+	 * canonical composition is done only when a decomposition is
+	 * done.
+	 */
+	if (canonical_decomposition || compatibility_decomposition) {
+		if (sz == 1) {
+			*state = U8_STATE_START;
+
+			saved_sz = 1;
+
+			comb_class[0] = 0;
+			start[0] = 0;
+			disp[0] = 1;
+
+			last = 1;
+		} else {
+			saved_sz = do_decomp(uv, u8s, u8s, sz,
+			    canonical_decomposition, state);
+
+			last = 0;
+
+			for (i = 0; i < saved_sz; ) {
+				sz = u8_number_of_bytes[u8s[i]];
+
+				comb_class[last] = combining_class(uv,
+				    u8s + i, sz);
+				start[last] = i;
+				disp[last] = sz;
+
+				last++;
+				i += sz;
+			}
+
+			/*
+			 * Decomposition yields various Hangul related
+			 * states but not on combining marks. We need to
+			 * find that out here by checking on the last
+			 * character.
+			 */
+			if (*state == U8_STATE_START) {
+				if (comb_class[last - 1])
+					*state = U8_STATE_COMBINING_MARK;
+			}
+		}
+
+		saved_last = last;
+
+		while (s < slast) {
+			sz = u8_number_of_bytes[*s];
+
+			/*
+			 * If this is an illegal character, an incomplete
+			 * character, or a 7-bit ASCII Starter character,
+			 * then we have collected a sequence; break and let
+			 * the next call deal with the two cases.
+			 *
+			 * Note that this is okay only if you are using this
+			 * function with a fixed length string, not on
+			 * a buffer with multiple calls of one chunk at a time.
+			 */
+			if (sz <= 1) {
+				break;
+			} else if ((s + sz) > slast) {
+				break;
+			} else {
+				/*
+				 * If the previous character was a Hangul Jamo
+				 * and this character is a Hangul Jamo that
+				 * can be conjoined, we collect the Jamo.
+				 */
+				if (*s == U8_HANGUL_JAMO_1ST_BYTE) {
+					U8_PUT_3BYTES_INTO_UTF32(u1,
+					    *s, *(s + 1), *(s + 2));
+
+					if (U8_HANGUL_COMPOSABLE_L_V(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LV;
+						goto COLLECT_A_HANGUL;
+					}
+
+					if (U8_HANGUL_COMPOSABLE_LV_T(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LVT;
+						goto COLLECT_A_HANGUL;
+					}
+				}
+
+				/*
+				 * Regardless of whatever it was, if this is
+				 * a Starter, we don't collect the character
+				 * since that's a new start and we will deal
+				 * with it at the next time.
+				 */
+				i = combining_class(uv, s, sz);
+				if (i == U8_COMBINING_CLASS_STARTER)
+					break;
+
+				/*
+				 * We know the current character is a combining
+				 * mark. If the previous character wasn't
+				 * a Starter (not Hangul) or a combining mark,
+				 * then, we don't collect this combining mark.
+				 */
+				if (*state != U8_STATE_START &&
+				    *state != U8_STATE_COMBINING_MARK)
+					break;
+
+				*state = U8_STATE_COMBINING_MARK;
+COLLECT_A_HANGUL:
+				/*
+				 * If we collected a Starter and combining
+				 * marks up to 30, i.e., total 31 characters,
+				 * then, we terminate this degenerately long
+				 * combining sequence with a U+034F COMBINING
+				 * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in
+				 * UTF-8 and turn this into a Stream-Safe
+				 * Text. This will be extremely rare but
+				 * possible.
+				 *
+				 * The following will also guarantee that
+				 * we are not writing more than 32 characters
+				 * plus a NULL at u8s[].
+				 */
+				if (last >= U8_UPPER_LIMIT_IN_A_SEQ) {
+TURN_STREAM_SAFE:
+					*state = U8_STATE_START;
+					comb_class[last] = 0;
+					start[last] = saved_sz;
+					disp[last] = 2;
+					last++;
+
+					u8s[saved_sz++] = 0xCD;
+					u8s[saved_sz++] = 0x8F;
+
+					break;
+				}
+
+				/*
+				 * Some combining marks also do decompose into
+				 * another combining mark or marks.
+				 */
+				if (*state == U8_STATE_COMBINING_MARK) {
+					k = last;
+					l = sz;
+					i = do_decomp(uv, uts, s, sz,
+					    canonical_decomposition, state);
+					for (j = 0; j < i; ) {
+						sz = u8_number_of_bytes[uts[j]];
+
+						comb_class[last] =
+						    combining_class(uv,
+						    uts + j, sz);
+						start[last] = saved_sz + j;
+						disp[last] = sz;
+
+						last++;
+						if (last >=
+						    U8_UPPER_LIMIT_IN_A_SEQ) {
+							last = k;
+							goto TURN_STREAM_SAFE;
+						}
+						j += sz;
+					}
+
+					*state = U8_STATE_COMBINING_MARK;
+					sz = i;
+					s += l;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = uts[i];
+				} else {
+					comb_class[last] = i;
+					start[last] = saved_sz;
+					disp[last] = sz;
+					last++;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = *s++;
+				}
+
+				/*
+				 * If this is U+0345 COMBINING GREEK
+				 * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a.,
+				 * iota subscript, and need to be converted to
+				 * uppercase letter, convert it to U+0399 GREEK
+				 * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8),
+				 * i.e., convert to capital adscript form as
+				 * specified in the Unicode standard.
+				 *
+				 * This is the only special case of (ambiguous)
+				 * case conversion at combining marks and
+				 * probably the standard will never have
+				 * anything similar like this in future.
+				 */
+				if (is_it_toupper && sz >= 2 &&
+				    u8s[saved_sz - 2] == 0xCD &&
+				    u8s[saved_sz - 1] == 0x85) {
+					u8s[saved_sz - 2] = 0xCE;
+					u8s[saved_sz - 1] = 0x99;
+				}
+			}
+		}
+
+		/*
+		 * Let's try to ensure a canonical ordering for the collected
+		 * combining marks. We do this only if we have collected
+		 * at least one more non-Starter. (The decomposition mapping
+		 * data tables have fully (and recursively) expanded and
+		 * canonically ordered decompositions.)
+		 *
+		 * The U8_SWAP_COMB_MARKS() convenience macro has some
+		 * assumptions and we are meeting the assumptions.
+		 */
+		last--;
+		if (last >= saved_last) {
+			for (i = 0; i < last; i++)
+				for (j = last; j > i; j--)
+					if (comb_class[j] &&
+					    comb_class[j - 1] > comb_class[j]) {
+						U8_SWAP_COMB_MARKS(j - 1, j);
+					}
+		}
+
+		*source = s;
+
+		if (! canonical_composition) {
+			u8s[saved_sz] = '\0';
+			return (saved_sz);
+		}
+
+		/*
+		 * Now do the canonical composition. Note that we do this
+		 * only after a canonical or compatibility decomposition to
+		 * finish up NFC or NFKC.
+		 */
+		sz = do_composition(uv, u8s, comb_class, start, disp, last,
+		    &s, slast);
+	}
+
+	*source = s;
+
+	return ((size_t)sz);
+}
+
+/*
+ * The do_norm_compare() function does string comparison based on Unicode
+ * simple case mappings and Unicode Normalization definitions.
+ *
+ * It walks both strings in step, collecting one character sequence from
+ * each at a time and comparing the two collected sequences.
+ *
+ * The meanings on the return values are the same as the usual strcmp().
+ */
+static int
+do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
+	int flag, int *errnum)
+{
+	uchar_t seq1[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t seq2[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t *end1;
+	uchar_t *end2;
+	size_t len1;
+	size_t len2;
+	int cmp;
+	u8_normalization_states_t state;
+	boolean_t to_upper;
+	boolean_t to_lower;
+	boolean_t canon_decomp;
+	boolean_t compat_decomp;
+	boolean_t canon_comp;
+
+	to_upper = flag & U8_TEXTPREP_TOUPPER;
+	to_lower = flag & U8_TEXTPREP_TOLOWER;
+
+	canon_decomp = flag & U8_CANON_DECOMP;
+	compat_decomp = flag & U8_COMPAT_DECOMP;
+	canon_comp = flag & U8_CANON_COMP;
+	end1 = s1 + n1;
+	end2 = s2 + n2;
+
+	while (s1 < end1 && s2 < end2) {
+		/*
+		 * If the current character is a 7-bit ASCII character and it
+		 * is either the last character or followed by another 7-bit
+		 * ASCII character, it forms a sequence on its own and is
+		 * taken directly (with ASCII case folding if requested).
+		 *
+		 * Anything else has to go through collect_a_seq().
+		 */
+
+		if (!U8_ISASCII(*s1) ||
+		    ((s1 + 1) < end1 && !U8_ISASCII(*(s1 + 1)))) {
+			state = U8_STATE_START;
+			len1 = collect_a_seq(uv, seq1, &s1, end1,
+			    to_upper, to_lower,
+			    canon_decomp,
+			    compat_decomp,
+			    canon_comp, errnum, &state);
+		} else {
+			if (to_upper)
+				seq1[0] = U8_ASCII_TOUPPER(*s1);
+			else if (to_lower)
+				seq1[0] = U8_ASCII_TOLOWER(*s1);
+			else
+				seq1[0] = *s1;
+			seq1[1] = '\0';
+			len1 = 1;
+			s1++;
+		}
+
+		if (!U8_ISASCII(*s2) ||
+		    ((s2 + 1) < end2 && !U8_ISASCII(*(s2 + 1)))) {
+			state = U8_STATE_START;
+			len2 = collect_a_seq(uv, seq2, &s2, end2,
+			    to_upper, to_lower,
+			    canon_decomp,
+			    compat_decomp,
+			    canon_comp, errnum, &state);
+		} else {
+			if (to_upper)
+				seq2[0] = U8_ASCII_TOUPPER(*s2);
+			else if (to_lower)
+				seq2[0] = U8_ASCII_TOLOWER(*s2);
+			else
+				seq2[0] = *s2;
+			seq2[1] = '\0';
+			len2 = 1;
+			s2++;
+		}
+
+		/*
+		 * Now compare the two collected sequences. If they are the
+		 * same, we move on to the next pair of sequences.
+		 */
+		if (len1 != 1 || len2 != 1) {
+			cmp = strcmp((const char *)seq1, (const char *)seq2);
+			if (cmp != 0)
+				return (cmp);
+		} else {
+			if (seq1[0] < seq2[0])
+				return (-1);
+			if (seq1[0] > seq2[0])
+				return (1);
+		}
+	}
+
+	/*
+	 * We compared until the end of either or both strings.
+	 *
+	 * If the first string still has something left, it is the bigger
+	 * one and we return 1. If only the second string has something
+	 * left, we return -1.
+	 *
+	 * If both strings ran out together, they are the same.
+	 */
+	if (s1 < end1)
+		return (1);
+	if (s2 < end2)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * The u8_strcmp() function compares two UTF-8 strings much like strcmp()
+ * or, when n is not zero, strncmp(). On top of that, equivalence based on
+ * Unicode Normalization and on Unicode simple case conversion mappings can
+ * be requested through flag. Problems are reported through *errnum.
+ */
+int
+u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
+		int *errnum)
+{
+	size_t len1;
+	size_t len2;
+	int sel;
+
+	*errnum = 0;
+
+	/*
+	 * Sanitize the requested Unicode version, case conversion, and
+	 * normalization flag values before acting on them.
+	 */
+
+	if (uv > U8_UNICODE_LATEST) {
+		uv = U8_UNICODE_LATEST;
+		*errnum = ERANGE;
+	}
+
+	if (flag != 0) {
+		sel = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER |
+		    U8_STRCMP_CI_LOWER);
+		if (sel == 0) {
+			flag |= U8_STRCMP_CS;
+		} else if (sel != U8_STRCMP_CS && sel != U8_STRCMP_CI_UPPER &&
+		    sel != U8_STRCMP_CI_LOWER) {
+			flag = U8_STRCMP_CS;
+			*errnum = EBADF;
+		}
+		sel = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP |
+		    U8_CANON_COMP);
+		if (sel != 0 && sel != U8_STRCMP_NFD && sel != U8_STRCMP_NFC &&
+		    sel != U8_STRCMP_NFKD && sel != U8_STRCMP_NFKC) {
+			flag = U8_STRCMP_CS;
+			*errnum = EBADF;
+		}
+	} else {
+		flag = U8_STRCMP_CS;
+	}
+
+	/* Plain case-sensitive comparison needs no Unicode processing. */
+	if (flag == U8_STRCMP_CS)
+		return (n != 0 ? strncmp(s1, s2, n) : strcmp(s1, s2));
+
+	/* A non-zero n caps how many bytes of each string are compared. */
+	len1 = strlen(s1);
+	if (n != 0 && len1 > n)
+		len1 = n;
+
+	len2 = strlen(s2);
+	if (n != 0 && len2 > n)
+		len2 = n;
+
+	/*
+	 * Simple case conversion can be done much faster and so the two
+	 * case-insensitive-only requests are handled separately here.
+	 */
+	if (flag == U8_STRCMP_CI_UPPER || flag == U8_STRCMP_CI_LOWER) {
+		boolean_t upper;
+
+		upper = (flag == U8_STRCMP_CI_UPPER) ? B_TRUE : B_FALSE;
+		return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
+		    len1, len2, upper, errnum));
+	}
+
+	return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, len1, len2,
+	    flag, errnum));
+}
+
+size_t
+u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
+	int flag, size_t unicode_version, int *errnum)
+{
+	int f;
+	int sz;
+	uchar_t *ib;			/* current read position in inarray */
+	uchar_t *ibtail;		/* inarray + *inlen */
+	uchar_t *ob;			/* current write position in outarray */
+	uchar_t *obtail;		/* outarray + *outlen */
+	boolean_t do_not_ignore_null;	/* stop at the first null byte */
+	boolean_t do_not_ignore_invalid; /* fail on bad/incomplete bytes */
+	boolean_t is_it_toupper;
+	boolean_t is_it_tolower;
+	boolean_t canonical_decomposition;
+	boolean_t compatibility_decomposition;
+	boolean_t canonical_composition;
+	size_t ret_val;			/* bad chars copied as-is, or -1 */
+	size_t i;
+	size_t j;
+	uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1]; /* one converted sequence */
+	u8_normalization_states_t state;
+
+	if (unicode_version > U8_UNICODE_LATEST) {
+		*errnum = ERANGE;
+		return ((size_t)-1);
+	}
+
+	f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER);
+	if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
+	if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC &&
+	    f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	if (inarray == NULL || *inlen == 0)	/* nothing to convert */
+		return (0);
+
+	if (outarray == NULL) {
+		*errnum = E2BIG;
+		return ((size_t)-1);
+	}
+
+	ib = (uchar_t *)inarray;
+	ob = (uchar_t *)outarray;
+	ibtail = ib + *inlen;
+	obtail = ob + *outlen;
+
+	do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL);
+	do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID);
+	is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
+	is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
+
+	ret_val = 0;
+
+	/*
+	 * If we don't have a normalization flag set, we do the simple case
+	 * conversion based text preparation separately below. Text
+	 * preparation involving Normalization will be done in the else
+	 * block, again, separately since it will take much more time and
+	 * resource than doing simple case conversions.
+	 */
+	if (f == 0) {
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			sz = u8_number_of_bytes[*ib];
+
+			if (sz < 0) {
+				if (do_not_ignore_invalid) {
+					*errnum = EILSEQ;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				sz = 1;		/* copy the bad byte as it is */
+				ret_val++;
+			}
+
+			if (sz == 1) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else if ((ib + sz) > ibtail) {
+				if (do_not_ignore_invalid) {
+					*errnum = EINVAL;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < (ibtail - ib)) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				/*
+				 * We treat the remaining incomplete character
+				 * bytes as a character.
+				 */
+				ret_val++;
+
+				while (ib < ibtail)
+					*ob++ = *ib++;
+			} else {
+				if (is_it_toupper || is_it_tolower) {
+					i = do_case_conv(unicode_version, u8s,
+					    ib, sz, is_it_toupper);
+
+					if ((obtail - ob) < i) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					ib += sz;
+
+					for (sz = 0; sz < i; sz++)
+						*ob++ = u8s[sz];
+				} else {
+					if ((obtail - ob) < sz) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					for (i = 0; i < sz; i++)
+						*ob++ = *ib++;
+				}
+			}
+		}
+	} else {
+		canonical_decomposition = flag & U8_CANON_DECOMP;
+		compatibility_decomposition = flag & U8_COMPAT_DECOMP;
+		canonical_composition = flag & U8_CANON_COMP;
+
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			/*
+			 * If the current character is a 7-bit ASCII
+			 * character and it is the last character, or,
+			 * if the current character is a 7-bit ASCII
+			 * character and the next character is also a 7-bit
+			 * ASCII character, then, we copy over this
+			 * character without going through collect_a_seq().
+			 *
+			 * In any other cases, we need to look further with
+			 * the collect_a_seq() function.
+			 */
+			if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail ||
+			    ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else {
+				*errnum = 0;
+				state = U8_STATE_START;
+
+				j = collect_a_seq(unicode_version, u8s,
+				    &ib, ibtail,
+				    is_it_toupper,
+				    is_it_tolower,
+				    canonical_decomposition,
+				    compatibility_decomposition,
+				    canonical_composition,
+				    errnum, &state);
+
+				if (*errnum && do_not_ignore_invalid) {
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < j) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				for (i = 0; i < j; i++)
+					*ob++ = u8s[i];
+			}
+		}
+	}
+
+	*inlen = ibtail - ib;		/* input bytes left unread */
+	*outlen = obtail - ob;		/* output bytes left unused */
+
+	return (ret_val);
+}
diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c
new file mode 100644
index 000000000000..ed9b67ea3bc9
--- /dev/null
+++ b/common/zfs/zfs_comutil.c
@@ -0,0 +1,202 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This file is intended for functions that ought to be common between user
+ * land (libzfs) and the kernel. When many common routines need to be shared
+ * then a separate file should be created.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/fs/zfs.h>
+#include <sys/int_limits.h>
+#include <sys/nvpair.h>
+#include "zfs_comutil.h"
+
+/*
+ * Are there allocatable vdevs (children without ZPOOL_CONFIG_IS_LOG set)?
+ */
+boolean_t
+zfs_allocatable_devs(nvlist_t *nv)
+{
+	nvlist_t **kids;
+	uint_t nkids;
+	uint_t idx;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_FALSE);
+
+	for (idx = 0; idx < nkids; idx++) {
+		uint64_t log_flag = 0;
+
+		(void) nvlist_lookup_uint64(kids[idx], ZPOOL_CONFIG_IS_LOG,
+		    &log_flag);
+		if (log_flag == 0)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+void
+zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp)
+{
+	nvlist_t *nested;
+	nvpair_t *nvp = NULL;
+
+	/* Defaults, kept for anything the list does not override. */
+	zrpp->zrp_txg = UINT64_MAX;
+	zrpp->zrp_maxdata = UINT64_MAX;
+	zrpp->zrp_maxmeta = 0;
+	zrpp->zrp_request = ZPOOL_NO_REWIND;
+
+	if (nvl == NULL)
+		return;
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		char *key = nvpair_name(nvp);
+
+		if (strcmp(key, ZPOOL_REWIND_POLICY) == 0) {
+			if (nvpair_value_nvlist(nvp, &nested) == 0)
+				zpool_get_rewind_policy(nested, zrpp);
+			return;
+		}
+		if (strcmp(key, ZPOOL_REWIND_REQUEST) == 0) {
+			if (nvpair_value_uint32(nvp, &zrpp->zrp_request) == 0 &&
+			    (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES) != 0)
+				zrpp->zrp_request = ZPOOL_NO_REWIND;
+		} else if (strcmp(key, ZPOOL_REWIND_REQUEST_TXG) == 0) {
+			(void) nvpair_value_uint64(nvp, &zrpp->zrp_txg);
+		} else if (strcmp(key, ZPOOL_REWIND_META_THRESH) == 0) {
+			(void) nvpair_value_uint64(nvp, &zrpp->zrp_maxmeta);
+		} else if (strcmp(key, ZPOOL_REWIND_DATA_THRESH) == 0) {
+			(void) nvpair_value_uint64(nvp, &zrpp->zrp_maxdata);
+		}
+	}
+	if (zrpp->zrp_request == 0)
+		zrpp->zrp_request = ZPOOL_NO_REWIND;
+}
+
+typedef struct zfs_version_spa_map {
+	int	version_zpl;	/* ZPL_VERSION_* value of this entry */
+	int	version_spa;	/* SPA_VERSION_* value paired with it */
+} zfs_version_spa_map_t;
+
+/*
+ * Keep in monotonically increasing version order; {0, 0} ends the table.
+ */
+static zfs_version_spa_map_t zfs_version_table[] = {
+	{ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_FUID, SPA_VERSION_FUID},
+	{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
+	{ZPL_VERSION_SA, SPA_VERSION_SA},
+	{0, 0}
+};
+
+/*
+ * Return the max zpl version for a corresponding spa version.
+ * -1 is returned if no mapping exists.
+ */
+int
+zfs_zpl_version_map(int spa_version)
+{
+	const zfs_version_spa_map_t *ent = zfs_version_table;
+	int best = -1;
+
+	for (; ent->version_spa != 0; ent++) {
+		if (ent->version_spa <= spa_version)
+			best = ent->version_zpl;
+	}
+
+	return (best);
+}
+
+/*
+ * Return the min spa version for a corresponding zpl version.
+ * -1 is returned if no mapping exists.
+ */
+int
+zfs_spa_version_map(int zpl_version)
+{
+	const zfs_version_spa_map_t *ent;
+
+	for (ent = zfs_version_table; ent->version_zpl != 0; ent++) {
+		if (zpl_version <= ent->version_zpl)
+			return (ent->version_spa);
+	}
+
+	/* No table entry reaches zpl_version. */
+	return (-1);
+}
+
+const char *zfs_history_event_names[LOG_END] = {
+	"invalid event",	/* NOTE(review): order presumably mirrors */
+	"pool create",		/* the LOG_* event ids -- confirm */
+	"vdev add",
+	"pool remove",
+	"pool destroy",
+	"pool export",
+	"pool import",
+	"vdev attach",
+	"vdev replace",
+	"vdev detach",
+	"vdev online",
+	"vdev offline",
+	"vdev upgrade",
+	"pool clear",
+	"pool scrub",
+	"pool property set",
+	"create",
+	"clone",
+	"destroy",
+	"destroy_begin_sync",
+	"inherit",
+	"property set",
+	"quota set",
+	"permission update",
+	"permission remove",
+	"permission who remove",
+	"promote",
+	"receive",
+	"rename",
+	"reservation set",
+	"replay_inc_sync",
+	"replay_full_sync",
+	"rollback",
+	"snapshot",
+	"filesystem version upgrade",
+	"refquota set",
+	"refreservation set",
+	"pool scrub done",
+	"user hold",
+	"user release",
+	"pool split",
+};
diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h
new file mode 100644
index 000000000000..61327f9aa909
--- /dev/null
+++ b/common/zfs/zfs_comutil.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_ZFS_COMUTIL_H
+#define	_ZFS_COMUTIL_H
+
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+extern boolean_t zfs_allocatable_devs(nvlist_t *);
+extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
+
+extern int zfs_zpl_version_map(int spa_version);
+extern int zfs_spa_version_map(int zpl_version);
+extern const char *zfs_history_event_names[LOG_END];
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_COMUTIL_H */
diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c
new file mode 100644
index 000000000000..83d9edb21389
--- /dev/null
+++ b/common/zfs/zfs_deleg.c
@@ -0,0 +1,237 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#include <sys/ctype.h>
+#else
+#include <stdio.h>
+#include <unistd.h>
+#include <strings.h>
+#include <libnvpair.h>
+#include <ctype.h>
+#endif
+/* XXX includes zfs_context.h, so why bother with the above? */
+#include <sys/dsl_deleg.h>
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+#include "zfs_namecheck.h"
+
+/*
+ * permission table
+ *
+ * Keep this table in sorted order
+ *
+ * This table is used for displaying all permissions for
+ * zfs allow
+ */
+
+zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
+	{ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW},
+	{ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE },
+	{ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE },
+	{ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY },
+	{ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT },
+	{ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE },
+	{ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE },
+	{ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
+	{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
+	{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
+	{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
+	{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
+	{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+	{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+	{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+	{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+	{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
+	{ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
+	{ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
+	{ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF},
+	{NULL, ZFS_DELEG_NOTE_NONE }	/* a NULL z_perm marks the end */
+};
+
+static int
+zfs_valid_permission_name(const char *perm)
+{
+	/* Names that do not canonicalize are checked as "@set" names. */
+	if (zfs_deleg_canonicalize_perm(perm) == NULL)
+		return (permset_namecheck(perm, NULL, NULL));
+	return (0);
+}
+
+const char *
+zfs_deleg_canonicalize_perm(const char *perm)
+{
+	const zfs_deleg_perm_tab_t *ent;
+	zfs_prop_t prop;
+
+	/* A name listed in the permission table is returned unchanged. */
+	for (ent = zfs_deleg_perm_tab; ent->z_perm != NULL; ent++) {
+		if (strcmp(ent->z_perm, perm) == 0)
+			return (perm);
+	}
+	/* Otherwise only a delegatable property name is accepted. */
+	prop = zfs_name_to_prop(perm);
+	if (prop == ZPROP_INVAL || !zfs_prop_delegatable(prop))
+		return (NULL);
+	return (zfs_prop_to_name(prop));
+}
+
+/* Return 0 if who is a well-formed "<type><inherit>$<data>" key, else -1. */
+static int
+zfs_validate_who(char *who)
+{
+	char *p;
+
+	/* Check bytes 0-2 in order: never read past the NUL. */
+	if (who[0] == '\0' || who[1] == '\0' ||
+	    who[2] != ZFS_DELEG_FIELD_SEP_CHR)
+		return (-1);
+
+	switch (who[0]) {
+	case ZFS_DELEG_USER:
+	case ZFS_DELEG_GROUP:
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_GROUP_SETS:
+		if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT)
+			return (-1);
+		/* The id field may only contain decimal digits. */
+		for (p = &who[3]; *p; p++)
+			if (!isdigit((unsigned char)*p))
+				return (-1);
+		break;
+	case ZFS_DELEG_NAMED_SET:
+	case ZFS_DELEG_NAMED_SET_SETS:
+		if (who[1] != ZFS_DELEG_NA)
+			return (-1);
+		return (permset_namecheck(&who[3], NULL, NULL));
+	case ZFS_DELEG_CREATE:
+	case ZFS_DELEG_CREATE_SETS:
+		if (who[1] != ZFS_DELEG_NA)
+			return (-1);
+		if (who[3] != '\0')
+			return (-1);
+		break;
+	case ZFS_DELEG_EVERYONE:
+	case ZFS_DELEG_EVERYONE_SETS:
+		if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT)
+			return (-1);
+		if (who[3] != '\0')
+			return (-1);
+		break;
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Return 0 if every entry name passes zfs_validate_who() and every
+ * permission name under it is valid; return -1 otherwise.
+ */
+int
+zfs_deleg_verify_nvlist(nvlist_t *nvp)
+{
+	nvpair_t *who, *perm;
+	nvlist_t *perms;
+	int err;
+
+	/* A missing or empty list is invalid. */
+	if (nvp == NULL || (who = nvlist_next_nvpair(nvp, NULL)) == NULL)
+		return (-1);
+
+	for (; who != NULL; who = nvlist_next_nvpair(nvp, who)) {
+		if (zfs_validate_who(nvpair_name(who)) != 0)
+			return (-1);
+
+		/* ENOENT skips this entry; any other lookup error fails. */
+		err = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms);
+		if (err == ENOENT)
+			continue;
+		if (err != 0)
+			return (-1);
+
+		/* A permission list with no names in it is rejected. */
+		perm = nvlist_next_nvpair(perms, NULL);
+		if (perm == NULL)
+			return (-1);
+
+		for (; perm != NULL; perm = nvlist_next_nvpair(perms, perm)) {
+			if (zfs_valid_permission_name(nvpair_name(perm)) != 0)
+				return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Build the base attribute name for a delegation entry.  Base attribute
+ * names are the "key" used to locate the jump objects holding the actual
+ * permissions; they encode the entry type and whether the permission is
+ * local or descendent.
+ *
+ * Arguments:
+ * attr - output buffer for the attribute name, assumed to be
+ *        ZFS_MAX_DELEG_NAME bytes long.
+ * type - type of entry to construct
+ * inheritchr - inheritance type (local, descendent, or NA for create and
+ *              permission set definitions)
+ * data - either a permission set name or a pointer to a 64 bit uid/gid.
+ */
+void
+zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type,
+    char inheritchr, void *data)
+{
+	const char sep = ZFS_DELEG_FIELD_SEP_CHR;
+
+	switch (type) {
+	case ZFS_DELEG_USER:
+	case ZFS_DELEG_GROUP:
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_GROUP_SETS:
+		/* data points at the numeric id */
+		(void) snprintf(attr, ZFS_MAX_DELEG_NAME, "%c%c%c%lld", type,
+		    inheritchr, sep, (longlong_t)*(uint64_t *)data);
+		break;
+	case ZFS_DELEG_NAMED_SET:
+	case ZFS_DELEG_NAMED_SET_SETS:
+		/* data is the set name; the inherit slot is always '-' */
+		(void) snprintf(attr, ZFS_MAX_DELEG_NAME, "%c-%c%s", type,
+		    sep, (char *)data);
+		break;
+	case ZFS_DELEG_CREATE:
+	case ZFS_DELEG_CREATE_SETS:
+		(void) snprintf(attr, ZFS_MAX_DELEG_NAME, "%c-%c", type, sep);
+		break;
+	case ZFS_DELEG_EVERYONE:
+	case ZFS_DELEG_EVERYONE_SETS:
+		(void) snprintf(attr, ZFS_MAX_DELEG_NAME, "%c%c%c", type,
+		    inheritchr, sep);
+		break;
+	default:
+		ASSERT(!"bad zfs_deleg_who_type_t");
+	}
+}
diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h
new file mode 100644
index 000000000000..b4cb8e2b4e37
--- /dev/null
+++ b/common/zfs/zfs_deleg.h
@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_ZFS_DELEG_H
+#define	_ZFS_DELEG_H
+
+#include <sys/fs/zfs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	ZFS_DELEG_SET_NAME_CHR		'@'		/* set name lead char */
+#define	ZFS_DELEG_FIELD_SEP_CHR		'$'		/* field separator */
+
+/*
+ * Max name length for a delegation attribute
+ */
+#define	ZFS_MAX_DELEG_NAME	128
+
+#define	ZFS_DELEG_LOCAL		'l'
+#define	ZFS_DELEG_DESCENDENT	'd'
+#define	ZFS_DELEG_NA		'-'
+
+typedef enum {
+	ZFS_DELEG_NOTE_CREATE,
+	ZFS_DELEG_NOTE_DESTROY,
+	ZFS_DELEG_NOTE_SNAPSHOT,
+	ZFS_DELEG_NOTE_ROLLBACK,
+	ZFS_DELEG_NOTE_CLONE,
+	ZFS_DELEG_NOTE_PROMOTE,
+	ZFS_DELEG_NOTE_RENAME,
+	ZFS_DELEG_NOTE_RECEIVE,
+	ZFS_DELEG_NOTE_ALLOW,
+	ZFS_DELEG_NOTE_USERPROP,
+	ZFS_DELEG_NOTE_MOUNT,
+	ZFS_DELEG_NOTE_SHARE,
+	ZFS_DELEG_NOTE_USERQUOTA,
+	ZFS_DELEG_NOTE_GROUPQUOTA,
+	ZFS_DELEG_NOTE_USERUSED,
+	ZFS_DELEG_NOTE_GROUPUSED,
+	ZFS_DELEG_NOTE_HOLD,
+	ZFS_DELEG_NOTE_RELEASE,
+	ZFS_DELEG_NOTE_DIFF,
+	ZFS_DELEG_NOTE_NONE
+} zfs_deleg_note_t;
+
+typedef struct zfs_deleg_perm_tab {
+	char *z_perm;
+	zfs_deleg_note_t z_note;
+} zfs_deleg_perm_tab_t;
+
+extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[];
+
+int zfs_deleg_verify_nvlist(nvlist_t *nvlist);
+void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type,
+    char checkflag, void *data);
+const char *zfs_deleg_canonicalize_perm(const char *perm);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_DELEG_H */
diff --git a/common/zfs/zfs_fletcher.c b/common/zfs/zfs_fletcher.c
new file mode 100644
index 000000000000..fa43ce6bdb5d
--- /dev/null
+++ b/common/zfs/zfs_fletcher.c
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Fletcher Checksums
+ * ------------------
+ *
+ * ZFS's 2nd and 4th order Fletcher checksums are defined by the following
+ * recurrence relations:
+ *
+ *	a  = a    + f
+ *	 i    i-1    i-1
+ *
+ *	b  = b    + a
+ *	 i    i-1    i
+ *
+ *	c  = c    + b		(fletcher-4 only)
+ *	 i    i-1    i
+ *
+ *	d  = d    + c		(fletcher-4 only)
+ *	 i    i-1    i
+ *
+ * Where
+ *	a_0 = b_0 = c_0 = d_0 = 0
+ * and
+ *	f_0 .. f_(n-1) are the input data.
+ *
+ * Using standard techniques, these translate into the following series:
+ *
+ *	     __n_			     __n_
+ *	     \   |			     \   |
+ *	a  =  >     f			b  =  >     i * f
+ *	 n   /___|   n - i		 n   /___|	 n - i
+ *	     i = 1			     i = 1
+ *
+ *
+ *	     __n_			     __n_
+ *	     \   |  i*(i+1)		     \   |  i*(i+1)*(i+2)
+ *	c  =  >     ------- f		d  =  >     ------------- f
+ *	 n   /___|     2     n - i	 n   /___|	  6	   n - i
+ *	     i = 1			     i = 1
+ *
+ * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
+ * Since the additions are done mod (2^64), errors in the high bits may not
+ * be noticed.  For this reason, fletcher-2 is deprecated.
+ *
+ * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
+ * A conservative estimate of how big the buffer can get before we overflow
+ * can be estimated using f_i = 0xffffffff for all i:
+ *
+ * % bc
+ *  f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
+ * 2264
+ *  quit
+ * %
+ *
+ * So blocks of up to 2k will not overflow.  Our largest block size is
+ * 128k, which has 32k 4-byte words, so we can compute the largest possible
+ * accumulators, then divide by 2^64 to figure the max amount of overflow:
+ *
+ * % bc
+ *  a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
+ *  a/2^64;b/2^64;c/2^64;d/2^64
+ * 0
+ * 0
+ * 1365
+ * 11186858
+ *  quit
+ * %
+ *
+ * So a and b cannot overflow.  To make sure each bit of input has some
+ * effect on the contents of c and d, we can look at what the factors of
+ * the coefficients in the equations for c_n and d_n are.  The number of 2s
+ * in the factors determines the lowest set bit in the multiplier.  Running
+ * through the cases for n*(n+1)/2 reveals that the highest power of 2 is
+ * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15.  So while some data may overflow
+ * the 64-bit accumulators, every bit of every f_i affects every accumulator,
+ * even for 128k blocks.
+ *
+ * If we wanted to make a stronger version of fletcher4 (fletcher4c?),
+ * we could do our calculations mod (2^32 - 1) by adding in the carries
+ * periodically, and store the number of carries in the top 32-bits.
+ *
+ * --------------------
+ * Checksum Performance
+ * --------------------
+ *
+ * There are two interesting components to checksum performance: cached and
+ * uncached performance.  With cached data, fletcher-2 is about four times
+ * faster than fletcher-4.  With uncached data, the performance difference is
+ * negligible, since the cost of a cache fill dominates the processing time.
+ * Even though fletcher-4 is slower than fletcher-2, it is still a pretty
+ * efficient pass over the data.
+ *
+ * In normal operation, the data which is being checksummed is in a buffer
+ * which has been filled either by:
+ *
+ *	1. a compression step, which will be mostly cached, or
+ *	2. a bcopy() or copyin(), which will be uncached (because the
+ *	   copy is cache-bypassing).
+ *
+ * For both cached and uncached data, both fletcher checksums are much faster
+ * than sha-256, and slower than 'off', which doesn't touch the data at all.
+ */
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/byteorder.h>
+#include <sys/zio.h>
+#include <sys/spa.h>
+
+void
+fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint64_t *word = buf;
+	const uint64_t *const limit = word + (size / sizeof (uint64_t));
+	uint64_t a0 = 0, a1 = 0, b0 = 0, b1 = 0;
+
+	/* Pairwise; NOTE(review): assumes size is a multiple of 16. */
+	for (; word < limit; word += 2) {
+		a0 += word[0];
+		a1 += word[1];
+		b0 += a0;
+		b1 += a1;
+	}
+	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+}
+
+void
+fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint64_t *word = buf;
+	const uint64_t *const limit = word + (size / sizeof (uint64_t));
+	uint64_t a0 = 0, a1 = 0, b0 = 0, b1 = 0;
+
+	/* Pairwise, each word byte-swapped before it is summed. */
+	for (; word < limit; word += 2) {
+		a0 += BSWAP_64(word[0]);
+		a1 += BSWAP_64(word[1]);
+		b0 += a0;
+		b1 += a1;
+	}
+	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+}
+
+void
+fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint32_t *word = buf;
+	const uint32_t *const limit = word + (size / sizeof (uint32_t));
+	uint64_t a = 0, b = 0, c = 0, d = 0;
+
+	/* Each 32-bit word is added into the 64-bit running sums. */
+	for (; word < limit; word++) {
+		a += *word;
+		b += a;
+		c += b;
+		d += c;
+	}
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+void
+fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint32_t *word = buf;
+	const uint32_t *const limit = word + (size / sizeof (uint32_t));
+	uint64_t a = 0, b = 0, c = 0, d = 0;
+
+	/* Each 32-bit word is byte-swapped, then added into the sums. */
+	for (; word < limit; word++) {
+		a += BSWAP_32(*word);
+		b += a;
+		c += b;
+		d += c;
+	}
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+void
+fletcher_4_incremental_native(const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	const uint32_t *word = buf;
+	const uint32_t *const limit = word + (size / sizeof (uint32_t));
+	/* Resume from the sums currently held in *zcp. */
+	uint64_t a = zcp->zc_word[0];
+	uint64_t b = zcp->zc_word[1];
+	uint64_t c = zcp->zc_word[2];
+	uint64_t d = zcp->zc_word[3];
+
+	/* Each 32-bit word is added into the 64-bit running sums. */
+	for (; word < limit; word++) {
+		a += *word;
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+void
+fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	const uint32_t *word = buf;
+	const uint32_t *const limit = word + (size / sizeof (uint32_t));
+	/* Resume from the sums currently held in *zcp. */
+	uint64_t a = zcp->zc_word[0];
+	uint64_t b = zcp->zc_word[1];
+	uint64_t c = zcp->zc_word[2];
+	uint64_t d = zcp->zc_word[3];
+
+	/* Each 32-bit word is byte-swapped, then added into the sums. */
+	for (; word < limit; word++) {
+		a += BSWAP_32(*word);
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
diff --git a/common/zfs/zfs_fletcher.h b/common/zfs/zfs_fletcher.h
new file mode 100644
index 000000000000..b49df0cf4f0f
--- /dev/null
+++ b/common/zfs/zfs_fletcher.h
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZFS_FLETCHER_H
+#define	_ZFS_FLETCHER_H
+
+#include <sys/types.h>
+#include <sys/spa.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * fletcher checksum functions
+ */
+
+void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
+void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_incremental_native(const void *, uint64_t,
+    zio_cksum_t *);
+void fletcher_4_incremental_byteswap(const void *, uint64_t,
+    zio_cksum_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_FLETCHER_H */
diff --git a/common/zfs/zfs_namecheck.c b/common/zfs/zfs_namecheck.c
new file mode 100644
index 000000000000..5cfafea471b3
--- /dev/null
+++ b/common/zfs/zfs_namecheck.c
@@ -0,0 +1,345 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Common name validation routines for ZFS.  These routines are shared by the
+ * userland code as well as the ioctl() layer to ensure that we don't
+ * inadvertently expose a hole through direct ioctl()s that never gets tested.
+ * In userland, however, we want significantly more information about _why_ the
+ * name is invalid.  In the kernel, we only care whether it's valid or not.
+ * Each routine therefore takes a 'namecheck_err_t' which describes exactly why
+ * the name failed to validate.
+ *
+ * Each function returns 0 on success, -1 on error.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include <sys/param.h>
+#include <sys/nvpair.h>
+#include "zfs_namecheck.h"
+#include "zfs_deleg.h"
+
+static int
+valid_char(char c)
+{
+	/* Alphanumerics pass, as do the five characters "-_.: ". */
+	return ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
+	    (c >= 'a' && c <= 'z') || c == ' ' || c == '-' || c == '.' ||
+	    c == ':' || c == '_');
+}
+
+/*
+ * A snapshot name is non-empty, shorter than MAXNAMELEN, and consists only
+ * of alphanumeric characters plus:
+ *
+ * 	[-_.: ]
+ */
+int
+snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	const char *cp = path;
+
+	if (strlen(path) >= MAXNAMELEN) {
+		if (why != NULL)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+	if (*path == '\0') {
+		if (why != NULL)
+			*why = NAME_ERR_EMPTY_COMPONENT;
+		return (-1);
+	}
+
+	/* Skip the valid prefix; stopping early means a bad character. */
+	while (*cp != '\0' && valid_char(*cp))
+		cp++;
+	if (*cp == '\0')
+		return (0);
+	/* what is written whenever why is set: pass both or neither. */
+	if (why != NULL) {
+		*why = NAME_ERR_INVALCHAR;
+		*what = *cp;
+	}
+	return (-1);
+}
+
+
+/*
+ * A permission set name is the character '@' followed by a name obeying
+ * the snapshot name rules; the whole string, '@' included, must be
+ * shorter than ZFS_PERMSET_MAXLEN characters.
+ */
+int
+permset_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	if (strlen(path) >= ZFS_PERMSET_MAXLEN) {
+		if (why != NULL)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* Everything after a leading '@' is vetted as a snapshot name. */
+	if (*path == '@')
+		return (snapshot_namecheck(path + 1, why, what));
+
+	if (why != NULL) {
+		*why = NAME_ERR_NO_AT;
+		*what = *path;
+	}
+	return (-1);
+}
+
+/*
+ * Dataset names must be of the following form:
+ *
+ * 	[component][/]*[component][@component]
+ *
+ * Where each component is made up of alphanumeric characters plus the following
+ * characters (note that the set includes the space character):
+ *
+ * 	[-_.: %]
+ *
+ * We allow '%' here as we use that character internally to create unique
+ * names for temporary clones (for online recv).
+ */
+int
+dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	const char *loc, *end;
+	int found_snapshot;
+
+	/*
+	 * Make sure the name is not too long.
+	 *
+	 * ZFS_MAXNAMELEN is the maximum dataset length used in the userland
+	 * which is the same as MAXNAMELEN used in the kernel.
+	 * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all
+	 * places using MAXNAMELEN.
+	 */
+
+	if (strlen(path) >= MAXNAMELEN) {
+		if (why)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* Explicitly check for a leading slash.  */
+	if (path[0] == '/') {
+		if (why)
+			*why = NAME_ERR_LEADING_SLASH;
+		return (-1);
+	}
+
+	if (path[0] == '\0') {
+		if (why)
+			*why = NAME_ERR_EMPTY_COMPONENT;
+		return (-1);
+	}
+
+	loc = path;
+	found_snapshot = 0;
+	for (;;) {
+		/* Find the end of this component */
+		end = loc;
+		while (*end != '/' && *end != '@' && *end != '\0')
+			end++;
+		/* *end == '\0' implies end > path, so end[-1] is safe */
+		if (*end == '\0' && end[-1] == '/') {
+			/* trailing slashes are not allowed */
+			if (why)
+				*why = NAME_ERR_TRAILING_SLASH;
+			return (-1);
+		}
+
+		/* Zero-length components are not allowed */
+		if (loc == end) {
+			if (why) {
+				/*
+				 * Make sure this is really a zero-length
+				 * component and not a '@@'.
+				 */
+				if (*end == '@' && found_snapshot) {
+					*why = NAME_ERR_MULTIPLE_AT;
+				} else {
+					*why = NAME_ERR_EMPTY_COMPONENT;
+				}
+			}
+
+			return (-1);
+		}
+
+		/* Validate the contents of this component */
+		while (loc != end) {
+			if (!valid_char(*loc) && *loc != '%') {
+				if (why) {
+					*why = NAME_ERR_INVALCHAR;
+					*what = *loc;
+				}
+				return (-1);
+			}
+			loc++;
+		}
+
+		/* If we've reached the end of the string, we're OK */
+		if (*end == '\0')
+			return (0);
+
+		if (*end == '@') {
+			/*
+			 * If we've found an @ symbol, indicate that we're in
+			 * the snapshot component, and report a second '@'
+			 * character as an error.
+			 */
+			if (found_snapshot) {
+				if (why)
+					*why = NAME_ERR_MULTIPLE_AT;
+				return (-1);
+			}
+
+			found_snapshot = 1;
+		}
+
+		/*
+		 * A '/' after the '@' is not allowed in a snapshot name;
+		 * it is reported as NAME_ERR_TRAILING_SLASH.
+		 */
+		if (*end == '/' && found_snapshot) {
+			if (why)
+				*why = NAME_ERR_TRAILING_SLASH;
+			return (-1);
+		}
+
+		/* Update to the next component */
+		loc = end + 1;
+	}
+}
+
+
+/*
+ * A mountpoint name must begin with '/' and have the following form:
+ *
+ *	/[component][/]*[component][/]
+ */
+int
+mountpoint_namecheck(const char *path, namecheck_err_t *why)
+{
+	const char *cp;
+	size_t complen = 0;
+
+	/*
+	 * Reject any component that is too long up front.  Otherwise the
+	 * mkdir of the mountpoint would fail, but only after the mountpoint
+	 * property had been set to a value that can never be mounted.
+	 * Extra slashes are OK, they will be tossed by the mountpoint mkdir.
+	 */
+
+	if (path == NULL || path[0] != '/') {
+		if (why != NULL)
+			*why = NAME_ERR_LEADING_SLASH;
+		return (-1);
+	}
+
+	/* Walk the text after the leading slash, one component at a time. */
+	for (cp = path + 1; ; cp++) {
+		if (*cp != '/' && *cp != '\0') {
+			complen++;
+			continue;
+		}
+
+		/* End of a component: check its length, then start over. */
+		if (complen >= MAXNAMELEN) {
+			if (why != NULL)
+				*why = NAME_ERR_TOOLONG;
+			return (-1);
+		}
+		if (*cp == '\0')
+			return (0);
+		complen = 0;
+	}
+}
+
+/*
+ * A pool name is made up of the characters accepted by valid_char() and
+ * must begin with a letter.  The pool names 'raidz' and 'mirror' are
+ * reserved and cannot be used, and neither can a disk-like name, i.e. one
+ * that starts with 'c' followed by a digit.
+ */
+int
+pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
+{
+	const char *cp;
+
+	/*
+	 * Make sure the name is not too long.
+	 *
+	 * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland
+	 * which is the same as MAXNAMELEN used in the kernel.
+	 * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all
+	 * places using MAXNAMELEN.
+	 */
+	if (strlen(pool) >= MAXNAMELEN) {
+		if (why != NULL)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* Every character is vetted before any whole-name rule is applied. */
+	for (cp = pool; *cp != '\0'; cp++) {
+		if (valid_char(*cp))
+			continue;
+		if (why != NULL) {
+			*why = NAME_ERR_INVALCHAR;
+			*what = *cp;
+		}
+		return (-1);
+	}
+
+	/* The first character must be a letter (an empty name fails here). */
+	if ((pool[0] < 'a' || pool[0] > 'z') &&
+	    (pool[0] < 'A' || pool[0] > 'Z')) {
+		if (why != NULL)
+			*why = NAME_ERR_NOLETTER;
+		return (-1);
+	}
+
+	if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
+		if (why != NULL)
+			*why = NAME_ERR_RESERVED;
+		return (-1);
+	}
+	/* A 'c' followed by a digit looks like a disk name. */
+	if (pool[0] == 'c' && pool[1] >= '0' && pool[1] <= '9') {
+		if (why != NULL)
+			*why = NAME_ERR_DISKLIKE;
+		return (-1);
+	}
+
+	return (0);
+}
diff --git a/common/zfs/zfs_namecheck.h b/common/zfs/zfs_namecheck.h
new file mode 100644
index 000000000000..7711da099be9
--- /dev/null
+++ b/common/zfs/zfs_namecheck.h
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZFS_NAMECHECK_H
+#define	_ZFS_NAMECHECK_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+	NAME_ERR_LEADING_SLASH,		/* name begins with leading slash */
+	NAME_ERR_EMPTY_COMPONENT,	/* name contains an empty component */
+	NAME_ERR_TRAILING_SLASH,	/* name ends with a slash */
+	NAME_ERR_INVALCHAR,		/* invalid character found */
+	NAME_ERR_MULTIPLE_AT,		/* multiple '@' characters found */
+	NAME_ERR_NOLETTER,		/* pool doesn't begin with a letter */
+	NAME_ERR_RESERVED,		/* entire name is reserved */
+	NAME_ERR_DISKLIKE,		/* reserved disk name (c[0-9].*) */
+	NAME_ERR_TOOLONG,		/* name is too long */
+	NAME_ERR_NO_AT,			/* permission set is missing '@' */
+} namecheck_err_t;
+
+#define	ZFS_PERMSET_MAXLEN	64
+
+int pool_namecheck(const char *, namecheck_err_t *, char *);
+int dataset_namecheck(const char *, namecheck_err_t *, char *);
+int mountpoint_namecheck(const char *, namecheck_err_t *);
+int snapshot_namecheck(const char *, namecheck_err_t *, char *);
+int permset_namecheck(const char *, namecheck_err_t *, char *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_NAMECHECK_H */
diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c
new file mode 100644
index 000000000000..f29bcf62718f
--- /dev/null
+++ b/common/zfs/zfs_prop.c
@@ -0,0 +1,595 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Portions Copyright 2010 Robert Milkowski */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/u8_textprep.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_znode.h>
+
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS];
+
+/* Indexed by zfs_userquota_prop_t; entries must stay in that enum's order. */
+const char *zfs_userquota_prop_prefixes[] = {
+	"userused@",
+	"userquota@",
+	"groupused@",
+	"groupquota@"
+};
+
+zprop_desc_t *
+zfs_prop_get_table(void)
+{
+	return (zfs_prop_table);	/* indexed by zfs_prop_t */
+}
+
+/*
+ * Registers every dataset property (ZFS_PROP_*) through the zprop_register_*()
+ * routines; index-type properties also pass their name/value lookup table.
+ */
+void
+zfs_prop_init(void)
+{
+	static zprop_index_t checksum_table[] = {
+		{ "on",		ZIO_CHECKSUM_ON },
+		{ "off",	ZIO_CHECKSUM_OFF },
+		{ "fletcher2",	ZIO_CHECKSUM_FLETCHER_2 },
+		{ "fletcher4",	ZIO_CHECKSUM_FLETCHER_4 },
+		{ "sha256",	ZIO_CHECKSUM_SHA256 },
+		{ NULL }
+	};
+
+	static zprop_index_t dedup_table[] = {
+		{ "on",		ZIO_CHECKSUM_ON },
+		{ "off",	ZIO_CHECKSUM_OFF },
+		{ "verify",	ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY },
+		{ "sha256",	ZIO_CHECKSUM_SHA256 },
+		{ "sha256,verify",
+				ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
+		{ NULL }
+	};
+
+	static zprop_index_t compress_table[] = {
+		{ "on",		ZIO_COMPRESS_ON },
+		{ "off",	ZIO_COMPRESS_OFF },
+		{ "lzjb",	ZIO_COMPRESS_LZJB },
+		{ "gzip",	ZIO_COMPRESS_GZIP_6 },	/* gzip default */
+		{ "gzip-1",	ZIO_COMPRESS_GZIP_1 },
+		{ "gzip-2",	ZIO_COMPRESS_GZIP_2 },
+		{ "gzip-3",	ZIO_COMPRESS_GZIP_3 },
+		{ "gzip-4",	ZIO_COMPRESS_GZIP_4 },
+		{ "gzip-5",	ZIO_COMPRESS_GZIP_5 },
+		{ "gzip-6",	ZIO_COMPRESS_GZIP_6 },
+		{ "gzip-7",	ZIO_COMPRESS_GZIP_7 },
+		{ "gzip-8",	ZIO_COMPRESS_GZIP_8 },
+		{ "gzip-9",	ZIO_COMPRESS_GZIP_9 },
+		{ "zle",	ZIO_COMPRESS_ZLE },
+		{ NULL }
+	};
+
+	static zprop_index_t snapdir_table[] = {
+		{ "hidden",	ZFS_SNAPDIR_HIDDEN },
+		{ "visible",	ZFS_SNAPDIR_VISIBLE },
+		{ NULL }
+	};
+
+	static zprop_index_t acl_inherit_table[] = {
+		{ "discard",	ZFS_ACL_DISCARD },
+		{ "noallow",	ZFS_ACL_NOALLOW },
+		{ "restricted",	ZFS_ACL_RESTRICTED },
+		{ "passthrough", ZFS_ACL_PASSTHROUGH },
+		{ "secure",	ZFS_ACL_RESTRICTED }, /* bkwrd compatibility */
+		{ "passthrough-x", ZFS_ACL_PASSTHROUGH_X },
+		{ NULL }
+	};
+
+	static zprop_index_t case_table[] = {
+		{ "sensitive",		ZFS_CASE_SENSITIVE },
+		{ "insensitive",	ZFS_CASE_INSENSITIVE },
+		{ "mixed",		ZFS_CASE_MIXED },
+		{ NULL }
+	};
+
+	static zprop_index_t copies_table[] = {
+		{ "1",		1 },
+		{ "2",		2 },
+		{ "3",		3 },
+		{ NULL }
+	};
+
+	/*
+	 * Use the unique flags we have to send to u8_strcmp() and/or
+	 * u8_textprep() to represent the various normalization property
+	 * values.
+	 */
+	static zprop_index_t normalize_table[] = {
+		{ "none",	0 },
+		{ "formD",	U8_TEXTPREP_NFD },
+		{ "formKC",	U8_TEXTPREP_NFKC },
+		{ "formC",	U8_TEXTPREP_NFC },
+		{ "formKD",	U8_TEXTPREP_NFKD },
+		{ NULL }
+	};
+
+	static zprop_index_t version_table[] = {
+		{ "1",		1 },
+		{ "2",		2 },
+		{ "3",		3 },
+		{ "4",		4 },
+		{ "5",		5 },
+		{ "current",	ZPL_VERSION },
+		{ NULL }
+	};
+
+	static zprop_index_t boolean_table[] = {
+		{ "off",	0 },
+		{ "on",		1 },
+		{ NULL }
+	};
+
+	static zprop_index_t logbias_table[] = {
+		{ "latency",	ZFS_LOGBIAS_LATENCY },
+		{ "throughput",	ZFS_LOGBIAS_THROUGHPUT },
+		{ NULL }
+	};
+
+	static zprop_index_t canmount_table[] = {
+		{ "off",	ZFS_CANMOUNT_OFF },
+		{ "on",		ZFS_CANMOUNT_ON },
+		{ "noauto",	ZFS_CANMOUNT_NOAUTO },
+		{ NULL }
+	};
+
+	static zprop_index_t cache_table[] = {
+		{ "none",	ZFS_CACHE_NONE },
+		{ "metadata",	ZFS_CACHE_METADATA },
+		{ "all",	ZFS_CACHE_ALL },
+		{ NULL }
+	};
+
+	static zprop_index_t sync_table[] = {
+		{ "standard",	ZFS_SYNC_STANDARD },
+		{ "always",	ZFS_SYNC_ALWAYS },
+		{ "disabled",	ZFS_SYNC_DISABLED },
+		{ NULL }
+	};
+
+	/* inherit index properties */
+	zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "standard | always | disabled", "SYNC", sync_table);
+	zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
+	    ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
+	    ZFS_TYPE_VOLUME,
+	    "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
+	    checksum_table);
+	zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "on | off | verify | sha256[,verify]", "DEDUP",
+	    dedup_table);
+	zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
+	    ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS",
+	    compress_table);
+	zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+	    "hidden | visible", "SNAPDIR", snapdir_table);
+	zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
+	    ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+	    "discard | noallow | restricted | passthrough | passthrough-x",
+	    "ACLINHERIT", acl_inherit_table);
+	zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "1 | 2 | 3", "COPIES", copies_table);
+	zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
+	    ZFS_CACHE_ALL, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
+	    "all | none | metadata", "PRIMARYCACHE", cache_table);
+	zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
+	    ZFS_CACHE_ALL, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
+	    "all | none | metadata", "SECONDARYCACHE", cache_table);
+	zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "latency | throughput", "LOGBIAS", logbias_table);
+
+	/* inherit index (boolean) properties */
+	zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table);
+	zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES",
+	    boolean_table);
+	zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC",
+	    boolean_table);
+	zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID",
+	    boolean_table);
+	zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY",
+	    boolean_table);
+	zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
+	zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR",
+	    boolean_table);
+	zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table);
+	zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
+	    boolean_table);
+
+	/* default index properties */
+	zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+	    "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table);
+	zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
+	    PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
+	    "CANMOUNT", canmount_table);
+
+	/* readonly index (boolean) properties */
+	zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
+	    ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
+	zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
+	    PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
+	    boolean_table);
+
+	/* set once index properties */
+	zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
+	    PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+	    "none | formC | formD | formKC | formKD", "NORMALIZATION",
+	    normalize_table);
+	zprop_register_index(ZFS_PROP_CASE, "casesensitivity",
+	    ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
+	    ZFS_TYPE_SNAPSHOT,
+	    "sensitive | insensitive | mixed", "CASE", case_table);
+
+	/* set once index (boolean) properties */
+	zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+	    "on | off", "UTF8ONLY", boolean_table);
+
+	/* string properties */
+	zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
+	zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
+	    "MOUNTPOINT");
+	zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off",
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options",
+	    "SHARENFS");
+	zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
+	    ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE");
+	zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off",
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+	    "on | off | sharemgr(1M) options", "SHARESMB");
+	zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel",
+	    ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET,
+	    "<sensitivity label>", "MLSLABEL");
+
+	/* readonly number properties */
+	zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
+	    ZFS_TYPE_DATASET, "<size>", "USED");
+	zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
+	zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
+	zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
+	    PROP_READONLY, ZFS_TYPE_DATASET,
+	    "<1.00x or higher if compressed>", "RATIO");
+	zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
+	    ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
+	    ZFS_TYPE_VOLUME, "512 to 128k, power of 2",	"VOLBLOCK");
+	zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
+	    PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+	    "USEDSNAP");
+	zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0,
+	    PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+	    "USEDDS");
+	zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0,
+	    PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+	    "USEDCHILD");
+	zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
+	    PROP_READONLY,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
+	zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
+	    ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
+
+	/* default number properties */
+	zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
+	    ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA");
+	zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0,
+	    PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "<size> | none", "RESERV");
+	zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
+	    ZFS_TYPE_VOLUME, "<size>", "VOLSIZE");
+	zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
+	    ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA");
+	zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
+	    PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "<size> | none", "REFRESERV");
+
+	/* inherit number properties */
+	zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
+	    SPA_MAXBLOCKSIZE, PROP_INHERIT,
+	    ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE");
+
+	/* hidden properties */
+	zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG");
+	zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES");
+	zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "NAME");
+	zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions",
+	    PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
+	zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
+	    PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
+	    "STMF_SBD_LU");
+	zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "GUID");
+	zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
+	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET,
+	    "USERACCOUNTING");
+	zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE");
+	zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
+	    PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
+
+	/* Property to be removed once libbe is integrated */
+	zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
+	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
+	    "PRIV_PROP");
+
+	/* oddball properties */
+	zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
+	    NULL, PROP_READONLY, ZFS_TYPE_DATASET,
+	    "<date>", "CREATION", B_FALSE, B_TRUE, NULL);
+}
+
+boolean_t
+zfs_prop_delegatable(zfs_prop_t prop)
+{
+	/*
+	 * mlslabel is never delegatable; any other property is delegatable
+	 * unless its attribute is PROP_READONLY.
+	 */
+	if (prop == ZFS_PROP_MLSLABEL)
+		return (B_FALSE);
+	return (zfs_prop_table[prop].pd_attr != PROP_READONLY);
+}
+
+/*
+ * Maps a zfs dataset property name to its ID via zprop_name_to_prop().
+ */
+zfs_prop_t
+zfs_name_to_prop(const char *propname)
+{
+	return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET));
+}
+
+/*
+ * User property names are limited to lowercase letters, digits, and the
+ * punctuation characters '-', '_', '.' and ':'.
+ */
+static int
+valid_char(char c)
+{
+	int alnum = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
+
+	return (alnum || c == '-' || c == '_' || c == '.' || c == ':');
+}
+
+/*
+ * Returns true if this is a valid user-defined property name: every character
+ * passes valid_char() and at least one ':' separator is present.
+ */
+boolean_t
+zfs_prop_user(const char *name)
+{
+	const char *cp;
+	boolean_t foundsep = B_FALSE;
+
+	/*
+	 * Walk to the terminating NUL; calling strlen() in the loop condition
+	 * would rescan the whole string on every iteration.
+	 */
+	for (cp = name; *cp != '\0'; cp++) {
+		if (!valid_char(*cp))
+			return (B_FALSE);
+		if (*cp == ':')
+			foundsep = B_TRUE;
+	}
+
+	return (foundsep);
+}
+
+/*
+ * Returns true if the name starts with one of the userspace-type property
+ * prefixes (those ending in '@').  Whatever follows the prefix is not
+ * examined, so any character -- even another '@', as in a SID
+ * user@domain -- is accepted there.
+ */
+boolean_t
+zfs_prop_userquota(const char *name)
+{
+	int i;
+
+	for (i = 0; i < ZFS_NUM_USERQUOTA_PROPS; i++) {
+		const char *prefix = zfs_userquota_prop_prefixes[i];
+
+		if (strncmp(name, prefix, strlen(prefix)) == 0)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Conversions between the user view of an index property (a string) and its
+ * internal representation (a uint64_t), for dataset properties.
+ */
+int
+zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index)
+{
+	return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET));
+}
+
+int
+zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string)
+{
+	return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET));
+}
+
+uint64_t
+zfs_prop_random_value(zfs_prop_t prop, uint64_t seed)
+{	/* forwards to zprop_random_value() with ZFS_TYPE_DATASET */
+	return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET));
+}
+
+/*
+ * Returns TRUE if property 'prop' applies to any dataset type set in 'types'.
+ */
+boolean_t
+zfs_prop_valid_for_type(int prop, zfs_type_t types)
+{
+	return (zprop_valid_for_type(prop, types));
+}
+
+zprop_type_t
+zfs_prop_get_type(zfs_prop_t prop)
+{	/* pd_proptype: number, string or index (see zprop_type_t) */
+	return (zfs_prop_table[prop].pd_proptype);
+}
+
+/*
+ * Returns TRUE if the property is readonly or set-once (PROP_ONETIME).
+ */
+boolean_t
+zfs_prop_readonly(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
+	    zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+/*
+ * Returns TRUE if the property may only be set once (PROP_ONETIME).
+ */
+boolean_t
+zfs_prop_setonce(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+const char *
+zfs_prop_default_string(zfs_prop_t prop)
+{	/* pd_strdefault: the default for string properties */
+	return (zfs_prop_table[prop].pd_strdefault);
+}
+
+uint64_t
+zfs_prop_default_numeric(zfs_prop_t prop)
+{	/* pd_numdefault: the default for boolean / index / number */
+	return (zfs_prop_table[prop].pd_numdefault);
+}
+
+/*
+ * Given a dataset property ID, returns the corresponding name.  The ID is used
+ * to index zfs_prop_table without a range check, so it must be valid.
+ */
+const char *
+zfs_prop_to_name(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_name);
+}
+
+/*
+ * Returns TRUE if the property is inheritable (PROP_INHERIT or PROP_ONETIME).
+ */
+boolean_t
+zfs_prop_inheritable(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_attr == PROP_INHERIT ||
+	    zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+#ifndef _KERNEL
+
+/*
+ * Returns the string describing the set of acceptable values for the given
+ * zfs property (pd_values), or NULL if it cannot be set.
+ */
+const char *
+zfs_prop_values(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_values);
+}
+
+/*
+ * Returns nonzero if this property is a string type.  Note that index types
+ * (compression, checksum) are treated as strings in userland, even though they
+ * are stored numerically on disk.
+ */
+int
+zfs_prop_is_string(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING ||
+	    zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX);
+}
+
+/*
+ * Returns the column header (pd_colname) for the given property.  Used only in
+ * 'zfs list -o', but centralized here with the other property information.
+ */
+const char *
+zfs_prop_column_name(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_colname);
+}
+
+/*
+ * Returns whether the given property should be displayed right-justified
+ * (pd_rightalign) for 'zfs list'.
+ */
+boolean_t
+zfs_prop_align_right(zfs_prop_t prop)
+{
+	return (zfs_prop_table[prop].pd_rightalign);
+}
+
+#endif
diff --git a/common/zfs/zfs_prop.h b/common/zfs/zfs_prop.h
new file mode 100644
index 000000000000..a63262311b3d
--- /dev/null
+++ b/common/zfs/zfs_prop.h
@@ -0,0 +1,129 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_ZFS_PROP_H
+#define	_ZFS_PROP_H
+
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Property value types.  Index types (e.g. compression and checksum) are
+ * numeric in the kernel but presented as strings in userland.
+ */
+typedef enum {
+	PROP_TYPE_NUMBER,	/* numeric value */
+	PROP_TYPE_STRING,	/* string value */
+	PROP_TYPE_INDEX		/* numeric value indexed by string */
+} zprop_type_t;
+
+typedef enum {
+	PROP_DEFAULT,
+	PROP_READONLY,	/* see zfs_prop_readonly(), zpool_prop_readonly() */
+	PROP_INHERIT,	/* see zfs_prop_inheritable() */
+	/*
+	 * ONETIME properties are a sort of conglomeration of READONLY
+	 * and INHERIT.  They can be set only during object creation,
+	 * after that they are READONLY.  If not explicitly set during
+	 * creation, they can be inherited.
+	 */
+	PROP_ONETIME
+} zprop_attr_t;
+
+typedef struct zfs_index {
+	const char *pi_name;	/* user-visible value name, e.g. "on" */
+	uint64_t pi_value;	/* numeric value it stands for */
+} zprop_index_t;
+
+typedef struct {
+	const char *pd_name;		/* human-readable property name */
+	int pd_propnum;			/* property number */
+	zprop_type_t pd_proptype;	/* number, string, or index */
+	const char *pd_strdefault;	/* default for strings */
+	uint64_t pd_numdefault;		/* for boolean / index / number */
+	zprop_attr_t pd_attr;		/* default/readonly/inherit/onetime */
+	int pd_types;			/* bitfield of valid dataset types */
+					/* fs | vol | snap; or pool */
+	const char *pd_values;		/* string telling acceptable values */
+	const char *pd_colname;		/* column header for "zfs list" */
+	boolean_t pd_rightalign;	/* column alignment for "zfs list" */
+	boolean_t pd_visible;		/* do we list this property with the */
+					/* "zfs get" help message */
+	const zprop_index_t *pd_table;	/* for index properties, a table */
+					/* defining the possible values */
+	size_t pd_table_size;		/* number of entries in pd_table[] */
+} zprop_desc_t;
+
+/*
+ * zfs dataset property functions (zfs_prop.c)
+ */
+void zfs_prop_init(void);
+zprop_type_t zfs_prop_get_type(zfs_prop_t);
+boolean_t zfs_prop_delegatable(zfs_prop_t prop);
+zprop_desc_t *zfs_prop_get_table(void);
+
+/*
+ * zpool property functions (zpool_prop.c)
+ */
+void zpool_prop_init(void);
+zprop_type_t zpool_prop_get_type(zpool_prop_t);
+zprop_desc_t *zpool_prop_get_table(void);
+
+/*
+ * Property registration routines called from zfs/zpool_prop_init()
+ */
+void zprop_register_impl(int, const char *, zprop_type_t, uint64_t,
+    const char *, zprop_attr_t, int, const char *, const char *,
+    boolean_t, boolean_t, const zprop_index_t *);
+void zprop_register_string(int, const char *, const char *,
+    zprop_attr_t attr, int, const char *, const char *);
+void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int,
+    const char *, const char *);
+void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int,
+    const char *, const char *, const zprop_index_t *);
+void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
+    int, const char *);
+
+/*
+ * Common routines for zfs and zpool property management
+ */
+int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t);
+int zprop_name_to_prop(const char *, zfs_type_t);
+int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t);
+int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t);
+uint64_t zprop_random_value(int, uint64_t, zfs_type_t);
+const char *zprop_values(int, zfs_type_t);
+size_t zprop_width(int, boolean_t *, zfs_type_t);
+boolean_t zprop_valid_for_type(int, zfs_type_t);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _ZFS_PROP_H */
diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c
new file mode 100644
index 000000000000..988d05de6e20
--- /dev/null
+++ b/common/zfs/zpool_prop.c
@@ -0,0 +1,202 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+
+#include "zfs_prop.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS];
+
+zprop_desc_t *
+zpool_prop_get_table(void)
+{
+	return (zpool_prop_table);	/* indexed by zpool_prop_t */
+}
+
+void
+zpool_prop_init(void)
+{	/* registers every ZPOOL_PROP_* through zprop_register_*() */
+	static zprop_index_t boolean_table[] = {
+		{ "off",	0},
+		{ "on",		1},
+		{ NULL }
+	};
+
+	static zprop_index_t failuremode_table[] = {
+		{ "wait",	ZIO_FAILURE_MODE_WAIT },
+		{ "continue",	ZIO_FAILURE_MODE_CONTINUE },
+		{ "panic",	ZIO_FAILURE_MODE_PANIC },
+		{ NULL }
+	};
+
+	/* string properties */
+	zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
+	    ZFS_TYPE_POOL, "<path>", "ALTROOT");
+	zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
+	    ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
+	zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
+
+	/* readonly number properties */
+	zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<size>", "SIZE");
+	zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<size>", "FREE");
+	zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
+	    PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
+	zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<size>", "CAP");
+	zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<guid>", "GUID");
+	zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<state>", "HEALTH");
+	zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0,
+	    PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
+	    "DEDUP");
+
+	/* default number properties */
+	zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
+	zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
+
+	/* default index (boolean) properties */
+	zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION",
+	    boolean_table);
+	zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
+	zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS",
+	    boolean_table);
+	zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
+	zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0,
+	    PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table);
+
+	/* default index properties */
+	zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
+	    ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
+	    "wait | continue | panic", "FAILMODE", failuremode_table);
+
+	/* hidden properties */
+	zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
+	    PROP_READONLY, ZFS_TYPE_POOL, "NAME");
+}
+
+/*
+ * Given a pool property name, returns the corresponding property ID.
+ */
+zpool_prop_t
+zpool_name_to_prop(const char *propname)
+{
+	return (zprop_name_to_prop(propname, ZFS_TYPE_POOL));
+}
+
+/*
+ * Given a pool property ID, returns the corresponding name.
+ * Assuming the pool property ID is valid.
+ */
+const char *
+zpool_prop_to_name(zpool_prop_t prop)
+{
+	return (zpool_prop_table[prop].pd_name);
+}
+
+zprop_type_t
+zpool_prop_get_type(zpool_prop_t prop)
+{	/* pd_proptype: number, string or index (see zprop_type_t) */
+	return (zpool_prop_table[prop].pd_proptype);
+}
+
+boolean_t
+zpool_prop_readonly(zpool_prop_t prop)
+{	/* PROP_ONETIME is not considered here, unlike zfs_prop_readonly() */
+	return (zpool_prop_table[prop].pd_attr == PROP_READONLY);
+}
+
+const char *
+zpool_prop_default_string(zpool_prop_t prop)
+{	/* pd_strdefault: the default for string properties */
+	return (zpool_prop_table[prop].pd_strdefault);
+}
+
+uint64_t
+zpool_prop_default_numeric(zpool_prop_t prop)
+{	/* pd_numdefault: the default for boolean / index / number */
+	return (zpool_prop_table[prop].pd_numdefault);
+}
+
+int
+zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
+    uint64_t *index)
+{	/* forwards to zprop_string_to_index() with ZFS_TYPE_POOL */
+	return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL));
+}
+
+int
+zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index,
+    const char **string)
+{	/* forwards to zprop_index_to_string() with ZFS_TYPE_POOL */
+	return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL));
+}
+
+uint64_t
+zpool_prop_random_value(zpool_prop_t prop, uint64_t seed)
+{	/* forwards to zprop_random_value() with ZFS_TYPE_POOL */
+	return (zprop_random_value(prop, seed, ZFS_TYPE_POOL));
+}
+
+#ifndef _KERNEL
+
+const char *
+zpool_prop_values(zpool_prop_t prop)
+{	/* pd_values: string telling acceptable values */
+	return (zpool_prop_table[prop].pd_values);
+}
+
+const char *
+zpool_prop_column_name(zpool_prop_t prop)
+{	/* pd_colname: the column header (see zprop_desc_t) */
+	return (zpool_prop_table[prop].pd_colname);
+}
+
+boolean_t
+zpool_prop_align_right(zpool_prop_t prop)
+{	/* pd_rightalign: the column alignment (see zprop_desc_t) */
+	return (zpool_prop_table[prop].pd_rightalign);
+}
+#endif
diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c
new file mode 100644
index 000000000000..0bbf20d4f02c
--- /dev/null
+++ b/common/zfs/zprop_common.c
@@ -0,0 +1,426 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Common routines used by zfs and zpool property management.
+ */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_znode.h>
+#include <sys/fs/zfs.h>
+
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <util/qsort.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+/* ZFS_TYPE_POOL selects the zpool table, any other type the zfs one. */
+static zprop_desc_t *zprop_get_proptable(zfs_type_t type)
+{
+	if (type != ZFS_TYPE_POOL)
+		return (zfs_prop_get_table());
+
+	return (zpool_prop_get_table());
+}
+
+/* ZPOOL_NUM_PROPS for ZFS_TYPE_POOL, ZFS_NUM_PROPS for any other type. */
+static int zprop_get_numprops(zfs_type_t type)
+{
+	if (type != ZFS_TYPE_POOL)
+		return (ZFS_NUM_PROPS);
+
+	return (ZPOOL_NUM_PROPS);
+}
+
+/*
+ * Fill in the table slot of 'prop' (table chosen by objset_types) and
+ * count the idx_tbl entries that precede its NULL-named terminator.
+ */
+void zprop_register_impl(int prop, const char *name, zprop_type_t type,
+    uint64_t numdefault, const char *strdefault, zprop_attr_t attr,
+    int objset_types, const char *values, const char *colname,
+    boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl)
+{
+	zprop_desc_t *desc = &zprop_get_proptable(objset_types)[prop];
+	const zprop_index_t *entry;
+
+	ASSERT(desc->pd_name == NULL || desc->pd_name == name);
+	ASSERT(name != NULL);
+	ASSERT(colname != NULL);
+	desc->pd_name = name;
+	desc->pd_propnum = prop;
+	desc->pd_proptype = type;
+	desc->pd_numdefault = numdefault;
+	desc->pd_strdefault = strdefault;
+	desc->pd_attr = attr;
+	desc->pd_types = objset_types;
+	desc->pd_values = values;
+	desc->pd_colname = colname;
+	desc->pd_rightalign = rightalign;
+	desc->pd_visible = visible;
+	desc->pd_table = idx_tbl;
+	desc->pd_table_size = 0;
+	for (entry = idx_tbl; entry != NULL && entry->pi_name != NULL; entry++)
+		desc->pd_table_size++;
+}
+
+/* Register a visible, left-aligned string property with no index table. */
+void zprop_register_string(int prop, const char *name, const char *def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_STRING,
+	    0, def, attr, objset_types, values, colname,
+	    B_FALSE, B_TRUE, NULL);
+}
+
+/* Register a visible, right-aligned numeric property with default 'def'. */
+void zprop_register_number(int prop, const char *name, uint64_t def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL,
+	    attr, objset_types, values, colname, B_TRUE, B_TRUE, NULL);
+}
+
+/* Register a visible, right-aligned index property backed by idx_tbl. */
+void zprop_register_index(int prop, const char *name, uint64_t def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname, const zprop_index_t *idx_tbl)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL,
+	    attr, objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl);
+}
+
+/* Register a non-visible property: zero/NULL defaults, no index table. */
+void zprop_register_hidden(int prop, const char *name, zprop_type_t type,
+    zprop_attr_t attr, int objset_types, const char *colname)
+{
+	zprop_register_impl(prop, name, type, 0, NULL,
+	    attr, objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
+}
+
+
+/*
+ * Comparison function for qsort() over an array of zprop_desc_t
+ * pointers.  Entries whose attribute is PROP_READONLY sort ahead of
+ * all others; within each group the order is that of strcmp() on
+ * the property names.
+ */
+static int zprop_compare(const void *arg1, const void *arg2)
+{
+	const zprop_desc_t *lhs = *(zprop_desc_t * const *)arg1;
+	const zprop_desc_t *rhs = *(zprop_desc_t * const *)arg2;
+	boolean_t lhs_ro = (lhs->pd_attr == PROP_READONLY);
+	boolean_t rhs_ro = (rhs->pd_attr == PROP_READONLY);
+
+	if (lhs_ro != rhs_ro)
+		return (lhs_ro ? -1 : 1);
+
+	return (strcmp(lhs->pd_name, rhs->pd_name));
+}
+
+/*
+ * Walk the property table for 'type', calling func(propnum, cb) on each
+ * visible property (on every property when show_all is set).  When
+ * 'ordered' is set the walk follows zprop_compare() order rather than
+ * table order.  It stops at the first callback result other than
+ * ZPROP_CONT and returns that property's number; ZPROP_CONT is returned
+ * when nothing stopped the walk or the userland allocation failed.
+ */
+int zprop_iter_common(zprop_func func, void *cb, boolean_t show_all,
+    boolean_t ordered, zfs_type_t type)
+{
+	zprop_desc_t *table = zprop_get_proptable(type);
+	int count = zprop_get_numprops(type);
+	int bytes = count * sizeof (zprop_desc_t *);
+	int n, result = ZPROP_CONT;
+	zprop_desc_t **walk;
+
+#if defined(_KERNEL)
+	walk = kmem_alloc(bytes, KM_SLEEP);
+#else
+	walk = malloc(bytes);
+	if (walk == NULL)
+		return (ZPROP_CONT);
+#endif
+
+	for (n = 0; n < count; n++)
+		walk[n] = &table[n];
+
+	if (ordered) {
+		qsort((void *)walk, count, sizeof (zprop_desc_t *),
+		    zprop_compare);
+	}
+
+	for (n = 0; n < count; n++) {
+		if (!walk[n]->pd_visible && !show_all)
+			continue;
+		if (func(walk[n]->pd_propnum, cb) != ZPROP_CONT) {
+			result = walk[n]->pd_propnum;
+			break;
+		}
+	}
+
+#if defined(_KERNEL)
+	kmem_free(walk, bytes);
+#else
+	free(walk);
+#endif
+	return (result);
+}
+
+/* Does p[0..len) match the property's name or (userland) column name? */
+static boolean_t propname_match(const char *p, size_t len,
+    zprop_desc_t *prop_entry)
+{
+	const char *propname = prop_entry->pd_name;
+#ifndef _KERNEL
+	const char *colname = prop_entry->pd_colname;
+	size_t c;
+#endif
+
+	if (len == strlen(propname) && strncmp(p, propname, len) == 0)
+		return (B_TRUE);
+
+#ifndef _KERNEL
+	if (colname == NULL || len != strlen(colname))
+		return (B_FALSE);
+
+	/* ctype functions need unsigned char values; plain char may be < 0 */
+	for (c = 0; c < len; c++)
+		if ((unsigned char)p[c] != tolower((unsigned char)colname[c]))
+			return (B_FALSE);
+	return (B_TRUE);
+#else
+	return (B_FALSE);
+#endif
+}
+
+typedef struct name_to_prop_cb {
+	const char *propname;	/* name being looked up */
+	zprop_desc_t *prop_tbl;	/* table indexed by the callback's prop */
+} name_to_prop_cb_t;
+
+/* Iteration callback: return prop on a name match, else ZPROP_CONT. */
+static int zprop_name_to_prop_cb(int prop, void *cb_data)
+{
+	name_to_prop_cb_t *lookup = cb_data;
+	const char *wanted = lookup->propname;
+
+	if (!propname_match(wanted, strlen(wanted), &lookup->prop_tbl[prop]))
+		return (ZPROP_CONT);
+
+	return (prop);
+}
+
+/* Find a property (hidden ones included) by name; ZPROP_INVAL if none. */
+int zprop_name_to_prop(const char *propname, zfs_type_t type)
+{
+	name_to_prop_cb_t lookup;
+	int found;
+
+	lookup.propname = propname;
+	lookup.prop_tbl = zprop_get_proptable(type);
+	found = zprop_iter_common(zprop_name_to_prop_cb, &lookup, B_TRUE,
+	    B_FALSE, type);
+	if (found == ZPROP_CONT)
+		return (ZPROP_INVAL);
+	return (found);
+}
+
+/*
+ * Store in *index the value of the index-table entry named 'string'.
+ * Returns 0 if found; -1 for ZPROP_INVAL/ZPROP_CONT, no table, or no match.
+ */
+int zprop_string_to_index(int prop, const char *string, uint64_t *index,
+    zfs_type_t type)
+{
+	const zprop_index_t *entry;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (-1);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	entry = zprop_get_proptable(type)[prop].pd_table;
+	if (entry == NULL)
+		return (-1);
+
+	for (; entry->pi_name != NULL; entry++) {
+		if (strcmp(string, entry->pi_name) != 0)
+			continue;
+		*index = entry->pi_value;
+		return (0);
+	}
+	return (-1);
+}
+
+/*
+ * Store in *string the name of the index-table entry whose value is 'index'.
+ * Returns 0 if found; -1 for ZPROP_INVAL/ZPROP_CONT, no table, or no match.
+ */
+int zprop_index_to_string(int prop, uint64_t index, const char **string,
+    zfs_type_t type)
+{
+	const zprop_index_t *entry;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (-1);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	entry = zprop_get_proptable(type)[prop].pd_table;
+	if (entry == NULL)
+		return (-1);
+
+	for (; entry->pi_name != NULL; entry++) {
+		if (entry->pi_value != index)
+			continue;
+		*string = entry->pi_name;
+		return (0);
+	}
+	return (-1);
+}
+
+/*
+ * Return a random valid property value.  Used by ztest.  Index properties
+ * yield the table entry selected by seed modulo the table size; any other
+ * property (or an index table with no entries) yields seed itself.
+ */
+uint64_t zprop_random_value(int prop, uint64_t seed, zfs_type_t type)
+{
+	const zprop_desc_t *pd;
+
+	ASSERT((uint_t)prop < zprop_get_numprops(type));
+	pd = &zprop_get_proptable(type)[prop];
+
+	/* An empty table would otherwise make the modulo divide by zero. */
+	if (pd->pd_table == NULL || pd->pd_table_size == 0)
+		return (seed);
+
+	return (pd->pd_table[seed % pd->pd_table_size].pi_value);
+}
+
+/*
+ * Return the 'values' string stored in the property's table entry (the
+ * one handed to zprop_register_impl()).  prop must be a real property
+ * number: ZPROP_INVAL, ZPROP_CONT and out-of-range IDs are only ASSERTed.
+ */
+const char *zprop_values(int prop, zfs_type_t type)
+{
+	ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT);
+	ASSERT(prop < zprop_get_numprops(type));
+
+	return (zprop_get_proptable(type)[prop].pd_values);
+}
+
+/*
+ * Returns TRUE if the property's pd_types mask shares a bit with 'type'.
+ * ZPROP_INVAL and ZPROP_CONT are never valid.
+ */
+boolean_t zprop_valid_for_type(int prop, zfs_type_t type)
+{
+	const zprop_desc_t *pd;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (B_FALSE);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	pd = &zprop_get_proptable(type)[prop];
+	return ((pd->pd_types & type) != 0);
+}
+
+#ifndef _KERNEL
+
+/*
+ * Determines the minimum width for the column, and indicates through
+ * *fixed whether it's fixed or not.  The width is at least the length of
+ * the column name; number columns get at least 5 characters and index
+ * columns are widened to their longest value name.  String columns and
+ * the 'creation' column are reported as non-fixed.  ZPROP_INVAL,
+ * ZPROP_CONT and out-of-range property numbers are only ASSERTed.
+ */
+size_t zprop_width(int prop, boolean_t *fixed, zfs_type_t type)
+{
+	const zprop_desc_t *pd;
+	const zprop_index_t *entry;
+	size_t width, namelen;
+
+	ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT);
+	ASSERT(prop < zprop_get_numprops(type));
+
+	pd = &zprop_get_proptable(type)[prop];
+	*fixed = B_TRUE;
+
+	/* Start with the width of the column name. */
+	width = strlen(pd->pd_colname);
+
+	/*
+	 * For fixed-width values, make sure the width is large enough to hold
+	 * any possible value.
+	 */
+	switch (pd->pd_proptype) {
+	case PROP_TYPE_NUMBER:
+		/*
+		 * The maximum length of a human-readable number is 5 characters
+		 * ("20.4M", for example).
+		 */
+		if (width < 5)
+			width = 5;
+		/*
+		 * 'creation' is handled specially because it's a number
+		 * internally, but displayed as a date string.
+		 */
+		if (prop == ZFS_PROP_CREATION)
+			*fixed = B_FALSE;
+		break;
+
+	case PROP_TYPE_INDEX:
+		/* Make room for the longest value name in the index table. */
+		for (entry = pd->pd_table; entry->pi_name != NULL; entry++) {
+			namelen = strlen(entry->pi_name);
+			if (namelen > width)
+				width = namelen;
+		}
+		break;
+
+	case PROP_TYPE_STRING:
+		*fixed = B_FALSE;
+		break;
+	}
+
+	return (width);
+}
+
+#endif