96 files changed, 10404 insertions, 2168 deletions
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
index 569ee631416c..75fcdc9152cd 100644
--- a/sys/fs/cd9660/cd9660_lookup.c
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -47,8 +47,8 @@
 #include <fs/cd9660/iso_rrip.h>
 
 struct cd9660_ino_alloc_arg {
-	cd_ino_t ino;
-	cd_ino_t i_ino;
+	ino_t ino;
+	ino_t i_ino;
 	struct iso_directory_record *ep;
 };
 
@@ -115,7 +115,7 @@ cd9660_lookup(struct vop_cachedlookup_args *ap)
 	struct cd9660_ino_alloc_arg dd_arg;
 	u_long bmask;			/* block offset mask */
 	int error;
-	cd_ino_t ino, i_ino;
+	ino_t ino, i_ino;
 	int ltype, reclen;
 	u_short namelen;
 	int isoflags;
@@ -125,7 +125,7 @@ cd9660_lookup(struct vop_cachedlookup_args *ap)
 	char *name;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
-	int flags = cnp->cn_flags;
+	uint64_t flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 
 	ep2 = ep = NULL;
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
index 67270b40f2b0..ce6ec3aa7a1c 100644
--- a/sys/fs/cd9660/cd9660_node.c
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -281,10 +281,10 @@ cd9660_tstamp_conv17(u_char *pi, struct timespec *pu)
 	return cd9660_tstamp_conv7(buf, pu, ISO_FTYPE_DEFAULT);
 }
 
-cd_ino_t
+ino_t
 isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp)
 {
-	cd_ino_t ino;
+	ino_t ino;
 
 	/*
 	 * Note there is an inverse calculation in
@@ -293,7 +293,7 @@ isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp)
 	 * and also a calculation of the isodir pointer
 	 * from an inode in cd9660_vnops.c:cd9660_readlink()
 	 */
-	ino = ((cd_ino_t)isonum_733(isodir->extent) +
+	ino = ((ino_t)isonum_733(isodir->extent) +
 		isonum_711(isodir->ext_attr_length)) << imp->im_bshift;
 	return ino;
 }
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
index 9dc84dd57c0e..6021c1681c5d 100644
--- a/sys/fs/cd9660/cd9660_node.h
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -56,7 +56,7 @@ typedef	struct	{
 
 struct iso_node {
 	struct	vnode *i_vnode;	/* vnode associated with this inode */
-	cd_ino_t	i_number;	/* the identity of the inode */
+	ino_t	i_number;	/* the identity of the inode */
 				/* we use the actual starting block of the file */
 	struct	iso_mnt *i_mnt;	/* filesystem associated with this inode */
 	struct	lockf *i_lockf;	/* head of byte-level lock list */
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
index 26825062d25a..d0b0008d10b2 100644
--- a/sys/fs/cd9660/cd9660_rrip.c
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -593,7 +593,7 @@ static RRIP_TABLE rrip_table_getname[] = {
 
 int
 cd9660_rrip_getname(struct iso_directory_record *isodir, char *outbuf,
-    u_short *outlen, cd_ino_t *inump, struct iso_mnt *imp)
+    u_short *outlen, ino_t *inump, struct iso_mnt *imp)
 {
 	ISO_RRIP_ANALYZE analyze;
 	RRIP_TABLE *tab;
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
index f067453d3458..b4db4c4f7331 100644
--- a/sys/fs/cd9660/cd9660_vfsops.c
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -394,7 +394,7 @@ iso_mountfs(struct vnode *devvp, struct mount *mp)
 	isomp->im_mountp = mp;
 	isomp->im_dev = dev;
 	isomp->im_devvp = devvp;
-	isomp->im_fmask = isomp->im_dmask = ACCESSPERMS;
+	isomp->im_fmask = isomp->im_dmask = ALLPERMS;
 
 	vfs_flagopt(mp->mnt_optnew, "norrip", &isomp->im_flags, ISOFSMNT_NORRIP);
 	vfs_flagopt(mp->mnt_optnew, "gens", &isomp->im_flags, ISOFSMNT_GENS);
@@ -560,7 +560,7 @@ cd9660_root(struct mount *mp, int flags, struct vnode **vpp)
 	struct iso_mnt *imp = VFSTOISOFS(mp);
 	struct iso_directory_record *dp =
 	    (struct iso_directory_record *)imp->root;
-	cd_ino_t ino = isodirino(dp, imp);
+	ino_t ino = isodirino(dp, imp);
 
 	/*
 	 * With RRIP we must use the `.' entry of the root directory.
@@ -660,15 +660,15 @@ static int
 cd9660_vfs_hash_cmp(struct vnode *vp, void *pino)
 {
 	struct iso_node *ip;
-	cd_ino_t ino;
+	ino_t ino;
 
 	ip = VTOI(vp);
-	ino = *(cd_ino_t *)pino;
+	ino = *(ino_t *)pino;
 	return (ip->i_number != ino);
 }
 
 int
-cd9660_vget_internal(struct mount *mp, cd_ino_t ino, int flags,
+cd9660_vget_internal(struct mount *mp, ino_t ino, int flags,
     struct vnode **vpp, int relocated, struct iso_directory_record *isodir)
 {
 	struct iso_mnt *imp;
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
index 33ca58472490..c4d0e6ba7b30 100644
--- a/sys/fs/cd9660/cd9660_vnops.c
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -443,7 +443,7 @@ cd9660_readdir(struct vop_readdir_args *ap)
 	u_short namelen;
 	u_int ncookies = 0;
 	uint64_t *cookies = NULL;
-	cd_ino_t ino;
+	ino_t ino;
 
 	dp = VTOI(vdp);
 	imp = dp->i_mnt;
@@ -758,6 +758,9 @@ cd9660_pathconf(struct vop_pathconf_args *ap)
 	/* NOTREACHED */
 }
 
+_Static_assert(sizeof(struct ifid) <= sizeof(struct fid),
+    "struct ifid must be no larger than struct fid");
+
 /*
  * Vnode pointer to File handle
  */
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
index a9733f62c077..40047cc92de6 100644
--- a/sys/fs/cd9660/iso.h
+++ b/sys/fs/cd9660/iso.h
@@ -212,21 +212,12 @@ struct iso_extended_attributes {
 	u_char len_au			[ISODCL (247, 250)]; /* 723 */
 };
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_WANT_ISO_MNT)
 
 /* CD-ROM Format type */
 enum ISO_FTYPE	{ ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP,
 		  ISO_FTYPE_JOLIET, ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA };
 
-#ifndef	ISOFSMNT_ROOT
-#define	ISOFSMNT_ROOT	0
-#endif
-
-/*
- * When ino_t becomes 64-bit, we can remove this definition in favor of ino_t.
- */
-typedef __uint64_t cd_ino_t;
-
 struct iso_mnt {
 	uint64_t im_flags;
 
@@ -262,12 +253,16 @@ struct iso_mnt {
 	void *im_l2d;
 };
 
+#endif /* defined(_KERNEL) || defined(_WANT_ISO_MNT) */
+
+#ifdef _KERNEL
+
 struct ifid {
 	u_short		ifid_len;
 	u_short		ifid_pad;
-	cd_ino_t	ifid_ino;
+	ino_t		ifid_ino;
 	long		ifid_start;
-};
+} __packed;
 
 #define VFSTOISOFS(mp)	((struct iso_mnt *)((mp)->mnt_data))
 
@@ -276,7 +271,7 @@ struct ifid {
 #define lblkno(imp, loc)	((loc) >> (imp)->im_bshift)
 #define blksize(imp, ip, lbn)	((imp)->logical_block_size)
 
-int cd9660_vget_internal(struct mount *, cd_ino_t, int, struct vnode **, int,
+int cd9660_vget_internal(struct mount *, ino_t, int, struct vnode **, int,
 			 struct iso_directory_record *);
 #define cd9660_sysctl ((int (*)(int *, u_int, void *, size_t *, void *, \
 				size_t, struct proc *))eopnotsupp)
@@ -287,7 +282,7 @@ extern struct vop_vector cd9660_fifoops;
 int isochar(u_char *, u_char *, int, u_short *, int *, int, void *);
 int isofncmp(u_char *, int, u_char *, int, int, int, void *, void *);
 void isofntrans(u_char *, int, u_char *, u_short *, int, int, int, int, void *);
-cd_ino_t isodirino(struct iso_directory_record *, struct iso_mnt *);
+ino_t isodirino(struct iso_directory_record *, struct iso_mnt *);
 u_short sgetrune(const char *, size_t, char const **, int, void *);
 
 #endif /* _KERNEL */
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
index bea0811eccf4..5a75beb08d93 100644
--- a/sys/fs/cd9660/iso_rrip.h
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -63,7 +63,7 @@ typedef struct {
 	off_t		iso_ce_off;	/* offset of continuation area */
 	int		iso_ce_len;	/* length of continuation area */
 	struct iso_mnt	*imp;		/* mount structure */
-	cd_ino_t	*inump;		/* inode number pointer */
+	ino_t		*inump;		/* inode number pointer */
 	char		*outbuf;	/* name/symbolic link output area */
 	u_short		*outlen;	/* length of above */
 	u_short		maxlen;		/* maximum length of above */
@@ -76,7 +76,7 @@ int cd9660_rrip_analyze(struct iso_directory_record *isodir,
 			    struct iso_node *inop, struct iso_mnt *imp);
 int cd9660_rrip_getname(struct iso_directory_record *isodir,
 			    char *outbuf, u_short *outlen,
-			    cd_ino_t *inump, struct iso_mnt *imp);
+			    ino_t *inump, struct iso_mnt *imp);
 int cd9660_rrip_getsymname(struct iso_directory_record *isodir,
 			       char *outbuf, u_short *outlen,
 			       struct iso_mnt *imp);
diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
index 9ef234c35427..d63a7d4691cf 100644
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -191,13 +191,13 @@ static void cuse_client_kqfilter_write_detach(struct knote *kn);
 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
 
-static struct filterops cuse_client_kqfilter_read_ops = {
+static const struct filterops cuse_client_kqfilter_read_ops = {
 	.f_isfd = 1,
 	.f_detach = cuse_client_kqfilter_read_detach,
 	.f_event = cuse_client_kqfilter_read_event,
 };
 
-static struct filterops cuse_client_kqfilter_write_ops = {
+static const struct filterops cuse_client_kqfilter_write_ops = {
 	.f_isfd = 1,
 	.f_detach = cuse_client_kqfilter_write_detach,
 	.f_event = cuse_client_kqfilter_write_event,
@@ -332,7 +332,7 @@ cuse_kern_uninit(void *arg)
 
 	mtx_destroy(&cuse_global_mtx);
 }
-SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
+SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, NULL);
 
 static int
 cuse_server_get(struct cuse_server **ppcs)
diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c
index db879efe803a..124f9f0449af 100644
--- a/sys/fs/devfs/devfs_devs.c
+++ b/sys/fs/devfs/devfs_devs.c
@@ -86,6 +86,9 @@ sysctl_devname(SYSCTL_HANDLER_ARGS)
 	struct cdev_priv *cdp;
 	struct cdev *dev;
 
+	if (req->newptr == NULL)
+		return (EINVAL);
+
 #ifdef COMPAT_FREEBSD11
 	if (req->newlen == sizeof(ud_compat)) {
 		error = SYSCTL_IN(req, &ud_compat, sizeof(ud_compat));
@@ -118,11 +121,8 @@ SYSCTL_PROC(_kern, OID_AUTO, devname,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
     NULL, 0, sysctl_devname, "", "devname(3) handler");
 
-SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD,
-    SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)");
-
-SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD,
-    SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");
+SYSCTL_SIZEOF_STRUCT(cdev);
+SYSCTL_SIZEOF_STRUCT(cdev_priv);
 
 struct cdev *
 devfs_alloc(int flags)
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index a35f6dbf9520..1d744e6593c0 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -66,7 +66,7 @@
 
 static struct vop_vector devfs_vnodeops;
 static struct vop_vector devfs_specops;
-static struct fileops devfs_ops_f;
+static const struct fileops devfs_ops_f;
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
@@ -555,8 +555,7 @@ loop:
 		if (devfs_allocv_drop_refs(0, dmp, de)) {
 			vput(vp);
 			return (ENOENT);
-		}
-		else if (VN_IS_DOOMED(vp)) {
+		} else if (VN_IS_DOOMED(vp)) {
 			mtx_lock(&devfs_de_interlock);
 			if (de->de_vnode == vp) {
 				de->de_vnode = NULL;
@@ -1516,6 +1515,8 @@ devfs_readdir(struct vop_readdir_args *ap)
 	 */
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
+	if (dd == NULL && error == 0 && ap->a_eofflag != NULL)
+		*ap->a_eofflag = 1;
 
 	return (error);
 }
@@ -2038,7 +2039,7 @@ devfs_cmp_f(struct file *fp1, struct file *fp2, struct thread *td)
 	return (kcmp_cmp((uintptr_t)fp1->f_data, (uintptr_t)fp2->f_data));
 }
 
-static struct fileops devfs_ops_f = {
+static const struct fileops devfs_ops_f = {
 	.fo_read =	devfs_read_f,
 	.fo_write =	devfs_write_f,
 	.fo_truncate =	devfs_truncate_f,
diff --git a/sys/fs/ext2fs/ext2_extents.c b/sys/fs/ext2fs/ext2_extents.c
index 3ae1da4fe6b7..146aa48f6743 100644
--- a/sys/fs/ext2fs/ext2_extents.c
+++ b/sys/fs/ext2fs/ext2_extents.c
@@ -711,7 +711,7 @@ ext4_ext_tree_init(struct inode *ip)
 
 	ip->i_flag |= IN_E4EXTENTS;
 
-	memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
+	memset(ip->i_data, 0, sizeof(ip->i_data));
 	ehp = (struct ext4_extent_header *)ip->i_data;
 	ehp->eh_magic = htole16(EXT4_EXT_MAGIC);
 	ehp->eh_max = htole16(ext4_ext_space_root(ip));
diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c
index bffbf4546f37..9e7a03fffd71 100644
--- a/sys/fs/ext2fs/ext2_vfsops.c
+++ b/sys/fs/ext2fs/ext2_vfsops.c
@@ -1345,7 +1345,7 @@ ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 		return (ESTALE);
 	}
 	*vpp = nvp;
-	vnode_create_vobject(*vpp, 0, curthread);
+	vnode_create_vobject(*vpp, ip->i_size, curthread);
 	return (0);
 }
 
diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c
index dfbb11f75421..064c10bd18b2 100644
--- a/sys/fs/ext2fs/ext2_vnops.c
+++ b/sys/fs/ext2fs/ext2_vnops.c
@@ -1889,6 +1889,8 @@ ext2_vptofh(struct vop_vptofh_args *ap)
 {
 	struct inode *ip;
 	struct ufid *ufhp;
+	_Static_assert(sizeof(struct ufid) <= sizeof(struct fid),
+	    "struct ufid cannot be larger than struct fid");
 
 	ip = VTOI(ap->a_vp);
 	ufhp = (struct ufid *)ap->a_fhp;
diff --git a/sys/fs/ext2fs/inode.h b/sys/fs/ext2fs/inode.h
index 9ee1b5672da6..c45339bfde40 100644
--- a/sys/fs/ext2fs/inode.h
+++ b/sys/fs/ext2fs/inode.h
@@ -187,10 +187,10 @@ struct indir {
 
 /* This overlays the fid structure (see mount.h). */
 struct ufid {
-	uint16_t ufid_len;		/* Length of structure. */
-	uint16_t ufid_pad;		/* Force 32-bit alignment. */
-	ino_t	ufid_ino;		/* File number (ino). */
-	uint32_t ufid_gen;		/* Generation number. */
+	uint16_t	ufid_len;	/* Length of structure. */
+	uint16_t	ufid_pad;	/* Force 32-bit alignment. */
+	uint32_t	ufid_gen;	/* Generation number. */
+	ino_t		ufid_ino;	/* File number (ino). */
 };
 #endif	/* _KERNEL */
 
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 9ec80794e795..58a22b8bdc50 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -502,7 +502,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
 		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
 	} else {
 		error = getvnode_path(td, fd,
-		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
 	}
 	if (error) {
 		/*
@@ -547,6 +547,8 @@ fdesc_readdir(struct vop_readdir_args *ap)
 	fmp = VFSTOFDESC(ap->a_vp->v_mount);
 	if (ap->a_ncookies != NULL)
 		*ap->a_ncookies = 0;
+	if (ap->a_eofflag != NULL)
+		*ap->a_eofflag = 0;
 
 	off = (int)uio->uio_offset;
 	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
@@ -559,7 +561,12 @@ fdesc_readdir(struct vop_readdir_args *ap)
 	fcnt = i - 2;		/* The first two nodes are `.' and `..' */
 
 	FILEDESC_SLOCK(fdp);
-	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
+	while (uio->uio_resid >= UIO_MX) {
+		if (i >= fdp->fd_nfiles + 2) {
+			if (ap->a_eofflag != NULL)
+				*ap->a_eofflag = 1;
+			break;
+		}
 		bzero((caddr_t)dp, UIO_MX);
 		switch (i) {
 		case 0:	/* `.' */
@@ -639,7 +646,7 @@ fdesc_readlink(struct vop_readlink_args *va)
 	VOP_UNLOCK(vn);
 
 	td = curthread;
-	error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL);
+	error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL);
 	if (error != 0)
 		goto out;
 
diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
index 892793993ecc..57b3559731f7 100644
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -82,6 +82,8 @@
 #include <sys/sysctl.h>
 #include <sys/poll.h>
 #include <sys/selinfo.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
 
 #include "fuse.h"
 #include "fuse_internal.h"
@@ -120,13 +122,13 @@ static int fuse_device_filt_read(struct knote *kn, long hint);
 static int fuse_device_filt_write(struct knote *kn, long hint);
 static void fuse_device_filt_detach(struct knote *kn);
 
-struct filterops fuse_device_rfiltops = {
+static const struct filterops fuse_device_rfiltops = {
 	.f_isfd = 1,
 	.f_detach = fuse_device_filt_detach,
 	.f_event = fuse_device_filt_read,
 };
 
-struct filterops fuse_device_wfiltops = {
+static const struct filterops fuse_device_wfiltops = {
 	.f_isfd = 1,
 	.f_event = fuse_device_filt_write,
 };
@@ -152,7 +154,7 @@ fdata_dtor(void *arg)
 	FUSE_LOCK();
 	fuse_lck_mtx_lock(fdata->aw_mtx);
 	/* wakup poll()ers */
-	selwakeuppri(&fdata->ks_rsel, PZERO + 1);
+	selwakeuppri(&fdata->ks_rsel, PZERO);
 	/* Don't let syscall handlers wait in vain */
 	while ((tick = fuse_aw_pop(fdata))) {
 		fuse_lck_mtx_lock(tick->tk_aw_mtx);
@@ -193,7 +195,7 @@ fuse_device_filter(struct cdev *dev, struct knote *kn)
 		kn->kn_fop = &fuse_device_wfiltops;
 		error = 0;
 	} else if (error == 0) {
-		error = EINVAL;
+		error = EXTERROR(EINVAL, "Unsupported kevent filter");
 		kn->kn_data = error;
 	}
 
@@ -319,7 +321,7 @@ again:
 			"we know early on that reader should be kicked so we "
 			"don't wait for news");
 		fuse_lck_mtx_unlock(data->ms_mtx);
-		return (ENODEV);
+		return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
 	}
 	if (!(tick = fuse_ms_pop(data))) {
 		/* check if we may block */
@@ -331,7 +333,10 @@ again:
 			err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
 			if (err != 0) {
 				fuse_lck_mtx_unlock(data->ms_mtx);
-				return (fdata_get_dead(data) ? ENODEV : err);
+				if (fdata_get_dead(data))
+					err = EXTERROR(ENODEV,
+						"This FUSE session is about to be closed");
+				return (err);
 			}
 			tick = fuse_ms_pop(data);
 		}
@@ -361,8 +366,8 @@ again:
 			FUSE_ASSERT_MS_DONE(tick);
 			fuse_ticket_drop(tick);
 		}
-		return (ENODEV);	/* This should make the daemon get off
-					 * of us */
+		/* This should make the daemon get off of us */
+		return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
 	}
 	SDT_PROBE2(fusefs, , device, trace, 1,
 		"fuse device read message successfully");
@@ -385,7 +390,7 @@ again:
 		fdata_set_dead(data);
 		SDT_PROBE2(fusefs, , device, trace, 2,
 		    "daemon is stupid, kick it off...");
-		err = ENODEV;
+		err = EXTERROR(ENODEV, "Partial read attempted");
 	} else {
 		err = uiomove(buf, buflen, uio);
 	}
@@ -403,12 +408,14 @@ fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
 		SDT_PROBE2(fusefs, , device, trace, 1,
 			"Format error: body size "
 			"differs from size claimed by header");
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "Format error: body size "
+		    "differs from size claimed by header"));
 	}
 	if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
 		SDT_PROBE2(fusefs, , device, trace, 1, 
 			"Format error: non zero error but message had a body");
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "Format error: non zero error, "
+		    "but message had a body"));
 	}
 
 	return (0);
@@ -439,13 +446,12 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 	err = devfs_get_cdevpriv((void **)&data);
 	if (err != 0)
 		return (err);
-	mp = data->mp;
 
 	if (uio->uio_resid < sizeof(struct fuse_out_header)) {
 		SDT_PROBE2(fusefs, , device, trace, 1,
 			"fuse_device_write got less than a header!");
 		fdata_set_dead(data);
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "fuse_device_write got less than a header!"));
 	}
 	if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
 		return (err);
@@ -453,7 +459,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 	if (data->linux_errnos != 0 && ohead.error != 0) {
 		err = -ohead.error;
 		if (err < 0 || err >= nitems(linux_to_bsd_errtbl))
-			return (EINVAL);
+			return (EXTERROR(EINVAL, "Unknown Linux errno", err));
 
 		/* '-', because it will get flipped again below */
 		ohead.error = -linux_to_bsd_errtbl[err];
@@ -521,7 +527,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 				memcpy(&tick->tk_aw_ohead, &ohead,
 					sizeof(ohead));
 				tick->tk_aw_handler(tick, uio);
-				err = EINVAL;
+				err = EXTERROR(EINVAL, "Unknown errno", ohead.error);
 			} else {
 				memcpy(&tick->tk_aw_ohead, &ohead,
 					sizeof(ohead));
@@ -542,6 +548,13 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 	} else if (ohead.unique == 0){
 		/* unique == 0 means asynchronous notification */
 		SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
+		mp = data->mp;
+		vfs_ref(mp);
+		err = vfs_busy(mp, 0);
+		vfs_rel(mp);
+		if (err)
+			return (err);
+
 		switch (ohead.error) {
 		case FUSE_NOTIFY_INVAL_ENTRY:
 			err = fuse_internal_invalidate_entry(mp, uio);
@@ -564,8 +577,10 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 			/* Unimplemented.  See comments in fuse_vnops */
 		default:
 			/* Not implemented */
-			err = ENOSYS;
+			err = EXTERROR(ENOSYS, "Unimplemented FUSE notification code",
+				ohead.error);
 		}
+		vfs_unbusy(mp);
 	} else {
 		/* no callback at all! */
 		SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket, 
@@ -582,7 +597,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 			 */
 			err = 0;
 		} else {
-			err = EINVAL;
+			err = EXTERROR(EINVAL, "FUSE ticket is missing");
 		}
 	}
 
diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
index 88de12d59425..5f5819c2ccae 100644
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -122,7 +122,6 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
     struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred)
 {
 	struct mount *mp = vnode_mount(vp);
-	struct fuse_data *data = fuse_get_mpdata(mp);
 	struct fuse_dispatcher fdi;
 	const struct fuse_open_out default_foo = {
 		.fh = 0,
@@ -132,12 +131,10 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
 	struct fuse_open_in *foi = NULL;
 	const struct fuse_open_out *foo;
 	fufh_type_t fufh_type;
-	int dataflags = data->dataflags;
 	int err = 0;
 	int oflags = 0;
 	int op = FUSE_OPEN;
 	int relop = FUSE_RELEASE;
-	int fsess_no_op_support = FSESS_NO_OPEN_SUPPORT;
 
 	fufh_type = fflags_2_fufh_type(a_mode);
 	oflags = fufh_type_2_fflags(fufh_type);
@@ -145,12 +142,11 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
 	if (vnode_isdir(vp)) {
 		op = FUSE_OPENDIR;
 		relop = FUSE_RELEASEDIR;
-		fsess_no_op_support = FSESS_NO_OPENDIR_SUPPORT;
 		/* vn_open_vnode already rejects FWRITE on directories */
 		MPASS(fufh_type == FUFH_RDONLY || fufh_type == FUFH_EXEC);
 	}
 	fdisp_init(&fdi, sizeof(*foi));
-	if (fsess_not_impl(mp, op) && dataflags & fsess_no_op_support) {
+	if (fsess_not_impl(mp, op)) {
 		/* The operation implicitly succeeds */
 		foo = &default_foo;
 	} else {
@@ -160,7 +156,7 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
 		foi->flags = oflags;
 
 		err = fdisp_wait_answ(&fdi);
-		if (err == ENOSYS && dataflags & fsess_no_op_support) {
+		if (err == ENOSYS) {
 			/* The operation implicitly succeeds */
 			foo = &default_foo;
 			fsess_set_notimpl(mp, op);
@@ -174,6 +170,7 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
 			goto out;
 		} else {
 			foo = fdi.answ;
+			fsess_set_impl(mp, op);
 		}
 	}
 
diff --git a/sys/fs/fuse/fuse_file.h b/sys/fs/fuse/fuse_file.h
index 2a90e66d1b23..232132473953 100644
--- a/sys/fs/fuse/fuse_file.h
+++ b/sys/fs/fuse/fuse_file.h
@@ -139,7 +139,7 @@ struct fuse_filehandle {
 
 	/*
 	 * flags returned by FUSE_OPEN
-	 * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE
+	 * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE, FOPEN_NOFLUSH
 	 * Unsupported:
 	 *     FOPEN_NONSEEKABLE: Adding support would require a new per-file
 	 *     or per-vnode attribute, which would have to be checked by
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index 29d88fc942f4..61fe2ed032f6 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -282,12 +282,12 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
 			 * dirty writes!  That's a server bug.
 			 */
 			if (fuse_libabi_geq(data, 7, 23)) {
-				msg = "writeback cache incoherent!."
+				msg = "writeback cache incoherent!  "
 				    "To prevent data corruption, disable "
 				    "the writeback cache according to your "
 				    "FUSE server's documentation.";
 			} else {
-				msg = "writeback cache incoherent!."
+				msg = "writeback cache incoherent!  "
 				    "To prevent data corruption, disable "
 				    "the writeback cache by setting "
 				    "vfs.fusefs.data_cache_mode to 0 or 1.";
@@ -979,6 +979,9 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 	struct fuse_data *data = tick->tk_data;
 	struct fuse_init_out *fiio = NULL;
 
+	if (fdata_get_dead(data))
+		goto out;
+
 	if ((err = tick->tk_aw_ohead.error)) {
 		goto out;
 	}
@@ -1010,10 +1013,6 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 				data->dataflags |= FSESS_POSIX_LOCKS;
 			if (fiio->flags & FUSE_EXPORT_SUPPORT)
 				data->dataflags |= FSESS_EXPORT_SUPPORT;
-			if (fiio->flags & FUSE_NO_OPEN_SUPPORT)
-				data->dataflags |= FSESS_NO_OPEN_SUPPORT;
-			if (fiio->flags & FUSE_NO_OPENDIR_SUPPORT)
-				data->dataflags |= FSESS_NO_OPENDIR_SUPPORT;
 			/* 
 			 * Don't bother to check FUSE_BIG_WRITES, because it's
 			 * redundant with max_write
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 00b348814642..0760d7641c7d 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -932,7 +932,7 @@ fuse_io_invalbuf(struct vnode *vp, struct thread *td)
 		if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
 			return EIO;
 		fvdat->flag |= FN_FLUSHWANT;
-		tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
+		tsleep(&fvdat->flag, PRIBIO, "fusevinv", 2 * hz);
 		error = 0;
 		if (p != NULL) {
 			PROC_LOCK(p);
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
index f1f9f801bf4d..0b6048644d32 100644
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -443,11 +443,6 @@ retry:
 	if (err == EWOULDBLOCK) {
 		SDT_PROBE2(fusefs, , ipc, trace, 3,
 			"fticket_wait_answer: EWOULDBLOCK");
-#ifdef XXXIP				/* die conditionally */
-		if (!fdata_get_dead(data)) {
-			fdata_set_dead(data);
-		}
-#endif
 		err = ETIMEDOUT;
 		fticket_set_answered(ftick);
 	} else if ((err == EINTR || err == ERESTART)) {
@@ -593,7 +588,7 @@ fdata_set_dead(struct fuse_data *data)
 	fuse_lck_mtx_lock(data->ms_mtx);
 	data->dataflags |= FSESS_DEAD;
 	wakeup_one(data);
-	selwakeuppri(&data->ks_rsel, PZERO + 1);
+	selwakeuppri(&data->ks_rsel, PZERO);
 	wakeup(&data->ticketer);
 	fuse_lck_mtx_unlock(data->ms_mtx);
 	FUSE_UNLOCK();
@@ -669,7 +664,7 @@ fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
 	else
 		fuse_ms_push(ftick);
 	wakeup_one(ftick->tk_data);
-	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
+	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO);
 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
 }
diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h
index 0ec556138be0..3bfc859dbac9 100644
--- a/sys/fs/fuse/fuse_ipc.h
+++ b/sys/fs/fuse/fuse_ipc.h
@@ -227,8 +227,6 @@ struct fuse_data {
                                          /* (and being observed by the daemon) */
 #define FSESS_PUSH_SYMLINKS_IN    0x0020 /* prefix absolute symlinks with mp */
 #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */
-#define FSESS_NO_OPEN_SUPPORT     0x0080 /* can elide FUSE_OPEN ops */
-#define FSESS_NO_OPENDIR_SUPPORT  0x0100 /* can elide FUSE_OPENDIR ops */
 #define FSESS_ASYNC_READ          0x1000 /* allow multiple reads of some file */
 #define FSESS_POSIX_LOCKS         0x2000 /* daemon supports POSIX locks */
 #define FSESS_EXPORT_SUPPORT      0x10000 /* daemon supports NFS-style lookups */
@@ -240,6 +238,8 @@ struct fuse_data {
 #define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000	/* WB cache incoherent */
 #define	FSESS_WARN_ILLEGAL_INODE  0x800000 /* Illegal inode for new file */
 #define FSESS_WARN_READLINK_EMBEDDED_NUL 0x1000000 /* corrupt READLINK output */
+#define FSESS_WARN_DOT_LOOKUP	  0x2000000 /* Inconsistent . LOOKUP response */
+#define FSESS_WARN_INODE_MISMATCH 0x4000000 /* ino != nodeid */
 #define FSESS_MNTOPTS_MASK	( \
 	FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \
 	FSESS_DEFAULT_PERMISSIONS | FSESS_INTR)
diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h
index ad93a26adaab..942448b47365 100644
--- a/sys/fs/fuse/fuse_kernel.h
+++ b/sys/fs/fuse/fuse_kernel.h
@@ -161,6 +161,33 @@
  *  - add FOPEN_CACHE_DIR
  *  - add FUSE_MAX_PAGES, add max_pages to init_out
  *  - add FUSE_CACHE_SYMLINKS
+ *
+ *  7.29
+ *  - add FUSE_NO_OPENDIR_SUPPORT flag
+ *
+ *  7.30
+ *  - add FUSE_EXPLICIT_INVAL_DATA
+ *  - add FUSE_IOCTL_COMPAT_X32
+ *
+ *  7.31
+ *  - add FUSE_WRITE_KILL_PRIV flag
+ *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ *
+ *  7.32
+ *  - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
+ *
+ *  7.33
+ *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
+ *  - add FUSE_OPEN_KILL_SUIDGID
+ *  - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
+ *  - add FUSE_SETXATTR_ACL_KILL_SGID
+ *
+ *  7.34
+ *  - add FUSE_SYNCFS
+ *
+ *  7.35
+ *  - add FOPEN_NOFLUSH
  */
 
 #ifndef _FUSE_FUSE_KERNEL_H
@@ -196,7 +223,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 29
+#define FUSE_KERNEL_MINOR_VERSION 35
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -220,7 +247,7 @@ struct fuse_attr {
 	uint32_t	gid;
 	uint32_t	rdev;
 	uint32_t	blksize;
-	uint32_t	padding;
+	uint32_t	flags;
 };
 
 struct fuse_kstatfs {
@@ -257,6 +284,7 @@ struct fuse_file_lock {
 #define FATTR_MTIME_NOW	(1 << 8)
 #define FATTR_LOCKOWNER	(1 << 9)
 #define FATTR_CTIME	(1 << 10)
+#define FATTR_KILL_SUIDGID (1 << 11)
 
 /**
  * Flags returned by the OPEN request
@@ -265,11 +293,15 @@ struct fuse_file_lock {
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
  * FOPEN_NONSEEKABLE: the file is not seekable
  * FOPEN_CACHE_DIR: allow caching this directory
+ * FOPEN_STREAM: the file is stream-like (no file position at all)
+ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
 #define FOPEN_NONSEEKABLE	(1 << 2)
 #define FOPEN_CACHE_DIR		(1 << 3)
+#define FOPEN_STREAM		(1 << 4)
+#define FOPEN_NOFLUSH		(1 << 5)
 
 /**
  * INIT request/reply flags
@@ -299,6 +331,17 @@ struct fuse_file_lock {
  * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
  * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
+ *		       foffset and moffset fields in struct
+ *		       fuse_setupmapping_out and fuse_removemapping_one.
+ * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
+ * FUSE_HANDLE_KILLPRIV_V2: fs kills suid/sgid/cap on write/chown/trunc.
+ *			Upon write/truncate suid/sgid is only killed if caller
+ *			does not have CAP_FSETID. Additionally upon
+ *			write/truncate sgid is killed only if file has group
+ *			execute permission. (Same as Linux VFS behavior).
+ * FUSE_SETXATTR_EXT:	Server supports extended struct fuse_setxattr_in
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -325,6 +368,11 @@ struct fuse_file_lock {
 #define FUSE_MAX_PAGES		(1 << 22)
 #define FUSE_CACHE_SYMLINKS	(1 << 23)
 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT	(1 << 26)
+#define FUSE_SUBMOUNTS		(1 << 27)
+#define FUSE_HANDLE_KILLPRIV_V2	(1 << 28)
+#define FUSE_SETXATTR_EXT	(1 << 29)
 
 #ifdef linux
 /**
@@ -356,9 +404,14 @@ struct fuse_file_lock {
  *
  * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
  * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits
  */
 #define FUSE_WRITE_CACHE	(1 << 0)
 #define FUSE_WRITE_LOCKOWNER	(1 << 1)
+#define FUSE_WRITE_KILL_SUIDGID (1 << 2)
+
+/* Obsolete alias; this flag implies killing suid/sgid only. */
+#define FUSE_WRITE_KILL_PRIV	FUSE_WRITE_KILL_SUIDGID
 
 /**
  * Read flags
@@ -373,6 +426,7 @@ struct fuse_file_lock {
  * FUSE_IOCTL_RETRY: retry with new iovecs
  * FUSE_IOCTL_32BIT: 32bit ioctl
  * FUSE_IOCTL_DIR: is a directory
+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t)
  *
  * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
  */
@@ -381,6 +435,7 @@ struct fuse_file_lock {
 #define FUSE_IOCTL_RETRY	(1 << 2)
 #define FUSE_IOCTL_32BIT	(1 << 3)
 #define FUSE_IOCTL_DIR		(1 << 4)
+#define FUSE_IOCTL_COMPAT_X32	(1 << 5)
 
 #define FUSE_IOCTL_MAX_IOV	256
 
@@ -404,6 +459,25 @@ struct fuse_file_lock {
 #define FUSE_FALLOC_FL_KEEP_SIZE	0x1
 #define FUSE_FALLOC_FL_PUNCH_HOLE	0x2
 
+/**
+ * fuse_attr flags
+ *
+ * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ */
+#define FUSE_ATTR_SUBMOUNT      (1 << 0)
+
+/**
+ * Open flags
+ * FUSE_OPEN_KILL_SUIDGID: Kill suid and sgid if executable
+ */
+#define FUSE_OPEN_KILL_SUIDGID	(1 << 0)
+
+/**
+ * setxattr flags
+ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set
+ */
+#define FUSE_SETXATTR_ACL_KILL_SGID	(1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP		= 1,
 	FUSE_FORGET		= 2,  /* no reply */
@@ -450,10 +524,16 @@ enum fuse_opcode {
 	FUSE_RENAME2		= 45,
 	FUSE_LSEEK		= 46,
 	FUSE_COPY_FILE_RANGE	= 47,
+	FUSE_SETUPMAPPING	= 48,
+	FUSE_REMOVEMAPPING	= 49,
+	FUSE_SYNCFS		= 50,
 
 #ifdef linux
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
+	/* Reserved opcodes: helpful to detect structure endian-ness */
+	CUSE_INIT_BSWAP_RESERVED	= 1048576,	/* CUSE_INIT << 8 */
+	FUSE_INIT_BSWAP_RESERVED	= 436207616,	/* FUSE_INIT << 24 */
 #endif /* linux */
 };
 
@@ -561,14 +641,14 @@ struct fuse_setattr_in {
 
 struct fuse_open_in {
 	uint32_t	flags;
-	uint32_t	unused;
+	uint32_t	open_flags;   /* FUSE_OPEN_... */
 };
 
 struct fuse_create_in {
 	uint32_t	flags;
 	uint32_t	mode;
 	uint32_t	umask;
-	uint32_t	padding;
+	uint32_t	open_flags;   /* FUSE_OPEN_... */
 };
 
 struct fuse_open_out {
@@ -630,9 +710,13 @@ struct fuse_fsync_in {
 	uint32_t	padding;
 };
 
+#define FUSE_COMPAT_SETXATTR_IN_SIZE 8
+
 struct fuse_setxattr_in {
 	uint32_t	size;
 	uint32_t	flags;
+	uint32_t	setxattr_flags;
+	uint32_t	padding;
 };
 
 struct fuse_listxattr_in {
@@ -692,7 +776,7 @@ struct fuse_init_out {
 	uint32_t	max_write;
 	uint32_t	time_gran;
 	uint16_t	max_pages;
-	uint16_t	padding;
+	uint16_t	map_alignment;
 	uint32_t	unused[8];
 };
 
@@ -863,6 +947,10 @@ struct fuse_notify_retrieve_in {
 	uint64_t	dummy4;
 };
 
+/* Device ioctls: */
+#define FUSE_DEV_IOC_MAGIC	229
+#define FUSE_DEV_IOC_CLONE	_IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+
 struct fuse_lseek_in {
 	uint64_t	fh;
 	uint64_t	offset;
@@ -884,4 +972,38 @@ struct fuse_copy_file_range_in {
 	uint64_t	flags;
 };
 
+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
+struct fuse_setupmapping_in {
+	/* An already open handle */
+	uint64_t	fh;
+	/* Offset into the file to start the mapping */
+	uint64_t	foffset;
+	/* Length of mapping required */
+	uint64_t	len;
+	/* Flags, FUSE_SETUPMAPPING_FLAG_* */
+	uint64_t	flags;
+	/* Offset in Memory Window */
+	uint64_t	moffset;
+};
+
+struct fuse_removemapping_in {
+	/* Number of struct fuse_removemapping_one entries that follow */
+	uint32_t        count;
+};
+
+struct fuse_removemapping_one {
+	/* Offset into the DAX window at which to start the unmapping */
+	uint64_t        moffset;
+	/* Length of the mapping to remove */
+	uint64_t	len;
+};
+
+#define FUSE_REMOVEMAPPING_MAX_ENTRY   \
+		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+
+struct fuse_syncfs_in {
+	uint64_t	padding;
+};
+
 #endif /* _FUSE_FUSE_KERNEL_H */
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
index 777519450954..742dc66bcafc 100644
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -297,6 +297,8 @@ fuse_vnode_get(struct mount *mp,
     __enum_uint8(vtype) vtyp)
 {
 	struct thread *td = curthread;
+	bool exportable = fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT;
+
 	/* 
 	 * feo should only be NULL for the root directory, which (when libfuse
 	 * is used) always has generation 0
@@ -309,6 +311,23 @@ fuse_vnode_get(struct mount *mp,
 			"Assigned same inode to both parent and child.");
 		return EIO;
 	}
+	if (feo && feo->nodeid != feo->attr.ino && exportable) {
+		/*
+		 * NFS servers (both kernelspace and userspace) rely on
+		 * VFS_VGET to lookup inodes.  But that's only possible if the
+		 * file's inode number matches its nodeid, which isn't
+		 * necessarily the case for FUSE.  If they don't match, then we
+		 * can complete the current operation, but future VFS_VGET
+		 * operations will almost certainly return spurious results.
+		 * Warn the operator.
+		 *
+		 * But only warn the operator if the file system reports
+		 * NFS-compatibility, because that's the only time that this
+		 * matters, and dumb fuse servers abound.
+		 */
+		fuse_warn(fuse_get_mpdata(mp), FSESS_WARN_INODE_MISMATCH,
+		    "file has different inode number and nodeid.");
+	}
 
 	err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp);
 	if (err) {
@@ -354,7 +373,7 @@ void
 fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td)
 {
 	if (vnode_vtype(vp) == VREG)
-		vnode_create_vobject(vp, 0, td);
+		vnode_create_vobject(vp, VNODE_NO_SIZE, td);
 }
 
 int
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index e088f92bf5bf..1b858a988289 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -81,6 +81,8 @@
 #include <sys/mount.h>
 #include <sys/sysctl.h>
 #include <sys/fcntl.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
 
 #include "fuse.h"
 #include "fuse_node.h"
@@ -272,7 +274,7 @@ fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
 	int error;
 
 	if (!(fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT))
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "NFS-style lookups are not supported"));
 
 	error = VFS_VGET(mp, ffhp->nid, LK_EXCLUSIVE, &nvp);
 	if (error) {
@@ -286,7 +288,7 @@ fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
 		return (ESTALE);
 	}
 	*vpp = nvp;
-	vnode_create_vobject(*vpp, 0, curthread);
+	vnode_create_vobject(*vpp, VNODE_NO_SIZE, curthread);
 	return (0);
 }
 
@@ -321,11 +323,11 @@ fuse_vfsop_mount(struct mount *mp)
 	opts = mp->mnt_optnew;
 
 	if (!opts)
-		return EINVAL;
+		return (EXTERROR(EINVAL, "Mount options were not supplied"));
 
 	/* `fspath' contains the mount point (eg. /mnt/fuse/sshfs); REQUIRED */
 	if (!vfs_getopts(opts, "fspath", &err))
-		return err;
+		return (EXTERROR(err, "Mount options are missing 'fspath'"));
 
 	/*
 	 * With the help of underscored options the mount program
@@ -358,11 +360,12 @@ fuse_vfsop_mount(struct mount *mp)
 	/* `from' contains the device name (eg. /dev/fuse0); REQUIRED */
 	fspec = vfs_getopts(opts, "from", &err);
 	if (!fspec)
-		return err;
+		return (EXTERROR(err, "Mount options are missing 'from'"));
 
 	/* `fd' contains the filedescriptor for this session; REQUIRED */
 	if (vfs_scanopt(opts, "fd", "%d", &fd) != 1)
-		return EINVAL;
+		return (EXTERROR(EINVAL, "Mount options contain an invalid value "
+		    "for 'fd'"));
 
 	err = fuse_getdevice(fspec, td, &fdev);
 	if (err != 0)
@@ -398,11 +401,17 @@ fuse_vfsop_mount(struct mount *mp)
 	/* Sanity + permission checks */
 	if (!data->daemoncred)
 		panic("fuse daemon found, but identity unknown");
-	if (mntopts & FSESS_DAEMON_CAN_SPY)
+	if (mntopts & FSESS_DAEMON_CAN_SPY) {
 		err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER);
-	if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid)
+		EXTERROR(err, "FUSE daemon requires privileges "
+		    "due to 'allow_other' option");
+	}
+	if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) {
 		/* are we allowed to do the first mount? */
 		err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER);
+		EXTERROR(err, "Mounting as a user that is different from the FUSE "
+		    "daemon's requires privileges");
+	}
 	if (err) {
 		FUSE_UNLOCK();
 		goto out;
@@ -549,7 +558,7 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 		 * nullfs mount of a fusefs file system.
 		 */
 		SDT_PROBE1(fusefs, , vfsops, invalidate_without_export, mp);
-		return (EOPNOTSUPP);
+		return (EXTERROR(EOPNOTSUPP, "NFS-style lookups are not supported"));
 	}
 
 	error = fuse_internal_get_cached_vnode(mp, ino, flags, vpp);
@@ -565,15 +574,28 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 	error = fdisp_wait_answ(&fdi);
 
 	if (error)
-		return error;
+		goto out;
 
 	feo = (struct fuse_entry_out *)fdi.answ;
+
 	if (feo->nodeid == 0) {
 		/* zero nodeid means ENOENT and cache it */
 		error = ENOENT;
 		goto out;
 	}
 
+	if (feo->nodeid != nodeid) {
+		/*
+		 * Something is very wrong with the server if "foo/." has a
+		 * different inode number than "foo".
+		 */
+		static const char exterr[] = "Inconsistent LOOKUP response: "
+		    "\"FILE/.\" has a different inode number than \"FILE\".";
+		fuse_warn(data, FSESS_WARN_DOT_LOOKUP, exterr);
+		error = EXTERROR(EIO, exterr);
+		goto out;
+	}
+
 	vtyp = IFTOVT(feo->attr.mode);
 	error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
 	if (error)
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 3f8f3322162a..ae28617537fd 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -89,6 +89,8 @@
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@@ -289,6 +291,10 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
 	if (err)
 		return err;
 
+	if (fufh->fuse_open_flags & FOPEN_NOFLUSH &&
+	    (!fsess_opt_writeback(vnode_mount(vp))))
+		return (0);
+
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
 	ffi = fdi.indata;
@@ -395,6 +401,9 @@ fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred,
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_LSEEK);
+	} else if (err == ENXIO) {
+		/* Note: ENXIO means "no more hole/data regions until EOF" */
+		fsess_set_impl(mp, FUSE_LSEEK);
 	} else if (err == 0) {
 		fsess_set_impl(mp, FUSE_LSEEK);
 		flso = fdi.answ;
@@ -432,7 +441,8 @@ fuse_vnop_access(struct vop_access_args *ap)
 		if (vnode_isvroot(vp)) {
 			return 0;
 		}
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (!(data->dataflags & FSESS_INITED)) {
 		if (vnode_isvroot(vp)) {
@@ -441,7 +451,8 @@ fuse_vnop_access(struct vop_access_args *ap)
 				return 0;
 			}
 		}
-		return EBADF;
+		return (EXTERROR(EBADF, "Access denied until FUSE session "
+		    "is initialized"));
 	}
 	if (vnode_islnk(vp)) {
 		return 0;
@@ -482,7 +493,8 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 
 	switch(ap->a_op) {
@@ -499,7 +511,7 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
 		op = FUSE_SETLK;
 		break;
 	default:
-		return EINVAL;
+		return (EXTERROR(EINVAL, "Unsupported lock flags"));
 	}
 
 	if (!(dataflags & FSESS_POSIX_LOCKS))
@@ -527,14 +539,14 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
 		size = vattr.va_size;
 		if (size > OFF_MAX ||
 		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) {
-			err = EOVERFLOW;
+			err = EXTERROR(EOVERFLOW, "Offset is too large");
 			goto out;
 		}
 		start = size + fl->l_start;
 		break;
 
 	default:
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "Unsupported offset type"));
 	}
 
 	err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
@@ -596,15 +608,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	switch (vp->v_type) {
 	case VFIFO:
 		return (ESPIPE);
 	case VLNK:
 	case VREG:
-		if (vfs_isrdonly(mp))
-			return (EROFS);
 		break;
 	default:
 		return (ENODEV);
@@ -614,7 +625,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
 		return (EROFS);
 
 	if (fsess_not_impl(mp, FUSE_FALLOCATE))
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "This server does not implement "
+		    "FUSE_FALLOCATE"));
 
 	io.uio_offset = *offset;
 	io.uio_resid = *len;
@@ -644,13 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_FALLOCATE);
-		err = EINVAL;
+		err = EXTERROR(EINVAL, "This server does not implement "
+		    "FUSE_FALLOCATE");
 	} else if (err == EOPNOTSUPP) {
 		/*
 		 * The file system server does not support FUSE_FALLOCATE with
 		 * the supplied mode for this particular file.
 		 */
-		err = EINVAL;
+		err = EXTERROR(EINVAL, "This file can't be pre-allocated");
 	} else if (!err) {
 		*offset += *len;
 		*len = 0;
@@ -696,7 +709,8 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 	int maxrun;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 
 	mp = vnode_mount(vp);
@@ -793,6 +807,9 @@ fuse_vnop_close(struct vop_close_args *ap)
 	if (fflag & IO_NDELAY)
 		return 0;
 
+	if (cred == NULL)
+		cred = td->td_ucred;
+
 	err = fuse_flush(vp, cred, pid, fflag);
 	if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
 		struct vattr vap;
@@ -860,19 +877,21 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
 	pid_t pid;
 	int err;
 
-	err = ENOSYS;
 	if (mp == NULL || mp != vnode_mount(outvp))
-		goto fallback;
+		return (EXTERROR(ENOSYS, "Mount points do not match"));
 
 	if (incred->cr_uid != outcred->cr_uid)
-		goto fallback;
+		return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+		    "support different credentials for infd and outfd"));
 
 	if (incred->cr_groups[0] != outcred->cr_groups[0])
-		goto fallback;
+		return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+		    "support different credentials for infd and outfd"));
 
 	/* Caller busied mp, mnt_data can be safely accessed. */
 	if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
-		goto fallback;
+		return (EXTERROR(ENOSYS, "This daemon does not "
+		    "implement COPY_FILE_RANGE"));
 
 	if (ap->a_fsizetd == NULL)
 		td = curthread;
@@ -882,7 +901,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
 
 	vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE);
 	if (invp->v_data == NULL || outvp->v_data == NULL) {
-		err = EBADF;
+		err = EXTERROR(EBADF, "vnode got reclaimed");
 		goto unlock;
 	}
 
@@ -946,7 +965,6 @@ unlock:
 
 	if (err == ENOSYS)
 		fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
-fallback:
 
 	/*
 	 * No need to call vn_rlimit_fsizex_res before return, since the uio is
@@ -1014,7 +1032,8 @@ fuse_vnop_create(struct vop_create_args *ap)
 	int flags;
 
 	if (fuse_isdeadfs(dvp))
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	/* FUSE expects sockets to be created with FUSE_MKNOD */
 	if (vap->va_type == VSOCK)
@@ -1030,7 +1049,7 @@ fuse_vnop_create(struct vop_create_args *ap)
 	bzero(&fdi, sizeof(fdi));
 
 	if (vap->va_type != VREG)
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "Only regular files can be created"));
 
 	if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
@@ -1211,8 +1230,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
 	if (!(dataflags & FSESS_INITED)) {
 		if (!vnode_isvroot(vp)) {
 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
-			err = ENOTCONN;
-			return err;
+			return (EXTERROR(ENOTCONN, "FUSE daemon is not "
+			    "initialized"));
 		} else {
 			goto fake;
 		}
@@ -1341,10 +1360,11 @@ fuse_vnop_link(struct vop_link_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
-		return EXDEV;
+		return (EXDEV);
 	}
 
 	/*
@@ -1354,7 +1374,7 @@ fuse_vnop_link(struct vop_link_args *ap)
 	 * validating that nlink does not overflow.
 	 */
 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
-		return EMLINK;
+		return (EMLINK);
 	fli.oldnodeid = VTOI(vp);
 
 	fdisp_init(&fdi, 0);
@@ -1366,12 +1386,13 @@ fuse_vnop_link(struct vop_link_args *ap)
 	feo = fdi.answ;
 
 	if (fli.oldnodeid != feo->nodeid) {
+		static const char exterr[] = "Server assigned wrong inode "
+		    "for a hard link.";
 		struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
-		fuse_warn(data, FSESS_WARN_ILLEGAL_INODE,
-			"Assigned wrong inode for a hard link.");
+		fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr);
 		fuse_vnode_clear_attr_cache(vp);
 		fuse_vnode_clear_attr_cache(tdvp);
-		err = EIO;
+		err = EXTERROR(EIO, exterr);
 		goto out;
 	}
 
@@ -1428,8 +1449,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 	struct timespec now;
 
 	int nameiop = cnp->cn_nameiop;
-	int flags = cnp->cn_flags;
-	int islastcn = flags & ISLASTCN;
+	bool isdotdot = cnp->cn_flags & ISDOTDOT;
+	bool islastcn = cnp->cn_flags & ISLASTCN;
 	struct mount *mp = vnode_mount(dvp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
@@ -1448,7 +1469,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 
 	if (fuse_isdeadfs(dvp)) {
 		*vpp = NULL;
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (!vnode_isdir(dvp))
 		return ENOTDIR;
@@ -1462,14 +1484,14 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 		return err;
 
 	is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
-	if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT))
-	{
+	if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
 		if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
 			/*
 			 * Since the file system doesn't support ".." lookups,
 			 * we have no way to find this entry.
 			 */
-			return ESTALE;
+			return (EXTERROR(ESTALE, "This server does not support "
+			    "'..' lookups"));
 		}
 		nid = VTOFUD(dvp)->parent_nid;
 		if (nid == 0)
@@ -1577,7 +1599,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 		}
 	} else {
 		/* Entry was found */
-		if (flags & ISDOTDOT) {
+		if (isdotdot) {
 			struct fuse_lookup_alloc_arg flaa;
 
 			flaa.nid = nid;
@@ -1592,11 +1614,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 				vref(dvp);
 				*vpp = dvp;
 			} else {
+				static const char exterr[] = "Server assigned "
+				    "same inode to both parent and child.";
 				fuse_warn(fuse_get_mpdata(mp),
-				    FSESS_WARN_ILLEGAL_INODE,
-				    "Assigned same inode to both parent and "
-				    "child.");
-				err = EIO;
+				    FSESS_WARN_ILLEGAL_INODE, exterr);
+				err = EXTERROR(EIO, exterr);
 			}
 
 		} else {
@@ -1684,7 +1706,8 @@ fuse_vnop_mkdir(struct vop_mkdir_args *ap)
 	struct fuse_mkdir_in fmdi;
 
 	if (fuse_isdeadfs(dvp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	fmdi.umask = curthread->td_proc->p_pd->pd_cmask;
@@ -1711,7 +1734,8 @@ fuse_vnop_mknod(struct vop_mknod_args *ap)
 	struct vattr *vap = ap->a_vap;
 
 	if (fuse_isdeadfs(dvp))
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	return fuse_internal_mknod(dvp, vpp, cnp, vap);
 }
@@ -1735,11 +1759,13 @@ fuse_vnop_open(struct vop_open_args *ap)
 	pid_t pid = td->td_proc->p_pid;
 
 	if (fuse_isdeadfs(vp))
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
-		return (EOPNOTSUPP);
+		return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type",
+		    vp->v_type));
 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
-		return EINVAL;
+		return (EXTERROR(EINVAL, "Illegal mode", a_mode));
 
 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
 		fuse_vnode_open(vp, 0, td);
@@ -1754,6 +1780,9 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp;
+	struct fuse_filehandle *fufh;
+	int err;
+	bool closefufh = false;
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
@@ -1783,22 +1812,45 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 		    !fsess_not_impl(mp, FUSE_LSEEK)) {
 			off_t offset = 0;
 
-			/* Issue a FUSE_LSEEK to find out if it's implemented */
-			fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred,
-			    curthread->td_proc->p_pid, &offset, SEEK_DATA);
+			/*
+			 * Issue a FUSE_LSEEK to find out if it's supported.
+			 * Use SEEK_DATA instead of SEEK_HOLE, because the
+			 * latter generally requires sequential scans of file
+			 * metadata, which can be slow.
+			 */
+			err = fuse_vnop_do_lseek(vp, curthread,
+			    curthread->td_ucred, curthread->td_proc->p_pid,
+			    &offset, SEEK_DATA);
+			if (err == EBADF) {
+				/*
+				 * pathconf() doesn't necessarily open the
+				 * file.  So we may need to do it here.
+				 */
+				err = fuse_filehandle_open(vp, FREAD, &fufh,
+				    curthread, curthread->td_ucred);
+				if (err == 0) {
+					closefufh = true;
+					err = fuse_vnop_do_lseek(vp, curthread,
+					    curthread->td_ucred,
+					    curthread->td_proc->p_pid, &offset,
+					    SEEK_DATA);
+				}
+				if (closefufh)
+					fuse_filehandle_close(vp, fufh,
+					    curthread, curthread->td_ucred);
+			}
+
 		}
 
 		if (fsess_is_impl(mp, FUSE_LSEEK)) {
 			*ap->a_retval = 1;
 			return (0);
+		} else if (fsess_not_impl(mp, FUSE_LSEEK)) {
+			/* FUSE_LSEEK is not implemented */
+			return (EXTERROR(EINVAL, "This server does not "
+			    "implement FUSE_LSEEK"));
 		} else {
-			/*
-			 * Probably FUSE_LSEEK is not implemented.  It might
-			 * be, if the FUSE_LSEEK above returned an error like
-			 * EACCES, but in that case we can't tell, so it's
-			 * safest to report EINVAL anyway.
-			 */
-			return (EINVAL);
+			return (err);
 		}
 	default:
 		return (vop_stdpathconf(ap));
@@ -1830,7 +1882,8 @@ fuse_vnop_read(struct vop_read_args *ap)
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 
 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -1907,20 +1960,18 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
 	if (ap->a_eofflag)
 		*ap->a_eofflag = 0;
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
-	}
-	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
-	    (uio_resid(uio) < sizeof(struct dirent))) {
-		return EINVAL;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
+	if (uio_resid(uio) < sizeof(struct dirent))
+		return (EXTERROR(EINVAL, "Buffer is too small"));
 
 	tresid = uio->uio_resid;
 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
 	if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) {
-		KASSERT(fuse_get_mpdata(mp)->dataflags
-				& FSESS_NO_OPENDIR_SUPPORT,
-			("FUSE file systems that don't set "
-			 "FUSE_NO_OPENDIR_SUPPORT should not be exported"));
+		KASSERT(!fsess_is_impl(mp, FUSE_OPENDIR),
+			("FUSE file systems that implement "
+			 "FUSE_OPENDIR should not be exported"));
 		/* 
 		 * nfsd will do VOP_READDIR without first doing VOP_OPEN.  We
 		 * must implicitly open the directory here.
@@ -1983,7 +2034,8 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (!vnode_islnk(vp)) {
 		return EINVAL;
@@ -1994,10 +2046,11 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
 		goto out;
 	}
 	if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) {
+		static const char exterr[] = "Server returned an embedded NUL "
+		    "from FUSE_READLINK.";
 		struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
-		fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL,
-				"Returned an embedded NUL from FUSE_READLINK.");
-		err = EIO;
+		fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr);
+		err = EXTERROR(EIO, exterr);
 		goto out;
 	}
 	if (((char *)fdi.answ)[0] == '/' &&
@@ -2081,10 +2134,11 @@ fuse_vnop_remove(struct vop_remove_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (vnode_isdir(vp)) {
-		return EPERM;
+		return (EXTERROR(EPERM, "vnode is a directory"));
 	}
 
 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
@@ -2117,12 +2171,13 @@ fuse_vnop_rename(struct vop_rename_args *ap)
 	int err = 0;
 
 	if (fuse_isdeadfs(fdvp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp && fvp->v_mount != tvp->v_mount)) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
-		err = EXDEV;
+		err = EXTERROR(EXDEV, "Cross-device rename");
 		goto out;
 	}
 	cache_purge(fvp);
@@ -2193,10 +2248,12 @@ fuse_vnop_rmdir(struct vop_rmdir_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	if (VTOFUD(vp) == VTOFUD(dvp)) {
-		return EINVAL;
+		return (EXTERROR(EINVAL, "Directory to be removed "
+		    "contains itself"));
 	}
 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
 
@@ -2233,7 +2290,8 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
 	checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 
 	if (vap->va_uid != (uid_t)VNOVAL) {
@@ -2248,19 +2306,15 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
 					return (err2);
 				if (vap->va_uid != old_va.va_uid)
 					return err;
-				else
-					accmode |= VADMIN;
 				drop_suid = true;
-			} else
-				accmode |= VADMIN;
-		} else
-			accmode |= VADMIN;
+			}
+		}
+		accmode |= VADMIN;
 	}
 	if (vap->va_gid != (gid_t)VNOVAL) {
 		if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
 			drop_suid = true;
-		if (checkperm && !groupmember(vap->va_gid, cred))
-		{
+		if (checkperm && !groupmember(vap->va_gid, cred)) {
 			/*
 			 * Non-root users may only chgrp to one of their own
 			 * groups 
@@ -2274,11 +2328,9 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
 					return (err2);
 				if (vap->va_gid != old_va.va_gid)
 					return err;
-				accmode |= VADMIN;
-			} else
-				accmode |= VADMIN;
-		} else
-			accmode |= VADMIN;
+			}
+		}
+		accmode |= VADMIN;
 	}
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
@@ -2404,7 +2456,8 @@ fuse_vnop_symlink(struct vop_symlink_args *ap)
 	size_t len;
 
 	if (fuse_isdeadfs(dvp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 	/*
 	 * Unlike the other creator type calls, here we have to create a message
@@ -2450,7 +2503,8 @@ fuse_vnop_write(struct vop_write_args *ap)
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	if (fuse_isdeadfs(vp)) {
-		return ENXIO;
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 	}
 
 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -2603,10 +2657,12 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	if (fsess_not_impl(mp, FUSE_GETXATTR))
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "extended attributes"));
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
@@ -2644,7 +2700,8 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 	if (err != 0) {
 		if (err == ENOSYS) {
 			fsess_set_notimpl(mp, FUSE_GETXATTR);
-			err = EOPNOTSUPP;
+			err = (EXTERROR(EOPNOTSUPP, "This server does not "
+			    "implement extended attributes"));
 		}
 		goto out;
 	}
@@ -2683,16 +2740,19 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
+	size_t struct_size = FUSE_COMPAT_SETXATTR_IN_SIZE;
 	char *prefix;
 	size_t len;
 	char *attr_str;
 	int err;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	if (fsess_not_impl(mp, FUSE_SETXATTR))
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "setting extended attributes"));
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
@@ -2704,9 +2764,11 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 		 * return EOPNOTSUPP.
 		 */
 		if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
-			return (EOPNOTSUPP);
+			return (EXTERROR(EOPNOTSUPP, "This server does not "
+			    "implement removing extended attributes"));
 		else
-			return (EINVAL);
+			return (EXTERROR(EINVAL, "DELETEEXTATTR should be used "
+			    "to remove extattrs"));
 	}
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
@@ -2723,17 +2785,26 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
-	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
+	/* Older FUSE servers use a smaller fuse_setxattr_in struct. */
+	if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33))
+		struct_size = sizeof(*set_xattr_in);
+
+	fdisp_init(&fdi, len + struct_size + uio->uio_resid);
 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
 
 	set_xattr_in = fdi.indata;
 	set_xattr_in->size = uio->uio_resid;
 
-	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
+	if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33)) {
+		set_xattr_in->setxattr_flags = 0;
+		set_xattr_in->padding = 0;
+	}
+
+	attr_str = (char *)fdi.indata + struct_size;
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
-	err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
+	err = uiomove((char *)fdi.indata + struct_size + len,
 	    uio->uio_resid, uio);
 	if (err != 0) {
 		goto out;
@@ -2743,7 +2814,8 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_SETXATTR);
-		err = EOPNOTSUPP;
+		err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "setting extended attributes");
 	}
 	if (err == ERESTART) {
 		/* Can't restart after calling uiomove */
@@ -2854,10 +2926,12 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	if (fsess_not_impl(mp, FUSE_LISTXATTR))
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "extended attributes"));
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
@@ -2885,7 +2959,8 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 	if (err != 0) {
 		if (err == ENOSYS) {
 			fsess_set_notimpl(mp, FUSE_LISTXATTR);
-			err = EOPNOTSUPP;
+			err = EXTERROR(EOPNOTSUPP, "This server does not "
+			    "implement extended attributes");
 		}
 		goto out;
 	}
@@ -2985,7 +3060,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap)
 	bool closefufh = false;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	if (vfs_isrdonly(mp))
 		return (EROFS);
@@ -3053,8 +3129,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap)
 			    false);
 	}
 
-out:
 	fdisp_destroy(&fdi);
+out:
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
@@ -3091,10 +3167,12 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 	int err;
 
 	if (fuse_isdeadfs(vp))
-		return (ENXIO);
+		return (EXTERROR(ENXIO, "This FUSE session is about "
+		    "to be closed"));
 
 	if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "removing extended attributes"));
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
@@ -3123,7 +3201,8 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
-		err = EOPNOTSUPP;
+		err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+		    "removing extended attributes");
 	}
 
 	fdisp_destroy(&fdi);
@@ -3177,25 +3256,27 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
 		/* NFS requires lookups for "." and ".." */
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"VOP_VPTOFH without FUSE_EXPORT_SUPPORT");
-		return EOPNOTSUPP;
+		return (EXTERROR(EOPNOTSUPP, "This server is "
+		    "missing FUSE_EXPORT_SUPPORT"));
 	}
 	if ((mp->mnt_flag & MNT_EXPORTED) &&
-		!(data->dataflags & FSESS_NO_OPENDIR_SUPPORT))
+		fsess_is_impl(mp, FUSE_OPENDIR))
 	{
 		/*
 		 * NFS is stateless, so nfsd must reopen a directory on every
 		 * call to VOP_READDIR, passing in the d_off field from the
-		 * final dirent of the previous invocation.  But without
-		 * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not
+		 * final dirent of the previous invocation.  But if the server
+		 * implements FUSE_OPENDIR, the FUSE protocol does not
 		 * guarantee that d_off will be valid after a directory is
 		 * closed and reopened.  So prohibit exporting FUSE file
-		 * systems that don't set that flag.
+		 * systems that implement FUSE_OPENDIR.
 		 *
 		 * But userspace NFS servers don't have this problem.
                  */
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
-			"VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT");
-		return EOPNOTSUPP;
+			"VOP_VPTOFH with FUSE_OPENDIR");
+		return (EXTERROR(EOPNOTSUPP, "This server implements "
+		    "FUSE_OPENDIR so is not compatible with getfh"));
 	}
 
 	err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
@@ -3209,6 +3290,7 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
 	if (fvdat->generation <= UINT32_MAX)
 		fhp->gen = fvdat->generation;
 	else
-		return EOVERFLOW;
+		return (EXTERROR(EOVERFLOW, "inode generation "
+		    "number overflow"));
 	return (0);
 }
diff --git a/sys/fs/msdosfs/denode.h b/sys/fs/msdosfs/denode.h
index 0d31b0583fa6..e6928fb46052 100644
--- a/sys/fs/msdosfs/denode.h
+++ b/sys/fs/msdosfs/denode.h
@@ -212,7 +212,7 @@ struct denode {
 	     ((dep)->de_Attributes & ATTR_DIRECTORY) ? 0 : (dep)->de_FileSize), \
 	 putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16))
 
-#if defined(_KERNEL) || defined(MAKEFS)
+#if defined(_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
 
 #define	VTODE(vp)	((struct denode *)(vp)->v_data)
 #define	DETOV(de)	((de)->de_vnode)
@@ -294,5 +294,5 @@ int removede(struct denode *pdep, struct denode *dep);
 int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred);
 int doscheckpath( struct denode *source, struct denode *target,
     daddr_t *wait_scn);
-#endif	/* _KERNEL || MAKEFS */
+#endif	/* _KERNEL || _WANT_MSDOSFS_INTERNALS */
 #endif	/* !_FS_MSDOSFS_DENODE_H_ */
diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h
index a88bfb94e91d..344cd5a9416d 100644
--- a/sys/fs/msdosfs/fat.h
+++ b/sys/fs/msdosfs/fat.h
@@ -81,7 +81,7 @@
 
 #define	MSDOSFSEOF(pmp, cn)	((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS)
 
-#if defined (_KERNEL) || defined(MAKEFS)
+#if defined (_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
 /*
  * These are the values for the function argument to the function
  * fatentry().
@@ -110,5 +110,5 @@ markvoldirty(struct msdosfsmount *pmp, bool dirty)
 	return (markvoldirty_upgrade(pmp, dirty, false));
 }
 
-#endif	/* _KERNEL || MAKEFS */
+#endif	/* _KERNEL || _WANT_MSDOSFS_INTERNALS */
 #endif	/* !_FS_MSDOSFS_FAT_H_ */
diff --git a/sys/fs/msdosfs/msdosfs_conv.c b/sys/fs/msdosfs/msdosfs_conv.c
index da4848169173..208b64930e61 100644
--- a/sys/fs/msdosfs/msdosfs_conv.c
+++ b/sys/fs/msdosfs/msdosfs_conv.c
@@ -797,19 +797,24 @@ mbsadjpos(const char **instr, size_t inlen, size_t outlen, int weight, int flag,
 static u_char *
 dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
 {
-	u_char c, *outp;
-	size_t len, olen;
+	u_char c, *outp, *outp1;
+	size_t i, len, olen;
 
 	outp = outbuf;
 	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
 		olen = len = 4;
 
+		outp1 = outp;
 		if (lower & (LCASE_BASE | LCASE_EXT))
 			msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr,
 						  ilen, (char **)&outp, &olen, KICONV_LOWER);
 		else
 			msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr,
 					     ilen, (char **)&outp, &olen);
+		for (i = 0; i < outp - outp1; i++) {
+			if (outp1[i] == '/')
+				outp1[i] = '?';
+		}
 		len -= olen;
 
 		/*
@@ -826,6 +831,8 @@ dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struc
 		c = dos2unix[c];
 		if (lower & (LCASE_BASE | LCASE_EXT))
 			c = u2l[c];
+		if (c == '/')
+			c = '?';
 		*outp++ = c;
 		outbuf[1] = '\0';
 	}
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
index 2a90339d0878..8ab6d35a2685 100644
--- a/sys/fs/msdosfs/msdosfs_lookup.c
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -198,7 +198,9 @@ msdosfs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname
 	switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
 	    cnp->cn_namelen, 0, pmp)) {
 	case 0:
-		return (EINVAL);
+		if (nameiop == CREATE || nameiop == RENAME)
+			return (EINVAL);
+		return (ENOENT);
 	case 1:
 		break;
 	case 2:
@@ -843,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn)
 	*wait_scn = 0;
 
 	pmp = target->de_pmp;
-	lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED);
 	KASSERT(pmp == source->de_pmp,
 	    ("doscheckpath: source and target on different filesystems"));
 
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
index 258c701bd300..4431d36c8a8e 100644
--- a/sys/fs/msdosfs/msdosfs_vfsops.c
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp)
 	pmp->pm_bo = bo;
 
 	lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0);
-	lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0);
 
 	TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp);
 
@@ -722,7 +721,9 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp)
 		}
 	}
 
-	clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv ;
+	clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv;
+	if (clusters >= (CLUST_RSRVD & pmp->pm_fatmask))
+		clusters = CLUST_RSRVD & pmp->pm_fatmask;
 	if (pmp->pm_maxcluster >= clusters) {
 #ifdef MSDOSFS_DEBUG
 		printf("Warning: number of clusters (%ld) exceeds FAT "
@@ -869,7 +870,6 @@ error_exit:
 	}
 	if (pmp != NULL) {
 		lockdestroy(&pmp->pm_fatlock);
-		lockdestroy(&pmp->pm_checkpath_lock);
 		free(pmp->pm_inusemap, M_MSDOSFSFAT);
 		free(pmp, M_MSDOSFSMNT);
 		mp->mnt_data = NULL;
@@ -969,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags)
 	dev_rel(pmp->pm_dev);
 	free(pmp->pm_inusemap, M_MSDOSFSFAT);
 	lockdestroy(&pmp->pm_fatlock);
-	lockdestroy(&pmp->pm_checkpath_lock);
 	free(pmp, M_MSDOSFSMNT);
 	mp->mnt_data = NULL;
 	return (error);
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
index 078ea5e52312..33e0d94954d7 100644
--- a/sys/fs/msdosfs/msdosfs_vnops.c
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap)
 	struct denode *fdip, *fip, *tdip, *tip, *nip;
 	u_char toname[12], oldname[11];
 	u_long to_diroffset;
-	bool checkpath_locked, doingdirectory, newparent;
+	bool doingdirectory, newparent;
 	int error;
 	u_long cn, pcl, blkoff;
 	daddr_t bn, wait_scn, scn;
@@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap)
 	if (tvp != NULL && tvp != tdvp)
 		VOP_UNLOCK(tvp);
 
-	checkpath_locked = false;
-
 relock:
 	doingdirectory = newparent = false;
 
@@ -1108,12 +1106,8 @@ relock:
 	if (doingdirectory && newparent) {
 		if (error != 0)	/* write access check above */
 			goto unlock;
-		lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
-		checkpath_locked = true;
 		error = doscheckpath(fip, tdip, &wait_scn);
 		if (wait_scn != 0) {
-			lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
-			checkpath_locked = false;
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
@@ -1276,8 +1270,6 @@ relock:
 	cache_purge(fvp);
 
 unlock:
-	if (checkpath_locked)
-		lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
 	vput(fdvp);
 	vput(fvp);
 	if (tvp != NULL) {
@@ -1289,7 +1281,6 @@ unlock:
 	vput(tdvp);
 	return (error);
 releout:
-	MPASS(!checkpath_locked);
 	vrele(tdvp);
 	if (tvp != NULL)
 		vrele(tvp);
@@ -1530,6 +1521,9 @@ msdosfs_readdir(struct vop_readdir_args *ap)
 	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
 #endif
 
+	if (ap->a_eofflag != NULL)
+		*ap->a_eofflag = 0;
+
 	/*
 	 * msdosfs_readdir() won't operate properly on regular files since
 	 * it does i/o only with the filesystem vnode, and hence can
@@ -1623,8 +1617,11 @@ msdosfs_readdir(struct vop_readdir_args *ap)
 		on = (offset - bias) & pmp->pm_crbomask;
 		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
 		diff = dep->de_FileSize - (offset - bias);
-		if (diff <= 0)
-			break;
+		if (diff <= 0) {
+			if (ap->a_eofflag != NULL)
+				*ap->a_eofflag = 1;
+			goto out;
+		}
 		n = min(n, diff);
 		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
 		if (error)
@@ -1655,6 +1652,8 @@ msdosfs_readdir(struct vop_readdir_args *ap)
 			 */
 			if (dentp->deName[0] == SLOT_EMPTY) {
 				brelse(bp);
+				if (ap->a_eofflag != NULL)
+					*ap->a_eofflag = 1;
 				goto out;
 			}
 			/*
@@ -1752,15 +1751,6 @@ out:
 
 	uio->uio_offset = off;
 
-	/*
-	 * Set the eofflag (NFS uses it)
-	 */
-	if (ap->a_eofflag) {
-		if (dep->de_FileSize - (offset - bias) <= 0)
-			*ap->a_eofflag = 1;
-		else
-			*ap->a_eofflag = 0;
-	}
 	return (error);
 }
 
@@ -1951,6 +1941,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap)
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		return (0);
+	case _PC_HAS_HIDDENSYSTEM:
+		*ap->a_retval = 1;
+		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
@@ -1962,6 +1955,8 @@ msdosfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct denode *dep;
 	struct defid *defhp;
+	_Static_assert(sizeof(struct defid) <= sizeof(struct fid),
+	    "struct defid cannot be larger than struct fid");
 
 	dep = VTODE(ap->a_vp);
 	defhp = (struct defid *)ap->a_fhp;
diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h
index 8f15bc2eaf42..04e6b75bea2a 100644
--- a/sys/fs/msdosfs/msdosfsmount.h
+++ b/sys/fs/msdosfs/msdosfsmount.h
@@ -52,14 +52,17 @@
 #ifndef _MSDOSFS_MSDOSFSMOUNT_H_
 #define	_MSDOSFS_MSDOSFSMOUNT_H_
 
-#if defined (_KERNEL) || defined(MAKEFS)
+#if defined(_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
 
 #include <sys/types.h>
-#ifndef MAKEFS
+#ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
-#include <sys/_task.h>
+#else
+#include <sys/_lock.h>
+#include <sys/_lockmgr.h>
 #endif
+#include <sys/_task.h>
 #include <sys/tree.h>
 
 #ifdef MALLOC_DECLARE
@@ -114,11 +117,8 @@ struct msdosfsmount {
 	void *pm_w2u;	/* Unicode->Local iconv handle */
 	void *pm_u2d;	/* Unicode->DOS iconv handle */
 	void *pm_d2u;	/* DOS->Local iconv handle */
-#ifndef MAKEFS
 	struct lock pm_fatlock;	/* lockmgr protecting allocations */
-	struct lock pm_checkpath_lock; /* protects doscheckpath result */
 	struct task pm_rw2ro_task; /* context for emergency remount ro */
-#endif
 };
 
 /*
@@ -245,9 +245,9 @@ struct msdosfs_fileno {
 #define	MSDOSFS_ASSERT_MP_LOCKED(pmp) \
 	lockmgr_assert(&(pmp)->pm_fatlock, KA_XLOCKED)
 
-#endif /* _KERNEL || MAKEFS */
+#endif /* _KERNEL || _WANT_MSDOSFS_INTERNALS */
 
-#ifndef MAKEFS
+#ifdef _KERNEL
 /*
  *  Arguments to mount MSDOS filesystems.
  */
@@ -265,7 +265,7 @@ struct msdosfs_args {
 	char	*cs_local;	/* Local Charset */
 	mode_t	dirmask;	/* dir  mask to be applied for msdosfs perms */
 };
-#endif /* MAKEFS */
+#endif /* _KERNEL */
 
 /*
  * Msdosfs mount options:
diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h
index 9b09520b3257..e6a125b388a8 100644
--- a/sys/fs/nfs/nfs.h
+++ b/sys/fs/nfs/nfs.h
@@ -865,6 +865,8 @@ struct nfsslot {
 /* Enumerated type for nfsuserd state. */
 typedef enum { NOTRUNNING=0, STARTSTOP=1, RUNNING=2 } nfsuserd_state;
 
+typedef enum { UNKNOWN=0, DELETED=1, NLINK_ZERO=2, VALID=3 } nfsremove_status;
+
 #endif	/* _KERNEL */
 
 #endif	/* _NFS_NFS_H */
diff --git a/sys/fs/nfs/nfs_commonacl.c b/sys/fs/nfs/nfs_commonacl.c
index 55e6f89dd8ec..bba1d8821a9b 100644
--- a/sys/fs/nfs/nfs_commonacl.c
+++ b/sys/fs/nfs/nfs_commonacl.c
@@ -65,7 +65,7 @@ nfsrv_dissectace(struct nfsrv_descript *nd, struct acl_entry *acep,
 		goto nfsmout;
 	} else if (len == 0) {
 		/* Netapp filers return a 0 length who for nil users */
-		acep->ae_tag = ACL_UNDEFINED_TAG;
+		acep->ae_tag = ACL_EVERYONE;	/* Avoid panics. */
 		acep->ae_id = ACL_UNDEFINED_ID;
 		acep->ae_perm = (acl_perm_t)0;
 		acep->ae_entry_type = ACL_ENTRY_TYPE_DENY;
@@ -352,32 +352,7 @@ nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen,
 		if (ace->ae_perm & ACL_SYNCHRONIZE)
 			acemask |= NFSV4ACE_SYNCHRONIZE;
 	} else {
-		if (ace->ae_perm & ACL_READ_DATA)
-			acemask |= NFSV4ACE_READDATA;
-		if (ace->ae_perm & ACL_WRITE_DATA)
-			acemask |= NFSV4ACE_WRITEDATA;
-		if (ace->ae_perm & ACL_APPEND_DATA)
-			acemask |= NFSV4ACE_APPENDDATA;
-		if (ace->ae_perm & ACL_READ_NAMED_ATTRS)
-			acemask |= NFSV4ACE_READNAMEDATTR;
-		if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS)
-			acemask |= NFSV4ACE_WRITENAMEDATTR;
-		if (ace->ae_perm & ACL_EXECUTE)
-			acemask |= NFSV4ACE_EXECUTE;
-		if (ace->ae_perm & ACL_READ_ATTRIBUTES)
-			acemask |= NFSV4ACE_READATTRIBUTES;
-		if (ace->ae_perm & ACL_WRITE_ATTRIBUTES)
-			acemask |= NFSV4ACE_WRITEATTRIBUTES;
-		if (ace->ae_perm & ACL_DELETE)
-			acemask |= NFSV4ACE_DELETE;
-		if (ace->ae_perm & ACL_READ_ACL)
-			acemask |= NFSV4ACE_READACL;
-		if (ace->ae_perm & ACL_WRITE_ACL)
-			acemask |= NFSV4ACE_WRITEACL;
-		if (ace->ae_perm & ACL_WRITE_OWNER)
-			acemask |= NFSV4ACE_WRITEOWNER;
-		if (ace->ae_perm & ACL_SYNCHRONIZE)
-			acemask |= NFSV4ACE_SYNCHRONIZE;
+		acemask = nfs_aceperm(ace->ae_perm);
 	}
 	*tl++ = txdr_unsigned(acemask);
 	*tl++ = txdr_unsigned(namelen);
@@ -388,6 +363,43 @@ nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen,
 }
 
 /*
+ * Convert ae_perm to NFSv4 ACL acemask4 for regular files.
+ */
+uint32_t
+nfs_aceperm(acl_perm_t ae_perm)
+{
+	/* Pairs of ae_perm permission bit and the matching acemask4 bit. */
+	static const struct {
+		acl_perm_t	perm;
+		uint32_t	mask;
+	} permmap[] = {
+		{ ACL_READ_DATA, NFSV4ACE_READDATA },
+		{ ACL_WRITE_DATA, NFSV4ACE_WRITEDATA },
+		{ ACL_APPEND_DATA, NFSV4ACE_APPENDDATA },
+		{ ACL_READ_NAMED_ATTRS, NFSV4ACE_READNAMEDATTR },
+		{ ACL_WRITE_NAMED_ATTRS, NFSV4ACE_WRITENAMEDATTR },
+		{ ACL_EXECUTE, NFSV4ACE_EXECUTE },
+		{ ACL_READ_ATTRIBUTES, NFSV4ACE_READATTRIBUTES },
+		{ ACL_WRITE_ATTRIBUTES, NFSV4ACE_WRITEATTRIBUTES },
+		{ ACL_DELETE, NFSV4ACE_DELETE },
+		{ ACL_READ_ACL, NFSV4ACE_READACL },
+		{ ACL_WRITE_ACL, NFSV4ACE_WRITEACL },
+		{ ACL_WRITE_OWNER, NFSV4ACE_WRITEOWNER },
+		{ ACL_SYNCHRONIZE, NFSV4ACE_SYNCHRONIZE },
+	};
+	uint32_t result = 0x0;
+	size_t idx;
+
+	/* OR in the acemask4 bit for every permission bit that is set. */
+	for (idx = 0; idx < sizeof(permmap) / sizeof(permmap[0]); idx++) {
+		if ((ae_perm & permmap[idx].perm) != 0)
+			result |= permmap[idx].mask;
+	}
+
+	return (result);
+}
+
+/*
  * Build an NFSv4 ACL.
  */
 int
diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c
index e5c658ce76d2..0ae3b94bef89 100644
--- a/sys/fs/nfs/nfs_commonkrpc.c
+++ b/sys/fs/nfs/nfs_commonkrpc.c
@@ -670,7 +670,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
     struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
     u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
 {
-	uint32_t retseq, retval, slotseq, *tl;
+	uint32_t retseq, retval, retval0, slotseq, *tl;
 	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
 	int error = 0, usegssname = 0, secflavour = AUTH_SYS;
 	int freeslot, maxslot, reterr, slotpos, timeo;
@@ -1039,7 +1039,7 @@ tryagain:
 			sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
 			mtx_unlock(&sep->nfsess_mtx);
 			/* And free the slot. */
-			nfsv4_freeslot(sep, nd->nd_slotid, false);
+			nfsv4_freeslot(sep, nd->nd_slotid, true);
 		}
 		if (stat == RPC_INTR)
 			error = EINTR;
@@ -1192,15 +1192,22 @@ tryagain:
 					if (retseq != sep->nfsess_slotseq[slot])
 						printf("retseq diff 0x%x\n",
 						    retseq);
-					retval = fxdr_unsigned(uint32_t, *++tl);
+					retval0 = fxdr_unsigned(uint32_t,*tl++);
+					retval = fxdr_unsigned(uint32_t, *tl);
 					if ((retval + 1) < sep->nfsess_foreslots
-					    )
+					    ) {
 						sep->nfsess_foreslots = (retval
 						    + 1);
-					else if ((retval + 1) >
-					    sep->nfsess_foreslots)
-						sep->nfsess_foreslots = (retval
-						    < 64) ? (retval + 1) : 64;
+						nfs_resetslots(sep);
+					} else if ((retval + 1) >
+					    sep->nfsess_foreslots) {
+						if (retval0 > retval)
+							printf("Sess:highest > "
+							    "target_highest\n");
+						sep->nfsess_foreslots =
+						    (retval < NFSV4_SLOTS) ?
+						    (retval + 1) : NFSV4_SLOTS;
+					}
 				}
 				mtx_unlock(&sep->nfsess_mtx);
 
@@ -1464,6 +1471,25 @@ nfsmout:
 }
 
 /*
+ * Reset slots above nfsess_foreslots that are not busy.
+ */
+void
+nfs_resetslots(struct nfsclsession *sep)
+{
+	int i;
+	uint64_t bitval;
+
+	mtx_assert(&sep->nfsess_mtx, MA_OWNED);
+	/* Shift a 64-bit one: "1 << n" on an int is undefined for n >= 31. */
+	for (i = sep->nfsess_foreslots; i < NFSV4_SLOTS; i++) {
+		bitval = (uint64_t)1 << i;
+		if ((sep->nfsess_slots & bitval) == 0 &&
+		    (sep->nfsess_badslots & bitval) == 0)
+			sep->nfsess_slotseq[i] = 0;
+	}
+}
+
+/*
  * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
  * wait for all requests to complete. This is used by forced unmounts
  * to terminate any outstanding RPCs.
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index 2db9af5b9ea9..0c94f4e7dc52 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -258,7 +258,8 @@ newnfs_copycred(struct nfscred *nfscr, struct ucred *cr)
 	KASSERT(nfscr->nfsc_ngroups >= 0,
 	    ("newnfs_copycred: negative nfsc_ngroups"));
 	cr->cr_uid = nfscr->nfsc_uid;
-	crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups);
+	crsetgroups_fallback(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups,
+	    GID_NOGROUP);
 }
 
 /*
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index 3c9af40253ad..a957315aaa12 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -135,7 +135,7 @@ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = {
 	{ 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Lookupp */
 	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* NVerify */
 	{ 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 },		/* Open */
-	{ 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenAttr */
+	{ 1, 1, 1, 1, LK_EXCLUSIVE, 1, 1 },		/* OpenAttr */
 	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenConfirm */
 	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenDowngrade */
 	{ 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* PutFH */
@@ -219,18 +219,19 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL;
 static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 0, 0, 1, 0, 0, 0, 0, 0 };
+    1, 0, 0, 1, 0, 0, 0, 0, 0, 0 };
 
 /* local functions */
 static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep);
 static void nfsv4_wanted(struct nfsv4lock *lp);
 static uint32_t nfsv4_filesavail(struct statfs *, struct mount *);
-static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len);
 static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name);
 static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser);
 static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **,
     int *, int *);
 static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *);
+static uint32_t vtonfsv4_type(struct vattr *);
+static __enum_uint8(vtype) nfsv4tov_type(uint32_t, uint16_t *);
 
 static struct {
 	int	op;
@@ -250,10 +251,10 @@ static struct {
 	{ NFSV4OP_CREATE, 5, "Create", 6, },
 	{ NFSV4OP_CREATE, 1, "Create", 6, },
 	{ NFSV4OP_CREATE, 3, "Create", 6, },
+	{ NFSV4OP_REMOVE, 3, "Remove", 6, },
 	{ NFSV4OP_REMOVE, 1, "Remove", 6, },
-	{ NFSV4OP_REMOVE, 1, "Remove", 6, },
-	{ NFSV4OP_SAVEFH, 5, "Rename", 6, },
-	{ NFSV4OP_SAVEFH, 4, "Link", 4, },
+	{ NFSV4OP_SAVEFH, 7, "Rename", 6, },
+	{ NFSV4OP_SAVEFH, 6, "Link", 4, },
 	{ NFSV4OP_READDIR, 2, "Readdir", 7, },
 	{ NFSV4OP_READDIR, 2, "Readdir", 7, },
 	{ NFSV4OP_GETATTR, 1, "Getattr", 7, },
@@ -308,6 +309,7 @@ static struct {
 	{ NFSV4OP_DEALLOCATE, 2, "Deallocate", 10, },
 	{ NFSV4OP_LAYOUTERROR, 1, "LayoutError", 11, },
 	{ NFSV4OP_VERIFY, 3, "AppendWrite", 11, },
+	{ NFSV4OP_OPENATTR, 3, "OpenAttr", 8, },
 };
 
 /*
@@ -317,7 +319,7 @@ static int nfs_bigrequest[NFSV42_NPROCS] = {
 	0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
-	0, 1
+	0, 1, 0
 };
 
 /*
@@ -610,32 +612,43 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap,
 		break;
 	case ND_NFSV4:
 		NFSZERO_ATTRBIT(&attrbits);
-		if (vap->va_mode != (mode_t)VNOVAL)
-			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE);
+		np = NULL;
+		if (strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0)
+			np = VTONFS(vp);
+		if (vap->va_mode != (mode_t)VNOVAL) {
+			if ((flags & NFSSATTR_NEWFILE) != 0 && np != NULL &&
+			    NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+			    NFSATTRBIT_MODEUMASK))
+				NFSSETBIT_ATTRBIT(&attrbits,
+				    NFSATTRBIT_MODEUMASK);
+			else
+				NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE);
+		}
 		if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL)
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 		if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL)
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP);
 		if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL)
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
+		if ((flags & NFSSATTR_FULL) && vap->va_flags != VNOVAL) {
+			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+		}
 		if (vap->va_atime.tv_sec != VNOVAL)
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET);
 		if (vap->va_mtime.tv_sec != VNOVAL)
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET);
-		if (vap->va_birthtime.tv_sec != VNOVAL &&
-		    strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0) {
-			/*
-			 * We can only test for support of TimeCreate if
-			 * the "vp" argument is for an NFS vnode.
-			 */
-			np = VTONFS(vp);
-			if (NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
-			    NFSATTRBIT_TIMECREATE))
-				NFSSETBIT_ATTRBIT(&attrbits,
-				    NFSATTRBIT_TIMECREATE);
-		}
+		/*
+		 * We can only test for support of TimeCreate if
+		 * the "vp" argument is for an NFS vnode.
+		 */
+		if (vap->va_birthtime.tv_sec != VNOVAL && np != NULL &&
+		    NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+		    NFSATTRBIT_TIMECREATE))
+			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
 		(void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0,
-		    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
+		    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL,
+		    false, false, false);
 		break;
 	}
 }
@@ -980,6 +993,17 @@ nfsm_fhtom(struct nfsmount *nmp, struct nfsrv_descript *nd, u_int8_t *fhp,
 		    (nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
 			fhp = nmp->nm_fh;
 			size = nmp->nm_fhsize;
+		} else if (size >= NFSX_FHMAX + NFSX_V4NAMEDDIRFH &&
+		    size <= NFSX_FHMAX + NFSX_V4NAMEDATTRFH) {
+			size -= (NFSX_FHMAX - NFSX_MYFH);
+			NFSM_BUILD(tl, uint32_t *, NFSX_MYFH +
+			    2 * NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(size);
+			NFSBCOPY(fhp, tl, NFSX_MYFH);
+			tl += (NFSX_MYFH / NFSX_UNSIGNED);
+			*tl = 0;
+			bytesize = NFSX_MYFH + 2 * NFSX_UNSIGNED;
+			break;
 		}
 		fullsiz = NFSM_RNDUP(size);
 		if (set_true) {
@@ -1277,7 +1301,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
     struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize,
     struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp,
     struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp,
-    u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred)
+    u_int32_t *leasep, u_int32_t *rderrp, bool *has_namedattrp,
+    NFSPROC_T *p, struct ucred *cred)
 {
 	u_int32_t *tl;
 	int i = 0, j, k, l = 0, m, bitpos, attrsum = 0;
@@ -1293,6 +1318,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 	gid_t gid;
 	u_int32_t freenum = 0, tuint;
 	u_int64_t uquad = 0, thyp, thyp2;
+	uint16_t tui16;
+	long has_pathconf;
 #ifdef QUOTA
 	struct dqblk dqb;
 	uid_t savuid;
@@ -1316,6 +1343,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 		 * Just set default values to some of the important ones.
 		 */
 		if (nap != NULL) {
+			VATTR_NULL(&nap->na_vattr);
 			nap->na_type = VREG;
 			nap->na_mode = 0;
 			nap->na_rdev = (NFSDEV_T)0;
@@ -1365,6 +1393,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			sfp->sf_tbytes = UINT64_MAX;
 			sfp->sf_abytes = UINT64_MAX;
 		}
+		if (has_namedattrp != NULL)
+			*has_namedattrp = false;
 	}
 
 	/*
@@ -1397,6 +1427,16 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 				NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL);
 				NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT);
 		   	   }
+			   /* Some filesystems do not support uf_hidden */
+			   if (vp == NULL || VOP_PATHCONF(vp,
+				_PC_HAS_HIDDENSYSTEM, &has_pathconf) != 0)
+			       has_pathconf = 0;
+			   if (has_pathconf == 0) {
+				 NFSCLRBIT_ATTRBIT(&checkattrbits,
+				    NFSATTRBIT_HIDDEN);
+				 NFSCLRBIT_ATTRBIT(&checkattrbits,
+				    NFSATTRBIT_SYSTEM);
+			   }
 			   if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits)
 			       || retnotsup)
 				*retcmpp = NFSERR_NOTSAME;
@@ -1407,11 +1447,16 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (compare) {
 				if (!(*retcmpp)) {
-				    if (nap->na_type != nfsv34tov_type(*tl))
+				    tui16 = 0;
+				    if (nap->na_type != nfsv4tov_type(*tl,
+					&tui16) ||
+					((nap->na_bsdflags & SFBSD_NAMEDATTR) ^
+					 tui16) != 0)
 					*retcmpp = NFSERR_NOTSAME;
 				}
 			} else if (nap != NULL) {
-				nap->na_type = nfsv34tov_type(*tl);
+				nap->na_type = nfsv4tov_type(*tl,
+				    &nap->na_bsdflags);
 			}
 			attrsum += NFSX_UNSIGNED;
 			break;
@@ -1490,9 +1535,23 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			break;
 		case NFSATTRBIT_NAMEDATTR:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			if (compare && !(*retcmpp)) {
-				if (*tl != newnfs_false)
-					*retcmpp = NFSERR_NOTSAME;
+			if (compare) {
+				if (!(*retcmpp)) {
+					if (vp == NULL || VOP_PATHCONF(vp,
+					    _PC_HAS_NAMEDATTR, &has_pathconf)
+					    != 0)
+						has_pathconf = 0;
+					if ((has_pathconf != 0 &&
+					     *tl != newnfs_true) ||
+					    (has_pathconf == 0 &&
+					    *tl != newnfs_false))
+						*retcmpp = NFSERR_NOTSAME;
+				}
+			} else if (has_namedattrp != NULL) {
+				if (*tl == newnfs_true)
+					*has_namedattrp = true;
+				else
+					*has_namedattrp = false;
 			}
 			attrsum += NFSX_UNSIGNED;
 			break;
@@ -1666,6 +1725,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 				goto nfsmout;
 			tfhsize = tnfhp->nfh_len;
 			if (compare) {
+				if (tfhsize > NFSX_MYFH)
+					tfhsize = NFSX_MYFH;
 				if (!(*retcmpp) &&
 				    !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize,
 				     fhp, fhsize))
@@ -1745,9 +1806,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 				free(cp2, M_NFSSTRING);
 			break;
 		case NFSATTRBIT_HIDDEN:
-			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			if (compare && !(*retcmpp))
-				*retcmpp = NFSERR_ATTRNOTSUPP;
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			if (compare) {
+				if (!(*retcmpp) && ((*tl == newnfs_true &&
+				    (nap->na_flags & UF_HIDDEN) == 0) ||
+				    (*tl == newnfs_false &&
+				     (nap->na_flags & UF_HIDDEN) != 0)))
+					*retcmpp = NFSERR_NOTSAME;
+			} else if (nap != NULL) {
+				if (*tl == newnfs_true)
+					nap->na_flags |= UF_HIDDEN;
+			}
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_HOMOGENEOUS:
@@ -2119,9 +2188,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			attrsum += NFSX_HYPER;
 			break;
 		case NFSATTRBIT_SYSTEM:
-			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			if (compare && !(*retcmpp))
-				*retcmpp = NFSERR_ATTRNOTSUPP;
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			if (compare) {
+				if (!(*retcmpp) && ((*tl == newnfs_true &&
+				    (nap->na_flags & UF_SYSTEM) == 0) ||
+				    (*tl == newnfs_false &&
+				     (nap->na_flags & UF_SYSTEM) != 0)))
+					*retcmpp = NFSERR_NOTSAME;
+			} else if (nap != NULL) {
+				if (*tl == newnfs_true)
+					nap->na_flags |= UF_SYSTEM;
+			}
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_TIMEACCESS:
@@ -2297,6 +2374,23 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
 			if (compare && !(*retcmpp) && i != nfs_srvmaxio)
 				*retcmpp = NFSERR_NOTSAME;
 			break;
+		case NFSATTRBIT_CHANGEATTRTYPE:
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			if (compare) {
+				if (!(*retcmpp)) {
+				    tuint = NFSV4CHANGETYPE_UNDEFINED;
+				    if ((vp->v_mount->mnt_vfc->vfc_flags &
+					VFCF_FILEREVINC) != 0)
+					tuint = NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS;
+				    else if ((vp->v_mount->mnt_vfc->vfc_flags &
+					VFCF_FILEREVCT) != 0)
+					tuint = NFSV4CHANGETYPE_TIME_METADATA;
+				    if (fxdr_unsigned(uint32_t, *tl) != tuint)
+					*retcmpp = NFSERR_NOTSAME;
+				}
+			}
+			attrsum += NFSX_UNSIGNED;
+			break;
 		default:
 			printf("EEK! nfsv4_loadattr unknown attr=%d\n",
 				bitpos);
@@ -2553,7 +2647,8 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
     NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror,
     nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram,
     int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno,
-    struct statfs *pnfssf)
+    struct statfs *pnfssf, bool xattrsupp, bool has_hiddensystem,
+    bool has_namedattr)
 {
 	int bitpos, retnum = 0;
 	u_int32_t *tl;
@@ -2567,8 +2662,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 	struct nfsfsinfo fsinf;
 	struct timespec temptime;
 	NFSACL_T *aclp, *naclp = NULL;
-	size_t atsiz;
-	bool xattrsupp;
+	short irflag;
 #ifdef QUOTA
 	struct dqblk dqb;
 	uid_t savuid;
@@ -2652,18 +2746,6 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 		}
 	}
 
-	/* Check to see if Extended Attributes are supported. */
-	xattrsupp = false;
-	if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) {
-		if (NFSVOPLOCK(vp, LK_SHARED) == 0) {
-			error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER,
-			    "xxx", NULL, &atsiz, cred, p);
-			NFSVOPUNLOCK(vp);
-			if (error != EOPNOTSUPP)
-				xattrsupp = true;
-		}
-	}
-
 	/*
 	 * Put out the attribute bitmap for the ones being filled in
 	 * and get the field for the number of attributes returned.
@@ -2685,11 +2767,15 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 			    NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT);
 			    NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL);
 			}
+			if (!has_hiddensystem) {
+			    NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+			    NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+			}
 			retnum += nfsrv_putattrbit(nd, &attrbits);
 			break;
 		case NFSATTRBIT_TYPE:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
-			*tl = vtonfsv34_type(vap->va_type);
+			*tl = vtonfsv4_type(vap);
 			retnum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_FHEXPIRETYPE:
@@ -2725,7 +2811,10 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 			break;
 		case NFSATTRBIT_NAMEDATTR:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
-			*tl = newnfs_false;
+			if (has_namedattr)
+				*tl = newnfs_true;
+			else
+				*tl = newnfs_false;
 			retnum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_FSID:
@@ -2786,7 +2875,15 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 			retnum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_FILEHANDLE:
-			retnum += nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, 0, 0);
+			siz = 0;
+			if (vp != NULL) {
+				irflag = vn_irflag_read(vp);
+				if ((irflag & VIRF_NAMEDDIR) != 0)
+					siz = NFSX_FHMAX + 2;
+				else if ((irflag & VIRF_NAMEDATTR) != 0)
+					siz = NFSX_FHMAX + 3;
+			}
+			retnum += nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, siz, 0);
 			break;
 		case NFSATTRBIT_FILEID:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER);
@@ -2819,6 +2916,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 			*tl = 0;
 			retnum += 2 * NFSX_UNSIGNED;
 			break;
+		case NFSATTRBIT_HIDDEN:
+			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+			if ((vap->va_flags & UF_HIDDEN) != 0)
+				*tl = newnfs_true;
+			else
+				*tl = newnfs_false;
+			retnum += NFSX_UNSIGNED;
+			break;
 		case NFSATTRBIT_HOMOGENEOUS:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS)
@@ -3008,6 +3113,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 			txdr_hyper(vap->va_bytes, tl);
 			retnum += NFSX_HYPER;
 			break;
+		case NFSATTRBIT_SYSTEM:
+			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+			if ((vap->va_flags & UF_SYSTEM) != 0)
+				*tl = newnfs_true;
+			else
+				*tl = newnfs_false;
+			retnum += NFSX_UNSIGNED;
+			break;
 		case NFSATTRBIT_TIMEACCESS:
 			NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME);
 			txdr_nfsv4time(&vap->va_atime, tl);
@@ -3109,6 +3222,33 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
 				*tl = newnfs_false;
 			retnum += NFSX_UNSIGNED;
 			break;
+		case NFSATTRBIT_MODEUMASK:
+			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			/*
+			 * Since FreeBSD applies the umask above the VFS/VOP,
+			 * there is no umask to handle here.  If FreeBSD
+			 * moves handling of umask to below the VFS/VOP,
+			 * this could change.
+			 */
+			*tl++ = vtonfsv34_mode(vap->va_mode);
+			*tl = 0;
+			retnum += 2 * NFSX_UNSIGNED;
+			break;
+		case NFSATTRBIT_CHANGEATTRTYPE:
+			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+			*tl = txdr_unsigned(NFSV4CHANGETYPE_UNDEFINED);
+			if (mp != NULL) {
+				if ((mp->mnt_vfc->vfc_flags &
+				    VFCF_FILEREVINC) != 0)
+					*tl = txdr_unsigned(
+					   NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS);
+				else if ((mp->mnt_vfc->vfc_flags &
+				    VFCF_FILEREVCT) != 0)
+					*tl = txdr_unsigned(
+					   NFSV4CHANGETYPE_TIME_METADATA);
+			}
+			retnum += NFSX_UNSIGNED;
+			break;
 		default:
 			printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos);
 		}
@@ -3419,13 +3559,13 @@ tryagain:
 		/*
 		 * If an '@' is found and the domain name matches, search for
 		 * the name with dns stripped off.
-		 * Mixed case alpahbetics will match for the domain name, but
-		 * all upper case will not.
+		 * The match for alphabetics is now case insensitive,
+		 * since RFC8881 defines this string as a DNS domain name.
 		 */
 		if (cnt == 0 && i < len && i > 0 &&
 		    (len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) &&
-		    !nfsrv_cmpmixedcase(cp,
-		     NFSD_VNET(nfsrv_dnsname), NFSD_VNET(nfsrv_dnsnamelen))) {
+		    strncasecmp(cp, NFSD_VNET(nfsrv_dnsname),
+		     NFSD_VNET(nfsrv_dnsnamelen)) == 0) {
 			len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1);
 			*(cp - 1) = '\0';
 		}
@@ -3646,8 +3786,8 @@ tryagain:
 		 */
 		if (cnt == 0 && i < len && i > 0 &&
 		    (len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) &&
-		    !nfsrv_cmpmixedcase(cp,
-		     NFSD_VNET(nfsrv_dnsname), NFSD_VNET(nfsrv_dnsnamelen))) {
+		    strncasecmp(cp, NFSD_VNET(nfsrv_dnsname),
+		    NFSD_VNET(nfsrv_dnsnamelen)) == 0) {
 			len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1);
 			*(cp - 1) = '\0';
 		}
@@ -3696,35 +3836,6 @@ out:
 }
 
 /*
- * Cmp len chars, allowing mixed case in the first argument to match lower
- * case in the second, but not if the first argument is all upper case.
- * Return 0 for a match, 1 otherwise.
- */
-static int
-nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len)
-{
-	int i;
-	u_char tmp;
-	int fndlower = 0;
-
-	for (i = 0; i < len; i++) {
-		if (*cp >= 'A' && *cp <= 'Z') {
-			tmp = *cp++ + ('a' - 'A');
-		} else {
-			tmp = *cp++;
-			if (tmp >= 'a' && tmp <= 'z')
-				fndlower = 1;
-		}
-		if (tmp != *cp2++)
-			return (1);
-	}
-	if (fndlower)
-		return (0);
-	else
-		return (1);
-}
-
-/*
  * Set the port for the nfsuserd.
  */
 int
@@ -4032,8 +4143,9 @@ nfssvc_idname(struct nfsd_idargs *nidp)
 			 */
 			cr = crget();
 			cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid;
-			crsetgroups(cr, nidp->nid_ngroup, grps);
-			cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0];
+			crsetgroups_fallback(cr, nidp->nid_ngroup, grps,
+			    GID_NOGROUP);
+			cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
 			cr->cr_prison = curthread->td_ucred->cr_prison;
 			prison_hold(cr->cr_prison);
 #ifdef MAC
@@ -4644,7 +4756,7 @@ newnfs_sndlock(int *flagp)
 		ts.tv_sec = 0;
 		ts.tv_nsec = 0;
 		(void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR,
-		    PZERO - 1, "nfsndlck", &ts);
+		    PVFS, "nfsndlck", &ts);
 	}
 	*flagp |= NFSR_SNDLOCK;
 	NFSUNLOCKSOCK();
@@ -5025,6 +5137,8 @@ nfsv4_freeslot(struct nfsclsession *sep, int slot, bool resetseq)
 	mtx_lock(&sep->nfsess_mtx);
 	if (resetseq)
 		sep->nfsess_slotseq[slot]--;
+	else if (slot > sep->nfsess_foreslots)
+		sep->nfsess_slotseq[slot] = 0;
 	if ((bitval & sep->nfsess_slots) == 0)
 		printf("freeing free slot!!\n");
 	sep->nfsess_slots &= ~bitval;
@@ -5154,3 +5268,46 @@ nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclsession *tsep,
 	m_freem(nd->nd_mrep);
 	return (error);
 }
+
+/*
+ * Translate vap->va_type into an NFSv4 type passed through txdr_unsigned();
+ * SFBSD_NAMEDATTR in va_bsdflags maps NFDIR/NFREG to NFATTRDIR/NFNAMEDATTR.
+ */
+static uint32_t
+vtonfsv4_type(struct vattr *vap)
+{
+	nfstype ntyp;
+
+	if (vap->va_type >= 9)
+		ntyp = NFNON;
+	else
+		ntyp = nfsv34_type[vap->va_type];
+	if ((vap->va_bsdflags & SFBSD_NAMEDATTR) != 0) {
+		if (ntyp == NFDIR)
+			ntyp = NFATTRDIR;
+		else if (ntyp == NFREG)
+			ntyp = NFNAMEDATTR;
+	}
+	return (txdr_unsigned((uint32_t)ntyp));
+}
+
+/*
+ * Map an NFSv4 type to a vnode type, flagging named attrs in *bsdflags.
+ */
+static __enum_uint8(vtype)
+nfsv4tov_type(uint32_t ntyp, uint16_t *bsdflags)
+{
+	__enum_uint8(vtype) vtyp;
+
+	ntyp = fxdr_unsigned(uint32_t, ntyp) % (NFNAMEDATTR + 1);
+	if (ntyp == NFATTRDIR) {
+		vtyp = VDIR;
+		*bsdflags |= SFBSD_NAMEDATTR;
+	} else if (ntyp == NFNAMEDATTR) {
+		vtyp = VREG;
+		*bsdflags |= SFBSD_NAMEDATTR;
+	} else {
+		vtyp = nv34tov_type[ntyp];
+	}
+	return (vtyp);
+}
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 950e0c097457..54f60a753c50 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -169,6 +169,7 @@ int nfsrv_mdscopymr(char *, char *, char *, char *, int *, char *, NFSPROC_T *,
     struct vnode **, struct vnode **, struct pnfsdsfile **, struct nfsdevice **,
     struct nfsdevice **);
 void nfsrv_marknospc(char *, bool);
+void nfsrv_removedeleg(fhandle_t *, struct nfsrv_descript *, NFSPROC_T *);
 
 /* nfs_nfsdserv.c */
 int nfsrvd_access(struct nfsrv_descript *, int,
@@ -340,7 +341,7 @@ int nfsv4_loadattr(struct nfsrv_descript *, vnode_t,
     struct nfsvattr *, struct nfsfh **, fhandle_t *, int,
     struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *,
     struct nfsfsinfo *, NFSACL_T *,
-    int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *);
+    int, int *, u_int32_t *, u_int32_t *, bool *, NFSPROC_T *, struct ucred *);
 int nfsv4_lock(struct nfsv4lock *, int, int *, struct mtx *, struct mount *);
 void nfsv4_unlock(struct nfsv4lock *, int);
 void nfsv4_relref(struct nfsv4lock *);
@@ -394,8 +395,9 @@ int nfsrv_putopbit(struct nfsrv_descript *, nfsopbit_t *);
 void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int,
     struct nfsvattr *);
 int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *,
-    struct vattr *, fhandle_t *, int, nfsattrbit_t *,
-    struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *);
+    struct vattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *,
+    NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *, bool, bool,
+    bool);
 void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *);
 struct mbuf *nfsrv_adj(struct mbuf *, int, int);
 void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *);
@@ -438,6 +440,7 @@ int nfs_supportsnfsv4acls(vnode_t);
 /* nfs_commonacl.c */
 int nfsrv_dissectace(struct nfsrv_descript *, struct acl_entry *,
     bool, int *, int *, NFSPROC_T *);
+uint32_t nfs_aceperm(acl_perm_t);
 int nfsrv_buildacl(struct nfsrv_descript *, NFSACL_T *, __enum_uint8(vtype),
     NFSPROC_T *);
 int nfsrv_compareacl(NFSACL_T *, NFSACL_T *);
@@ -481,11 +484,13 @@ int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t,
 int nfsrpc_create(vnode_t, char *, int, struct vattr *, nfsquad_t,
     int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
     struct nfsfh **, int *, int *);
-int nfsrpc_remove(vnode_t, char *, int, vnode_t, struct ucred *, NFSPROC_T *,
-    struct nfsvattr *, int *);
-int nfsrpc_rename(vnode_t, vnode_t, char *, int, vnode_t, vnode_t, char *, int,
-    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
-    int *, int *);
+int nfsrpc_remove(struct vnode *, char *, int, struct vnode *,
+    struct nfsvattr *, int *, nfsremove_status *, struct nfsvattr *, int *,
+    struct ucred *, NFSPROC_T *);
+int nfsrpc_rename(struct vnode *, struct vnode *, char *, int, struct vnode *,
+    struct vnode *, char *, int, nfsremove_status *, struct nfsvattr *,
+    struct nfsvattr *, int *, int *, struct nfsvattr *, int *, struct ucred *,
+    NFSPROC_T *);
 int nfsrpc_link(vnode_t, vnode_t, char *, int,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
     int *, int *);
@@ -515,7 +520,7 @@ int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *, uint32_t *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
 int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *,
     NFSPROC_T *, struct nfsvattr *, int *);
-int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *,
+int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, bool *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
 int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *,
     NFSPROC_T *);
@@ -568,6 +573,9 @@ int nfsrpc_listextattr(vnode_t, uint64_t *, struct uio *, size_t *, bool *,
 int nfsrpc_rmextattr(vnode_t, const char *, struct nfsvattr *, int *,
     struct ucred *, NFSPROC_T *);
 void nfsrpc_bindconnsess(CLIENT *, void *, struct ucred *);
+int nfsrpc_openattr(struct nfsmount *, struct vnode *, uint8_t *, int,
+    bool, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsfh **,
+    int *);
 
 /* nfs_clstate.c */
 int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
@@ -606,12 +614,12 @@ int nfscl_doclose(vnode_t, struct nfsclclient **, NFSPROC_T *);
 int nfsrpc_doclose(struct nfsmount *, struct nfsclopen *, NFSPROC_T *, bool,
     bool);
 int nfscl_deleg(mount_t, struct nfsclclient *, u_int8_t *, int,
-    struct ucred *, NFSPROC_T *, struct nfscldeleg **);
+    struct ucred *, NFSPROC_T *, struct nfscldeleg *);
 void nfscl_lockinit(struct nfsv4lock *);
 void nfscl_lockexcl(struct nfsv4lock *, void *);
 void nfscl_lockunlock(struct nfsv4lock *);
 void nfscl_lockderef(struct nfsv4lock *);
-void nfscl_delegreturnvp(vnode_t, NFSPROC_T *);
+void nfscl_delegreturnvp(struct vnode *, bool, NFSPROC_T *);
 void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
 void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
     int);
@@ -626,7 +634,7 @@ int nfscl_renamedeleg(vnode_t, nfsv4stateid_t *, int *, vnode_t,
     nfsv4stateid_t *, int *, NFSPROC_T *);
 void nfscl_reclaimnode(vnode_t);
 void nfscl_newnode(vnode_t);
-void nfscl_delegmodtime(vnode_t);
+void nfscl_delegmodtime(struct vnode *, struct timespec *);
 void nfscl_deleggetmodtime(vnode_t, struct timespec *);
 int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
     struct nfsmount *, NFSPROC_T *);
@@ -651,6 +659,8 @@ void nfscl_freelayout(struct nfscllayout *);
 void nfscl_freeflayout(struct nfsclflayout *);
 void nfscl_freedevinfo(struct nfscldevinfo *);
 int nfscl_layoutcommit(vnode_t, NFSPROC_T *);
+int nfscl_delegacecheck(struct vnode *, accmode_t, struct ucred *);
+void nfscl_startdelegrecall(struct nfsclclient *, struct nfsfh *);
 
 /* nfs_clport.c */
 int nfscl_nget(mount_t, vnode_t, struct nfsfh *,
@@ -707,12 +717,12 @@ int nfsvno_symlink(struct nameidata *, struct nfsvattr *, char *, int, int,
     uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *);
 int nfsvno_getsymlink(struct nfsrv_descript *, struct nfsvattr *,
     NFSPROC_T *, char **, int *);
-int nfsvno_removesub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
-    struct nfsexstuff *);
+int nfsvno_removesub(struct nameidata *, bool, struct nfsrv_descript *,
+    NFSPROC_T *, struct nfsexstuff *);
 int nfsvno_rmdirsub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
     struct nfsexstuff *);
-int nfsvno_rename(struct nameidata *, struct nameidata *, u_int32_t,
-    u_int32_t, struct ucred *, NFSPROC_T *);
+int nfsvno_rename(struct nameidata *, struct nameidata *,
+    struct nfsrv_descript *, NFSPROC_T *);
 int nfsvno_link(struct nameidata *, vnode_t, nfsquad_t, struct ucred *,
     NFSPROC_T *, struct nfsexstuff *);
 int nfsvno_fsync(vnode_t, u_int64_t, int, struct ucred *, NFSPROC_T *);
@@ -726,7 +736,8 @@ int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *,
     NFSPROC_T *);
 int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t,
     struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *,
-    struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t);
+    struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, bool, bool,
+    bool);
 int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
     NFSACL_T *, NFSPROC_T *);
 int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
@@ -780,6 +791,7 @@ int newnfs_request(struct nfsrv_descript *, struct nfsmount *,
     struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *,
     struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *,
     struct nfsclsession *);
+void nfs_resetslots(struct nfsclsession *);
 int newnfs_connect(struct nfsmount *, struct nfssockreq *,
     struct ucred *, NFSPROC_T *, int, bool, struct __rpc_client **);
 void newnfs_disconnect(struct nfsmount *, struct nfssockreq *);
diff --git a/sys/fs/nfs/nfscl.h b/sys/fs/nfs/nfscl.h
index a52b9e433145..3b1445e1923c 100644
--- a/sys/fs/nfs/nfscl.h
+++ b/sys/fs/nfs/nfscl.h
@@ -68,10 +68,11 @@ struct nfsv4node {
  * These flag bits are used for the argument to nfscl_fillsattr() to
  * indicate special handling of the attributes.
  */
-#define	NFSSATTR_FULL		0x1
-#define	NFSSATTR_SIZE0		0x2
-#define	NFSSATTR_SIZENEG1	0x4
-#define	NFSSATTR_SIZERDEV	0x8
+#define	NFSSATTR_FULL		0x01
+#define	NFSSATTR_SIZE0		0x02
+#define	NFSSATTR_SIZENEG1	0x04
+#define	NFSSATTR_SIZERDEV	0x08
+#define	NFSSATTR_NEWFILE	0x10
 
 /* Use this macro for debug printfs. */
 #define	NFSCL_DEBUG(level, ...)	do {					\
diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h
index d9f5ed13b54f..92669ff8d1aa 100644
--- a/sys/fs/nfs/nfsclstate.h
+++ b/sys/fs/nfs/nfsclstate.h
@@ -116,6 +116,10 @@ struct nfsclclient {
 	struct proc		*nfsc_renewthread;
 	struct nfsmount		*nfsc_nmp;
 	time_t			nfsc_expire;
+	int			nfsc_delegcnt;
+	int			nfsc_deleghighwater;
+	int			nfsc_layoutcnt;
+	int			nfsc_layouthighwater;
 	u_int32_t		nfsc_clientidrev;
 	u_int32_t		nfsc_rev;
 	u_int32_t		nfsc_renew;
diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h
index 0b16ba9b85a8..c30b46261df0 100644
--- a/sys/fs/nfs/nfsport.h
+++ b/sys/fs/nfs/nfsport.h
@@ -439,10 +439,13 @@
 /* Do an NFSv4 Verify+Write. */
 #define	NFSPROC_APPENDWRITE	69
 
+/* Do an NFSv4 Openattr. */
+#define	NFSPROC_OPENATTR	70
+
 /*
  * Must be defined as one higher than the last NFSv4.2 Proc# above.
  */
-#define	NFSV42_NPROCS		70
+#define	NFSV42_NPROCS		71
 
 /* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */
 #define	NFSV42_OLDNPROCS	69
@@ -474,7 +477,7 @@ struct nfsstatsv1 {
 	uint64_t	readlink_bios;
 	uint64_t	biocache_readdirs;
 	uint64_t	readdir_bios;
-	uint64_t	rpccnt[NFSV42_NPROCS + 10];
+	uint64_t	rpccnt[NFSV42_NPROCS + 9];
 	uint64_t	rpcretries;
 	uint64_t	srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
 	uint64_t	srvlayouts;
@@ -690,6 +693,7 @@ struct nfsvattr {
 #define	na_bytes	na_vattr.va_bytes
 #define	na_filerev	na_vattr.va_filerev
 #define	na_vaflags	na_vattr.va_vaflags
+#define	na_bsdflags	na_vattr.va_bsdflags
 
 #include <fs/nfsclient/nfsnode.h>
 
@@ -1180,9 +1184,11 @@ struct nfsreq {
  */
 #ifdef VV_DISABLEDELEG
 #define	NFSVNO_DELEGOK(v)						\
-	((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0)
+	((v) == NULL || (((v)->v_vflag & VV_DISABLEDELEG) == 0 &&	\
+	 (vn_irflag_read(v) & VIRF_NAMEDATTR) == 0))
 #else
-#define	NFSVNO_DELEGOK(v)	(1)
+#define	NFSVNO_DELEGOK(v)						\
+	((v) == NULL || (vn_irflag_read(v) & VIRF_NAMEDATTR) == 0)
 #endif
 
 /*
diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h
index cef886755d5a..cb5a80e8df73 100644
--- a/sys/fs/nfs/nfsproto.h
+++ b/sys/fs/nfs/nfsproto.h
@@ -275,6 +275,8 @@
 #define	NFSX_V4SESSIONID	16
 #define	NFSX_V4DEVICEID		16
 #define	NFSX_V4PNFSFH		(sizeof(fhandle_t) + 1)
+#define	NFSX_V4NAMEDDIRFH	2
+#define	NFSX_V4NAMEDATTRFH	3
 #define	NFSX_V4FILELAYOUT	(4 * NFSX_UNSIGNED + NFSX_V4DEVICEID +	\
 				 NFSX_HYPER + NFSM_RNDUP(NFSX_V4PNFSFH))
 #define	NFSX_V4FLEXLAYOUT(m)	(NFSX_HYPER + 3 * NFSX_UNSIGNED +		\
@@ -406,10 +408,13 @@
 /* Do an NFSv4 Verify+Write. */
 #define	NFSPROC_APPENDWRITE	69
 
+/* Do an NFSv4 Openattr. */
+#define	NFSPROC_OPENATTR	70
+
 /*
  * Must be defined as one higher than the last NFSv4.2 Proc# above.
  */
-#define	NFSV42_NPROCS		70
+#define	NFSV42_NPROCS		71
 
 /* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */
 #define	NFSV42_OLDNPROCS	69
@@ -619,6 +624,8 @@
 #define	NFSV4OPEN_WDCONTENTION		0x00100000
 #define	NFSV4OPEN_WDNOTWANTED		0x00200000
 #define	NFSV4OPEN_WDSUPPFTYPE		0x00400000
+#define	NFSV4OPEN_WDNOTSUPPDOWNGRADE	0x00800000
+#define	NFSV4OPEN_WDNOTSUPPUPGRADE	0x01000000
 
 /*
  * NFS V4 File Handle types
@@ -742,6 +749,17 @@
 #define	NFSSECINFONONAME_CURFH	0
 #define	NFSSECINFONONAME_PARENT	1
 
+/* Bits for CB_RECALL_ANY. */
+#define	NFSRCA4_RDATA_DLG	0x00000001
+#define	NFSRCA4_WDATA_DLG	0x00000002
+#define	NFSRCA4_DIR_DLG		0x00000004
+#define	NFSRCA4_FILE_LAYOUT	0x00000008
+#define	NFSRCA4_BLK_LAYOUT	0x00000010
+#define	NFSRCA4_OBJ_LAYOUT_MIN	0x00000100
+#define	NFSRCA4_OBJ_LAYOUT_MAX	0x00000200
+#define	NFSRCA4_FF_LAYOUT_READ	0x00010000
+#define	NFSRCA4_FF_LAYOUT_RW	0x00020000
+
 #if defined(_KERNEL) || defined(KERNEL)
 /* Conversion macros */
 #define	vtonfsv2_mode(t,m) 						\
@@ -1002,7 +1020,7 @@ struct nfsv3_sattr {
 #define	NFSATTRBIT_SPACEFREED		78
 #define	NFSATTRBIT_CHANGEATTRTYPE	79
 #define	NFSATTRBIT_SECLABEL		80
-/* Not sure what attribute bit #81 is? */
+#define	NFSATTRBIT_MODEUMASK		81
 #define	NFSATTRBIT_XATTRSUPPORT		82
 
 #define	NFSATTRBM_SUPPORTEDATTRS	0x00000001
@@ -1086,7 +1104,7 @@ struct nfsv3_sattr {
 #define	NFSATTRBM_SPACEFREED		0x00004000
 #define	NFSATTRBM_CHANGEATTRTYPE	0x00008000
 #define	NFSATTRBM_SECLABEL		0x00010000
-/* Not sure what attribute bit#81/0x00020000 is? */
+#define	NFSATTRBM_MODEUMASK		0x00020000
 #define	NFSATTRBM_XATTRSUPPORT		0x00040000
 
 #define	NFSATTRBIT_MAX			83
@@ -1124,6 +1142,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_FILESFREE |						\
  	NFSATTRBM_FILESTOTAL |						\
 	NFSATTRBM_FSLOCATIONS |						\
+	NFSATTRBM_HIDDEN |						\
  	NFSATTRBM_HOMOGENEOUS |						\
  	NFSATTRBM_MAXFILESIZE |						\
  	NFSATTRBM_MAXLINK |						\
@@ -1145,6 +1164,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_SPACEFREE |						\
  	NFSATTRBM_SPACETOTAL |						\
  	NFSATTRBM_SPACEUSED |						\
+	NFSATTRBM_SYSTEM |						\
  	NFSATTRBM_TIMEACCESS |						\
  	NFSATTRBM_TIMECREATE |						\
  	NFSATTRBM_TIMEDELTA |						\
@@ -1174,6 +1194,7 @@ struct nfsv3_sattr {
 	NFSATTRBM_LAYOUTBLKSIZE |					\
 	NFSATTRBM_LAYOUTALIGNMENT |					\
 	NFSATTRBM_SUPPATTREXCLCREAT |					\
+	NFSATTRBM_CHANGEATTRTYPE |					\
 	NFSATTRBM_XATTRSUPPORT)
 
 /*
@@ -1181,7 +1202,8 @@ struct nfsv3_sattr {
  */
 #define	NFSATTRBIT_SUPPSETONLY1	 (NFSATTRBM_TIMEACCESSSET |		\
 				 NFSATTRBM_TIMEMODIFYSET)
-#define	NFSATTRBIT_SUPPSETONLY2	(NFSATTRBM_MODESETMASKED)
+#define	NFSATTRBIT_SUPPSETONLY2	(NFSATTRBM_MODESETMASKED |		\
+				 NFSATTRBM_MODEUMASK)
 
 /*
  * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31
@@ -1190,16 +1212,19 @@ struct nfsv3_sattr {
  */
 #define	NFSATTRBIT_SETABLE0						\
 	(NFSATTRBM_SIZE |						\
+	NFSATTRBM_HIDDEN |						\
 	NFSATTRBM_ACL)
 #define	NFSATTRBIT_SETABLE1						\
  	(NFSATTRBM_MODE |						\
  	NFSATTRBM_OWNER |						\
  	NFSATTRBM_OWNERGROUP |						\
- 	NFSATTRBM_TIMECREATE |					\
+	NFSATTRBM_SYSTEM |						\
+ 	NFSATTRBM_TIMECREATE |						\
  	NFSATTRBM_TIMEACCESSSET |					\
  	NFSATTRBM_TIMEMODIFYSET)
 #define	NFSATTRBIT_SETABLE2						\
-	(NFSATTRBM_MODESETMASKED)
+	(NFSATTRBM_MODESETMASKED |					\
+	NFSATTRBM_MODEUMASK)
 
 /*
  * NFSATTRBIT_NFSV41 - Attributes only supported by NFSv4.1.
@@ -1216,7 +1241,10 @@ struct nfsv3_sattr {
 /*
  * NFSATTRBIT_NFSV42 - Attributes only supported by NFSv4.2.
  */
-#define	NFSATTRBIT_NFSV42_2	NFSATTRBM_XATTRSUPPORT
+#define	NFSATTRBIT_NFSV42_2						\
+	(NFSATTRBM_CHANGEATTRTYPE |					\
+	NFSATTRBM_XATTRSUPPORT |					\
+	NFSATTRBM_MODEUMASK)
 
 /*
  * Set of attributes that the getattr vnode op needs.
@@ -1230,6 +1258,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_SIZE |						\
  	NFSATTRBM_FSID |						\
  	NFSATTRBM_FILEID |						\
+	NFSATTRBM_HIDDEN |						\
  	NFSATTRBM_MAXREAD)
 
 /*
@@ -1242,6 +1271,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_OWNERGROUP |						\
  	NFSATTRBM_RAWDEV |						\
  	NFSATTRBM_SPACEUSED |						\
+	NFSATTRBM_SYSTEM |						\
  	NFSATTRBM_TIMEACCESS |						\
 	NFSATTRBM_TIMECREATE |						\
  	NFSATTRBM_TIMEMETADATA |					\
@@ -1264,6 +1294,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_SIZE |						\
  	NFSATTRBM_FSID |						\
  	NFSATTRBM_FILEID |						\
+	NFSATTRBM_HIDDEN |						\
  	NFSATTRBM_MAXREAD)
 
 /*
@@ -1274,6 +1305,7 @@ struct nfsv3_sattr {
  	NFSATTRBM_NUMLINKS |						\
  	NFSATTRBM_RAWDEV |						\
  	NFSATTRBM_SPACEUSED |						\
+	NFSATTRBM_SYSTEM |						\
  	NFSATTRBM_TIMEACCESS |						\
 	NFSATTRBM_TIMECREATE |						\
  	NFSATTRBM_TIMEMETADATA |					\
@@ -1390,6 +1422,7 @@ struct nfsv3_sattr {
  * NFSGETATTRBIT_PATHCONF0 - bits 0<->31
  */
 #define	NFSGETATTRBIT_PATHCONF0	(NFSATTRBIT_GETATTR0 |			\
+				NFSATTRBM_NAMEDATTR |			\
 			 	NFSATTRBM_CASEINSENSITIVE |		\
 			 	NFSATTRBM_CASEPRESERVING |		\
 			 	NFSATTRBM_CHOWNRESTRICTED |		\
@@ -1651,4 +1684,11 @@ typedef struct nfsv4stateid nfsv4stateid_t;
 #define	NFSV4SXATTR_CREATE	1
 #define	NFSV4SXATTR_REPLACE	2
 
+/* Values for ChangeAttrType (RFC-7862). */
+#define	NFSV4CHANGETYPE_MONOTONIC_INCR		0
+#define	NFSV4CHANGETYPE_VERS_COUNTER		1
+#define	NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS	2
+#define	NFSV4CHANGETYPE_TIME_METADATA		3
+#define	NFSV4CHANGETYPE_UNDEFINED		4
+
 #endif	/* _NFS_NFSPROTO_H_ */
diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h
index da214ae9d4e9..cc19ed6fa1d2 100644
--- a/sys/fs/nfs/nfsrvstate.h
+++ b/sys/fs/nfs/nfsrvstate.h
@@ -333,7 +333,7 @@ struct nfsf_rec {
 	u_int32_t	numboots;		/* Number of boottimes */
 };
 
-void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *);
+void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *, bool, SVCXPRT **);
 void nfsrv_freedeleglist(struct nfsstatehead *);
 
 /*
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index c691e797aa01..e181bf593e23 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -366,7 +366,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
 	bool old_lock;
 
 	/*
-	 * Ensure the exclusove access to the node before checking
+	 * Ensure the exclusive access to the node before checking
 	 * whether the cache is consistent.
 	 */
 	old_lock = ncl_excl_start(vp);
diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c
index 270f39d03c90..bca0bdcd0df1 100644
--- a/sys/fs/nfsclient/nfs_clcomsubs.c
+++ b/sys/fs/nfsclient/nfs_clcomsubs.c
@@ -271,7 +271,8 @@ nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap)
 
 	if (nd->nd_flag & ND_NFSV4) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL,
-		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
+		    NULL);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR);
 		nap->na_type = nfsv34tov_type(fp->fa_type);
diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c
index be2024730cf0..f85f961d424e 100644
--- a/sys/fs/nfsclient/nfs_clnode.c
+++ b/sys/fs/nfsclient/nfs_clnode.c
@@ -205,7 +205,7 @@ nfs_freesillyrename(void *arg, __unused int pending)
 }
 
 static void
-ncl_releasesillyrename(struct vnode *vp, struct thread *td)
+ncl_releasesillyrename(struct vnode *vp, bool flushed, struct thread *td)
 {
 	struct nfsnode *np;
 	struct sillyrename *sp;
@@ -220,7 +220,8 @@ ncl_releasesillyrename(struct vnode *vp, struct thread *td)
 		sp = NULL;
 	if (sp != NULL) {
 		NFSUNLOCKNODE(np);
-		(void) ncl_vinvalbuf(vp, 0, td, 1);
+		if (flushed)
+			(void)ncl_vinvalbuf(vp, 0, td, 1);
 		/*
 		 * Remove the silly file that was rename'd earlier
 		 */
@@ -238,9 +239,13 @@ ncl_inactive(struct vop_inactive_args *ap)
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np;
 	struct thread *td;
+	struct nfsmount *nmp;
+	bool flushed;
 
 	td = curthread;
 	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	flushed = true;
 	if (NFS_ISV4(vp) && vp->v_type == VREG) {
 		NFSLOCKNODE(np);
 		np->n_openstateid = NULL;
@@ -251,13 +256,18 @@ ncl_inactive(struct vop_inactive_args *ap)
 		 * buffers/pages must be flushed before the close, so that the
 		 * stateid is available for the writes.
 		 */
-		vnode_pager_clean_sync(vp);
-		(void)ncl_flush(vp, MNT_WAIT, td, 1, 0);
+		if ((nmp->nm_flag & NFSMNT_NOCTO) == 0 || !NFSHASNFSV4N(nmp) ||
+		    nfscl_mustflush(vp) != 0) {
+			vnode_pager_clean_sync(vp);
+			(void)ncl_flush(vp, MNT_WAIT, td, 1, 0);
+		} else {
+			flushed = false;
+		}
 		(void)nfsrpc_close(vp, 1, td);
 	}
 
 	NFSLOCKNODE(np);
-	ncl_releasesillyrename(vp, td);
+	ncl_releasesillyrename(vp, flushed, td);
 
 	/*
 	 * NMODIFIED means that there might be dirty/stale buffers
@@ -294,7 +304,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
 		nfs_reclaim_p(ap);
 
 	NFSLOCKNODE(np);
-	ncl_releasesillyrename(vp, td);
+	ncl_releasesillyrename(vp, true, td);
 
 	if (NFS_ISV4(vp) && vp->v_type == VREG) {
 		np->n_openstateid = NULL;
@@ -315,7 +325,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
 		MNT_ILOCK(mp);
 		if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0) {
 			MNT_IUNLOCK(mp);
-			nfscl_delegreturnvp(vp, td);
+			nfscl_delegreturnvp(vp, true, td);
 		} else
 			MNT_IUNLOCK(mp);
 	} else
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 4e3a699fb170..b25d967982a1 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -828,7 +828,7 @@ nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp,
 	    == (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-		    NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, NULL, NULL, NULL);
 		if (error)
 			return (error);
 		/*
@@ -1489,3 +1489,4 @@ MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1);
 MODULE_DEPEND(nfscl, krpc, 1, 1, 1);
 MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1);
 MODULE_DEPEND(nfscl, xdr, 1, 1, 1);
+MODULE_DEPEND(nfscl, acl_nfs4, 1, 1, 1);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 8c5532268287..2f3c59b68518 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -142,6 +142,7 @@ static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
     int *, int *);
+static bool nfscl_invalidfname(bool, char *, int);
 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
     struct nfscllockowner *, u_int64_t, u_int64_t,
     u_int32_t, struct ucred *, NFSPROC_T *, int);
@@ -389,13 +390,25 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
 		mode |= NFSV4OPEN_ACCESSREAD;
 	if (amode & FWRITE)
 		mode |= NFSV4OPEN_ACCESSWRITE;
+	if (NFSHASNFSV4N(nmp)) {
+		if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+		    nfs_numnfscbd > 0 &&
+		    (vn_irflag_read(vp) & VIRF_NAMEDATTR) == 0) {
+			if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
+				mode |= NFSV4OPEN_WANTWRITEDELEG;
+			else
+				mode |= NFSV4OPEN_WANTANYDELEG;
+		} else
+			mode |= NFSV4OPEN_WANTNODELEG;
+	}
 	nfhp = np->n_fhp;
 
 	retrycnt = 0;
 	do {
 	    dp = NULL;
-	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
-		cred, p, NULL, &op, &newone, &ret, 1, true);
+	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len,
+		(mode & NFSV4OPEN_ACCESSBOTH), 1, cred, p, NULL,
+		&op, &newone, &ret, 1, true);
 	    if (error) {
 		return (error);
 	    }
@@ -440,7 +453,7 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
 				NFSUNLOCKNODE(np);
 				(void) nfscl_deleg(nmp->nm_mountp,
 				    op->nfso_own->nfsow_clp,
-				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
+				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, dp);
 			}
 		} else if (NFSHASNFSV4N(nmp)) {
 			/*
@@ -473,7 +486,7 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
 				NFSUNLOCKNODE(np);
 				(void) nfscl_deleg(nmp->nm_mountp,
 				    op->nfso_own->nfsow_clp,
-				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
+				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, dp);
 			}
 		} else {
 			error = EIO;
@@ -547,7 +560,8 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
 	    cred);
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
-	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
+	*tl++ = txdr_unsigned(mode & (NFSV4OPEN_ACCESSBOTH |
+	    NFSV4OPEN_WANTDELEGMASK));
 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
@@ -664,6 +678,13 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
 			    &ret, &acesize, p);
 			if (error)
 				goto nfsmout;
+		} else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+		    NFSHASNFSV4N(nmp)) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			deleg = fxdr_unsigned(uint32_t, *tl);
+			if (deleg == NFSV4OPEN_CONTENTION ||
+			    deleg == NFSV4OPEN_RESOURCE)
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
@@ -675,7 +696,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
 			    ("nfsrpc_openrpc: Getattr repstat"));
 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-			    NULL, NULL, NULL, p, cred);
+			    NULL, NULL, NULL, NULL, p, cred);
 			if (error)
 				goto nfsmout;
 		}
@@ -1334,7 +1355,7 @@ nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
 		if ((nd->nd_flag & ND_NFSV4) != 0)
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
-			    NULL, NULL);
+			    NULL, NULL, NULL);
 		else
 			error = nfsm_loadattr(nd, nap);
 	} else
@@ -1546,7 +1567,7 @@ nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
 			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
 			*tl++ = 0;		/* seqid, ignored. */
-			*tl++ = txdr_unsigned(openmode);
+			*tl++ = txdr_unsigned(openmode | NFSV4OPEN_WANTNODELEG);
 			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
 			*tl++ = 0;		/* ClientID, ignored. */
 			*tl = 0;
@@ -1668,6 +1689,13 @@ nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
 			ndp->nfsdl_stateid.other[0] = *tl++;
 			ndp->nfsdl_stateid.other[1] = *tl++;
 			ndp->nfsdl_stateid.other[2] = *tl++;
+		} else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+		    NFSHASNFSV4N(nmp)) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			deleg = fxdr_unsigned(uint32_t, *tl);
+			if (deleg == NFSV4OPEN_CONTENTION ||
+			    deleg == NFSV4OPEN_RESOURCE)
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
@@ -2396,7 +2424,7 @@ nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		*tl = vtonfsv34_type(vtyp);
 	}
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
-		nfscl_fillsattr(nd, vap, dvp, 0, 0);
+		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
 	if ((nd->nd_flag & ND_NFSV3) &&
 	    (vtyp == VCHR || vtyp == VBLK)) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
@@ -2484,7 +2512,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		 */
 		if (dp != NULL)
 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
-			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
+			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, dp);
 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
@@ -2595,8 +2623,17 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 	 */
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
-	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
-	    NFSV4OPEN_ACCESSREAD);
+	if (NFSHASNFSV4N(nmp)) {
+		if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+		    nfs_numnfscbd > 0)
+			*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+			    NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTWRITEDELEG);
+		else
+			*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+			    NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTNODELEG);
+	} else
+		*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+		    NFSV4OPEN_ACCESSREAD);
 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
@@ -2609,14 +2646,16 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 			if (NFSHASSESSPERSIST(nmp)) {
 				/* Use GUARDED for persistent sessions. */
 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
-				nfscl_fillsattr(nd, vap, dvp, 0, 0);
+				nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE,
+				    0);
 			} else {
 				/* Otherwise, use EXCLUSIVE4_1. */
 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 				*tl++ = cverf.lval[0];
 				*tl = cverf.lval[1];
-				nfscl_fillsattr(nd, vap, dvp, 0, 0);
+				nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE,
+				    0);
 			}
 		} else {
 			/* NFSv4.0 */
@@ -2627,7 +2666,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		}
 	} else {
 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
-		nfscl_fillsattr(nd, vap, dvp, 0, 0);
+		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
@@ -2714,6 +2753,13 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 			    &ret, &acesize, p);
 			if (error)
 				goto nfsmout;
+		} else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+		    NFSHASNFSV4N(nmp)) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			deleg = fxdr_unsigned(uint32_t, *tl);
+			if (deleg == NFSV4OPEN_CONTENTION ||
+			    deleg == NFSV4OPEN_RESOURCE)
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
@@ -2813,22 +2859,28 @@ nfsmout:
  * Nfs remove rpc
  */
 int
-nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
-    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
+nfsrpc_remove(struct vnode *dvp, char *name, int namelen, struct vnode *vp,
+    struct nfsvattr *nap, int *attrflagp, nfsremove_status *file_status,
+    struct nfsvattr *dnap, int *dattrflagp, struct ucred *cred, NFSPROC_T *p)
 {
-	u_int32_t *tl;
+	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	nfsv4stateid_t dstateid;
-	int error, ret = 0, i;
+	nfsattrbit_t attrbits;
+	int error, i, ret;
 
 	*dattrflagp = 0;
+	*attrflagp = 0;
+	*file_status = UNKNOWN;
+	ret = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	nmp = VFSTONFS(dvp->v_mount);
 tryagain:
-	if (NFSHASNFSV4(nmp) && ret == 0) {
+	if (NFSHASNFSV4(nmp) && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+	    !NFSHASNFSV4N(nmp)) && ret == 0) {
 		ret = nfscl_removedeleg(vp, p, &dstateid);
 		if (ret == 1) {
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
@@ -2853,9 +2905,19 @@ tryagain:
 	}
 	if (ret == 0)
 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
-	(void) nfsm_strtom(nd, name, namelen);
+	(void)nfsm_strtom(nd, name, namelen);
+	if (ret == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(NFSV4OP_PUTFH);
+		np = VTONFS(vp);
+		(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		NFSGETATTR_ATTRBIT(&attrbits);
+		*tl = txdr_unsigned(NFSV4OP_GETATTR);
+		(void)nfsrv_putattrbit(nd, &attrbits);
+	}
 	error = nfscl_request(nd, dvp, p, cred);
-	if (error)
+	if (error != 0)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		/* For NFSv4, parse out any Delereturn replies. */
@@ -2878,7 +2940,41 @@ tryagain:
 		}
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
 	}
-	if (nd->nd_repstat && !error)
+	if (ret == 0 && (nd->nd_flag & (ND_NFSV4 |
+	    ND_NOMOREDATA)) == ND_NFSV4) {
+		/* Parse out the Remove reply for NFSPROC_REMOVE. */
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + 2 * NFSX_HYPER);
+		/* No use for change info for now. */
+		/* The Remove succeeded. */
+		nd->nd_repstat = 0;
+	}
+	if (ret == 0 && (nd->nd_flag & (ND_NFSV4 |
+	    ND_NOMOREDATA)) == ND_NFSV4) {
+		/* Parse out the PutFH, Getattr for NFSPROC_REMOVE. */
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		if (*(tl + 1) != 0) {
+			i = fxdr_unsigned(int, *(tl + 1));
+			if (i == NFSERR_STALE)
+				*file_status = DELETED;
+		} else {
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			if (*(tl + 1) != 0) {
+				i = fxdr_unsigned(int, *(tl + 1));
+				if (i == NFSERR_STALE)
+					*file_status = DELETED;
+			} else {
+				error = nfsm_loadattr(nd, nap);
+				if (error == 0) {
+					*attrflagp = 1;
+					if (nap->na_nlink == 0)
+						*file_status = NLINK_ZERO;
+					else
+						*file_status = VALID;
+				}
+			}
+		}
+	}
+	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 nfsmout:
 	m_freem(nd->nd_mrep);
@@ -2889,12 +2985,14 @@ nfsmout:
  * Do an nfs rename rpc.
  */
 int
-nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
-    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
-    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
-    int *fattrflagp, int *tattrflagp)
+nfsrpc_rename(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
+    int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
+    int tnamelen, nfsremove_status *tvp_status, struct nfsvattr *fnap,
+    struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp,
+    struct nfsvattr *tvpnap, int *tvpattrflagp, struct ucred *cred,
+    NFSPROC_T *p)
 {
-	u_int32_t *tl;
+	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
@@ -2904,11 +3002,14 @@ nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
 
 	*fattrflagp = 0;
 	*tattrflagp = 0;
+	*tvpattrflagp = 0;
+	*tvp_status = UNKNOWN;
 	nmp = VFSTONFS(fdvp->v_mount);
 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 tryagain:
-	if (NFSHASNFSV4(nmp) && ret == 0) {
+	if (NFSHASNFSV4(nmp) && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+	    !NFSHASNFSV4N(nmp)) && ret == 0) {
 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
 		    &tdstateid, &gottd, p);
 		if (gotfd && gottd) {
@@ -2961,29 +3062,44 @@ tryagain:
 	}
 	if (ret == 0)
 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
-	if (nd->nd_flag & ND_NFSV4) {
+	if ((nd->nd_flag & ND_NFSV4) != 0) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSWCCATTR_ATTRBIT(&attrbits);
-		(void) nfsrv_putattrbit(nd, &attrbits);
+		(void)nfsrv_putattrbit(nd, &attrbits);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
 		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
-		(void) nfsrv_putattrbit(nd, &attrbits);
+		(void)nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_V4WCCATTR;
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_RENAME);
 	}
-	(void) nfsm_strtom(nd, fnameptr, fnamelen);
-	if (!(nd->nd_flag & ND_NFSV4))
+	(void)nfsm_strtom(nd, fnameptr, fnamelen);
+	if ((nd->nd_flag & ND_NFSV4) == 0)
 		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
-	(void) nfsm_strtom(nd, tnameptr, tnamelen);
+	(void)nfsm_strtom(nd, tnameptr, tnamelen);
+	if (ret == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		/* When tvp == NULL, it doesn't matter which dvp is used. */
+		*tl = txdr_unsigned(NFSV4OP_PUTFH);
+		if (tvp != NULL)
+			(void)nfsm_fhtom(nmp, nd, VTONFS(tvp)->n_fhp->nfh_fh,
+			    VTONFS(tvp)->n_fhp->nfh_len, 0);
+		else
+			(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
+			    VTONFS(tdvp)->n_fhp->nfh_len, 0);
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(NFSV4OP_GETATTR);
+		NFSGETATTR_ATTRBIT(&attrbits);
+		(void)nfsrv_putattrbit(nd, &attrbits);
+	}
 	error = nfscl_request(nd, fdvp, p, cred);
-	if (error)
+	if (error != 0)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		/* For NFSv4, parse out any Delereturn replies. */
@@ -2999,7 +3115,7 @@ tryagain:
 		for (i = 0; i < (ret * 2); i++) {
 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
 			    ND_NFSV4) {
-			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+			    NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			    if (*(tl + 1)) {
 				if (i == 1 && ret > 1) {
 				    /*
@@ -3019,23 +3135,57 @@ tryagain:
 		}
 		/* Now, the first wcc attribute reply. */
 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
-			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			if (*(tl + 1))
 				nd->nd_flag |= ND_NOMOREDATA;
 		}
 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
 		/* and the second wcc attribute reply. */
 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
-		    !error) {
-			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		    error == 0) {
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			if (*(tl + 1))
 				nd->nd_flag |= ND_NOMOREDATA;
 		}
-		if (!error)
+		if (error == 0)
 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
 			    NULL, NULL);
 	}
-	if (nd->nd_repstat && !error)
+	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
+	    ret == 0 && error == 0) {
+		/* Parse out the rename successful reply. */
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED +
+		    4 * NFSX_HYPER);
+		nd->nd_repstat = 0;	/* Rename succeeded. */
+		/* Parse PutFH reply for tvp. */
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		if (*(tl + 1) != 0) {
+			if (tvp != NULL) {
+				i = fxdr_unsigned(int, *(tl + 1));
+				if (i == NFSERR_STALE)
+					*tvp_status = DELETED;
+			}
+		} else {
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			if (*(tl + 1) != 0) {
+				if (tvp != NULL) {
+					i = fxdr_unsigned(int, *(tl + 1));
+					if (i == NFSERR_STALE)
+						*tvp_status = DELETED;
+				}
+			} else {
+				error = nfsm_loadattr(nd, tvpnap);
+				if (error == 0 && tvp != NULL) {
+					*tvpattrflagp = 1;
+					if (tvpnap->na_nlink == 0)
+						*tvp_status = NLINK_ZERO;
+					else
+						*tvp_status = VALID;
+				}
+			}
+		}
+	}
+	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 nfsmout:
 	m_freem(nd->nd_mrep);
@@ -3068,14 +3218,19 @@ nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
 		VTONFS(dvp)->n_fhp->nfh_len, 0);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
-		*tl = txdr_unsigned(NFSV4OP_GETATTR);
-		NFSWCCATTR_ATTRBIT(&attrbits);
-		(void) nfsrv_putattrbit(nd, &attrbits);
-		nd->nd_flag |= ND_V4WCCATTR;
-		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_LINK);
 	}
 	(void) nfsm_strtom(nd, name, namelen);
+	if (nd->nd_flag & ND_NFSV4) {
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(NFSV4OP_GETATTR);
+		NFSGETATTR_ATTRBIT(&attrbits);
+		(void)nfsrv_putattrbit(nd, &attrbits);
+		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
+		*tl = txdr_unsigned(NFSV4OP_GETATTR);
+		(void)nfsrv_putattrbit(nd, &attrbits);
+	}
 	error = nfscl_request(nd, vp, p, cred);
 	if (error)
 		return (error);
@@ -3084,19 +3239,28 @@ nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
 		if (!error)
 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
 			    NULL, NULL);
-	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
+	} else if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
 		/*
-		 * First, parse out the PutFH and Getattr result.
+		 * First, parse out the PutFH and Link results.
 		 */
-		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-		if (!(*(tl + 1)))
-			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-		if (*(tl + 1))
+		NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED +
+		    2 * NFSX_HYPER);
+		if (*(tl + 3))
 			nd->nd_flag |= ND_NOMOREDATA;
 		/*
-		 * Get the pre-op attributes.
+		 * Get the directory post-op attributes.
 		 */
-		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
+		if ((nd->nd_flag & ND_NOMOREDATA) == 0)
+			error = nfscl_postop_attr(nd, dnap, dattrflagp);
+		if (error == 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
+			/* Get rid of the RestoreFH reply. */
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			if (*(tl + 1))
+				nd->nd_flag |= ND_NOMOREDATA;
+		}
+		/* Get the file's post-op attributes. */
+		if (error == 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)
+			error = nfscl_postop_attr(nd, nap, attrflagp);
 	}
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
@@ -3195,7 +3359,7 @@ nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		*tl = txdr_unsigned(NFDIR);
 	}
 	(void) nfsm_strtom(nd, name, namelen);
-	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
+	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1 | NFSSATTR_NEWFILE, 0);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSGETATTR_ATTRBIT(&attrbits);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
@@ -3280,6 +3444,31 @@ nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
 }
 
 /*
+ * Return true if a Readdir reply file name is invalid, logging the reason.
+ */
+static bool
+nfscl_invalidfname(bool is_v4, char *name, int len)
+{
+	int i;
+	char *cp;
+
+	if (is_v4 && ((len == 1 && name[0] == '.') ||
+	    (len == 2 && name[0] == '.' && name[1] == '.'))) {
+		printf("Readdir NFSv4 reply has dot or dotdot in it\n");
+		return (true);	/* "." and ".." are only rejected when is_v4 */
+	}
+	cp = name;		/* examine exactly len bytes of name */
+	for (i = 0; i < len; i++, cp++) {
+		if (*cp == '/' || *cp == '\0') {
+			printf("Readdir reply file name had imbedded / or nul"
+			    " byte\n");
+			return (true);	/* '/' or NUL inside the name */
+		}
+	}
+	return (false);		/* nothing objectionable found */
+}
+
+/*
  * Readdir rpc.
  * Always returns with either uio_resid unchanged, if you are at the
  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
@@ -3327,10 +3516,13 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 	nfsattrbit_t attrbits, dattrbits;
 	u_int32_t rderr, *tl2 = NULL;
 	size_t tresid;
+	bool validentry;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirrpc bad uio"));
+	KASSERT(uiop->uio_segflg == UIO_SYSSPACE,
+	    ("nfsrpc_readdir: uio userspace"));
 	ncookie.lval[0] = ncookie.lval[1] = 0;
 	/*
 	 * There is no point in reading a lot more than uio_resid, however
@@ -3405,7 +3597,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				nfsva.na_mntonfileno = UINT64_MAX;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-				    NULL, NULL, NULL, p, cred);
+				    NULL, NULL, NULL, NULL, p, cred);
 				if (error) {
 				    dotdotfileid = dotfileid;
 				} else if (gotmnton) {
@@ -3550,6 +3742,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
+			validentry = true;
 			if (nd->nd_flag & ND_NFSV4) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				ncookie.lval[0] = *tl++;
@@ -3588,6 +3781,17 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 			    uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
+				struct iovec saviov;
+				off_t savoff;
+				ssize_t savresid;
+				int savblksiz;
+
+				saviov.iov_base = uiop->uio_iov->iov_base;
+				saviov.iov_len = uiop->uio_iov->iov_len;
+				savoff = uiop->uio_offset;
+				savresid = uiop->uio_resid;
+				savblksiz = blksiz;
+
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_pad0 = dp->d_pad1 = 0;
 				dp->d_off = 0;
@@ -3603,20 +3807,36 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				uiop->uio_iov->iov_base =
 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
 				uiop->uio_iov->iov_len -= DIRHDSIZ;
+				cp = uiop->uio_iov->iov_base;
 				error = nfsm_mbufuio(nd, uiop, len);
 				if (error)
 					goto nfsmout;
-				cp = uiop->uio_iov->iov_base;
-				tlen -= len;
-				NFSBZERO(cp, tlen);
-				cp += tlen;	/* points to cookie storage */
-				tl2 = (u_int32_t *)cp;
-				uiop->uio_iov->iov_base =
-				    (char *)uiop->uio_iov->iov_base + tlen +
-				    NFSX_HYPER;
-				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
-				uiop->uio_resid -= tlen + NFSX_HYPER;
-				uiop->uio_offset += (tlen + NFSX_HYPER);
+				/* Check for an invalid file name. */
+				if (nfscl_invalidfname(
+				    (nd->nd_flag & ND_NFSV4) != 0, cp, len)) {
+					/* Skip over this entry. */
+					uiop->uio_iov->iov_base =
+					    saviov.iov_base;
+					uiop->uio_iov->iov_len =
+					    saviov.iov_len;
+					uiop->uio_offset = savoff;
+					uiop->uio_resid = savresid;
+					blksiz = savblksiz;
+					validentry = false;
+				} else {
+					cp = uiop->uio_iov->iov_base;
+					tlen -= len;
+					NFSBZERO(cp, tlen);
+					cp += tlen; /* points to cookie store */
+					tl2 = (u_int32_t *)cp;
+					uiop->uio_iov->iov_base =
+					    (char *)uiop->uio_iov->iov_base +
+					    tlen + NFSX_HYPER;
+					uiop->uio_iov->iov_len -= tlen +
+					    NFSX_HYPER;
+					uiop->uio_resid -= tlen + NFSX_HYPER;
+					uiop->uio_offset += (tlen + NFSX_HYPER);
+				}
 			} else {
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 				if (error)
@@ -3627,7 +3847,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				nfsva.na_mntonfileno = UINT64_MAX;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-				    NULL, NULL, &rderr, p, cred);
+				    NULL, NULL, &rderr, NULL, p, cred);
 				if (error)
 					goto nfsmout;
 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
@@ -3640,7 +3860,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				ncookie.lval[0] = 0;
 				ncookie.lval[1] = *tl++;
 			}
-			if (bigenough) {
+			if (bigenough && validentry) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;
@@ -3777,11 +3997,16 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 	size_t tresid;
 	u_int32_t *tl2 = NULL, rderr;
 	struct timespec dctime, ts;
-	bool attr_ok;
+	bool attr_ok, named_dir, validentry;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirplusrpc bad uio"));
+	KASSERT(uiop->uio_segflg == UIO_SYSSPACE,
+	    ("nfsrpc_readdirplus: uio userspace"));
+	named_dir = false;
+	if ((vp->v_irflag & VIRF_NAMEDDIR) != 0)
+		named_dir = true;
 	ncookie.lval[0] = ncookie.lval[1] = 0;
 	timespecclear(&dctime);
 	*attrflagp = 0;
@@ -3847,7 +4072,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				nfsva.na_mntonfileno = UINT64_MAX;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-				    NULL, NULL, NULL, p, cred);
+				    NULL, NULL, NULL, NULL, p, cred);
 				if (error) {
 				    dotdotfileid = dotfileid;
 				} else if (gotmnton) {
@@ -3933,6 +4158,13 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
 		    NFSATTRBIT_TIMECREATE))
 			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
+		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
+		    NFSATTRBIT_HIDDEN) ||
+		    !NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
+		    NFSATTRBIT_SYSTEM)) {
+			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+		}
 	}
 
 	/*
@@ -3986,6 +4218,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
+			validentry = true;
 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			if (nd->nd_flag & ND_NFSV4) {
 				ncookie.lval[0] = *tl++;
@@ -4017,6 +4250,17 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 			    uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
+				struct iovec saviov;
+				off_t savoff;
+				ssize_t savresid;
+				int savblksiz;
+
+				saviov.iov_base = uiop->uio_iov->iov_base;
+				saviov.iov_len = uiop->uio_iov->iov_len;
+				savoff = uiop->uio_offset;
+				savresid = uiop->uio_resid;
+				savblksiz = blksiz;
+
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_pad0 = dp->d_pad1 = 0;
 				dp->d_off = 0;
@@ -4035,25 +4279,42 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
 				cnp->cn_namelen = len;
 				NFSCNHASHZERO(cnp);
+				cp = uiop->uio_iov->iov_base;
 				error = nfsm_mbufuio(nd, uiop, len);
 				if (error)
 					goto nfsmout;
-				cp = uiop->uio_iov->iov_base;
-				tlen -= len;
-				NFSBZERO(cp, tlen);
-				cp += tlen;	/* points to cookie storage */
-				tl2 = (u_int32_t *)cp;
-				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
-				    cnp->cn_nameptr[1] == '.')
-					isdotdot = 1;
-				else
-					isdotdot = 0;
-				uiop->uio_iov->iov_base =
-				    (char *)uiop->uio_iov->iov_base + tlen +
-				    NFSX_HYPER;
-				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
-				uiop->uio_resid -= tlen + NFSX_HYPER;
-				uiop->uio_offset += (tlen + NFSX_HYPER);
+				/* Check for an invalid file name. */
+				if (nfscl_invalidfname(
+				    (nd->nd_flag & ND_NFSV4) != 0, cp, len)) {
+					/* Skip over this entry. */
+					uiop->uio_iov->iov_base =
+					    saviov.iov_base;
+					uiop->uio_iov->iov_len =
+					    saviov.iov_len;
+					uiop->uio_offset = savoff;
+					uiop->uio_resid = savresid;
+					blksiz = savblksiz;
+					validentry = false;
+				} else {
+					cp = uiop->uio_iov->iov_base;
+					tlen -= len;
+					NFSBZERO(cp, tlen);
+					cp += tlen; /* points to cookie store */
+					tl2 = (u_int32_t *)cp;
+					if (len == 2 &&
+					    cnp->cn_nameptr[0] == '.' &&
+					    cnp->cn_nameptr[1] == '.')
+						isdotdot = 1;
+					else
+						isdotdot = 0;
+					uiop->uio_iov->iov_base =
+					    (char *)uiop->uio_iov->iov_base +
+					    tlen + NFSX_HYPER;
+					uiop->uio_iov->iov_len -= tlen +
+					    NFSX_HYPER;
+					uiop->uio_resid -= tlen + NFSX_HYPER;
+					uiop->uio_offset += (tlen + NFSX_HYPER);
+				}
 			} else {
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 				if (error)
@@ -4085,12 +4346,12 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				nfsva.na_mntonfileno = 0xffffffff;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-				    NULL, NULL, &rderr, p, cred);
+				    NULL, NULL, &rderr, NULL, p, cred);
 				if (error)
 					goto nfsmout;
 			}
 
-			if (bigenough) {
+			if (bigenough && validentry) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;
@@ -4190,7 +4451,8 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 				    if (cnp->cn_namelen <= NCHNAMLEN &&
 					ndp->ni_dvp != ndp->ni_vp &&
 					(newvp->v_type != VDIR ||
-					 dctime.tv_sec != 0)) {
+					 dctime.tv_sec != 0) &&
+					!named_dir) {
 					cache_enter_time_flags(ndp->ni_dvp,
 					    ndp->ni_vp, cnp,
 					    &nfsva.na_ctime,
@@ -4747,7 +5009,7 @@ nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
 		if (nd->nd_repstat == 0) {
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
-			    p, cred);
+			    NULL, p, cred);
 			if (!error) {
 				nmp->nm_fsid[0] = nap->na_filesid[0];
 				nmp->nm_fsid[1] = nap->na_filesid[1];
@@ -4800,7 +5062,7 @@ nfsmout:
  * nfs pathconf rpc
  */
 int
-nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
+nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, bool *has_namedattrp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
@@ -4810,6 +5072,7 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
 	int error;
 	struct nfsnode *np;
 
+	*has_namedattrp = false;
 	*attrflagp = 0;
 	nmp = VFSTONFS(vp->v_mount);
 	if (NFSHASNFSV4(nmp)) {
@@ -4836,8 +5099,8 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
 			return (error);
 		if (nd->nd_repstat == 0) {
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
-			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
-			    cred);
+			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
+			    has_namedattrp, p, cred);
 			if (!error)
 				*attrflagp = 1;
 		} else {
@@ -5132,7 +5395,7 @@ nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
 		return (error);
 	if (!nd->nd_repstat)
 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
-		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
+		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, NULL, p, cred);
 	else
 		error = nd->nd_repstat;
 	m_freem(nd->nd_mrep);
@@ -5173,7 +5436,8 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
 	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
-	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
+	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, false, false,
+	    false);
 	error = nfscl_request(nd, vp, p, cred);
 	if (error)
 		return (error);
@@ -8109,7 +8373,8 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
 	    0, 0, cred);
 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
-	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
+	*tl++ = txdr_unsigned(mode & (NFSV4OPEN_ACCESSBOTH |
+	    NFSV4OPEN_WANTDELEGMASK));
 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
@@ -8210,6 +8475,13 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
 			    &ret, &acesize, p);
 			if (error != 0)
 				goto nfsmout;
+		} else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+		    NFSHASNFSV4N(nmp)) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			deleg = fxdr_unsigned(uint32_t, *tl);
+			if (deleg == NFSV4OPEN_CONTENTION ||
+			    deleg == NFSV4OPEN_RESOURCE)
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
@@ -8224,7 +8496,7 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
 		if (*++tl == 0) {
 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
-			    NULL, NULL, NULL, p, cred);
+			    NULL, NULL, NULL, NULL, p, cred);
 			if (error != 0)
 				goto nfsmout;
 			if (ndp != NULL) {
@@ -8301,8 +8573,17 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 	 */
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
-	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
-	    NFSV4OPEN_ACCESSREAD);
+	if (NFSHASNFSV4N(nmp)) {
+		if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+		    nfs_numnfscbd > 0)
+			*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+			    NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTWRITEDELEG);
+		else
+			*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+			    NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTNODELEG);
+	} else
+		*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+		    NFSV4OPEN_ACCESSREAD);
 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
@@ -8314,18 +8595,18 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 		if (NFSHASSESSPERSIST(nmp)) {
 			/* Use GUARDED for persistent sessions. */
 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
-			nfscl_fillsattr(nd, vap, dvp, 0, 0);
+			nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
 		} else {
 			/* Otherwise, use EXCLUSIVE4_1. */
 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 			*tl++ = cverf.lval[0];
 			*tl = cverf.lval[1];
-			nfscl_fillsattr(nd, vap, dvp, 0, 0);
+			nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
 		}
 	} else {
 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
-		nfscl_fillsattr(nd, vap, dvp, 0, 0);
+		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
@@ -8421,6 +8702,13 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
 			    &ret, &acesize, p);
 			if (error != 0)
 				goto nfsmout;
+		} else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+		    NFSHASNFSV4N(nmp)) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			deleg = fxdr_unsigned(uint32_t, *tl);
+			if (deleg == NFSV4OPEN_CONTENTION ||
+			    deleg == NFSV4OPEN_RESOURCE)
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
@@ -9258,7 +9546,7 @@ nfsm_split(struct mbuf *mp, uint64_t xfer)
 	if (pgno == m->m_epg_npgs)
 		panic("nfsm_split: eroneous ext_pgs mbuf");
 
-	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
+	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs, 0);
 	m2->m_epg_flags |= EPG_FLAG_ANON;
 
 	/*
@@ -9381,6 +9669,50 @@ nfsmout:
 }
 
 /*
+ * nfs openattr rpc
+ */
+int
+nfsrpc_openattr(struct nfsmount *nmp, struct vnode *vp, uint8_t *fhp, int fhlen,
+    bool createit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
+    struct nfsfh **nfhpp, int *attrflagp)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	nfsattrbit_t attrbits;
+	int error = 0;
+
+	*attrflagp = 0;
+	nfscl_reqstart(nd, NFSPROC_OPENATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
+	    cred);
+	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+	if (createit)
+		*tl = newnfs_true;
+	else
+		*tl = newnfs_false;
+	NFSGETATTR_ATTRBIT(&attrbits);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
+	*tl = txdr_unsigned(NFSV4OP_GETATTR);
+	(void)nfsrv_putattrbit(nd, &attrbits);
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+		error = nfsm_getfh(nd, nfhpp);
+		if (error != 0)
+			goto nfsmout;
+		error = nfscl_postop_attr(nd, nap, attrflagp);
+	}
+nfsmout:
+	m_freem(nd->nd_mrep);
+	if (error == 0 && nd->nd_repstat != 0)
+		error = nd->nd_repstat;
+	return (error);
+}
+
+/*
  * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
  * locked vnode.
  */
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 9fbaa6e63a56..99a781640c53 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -93,11 +93,7 @@ NFSREQSPINLOCK;
 NFSCLSTATEMUTEX;
 int nfscl_inited = 0;
 struct nfsclhead nfsclhead;	/* Head of clientid list */
-int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
-int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
 
-static int nfscl_delegcnt = 0;
-static int nfscl_layoutcnt = 0;
 static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
     u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
     struct nfscllockowner **, struct nfsclopen **);
@@ -433,25 +429,13 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
  */
 int
 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
-    int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
+    int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg *dp)
 {
-	struct nfscldeleg *dp = *dpp, *tdp;
+	struct nfscldeleg *tdp;
 	struct nfsmount *nmp;
 
 	KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
 	nmp = VFSTONFS(mp);
-	/*
-	 * First, if we have received a Read delegation for a file on a
-	 * read/write file system, just return it, because they aren't
-	 * useful, imho.
-	 */
-	if (dp != NULL && !NFSMNT_RDONLY(mp) &&
-	    (dp->nfsdl_flags & NFSCLDL_READ)) {
-		nfscl_trydelegreturn(dp, cred, nmp, p);
-		free(dp, M_NFSCLDELEG);
-		*dpp = NULL;
-		return (0);
-	}
 
 	/*
 	 * Since a delegation might be added to the mount,
@@ -470,26 +454,40 @@ nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
 			NFSUNLOCKCLSTATE();
 			return (NFSERR_BADSTATEID);
 		}
-		*dpp = NULL;
 		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
 		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
 		    nfsdl_hash);
 		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
 		nfsstatsv1.cldelegates++;
-		nfscl_delegcnt++;
+		clp->nfsc_delegcnt++;
 	} else {
 		/*
-		 * Delegation already exists, what do we do if a new one??
+		 * A delegation already exists.  If the new one is a Write
+		 * delegation and the old one a Read delegation, return the
+		 * Read delegation.  Otherwise, return the new delegation.
 		 */
 		if (dp != NULL) {
-			printf("Deleg already exists!\n");
-			free(dp, M_NFSCLDELEG);
-			*dpp = NULL;
+			if ((dp->nfsdl_flags & NFSCLDL_WRITE) != 0 &&
+			    (tdp->nfsdl_flags & NFSCLDL_READ) != 0) {
+				TAILQ_REMOVE(&clp->nfsc_deleg, tdp, nfsdl_list);
+				LIST_REMOVE(tdp, nfsdl_hash);
+				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
+				    nfsdl_list);
+				LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp,
+				    fhlen), dp, nfsdl_hash);
+				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
+			} else {
+				tdp = dp;	/* Return this one. */
+			}
 		} else {
-			*dpp = tdp;
+			tdp = NULL;
 		}
 	}
 	NFSUNLOCKCLSTATE();
+	if (tdp != NULL) {
+		nfscl_trydelegreturn(tdp, cred, nmp, p);
+		free(tdp, M_NFSCLDELEG);
+	}
 	return (0);
 }
 
@@ -918,6 +916,10 @@ nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
 		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
 			LIST_INIT(&clp->nfsc_layouthash[i]);
 		clp->nfsc_flags = NFSCLFLAGS_INITED;
+		clp->nfsc_delegcnt = 0;
+		clp->nfsc_deleghighwater = NFSCLDELEGHIGHWATER;
+		clp->nfsc_layoutcnt = 0;
+		clp->nfsc_layouthighwater = NFSCLLAYOUTHIGHWATER;
 		clp->nfsc_clientidrev = 1;
 		clp->nfsc_cbident = nfscl_nextcbident();
 		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
@@ -1632,7 +1634,7 @@ nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
 		}
 		if (dp != NULL)
 			nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
-			    op->nfso_fhlen, cred, p, &dp);
+			    op->nfso_fhlen, cred, p, dp);
 	}
 
 	/*
@@ -1750,10 +1752,10 @@ nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
 
 	TAILQ_REMOVE(hdp, dp, nfsdl_list);
 	LIST_REMOVE(dp, nfsdl_hash);
+	dp->nfsdl_clp->nfsc_delegcnt--;
 	if (freeit)
 		free(dp, M_NFSCLDELEG);
 	nfsstatsv1.cldelegates--;
-	nfscl_delegcnt--;
 }
 
 /*
@@ -2863,7 +2865,7 @@ tryagain:
 					nfsdl_list);
 				    LIST_REMOVE(dp, nfsdl_hash);
 				    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
-				    nfscl_delegcnt--;
+				    clp->nfsc_delegcnt--;
 				    nfsstatsv1.cldelegates--;
 				}
 				NFSLOCKCLSTATE();
@@ -2893,7 +2895,8 @@ tryagain:
 		 * The tailq list is in LRU order.
 		 */
 		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
-		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
+		while (clp->nfsc_delegcnt > clp->nfsc_deleghighwater &&
+		    dp != NULL) {
 		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
 		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
 			dp->nfsdl_rwlock.nfslock_lock == 0 &&
@@ -2920,7 +2923,7 @@ tryagain:
 			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
 			    LIST_REMOVE(dp, nfsdl_hash);
 			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
-			    nfscl_delegcnt--;
+			    clp->nfsc_delegcnt--;
 			    nfsstatsv1.cldelegates--;
 			}
 		    }
@@ -2976,13 +2979,14 @@ tryagain2:
 		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
 		while (lyp != NULL) {
 			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
-			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
+			if ((lyp->nfsly_timestamp < NFSD_MONOSEC ||
+			     clp->nfsc_layoutcnt > clp->nfsc_layouthighwater) &&
 			    (lyp->nfsly_flags & (NFSLY_RECALL |
 			     NFSLY_RETONCLOSE)) == 0 &&
 			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
 			    lyp->nfsly_lock.nfslock_lock == 0) {
 				NFSCL_DEBUG(4, "ret stale lay=%d\n",
-				    nfscl_layoutcnt);
+				    clp->nfsc_layoutcnt);
 				recallp = malloc(sizeof(*recallp),
 				    M_NFSLAYRECALL, M_NOWAIT);
 				if (recallp == NULL)
@@ -3504,7 +3508,7 @@ nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p,
  * Return any delegation for this vp.
  */
 void
-nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
+nfscl_delegreturnvp(struct vnode *vp, bool retdeleg, NFSPROC_T *p)
 {
 	struct nfsclclient *clp;
 	struct nfscldeleg *dp;
@@ -3527,12 +3531,15 @@ nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
 	if (clp != NULL)
 		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
 		    np->n_fhp->nfh_len);
-	if (dp != NULL) {
+	if (dp != NULL &&
+	    (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0) {
 		nfscl_cleandeleg(dp);
 		nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
 		NFSUNLOCKCLSTATE();
-		newnfs_copycred(&dp->nfsdl_cred, cred);
-		nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
+		if (retdeleg) {
+			newnfs_copycred(&dp->nfsdl_cred, cred);
+			nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
+		}
 		free(dp, M_NFSCLDELEG);
 	} else
 		NFSUNLOCKCLSTATE();
@@ -3694,7 +3701,7 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
 			if (!error)
 				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
 				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
-				    (uint64_t)0, NULL);
+				    (uint64_t)0, NULL, false, false, false);
 			break;
 		case NFSV4OP_CBRECALL:
 			NFSCL_DEBUG(4, "cbrecall\n");
@@ -3712,18 +3719,10 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
 					clp = nfscl_getclnt(cbident);
 				else
 					clp = nfscl_getclntsess(sessionid);
-				if (clp != NULL) {
-					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
-					    nfhp->nfh_len);
-					if (dp != NULL && (dp->nfsdl_flags &
-					    NFSCLDL_DELEGRET) == 0) {
-						dp->nfsdl_flags |=
-						    NFSCLDL_RECALL;
-						wakeup((caddr_t)clp);
-					}
-				} else {
+				if (clp != NULL)
+					nfscl_startdelegrecall(clp, nfhp);
+				else
 					error = NFSERR_SERVERFAULT;
-				}
 				NFSUNLOCKCLSTATE();
 			}
 			if (nfhp != NULL)
@@ -3933,6 +3932,77 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
 				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
 			}
 			break;
+		case NFSV4OP_CBRECALLSLOT:
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			highslot = fxdr_unsigned(uint32_t, *tl);
+			NFSLOCKCLSTATE();
+			clp = nfscl_getclntsess(sessionid);
+			if (clp == NULL)
+				error = NFSERR_SERVERFAULT;
+			if (error == 0) {
+				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+				mtx_lock(&tsep->nfsess_mtx);
+				if ((highslot + 1) < tsep->nfsess_foreslots) {
+					tsep->nfsess_foreslots = (highslot + 1);
+					nfs_resetslots(tsep);
+				}
+				mtx_unlock(&tsep->nfsess_mtx);
+			}
+			NFSUNLOCKCLSTATE();
+			break;
+		case NFSV4OP_CBRECALLANY:
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			i = fxdr_unsigned(int, *tl++);
+			j = fxdr_unsigned(int, *tl);
+			if (i < 0 || j != 1)
+				error = NFSERR_BADXDR;
+			if (error == 0) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+				j = fxdr_unsigned(int, *tl);
+				if (i < 100)
+					i = 100;
+				else if (i > 100000)
+					i = 100000;
+				NFSLOCKCLSTATE();
+				clp = nfscl_getclntsess(sessionid);
+				if (clp == NULL)
+					error = NFSERR_SERVERFAULT;
+				if (((j & NFSRCA4_RDATA_DLG) != 0 ||
+				    (j & NFSRCA4_WDATA_DLG) != 0) &&
+				    error == 0 && i <
+				    clp->nfsc_deleghighwater)
+					clp->nfsc_deleghighwater = i;
+				if (error == 0 &&
+				    ((!NFSHASFLEXFILE(clp->nfsc_nmp) &&
+				     (j & NFSRCA4_FILE_LAYOUT) != 0 &&
+				     i < clp->nfsc_layouthighwater) ||
+				     (NFSHASFLEXFILE(clp->nfsc_nmp) &&
+				     (j & (NFSRCA4_FF_LAYOUT_READ |
+				     NFSRCA4_FF_LAYOUT_RW)) != 0 &&
+				     i < clp->nfsc_layouthighwater)))
+					clp->nfsc_layouthighwater = i;
+				NFSUNLOCKCLSTATE();
+			}
+			break;
+		case NFSV4OP_CBNOTIFY:
+		case NFSV4OP_CBRECALLOBJAVAIL:
+		case NFSV4OP_CBNOTIFYLOCK:
+			/*
+			 * These callbacks are not necessarily optional,
+			 * so I think it is better to reply NFS_OK than
+			 * NFSERR_NOTSUPP.
+			 * All provide information for which the FreeBSD client
+			 * does not currently have a use.
+			 * I am not sure if any of these could be generated
+			 * by an NFSv4.1/4.2 server for this client.
+			 */
+			error = 0;
+			NFSCL_DEBUG(1, "unsupp callback %d\n", op);
+			break;
+		case NFSV4OP_CBPUSHDELEG:
+			error = NFSERR_REJECTDELEG;
+			NFSCL_DEBUG(1, "unsupp callback %d\n", op);
+			break;
 		default:
 			if (i == 0 && minorvers != NFSV4_MINORVERSION)
 				error = NFSERR_OPNOTINSESS;
@@ -4647,7 +4717,7 @@ nfscl_mustflush(vnode_t vp)
 
 	np = VTONFS(vp);
 	nmp = VFSTONFS(vp->v_mount);
-	if (!NFSHASNFSV4(nmp))
+	if (!NFSHASNFSV4(nmp) || vp->v_type != VREG)
 		return (1);
 	NFSLOCKMNT(nmp);
 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
@@ -4687,7 +4757,7 @@ nfscl_nodeleg(vnode_t vp, int writedeleg)
 
 	np = VTONFS(vp);
 	nmp = VFSTONFS(vp->v_mount);
-	if (!NFSHASNFSV4(nmp))
+	if (!NFSHASNFSV4(nmp) || vp->v_type != VREG)
 		return (1);
 	NFSLOCKMNT(nmp);
 	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
@@ -5099,7 +5169,7 @@ nfscl_newnode(vnode_t vp)
  * to the local clock time.
  */
 void
-nfscl_delegmodtime(vnode_t vp)
+nfscl_delegmodtime(struct vnode *vp, struct timespec *mtime)
 {
 	struct nfsclclient *clp;
 	struct nfscldeleg *dp;
@@ -5123,7 +5193,10 @@ nfscl_delegmodtime(vnode_t vp)
 	}
 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
 	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
-		nanotime(&dp->nfsdl_modtime);
+		if (mtime != NULL)
+			dp->nfsdl_modtime = *mtime;
+		else
+			nanotime(&dp->nfsdl_modtime);
 		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
 	}
 	NFSUNLOCKCLSTATE();
@@ -5266,7 +5339,7 @@ nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
 			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
 			    nfsly_hash);
 			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
-			nfscl_layoutcnt++;
+			clp->nfsc_layoutcnt++;
 			nfsstatsv1.cllayouts++;
 		} else {
 			if (retonclose != 0)
@@ -5641,7 +5714,7 @@ nfscl_freelayout(struct nfscllayout *layp)
 		LIST_REMOVE(rp, nfsrecly_list);
 		free(rp, M_NFSLAYRECALL);
 	}
-	nfscl_layoutcnt--;
+	layp->nfsly_clp->nfsc_layoutcnt--;
 	nfsstatsv1.cllayouts--;
 	free(layp, M_NFSLAYOUT);
 }
@@ -5879,3 +5952,69 @@ tryagain:
 	NFSUNLOCKCLSTATE();
 	return (0);
 }
+
+/*
+ * Check access against a delegation ace.
+ * Return EINVAL for any case where the check cannot be completed.
+ */
+int
+nfscl_delegacecheck(struct vnode *vp, accmode_t accmode, struct ucred *cred)
+{
+	struct nfsclclient *clp;
+	struct nfscldeleg *dp;
+	struct nfsnode *np;
+	struct nfsmount *nmp;
+	struct acl *aclp;
+	int error;
+
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	if (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) || vp->v_type != VREG)
+		return (EINVAL);
+	NFSLOCKMNT(nmp);
+	if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
+		NFSUNLOCKMNT(nmp);
+		return (EINVAL);
+	}
+	NFSUNLOCKMNT(nmp);
+	aclp = acl_alloc(M_WAITOK);
+	NFSLOCKCLSTATE();
+	clp = nfscl_findcl(nmp);
+	if (clp == NULL) {
+		NFSUNLOCKCLSTATE();
+		acl_free(aclp);
+		return (EINVAL);
+	}
+	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
+	if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL |
+	    NFSCLDL_DELEGRET)) == 0) {
+		memcpy(&aclp->acl_entry[0], &dp->nfsdl_ace,
+		    sizeof(struct acl_entry));
+		NFSUNLOCKCLSTATE();
+		aclp->acl_cnt = 1;
+		error = vaccess_acl_nfs4(vp->v_type, np->n_vattr.na_uid,
+		    np->n_vattr.na_gid, aclp, accmode, cred);
+		acl_free(aclp);
+		if (error == 0 || error == EACCES)
+			return (error);
+	} else {
+		NFSUNLOCKCLSTATE();
+		acl_free(aclp);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Start the recall of a delegation.  Called for CB_RECALL and REMOVE
+ * when nlink == 0 after the REMOVE.
+ */
+void nfscl_startdelegrecall(struct nfsclclient *clp, struct nfsfh *nfhp)
+{
+	struct nfscldeleg *dp;
+
+	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
+	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) {
+		dp->nfsdl_flags |= NFSCLDL_RECALL;
+		wakeup((caddr_t)clp);
+	}
+}
diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c
index 80ab979d22d7..ae9fa51947cc 100644
--- a/sys/fs/nfsclient/nfs_clsubs.c
+++ b/sys/fs/nfsclient/nfs_clsubs.c
@@ -54,6 +54,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/malloc.h>
+#include <sys/stdarg.h>
 #include <sys/syscall.h>
 #include <sys/sysproto.h>
 #include <sys/taskqueue.h>
@@ -71,12 +72,6 @@
 
 #include <netinet/in.h>
 
-/*
- * Note that stdarg.h and the ANSI style va_start macro is used for both
- * ANSI and traditional C compilers.
- */
-#include <machine/stdarg.h>
-
 extern struct mtx ncl_iod_mutex;
 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
@@ -188,7 +183,7 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper)
 	np = VTONFS(vp);
 	vap = &np->n_vattr.na_vattr;
 	nmp = VFSTONFS(vp->v_mount);
-	mustflush = nfscl_mustflush(vp);	/* must be before mtx_lock() */
+	mustflush = nfscl_nodeleg(vp, 0);	/* must be before mtx_lock() */
 	NFSLOCKNODE(np);
 	/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
 	timeo = (time_second - np->n_mtime.tv_sec) / 10;
@@ -221,8 +216,8 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper)
 		    (time_second - np->n_attrstamp), timeo);
 #endif
 
-	if ((time_second - np->n_attrstamp) >= timeo &&
-	    (mustflush != 0 || np->n_attrstamp == 0)) {
+	if (mustflush != 0 && (np->n_attrstamp == 0 ||
+	    time_second - np->n_attrstamp >= timeo)) {
 		nfsstatsv1.attrcache_misses++;
 		NFSUNLOCKNODE(np);
 		KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c
index c050eef7d4c3..0bd05c03885b 100644
--- a/sys/fs/nfsclient/nfs_clvfsops.c
+++ b/sys/fs/nfsclient/nfs_clvfsops.c
@@ -415,7 +415,7 @@ ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
 }
 
 /*
- * Mount a remote root fs via. nfs. This depends on the info in the
+ * Mount a remote root fs via nfs. This depends on the info in the
  * nfs_diskless structure that has been filled in properly by some primary
  * bootstrap.
  * It goes something like this:
@@ -1524,12 +1524,14 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
 #endif
 
 	NFSCL_DEBUG(3, "in mnt\n");
+	CURVNET_SET(CRED_TO_VNET(cred));
 	clp = NULL;
 	if (mp->mnt_flag & MNT_UPDATE) {
 		nmp = VFSTONFS(mp);
 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
 		free(nam, M_SONAME);
 		free(tlscertname, M_NEWNFSMNT);
+		CURVNET_RESTORE();
 		return (0);
 	} else {
 		/* NFS-over-TLS requires that rpctls be functioning. */
@@ -1544,6 +1546,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
 			if (error != 0) {
 				free(nam, M_SONAME);
 				free(tlscertname, M_NEWNFSMNT);
+				CURVNET_RESTORE();
 				return (error);
 			}
 		}
@@ -1798,12 +1801,18 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
 		if (argp->flags & NFSMNT_NFSV3)
 			ncl_fsinfo(nmp, *vpp, cred, td);
 
-		/* Mark if the mount point supports NFSv4 ACLs. */
-		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
-		    ret == 0 &&
-		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
+		/*
+		 * Mark if the mount point supports NFSv4 ACLs and
+		 * named attributes.
+		 */
+		if ((argp->flags & NFSMNT_NFSV4) != 0) {
 			MNT_ILOCK(mp);
-			mp->mnt_flag |= MNT_NFS4ACLS;
+			if (ret == 0 && nfsrv_useacl != 0 &&
+			    NFSISSET_ATTRBIT(&nfsva.na_suppattr,
+			    NFSATTRBIT_ACL))
+				mp->mnt_flag |= MNT_NFS4ACLS;
+			if (nmp->nm_minorvers > 0)
+				mp->mnt_flag |= MNT_NAMEDATTR;
 			MNT_IUNLOCK(mp);
 		}
 
@@ -1816,6 +1825,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
 		 */
 		NFSVOPUNLOCK(*vpp);
 		vfs_cache_root_set(mp, *vpp);
+		CURVNET_RESTORE();
 		return (0);
 	}
 	error = EIO;
@@ -1844,6 +1854,7 @@ bad:
 	free(nmp->nm_tlscertname, M_NEWNFSMNT);
 	free(nmp, M_NEWNFSMNT);
 	free(nam, M_SONAME);
+	CURVNET_RESTORE();
 	return (error);
 }
 
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 76a3cdf9281e..fa451887e73e 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -106,6 +106,7 @@ uint32_t	nfscl_accesscache_load_done_id;
 extern struct nfsstatsv1 nfsstatsv1;
 extern int nfsrv_useacl;
 extern int nfscl_debuglevel;
+NFSCLSTATEMUTEX;
 MALLOC_DECLARE(M_NEWNFSREQ);
 
 static vop_read_t	nfsfifo_read;
@@ -113,6 +114,8 @@ static vop_write_t	nfsfifo_write;
 static vop_close_t	nfsfifo_close;
 static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
 		    struct thread *);
+static int	nfs_get_namedattrdir(struct vnode *, struct componentname *,
+	    struct vnode **);
 static vop_lookup_t	nfs_lookup;
 static vop_create_t	nfs_create;
 static vop_mknod_t	nfs_mknod;
@@ -248,10 +251,13 @@ VFS_VOP_VECTOR_REGISTER(newnfs_fifoops);
 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct vattr *vap);
 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
-    int namelen, struct ucred *cred, struct thread *td);
+    int namelen, struct ucred *cred, struct thread *td, bool silly);
+static void nfs_removestatus(struct vnode *vp, nfsremove_status file_status,
+    bool silly, struct thread *td);
 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
-    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
+    char *tnameptr, int tnamelen, bool silly, struct ucred *cred,
+    struct thread *td);
 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
     struct componentname *scnp, struct sillyrename *sp);
 
@@ -474,6 +480,18 @@ nfs_access(struct vop_access_args *ap)
 			break;
 		}
 	}
+
+	/*
+	 * For NFSv4 mounts with NFSMNT_NOCTO set, check for a delegation
+	 * with an Allow ACE, to see if that permits access.
+	 */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) != 0) {
+		error = nfscl_delegacecheck(vp, ap->a_accmode, ap->a_cred);
+		if (error == 0)
+			return (error);
+		error = 0;
+	}
+
 	/*
 	 * For nfs v3 or v4, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
@@ -827,9 +845,11 @@ nfs_close(struct vop_close_args *ap)
 	struct ucred *cred;
 	int error = 0, ret, localcred = 0;
 	int fmode = ap->a_fflag;
+	struct nfsmount *nmp;
 
 	if (NFSCL_FORCEDISM(vp->v_mount))
 		return (0);
+	nmp = VFSTONFS(vp->v_mount);
 	/*
 	 * During shutdown, a_cred isn't valid, so just use root.
 	 */
@@ -883,7 +903,9 @@ nfs_close(struct vop_close_args *ap)
 		    error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else if (NFS_ISV4(vp)) { 
-			if (nfscl_mustflush(vp) != 0) {
+			if (!NFSHASNFSV4N(nmp) ||
+			    (nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+			    nfscl_mustflush(vp) != 0) {
 				int cm = newnfs_commit_on_close ? 1 : 0;
 				if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 					NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
@@ -925,7 +947,7 @@ nfs_close(struct vop_close_args *ap)
 	     *     is the cause of some caching/coherency issue that might
 	     *     crop up.)
  	     */
-	    if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
+	    if (nmp->nm_negnametimeo == 0) {
 		    np->n_attrstamp = 0;
 		    KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	    }
@@ -940,9 +962,9 @@ nfs_close(struct vop_close_args *ap)
 		/*
 		 * Get attributes so "change" is up to date.
 		 */
-		if (error == 0 && nfscl_mustflush(vp) != 0 &&
+		if (error == 0 && nfscl_nodeleg(vp, 0) != 0 &&
 		    vp->v_type == VREG &&
-		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
+		    (nmp->nm_flag & NFSMNT_NOCTO) == 0) {
 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva);
 			if (!ret) {
 				np->n_change = nfsva.na_filerev;
@@ -1023,8 +1045,9 @@ nfs_getattr(struct vop_getattr_args *ap)
 			return (0);
 		}
 	}
+
 	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva);
-	if (!error)
+	if (error == 0)
 		error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0);
 	if (!error) {
 		/*
@@ -1051,21 +1074,29 @@ nfs_setattr(struct vop_setattr_args *ap)
 	int error = 0;
 	u_quad_t tsize;
 	struct timespec ts;
+	struct nfsmount *nmp;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
-	 * Setting of flags and marking of atimes are not supported.
+	 * Only the UF_HIDDEN and UF_SYSTEM flags can be set, and only
+	 * for NFSv4 servers that support them.
 	 */
-	if (vap->va_flags != VNOVAL)
+	nmp = VFSTONFS(vp->v_mount);
+	if (vap->va_flags != VNOVAL && (!NFSHASNFSV4(nmp) ||
+	    (vap->va_flags & ~(UF_HIDDEN | UF_SYSTEM)) != 0 ||
+	    ((vap->va_flags & UF_HIDDEN) != 0 &&
+	     !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_HIDDEN)) ||
+	    ((vap->va_flags & UF_SYSTEM) != 0 &&
+	     !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_SYSTEM))))
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
-  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+	if ((vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL ||
 	    vap->va_birthtime.tv_sec != VNOVAL ||
@@ -1120,7 +1151,7 @@ nfs_setattr(struct vop_setattr_args *ap)
 			     * Call nfscl_delegmodtime() to set the modify time
 			     * locally, as required.
 			     */
-			    nfscl_delegmodtime(vp);
+			    nfscl_delegmodtime(vp, NULL);
  			} else
 			    NFSUNLOCKNODE(np);
 			/*
@@ -1158,6 +1189,8 @@ nfs_setattr(struct vop_setattr_args *ap)
 			NFSUNLOCKNODE(np);
 		}
 	}
+	if (vap->va_mtime.tv_sec != VNOVAL && error == 0)
+		nfscl_delegmodtime(vp, &vap->va_mtime);
 	return (error);
 }
 
@@ -1192,6 +1225,40 @@ nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 }
 
 /*
+ * Get a named attribute directory for the vnode.
+ */
+static int
+nfs_get_namedattrdir(struct vnode *vp, struct componentname *cnp,
+    struct vnode **vpp)
+{
+	struct nfsfh *nfhp;
+	struct nfsnode *np;
+	struct vnode *newvp;
+	struct nfsvattr nfsva;
+	int attrflag, error;
+
+	attrflag = 0;
+	*vpp = NULL;
+	np = VTONFS(vp);
+	error = nfsrpc_openattr(VFSTONFS(vp->v_mount), vp, np->n_fhp->nfh_fh,
+	    np->n_fhp->nfh_len, (cnp->cn_flags & CREATENAMED),
+	    cnp->cn_cred, curthread, &nfsva, &nfhp, &attrflag);
+	if (error == NFSERR_NOTSUPP)
+		error = ENOATTR;
+	if (error == 0)
+		error = nfscl_nget(vp->v_mount, vp, nfhp, cnp, curthread, &np,
+		    cnp->cn_lkflags);
+	if (error != 0)
+		return (error);
+	newvp = NFSTOV(np);
+	vn_irflag_set_cond(newvp, VIRF_NAMEDDIR);
+	if (attrflag != 0)
+		(void)nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+	*vpp = newvp;
+	return (0);
+}
+
+/*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
@@ -1203,7 +1270,7 @@ nfs_lookup(struct vop_lookup_args *ap)
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct mount *mp = dvp->v_mount;
-	int flags = cnp->cn_flags;
+	uint64_t flags = cnp->cn_flags;
 	struct vnode *newvp;
 	struct nfsmount *nmp;
 	struct nfsnode *np, *newnp;
@@ -1214,15 +1281,57 @@ nfs_lookup(struct vop_lookup_args *ap)
 	struct vattr vattr;
 	struct timespec nctime, ts;
 	uint32_t openmode;
+	bool is_nameddir, needs_nameddir, opennamed;
 
+	dattrflag = 0;
 	*vpp = NULLVP;
+	nmp = VFSTONFS(mp);
+	opennamed = (flags & (OPENNAMED | ISLASTCN)) == (OPENNAMED | ISLASTCN);
+	if (opennamed && (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp)))
+		return (ENOATTR);
+	is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) != 0;
+	if ((is_nameddir && (flags & ISLASTCN) == 0 && (cnp->cn_namelen > 1 ||
+	    *cnp->cn_nameptr != '.')) ||
+	    (opennamed && !is_nameddir && (flags & ISDOTDOT) != 0))
+		return (ENOATTR);
 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
+	np = VTONFS(dvp);
+
+	needs_nameddir = false;
+	if (opennamed || is_nameddir) {
+		cnp->cn_flags &= ~MAKEENTRY;
+		if (!is_nameddir)
+			needs_nameddir = true;
+	}
+
+	/*
+	 * If the named attribute directory is needed, acquire it now.
+	 */
+	newvp = NULLVP;
+	if (needs_nameddir) {
+		KASSERT(np->n_v4 == NULL, ("nfs_lookup: O_NAMEDATTR when"
+		    " n_v4 not NULL"));
+		error = nfs_get_namedattrdir(dvp, cnp, &newvp);
+		if (error != 0)
+			goto handle_error;
+		if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
+			*vpp = newvp;
+			return (0);
+		}
+		dvp = newvp;
+		np = VTONFS(dvp);
+		newvp = NULLVP;
+	} else if (opennamed && cnp->cn_namelen == 1 &&
+	    *cnp->cn_nameptr == '.') {
+		VREF(dvp);
+		*vpp = dvp;
+		return (0);
+	}
+
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
-	nmp = VFSTONFS(mp);
-	np = VTONFS(dvp);
 
 	/* For NFSv4, wait until any remove is done. */
 	NFSLOCKNODE(np);
@@ -1235,80 +1344,91 @@ nfs_lookup(struct vop_lookup_args *ap)
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
-	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
-	if (error > 0 && error != ENOENT)
-		return (error);
-	if (error == -1) {
-		/*
-		 * Lookups of "." are special and always return the
-		 * current directory.  cache_lookup() already handles
-		 * associated locking bookkeeping, etc.
-		 */
-		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
-			return (0);
-		}
 
-		/*
-		 * We only accept a positive hit in the cache if the
-		 * change time of the file matches our cached copy.
-		 * Otherwise, we discard the cache entry and fallback
-		 * to doing a lookup RPC.  We also only trust cache
-		 * entries for less than nm_nametimeo seconds.
-		 *
-		 * To better handle stale file handles and attributes,
-		 * clear the attribute cache of this node if it is a
-		 * leaf component, part of an open() call, and not
-		 * locally modified before fetching the attributes.
-		 * This should allow stale file handles to be detected
-		 * here where we can fall back to a LOOKUP RPC to
-		 * recover rather than having nfs_open() detect the
-		 * stale file handle and failing open(2) with ESTALE.
-		 */
-		newvp = *vpp;
-		newnp = VTONFS(newvp);
-		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
-		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
-		    !(newnp->n_flag & NMODIFIED)) {
-			NFSLOCKNODE(newnp);
-			newnp->n_attrstamp = 0;
-			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
-			NFSUNLOCKNODE(newnp);
-		}
-		if (nfscl_nodeleg(newvp, 0) == 0 ||
-		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
-		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
-		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
-			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
-			return (0);
-		}
-		cache_purge(newvp);
-		if (dvp != newvp)
-			vput(newvp);
-		else 
-			vrele(newvp);
-		*vpp = NULLVP;
-	} else if (error == ENOENT) {
-		if (VN_IS_DOOMED(dvp))
-			return (ENOENT);
-		/*
-		 * We only accept a negative hit in the cache if the
-		 * modification time of the parent directory matches
-		 * the cached copy in the name cache entry.
-		 * Otherwise, we discard all of the negative cache
-		 * entries for this directory.  We also only trust
-		 * negative cache entries for up to nm_negnametimeo
-		 * seconds.
-		 */
-		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
-		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
-		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
-			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
-			return (ENOENT);
+	if (!opennamed && !is_nameddir) {
+		error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
+		if (error > 0 && error != ENOENT)
+			return (error);
+		if (error == -1) {
+			/*
+			 * Lookups of "." are special and always return the
+			 * current directory.  cache_lookup() already handles
+			 * associated locking bookkeeping, etc.
+			 */
+			if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+				return (0);
+			}
+
+			/*
+			 * We only accept a positive hit in the cache if the
+			 * change time of the file matches our cached copy.
+			 * Otherwise, we discard the cache entry and fallback
+			 * to doing a lookup RPC.  We also only trust cache
+			 * entries for less than nm_nametimeo seconds.
+			 *
+			 * To better handle stale file handles and attributes,
+			 * clear the attribute cache of this node if it is a
+			 * leaf component, part of an open() call, and not
+			 * locally modified before fetching the attributes.
+			 * This should allow stale file handles to be detected
+			 * here where we can fall back to a LOOKUP RPC to
+			 * recover rather than having nfs_open() detect the
+			 * stale file handle and failing open(2) with ESTALE.
+			 */
+			newvp = *vpp;
+			newnp = VTONFS(newvp);
+			if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
+			    (flags & (ISLASTCN | ISOPEN)) ==
+			     (ISLASTCN | ISOPEN) &&
+			    !(newnp->n_flag & NMODIFIED)) {
+				NFSLOCKNODE(newnp);
+				newnp->n_attrstamp = 0;
+				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
+				NFSUNLOCKNODE(newnp);
+			}
+			if (nfscl_nodeleg(newvp, 0) == 0 ||
+			    ((u_int)(ticks - ncticks) <
+			    (nmp->nm_nametimeo * hz) &&
+			    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
+			    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
+				NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
+				return (0);
+			}
+			cache_purge(newvp);
+			if (dvp != newvp)
+				vput(newvp);
+			else
+				vrele(newvp);
+			*vpp = NULLVP;
+		} else if (error == ENOENT) {
+			if (VN_IS_DOOMED(dvp))
+				return (ENOENT);
+			/*
+			 * We only accept a negative hit in the cache if the
+			 * modification time of the parent directory matches
+			 * the cached copy in the name cache entry.
+			 * Otherwise, we discard all of the negative cache
+			 * entries for this directory.  We also only trust
+			 * negative cache entries for up to nm_negnametimeo
+			 * seconds.
+			 */
+			if ((u_int)(ticks - ncticks) <
+			    (nmp->nm_negnametimeo * hz) &&
+			    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
+			    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
+				NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
+				return (ENOENT);
+			}
+			cache_purge_negative(dvp);
 		}
-		cache_purge_negative(dvp);
 	}
 
 	openmode = 0;
+#if 0
+	/*
+	 * The use of LookupOpen breaks some builds.  It is disabled
+	 * until that is fixed.
+	 */
 	/*
 	 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount
 	 * option, it is possible to do the Open operation in the same
@@ -1321,13 +1441,14 @@ nfs_lookup(struct vop_lookup_args *ap)
 	if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) &&
 	    (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
 	    (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) &&
-	    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) {
+	    (flags & (ISLASTCN | ISOPEN | OPENNAMED)) == (ISLASTCN | ISOPEN)) {
 		if ((flags & OPENREAD) != 0)
 			openmode |= NFSV4OPEN_ACCESSREAD;
 		if ((flags & OPENWRITE) != 0)
 			openmode |= NFSV4OPEN_ACCESSWRITE;
 	}
 	NFSUNLOCKMNT(nmp);
+#endif
 
 	newvp = NULLVP;
 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
@@ -1337,6 +1458,11 @@ nfs_lookup(struct vop_lookup_args *ap)
 	    openmode);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
+	if (needs_nameddir) {
+		vput(dvp);
+		dvp = ap->a_dvp;
+	}
+handle_error:
 	if (error) {
 		if (newvp != NULLVP) {
 			vput(newvp);
@@ -1345,13 +1471,14 @@ nfs_lookup(struct vop_lookup_args *ap)
 
 		if (error != ENOENT) {
 			if (NFS_ISV4(dvp))
-				error = nfscl_maperr(td, error, (uid_t)0,
-				    (gid_t)0);
+				error = nfscl_maperr(td, error,
+				    (uid_t)0, (gid_t)0);
 			return (error);
 		}
 
 		/* The requested file was not found. */
-		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		if ((cnp->cn_nameiop == CREATE ||
+		     cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN)) {
 			/*
 			 * XXX: UFS does a full VOP_ACCESS(dvp,
@@ -1392,7 +1519,8 @@ nfs_lookup(struct vop_lookup_args *ap)
 			free(nfhp, M_NFSFH);
 			return (EISDIR);
 		}
-		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, LK_EXCLUSIVE);
+		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
+		    LK_EXCLUSIVE);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
@@ -1413,7 +1541,8 @@ nfs_lookup(struct vop_lookup_args *ap)
 		}
 		NFSUNLOCKNODE(np);
 		if (attrflag)
-			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+			    0, 1);
 		*vpp = newvp;
 		return (0);
 	}
@@ -1454,19 +1583,23 @@ nfs_lookup(struct vop_lookup_args *ap)
 		if (error != 0)
 			return (error);
 		if (attrflag)
-			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+			    0, 1);
 	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 		free(nfhp, M_NFSFH);
 		VREF(dvp);
 		newvp = dvp;
 		if (attrflag)
-			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+			    0, 1);
 	} else {
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
 		    cnp->cn_lkflags);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
+		if (opennamed)
+			vn_irflag_set_cond(newvp, VIRF_NAMEDATTR);
 		/*
 		 * If n_localmodtime >= time before RPC, then
 		 * a file modification operation, such as
@@ -1484,8 +1617,10 @@ nfs_lookup(struct vop_lookup_args *ap)
 		}
 		NFSUNLOCKNODE(np);
 		if (attrflag)
-			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
-		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
+			(void)nfscl_loadattrcache(&newvp, &nfsva, NULL,
+			    0, 1);
+		else if ((flags & (ISLASTCN | ISOPEN)) ==
+		    (ISLASTCN | ISOPEN) &&
 		    !(np->n_flag & NMODIFIED)) {			
 			/*
 			 * Flush the attribute cache when opening a
@@ -1746,6 +1881,7 @@ nfs_create(struct vop_create_args *ap)
 	nfsquad_t cverf;
 	int error = 0, attrflag, dattrflag, fmode = 0;
 	struct vattr vattr;
+	bool is_nameddir, needs_nameddir, opennamed;
 
 	/*
 	 * Oops, not for me..
@@ -1759,6 +1895,32 @@ nfs_create(struct vop_create_args *ap)
 		fmode |= O_EXCL;
 	dnp = VTONFS(dvp);
 	nmp = VFSTONFS(dvp->v_mount);
+	needs_nameddir = false;
+	if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) {
+		opennamed = (cnp->cn_flags & (OPENNAMED | ISLASTCN)) ==
+		    (OPENNAMED | ISLASTCN);
+		is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) != 0;
+		if (opennamed || is_nameddir) {
+			cnp->cn_flags &= ~MAKEENTRY;
+			if (!is_nameddir)
+				needs_nameddir = true;
+		}
+	}
+
+	/*
+	 * If the named attribute directory is needed, acquire it now.
+	 */
+	if (needs_nameddir) {
+		KASSERT(dnp->n_v4 == NULL, ("nfs_create: O_NAMEDATTR when"
+		    " n_v4 not NULL"));
+		error = nfs_get_namedattrdir(dvp, cnp, &newvp);
+		if (error != 0)
+			return (error);
+		dvp = newvp;
+		dnp = VTONFS(dvp);
+		newvp = NULL;
+	}
+
 again:
 	/* For NFSv4, wait until any remove is done. */
 	NFSLOCKNODE(dnp);
@@ -1841,6 +2003,8 @@ again:
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	NFSUNLOCKNODE(dnp);
+	if (needs_nameddir)
+		vput(dvp);
 	return (error);
 }
 
@@ -1864,6 +2028,7 @@ nfs_remove(struct vop_remove_args *ap)
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
+	struct nfsmount *nmp;
 
 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 	if (vp->v_type == VDIR)
@@ -1871,6 +2036,7 @@ nfs_remove(struct vop_remove_args *ap)
 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 	    vattr.va_nlink > 1)) {
+		nmp = VFSTONFS(vp->v_mount);
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
@@ -1882,12 +2048,19 @@ nfs_remove(struct vop_remove_args *ap)
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
+		 * Flushing here would be more correct for the case
+		 * where nfs_close() did not do a flush.  However, it
+		 * could be a large performance hit for some servers
+		 * and only matters when the file name being removed is
+		 * one of multiple hard links.
 		 */
-		error = ncl_vinvalbuf(vp, 0, curthread, 1);
+		if (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
+		    (nmp->nm_flag & NFSMNT_NOCTO) == 0)
+			error = ncl_vinvalbuf(vp, 0, curthread, 1);
 		if (error != EINTR && error != EIO)
 			/* Do the rpc */
 			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
-			    cnp->cn_namelen, cnp->cn_cred, curthread);
+			    cnp->cn_namelen, cnp->cn_cred, curthread, false);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
@@ -1918,7 +2091,32 @@ ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 	if (sp->s_dvp->v_type == VBAD)
 		return (0);
 	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
-	    sp->s_cred, NULL));
+	    sp->s_cred, NULL, true));
+}
+
+/*
+ * Act on the nfsremove_status from a Remove/Rename RPC; silly is unused.
+ */
+static void
+nfs_removestatus(struct vnode *vp, nfsremove_status file_status,
+    bool silly, struct thread *td)
+{
+
+	switch (file_status) {
+	case NLINK_ZERO:
+		/* Get rid of any delegation, then do the DELETED handling. */
+		nfscl_delegreturnvp(vp, false, td);
+		/* FALLTHROUGH */
+	case DELETED:
+		/* Throw away buffer cache blocks; the result is ignored. */
+		(void)ncl_vinvalbuf(vp, 0, td, 1);
+		break;
+	case VALID:
+		/* Nothing to do, delegation is still ok. */
+		break;
+	default:
+		break;
+	}
 }
 
 /*
@@ -1926,17 +2124,20 @@ ncl_removeit(struct sillyrename *sp, struct vnode *vp)
  */
 static int
 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
-    int namelen, struct ucred *cred, struct thread *td)
+    int namelen, struct ucred *cred, struct thread *td, bool silly)
 {
-	struct nfsvattr dnfsva;
+	struct nfsvattr dnfsva, nfsva;
 	struct nfsnode *dnp = VTONFS(dvp);
-	int error = 0, dattrflag;
+	struct nfsmount *nmp;
+	int attrflag, error = 0, dattrflag;
+	nfsremove_status file_status;
 
+	nmp = VFSTONFS(dvp->v_mount);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NREMOVEINPROG;
 	NFSUNLOCKNODE(dnp);
-	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
-	    &dattrflag);
+	error = nfsrpc_remove(dvp, name, namelen, vp, &nfsva, &attrflag,
+	    &file_status, &dnfsva, &dattrflag, cred, td);
 	NFSLOCKNODE(dnp);
 	if ((dnp->n_flag & NREMOVEWANT)) {
 		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
@@ -1946,11 +2147,19 @@ nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
 		dnp->n_flag &= ~NREMOVEINPROG;
 		NFSUNLOCKNODE(dnp);
 	}
-	if (dattrflag)
+
+	if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) {
+		if (file_status != DELETED && attrflag != 0)
+			(void)nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
+		if ((nmp->nm_flag & NFSMNT_NOCTO) != 0)
+			nfs_removestatus(vp, file_status, silly, td);
+	}
+
+	if (dattrflag != 0)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
-	if (!dattrflag) {
+	if (dattrflag == 0) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
@@ -1975,6 +2184,7 @@ nfs_rename(struct vop_rename_args *ap)
 	struct nfsnode *fnp = VTONFS(ap->a_fvp);
 	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 	struct nfsv4node *newv4 = NULL;
+	struct nfsmount *nmp;
 	int error;
 
 	/* Check for cross-device rename */
@@ -1983,6 +2193,7 @@ nfs_rename(struct vop_rename_args *ap)
 		error = EXDEV;
 		goto out;
 	}
+	nmp = VFSTONFS(fvp->v_mount);
 
 	if (fvp == tvp) {
 		printf("nfs_rename: fvp == tvp (can't happen)\n");
@@ -2005,11 +2216,15 @@ nfs_rename(struct vop_rename_args *ap)
 	 * that was written back to our cache earlier. Not checking for
 	 * this condition can result in potential (silent) data loss.
 	 */
-	error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
+	if ((nmp->nm_flag & NFSMNT_NOCTO) == 0 || !NFSHASNFSV4(nmp) ||
+	    !NFSHASNFSV4N(nmp) || nfscl_mustflush(fvp) != 0)
+		error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
 	NFSVOPUNLOCK(fvp);
-	if (!error && tvp)
+	if (error == 0 && tvp != NULL && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+	    !NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
+	    nfscl_mustflush(tvp) != 0))
 		error = VOP_FSYNC(tvp, MNT_WAIT, curthread);
-	if (error)
+	if (error != 0)
 		goto out;
 
 	/*
@@ -2024,7 +2239,7 @@ nfs_rename(struct vop_rename_args *ap)
 	}
 
 	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
-	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
+	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, false, tcnp->cn_cred,
 	    curthread);
 
 	if (error == 0 && NFS_ISV4(tdvp)) {
@@ -2093,7 +2308,7 @@ nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
 {
 
 	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
-	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
+	    sdvp, NULL, sp->s_name, sp->s_namlen, true, scnp->cn_cred,
 	    curthread));
 }
 
@@ -2103,16 +2318,19 @@ nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
 static int
 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
-    int tnamelen, struct ucred *cred, struct thread *td)
+    int tnamelen, bool silly, struct ucred *cred, struct thread *td)
 {
-	struct nfsvattr fnfsva, tnfsva;
+	struct nfsvattr fnfsva, tnfsva, tvpnfsva;
 	struct nfsnode *fdnp = VTONFS(fdvp);
 	struct nfsnode *tdnp = VTONFS(tdvp);
-	int error = 0, fattrflag, tattrflag;
+	struct nfsmount *nmp;
+	int error = 0, fattrflag, tattrflag, tvpattrflag;
+	nfsremove_status tvp_status;
 
+	nmp = VFSTONFS(fdvp->v_mount);
 	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
-	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
-	    &tattrflag);
+	    tnameptr, tnamelen, &tvp_status, &fnfsva, &tnfsva, &fattrflag,
+	    &tattrflag, &tvpnfsva, &tvpattrflag, cred, td);
 	NFSLOCKNODE(fdnp);
 	fdnp->n_flag |= NMODIFIED;
 	if (fattrflag != 0) {
@@ -2133,6 +2351,15 @@ nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
 		NFSUNLOCKNODE(tdnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
+
+	if (tvp != NULL) {
+		if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp) &&
+		    (nmp->nm_flag & NFSMNT_NOCTO) != 0)
+			nfs_removestatus(tvp, tvp_status, silly, td);
+		if (!silly && tvpattrflag != 0)
+			(void)nfscl_loadattrcache(&tvp, &tvpnfsva, NULL, 0, 1);
+	}
+
 	if (error && NFS_ISV4(fdvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
@@ -2156,7 +2383,9 @@ nfs_link(struct vop_link_args *ap)
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
+#ifdef notnow
 	VOP_FSYNC(vp, MNT_WAIT, curthread);
+#endif
 
 	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag);
@@ -4367,25 +4596,48 @@ nfs_pathconf(struct vop_pathconf_args *ap)
 	struct nfsmount *nmp;
 	struct thread *td = curthread;
 	off_t off;
-	bool eof;
+	bool eof, has_namedattr, named_enabled;
 	int attrflag, error;
+	struct nfsnode *np;
 
+	nmp = VFSTONFS(vp->v_mount);
+	np = VTONFS(vp);
+	named_enabled = false;
+	has_namedattr = false;
 	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 	    ap->a_name == _PC_NO_TRUNC)) ||
-	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
+	    (NFS_ISV4(vp) && (ap->a_name == _PC_ACL_NFS4 ||
+	     ap->a_name == _PC_HAS_NAMEDATTR))) {
 		/*
 		 * Since only the above 4 a_names are returned by the NFSv3
 		 * Pathconf RPC, there is no point in doing it for others.
 		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
-		 * be used for _PC_NFS4_ACL as well.
+		 * be used for _PC_ACL_NFS4 and _PC_HAS_NAMEDATTR as well.
 		 */
-		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
-		    &attrflag);
+		error = nfsrpc_pathconf(vp, &pc, &has_namedattr, td->td_ucred,
+		    td, &nfsva, &attrflag);
 		if (attrflag != 0)
 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error != 0)
 			return (error);
+	} else if (NFS_ISV4(vp) && ap->a_name == _PC_NAMEDATTR_ENABLED &&
+	    (np->n_flag & NNAMEDNOTSUPP) == 0) {
+		struct nfsfh *nfhp;
+
+		error = nfsrpc_openattr(nmp, vp, np->n_fhp->nfh_fh,
+		    np->n_fhp->nfh_len, false, td->td_ucred, td, &nfsva, &nfhp,
+		    &attrflag);
+		named_enabled = true;
+		if (error == 0) {
+			free(nfhp, M_NFSFH);
+		} else if (error == NFSERR_NOTSUPP) {
+			named_enabled = false;
+			NFSLOCKNODE(np);
+			np->n_flag |= NNAMEDNOTSUPP;
+			NFSUNLOCKNODE(np);
+		}
+		error = 0;
 	} else {
 		/*
 		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
@@ -4468,7 +4720,6 @@ nfs_pathconf(struct vop_pathconf_args *ap)
 	case _PC_MIN_HOLE_SIZE:
 		/* Only some NFSv4.2 servers support Seek for Holes. */
 		*ap->a_retval = 0;
-		nmp = VFSTONFS(vp->v_mount);
 		if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) {
 			/*
 			 * NFSv4.2 doesn't have an attribute for hole size,
@@ -4499,6 +4750,27 @@ nfs_pathconf(struct vop_pathconf_args *ap)
 			mtx_unlock(&nmp->nm_mtx);
 		}
 		break;
+	case _PC_NAMEDATTR_ENABLED:
+		if (named_enabled)
+			*ap->a_retval = 1;
+		else
+			*ap->a_retval = 0;
+		break;
+	case _PC_HAS_NAMEDATTR:
+		if (has_namedattr)
+			*ap->a_retval = 1;
+		else
+			*ap->a_retval = 0;
+		break;
+	case _PC_HAS_HIDDENSYSTEM:
+		if (NFS_ISV4(vp) && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+		    NFSATTRBIT_HIDDEN) &&
+		    NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+		    NFSATTRBIT_SYSTEM))
+			*ap->a_retval = 1;
+		else
+			*ap->a_retval = 0;
+		break;
 
 	default:
 		error = vop_stdpathconf(ap);
diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h
index cc1959b7bf79..9b2627015612 100644
--- a/sys/fs/nfsclient/nfsnode.h
+++ b/sys/fs/nfsclient/nfsnode.h
@@ -162,6 +162,7 @@ struct nfsnode {
 #define	NDSCOMMIT	0x00100000  /* Commit is done via the DS. */
 #define	NVNSETSZSKIP	0x00200000  /* Skipped vnode_pager_setsize() */
 #define	NMIGHTBELOCKED	0x00400000  /* Might be file locked. */
+#define	NNAMEDNOTSUPP	0x00800000  /* Openattr is not supported. */
 
 /*
  * Convert between nfsnode pointers and vnode pointers
diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c
index bf0ff4e84d98..de72187bbb91 100644
--- a/sys/fs/nfsserver/nfs_nfsdcache.c
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c
@@ -392,7 +392,7 @@ loop:
 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
 			if ((rp->rc_flag & RC_LOCKED) != 0) {
 				rp->rc_flag |= RC_WANTED;
-				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+				(void)mtx_sleep(rp, mutex, PVFS | PDROP,
 				    "nfsrc", 10 * hz);
 				goto loop;
 			}
@@ -678,7 +678,7 @@ tryagain:
 		rp = hitrp;
 		if ((rp->rc_flag & RC_LOCKED) != 0) {
 			rp->rc_flag |= RC_WANTED;
-			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+			(void)mtx_sleep(rp, mutex, PVFS | PDROP,
 			    "nfsrc", 10 * hz);
 			goto tryagain;
 		}
@@ -750,7 +750,7 @@ nfsrc_lock(struct nfsrvcache *rp)
 	mtx_assert(mutex, MA_OWNED);
 	while ((rp->rc_flag & RC_LOCKED) != 0) {
 		rp->rc_flag |= RC_WANTED;
-		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
+		(void)mtx_sleep(rp, mutex, PVFS, "nfsrc", 0);
 	}
 	rp->rc_flag |= RC_LOCKED;
 }
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 6f5b2855bcf0..4f0d5946d6b9 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -69,6 +69,7 @@ extern int nfsrv_maxpnfsmirror;
 extern uint32_t nfs_srvmaxio;
 extern int nfs_bufpackets;
 extern u_long sb_max_adj;
+extern struct nfsv4lock nfsv4rootfs_lock;
 
 NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
 NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst);
@@ -121,7 +122,6 @@ extern struct nfsdevicehead nfsrv_devidhead;
 /* Map d_type to vnode type. */
 static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR,
     VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON };
-#define	NFS_DTYPETOVTYPE(t)	((t) <= DT_WHT ? dtype_to_vnode[(t)] : VNON)
 
 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
     struct iovec **);
@@ -129,6 +129,7 @@ static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
     struct mbuf **, struct iovec **);
 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
     int *);
+static void nfs_dtypetovtype(struct nfsvattr *, struct vnode *, uint8_t);
 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
     NFSPROC_T *);
 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
@@ -178,8 +179,6 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
-SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
-    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
     0, "Debug level for NFS server");
 NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid);
@@ -189,6 +188,10 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid,
 static int nfsrv_pnfsgetdsattr = 1;
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
     &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
+static bool nfsrv_recalldeleg = false;
+SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, recalldeleg, CTLFLAG_RW,
+    &nfsrv_recalldeleg, 0,
+    "When set remove/rename recalls delegations for same client");
 
 /*
  * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
@@ -294,6 +297,38 @@ SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
     sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
 
+static int
+sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)
+{
+	int error, igotlock, newdolocallocks;
+
+	newdolocallocks = nfsrv_dolocallocks;
+	error = sysctl_handle_int(oidp, &newdolocallocks, 0, req);
+	if (error != 0 || req->newptr == NULL)	/* no new value */
+		return (error);
+	if (newdolocallocks == nfsrv_dolocallocks)	/* unchanged */
+		return (0);
+	if (jailed(curthread->td_ucred))
+		return (EINVAL);
+
+	NFSLOCKV4ROOTMUTEX();
+	do {
+		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
+		    NFSV4ROOTLOCKMUTEXPTR, NULL);
+	} while (!igotlock);
+	NFSUNLOCKV4ROOTMUTEX();
+
+	nfsrv_dolocallocks = newdolocallocks;	/* under nfsv4rootfs_lock */
+
+	NFSLOCKV4ROOTMUTEX();
+	nfsv4_unlock(&nfsv4rootfs_lock, 0);
+	NFSUNLOCKV4ROOTMUTEX();
+	return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+    sysctl_dolocallocks, "I", "Enable nfsd to acquire local locks on files");
+
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
@@ -413,6 +448,8 @@ nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
 			gotattr = 1;
 	}
 
+	nvap->na_bsdflags = 0;
+	nvap->na_flags = 0;
 	error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
 	if (lockedit != 0)
 		NFSVOPUNLOCK(vp);
@@ -1451,32 +1488,61 @@ nfsmout:
  * Remove a non-directory object.
  */
 int
-nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
+nfsvno_removesub(struct nameidata *ndp, bool is_v4, struct nfsrv_descript *nd,
     struct thread *p, struct nfsexstuff *exp)
 {
-	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
-	int error = 0, mirrorcnt;
+	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp;
+	struct mount *mp;
+	int error = 0, mirrorcnt, ret;
 	char fname[PNFS_FILENAME_LEN + 1];
 	fhandle_t fh;
 
 	vp = ndp->ni_vp;
 	dsdvp[0] = NULL;
-	if (vp->v_type == VDIR)
+	if (vp->v_type == VDIR) {
 		error = NFSERR_ISDIR;
-	else if (is_v4)
-		error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
-		    p);
+	} else if (is_v4) {
+		if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+			error = nfsrv_checkremove(vp, 1, NULL,
+			    (nfsquad_t)((u_quad_t)0), p);
+		else
+			error = nfsrv_checkremove(vp, 1, NULL, nd->nd_clientid,
+			    p);
+	}
 	if (error == 0)
 		nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
 	if (!error)
 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (error == 0 && dsdvp[0] != NULL)
 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
+	if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0)
+		error = nfsvno_getfh(vp, &fh, p);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
+
+	/* Use ret to determine if the file still exists. */
+	if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
+		mp = vfs_busyfs(&fh.fh_fsid);
+		if (mp != NULL) {
+			/* Find out if the file still exists. */
+			ret = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &newvp);
+			if (ret == 0)
+				vput(newvp);
+			else
+				ret = ESTALE;
+			vfs_unbusy(mp);
+		} else {
+			ret = ESTALE;
+		}
+		if (ret == ESTALE) {
+			/* Get rid of any delegation. */
+			nfsrv_removedeleg(&fh, nd, p);
+		}
+	}
+
 	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
@@ -1527,33 +1593,34 @@ out:
  */
 int
 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
-    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
+    struct nfsrv_descript *nd, struct thread *p)
 {
-	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
-	int error = 0, mirrorcnt;
+	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp;
+	struct mount *mp;
+	int error = 0, mirrorcnt, ret;
 	char fname[PNFS_FILENAME_LEN + 1];
-	fhandle_t fh;
+	fhandle_t fh, fh2;
 
 	dsdvp[0] = NULL;
 	fvp = fromndp->ni_vp;
-	if (ndstat) {
+	if (nd->nd_repstat != 0) {
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
-		error = ndstat;
+		error = nd->nd_repstat;
 		goto out1;
 	}
 	tdvp = tondp->ni_dvp;
 	tvp = tondp->ni_vp;
 	if (tvp != NULL) {
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
-			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
+			error = (nd->nd_flag & ND_NFSV2) ? EISDIR : EEXIST;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
-			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
+			error = (nd->nd_flag & ND_NFSV2) ? ENOTDIR : EEXIST;
 			goto out;
 		}
 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
-			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+			error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 			goto out;
 		}
 
@@ -1572,35 +1639,45 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
 		}
 	}
 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
-		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+		error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	if (fvp->v_mount != tdvp->v_mount) {
-		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+		error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	if (fvp == tdvp) {
-		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
+		error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 		goto out;
 	}
 	if (fvp == tvp) {
 		/*
-		 * If source and destination are the same, there is nothing to
-		 * do. Set error to -1 to indicate this.
+		 * If source and destination are the same, there is
+		 * nothing to do. Set error to EJUSTRETURN to indicate
+		 * this.
 		 */
-		error = -1;
+		error = EJUSTRETURN;
 		goto out;
 	}
-	if (ndflag & ND_NFSV4) {
+	if (nd->nd_flag & ND_NFSV4) {
 		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
-			error = nfsrv_checkremove(fvp, 0, NULL,
-			    (nfsquad_t)((u_quad_t)0), p);
+			if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+				error = nfsrv_checkremove(fvp, 0, NULL,
+				    (nfsquad_t)((u_quad_t)0), p);
+			else
+				error = nfsrv_checkremove(fvp, 0, NULL,
+				    nd->nd_clientid, p);
 			NFSVOPUNLOCK(fvp);
 		} else
 			error = EPERM;
-		if (tvp && !error)
-			error = nfsrv_checkremove(tvp, 1, NULL,
-			    (nfsquad_t)((u_quad_t)0), p);
+		if (tvp && !error) {
+			if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+				error = nfsrv_checkremove(tvp, 1, NULL,
+				    (nfsquad_t)((u_quad_t)0), p);
+			else
+				error = nfsrv_checkremove(tvp, 1, NULL,
+				    nd->nd_clientid, p);
+		}
 	} else {
 		/*
 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
@@ -1612,15 +1689,35 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
 		nfsd_recalldelegation(fvp, p);
 	}
 	if (error == 0 && tvp != NULL) {
-		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			error = nfsvno_getfh(tvp, &fh2, p);
+		if (error == 0)
+			nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname,
+			    &fh);
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
 		    " dsdvp=%p\n", dsdvp[0]);
 	}
 out:
-	if (!error) {
+	mp = NULL;
+	if (error == 0) {
+		error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp);
+		if (error == 0) {
+			if (mp == NULL) {
+				error = ENOENT;
+			} else {
+				error = lockmgr(&mp->mnt_renamelock,
+				    LK_EXCLUSIVE | LK_NOWAIT, NULL);
+				if (error != 0)
+					error = ERELOOKUP;
+			}
+		}
+	}
+	if (error == 0) {
 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 		    &tondp->ni_cnd);
+		lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+		vfs_rel(mp);
 	} else {
 		if (tdvp == tvp)
 			vrele(tdvp);
@@ -1630,8 +1727,13 @@ out:
 			vput(tvp);
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
-		if (error == -1)
+		if (error == EJUSTRETURN) {
 			error = 0;
+		} else if (error == ERELOOKUP && mp != NULL) {
+			lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0);
+			lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+			vfs_rel(mp);
+		}
 	}
 
 	/*
@@ -1644,6 +1746,26 @@ out:
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
 	}
 
+	/* Use ret to determine if the file still exists. */
+	if ((nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
+		mp = vfs_busyfs(&fh2.fh_fsid);
+		if (mp != NULL) {
+			/* Find out if the file still exists. */
+			ret = VFS_FHTOVP(mp, &fh2.fh_fid, LK_SHARED, &newvp);
+			if (ret == 0)
+				vput(newvp);
+			else
+				ret = ESTALE;
+			vfs_unbusy(mp);
+		} else {
+			ret = ESTALE;
+		}
+		if (ret == ESTALE) {
+			/* Get rid of any delegation. */
+			nfsrv_removedeleg(&fh2, nd, p);
+		}
+	}
+
 	nfsvno_relpathbuf(tondp);
 out1:
 	nfsvno_relpathbuf(fromndp);
@@ -1990,7 +2112,8 @@ int
 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
     struct ucred *cred, struct thread *p, int isdgram, int reterr,
-    int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
+    int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno,
+    bool xattrsupp, bool has_hiddensystem, bool has_namedattr)
 {
 	struct statfs *sf;
 	int error;
@@ -2009,12 +2132,29 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
 	}
 	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
 	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
-	    mounted_on_fileno, sf);
+	    mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr);
 	free(sf, M_TEMP);
 	NFSEXITCODE2(0, nd);
 	return (error);
 }
 
+/*
+ * Set nvap->na_type from a dirent d_type (VREG if vp has VIRF_NAMEDDIR).
+ */
+static void nfs_dtypetovtype(struct nfsvattr *nvap, struct vnode *vp,
+    uint8_t dtype)
+{
+
+	if ((vn_irflag_read(vp) & VIRF_NAMEDDIR) != 0) {
+		nvap->na_type = VREG;
+		nvap->na_bsdflags |= SFBSD_NAMEDATTR;
+	} else if (dtype <= DT_WHT) {
+		nvap->na_type = dtype_to_vnode[dtype];
+	} else {
+		nvap->na_type = VNON;
+	}
+}
+
 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 /*
  * nfs readdir service
@@ -2309,7 +2449,7 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
 	struct nfsvattr nva, at, *nvap = &nva;
 	struct mbuf *mb0, *mb1;
 	struct nfsreferral *refp;
-	int nlen, r, error = 0, getret = 1, usevget = 1;
+	int nlen, r, error = 0, getret = 1, ret, usevget = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 	caddr_t bpos0, bpos1;
 	u_int64_t off, toff, verf __unused;
@@ -2323,6 +2463,9 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
 	uint64_t mounted_on_fileno;
 	struct thread *p = curthread;
 	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
+	size_t atsiz;
+	long pathval;
+	bool has_hiddensystem, has_namedattr, xattrsupp;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
@@ -2634,6 +2777,10 @@ again:
 						    LK_SHARED, &nvp);
 					else
 						r = EOPNOTSUPP;
+					if (r == 0 && (vn_irflag_read(vp) &
+					    VIRF_NAMEDDIR) != 0)
+						vn_irflag_set_cond(nvp,
+						    VIRF_NAMEDATTR);
 					if (r == EOPNOTSUPP) {
 						if (usevget) {
 							usevget = 0;
@@ -2648,6 +2795,10 @@ again:
 						cn.cn_namelen = nlen;
 						cn.cn_flags = ISLASTCN |
 						    NOFOLLOW | LOCKLEAF;
+						if ((vn_irflag_read(vp) &
+						    VIRF_NAMEDDIR) != 0)
+							cn.cn_flags |=
+							    OPENNAMED;
 						if (nlen == 2 &&
 						    dp->d_name[0] == '.' &&
 						    dp->d_name[1] == '.')
@@ -2765,7 +2916,7 @@ again:
 				/* Only need Type and/or Fileid. */
 				VATTR_NULL(&nvap->na_vattr);
 				nvap->na_fileid = dp->d_fileno;
-				nvap->na_type = NFS_DTYPETOVTYPE(dp->d_type);
+				nfs_dtypetovtype(nvap, vp, dp->d_type);
 			}
 
 			/*
@@ -2789,9 +2940,32 @@ again:
 				*tl++ = newnfs_true;
 				txdr_hyper(*cookiep, tl);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
+				xattrsupp = false;
+				has_hiddensystem = false;
+				has_namedattr = false;
 				if (nvp != NULL) {
 					supports_nfsv4acls =
 					    nfs_supportsnfsv4acls(nvp);
+					if (NFSISSET_ATTRBIT(&attrbits,
+					    NFSATTRBIT_XATTRSUPPORT)) {
+						ret = VOP_GETEXTATTR(nvp,
+						    EXTATTR_NAMESPACE_USER,
+						    "xxx", NULL, &atsiz,
+						    nd->nd_cred, p);
+						xattrsupp = ret != EOPNOTSUPP;
+					}
+					if (VOP_PATHCONF(nvp,
+					    _PC_HAS_HIDDENSYSTEM, &pathval) !=
+					    0)
+						pathval = 0;
+					has_hiddensystem = pathval > 0;
+					pathval = 0;
+					if (NFSISSET_ATTRBIT(&attrbits,
+					    NFSATTRBIT_NAMEDATTR) &&
+					    VOP_PATHCONF(nvp, _PC_HAS_NAMEDATTR,
+					    &pathval) != 0)
+						pathval = 0;
+					has_namedattr = pathval > 0;
 					NFSVOPUNLOCK(nvp);
 				} else
 					supports_nfsv4acls = 0;
@@ -2811,13 +2985,15 @@ again:
 					    nvp, nvap, &nfh, r, &rderrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
-					    mounted_on_fileno);
+					    mounted_on_fileno, xattrsupp,
+					    has_hiddensystem, has_namedattr);
 				} else {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &attrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
-					    mounted_on_fileno);
+					    mounted_on_fileno, xattrsupp,
+					    has_hiddensystem, has_namedattr);
 				}
 				if (nvp != NULL)
 					vrele(nvp);
@@ -2995,12 +3171,17 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 	/*
 	 * Loop around getting the setable attributes. If an unsupported
 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
+	 * Once nd_repstat != 0, do not set the attribute value, but keep
+	 * parsing the attribute(s).
 	 */
 	if (retnotsup) {
 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 		bitpos = NFSATTRBIT_MAX;
 	} else {
 		bitpos = 0;
+		if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) ||
+		    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SYSTEM))
+			nvap->na_flags = 0;
 	}
 	moderet = 0;
 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
@@ -3012,12 +3193,13 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 		switch (bitpos) {
 		case NFSATTRBIT_SIZE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
-                     if (vp != NULL && vp->v_type != VREG) {
-                            error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
-                                NFSERR_INVAL;
-                            goto nfsmout;
+			if (!nd->nd_repstat) {
+				if (vp != NULL && vp->v_type != VREG)
+					nd->nd_repstat = (vp->v_type == VDIR) ?
+					    NFSERR_ISDIR : NFSERR_INVAL;
+				else
+					nvap->na_size = fxdr_hyper(tl);
 			}
-			nvap->na_size = fxdr_hyper(tl);
 			attrsum += NFSX_HYPER;
 			break;
 		case NFSATTRBIT_ACL:
@@ -3036,9 +3218,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_HIDDEN:
-			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			if (!nd->nd_repstat)
-				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			if (nd->nd_repstat == 0) {
+				if (*tl == newnfs_true)
+					nvap->na_flags |= UF_HIDDEN;
+			}
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_MIMETYPE:
@@ -3054,7 +3238,8 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 		case NFSATTRBIT_MODE:
 			moderet = NFSERR_INVAL;	/* Can't do MODESETMASKED. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			nvap->na_mode = nfstov_mode(*tl);
+			if (!nd->nd_repstat)
+				nvap->na_mode = nfstov_mode(*tl);
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_OWNER:
@@ -3112,9 +3297,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_SYSTEM:
-			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			if (!nd->nd_repstat)
-				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			if (nd->nd_repstat == 0) {
+				if (*tl == newnfs_true)
+					nvap->na_flags |= UF_SYSTEM;
+			}
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_TIMEACCESSSET:
@@ -3122,10 +3309,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
-			    fxdr_nfsv4time(tl, &nvap->na_atime);
+			    if (!nd->nd_repstat)
+				fxdr_nfsv4time(tl, &nvap->na_atime);
 			    toclient = 1;
 			    attrsum += NFSX_V4TIME;
-			} else {
+			} else if (!nd->nd_repstat) {
 			    vfs_timestamp(&nvap->na_atime);
 			    nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
@@ -3138,7 +3326,8 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			break;
 		case NFSATTRBIT_TIMECREATE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
-			fxdr_nfsv4time(tl, &nvap->na_btime);
+			if (!nd->nd_repstat)
+				fxdr_nfsv4time(tl, &nvap->na_btime);
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMEMODIFYSET:
@@ -3146,10 +3335,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
-			    fxdr_nfsv4time(tl, &nvap->na_mtime);
+			    if (!nd->nd_repstat)
+				fxdr_nfsv4time(tl, &nvap->na_mtime);
 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			    attrsum += NFSX_V4TIME;
-			} else {
+			} else if (!nd->nd_repstat) {
 			    vfs_timestamp(&nvap->na_mtime);
 			    if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
@@ -3167,18 +3357,40 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 			 * specified and this attribute cannot be done in the
 			 * same Setattr operation.
 			 */
-			if ((nd->nd_flag & ND_NFSV41) == 0)
-				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
-			else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
-			    vp == NULL)
-				nd->nd_repstat = NFSERR_INVAL;
-			else if (moderet == 0)
-				moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
-			if (moderet == 0)
-				nvap->na_mode = (mode & mask) |
-				    (va.va_mode & ~mask);
-			else
-				nd->nd_repstat = moderet;
+			if (!nd->nd_repstat) {
+				if ((nd->nd_flag & ND_NFSV41) == 0)
+					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+				else if ((mode & ~07777) != 0 ||
+				    (mask & ~07777) != 0 || vp == NULL)
+					nd->nd_repstat = NFSERR_INVAL;
+				else if (moderet == 0)
+					moderet = VOP_GETATTR(vp, &va,
+					    nd->nd_cred);
+				if (moderet == 0)
+					nvap->na_mode = (mode & mask) |
+					    (va.va_mode & ~mask);
+				else
+					nd->nd_repstat = moderet;
+			}
+			attrsum += 2 * NFSX_UNSIGNED;
+			break;
+		case NFSATTRBIT_MODEUMASK:
+			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+			mode = fxdr_unsigned(u_short, *tl++);
+			mask = fxdr_unsigned(u_short, *tl);
+			/*
+			 * If moderet != 0, mode has already been done.
+			 * If vp != NULL, this is not a file object creation.
+			 */
+			if (!nd->nd_repstat) {
+				if ((nd->nd_flag & ND_NFSV42) == 0)
+					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+				else if ((mask & ~0777) != 0 || vp != NULL ||
+				    moderet != 0)
+					nd->nd_repstat = NFSERR_INVAL;
+				else
+					nvap->na_mode = (mode & ~mask);
+			}
 			attrsum += 2 * NFSX_UNSIGNED;
 			break;
 		default:
@@ -3193,7 +3405,7 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 
 	/*
 	 * some clients pad the attrlist, so we need to skip over the
-	 * padding.
+	 * padding.  This also skips over unparsed non-supported attributes.
 	 */
 	if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
@@ -3251,7 +3463,11 @@ nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
 		     NFSVNO_EXPORTANON(exp) ||
 		     (nd->nd_flag & ND_AUTHNONE) != 0) {
 			nd->nd_cred->cr_uid = credanon->cr_uid;
-			nd->nd_cred->cr_gid = credanon->cr_gid;
+			/*
+			 * 'credanon' is already a 'struct ucred' that was built
+			 * internally with calls to crsetgroups_fallback(), so
+			 * we don't need a fallback here.
+			 */
 			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 			    credanon->cr_groups);
 		} else if ((nd->nd_flag & ND_GSS) == 0) {
@@ -3398,6 +3614,15 @@ nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
 	    &credanon);
 	vfs_unbusy(mp);
 
+	if (nd->nd_repstat == 0 &&
+	    nfp->nfsrvfh_len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
+	    nfp->nfsrvfh_len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
+		if (nfp->nfsrvfh_len == NFSX_MYFH + NFSX_V4NAMEDDIRFH)
+			vn_irflag_set_cond(*vpp, VIRF_NAMEDDIR);
+		else
+			vn_irflag_set_cond(*vpp, VIRF_NAMEDATTR);
+	}
+
 	/*
 	 * For NFSv4 without a pseudo root fs, unexported file handles
 	 * can be returned, so that Lookup works everywhere.
@@ -5464,7 +5689,7 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
-		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
@@ -5495,7 +5720,7 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
 	if (error == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
-		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
@@ -5661,7 +5886,7 @@ nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
-		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 	} else
 		error = nd->nd_repstat;
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
@@ -5828,7 +6053,7 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
-		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
@@ -5842,7 +6067,7 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
-		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 	} else
 		error = nd->nd_repstat;
 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
@@ -5990,7 +6215,7 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
-		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
@@ -6014,7 +6239,8 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 	if (error == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
-		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
+		    NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
 nfsmout:
@@ -6159,7 +6385,7 @@ nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 	 * the same type (VREG).
 	 */
 	nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
-	    NULL, 0, 0, 0, 0, 0, NULL);
+	    NULL, 0, 0, 0, 0, 0, NULL, false, false, false);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
@@ -6303,7 +6529,7 @@ nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 	if (nd->nd_repstat == 0) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 		    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
-		    NULL, NULL);
+		    NULL, NULL, NULL);
 		/*
 		 * We can only save the updated values in the extended
 		 * attribute if the vp is exclusively locked.
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 0c8bda6dc6a6..9eebcda548c6 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -64,6 +64,7 @@ extern u_long sb_max_adj;
 extern int nfsrv_pnfsatime;
 extern int nfsrv_maxpnfsmirror;
 extern uint32_t nfs_srvmaxio;
+extern int nfsrv_issuedelegs;
 
 static int	nfs_async = 0;
 SYSCTL_DECL(_vfs_nfsd);
@@ -240,7 +241,7 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
 {
 	struct nfsvattr nva;
 	fhandle_t fh;
-	int at_root = 0, error = 0, supports_nfsv4acls;
+	int at_root = 0, error = 0, ret, supports_nfsv4acls;
 	struct nfsreferral *refp;
 	nfsattrbit_t attrbits, tmpbits;
 	struct mount *mp;
@@ -249,6 +250,9 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
 	uint64_t mounted_on_fileno = 0;
 	accmode_t accmode;
 	struct thread *p = curthread;
+	size_t atsiz;
+	long pathval;
+	bool has_hiddensystem, has_namedattr, xattrsupp;
 
 	if (nd->nd_repstat)
 		goto out;
@@ -306,6 +310,26 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
 				    &nva, &attrbits, p);
 			if (nd->nd_repstat == 0) {
 				supports_nfsv4acls = nfs_supportsnfsv4acls(vp);
+				xattrsupp = false;
+				if (NFSISSET_ATTRBIT(&attrbits,
+				    NFSATTRBIT_XATTRSUPPORT)) {
+					ret = VOP_GETEXTATTR(vp,
+					    EXTATTR_NAMESPACE_USER,
+					    "xxx", NULL, &atsiz, nd->nd_cred,
+					    p);
+					xattrsupp = ret != EOPNOTSUPP;
+				}
+				if (VOP_PATHCONF(vp, _PC_HAS_HIDDENSYSTEM,
+				    &pathval) != 0)
+					pathval = 0;
+				has_hiddensystem = pathval > 0;
+				pathval = 0;
+				if (NFSISSET_ATTRBIT(&attrbits,
+				    NFSATTRBIT_NAMEDATTR) &&
+				    VOP_PATHCONF(vp, _PC_HAS_NAMEDATTR,
+				    &pathval) != 0)
+					pathval = 0;
+				has_namedattr = pathval > 0;
 				mp = vp->v_mount;
 				if (nfsrv_enable_crossmntpt != 0 &&
 				    vp->v_type == VDIR &&
@@ -339,7 +363,9 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
 					(void)nfsvno_fillattr(nd, mp, vp, &nva,
 					    &fh, 0, &attrbits, nd->nd_cred, p,
 					    isdgram, 1, supports_nfsv4acls,
-					    at_root, mounted_on_fileno);
+					    at_root, mounted_on_fileno,
+					    xattrsupp, has_hiddensystem,
+					    has_namedattr);
 					vfs_unbusy(mp);
 				}
 				vrele(vp);
@@ -375,6 +401,7 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
 	NFSACL_T *aclp = NULL;
 	struct thread *p = curthread;
 
+	NFSZERO_ATTRBIT(&retbits);
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 		goto out;
@@ -401,9 +428,10 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
 	if (error)
 		goto nfsmout;
 
-	/* For NFSv4, only va_uid is used from nva2. */
-	NFSZERO_ATTRBIT(&retbits);
+	/* For NFSv4, only va_uid and va_flags are used from nva2. */
 	NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER);
+	NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN);
+	NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM);
 	preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = preat_ret;
@@ -462,6 +490,9 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
 		    &nva, &attrbits, exp, p);
 
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
+	    u_long oldflags;
+
+	    oldflags = nva2.na_flags;
 	    /*
 	     * For V4, try setting the attributes in sets, so that the
 	     * reply bitmap will be correct for an error case.
@@ -531,6 +562,32 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED);
 		}
 	    }
+	    if (!nd->nd_repstat &&
+		(NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN) ||
+		 NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM))) {
+		if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN)) {
+		    if ((nva.na_flags & UF_HIDDEN) != 0)
+			oldflags |= UF_HIDDEN;
+		    else
+			oldflags &= ~UF_HIDDEN;
+		}
+		if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM)) {
+		    if ((nva.na_flags & UF_SYSTEM) != 0)
+			oldflags |= UF_SYSTEM;
+		    else
+			oldflags &= ~UF_SYSTEM;
+		}
+		NFSVNO_ATTRINIT(&nva2);
+		NFSVNO_SETATTRVAL(&nva2, flags, oldflags);
+		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
+		    exp);
+		if (!nd->nd_repstat) {
+		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN))
+			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN);
+		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM))
+			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM);
+		}
+	    }
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	    if (!nd->nd_repstat && aclp->acl_cnt > 0 &&
@@ -595,6 +652,8 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
+	struct componentname *cnp;
+	short irflag;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, dattr_ret, &dattr);
@@ -611,8 +670,12 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
 		goto out;
 	}
 
-	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
-	    LOCKLEAF);
+	cnp = &named.ni_cnd;
+	irflag = vn_irflag_read(dp);
+	if ((irflag & VIRF_NAMEDDIR) != 0)
+		NFSNAMEICNDSET(cnp, nd->nd_cred, LOOKUP, LOCKLEAF | OPENNAMED);
+	else
+		NFSNAMEICNDSET(cnp, nd->nd_cred, LOOKUP, LOCKLEAF);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
@@ -621,6 +684,10 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
 		goto out;
 	}
 	if (!nd->nd_repstat) {
+		/* Don't set OPENNAMED for Lookupp (".."). */
+		if (cnp->cn_namelen == 2 && *cnp->cn_pnbuf == '.' &&
+		    *(cnp->cn_pnbuf + 1) == '.')
+			cnp->cn_flags &= ~OPENNAMED;
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
 	} else {
 		vrele(dp);
@@ -1348,6 +1415,18 @@ nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram,
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		vtyp = nfsv34tov_type(*tl);
 		nfs4type = fxdr_unsigned(nfstype, *tl);
+		if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0) {
+			/*
+			 * Don't allow creation of non-regular file objects
+			 * in a named attribute directory.
+			 */
+			nd->nd_repstat = NFSERR_INVAL;
+			vrele(dp);
+#ifdef NFS4_ACL_EXTATTR_NAME
+			acl_free(aclp);
+#endif
+			goto out;
+		}
 		switch (nfs4type) {
 		case NFLNK:
 			error = nfsvno_getsymlink(nd, &nva, p, &pathcp,
@@ -1577,14 +1656,14 @@ nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram,
 				nd->nd_repstat = nfsvno_rmdirsub(&named, 1,
 				    nd->nd_cred, p, exp);
 			else
-				nd->nd_repstat = nfsvno_removesub(&named, 1,
-				    nd->nd_cred, p, exp);
+				nd->nd_repstat = nfsvno_removesub(&named, true,
+				    nd, p, exp);
 		} else if (nd->nd_procnum == NFSPROC_RMDIR) {
 			nd->nd_repstat = nfsvno_rmdirsub(&named, 0,
 			    nd->nd_cred, p, exp);
 		} else {
-			nd->nd_repstat = nfsvno_removesub(&named, 0,
-			    nd->nd_cred, p, exp);
+			nd->nd_repstat = nfsvno_removesub(&named, false, nd, p,
+			    exp);
 		}
 	}
 	if (!(nd->nd_flag & ND_NFSV2)) {
@@ -1680,8 +1759,7 @@ nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
 		}
 
 		/* If this is the same file handle, just VREF() the vnode. */
-		if (tfh.nfsrvfh_len == NFSX_MYFH &&
-		    !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
+		if (!NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
 			VREF(dp);
 			tdp = dp;
 			tnes = *exp;
@@ -1749,8 +1827,7 @@ nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
 	if (fromnd.ni_vp->v_type == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, &tdirp);
-	nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat,
-	    nd->nd_flag, nd->nd_cred, p);
+	nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd, p);
 	if (fdirp)
 		fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL);
 	if (tdirp)
@@ -1804,8 +1881,15 @@ nfsrvd_link(struct nfsrv_descript *nd, int isdgram,
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
+	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0 ||
+	    (tovp != NULL &&
+	     (vn_irflag_read(tovp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0)) {
+		nd->nd_repstat = NFSERR_INVAL;
+		if (tovp != NULL)
+			vrele(tovp);
+	}
 	NFSVOPUNLOCK(vp);
-	if (vp->v_type == VDIR) {
+	if (!nd->nd_repstat && vp->v_type == VDIR) {
 		if (nd->nd_flag & ND_NFSV4)
 			nd->nd_repstat = NFSERR_ISDIR;
 		else
@@ -2829,7 +2913,7 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 	int how = NFSCREATE_UNCHECKED;
 	int32_t cverf[2], tverf[2] = { 0, 0 };
 	vnode_t vp = NULL, dirp = NULL;
-	struct nfsvattr nva, dirfor, diraft;
+	struct nfsvattr nva, dirfor, diraft, nva2;
 	struct nameidata named;
 	nfsv4stateid_t stateid, delegstateid;
 	nfsattrbit_t attrbits;
@@ -2839,6 +2923,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 	NFSACL_T *aclp = NULL;
 	struct thread *p = curthread;
 	bool done_namei;
+	__enum_uint8_decl(wdelegace) { USENONE, USEMODE, USENFSV4ACL }
+	    delegace;
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
@@ -2846,6 +2932,7 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 #endif
 	NFSZERO_ATTRBIT(&attrbits);
 	done_namei = false;
+	delegace = USEMODE;
 	named.ni_cnd.cn_nameiop = 0;
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *(tl + 5));
@@ -2971,6 +3058,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
+			if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0)
+				nd->nd_repstat = NFSERR_INVAL;
 			break;
 		case NFSCREATE_EXCLUSIVE41:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
@@ -2979,7 +3068,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 			error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 			if (error != 0)
 				goto nfsmout;
-			if (NFSISSET_ATTRBIT(&attrbits,
+			if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0 ||
+			    NFSISSET_ATTRBIT(&attrbits,
 			    NFSATTRBIT_TIMEACCESSSET))
 				nd->nd_repstat = NFSERR_INVAL;
 			/*
@@ -3076,11 +3166,23 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 			}
 			break;
 		    case NFSCREATE_EXCLUSIVE:
-			exclusive_flag = 1;
 			if (nd->nd_repstat == 0 && named.ni_vp == NULL)
 				nva.na_mode = 0;
-			break;
+			/* FALLTHROUGH */
 		    case NFSCREATE_EXCLUSIVE41:
+			if (nd->nd_repstat == 0 && named.ni_vp != NULL) {
+				nd->nd_repstat = nfsvno_getattr(named.ni_vp,
+				    &nva2, nd, p, 1, NULL);
+				if (nd->nd_repstat == 0) {
+					tverf[0] = nva2.na_atime.tv_sec;
+					tverf[1] = nva2.na_atime.tv_nsec;
+					if (cverf[0] != tverf[0] ||
+					     cverf[1] != tverf[1])
+						nd->nd_repstat = EEXIST;
+				}
+				if (nd->nd_repstat != 0)
+					done_namei = true;
+			}
 			exclusive_flag = 1;
 			break;
 		    }
@@ -3170,16 +3272,27 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	}
 
-	if (!nd->nd_repstat) {
+	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
-		if (!nd->nd_repstat) {
-			tverf[0] = nva.na_atime.tv_sec;
-			tverf[1] = nva.na_atime.tv_nsec;
+
+	if (nd->nd_repstat == 0 && aclp != NULL && nfsrv_issuedelegs != 0 &&
+	    (dp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) {
+		if (aclp->acl_cnt == 0 && create == NFSV4OPEN_NOCREATE) {
+			int retacl;
+
+			/* We do not yet have an ACL, so try and get one. */
+			retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp,
+			    nd->nd_cred, p);
+			if (retacl != 0 && retacl != ENOATTR &&
+			    retacl != EOPNOTSUPP && retacl != EINVAL)
+				delegace = USENONE;
+			else if (retacl == 0 && aclp->acl_cnt > 0)
+				delegace = USENFSV4ACL;
+		} else if (aclp->acl_cnt > 0 && create == NFSV4OPEN_CREATE) {
+			delegace = USENFSV4ACL;
 		}
 	}
-	if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] ||
-	    cverf[1] != tverf[1]))
-		nd->nd_repstat = EEXIST;
+
 	/*
 	 * Do the open locking/delegation stuff.
 	 */
@@ -3244,6 +3357,13 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE);
 				*tl = newnfs_false;
+			} else if ((rflags &
+			    NFSV4OPEN_WDNOTSUPPDOWNGRADE) != 0) {
+				NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+				*tl = txdr_unsigned(NFSV4OPEN_NOTSUPPDOWNGRADE);
+			} else if ((rflags & NFSV4OPEN_WDNOTSUPPUPGRADE) != 0) {
+				NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+				*tl = txdr_unsigned(NFSV4OPEN_NOTSUPPUPGRADE);
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
@@ -3265,18 +3385,56 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
 				*tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE);
 				txdr_hyper(nva.na_size, tl);
 			}
-			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
-			*tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
-			*tl++ = txdr_unsigned(0x0);
-			acemask = NFSV4ACE_ALLFILESMASK;
-			if (nva.na_mode & S_IRUSR)
-			    acemask |= NFSV4ACE_READMASK;
-			if (nva.na_mode & S_IWUSR)
-			    acemask |= NFSV4ACE_WRITEMASK;
-			if (nva.na_mode & S_IXUSR)
-			    acemask |= NFSV4ACE_EXECUTEMASK;
-			*tl = txdr_unsigned(acemask);
-			(void) nfsm_strtom(nd, "OWNER@", 6);
+
+			/* Set up the write delegation ACE. */
+			NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+			if (delegace == USENFSV4ACL) {
+				int j;
+
+				for (j = 0; j < aclp->acl_cnt; j++) {
+					if (aclp->acl_entry[j].ae_tag ==
+					    ACL_USER_OBJ ||
+					    aclp->acl_entry[j].ae_entry_type !=
+					    ACL_ENTRY_TYPE_ALLOW)
+						break;
+				}
+				if (j < aclp->acl_cnt &&
+				    aclp->acl_entry[j].ae_tag ==
+				    ACL_USER_OBJ &&
+				    aclp->acl_entry[j].ae_entry_type ==
+				    ACL_ENTRY_TYPE_ALLOW) {
+					/* Use this ACE. */
+					*tl++ = txdr_unsigned(
+					    NFSV4ACE_ALLOWEDTYPE);
+					*tl++ = txdr_unsigned(0x0);
+					*tl = txdr_unsigned(
+					    nfs_aceperm(
+					    aclp->acl_entry[j].ae_perm));
+					(void)nfsm_strtom(nd, "OWNER@", 6);
+				} else
+					delegace = USENONE;
+			}
+			if (delegace == USENONE) {
+				/* Don't allow anything. */
+				*tl++ = 0x0;
+				*tl++ = 0x0;
+				*tl = 0x0;
+				NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+				*tl = 0;
+			} else if (delegace == USEMODE) {
+				/* Build from mode. */
+				*tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
+				*tl++ = txdr_unsigned(0x0);
+				acemask = NFSV4ACE_ALLFILESMASK;
+				if (nva.na_mode & S_IRUSR)
+					acemask |= NFSV4ACE_READMASK;
+				if (nva.na_mode & S_IWUSR)
+					acemask |= NFSV4ACE_WRITEMASK;
+				if (nva.na_mode & S_IXUSR)
+					acemask |= NFSV4ACE_EXECUTEMASK;
+				*tl = txdr_unsigned(acemask);
+				(void)nfsm_strtom(nd, "OWNER@", 6);
+			}
 		}
 		*vpp = vp;
 	} else if (vp) {
@@ -3466,11 +3624,20 @@ nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram,
 {
 	fhandle_t fh;
 	struct thread *p = curthread;
+	int siz;
+	short irflag;
 
 	nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
+	irflag = vn_irflag_read(vp);
 	vput(vp);
-	if (!nd->nd_repstat)
-		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)&fh, 0, 0);
+	if (nd->nd_repstat == 0) {
+		siz = 0;
+		if ((irflag & VIRF_NAMEDDIR) != 0)
+			siz = NFSX_FHMAX + NFSX_V4NAMEDDIRFH;
+		else if ((irflag & VIRF_NAMEDATTR) != 0)
+			siz = NFSX_FHMAX + NFSX_V4NAMEDATTRFH;
+		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)&fh, siz, 0);
+	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
@@ -4180,7 +4347,8 @@ nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
 	if (!nd->nd_repstat) {
 		nfsvno_getfs(&fs, isdgram);
 		error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL,
-		    sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred);
+		    sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, NULL, p,
+		    nd->nd_cred);
 		if (!error) {
 			if (nd->nd_procnum == NFSV4OP_NVERIFY) {
 				if (ret == 0)
@@ -4202,15 +4370,42 @@ nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
  */
 int
 nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram,
-    vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp,
+    struct vnode *dp, struct vnode **vpp, __unused fhandle_t *fhp,
     __unused struct nfsexstuff *exp)
 {
-	u_int32_t *tl;
-	int error = 0, createdir __unused;
+	uint32_t *tl;
+	struct componentname cn;
+	int error = 0;
 
-	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-	createdir = fxdr_unsigned(int, *tl);
-	nd->nd_repstat = NFSERR_NOTSUPP;
+	NFSNAMEICNDSET(&cn, nd->nd_cred, LOOKUP, OPENNAMED | ISLASTCN |
+	    NOFOLLOW | LOCKLEAF);
+	cn.cn_nameptr = ".";
+	cn.cn_namelen = 1;
+	cn.cn_lkflags = LK_SHARED;
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	if (*tl == newnfs_true)
+		cn.cn_flags |= CREATENAMED;
+
+	nd->nd_repstat = vn_lock(dp, LK_SHARED);
+	if (nd->nd_repstat != 0)
+		goto nfsmout;
+
+	if ((dp->v_mount->mnt_flag & MNT_NAMEDATTR) == 0)
+		nd->nd_repstat = NFSERR_NOTSUPP;
+	if (nd->nd_repstat == 0 && (vn_irflag_read(dp) & (VIRF_NAMEDDIR |
+	    VIRF_NAMEDATTR)) != 0)
+		nd->nd_repstat = NFSERR_WRONGTYPE;
+	if (nd->nd_repstat == 0) {
+		nd->nd_repstat = VOP_LOOKUP(dp, vpp, &cn);
+		if (nd->nd_repstat == ENOATTR)
+			nd->nd_repstat = NFSERR_NOENT;
+	}
+	if (nd->nd_repstat == 0)
+		NFSVOPUNLOCK(*vpp);
+
+	vput(dp);
+	NFSEXITCODE2(0, nd);
+	return (0);
 nfsmout:
 	vrele(dp);
 	NFSEXITCODE2(error, nd);
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index 1f50634405d0..d1b6198ba0e1 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -797,7 +797,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag,
 					!LIST_EMPTY(&clp->lc_deleg))
 					nfsrv_writestable(clp->lc_id,
 					    clp->lc_idlen, NFSNST_REVOKE, p);
-				    nfsrv_cleanclient(clp, p);
+				    nfsrv_cleanclient(clp, p, false, NULL);
 				    nfsrv_freedeleglist(&clp->lc_deleg);
 				    nfsrv_freedeleglist(&clp->lc_olddeleg);
 				    LIST_REMOVE(clp, lc_hash);
@@ -1422,13 +1422,11 @@ static struct ucred *
 nfsrv_createrootcred(void)
 {
 	struct ucred *cr;
-	gid_t grp;
 
 	cr = crget();
 	cr->cr_uid = cr->cr_ruid = cr->cr_svuid = UID_ROOT;
-	grp = GID_WHEEL;
-	crsetgroups(cr, 1, &grp);
-	cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0];
+	crsetgroups_fallback(cr, 0, NULL, GID_WHEEL);
+	cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
 	cr->cr_prison = curthread->td_ucred->cr_prison;
 	prison_hold(cr->cr_prison);
 #ifdef MAC
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index c73840277022..2e27817389dd 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -115,6 +115,11 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
     &nfsrv_flexlinuxhack, 0,
     "For Linux clients, hack around Flex File Layout bug");
 
+NFSD_VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false;
+SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace,
+    CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_disable_grace),
+    0, "Disable grace for testing");
+
 /*
  * Hash lists for nfs V4.
  */
@@ -139,7 +144,7 @@ static void nfsrv_dumpaclient(struct nfsclient *clp,
     struct nfsd_dumpclients *dumpp);
 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
     NFSPROC_T *p);
-static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
+static void nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
     NFSPROC_T *p);
 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
     NFSPROC_T *p);
@@ -204,7 +209,7 @@ static void nfsrv_locklf(struct nfslockfile *lfp);
 static void nfsrv_unlocklf(struct nfslockfile *lfp);
 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
 static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
-    uint8_t *sessionid);
+    uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp);
 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
     int dont_replycache, struct nfsdsession **sepp, int *slotposp);
 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
@@ -240,6 +245,50 @@ static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
 static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
     struct nfsclient *clp);
+static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
+    struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
+    u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
+    struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
+    nfsv4stateid_t *delegstateidp);
+static void nfsrv_clientlock(bool mlocked);
+static void nfsrv_clientunlock(bool mlocked);
+
+/*
+ * Lock the client structure: state mutex if mlocked, else exclusive nfsd lock.
+ */
+static void
+nfsrv_clientlock(bool mlocked)
+{
+	int igotlock;
+
+	if (mlocked) {
+		NFSLOCKSTATE();
+	} else {
+		NFSLOCKV4ROOTMUTEX();
+		nfsv4_relref(&nfsv4rootfs_lock);
+		do {
+			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
+			    NFSV4ROOTLOCKMUTEXPTR, NULL);
+		} while (!igotlock);
+		NFSUNLOCKV4ROOTMUTEX();
+	}
+}
+
+/*
+ * Unlock the client structure; mlocked must match the nfsrv_clientlock() call.
+ */
+static void
+nfsrv_clientunlock(bool mlocked)
+{
+
+	if (mlocked) {
+		NFSUNLOCKSTATE();
+	} else {
+		NFSLOCKV4ROOTMUTEX();
+		nfsv4_unlock(&nfsv4rootfs_lock, 1);
+		NFSUNLOCKV4ROOTMUTEX();
+	}
+}
 
 /*
  * Scan the client list for a match and either return the current one,
@@ -261,7 +310,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 	struct sockaddr_in6 *sin6, *rin6;
 #endif
 	struct nfsdsession *sep, *nsep;
-	int zapit = 0, gotit, hasstate = 0, igotlock;
+	SVCXPRT *old_xprt;
+	struct nfssessionhead old_sess;
+	int zapit = 0, gotit, hasstate = 0;
+	bool mlocked;
 	static u_int64_t confirm_index = 0;
 
 	/*
@@ -289,14 +341,11 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		 */
 		new_clp->lc_program = 0;
 
+	mlocked = true;
+	if (nfsrv_dolocallocks != 0)
+		mlocked = false;
 	/* Lock out other nfsd threads */
-	NFSLOCKV4ROOTMUTEX();
-	nfsv4_relref(&nfsv4rootfs_lock);
-	do {
-		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
-		    NFSV4ROOTLOCKMUTEXPTR, NULL);
-	} while (!igotlock);
-	NFSUNLOCKV4ROOTMUTEX();
+	nfsrv_clientlock(mlocked);
 
 	/*
 	 * Search for a match in the client list.
@@ -313,6 +362,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 	    if (gotit == 0)
 		i++;
 	}
+	old_xprt = NULL;
 	if (!gotit ||
 	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
 		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
@@ -320,9 +370,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
 			 * client is trying to update a confirmed clientid.
 			 */
-			NFSLOCKV4ROOTMUTEX();
-			nfsv4_unlock(&nfsv4rootfs_lock, 1);
-			NFSUNLOCKV4ROOTMUTEX();
+			nfsrv_clientunlock(mlocked);
 			confirmp->lval[1] = 0;
 			error = NFSERR_NOENT;
 			goto out;
@@ -332,7 +380,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		 */
 		if (i != nfsrv_clienthashsize) {
 			LIST_REMOVE(clp, lc_hash);
-			nfsrv_cleanclient(clp, p);
+			if (mlocked)
+				nfsrv_cleanclient(clp, p, true, &old_xprt);
+			else
+				nfsrv_cleanclient(clp, p, false, NULL);
 			nfsrv_freedeleglist(&clp->lc_deleg);
 			nfsrv_freedeleglist(&clp->lc_olddeleg);
 			zapit = 1;
@@ -367,11 +418,12 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		NFSD_VNET(nfsstatsv1_p)->srvclients++;
 		nfsrv_openpluslock++;
 		nfsrv_clients++;
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
-		if (zapit)
+		nfsrv_clientunlock(mlocked);
+		if (zapit != 0) {
+			if (old_xprt != NULL)
+				SVC_RELEASE(old_xprt);
 			nfsrv_zapclient(clp, p);
+		}
 		*new_clpp = NULL;
 		goto out;
 	}
@@ -385,7 +437,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 	     */
 	    if (clp->lc_expiry < NFSD_MONOSEC &&
 	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
-		nfsrv_cleanclient(clp, p);
+		if (mlocked)
+		    nfsrv_cleanclient(clp, p, true, &old_xprt);
+		else
+		    nfsrv_cleanclient(clp, p, false, NULL);
 		nfsrv_freedeleglist(&clp->lc_deleg);
 	    }
 
@@ -430,9 +485,9 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 			break;
 #endif
 		}
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		nfsrv_clientunlock(mlocked);
+		if (old_xprt != NULL)
+			SVC_RELEASE(old_xprt);
 		error = NFSERR_CLIDINUSE;
 		goto out;
 	    }
@@ -442,17 +497,12 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		/*
 		 * If the verifier has changed, the client has rebooted
 		 * and a new client id is issued. The old state info
-		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
+		 * can be thrown away once the SetClientID_Confirm or
+		 * Create_Session that confirms the clientid occurs.
 		 */
 		LIST_REMOVE(clp, lc_hash);
 
-		/* Get rid of all sessions on this clientid. */
-		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
-			ret = nfsrv_freesession(NULL, sep, NULL);
-			if (ret != 0)
-				printf("nfsrv_setclient: verifier changed free"
-				    " session failed=%d\n", ret);
-		}
+		LIST_NEWHEAD(&old_sess, &clp->lc_session, sess_list);
 
 		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
@@ -496,21 +546,31 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		NFSD_VNET(nfsstatsv1_p)->srvclients++;
 		nfsrv_openpluslock++;
 		nfsrv_clients++;
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		if (!mlocked) {
+			nfsrv_clientunlock(mlocked);
+			NFSLOCKSTATE();
+		}
 
 		/*
 		 * Must wait until any outstanding callback on the old clp
 		 * completes.
 		 */
-		NFSLOCKSTATE();
 		while (clp->lc_cbref) {
 			clp->lc_flags |= LCL_WAKEUPWANTED;
-			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
 			    "nfsd clp", 10 * hz);
 		}
 		NFSUNLOCKSTATE();
+		if (old_xprt != NULL)
+			SVC_RELEASE(old_xprt);
+		/* Get rid of all sessions on this clientid. */
+		LIST_FOREACH_SAFE(sep, &old_sess, sess_list, nsep) {
+			ret = nfsrv_freesession(NULL, sep, NULL, false, NULL);
+			if (ret != 0)
+				printf("nfsrv_setclient: verifier changed free"
+				    " session failed=%d\n", ret);
+		}
+
 		nfsrv_zapclient(clp, p);
 		*new_clpp = NULL;
 		goto out;
@@ -562,24 +622,31 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
 		nfsrv_openpluslock++;
 		nfsrv_clients++;
 	}
-	NFSLOCKV4ROOTMUTEX();
-	nfsv4_unlock(&nfsv4rootfs_lock, 1);
-	NFSUNLOCKV4ROOTMUTEX();
+	if (!mlocked)
+		nfsrv_clientunlock(mlocked);
 
 	if ((nd->nd_flag & ND_NFSV41) == 0) {
 		/*
 		 * Must wait until any outstanding callback on the old clp
 		 * completes.
 		 */
-		NFSLOCKSTATE();
+		if (!mlocked)
+			NFSLOCKSTATE();
 		while (clp->lc_cbref) {
 			clp->lc_flags |= LCL_WAKEUPWANTED;
-			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
 			    "nfsdclp", 10 * hz);
 		}
 		NFSUNLOCKSTATE();
+		if (old_xprt != NULL)
+			SVC_RELEASE(old_xprt);
 		nfsrv_zapclient(clp, p);
 		*new_clpp = NULL;
+	} else {
+		if (mlocked)
+			NFSUNLOCKSTATE();
+		if (old_xprt != NULL)
+			SVC_RELEASE(old_xprt);
 	}
 
 out:
@@ -599,11 +666,13 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 	struct nfsstate *stp;
 	int i;
 	struct nfsclienthashhead *hp;
-	int error = 0, igotlock, doneok;
+	int error = 0, doneok, igotlock;
 	struct nfssessionhash *shp;
 	struct nfsdsession *sep;
 	uint64_t sessid[2];
-	bool sess_replay;
+	CLIENT *client;
+	SVCXPRT *old_xprt;
+	bool mlocked, sess_replay;
 	static uint64_t next_sess = 0;
 
 	if (clpp)
@@ -620,13 +689,27 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 	 * already held. Otherwise, we need to get either that or,
 	 * for the case of Confirm, lock out the nfsd threads.
 	 */
+	client = NULL;
+	old_xprt = NULL;
+	mlocked = true;
+	if (nfsrv_dolocallocks != 0)
+		mlocked = false;
 	if (opflags & CLOPS_CONFIRM) {
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_relref(&nfsv4rootfs_lock);
-		do {
-			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
-			    NFSV4ROOTLOCKMUTEXPTR, NULL);
-		} while (!igotlock);
+		if (nsep != NULL &&
+		    (nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
+			client = (struct __rpc_client *)
+			    clnt_bck_create(nd->nd_xprt->xp_socket,
+			    cbprogram, NFSV4_CBVERS);
+		if (mlocked) {
+			nfsrv_clientlock(mlocked);
+		} else {
+			NFSLOCKV4ROOTMUTEX();
+			nfsv4_relref(&nfsv4rootfs_lock);
+			do {
+				igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1,
+				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
+			} while (!igotlock);
+		}
 		/*
 		 * Create a new sessionid here, since we need to do it where
 		 * there is a mutex held to serialize update of next_sess.
@@ -635,7 +718,8 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 			sessid[0] = ++next_sess;
 			sessid[1] = clientid.qval;
 		}
-		NFSUNLOCKV4ROOTMUTEX();
+		if (!mlocked)
+			NFSUNLOCKV4ROOTMUTEX();
 	} else if (opflags != CLOPS_RENEW) {
 		NFSLOCKSTATE();
 	}
@@ -672,9 +756,9 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 	}
 	if (error) {
 		if (opflags & CLOPS_CONFIRM) {
-			NFSLOCKV4ROOTMUTEX();
-			nfsv4_unlock(&nfsv4rootfs_lock, 1);
-			NFSUNLOCKV4ROOTMUTEX();
+			nfsrv_clientunlock(mlocked);
+			if (client != NULL)
+				CLNT_RELEASE(client);
 		} else if (opflags != CLOPS_RENEW) {
 			NFSUNLOCKSTATE();
 		}
@@ -719,7 +803,10 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 			 * for an Open with CLAIM_DELEGATE_PREV unless in
 			 * grace, but get rid of the rest of the state.
 			 */
-			nfsrv_cleanclient(clp, p);
+			if (mlocked)
+				nfsrv_cleanclient(clp, p, true, &old_xprt);
+			else
+				nfsrv_cleanclient(clp, p, false, NULL);
 			nfsrv_freedeleglist(&clp->lc_olddeleg);
 			if (nfsrv_checkgrace(nd, clp, 0)) {
 			    /* In grace, so just delete delegations */
@@ -743,10 +830,10 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 			/* Hold a reference on the xprt for a backchannel. */
 			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
 			    != 0 && !sess_replay) {
-			    if (clp->lc_req.nr_client == NULL)
-				clp->lc_req.nr_client = (struct __rpc_client *)
-				    clnt_bck_create(nd->nd_xprt->xp_socket,
-				    cbprogram, NFSV4_CBVERS);
+			    if (clp->lc_req.nr_client == NULL) {
+				clp->lc_req.nr_client = client;
+				client = NULL;
+			    }
 			    if (clp->lc_req.nr_client != NULL) {
 				SVC_ACQUIRE(nd->nd_xprt);
 				CLNT_ACQUIRE(clp->lc_req.nr_client);
@@ -763,13 +850,15 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 			    NFSX_V4SESSIONID);
 			if (!sess_replay) {
 			    shp = NFSSESSIONHASH(nsep->sess_sessionid);
-			    NFSLOCKSTATE();
+			    if (!mlocked)
+				NFSLOCKSTATE();
 			    NFSLOCKSESSION(shp);
 			    LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
 			    LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
 			    nsep->sess_clp = clp;
 			    NFSUNLOCKSESSION(shp);
-			    NFSUNLOCKSTATE();
+			    if (!mlocked)
+				NFSUNLOCKSTATE();
 			}
 		    }
 		}
@@ -803,9 +892,11 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
 		clp->lc_expiry = nfsrv_leaseexpiry();
 	}
 	if (opflags & CLOPS_CONFIRM) {
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		nfsrv_clientunlock(mlocked);
+		if (client != NULL)
+			CLNT_RELEASE(client);
+		if (old_xprt != NULL)
+			SVC_RELEASE(old_xprt);
 	} else if (opflags != CLOPS_RENEW) {
 		NFSUNLOCKSTATE();
 	}
@@ -825,21 +916,20 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
 {
 	struct nfsclient *clp;
 	struct nfsclienthashhead *hp;
-	int error = 0, i, igotlock;
+	SVCXPRT *old_xprt;
+	int error = 0, i;
+	bool mlocked;
 
 	if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
 		error = NFSERR_STALECLIENTID;
 		goto out;
 	}
 
+	mlocked = true;
+	if (nfsrv_dolocallocks != 0)
+		mlocked = false;
 	/* Lock out other nfsd threads */
-	NFSLOCKV4ROOTMUTEX();
-	nfsv4_relref(&nfsv4rootfs_lock);
-	do {
-		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
-		    NFSV4ROOTLOCKMUTEXPTR, NULL);
-	} while (igotlock == 0);
-	NFSUNLOCKV4ROOTMUTEX();
+	nfsrv_clientlock(mlocked);
 
 	hp = NFSCLIENTHASH(clientid);
 	LIST_FOREACH(clp, hp, lc_hash) {
@@ -847,9 +937,7 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
 			break;
 	}
 	if (clp == NULL) {
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		nfsrv_clientunlock(mlocked);
 		/* Just return ok, since it is gone. */
 		goto out;
 	}
@@ -857,9 +945,7 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
 	/* Check for the SP4_MACH_CRED case. */
 	error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
 	if (error != 0) {
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		nfsrv_clientunlock(mlocked);
 		goto out;
 	}
 
@@ -872,28 +958,28 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
 	/* Scan for state on the clientid. */
 	for (i = 0; i < nfsrv_statehashsize; i++)
 		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
-			NFSLOCKV4ROOTMUTEX();
-			nfsv4_unlock(&nfsv4rootfs_lock, 1);
-			NFSUNLOCKV4ROOTMUTEX();
+			nfsrv_clientunlock(mlocked);
 			error = NFSERR_CLIENTIDBUSY;
 			goto out;
 		}
 	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
-		NFSLOCKV4ROOTMUTEX();
-		nfsv4_unlock(&nfsv4rootfs_lock, 1);
-		NFSUNLOCKV4ROOTMUTEX();
+		nfsrv_clientunlock(mlocked);
 		error = NFSERR_CLIENTIDBUSY;
 		goto out;
 	}
 
 	/* Destroy the clientid and return ok. */
-	nfsrv_cleanclient(clp, p);
+	old_xprt = NULL;
+	if (mlocked)
+		nfsrv_cleanclient(clp, p, true, &old_xprt);
+	else
+		nfsrv_cleanclient(clp, p, false, NULL);
 	nfsrv_freedeleglist(&clp->lc_deleg);
 	nfsrv_freedeleglist(&clp->lc_olddeleg);
 	LIST_REMOVE(clp, lc_hash);
-	NFSLOCKV4ROOTMUTEX();
-	nfsv4_unlock(&nfsv4rootfs_lock, 1);
-	NFSUNLOCKV4ROOTMUTEX();
+	nfsrv_clientunlock(mlocked);
+	if (old_xprt != NULL)
+		SVC_RELEASE(old_xprt);
 	nfsrv_zapclient(clp, p);
 out:
 	NFSEXITCODE2(error, nd);
@@ -956,7 +1042,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
 	 */
 	clp->lc_flags &= ~LCL_CALLBACKSON;
 	clp->lc_flags |= LCL_ADMINREVOKED;
-	nfsrv_cleanclient(clp, p);
+	nfsrv_cleanclient(clp, p, false, NULL);
 	nfsrv_freedeleglist(&clp->lc_deleg);
 	nfsrv_freedeleglist(&clp->lc_olddeleg);
 	NFSLOCKV4ROOTMUTEX();
@@ -1376,16 +1462,22 @@ nfsrv_servertimer(void *arg __unused)
  * there are no other active nfsd threads.
  */
 void
-nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
+nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p, bool locked,
+    SVCXPRT **old_xprtp)
 {
 	struct nfsstate *stp, *nstp;
 	struct nfsdsession *sep, *nsep;
 
-	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
-		nfsrv_freeopenowner(stp, 1, p);
+	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
+		if (locked)
+			nfsrv_freeopenowner(stp, 0, p);
+		else
+			nfsrv_freeopenowner(stp, 1, p);
+	}
 	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
 		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
-			(void)nfsrv_freesession(NULL, sep, NULL);
+			(void)nfsrv_freesession(NULL, sep, NULL, locked,
+			    old_xprtp);
 }
 
 /*
@@ -1479,7 +1571,7 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
 	while (nstp != LIST_END(&stp->ls_open)) {
 		tstp = nstp;
 		nstp = LIST_NEXT(nstp, ls_list);
-		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
+		nfsrv_freeopen(tstp, NULL, cansleep, p);
 	}
 	if (stp->ls_op)
 		nfsrvd_derefcache(stp->ls_op);
@@ -1494,12 +1586,11 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
  * are no other opens on the file.
- * Returns 1 if it free'd the nfslockfile, 0 otherwise.
+ * If vp != NULL, the state mutex and vnode lock are dropped and re-acquired.
  */
-static int
+static void
 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 {
 	struct nfsstate *nstp, *tstp;
 	struct nfslockfile *lfp;
-	int ret;
 
 	LIST_REMOVE(stp, ls_hash);
 	LIST_REMOVE(stp, ls_list);
@@ -1508,35 +1599,46 @@ nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 	lfp = stp->ls_lfp;
 	/*
 	 * Now, free all lockowners associated with this open.
+	 * Note that, if vp != NULL, nfsrv_freelockowner() will
+	 * not call nfsrv_freeallnfslocks(), so it needs to be called, below.
 	 */
 	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
 		nfsrv_freelockowner(tstp, vp, cansleep, p);
 
+	if (vp != NULL) {
+		KASSERT(cansleep != 0, ("nfsrv_freeopen: cansleep == 0"));
+		mtx_assert(NFSSTATEMUTEXPTR, MA_OWNED);
+		/*
+		 * Only called with vp != NULL for Close when
+		 * vfs.nfsd.enable_locallocks != 0.
+		 * Lock the lfp so that it will not go away and do the
+		 * nfsrv_freeallnfslocks() call that was not done by
+		 * nfsrv_freelockowner().
+		 */
+		nfsrv_locklf(lfp);
+		NFSUNLOCKSTATE();
+		NFSVOPUNLOCK(vp);
+		nfsrv_freeallnfslocks(stp, vp, cansleep, p);
+		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+		NFSLOCKSTATE();
+		nfsrv_unlocklf(lfp);
+	}
+
 	/*
 	 * The nfslockfile is freed here if there are no locks
 	 * associated with the open.
 	 * If there are locks associated with the open, the
 	 * nfslockfile structure can be freed via nfsrv_freelockowner().
-	 * Acquire the state mutex to avoid races with calls to
-	 * nfsrv_getlockfile().
 	 */
-	if (cansleep != 0)
-		NFSLOCKSTATE();
 	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
 	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
 	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 	    lfp->lf_usecount == 0 &&
-	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
+	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
 		nfsrv_freenfslockfile(lfp);
-		ret = 1;
-	} else
-		ret = 0;
-	if (cansleep != 0)
-		NFSUNLOCKSTATE();
 	free(stp, M_NFSDSTATE);
 	NFSD_VNET(nfsstatsv1_p)->srvopens--;
 	nfsrv_openpluslock--;
-	return (ret);
 }
 
 /*
@@ -1549,7 +1651,8 @@ nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
 
 	LIST_REMOVE(stp, ls_hash);
 	LIST_REMOVE(stp, ls_list);
-	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
+	if (vp == NULL)
+		nfsrv_freeallnfslocks(stp, vp, cansleep, p);
 	if (stp->ls_op)
 		nfsrvd_derefcache(stp->ls_op);
 	free(stp, M_NFSDSTATE);
@@ -2648,6 +2751,8 @@ tryagain:
 	 *    considered a conflict since the client with a read delegation
 	 *    could have done an Open with ReadAccess and WriteDeny
 	 *    locally and then not have checked for the WriteDeny.)
+	 *    The exception is an NFSv4.1/4.2 client that has requested
+	 *    an atomic upgrade to a write delegation.
 	 * Don't check for a Reclaim, since that will be dealt with
 	 * by nfsrv_openctrl().
 	 */
@@ -2657,9 +2762,10 @@ tryagain:
 	    while (stp != LIST_END(&lfp->lf_deleg)) {
 		nstp = LIST_NEXT(stp, ls_file);
 		if ((readonly && stp->ls_clp != clp &&
-		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
+		     (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
 		    (!readonly && (stp->ls_clp != clp ||
-		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
+		     ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+		      (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
 			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 			if (ret) {
 			    /*
@@ -2944,6 +3050,8 @@ tryagain:
 	 *    considered a conflict since the client with a read delegation
 	 *    could have done an Open with ReadAccess and WriteDeny
 	 *    locally and then not have checked for the WriteDeny.)
+	 *    The exception is an NFSv4.1/4.2 client that has requested
+	 *    an atomic upgrade to a write delegation.
 	 */
 	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
 	    stp = LIST_FIRST(&lfp->lf_deleg);
@@ -2951,12 +3059,15 @@ tryagain:
 		nstp = LIST_NEXT(stp, ls_file);
 		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
 			writedeleg = 0;
-		else
+		else if (stp->ls_clp != clp ||
+		    (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
+		    (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
 			delegate = 0;
 		if ((readonly && stp->ls_clp != clp &&
-		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
+		     (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
 		    (!readonly && (stp->ls_clp != clp ||
-		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
+		     ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+		      (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
 		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 			delegate = 2;
 		    } else {
@@ -3204,47 +3315,9 @@ tryagain:
 		    /*
 		     * This is where we can choose to issue a delegation.
 		     */
-		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
-			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
-		    else if (nfsrv_issuedelegs == 0)
-			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
-		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
-			*rflagsp |= NFSV4OPEN_WDRESOURCE;
-		    else if (delegate == 0 || writedeleg == 0 ||
-			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
-			nfsrv_writedelegifpos == 0) ||
-			!NFSVNO_DELEGOK(vp) ||
-			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
-			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
-			 LCL_CALLBACKSON)
-			*rflagsp |= NFSV4OPEN_WDCONTENTION;
-		    else {
-			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
-			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
-			    = clp->lc_clientid.lval[0];
-			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
-			    = clp->lc_clientid.lval[1];
-			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
-			    = nfsrv_nextstateindex(clp);
-			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
-			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
-			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
-			new_deleg->ls_uid = new_stp->ls_uid;
-			new_deleg->ls_lfp = lfp;
-			new_deleg->ls_clp = clp;
-			new_deleg->ls_filerev = filerev;
-			new_deleg->ls_compref = nd->nd_compref;
-			new_deleg->ls_lastrecall = 0;
-			nfsrv_writedelegcnt++;
-			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
-			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
-			    new_deleg->ls_stateid), new_deleg, ls_hash);
-			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
-			new_deleg = NULL;
-			NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
-			nfsrv_openpluslock++;
-			nfsrv_delegatecnt++;
-		    }
+		    nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+			readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+			new_stp, lfp, rflagsp, delegstateidp);
 		} else {
 		    new_open->ls_stateid.seqid = 1;
 		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
@@ -3269,52 +3342,9 @@ tryagain:
 		    /*
 		     * This is where we can choose to issue a delegation.
 		     */
-		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
-			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
-		    else if (nfsrv_issuedelegs == 0)
-			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
-		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
-			*rflagsp |= NFSV4OPEN_WDRESOURCE;
-		    else if (delegate == 0 || (writedeleg == 0 &&
-			readonly == 0) || !NFSVNO_DELEGOK(vp) ||
-			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
-			 LCL_CALLBACKSON)
-			*rflagsp |= NFSV4OPEN_WDCONTENTION;
-		    else {
-			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
-			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
-			    = clp->lc_clientid.lval[0];
-			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
-			    = clp->lc_clientid.lval[1];
-			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
-			    = nfsrv_nextstateindex(clp);
-			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
-			    (nfsrv_writedelegifpos || !readonly) &&
-			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
-			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
-				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
-			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
-			    nfsrv_writedelegcnt++;
-			} else {
-			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
-				NFSLCK_READACCESS);
-			    *rflagsp |= NFSV4OPEN_READDELEGATE;
-			}
-			new_deleg->ls_uid = new_stp->ls_uid;
-			new_deleg->ls_lfp = lfp;
-			new_deleg->ls_clp = clp;
-			new_deleg->ls_filerev = filerev;
-			new_deleg->ls_compref = nd->nd_compref;
-			new_deleg->ls_lastrecall = 0;
-			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
-			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
-			    new_deleg->ls_stateid), new_deleg, ls_hash);
-			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
-			new_deleg = NULL;
-			NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
-			nfsrv_openpluslock++;
-			nfsrv_delegatecnt++;
-		    }
+		    nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+			readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+			new_stp, lfp, rflagsp, delegstateidp);
 		}
 	} else {
 		/*
@@ -3337,78 +3367,28 @@ tryagain:
 		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 			new_stp->ls_flags = 0;
 		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
-			/* NFSv4.1 never needs confirmation. */
-			new_stp->ls_flags = 0;
+		    /*
+		     * This is where we can choose to issue a delegation.
+		     */
+		    nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+			readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+			new_stp, lfp, rflagsp, delegstateidp);
+		    /* NFSv4.1 never needs confirmation. */
+		    new_stp->ls_flags = 0;
 
-			/*
-			 * This is where we can choose to issue a delegation.
-			 */
-			if (delegate && nfsrv_issuedelegs &&
-			    (writedeleg || readonly) &&
-			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
-			     LCL_CALLBACKSON &&
-			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
-			    NFSVNO_DELEGOK(vp) &&
-			    ((nd->nd_flag & ND_NFSV41) == 0 ||
-			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
-				new_deleg->ls_stateid.seqid =
-				    delegstateidp->seqid = 1;
-				new_deleg->ls_stateid.other[0] =
-				    delegstateidp->other[0]
-				    = clp->lc_clientid.lval[0];
-				new_deleg->ls_stateid.other[1] =
-				    delegstateidp->other[1]
-				    = clp->lc_clientid.lval[1];
-				new_deleg->ls_stateid.other[2] =
-				    delegstateidp->other[2]
-				    = nfsrv_nextstateindex(clp);
-				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
-				    (nfsrv_writedelegifpos || !readonly) &&
-				    ((nd->nd_flag & ND_NFSV41) == 0 ||
-				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
-				     0)) {
-					new_deleg->ls_flags =
-					    (NFSLCK_DELEGWRITE |
-					     NFSLCK_READACCESS |
-					     NFSLCK_WRITEACCESS);
-					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
-					nfsrv_writedelegcnt++;
-				} else {
-					new_deleg->ls_flags =
-					    (NFSLCK_DELEGREAD |
-					     NFSLCK_READACCESS);
-					*rflagsp |= NFSV4OPEN_READDELEGATE;
-				}
-				new_deleg->ls_uid = new_stp->ls_uid;
-				new_deleg->ls_lfp = lfp;
-				new_deleg->ls_clp = clp;
-				new_deleg->ls_filerev = filerev;
-				new_deleg->ls_compref = nd->nd_compref;
-				new_deleg->ls_lastrecall = 0;
-				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
-				    ls_file);
-				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
-				    new_deleg->ls_stateid), new_deleg, ls_hash);
-				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
-				    ls_list);
-				new_deleg = NULL;
-				NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
-				nfsrv_openpluslock++;
-				nfsrv_delegatecnt++;
-			}
-			/*
-			 * Since NFSv4.1 never does an OpenConfirm, the first
-			 * open state will be acquired here.
-			 */
-			if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
-				clp->lc_flags |= LCL_STAMPEDSTABLE;
-				len = clp->lc_idlen;
-				NFSBCOPY(clp->lc_id, clidp, len);
-				gotstate = 1;
-			}
+		    /*
+		     * Since NFSv4.1 never does an OpenConfirm, the first
+		     * open state will be acquired here.
+		     */
+		    if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
+			clp->lc_flags |= LCL_STAMPEDSTABLE;
+			len = clp->lc_idlen;
+			NFSBCOPY(clp->lc_id, clidp, len);
+			gotstate = 1;
+		    }
 		} else {
-			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
-			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
+		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
+		    new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
 		}
 		nfsrvd_refcache(new_stp->ls_op);
 		new_stp->ls_noopens = 0;
@@ -3467,7 +3447,6 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 {
 	struct nfsstate *stp;
 	struct nfsclient *clp;
-	struct nfslockfile *lfp;
 	u_int32_t bits;
 	int error = 0, gotstate = 0, len = 0;
 	u_char *clidp = NULL;
@@ -3562,9 +3541,7 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 			NFSBCOPY(clp->lc_id, clidp, len);
 			gotstate = 1;
 		}
-		NFSUNLOCKSTATE();
 	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
-		lfp = stp->ls_lfp;
 		if (retwriteaccessp != NULL) {
 			if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
 				*retwriteaccessp = 1;
@@ -3572,20 +3549,10 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 				*retwriteaccessp = 0;
 		}
 		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
-			/* Get the lf lock */
-			nfsrv_locklf(lfp);
-			NFSUNLOCKSTATE();
 			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
-			NFSVOPUNLOCK(vp);
-			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
-				NFSLOCKSTATE();
-				nfsrv_unlocklf(lfp);
-				NFSUNLOCKSTATE();
-			}
-			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+			nfsrv_freeopen(stp, vp, 1, p);
 		} else {
-			(void) nfsrv_freeopen(stp, NULL, 0, p);
-			NFSUNLOCKSTATE();
+			nfsrv_freeopen(stp, NULL, 0, p);
 		}
 	} else {
 		/*
@@ -3603,8 +3570,8 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 		if ((nd->nd_flag & ND_NFSV41) != 0 &&
 		    stp->ls_stateid.seqid == 0)
 			stp->ls_stateid.seqid = 1;
-		NFSUNLOCKSTATE();
 	}
+	NFSUNLOCKSTATE();
 
 	/*
 	 * If the client just confirmed its first open, write a timestamp
@@ -4419,11 +4386,13 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 		 * ReclaimComplete.  If so, grace can end now.
 		 */
 		notreclaimed = 0;
-		LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
-		    nst_list) {
-			if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
-				notreclaimed = 1;
-				break;
+		if (!NFSD_VNET(nfsd_disable_grace)) {
+			LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
+			    nst_list) {
+				if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
+					notreclaimed = 1;
+					break;
+				}
 			}
 		}
 		if (notreclaimed == 0)
@@ -4616,7 +4585,7 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 			if (procnum != NFSV4PROC_CBNULL)
 				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 				    true);
-			nfsrv_freesession(NULL, sep, NULL);
+			nfsrv_freesession(NULL, sep, NULL, false, NULL);
 		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 			error = newnfs_connect(NULL, &clp->lc_req, cred,
 			    NULL, 1, dotls, &clp->lc_req.nr_client);
@@ -4665,7 +4634,7 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 				    true);
 			}
-			nfsrv_freesession(NULL, sep, NULL);
+			nfsrv_freesession(NULL, sep, NULL, false, NULL);
 		} else
 			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
 			    NULL, NULL, cred, clp->lc_program,
@@ -4706,7 +4675,7 @@ errout:
 		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
-			    p, NULL);
+			    NULL, p, NULL);
 		m_freem(nd->nd_mrep);
 	}
 	NFSLOCKSTATE();
@@ -5179,6 +5148,11 @@ nfsrv_markreclaim(struct nfsclient *clp)
 	 * Now, just set the flag.
 	 */
 	sp->nst_flag |= NFSNST_RECLAIMED;
+
+	/*
+	 * Free up any old delegations.
+	 */
+	nfsrv_freedeleglist(&clp->lc_olddeleg);
 }
 
 /*
@@ -5263,7 +5237,7 @@ nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
 	 */
 	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 	nfsrv_backupstable();
-	nfsrv_cleanclient(clp, p);
+	nfsrv_cleanclient(clp, p, false, NULL);
 	nfsrv_freedeleglist(&clp->lc_deleg);
 	nfsrv_freedeleglist(&clp->lc_olddeleg);
 	LIST_REMOVE(clp, lc_hash);
@@ -5455,7 +5429,7 @@ nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
 	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 	nfsrv_backupstable();
 	if (clp->lc_expiry < NFSD_MONOSEC) {
-		nfsrv_cleanclient(clp, p);
+		nfsrv_cleanclient(clp, p, false, NULL);
 		nfsrv_freedeleglist(&clp->lc_deleg);
 		nfsrv_freedeleglist(&clp->lc_olddeleg);
 		LIST_REMOVE(clp, lc_hash);
@@ -6262,7 +6236,7 @@ nfsrv_throwawayallstate(NFSPROC_T *p)
 	for (i = 0; i < nfsrv_clienthashsize; i++) {
 		LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
 		    nclp) {
-			nfsrv_cleanclient(clp, p);
+			nfsrv_cleanclient(clp, p, false, NULL);
 			nfsrv_freedeleglist(&clp->lc_deleg);
 			nfsrv_freedeleglist(&clp->lc_olddeleg);
 			free(clp->lc_stateid, M_NFSDCLIENT);
@@ -6485,7 +6459,7 @@ nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
 	} while (igotlock == 0);
 	NFSUNLOCKV4ROOTMUTEX();
 
-	error = nfsrv_freesession(nd, NULL, sessionid);
+	error = nfsrv_freesession(nd, NULL, sessionid, false, NULL);
 	if (error == 0 && samesess != 0)
 		nd->nd_flag &= ~ND_HASSEQUENCE;
 
@@ -6581,12 +6555,13 @@ out:
  */
 static int
 nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
-    uint8_t *sessionid)
+    uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp)
 {
 	struct nfssessionhash *shp;
 	int i;
 
-	NFSLOCKSTATE();
+	if (!locked)
+		NFSLOCKSTATE();
 	if (sep == NULL) {
 		shp = NFSSESSIONHASH(sessionid);
 		NFSLOCKSESSION(shp);
@@ -6600,28 +6575,36 @@ nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
 		if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
 		    nd, sep->sess_clp) != 0) {
 			NFSUNLOCKSESSION(shp);
-			NFSUNLOCKSTATE();
+			if (!locked)
+				NFSUNLOCKSTATE();
 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
 		}
 
 		sep->sess_refcnt--;
 		if (sep->sess_refcnt > 0) {
 			NFSUNLOCKSESSION(shp);
-			NFSUNLOCKSTATE();
+			if (!locked)
+				NFSUNLOCKSTATE();
 			return (NFSERR_BACKCHANBUSY);
 		}
 		LIST_REMOVE(sep, sess_hash);
 		LIST_REMOVE(sep, sess_list);
 	}
 	NFSUNLOCKSESSION(shp);
-	NFSUNLOCKSTATE();
+	if (!locked)
+		NFSUNLOCKSTATE();
 	if (sep == NULL)
 		return (NFSERR_BADSESSION);
 	for (i = 0; i < NFSV4_SLOTS; i++)
 		if (sep->sess_slots[i].nfssl_reply != NULL)
 			m_freem(sep->sess_slots[i].nfssl_reply);
-	if (sep->sess_cbsess.nfsess_xprt != NULL)
-		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
+	if (!locked) {
+		if (sep->sess_cbsess.nfsess_xprt != NULL)
+			SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
+		if (old_xprtp != NULL)
+			*old_xprtp = NULL;
+	} else if (old_xprtp != NULL)
+		*old_xprtp = sep->sess_cbsess.nfsess_xprt;
 	free(sep, M_NFSDSESSION);
 	return (0);
 }
@@ -8943,3 +8926,112 @@ nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
 		return (0);
 	return (NFSERR_AUTHERR | AUTH_TOOWEAK);
 }
+
+/*
+ * Issue a delegation or, failing that, set *rflagsp to say why not.
+ */
+static void
+nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
+    struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
+    u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
+    struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
+    nfsv4stateid_t *delegstateidp)
+{
+	struct nfsstate *up_deleg, *new_deleg;
+
+	new_deleg = *new_delegp;
+	up_deleg = LIST_FIRST(&lfp->lf_deleg);
+	if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
+		*rflagsp |= NFSV4OPEN_WDNOTWANTED;
+	else if (nfsrv_issuedelegs == 0)
+		*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
+	else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
+		*rflagsp |= NFSV4OPEN_WDRESOURCE;
+	else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
+	    (writedeleg == 0 && (readonly == 0 ||
+	    (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
+	    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
+	     LCL_CALLBACKSON) {
+		/* Is this a downgrade attempt? */
+		if (up_deleg != NULL && up_deleg->ls_clp == clp &&
+		    (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
+		    (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
+			*rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
+		else
+			*rflagsp |= NFSV4OPEN_WDCONTENTION;
+	} else if (up_deleg != NULL &&
+	    (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+	    (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
+		/* This is an atomic upgrade. */
+		up_deleg->ls_stateid.seqid++;
+		delegstateidp->seqid = up_deleg->ls_stateid.seqid;
+		delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
+		delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
+		delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
+		up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
+		    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
+		*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+		nfsrv_writedelegcnt++;
+	} else {
+		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
+		new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
+		    = clp->lc_clientid.lval[0];
+		new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
+		    = clp->lc_clientid.lval[1];
+		new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
+		    = nfsrv_nextstateindex(clp);
+		if (writedeleg && !rdonly &&
+		    (nfsrv_writedelegifpos || !readonly) &&
+		    (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
+		     NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) {
+			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
+			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
+			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+			nfsrv_writedelegcnt++;
+		} else {
+			new_deleg->ls_flags = (NFSLCK_DELEGREAD |
+			    NFSLCK_READACCESS);
+			*rflagsp |= NFSV4OPEN_READDELEGATE;
+		}
+		new_deleg->ls_uid = new_stp->ls_uid;
+		new_deleg->ls_lfp = lfp;
+		new_deleg->ls_clp = clp;
+		new_deleg->ls_filerev = filerev;
+		new_deleg->ls_compref = nd->nd_compref;
+		new_deleg->ls_lastrecall = 0;
+		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
+		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
+		    new_deleg, ls_hash);
+		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
+		*new_delegp = NULL;
+		NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
+		nfsrv_openpluslock++;
+		nfsrv_delegatecnt++;
+	}
+}
+
+/*
+ * Find and remove any delegations for the fh.
+ */
+void
+nfsrv_removedeleg(fhandle_t *fhp, struct nfsrv_descript *nd, NFSPROC_T *p)
+{
+	struct nfsclient *clp;
+	struct nfsstate *stp, *nstp;
+	struct nfslockfile *lfp;
+	int error;
+
+	NFSLOCKSTATE();
+	error = nfsrv_getclient(nd->nd_clientid, CLOPS_RENEW, &clp, NULL,
+	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
+	if (error == 0)
+		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, fhp, 0);
+	/*
+	 * Now we must free any delegations.
+	 */
+	if (error == 0) {
+		LIST_FOREACH_SAFE(stp, &lfp->lf_deleg, ls_file, nstp)
+			nfsrv_freedeleg(stp);
+	}
+	NFSUNLOCKSTATE();
+}
diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c b/sys/fs/nfsserver/nfs_nfsdsubs.c
index 0d7e4c73fe69..b09ec1b3a062 100644
--- a/sys/fs/nfsserver/nfs_nfsdsubs.c
+++ b/sys/fs/nfsserver/nfs_nfsdsubs.c
@@ -57,9 +57,6 @@ NFSD_VNET_DECLARE(int, nfs_rootfhset);
 NFSD_VNET_DECLARE(uid_t, nfsrv_defaultuid);
 NFSD_VNET_DECLARE(gid_t, nfsrv_defaultgid);
 
-NFSD_VNET_DEFINE(struct nfsdontlisthead, nfsrv_dontlisthead);
-
-
 char nfs_v2pubfh[NFSX_V2FH];
 struct nfsdontlisthead nfsrv_dontlisthead;
 struct nfslayouthead nfsrv_recalllisthead;
@@ -1476,8 +1473,9 @@ int
 nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
 {
 	u_int32_t *tl;
-	int error = 0, len, copylen;
+	int error = 0, len, copylen, namedlen;
 
+	namedlen = 0;
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		len = fxdr_unsigned(int, *tl);
@@ -1493,6 +1491,11 @@ nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
 			copylen = NFSX_MYFH;
 			len = NFSM_RNDUP(len);
 			nd->nd_flag |= ND_DSSERVER;
+		} else if (len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
+		    len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
+			copylen = NFSX_MYFH;
+			namedlen = len;
+			len = NFSM_RNDUP(len);
 		} else if (len < NFSRV_MINFH || len > NFSRV_MAXFH) {
 			if (nd->nd_flag & ND_NFSV4) {
 			    if (len > 0 && len <= NFSX_V4FHMAX) {
@@ -1527,7 +1530,10 @@ nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
 		goto nfsmout;
 	}
 	NFSBCOPY(tl, (caddr_t)fhp->nfsrvfh_data, copylen);
-	fhp->nfsrvfh_len = copylen;
+	if (namedlen > 0)
+		fhp->nfsrvfh_len = namedlen;
+	else
+		fhp->nfsrvfh_len = copylen;
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
@@ -1623,7 +1629,7 @@ nfsrv_checkuidgid(struct nfsrv_descript *nd, struct nfsvattr *nvap)
 	if (nd->nd_cred->cr_uid == 0)
 		goto out;
 	if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) ||
-	    (NFSVNO_ISSETGID(nvap) && nvap->na_gid != nd->nd_cred->cr_gid &&
+	    (NFSVNO_ISSETGID(nvap) &&
 	    !groupmember(nvap->na_gid, nd->nd_cred)))
 		error = NFSERR_PERM;
 
@@ -1682,8 +1688,7 @@ nfsrv_fixattr(struct nfsrv_descript *nd, vnode_t vp,
 	}
 	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) &&
 	    NFSVNO_ISSETGID(nvap)) {
-		if (nvap->na_gid == nd->nd_cred->cr_gid ||
-		    groupmember(nvap->na_gid, nd->nd_cred)) {
+		if (groupmember(nvap->na_gid, nd->nd_cred)) {
 			nd->nd_cred->cr_uid = 0;
 			nva.na_gid = nvap->na_gid;
 			change++;
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 0356877eaf05..7dcc83880bb9 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 		vp->v_object = lowervp->v_object;
 		vn_irflag_set(vp, VIRF_PGREAD);
 	}
+	if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0)
+		vn_irflag_set(vp, VIRF_INOTIFY);
+	if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0)
+		vn_irflag_set(vp, VIRF_INOTIFY_PARENT);
 	if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
 		vp->v_vflag |= VV_ROOT;
 
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 7ab1fb6c1a25..4cddf24a5745 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -365,12 +365,7 @@ nullfs_statfs(struct mount *mp, struct statfs *sbp)
 		return (error);
 	}
 
-	/* now copy across the "interesting" information and fake the rest */
 	sbp->f_type = mstat->f_type;
-	sbp->f_flags &= MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID | MNT_UNION |
-	    MNT_NOSYMFOLLOW | MNT_AUTOMOUNTED | MNT_EXPORTED | MNT_IGNORE;
-	mstat->f_flags &= ~(MNT_ROOTFS | MNT_AUTOMOUNTED | MNT_EXPORTED);
-	sbp->f_flags |= mstat->f_flags;
 	sbp->f_bsize = mstat->f_bsize;
 	sbp->f_iosize = mstat->f_iosize;
 	sbp->f_blocks = mstat->f_blocks;
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 4747b1dd5b82..74c1a8f3acb6 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
 	&null_bug_bypass, 0, "");
 
 /*
+ * Synchronize inotify flags with the lower vnode:
+ * - If the upper vnode has the flag set and the lower does not, then the lower
+ *   vnode is unwatched and the upper vnode does not need to go through
+ *   VOP_INOTIFY.
+ * - If the lower vnode is watched, then the upper vnode should go through
+ *   VOP_INOTIFY, so copy the flag up.
+ */
+static void
+null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag)
+{
+	if ((vn_irflag_read(vp) & flag) != 0) {
+		if (__predict_false((vn_irflag_read(lvp) & flag) == 0))
+			vn_irflag_unset(vp, flag);
+	} else if ((vn_irflag_read(lvp) & flag) != 0) {
+		if (__predict_false((vn_irflag_read(vp) & flag) == 0))
+			vn_irflag_set(vp, flag);
+	}
+}
+
+/*
  * This is the 10-Apr-92 bypass routine.
  *    This version has been optimized for speed, throwing away some
  * safety checks.  It should still always work, but it's not as
@@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap)
 			lvp = *(vps_p[i]);
 
 			/*
-			 * Get rid of the transient hold on lvp.
+			 * Get rid of the transient hold on lvp.  Copy inotify
+			 * flags up in case something is watching the lower
+			 * layer.
+			 *
 			 * If lowervp was unlocked during VOP
 			 * operation, nullfs upper vnode could have
 			 * been reclaimed, which changes its v_vnlock
@@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap)
 			 * upper (reclaimed) vnode.
 			 */
 			if (lvp != NULLVP) {
+				null_copy_inotify(old_vps[i], lvp,
+				    VIRF_INOTIFY);
+				null_copy_inotify(old_vps[i], lvp,
+				    VIRF_INOTIFY_PARENT);
 				if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE &&
 				    old_vps[i]->v_vnlock != lvp->v_vnlock) {
 					VOP_UNLOCK(lvp);
@@ -385,7 +412,7 @@ null_lookup(struct vop_lookup_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
-	int flags = cnp->cn_flags;
+	uint64_t flags = cnp->cn_flags;
 	struct vnode *vp, *ldvp, *lvp;
 	struct mount *mp;
 	int error;
@@ -403,17 +430,25 @@ null_lookup(struct vop_lookup_args *ap)
 
 	/*
 	 * Renames in the lower mounts might create an inconsistent
-	 * configuration where lower vnode is moved out of the
-	 * directory tree remounted by our null mount.  Do not try to
-	 * handle it fancy, just avoid VOP_LOOKUP() with DOTDOT name
-	 * which cannot be handled by VOP, at least passing over lower
-	 * root.
+	 * configuration where lower vnode is moved out of the directory tree
+	 * remounted by our null mount.
+	 *
+	 * Do not try to handle it fancy, just avoid VOP_LOOKUP() with DOTDOT
+	 * name which cannot be handled by the VOP.
 	 */
-	if ((ldvp->v_vflag & VV_ROOT) != 0 && (flags & ISDOTDOT) != 0) {
-		KASSERT((dvp->v_vflag & VV_ROOT) == 0,
-		    ("ldvp %p fl %#x dvp %p fl %#x flags %#x",
-		    ldvp, ldvp->v_vflag, dvp, dvp->v_vflag, flags));
-		return (ENOENT);
+	if ((flags & ISDOTDOT) != 0) {
+		struct nameidata *ndp;
+
+		if ((ldvp->v_vflag & VV_ROOT) != 0) {
+			KASSERT((dvp->v_vflag & VV_ROOT) == 0,
+			    ("ldvp %p fl %#x dvp %p fl %#x flags %#jx",
+			    ldvp, ldvp->v_vflag, dvp, dvp->v_vflag,
+			    (uintmax_t)flags));
+			return (ENOENT);
+		}
+		ndp = vfs_lookup_nameidata(cnp);
+		if (ndp != NULL && vfs_lookup_isroot(ndp, ldvp))
+			return (ENOENT);
 	}
 
 	/*
@@ -528,7 +563,7 @@ null_setattr(struct vop_setattr_args *ap)
 		}
 	}
 
-	return (null_bypass((struct vop_generic_args *)ap));
+	return (null_bypass(&ap->a_gen));
 }
 
 /*
@@ -539,7 +574,7 @@ null_stat(struct vop_stat_args *ap)
 {
 	int error;
 
-	if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
+	if ((error = null_bypass(&ap->a_gen)) != 0)
 		return (error);
 
 	ap->a_sb->st_dev = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
@@ -551,7 +586,7 @@ null_getattr(struct vop_getattr_args *ap)
 {
 	int error;
 
-	if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
+	if ((error = null_bypass(&ap->a_gen)) != 0)
 		return (error);
 
 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
@@ -584,7 +619,7 @@ null_access(struct vop_access_args *ap)
 			break;
 		}
 	}
-	return (null_bypass((struct vop_generic_args *)ap));
+	return (null_bypass(&ap->a_gen));
 }
 
 static int
@@ -610,7 +645,7 @@ null_accessx(struct vop_accessx_args *ap)
 			break;
 		}
 	}
-	return (null_bypass((struct vop_generic_args *)ap));
+	return (null_bypass(&ap->a_gen));
 }
 
 /*
diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c
new file mode 100644
index 000000000000..547de98c4c03
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.c
@@ -0,0 +1,1332 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains 9P client functions which prepare messages to be sent to
+ * the server. Every fileop typically has a function defined here to interact
+ * with the host.
+ */
+
+#include <vm/uma.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_transport.h>
+
+#define QEMU_HEADER 7
+#define P9FS_MAX_FID_CNT (1024 * 1024 * 1024)
+#define P9FS_ROOT_FID_NO 2
+#define P9FS_MIN_TAG 1
+#define P9FS_MAX_TAG 65535
+#define WSTAT_SIZE 47
+#define WSTAT_EXTENSION_SIZE 14
+
+static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure");
+static uma_zone_t p9fs_fid_zone;
+static uma_zone_t p9fs_req_zone;
+static uma_zone_t p9fs_buf_zone;
+
+SYSCTL_DECL(_vfs_p9fs);
+int p9_debug_level = 0;
+SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW,
+    &p9_debug_level, 0, "p9fs debug logging");
+
+static struct p9_req_t *p9_get_request(struct p9_client *c, int *error);
+static struct p9_req_t *p9_client_request(
+    struct p9_client *c, int8_t type, int *error, const char *fmt, ...);
+
+inline int
+p9_is_proto_dotl(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000L);
+}
+
+inline int
+p9_is_proto_dotu(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000u);
+}
+
+/* Parse mount options into client structure; currently always returns 0 */
+static int
+p9_parse_opts(struct mount *mp, struct p9_client *clnt)
+{
+	int error, len;
+	char *trans;
+
+	/*
+	 * Default to virtio (the only transport for now) on any lookup failure.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len);
+	if (error != 0)
+		trans = "virtio";
+
+	/* These are defaults for now */
+	clnt->proto_version = p9_proto_2000L;
+	clnt->msize = 8192;
+
+	/* Get the default trans callback */
+	clnt->ops = p9_get_trans_by_name(trans);
+
+	return (0);
+}
+
+/* Allocate buffer for sending request and getting responses */
+static struct p9_buffer *
+p9_buffer_alloc(int alloc_msize)
+{
+	struct p9_buffer *fc;
+
+	fc = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO);
+	fc->capacity = alloc_msize;
+	fc->offset = 0;
+	fc->size = 0;
+	fc->sdata = (char *)fc + sizeof(struct p9_buffer);
+
+	return (fc);
+}
+
+/* Free a request or response buffer and clear the caller's pointer */
+static void
+p9_buffer_free(struct p9_buffer **buf)
+{
+
+	/* sdata shares the zone item with the header, so one free suffices. */
+	uma_zfree(p9fs_buf_zone, *buf);
+	*buf = NULL;
+}
+
+/* Free the request */
+static void
+p9_free_req(struct p9_client *clnt, struct p9_req_t *req)
+{
+
+	if (req->tc != NULL) {
+		if (req->tc->tag != P9_NOTAG)
+			p9_tag_destroy(clnt, req->tc->tag);
+		p9_buffer_free(&req->tc);
+	}
+
+	if (req->rc != NULL)
+		p9_buffer_free(&req->rc);
+
+	uma_zfree(p9fs_req_zone, req);
+}
+
+/* Allocate a request with tc/rc buffers and a tag; EAGAIN if no tag is free */
+static struct p9_req_t *
+p9_get_request(struct p9_client *clnt, int *error)
+{
+	struct p9_req_t *req;
+	int alloc_msize;
+	uint16_t tag;
+
+	alloc_msize = P9FS_MTU;
+
+	req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO);
+	req->tc = p9_buffer_alloc(alloc_msize);
+	req->rc = p9_buffer_alloc(alloc_msize);
+
+	tag = p9_tag_create(clnt);
+	if (tag == P9_NOTAG) {
+		*error = EAGAIN;
+		req->tc->tag = P9_NOTAG;
+		p9_free_req(clnt, req);
+		return (NULL);
+	}
+	req->tc->tag = tag;
+	return (req);
+}
+
+/* Parse the size, type and tag header of a response buffer into buf */
+static int
+p9_parse_receive(struct p9_buffer *buf, struct p9_client *clnt)
+{
+	int8_t type;
+	int16_t tag;
+	int32_t size;
+	int error;
+
+	buf->offset = 0;
+
+	/* No size recorded yet; default to the header length (QEMU_HEADER). */
+	if (buf->size == 0)
+		buf->size = QEMU_HEADER;
+
+	/* This is the initial header. Parse size, type, and tag. */
+	error = p9_buf_readf(buf, 0, "dbw", &size, &type, &tag);
+	if (error != 0)
+		goto out;
+
+	buf->size = size;
+	buf->id = type;
+	buf->tag = tag;
+	P9_DEBUG(TRANS, "%s: size=%d type: %d tag: %d\n",
+	    __func__, buf->size, buf->id, buf->tag);
+out:
+	return (error);
+}
+
+/* Parse the reply header; return the error code an R(L)ERROR reply carries */
+static int
+p9_client_check_return(struct p9_client *c, struct p9_req_t *req)
+{
+	int error;
+	int ecode;
+	char *ename;
+
+	/* Check what we have in the receive buffer. */
+	error = p9_parse_receive(req->rc, c);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Not an error reply: the preprocessing is done. Return to the caller
+	 * and let it process the actual data.
+	 */
+	if (req->rc->id != P9PROTO_RERROR && req->rc->id != P9PROTO_RLERROR)
+		return (0);
+
+	/*
+	 * Interpreting the error is done in different ways for Linux and
+	 * Unix version. Make sure you interpret it right.
+	 */
+	if (req->rc->id == P9PROTO_RERROR) {
+		error = p9_buf_readf(req->rc, c->proto_version, "s?d",
+		    &ename, &ecode);
+	} else {
+		/* Only P9PROTO_RLERROR can reach this branch. */
+		error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode);
+	}
+	if (error != 0)
+		goto out;
+
+	/* if there was an ecode error make this the err now */
+	error = ecode;
+
+	/*
+	 * Note this is still not completely an error, as lookups for files
+	 * not present can hit this and return. Hence it is made a debug print.
+	 */
+	if (error != 0) {
+		if (req->rc->id == P9PROTO_RERROR) {
+			P9_DEBUG(PROTO, "RERROR error %d ename %s\n",
+			    error, ename);
+		} else if (req->rc->id == P9PROTO_RLERROR) {
+			P9_DEBUG(PROTO, "RLERROR error %d\n", error);
+		}
+	}
+
+	if (req->rc->id == P9PROTO_RERROR) {
+		free(ename, M_TEMP);
+	}
+	return (error);
+
+out:
+	P9_DEBUG(ERROR, "couldn't parse receive buffer error %d\n", error);
+	return (error);
+}
+
+/* State machine changing helpers */
+void p9_client_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_DISCONNECT;
+}
+
+void p9_client_begin_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_BEGIN_DISCONNECT;
+}
+
+/* Allocate a request and marshal a message of the given type into it. */
+static struct p9_req_t *
+p9_client_prepare_req(struct p9_client *c, int8_t type,
+    int req_size, int *error, const char *fmt, __va_list ap)
+{
+	struct p9_req_t *req;
+
+	P9_DEBUG(TRANS, "%s: client %p op %d\n", __func__, c, type);
+
+	/*
+	 * Requests may only be submitted while no session close is in
+	 * progress, or else there can be a race.  Once the status is
+	 * Disconnected, every new request is refused.
+	 */
+	if (c->trans_status == P9FS_DISCONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/* Allow only cleanup clunk messages once teardown has started. */
+	if ((c->trans_status == P9FS_BEGIN_DISCONNECT) &&
+	    (type != P9PROTO_TCLUNK)) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/* p9_get_request() only sets *error on failure, so test req itself. */
+	req = p9_get_request(c, error);
+	if (req == NULL) {
+		P9_DEBUG(ERROR, "%s: request allocation failed.\n", __func__);
+		return (NULL);
+	}
+
+	/* Marshall the data according to QEMU standards */
+	*error = p9_buf_prepare(req->tc, type);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_finalize(c, req->tc);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_finalize failed: %d \n",
+		    __func__, *error);
+		goto out;
+	}
+
+	return (req);
+out:
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/*
+ * Send one 9P request and wait for its reply. The message is built from
+ * type/fmt/varargs, handed to the transport's request op, and the reply is
+ * then vetted by p9_client_check_return().
+ * Returns the request (callers in this file release it with p9_free_req())
+ * or NULL with *error set; on failure the request has already been released.
+ */
+static struct p9_req_t *
+p9_client_request(struct p9_client *c, int8_t type, int *error,
+    const char *fmt, ...)
+{
+	struct p9_req_t *req;
+	va_list args;
+
+	va_start(args, fmt);
+	req = p9_client_prepare_req(c, type, c->msize, error, fmt, args);
+	va_end(args);
+
+	/* Nothing to release if the request could not even be prepared. */
+	if (*error != 0)
+		return (NULL);
+
+	/* Hand the request to the transport, then validate the reply. */
+	*error = c->ops->request(c->handle, req);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, *error);
+	} else {
+		*error = p9_client_check_return(c, req);
+	}
+
+	if (*error == 0)
+		return (req);
+
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/*
+ * Allocate a transaction tag from the client's tag pool.
+ * Returns the tag, or P9_NOTAG when the pool has no units left.
+ */
+uint16_t
+p9_tag_create(struct p9_client *clnt)
+{
+	int unit;
+
+	unit = alloc_unr(&clnt->tagpool);
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, unit);
+
+	/* alloc_unr() signals an exhausted pool with -1. */
+	return (unit == -1 ? P9_NOTAG : unit);
+}
+
+/* Return a transaction tag to the client's tag pool. */
+void
+p9_tag_destroy(struct p9_client *clnt, uint16_t tag)
+{
+
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag);
+
+	/* Hand the unit back so it can be reused. */
+	free_unr(&clnt->tagpool, tag);
+}
+
+/*
+ * Allocate a zeroed fid structure and give it a number from the client's
+ * fid pool. The new fid starts with mode and uid set to -1 (not opened, no
+ * owner assigned yet). Returns NULL when the pool has no units left.
+ */
+struct p9_fid *
+p9_fid_create(struct p9_client *clnt)
+{
+	struct p9_fid *newfid;
+
+	newfid = uma_zalloc(p9fs_fid_zone, M_WAITOK | M_ZERO);
+	newfid->fid = alloc_unr(&clnt->fidpool);
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, newfid->fid);
+
+	/* alloc_unr() signals an exhausted pool with -1. */
+	if (newfid->fid != -1) {
+		newfid->mode = -1;
+		newfid->uid = -1;
+		newfid->clnt = clnt;
+		return (newfid);
+	}
+
+	uma_zfree(p9fs_fid_zone, newfid);
+	return (NULL);
+}
+
+/*
+ * Release a fid: its number goes back to the owning client's fid pool and
+ * the structure goes back to the fid zone. No message is sent to the server.
+ */
+void
+p9_fid_destroy(struct p9_fid *fid)
+{
+
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid);
+
+	/* Give the number back before the structure itself is freed. */
+	free_unr(&fid->clnt->fidpool, fid->fid);
+	uma_zfree(p9fs_fid_zone, fid);
+}
+
+/*
+ * Negotiate the protocol version and message size with the server.
+ * A TVERSION carrying the client's msize and the version string matching
+ * c->proto_version is sent; the reply selects the final proto_version and
+ * may lower c->msize. Returns 0 or an errno value (EINVAL for an unknown
+ * local proto_version).
+ */
+int
+p9_client_version(struct p9_client *c)
+{
+	struct p9_req_t *req;
+	const char *wanted;
+	char *version;
+	int error, msize;
+
+	error = 0;
+
+	P9_DEBUG(PROTO, "TVERSION msize %d protocol %d\n",
+	    c->msize, c->proto_version);
+
+	/* Pick the version string that corresponds to our current setting. */
+	switch (c->proto_version) {
+	case p9_proto_2000L:
+		wanted = "9P2000.L";
+		break;
+	case p9_proto_2000u:
+		wanted = "9P2000.u";
+		break;
+	case p9_proto_legacy:
+		wanted = "9P2000";
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds",
+	    c->msize, wanted);
+
+	/* On failure the request was already released by the callee. */
+	if (error != 0)
+		return (error);
+
+	/*
+	 * NOTE(review): if the "s" conversion allocates the string, version
+	 * is never released in this function -- confirm against p9_buf_readf.
+	 */
+	error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RVERSION msize %d %s\n", msize, version);
+
+	/* The longer names must be tested before the bare "9P2000" prefix. */
+	if (strncmp(version, "9P2000.L", 8) == 0) {
+		c->proto_version = p9_proto_2000L;
+	} else if (strncmp(version, "9P2000.u", 8) == 0) {
+		c->proto_version = p9_proto_2000u;
+	} else if (strncmp(version, "9P2000", 6) == 0) {
+		c->proto_version = p9_proto_legacy;
+	} else {
+		/* NOTE(review): ENOMEM is an odd code for "unknown version". */
+		error = ENOMEM;
+		goto out;
+	}
+
+	/* Never use a larger message size than the server offered. */
+	if (msize < c->msize)
+		c->msize = msize;
+out:
+	p9_free_req(c, req);
+	return (error);
+}
+
+/*
+ * Create the UMA zones used by the 9P client: one each for fids, requests
+ * and message buffers. Per the original note this is called from the
+ * module's init path so that the zones are set up only once.
+ */
+void
+p9_init_zones(void)
+{
+
+	/* Zone for struct p9_fid. */
+	p9fs_fid_zone = uma_zcreate("p9fs fid zone", sizeof(struct p9_fid),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone for struct p9_req_t. */
+	p9fs_req_zone = uma_zcreate("p9fs req zone", sizeof(struct p9_req_t),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone for buffers: the header struct plus P9FS_MTU bytes. */
+	p9fs_buf_zone = uma_zcreate("p9fs buf zone",
+	    sizeof(struct p9_buffer) + P9FS_MTU,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+/* Destroy the fid, request and buffer zones created by p9_init_zones(). */
+void
+p9_destroy_zones(void)
+{
+
+	uma_zdestroy(p9fs_fid_zone);
+	uma_zdestroy(p9fs_req_zone);
+	uma_zdestroy(p9fs_buf_zone);
+}
+
+/*
+ * Create a 9P client for mount mp: parse the mount options, set up the fid
+ * and tag pools, create the transport for mount_tag and negotiate the
+ * protocol version.
+ * Returns the new client, or NULL with *error set. On failure everything
+ * acquired so far is unwound in reverse order: the transport is closed, the
+ * pools are cleared and the mutex is destroyed before the client is freed.
+ */
+struct p9_client *
+p9_client_create(struct mount *mp, int *error, const char *mount_tag)
+{
+	struct p9_client *clnt;
+
+	clnt = malloc(sizeof(struct p9_client), M_P9CLNT, M_WAITOK | M_ZERO);
+	mtx_init(&clnt->clnt_mtx, "p9clnt", NULL, MTX_DEF);
+
+	/* Parse should have set trans_mod */
+	*error = p9_parse_opts(mp, clnt);
+	if (*error != 0)
+		goto out_mtx;
+
+	if (clnt->ops == NULL) {
+		*error = EINVAL;
+		P9_DEBUG(ERROR, "%s: no transport\n", __func__);
+		goto out_mtx;
+	}
+
+	/* All the structures from here are protected by the lock clnt_mtx */
+	init_unrhdr(&clnt->fidpool, P9FS_ROOT_FID_NO, P9FS_MAX_FID_CNT,
+	    &clnt->clnt_mtx);
+	init_unrhdr(&clnt->tagpool, P9FS_MIN_TAG, P9FS_MAX_TAG,
+	    &clnt->clnt_mtx);
+
+	P9_DEBUG(TRANS, "%s: clnt %p trans %p msize %d protocol %d\n",
+	    __func__, clnt, clnt->ops, clnt->msize, clnt->proto_version);
+
+	*error = clnt->ops->create(mount_tag, &clnt->handle);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: transport create failed .%d \n",
+		    __func__, *error);
+		goto out_pools;
+	}
+	clnt->trans_status = P9FS_CONNECT;
+
+	*error = p9_client_version(clnt);
+	if (*error != 0)
+		goto out_trans;
+
+	P9_DEBUG(TRANS, "%s: client creation succeeded.\n", __func__);
+	return (clnt);
+
+out_trans:
+	/* The transport was created; close it as p9_client_destroy() would. */
+	clnt->ops->close(clnt->handle);
+out_pools:
+	clear_unrhdr(&clnt->fidpool);
+	clear_unrhdr(&clnt->tagpool);
+out_mtx:
+	/* An initialized mutex must be destroyed before its memory is freed. */
+	mtx_destroy(&clnt->clnt_mtx);
+	free(clnt, M_P9CLNT);
+	return (NULL);
+}
+
+/*
+ * Tear down a client: close the transport, release the fid and tag pools,
+ * destroy the client mutex and free the client structure.
+ */
+void
+p9_client_destroy(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: client %p\n", __func__, clnt);
+	clnt->ops->close(clnt->handle);
+
+	P9_DEBUG(TRANS, "%s : Destroying fidpool\n", __func__);
+	clear_unrhdr(&clnt->fidpool);
+
+	P9_DEBUG(TRANS, "%s : Destroying tagpool\n", __func__);
+	clear_unrhdr(&clnt->tagpool);
+
+	/*
+	 * p9_client_create() hands clnt_mtx to both pools as their lock, so
+	 * the mutex is destroyed only after the pools have been cleared, and
+	 * before the memory holding it is freed.
+	 */
+	mtx_destroy(&clnt->clnt_mtx);
+
+	free(clnt, M_P9CLNT);
+}
+
+/*
+ * Attach a user to the file system: allocate a fid, send TATTACH for
+ * uname/aname/n_uname and record the returned qid in the fid, which then
+ * refers to the root of the file system.
+ * Returns the fid, or NULL with *error set (ENOMEM when no fid could be
+ * allocated). afid is not used by this function; P9PROTO_NOFID is always
+ * sent in its place.
+ */
+struct p9_fid *
+p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
+    const char *uname, uid_t n_uname, const char *aname, int *error)
+{
+	struct p9_req_t *req;
+	struct p9_fid *fid;
+	struct p9_qid qid;
+
+	P9_DEBUG(PROTO, "TATTACH uname=%s aname=%s, n_uname=%d\n",
+	    uname, aname, n_uname);
+
+	fid = p9_fid_create(clnt);
+	if (fid == NULL) {
+		*error = ENOMEM;
+		return (NULL);
+	}
+	fid->uid = n_uname;
+
+	req = p9_client_request(clnt, P9PROTO_TATTACH, error, "ddssd", fid->fid,
+	    P9PROTO_NOFID, uname, aname, n_uname);
+	if (*error == 0) {
+		*error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid);
+		if (*error == 0) {
+			P9_DEBUG(PROTO, "RATTACH qid %x.%llx.%x\n",
+			    qid.type, (unsigned long long)qid.path,
+			    qid.version);
+
+			fid->qid = qid;
+			p9_free_req(clnt, req);
+			return (fid);
+		}
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n",
+		    __func__, *error);
+	}
+
+	/* Failure: drop the request (if any) and the fid allocated above. */
+	if (req != NULL)
+		p9_free_req(clnt, req);
+	p9_fid_destroy(fid);
+
+	return (NULL);
+}
+
+/*
+ * Ask the server to remove the file or directory referenced by fid
+ * (TREMOVE). The local fid structure is not released here.
+ * Returns 0 or an errno value.
+ */
+int
+p9_client_remove(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	P9_DEBUG(PROTO, "TREMOVE fid %d\n", fid->fid);
+
+	error = 0;
+	clnt = fid->clnt;
+
+	req = p9_client_request(clnt, P9PROTO_TREMOVE, &error, "d", fid->fid);
+	if (error == 0) {
+		/* The reply carries nothing we need; just drop it. */
+		p9_free_req(clnt, req);
+		return (error);
+	}
+
+	P9_DEBUG(PROTO, "RREMOVE fid %d\n", fid->fid);
+	return (error);
+}
+
+/*
+ * Remove the entry called name from the directory referenced by dfid
+ * (TUNLINKAT); flags is passed through to the server unchanged.
+ * Returns 0 or an errno value.
+ */
+int
+p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	error = 0;
+	clnt = dfid->clnt;
+
+	req = p9_client_request(clnt, P9PROTO_TUNLINKAT, &error, "dsd",
+	    dfid->fid, name, flags);
+	if (error == 0) {
+		/* The reply carries nothing we need; just drop it. */
+		p9_free_req(clnt, req);
+		return (error);
+	}
+
+	P9_DEBUG(PROTO, "RUNLINKAT fid %d\n", dfid->fid);
+	return (error);
+}
+
+/*
+ * Tell the server that fid is no longer needed (TCLUNK) and release the
+ * local fid. The fid is destroyed whether or not the request succeeded;
+ * the request's error, if any, is returned. A NULL fid is logged and
+ * treated as success.
+ */
+int
+p9_client_clunk(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	if (fid == NULL) {
+		P9_DEBUG(ERROR, "%s: clunk with NULL fid is bad\n", __func__);
+		return (0);
+	}
+
+	P9_DEBUG(PROTO, "TCLUNK fid %d \n", fid->fid);
+
+	error = 0;
+	clnt = fid->clnt;
+
+	req = p9_client_request(clnt, P9PROTO_TCLUNK, &error, "d", fid->fid);
+	if (req != NULL) {
+		P9_DEBUG(PROTO, "RCLUNK fid %d\n", fid->fid);
+		p9_free_req(clnt, req);
+	}
+
+	/* The local fid goes away regardless of the server's answer. */
+	p9_fid_destroy(fid);
+	return (error);
+}
+
+/*
+ * Walk nwnames path components (wnames) starting at oldfid (TWALK). This is
+ * used on lookups and whenever a fresh fid is needed for a file.
+ *
+ * When clone is non-zero a new fid is allocated (inheriting oldfid's uid)
+ * and returned; otherwise oldfid itself is walked and returned. With
+ * nwnames == 0 the resulting fid simply takes over oldfid's qid.
+ *
+ * Returns the resulting fid, or NULL with *error set: EIO when the client
+ * is not connected, ENOMEM when no fid could be allocated, ENOENT when the
+ * server walked fewer components than requested, or the request/decode
+ * error. A cloned fid is released again on every failure path.
+ */
+struct p9_fid *
+p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames,
+    int clone, int *error)
+{
+	struct p9_client *clnt;
+	struct p9_fid *fid;
+	struct p9_qid *wqids;
+	struct p9_req_t *req;
+	uint16_t nwqids, count;
+
+	clnt = oldfid->clnt;
+	wqids = NULL;
+	nwqids = 0;
+
+	/*
+	 *  Before, we go and create fid, make sure we are not tearing
+	 *  down. Only then we create.
+	 *  Allow only cleanup clunk messages once we are starting to teardown.
+	 */
+	if (clnt->trans_status != P9FS_CONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	if (clone) {
+		fid = p9_fid_create(clnt);
+		if (fid == NULL) {
+			*error = ENOMEM;
+			return (NULL);
+		}
+		fid->uid = oldfid->uid;
+	} else
+		fid = oldfid;
+
+	/*
+	 * Only index the name array when there is at least one name;
+	 * a zero-component walk would otherwise read wnames[-1].
+	 */
+	P9_DEBUG(PROTO, "TWALK fids %d,%d nwnames %u wname %s\n",
+	    oldfid->fid, fid->fid, nwnames,
+	    (wnames != NULL && nwnames > 0) ? wnames[nwnames - 1] : NULL);
+
+	/*
+	 * The newfid is for the component in search. We are preallocating as
+	 * qemu on other side allocates or returns a fid if it sees a match
+	 */
+	req = p9_client_request(clnt, P9PROTO_TWALK, error, "ddT", oldfid->fid,
+	    fid->fid, wnames, nwnames);
+	if (*error != 0) {
+		if (fid != oldfid)
+			p9_fid_destroy(fid);
+		return (NULL);
+	}
+
+	*error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids,
+	    &wqids);
+	if (*error != 0)
+		goto out;
+
+	P9_DEBUG(PROTO, "RWALK nwqid %d:\n", nwqids);
+
+	/* Fewer qids than names means a component was not found. */
+	if (nwqids != nwnames) {
+		*error = ENOENT;
+		goto out;
+	}
+
+	for (count = 0; count < nwqids; count++)
+		P9_DEBUG(TRANS, "%s: [%d] %x.%llx.%x\n",
+		    __func__, count, wqids[count].type,
+		    (unsigned long long)wqids[count].path,
+		    wqids[count].version);
+
+	/* The fid now names the last component walked (or oldfid's file). */
+	if (nwnames)
+		memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
+	else
+		fid->qid = oldfid->qid;
+
+	p9_free_req(clnt, req);
+	free(wqids, M_TEMP);
+	return (fid);
+
+out:
+	p9_free_req(clnt, req);
+	if (wqids)
+		free(wqids, M_TEMP);
+	if (fid && fid != oldfid)
+		p9_client_clunk(fid);
+	return (NULL);
+}
+
+/*
+ * Open the file referenced by fid with the given mode, using TLOPEN on a
+ * 9P2000.L session and TOPEN otherwise. On success the fid records the
+ * returned qid, the mode and the mtu reported by the server.
+ * Returns 0 or an errno value; EINVAL if the fid already has a mode set.
+ */
+int
+p9_client_open(struct p9_fid *fid, int mode)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error, mtu;
+
+	error = 0;
+	clnt = fid->clnt;
+	mtu = 0;
+
+	P9_DEBUG(PROTO, "%s fid %d mode %d\n",
+	    p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN",
+	    fid->fid, mode);
+
+	/* A mode other than -1 means this fid has been opened before. */
+	if (fid->mode != -1)
+		return (EINVAL);
+
+	if (p9_is_proto_dotl(clnt)) {
+		req = p9_client_request(clnt, P9PROTO_TLOPEN, &error, "dd",
+		    fid->fid, mode);
+	} else {
+		req = p9_client_request(clnt, P9PROTO_TOPEN, &error, "db",
+		    fid->fid, mode);
+	}
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid,
+	    &mtu);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "%s qid %x.%llx.%x mtu %x\n",
+		    p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN",
+		    (fid->qid).type, (unsigned long long)(fid->qid).path,
+		    (fid->qid).version, mtu);
+
+		fid->mode = mode;
+		fid->mtu = mtu;
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Read directory entries from fid starting at offset (TREADDIR) and copy
+ * the raw reply data into data.
+ *
+ * The request size is the smaller of count and the transfer unit (the fid's
+ * mtu, or the client's msize when the mtu is unset or too large).
+ * Returns the number of bytes copied, or -errno on failure so that error
+ * codes can be told apart from byte counts.
+ */
+int
+p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset,
+    uint32_t count)
+{
+	int error;
+	uint32_t rsize;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+
+	P9_DEBUG(PROTO, "TREADDIR fid %d offset %llu count %d\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	error = 0;
+	rsize = fid->mtu;
+	clnt = fid->clnt;
+
+	if (rsize == 0 || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	if (count < rsize)
+		rsize = count;
+
+	req = p9_client_request(clnt, P9PROTO_TREADDIR, &error, "dqd",
+	    fid->fid, offset, rsize);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: couldn't allocate req in client_readdir\n",
+			__func__);
+		return (-error);
+	}
+
+	/* From here on count holds the size reported by the server. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count,
+	    &dataptr);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n",
+		    __func__, error);
+		p9_free_req(clnt, req);
+		return (-error);
+	}
+
+	/*
+	 * The reply size comes from the server: never copy more than was
+	 * requested, which in turn never exceeds the caller's count.
+	 */
+	if (rsize < count) {
+		P9_DEBUG(PROTO, "RREADDIR count (%u > %u)\n", count, rsize);
+		count = rsize;
+	}
+
+	P9_DEBUG(PROTO, "RREADDIR count %u\n", count);
+
+	/* Copy back the data into the input buffer. */
+	memmove(data, dataptr, count);
+	p9_free_req(clnt, req);
+	return (count);
+}
+
+/*
+ * Read up to count bytes at offset from the file referenced by fid (TREAD)
+ * into data. This buffer is handed over to p9fs to process into user
+ * buffers.
+ *
+ * The request size is the smaller of count and the transfer unit (the fid's
+ * mtu, or the client's msize when the mtu is unset or too large); a reply
+ * larger than requested is truncated to the requested size.
+ * Returns the number of bytes read, or -errno on failure so that error
+ * codes can be told apart from byte counts. An empty reply is reported as
+ * -EIO.
+ */
+int
+p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+	int error, rsize;
+
+	clnt = fid->clnt;
+	rsize = fid->mtu;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TREAD fid %d offset %llu %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	if (!rsize || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	if (count < rsize)
+		rsize = count;
+
+	/* At this stage, we only have 8K buffers so only transfer */
+	req = p9_client_request(clnt, P9PROTO_TREAD, &error, "dqd", fid->fid,
+	    offset, rsize);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request\n", __func__);
+		return (-error);
+	}
+
+	/* From here on count holds the size reported by the server. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count,
+	    &dataptr);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n",
+		    __func__, error);
+		goto out;
+	}
+
+	if (rsize < count) {
+		P9_DEBUG(PROTO, "RREAD count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
+
+	P9_DEBUG(PROTO, "RREAD count %d\n", count);
+
+	if (count == 0) {
+		/*
+		 * Keep error positive here: the out path negates it, and a
+		 * pre-negated value would come back as +EIO and be mistaken
+		 * for a byte count.
+		 */
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error in client_read \n", __func__);
+		goto out;
+	}
+
+	/* Copy back the data into the input buffer. */
+	memmove(data, dataptr, count);
+	p9_free_req(clnt, req);
+	return (count);
+out:
+	p9_free_req(clnt, req);
+	return (-error);
+}
+
+/*
+ * Write up to count bytes from data at offset to the file referenced by
+ * fid (TWRITE).
+ *
+ * count is first limited to the transfer unit (the fid's mtu, or the
+ * client's msize when the mtu is unset or too large). A reply claiming more
+ * bytes than were sent is truncated to the number sent.
+ * Returns the number of bytes written, or -errno on failure so that error
+ * codes can be told apart from byte counts.
+ */
+int
+p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int ret, error, rsize;
+
+	clnt = fid->clnt;
+	rsize = fid->mtu;
+	ret = 0;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TWRITE fid %d offset %llu  %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	if (!rsize || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	/* Limit set by Qemu ,8168 */
+	if (count > rsize) {
+		count = rsize;
+	}
+
+	/*
+	 * Doing the Data blob instead. If at all we add the zerocopy, we can
+	 * change it to uio direct copy
+	 */
+	req = p9_client_request(clnt, P9PROTO_TWRITE, &error, "dqD", fid->fid,
+	    offset, count, data);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request: %d\n",
+		    __func__, error);
+		return (-error);
+	}
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n",
+		    __func__, error);
+		goto out;
+	}
+
+	if (count < ret) {
+		/* Print the reported size first so the "a > b" text is true. */
+		P9_DEBUG(PROTO, "RWRITE count (%d > %d)\n", ret, count);
+		ret = count;
+	}
+	P9_DEBUG(PROTO, "RWRITE count %d\n", ret);
+
+	/*
+	 * NOTE(review): this tests the number of bytes sent, not the number
+	 * the server reported (ret); a check on ret may have been intended --
+	 * confirm before changing.
+	 */
+	if (count == 0) {
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error\n", __func__);
+		goto out;
+	}
+
+	p9_free_req(clnt, req);
+	return (ret);
+out:
+	p9_free_req(clnt, req);
+	return (-error);
+}
+
+
+/*
+ * Create a file called name with permissions perm and open mode mode
+ * (TCREATE) through fid; extension is passed as the optional trailing
+ * string. On success the fid records the mode and the mtu from the reply.
+ * Returns 0 or an errno value; EINVAL if the fid already has a mode set.
+ */
+int
+p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
+    char *extension)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid qid;
+	int error, mtu;
+
+	P9_DEBUG(PROTO, "TCREATE fid %d name %s perm %d mode %d\n",
+	    fid->fid, name, perm, mode);
+
+	clnt = fid->clnt;
+	error = 0;
+
+	/* A mode other than -1 means this fid has been opened before. */
+	if (fid->mode != -1)
+		return (EINVAL);
+
+	req = p9_client_request(clnt, P9PROTO_TCREATE, &error, "dsdb?s",
+	    fid->fid, name, perm, mode, extension);
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &qid, &mtu);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "RCREATE qid %x.%jx.%x mtu %x\n",
+		    qid.type, (uintmax_t)qid.path, qid.version, mtu);
+		fid->mode = mode;
+		fid->mtu = mtu;
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Fetch file system statistics through fid (TSTATFS) and decode them into
+ * stat. Returns 0 or an errno value.
+ */
+int
+p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	error = 0;
+	clnt = fid->clnt;
+
+	P9_DEBUG(PROTO, "TSTATFS fid %d\n", fid->fid);
+
+	req = p9_client_request(clnt, P9PROTO_TSTATFS, &error, "d", fid->fid);
+	if (error != 0)
+		return (error);
+
+	/* Two 32-bit fields, six 64-bit fields, then one 32-bit field. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd",
+	    &stat->type, &stat->bsize, &stat->blocks, &stat->bfree,
+	    &stat->bavail, &stat->files, &stat->ffree, &stat->fsid,
+	    &stat->namelen);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "RSTATFS fid %d type 0x%jx bsize %ju "
+		    "blocks %ju bfree %ju bavail %ju files %ju ffree %ju "
+		    "fsid %ju namelen %ju\n",
+		    fid->fid, (uintmax_t)stat->type,
+		    (uintmax_t)stat->bsize, (uintmax_t)stat->blocks,
+		    (uintmax_t)stat->bfree, (uintmax_t)stat->bavail,
+		    (uintmax_t)stat->files, (uintmax_t)stat->ffree,
+		    (uintmax_t)stat->fsid, (uintmax_t)stat->namelen);
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Rename oldname in the directory referenced by oldfid to newname in the
+ * directory referenced by newfid (TRENAMEAT). Returns 0 or an errno value.
+ */
+int
+p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid,
+    char *newname)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	P9_DEBUG(PROTO, "TRENAMEAT oldfid %d oldname %s newfid %d newfid %s",
+	    oldfid->fid, oldname, newfid->fid, newname);
+
+	error = 0;
+	clnt = oldfid->clnt;
+
+	/*
+	 * TRENAMEAT is used rather than TRENAME; per the original note this
+	 * is the rename variant the QEMU server supports.
+	 */
+	req = p9_client_request(clnt, P9PROTO_TRENAMEAT, &error, "dsds",
+	    oldfid->fid, oldname, newfid->fid, newname);
+	if (error == 0) {
+		/* The reply carries nothing we need; just drop it. */
+		p9_free_req(clnt, req);
+	}
+
+	return (error);
+}
+
+/*
+ * Create a symbolic link called name pointing at symtgt in the directory
+ * referenced by fid (TSYMLINK); gid is passed to the server unchanged.
+ * The qid in the reply is decoded only for tracing.
+ * Returns 0 or an errno value. The request is released on every path.
+ */
+int
+p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid)
+{
+	int error;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+	struct p9_qid qid;
+
+	error = 0;
+	clnt = fid->clnt;
+
+	P9_DEBUG(PROTO, "TSYMLINK fid %d name %s\n", fid->fid, name);
+
+	req = p9_client_request(clnt, P9PROTO_TSYMLINK, &error, "dssd",
+	    fid->fid, name, symtgt, gid);
+
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RSYMLINK qid %x.%jx.%x\n",
+	    qid.type, (uintmax_t)qid.path, qid.version);
+
+out:
+	/* Release the request on the decode-failure path as well. */
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Create a hard link called name in the directory referenced by dfid to
+ * the file referenced by oldfid (TLINK). Returns 0 or an errno value.
+ */
+int
+p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	error = 0;
+	clnt = dfid->clnt;
+
+	P9_DEBUG(PROTO, "TLINK dfid %d oldfid %d name %s\n",
+	    dfid->fid, oldfid->fid, name);
+
+	req = p9_client_request(clnt, P9PROTO_TLINK, &error, "dds", dfid->fid,
+	    oldfid->fid, name);
+	if (error == 0) {
+		/* The reply carries nothing we need; just drop it. */
+		p9_free_req(clnt, req);
+		return (0);
+	}
+
+	return (error);
+}
+
+/*
+ * Read the target of the symbolic link referenced by fid (TREADLINK).
+ * On success *target receives the string decoded from the reply.
+ * NOTE(review): the string is presumably allocated by p9_buf_readf and
+ * owned by the caller afterwards -- confirm against the callers.
+ * Returns 0 or an errno value. The request is released on every path.
+ */
+int
+p9_readlink(struct p9_fid *fid, char **target)
+{
+	int error;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	error = 0;
+	clnt = fid->clnt;
+
+	P9_DEBUG(PROTO, "TREADLINK fid %d\n", fid->fid);
+
+	req = p9_client_request(clnt, P9PROTO_TREADLINK, &error, "d", fid->fid);
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "s", target);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RREADLINK target %s \n", *target);
+
+out:
+	/* Release the request on the decode-failure path as well. */
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Fetch the attributes selected by request_mask for the file referenced by
+ * fid (TGETATTR) and decode them into stat_dotl.
+ * Returns 0 or an errno value.
+ */
+int
+p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
+    uint64_t request_mask)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int err;
+
+	err = 0;
+
+	P9_DEBUG(PROTO, "TGETATTR fid %d mask %ju\n",
+	    fid->fid, (uintmax_t)request_mask);
+
+	clnt = fid->clnt;
+	req = p9_client_request(clnt, P9PROTO_TGETATTR, &err, "dq", fid->fid,
+	    request_mask);
+	if (req == NULL) {
+		/* Nothing to release; err was set by the request path. */
+		P9_DEBUG(ERROR, "%s: allocation failed %d", __func__, err);
+		return (err);
+	}
+
+	err = p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl);
+	if (err != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err);
+		p9_free_req(clnt, req);
+		return (err);
+	}
+
+	/* The trace below only reads stat_dotl, so the request can go now. */
+	p9_free_req(clnt, req);
+	P9_DEBUG(PROTO, "RGETATTR fid %d qid %x.%jx.%x st_mode %8.8x "
+	    "uid %d gid %d nlink %ju rdev %jx st_size %jx blksize %ju "
+	    "blocks %ju st_atime_sec %ju, st_atime_nsec %ju "
+	    "st_mtime_sec %ju, st_mtime_nsec %ju st_ctime_sec %ju "
+	    "st_ctime_nsec %ju st_btime_sec %ju, st_btime_nsec %ju "
+	    "st_stat %ju, st_data_version %ju \n", fid->fid,
+	    stat_dotl->qid.type, (uintmax_t)stat_dotl->qid.path,
+	    stat_dotl->qid.version, stat_dotl->st_mode, stat_dotl->st_uid,
+	    stat_dotl->st_gid, (uintmax_t)stat_dotl->st_nlink,
+	    (uintmax_t)stat_dotl->st_rdev, (uintmax_t)stat_dotl->st_size,
+	    (uintmax_t)stat_dotl->st_blksize,
+	    (uintmax_t)stat_dotl->st_blocks, (uintmax_t)stat_dotl->st_atime_sec,
+	    (uintmax_t)stat_dotl->st_atime_nsec, (uintmax_t)stat_dotl->st_mtime_sec,
+	    (uintmax_t)stat_dotl->st_mtime_nsec, (uintmax_t)stat_dotl->st_ctime_sec,
+	    (uintmax_t)stat_dotl->st_ctime_nsec, (uintmax_t)stat_dotl->st_btime_sec,
+	    (uintmax_t)stat_dotl->st_btime_nsec, (uintmax_t)stat_dotl->st_gen,
+	    (uintmax_t)stat_dotl->st_data_version);
+
+	return (err);
+}
+
+/*
+ * Apply the attributes in p9attr to the file referenced by fid (TSETATTR).
+ * Returns 0 or an errno value.
+ */
+int
+p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int err;
+
+	err = 0;
+
+	P9_DEBUG(PROTO, "TSETATTR fid %d"
+	    " valid %x mode %x uid %d gid %d size %ju"
+	    " atime_sec %ju atime_nsec %ju"
+	    " mtime_sec %ju mtime_nsec %ju\n",
+	    fid->fid,
+	    p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+	    (uintmax_t)p9attr->size, (uintmax_t)p9attr->atime_sec,
+	    (uintmax_t)p9attr->atime_nsec, (uintmax_t)p9attr->mtime_sec,
+	    (uintmax_t)p9attr->mtime_nsec);
+
+	clnt = fid->clnt;
+
+	/* A failed request shows up as req == NULL with err already set. */
+	req = p9_client_request(clnt, P9PROTO_TSETATTR, &err, "dA", fid->fid,
+	    p9attr);
+	if (req != NULL) {
+		p9_free_req(clnt, req);
+	} else {
+		P9_DEBUG(ERROR, "%s: allocation failed %d\n", __func__, err);
+	}
+
+	return (err);
+}
+
diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h
new file mode 100644
index 000000000000..4eb82c0232f4
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* 9P client definitions */
+
+#ifndef FS_P9FS_P9_CLIENT_H
+#define FS_P9FS_P9_CLIENT_H
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/_unrhdr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/dirent.h>
+#include <sys/stdarg.h>
+
+#include <fs/p9fs/p9_protocol.h>
+
+/* 9P protocol versions (wire strings as used by p9_client_version()) */
+enum p9_proto_versions {
+	p9_proto_legacy,	/* legacy version, "9P2000" */
+	p9_proto_2000u,		/* Unix version, "9P2000.u" */
+	p9_proto_2000L,		/* Linux version, "9P2000.L" */
+};
+
+/* 9P request exchanged between host and guest: one buffer per direction */
+struct p9_req_t {
+	struct p9_buffer *tc;	/* request buffer (message being sent) */
+	struct p9_buffer *rc;	/* response buffer (reply being decoded) */
+};
+
+/* 9P transport status */
+enum transport_status {
+	P9FS_CONNECT,		/* transport is connected */
+	P9FS_BEGIN_DISCONNECT,/* transport has begun to disconnect */
+	P9FS_DISCONNECT,	/* transport has been disconnected */
+};
+
+/* This is set by QEMU so we will oblige */
+#define P9FS_MTU 8192
+
+/*
+ * Even though we have a 8k buffer, Qemu is typically doing 8168
+ * because of a HDR of 24. Use that amount for transfers so that we dont
+ * drop anything.
+ */
+#define P9FS_IOUNIT (P9FS_MTU - 24)
+#define P9FS_DIRENT_LEN 256
+#define P9_NOTAG 0
+
+/* Client state information */
+struct p9_client {
+	struct p9_trans_module *ops;		/* module API instantiated with this client */
+	void *handle;				/* module-specific client handle */
+	struct mtx clnt_mtx;			/* mutex to lock the client; also the lock of both pools */
+	struct mtx req_mtx;			/* mutex to lock the request buffer */
+	struct cv req_cv;			/* condition variable on which to wake up thread */
+	unsigned int msize;			/* maximum data size */
+	unsigned char proto_version;		/* 9P version to use */
+	struct unrhdr fidpool;			/* fid handle accounting for session */
+	struct unrhdr tagpool;			/* transaction id accounting for session */
+	enum transport_status trans_status;	/* transport instance state */
+};
+
+/* The main fid structure which keeps track of the file. */
+struct p9_fid {
+	struct p9_client *clnt;	/* the instantiating 9P client */
+	uint32_t fid;		/* numeric identifier */
+	int mode;		/* current mode of this fid; -1 until opened */
+	struct p9_qid qid;	/* server identifier */
+	uint32_t mtu;		/* max transferable unit at a time */
+	uid_t uid;		/* numeric uid of the local user who owns this handle */
+	int v_opens;		/* keep count on the number of opens called with this file handle */
+	STAILQ_ENTRY(p9_fid) fid_next;	/* points to next fid in the list */
+};
+
+/* Directory entry structure */
+struct p9_dirent {
+	struct p9_qid qid;		/* 9P server qid for this dirent */
+	uint64_t d_off;			/* offset to the next dirent */
+	unsigned char d_type;		/* file type */
+	char d_name[P9FS_DIRENT_LEN];	/* file name */
+	int len;			/* NOTE(review): presumably a length for this entry -- confirm */
+};
+
+void p9_init_zones(void);
+void p9_destroy_zones(void);
+
+/* Session and client Init Ops */
+struct p9_client *p9_client_create(struct mount *mp, int *error,
+    const char *mount_tag);
+void p9_client_destroy(struct p9_client *clnt);
+struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *fid,
+    const char *uname, uid_t n_uname, const char *aname, int *error);
+
+/* FILE OPS - These are individually called from the specific vop function */
+
+int p9_client_open(struct p9_fid *fid, int mode);
+int p9_client_close(struct p9_fid *fid);
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames,
+    char **wnames, int clone, int *error);
+struct p9_fid *p9_fid_create(struct p9_client *clnt);
+void p9_fid_destroy(struct p9_fid *fid);
+uint16_t p9_tag_create(struct p9_client *clnt);
+void p9_tag_destroy(struct p9_client *clnt, uint16_t tag);
+int p9_client_clunk(struct p9_fid *fid);
+int p9_client_version(struct p9_client *clnt);
+int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count);
+int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
+    char *extension);
+int p9_client_remove(struct p9_fid *fid);
+int p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags);
+int p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
+    struct p9_dirent *dirent);
+int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat);
+int p9_client_statread(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+int p9_is_proto_dotu(struct p9_client *clnt);
+int p9_is_proto_dotl(struct p9_client *clnt);
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
+int p9stat_read(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+void p9_client_disconnect(struct p9_client *clnt);
+void p9_client_begin_disconnect(struct p9_client *clnt);
+int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid);
+int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name);
+int p9_readlink(struct p9_fid *fid, char **target);
+int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname);
+int p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
+    uint64_t request_mask);
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr);
+
+int p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
+    va_list ap);
+int p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...);
+int p9_buf_prepare(struct p9_buffer *buf, int8_t type);
+int p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf);
+void p9_buf_reset(struct p9_buffer *buf);
+
+#endif /* FS_P9FS_P9_CLIENT_H */
diff --git a/sys/fs/p9fs/p9_debug.h b/sys/fs/p9fs/p9_debug.h
new file mode 100644
index 000000000000..463b009d00ad
--- /dev/null
+++ b/sys/fs/p9fs/p9_debug.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FS_P9FS_P9_DEBUG_H
+#define FS_P9FS_P9_DEBUG_H
+
+/*
+ * Bitmask of the P9_DEBUG_* categories below that are currently enabled.
+ * Only declared here; the definition lives in another file.
+ */
+extern int p9_debug_level; /* All debugs on now */
+
+/* 9P debug flags */
+#define P9_DEBUG_TRANS			0x0001	/* Trace transport */
+#define P9_DEBUG_SUBR			0x0002	/* Trace driver submissions */
+#define P9_DEBUG_LPROTO			0x0004	/* Low level protocol tracing */
+#define P9_DEBUG_PROTO			0x0008	/* High level protocol tracing */
+#define P9_DEBUG_VOPS			0x0010	/* VOPs tracing */
+#define P9_DEBUG_ERROR			0x0020	/* verbose error messages */
+
+/*
+ * Print a message with printf() when the bit for 'category' is set in
+ * p9_debug_level.  'category' is the suffix of one of the P9_DEBUG_* flags
+ * above, e.g. P9_DEBUG(ERROR, ...) tests P9_DEBUG_ERROR.  The ## in front of
+ * __VA_ARGS__ allows a call with a format string and no further arguments.
+ */
+#define P9_DEBUG(category, fmt, ...) do {			\
+	if ((p9_debug_level & P9_DEBUG_##category) != 0)	\
+		printf(fmt, ##__VA_ARGS__);			\
+} while (0)
+
+#endif /* FS_P9FS_P9_DEBUG_H */
diff --git a/sys/fs/p9fs/p9_protocol.c b/sys/fs/p9fs/p9_protocol.c
new file mode 100644
index 000000000000..e0045f67993d
--- /dev/null
+++ b/sys/fs/p9fs/p9_protocol.c
@@ -0,0 +1,632 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * 9P Protocol Support Code
+ * This file implements the encoding and decoding of 9P messages, which is
+ * the only protocol the server interface understands. The details of the
+ * protocol can be found here
+ * XXX (link to protocol details page on FreeBSD wiki)
+ */
+
+#include <sys/types.h>
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_protocol.h>
+
+/*
+ * Upper bound, in bytes, on a string emitted by the 's' format of
+ * p9_buf_vwritef(); longer strings are truncated to this length.
+ */
+#define P9FS_MAXLEN 255
+
+/* Forward declarations for the file-local helpers defined below. */
+static int p9_buf_writef(struct p9_buffer *buf, int proto_version,
+    const char *fmt, ...);
+static void stat_free(struct p9_wstat *sbuf);
+
+/*
+ * Release the string members of a p9_wstat (name, uid, gid, muid and
+ * extension), all of which are M_TEMP allocations.
+ *
+ * Each pointer is reset to NULL after it is freed so that a caller which
+ * runs its own cleanup on the same structure after a failed encode/decode
+ * does not free the same memory a second time.
+ */
+static void
+stat_free(struct p9_wstat *stbuf)
+{
+
+	free(stbuf->name, M_TEMP);
+	stbuf->name = NULL;
+	free(stbuf->uid, M_TEMP);
+	stbuf->uid = NULL;
+	free(stbuf->gid, M_TEMP);
+	stbuf->gid = NULL;
+	free(stbuf->muid, M_TEMP);
+	stbuf->muid = NULL;
+	free(stbuf->extension, M_TEMP);
+	stbuf->extension = NULL;
+}
+
+/*
+ * Copy up to 'size' bytes from the unread part of 'buf' into 'data' and
+ * advance the read offset by the number of bytes copied.
+ *
+ * Returns the number of requested bytes that could NOT be supplied, so 0
+ * means the whole request was satisfied.
+ */
+static size_t
+buf_read(struct p9_buffer *buf, void *data, size_t size)
+{
+	size_t avail, len;
+
+	/*
+	 * Nothing is readable once the offset has reached (or been set
+	 * beyond) the end of the data.  Subtracting without this check
+	 * would wrap around to a huge value and let the copy below run past
+	 * the end of the buffer.
+	 */
+	avail = (buf->offset < buf->size) ? buf->size - buf->offset : 0;
+
+	/* MIN() compares the operands in their own (size_t) width. */
+	len = MIN(avail, size);
+
+	if (len != 0) {
+		memcpy(data, &buf->sdata[buf->offset], len);
+		buf->offset += len;
+	}
+
+	return (size - len);
+}
+
+/*
+ * Append up to 'size' bytes from 'data' to 'buf' and grow buf->size by the
+ * number of bytes copied.
+ *
+ * Returns the number of bytes that did NOT fit, so 0 means everything was
+ * written.
+ */
+static size_t
+buf_write(struct p9_buffer *buf, const void *data, size_t size)
+{
+	size_t avail, len;
+
+	/*
+	 * Guard the subtraction: if size were ever larger than capacity the
+	 * unsigned difference would wrap and permit a write past the end of
+	 * the buffer.
+	 */
+	avail = (buf->size < buf->capacity) ? buf->capacity - buf->size : 0;
+
+	/* MIN() compares the operands in their own (size_t) width. */
+	len = MIN(avail, size);
+
+	/* Skipping the empty copy also keeps a NULL 'data' away from memcpy. */
+	if (len != 0) {
+		memcpy(&buf->sdata[buf->size], data, len);
+		buf->size += len;
+	}
+
+	return (size - len);
+}
+
+/*
+ * Main buf_read routine. This copies the data from the buffer into the
+ * respective values based on the data type.  Every format character consumes
+ * one pointer argument from 'ap' (two for D, T and R) and stores the decoded
+ * value through it.
+ * Here
+ *	  b - int8_t
+ *	  w - int16_t
+ *	  d - int32_t
+ *	  q - int64_t
+ *	  s - string
+ *	  u - uid
+ *	  g - gid
+ *	  Q - qid
+ *	  S - stat
+ *	  A - getattr (9P2000.L)
+ *	  D - data blob (int32_t size followed by void *, results are not freed)
+ *	  T - array of strings (int16_t count, followed by strings)
+ *	  R - array of qids (int16_t count, followed by qids)
+ *	  ? - return if version is not .u or .l
+ *
+ * Characters not listed above are ignored.  Strings and arrays are allocated
+ * from M_TEMP with M_NOWAIT and are handed to the caller only on success.
+ *
+ * Returns 0 on success, EFAULT when the buffer does not hold enough data and
+ * ENOMEM when an allocation fails.  Decoding stops at the first error.
+ */
+static int
+p9_buf_vreadf(struct p9_buffer *buf, int proto_version, const char *fmt,
+    va_list ap)
+{
+	const char *ptr;
+	int error;
+
+	error = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':
+		{
+			int8_t *val = va_arg(ap, int8_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'w':
+		{
+			int16_t *val = va_arg(ap, int16_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'd':
+		{
+			int32_t *val = va_arg(ap, int32_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'q':
+		{
+			int64_t *val = va_arg(ap, int64_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 's':
+		{
+			char **sptr_p = va_arg(ap, char **);
+			uint16_t len;
+			char *sptr;
+
+			/*
+			 * buf_read() returns a leftover byte count, not an
+			 * errno, so translate a short read explicitly.
+			 */
+			if (buf_read(buf, &len, sizeof(uint16_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* M_NOWAIT allocations may fail; do not touch NULL. */
+			sptr = malloc(len + 1, M_TEMP, M_NOWAIT | M_ZERO);
+			if (sptr == NULL) {
+				error = ENOMEM;
+				break;
+			}
+
+			if (buf_read(buf, sptr, len) != 0) {
+				error = EFAULT;
+				free(sptr, M_TEMP);
+			} else {
+				sptr[len] = '\0';
+				*sptr_p = sptr;
+			}
+			break;
+		}
+		case 'u':
+		{
+			uid_t *val = va_arg(ap, uid_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'g':
+		{
+			gid_t *val = va_arg(ap, gid_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'Q':
+		{
+			struct p9_qid *qid = va_arg(ap, struct p9_qid *);
+
+			error = p9_buf_readf(buf, proto_version, "bdq",
+			    &qid->type, &qid->version, &qid->path);
+
+			break;
+		}
+		case 'S':
+		{
+			struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *);
+
+			error = p9_buf_readf(buf, proto_version, "wwdQdddqssss?sddd",
+			    &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid,
+			    &stbuf->mode, &stbuf->atime, &stbuf->mtime, &stbuf->length,
+			    &stbuf->name, &stbuf->uid, &stbuf->gid, &stbuf->muid,
+			    &stbuf->extension, &stbuf->n_uid, &stbuf->n_gid, &stbuf->n_muid);
+
+			/*
+			 * NOTE(review): stat_free() releases every string
+			 * member, including ones this call never assigned, so
+			 * this presumably relies on the caller having zeroed
+			 * *stbuf beforehand - confirm at the call sites.
+			 */
+			if (error != 0)
+				stat_free(stbuf);
+			break;
+		}
+		case 'A':
+		{
+			struct p9_stat_dotl *stbuf = va_arg(ap, struct p9_stat_dotl *);
+
+			error = p9_buf_readf(buf, proto_version, "qQdugqqqqqqqqqqqqqqq",
+			   &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode,
+			   &stbuf->st_uid,&stbuf->st_gid, &stbuf->st_nlink,
+			   &stbuf->st_rdev, &stbuf->st_size, &stbuf->st_blksize,
+			   &stbuf->st_blocks, &stbuf->st_atime_sec,
+			   &stbuf->st_atime_nsec, &stbuf->st_mtime_sec,
+			   &stbuf->st_mtime_nsec, &stbuf->st_ctime_sec,
+			   &stbuf->st_ctime_nsec, &stbuf->st_btime_sec,
+			   &stbuf->st_btime_nsec, &stbuf->st_gen,
+			   &stbuf->st_data_version);
+
+			break;
+		}
+		case 'D':
+		{
+			uint32_t *count = va_arg(ap, uint32_t *);
+			void **data = va_arg(ap, void **);
+
+			if (buf_read(buf, count, sizeof(uint32_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/*
+			 * Clamp the advertised length to what is actually
+			 * present and return a pointer into the buffer
+			 * itself; nothing is copied and the read offset is
+			 * not advanced past the blob.
+			 */
+			*count = MIN(*count, buf->size - buf->offset);
+			*data = &buf->sdata[buf->offset];
+			break;
+		}
+		case 'T':
+		{
+			uint16_t *nwname_p = va_arg(ap, uint16_t *);
+			char ***wnames_p = va_arg(ap, char ***);
+			uint16_t nwname;
+			char **wnames;
+			int i;
+
+			if (buf_read(buf, nwname_p, sizeof(uint16_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			nwname = *nwname_p;
+			wnames = malloc(sizeof(char *) * nwname, M_TEMP, M_NOWAIT | M_ZERO);
+			if (wnames == NULL) {
+				error = ENOMEM;
+				break;
+			}
+
+			for (i = 0; i < nwname && (error == 0); i++)
+				error = p9_buf_readf(buf, proto_version, "s", &wnames[i]);
+
+			if (error != 0) {
+				/*
+				 * The array was zero-filled, so entries that
+				 * were never decoded are NULL and safe to
+				 * pass to free().
+				 */
+				for (i = 0; i < nwname; i++)
+					free(wnames[i], M_TEMP);
+				free(wnames, M_TEMP);
+			} else
+				*wnames_p = wnames;
+			break;
+		}
+		case 'R':
+		{
+			uint16_t *nwqid_p = va_arg(ap, uint16_t *);
+			struct p9_qid **wqids_p = va_arg(ap, struct p9_qid **);
+			uint16_t nwqid;
+			struct p9_qid *wqids;
+			int i;
+
+			if (buf_read(buf, nwqid_p, sizeof(uint16_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			nwqid = *nwqid_p;
+			wqids = malloc(nwqid * sizeof(struct p9_qid), M_TEMP, M_NOWAIT | M_ZERO);
+			if (wqids == NULL) {
+				error = ENOMEM;
+				break;
+			}
+			for (i = 0; i < nwqid && (error == 0); i++)
+				error = p9_buf_readf(buf, proto_version, "Q", &wqids[i]);
+
+			if (error != 0) {
+				free(wqids, M_TEMP);
+			} else
+				*wqids_p = wqids;
+
+			break;
+		}
+		case '?':
+		{
+			/* Everything after '?' exists only in .u and .L. */
+			if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L))
+				return (0);
+			break;
+		}
+		default:
+			break;
+		}
+
+		if (error != 0)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Main buf_write routine. This copies the data into the buffer from the
+ * respective values based on the data type.  Every format character consumes
+ * one argument from 'ap', except D (count, data), T (names, count),
+ * W (string, length) and R (count, qids), which consume two in that order.
+ * Here
+ *	  b - int8_t
+ *	  w - int16_t
+ *	  d - int32_t
+ *	  q - int64_t
+ *	  s - string
+ *	  u - uid
+ *	  g - gid
+ *	  Q - qid
+ *	  S - stat
+ *	  D - data blob (int32_t size followed by void *, results are not freed)
+ *	  T - array of strings (int16_t count, followed by strings)
+ *	  W - string of a specific length
+ *	  R - array of qids (int16_t count, followed by qids)
+ *	  A - setattr (9P2000.L)
+ *	  ? - return if version is not .u or .l
+ *
+ * Characters not listed above are ignored.
+ *
+ * Returns 0 on success or EFAULT when the buffer has no room for a value.
+ * Encoding stops at the first error.
+ */
+
+int
+p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
+	va_list ap)
+{
+	const char *ptr;
+	int error;
+
+	error = 0;
+
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':
+		{
+			int8_t val = va_arg(ap, int);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'w':
+		{
+			int16_t val = va_arg(ap, int);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'd':
+		{
+			int32_t val = va_arg(ap, int32_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'q':
+		{
+			int64_t val = va_arg(ap, int64_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+
+			break;
+		}
+		case 's':
+		{
+			const char *sptr = va_arg(ap, const char *);
+			uint16_t len = 0;
+
+			/* A NULL string is encoded as a zero-length string. */
+			if (sptr != NULL)
+				len = MIN(strlen(sptr), P9FS_MAXLEN);
+
+			/*
+			 * buf_write() returns a leftover byte count, not an
+			 * errno, so translate a short write explicitly.
+			 */
+			if (buf_write(buf, &len, sizeof(uint16_t)) != 0 ||
+			    (len != 0 && buf_write(buf, sptr, len) != 0))
+				error = EFAULT;
+			break;
+		}
+		case 'u':
+		{
+			uid_t val = va_arg(ap, uid_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'g':
+		{
+			gid_t val = va_arg(ap, gid_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'Q':
+		{
+			const struct p9_qid *qid = va_arg(ap, const struct p9_qid *);
+
+			error = p9_buf_writef(buf, proto_version, "bdq",
+			    qid->type, qid->version, qid->path);
+			break;
+		}
+		case 'S':
+		{
+			struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *);
+
+			error = p9_buf_writef(buf, proto_version,
+			    "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid,
+			    stbuf->mode, stbuf->atime, stbuf->mtime, stbuf->length, stbuf->name,
+			    stbuf->uid, stbuf->gid, stbuf->muid, stbuf->extension, stbuf->n_uid,
+			    stbuf->n_gid, stbuf->n_muid);
+
+			/*
+			 * NOTE(review): a failed encode frees the caller's
+			 * string members here - confirm callers expect to
+			 * lose ownership of them on error.
+			 */
+			if (error != 0)
+				stat_free(stbuf);
+
+			break;
+		}
+		case 'D':
+		{
+			uint32_t count = va_arg(ap, uint32_t);
+			void *data = va_arg(ap, void *);
+
+			if (buf_write(buf, &count, sizeof(uint32_t)) != 0 ||
+			    buf_write(buf, data, count) != 0)
+				error = EFAULT;
+
+			break;
+		}
+		case 'T':
+		{
+			char **wnames = va_arg(ap, char **);
+			uint16_t nwnames = va_arg(ap, int);
+			int i;
+
+			if (buf_write(buf, &nwnames, sizeof(uint16_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			for (i = 0; i < nwnames; i++) {
+				error = p9_buf_writef(buf, proto_version, "s", wnames[i]);
+				if (error != 0)
+					break;
+			}
+			break;
+		}
+		case 'W':
+		{
+			const char *sptr = va_arg(ap, const char*);
+			uint16_t len = va_arg(ap, int);
+
+			/* Unlike 's', the length is not capped at P9FS_MAXLEN. */
+			if (buf_write(buf, &len, sizeof(uint16_t)) != 0 ||
+			    buf_write(buf, sptr, len) != 0)
+				error = EFAULT;
+			break;
+
+		}
+		case 'R':
+		{
+			uint16_t nwqid = va_arg(ap, int);
+			struct p9_qid *wqids = va_arg(ap, struct p9_qid *);
+			int i;
+
+			if (buf_write(buf, &nwqid, sizeof(uint16_t)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			for (i = 0; i < nwqid; i++) {
+				error = p9_buf_writef(buf, proto_version, "Q", &wqids[i]);
+				if (error != 0)
+					break;
+			}
+			break;
+		}
+		case 'A':
+		{
+			struct p9_iattr_dotl *p9attr = va_arg(ap, struct p9_iattr_dotl *);
+
+			error = p9_buf_writef(buf, proto_version, "ddugqqqqq",
+			    p9attr->valid, p9attr->mode, p9attr->uid,
+			    p9attr->gid, p9attr->size, p9attr->atime_sec,
+			    p9attr->atime_nsec, p9attr->mtime_sec,
+			    p9attr->mtime_nsec);
+
+			break;
+		}
+		case '?':
+		{
+			/* Everything after '?' exists only in .u and .L. */
+			if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L))
+				return (0);
+			break;
+		}
+		default:
+			break;
+		}
+
+		if (error != 0)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Variadic front end for p9_buf_vreadf(): gathers the trailing arguments
+ * into a va_list and hands back whatever the va_list variant returns.
+ */
+int
+p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...)
+{
+	va_list args;
+	int error;
+
+	va_start(args, fmt);
+	error = p9_buf_vreadf(buf, proto_version, fmt, args);
+	va_end(args);
+
+	return (error);
+}
+
+/*
+ * Variadic front end for p9_buf_vwritef(): gathers the trailing arguments
+ * into a va_list and hands back whatever the va_list variant returns.
+ */
+static int
+p9_buf_writef(struct p9_buffer *buf, int proto_version, const char *fmt, ...)
+{
+	va_list args;
+	int error;
+
+	va_start(args, fmt);
+	error = p9_buf_vwritef(buf, proto_version, fmt, args);
+	va_end(args);
+
+	return (error);
+}
+
+/*
+ * Decode a stat record ('S' format) from the 'len' bytes at 'buf' into 'st',
+ * using the protocol version recorded in 'clnt'.  Returns the result of the
+ * decode: 0 on success, non-zero on failure (which is also logged under the
+ * ERROR debug category).
+ */
+int
+p9stat_read(struct p9_client *clnt, char *buf, size_t len, struct p9_wstat *st)
+{
+	struct p9_buffer pdu;
+	int error;
+
+	/* Wrap the caller's bytes in a buffer that is already full. */
+	pdu.sdata = buf;
+	pdu.offset = 0;
+	pdu.capacity = len;
+	pdu.size = len;
+
+	error = p9_buf_readf(&pdu, clnt->proto_version, "S", st);
+	if (error != 0)
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
+
+	return (error);
+}
+
+/*
+ * Write the P9 header (QEMU_HEADER) that every p9 buffer must start with,
+ * and remember the message type in buf->id.
+ */
+int
+p9_buf_prepare(struct p9_buffer *buf, int8_t type)
+{
+	int error;
+
+	buf->id = type;
+
+	/*
+	 * "dbw": a 32-bit zero, the message type and the tag.  The zero is a
+	 * placeholder; p9_buf_finalize() stores the real size at offset 0.
+	 */
+	error = p9_buf_writef(buf, 0, "dbw", 0, type, buf->tag);
+
+	return (error);
+}
+
+/*
+ * Store the total size of the buffer in its first 32-bit field.  The length
+ * varies from request to request, so it is only known once the whole message
+ * has been written, just before it is passed to the driver.  'clnt' is not
+ * used by this function.
+ */
+int
+p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf)
+{
+	int total;
+	int error;
+
+	/* Rewind the write position so that "d" lands at offset 0. */
+	total = buf->size;
+	buf->size = 0;
+	error = p9_buf_writef(buf, 0, "d", total);
+
+	/* Put the real length back whether or not the write succeeded. */
+	buf->size = total;
+
+	P9_DEBUG(LPROTO, "%s: size=%d type: %d tag: %d\n",
+	    __func__, buf->size, buf->id, buf->tag);
+
+	return (error);
+}
+
+/*
+ * Mark the buffer as empty: no data written and nothing consumed.  The tag,
+ * id, capacity and data pointer are left untouched.
+ */
+void
+p9_buf_reset(struct p9_buffer *buf)
+{
+
+	buf->size = 0;
+	buf->offset = 0;
+}
+
+/*
+ * Parse one directory entry out of 'buf', which holds 'len' bytes received
+ * from the server, starting at byte offset 'start'.  The qid, offset, type
+ * and name are stored in 'dent'; readdir uses this to build its dirents.
+ *
+ * The return value is the buffer offset reached by the decoder, on success
+ * and on failure alike; a decode failure is only reported through the ERROR
+ * debug category.
+ */
+int
+p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
+	struct p9_dirent *dent)
+{
+	struct p9_buffer pdu;
+	char *name;
+	uint16_t namelen;
+	int error;
+
+	pdu.sdata = buf;
+	pdu.offset = start;
+	pdu.capacity = len;
+	pdu.size = len;
+
+	error = p9_buf_readf(&pdu, clnt->proto_version, "Qqbs", &dent->qid,
+	    &dent->d_off, &dent->d_type, &name);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
+	} else {
+		/*
+		 * Exactly namelen bytes are copied, so no NUL terminator is
+		 * written to d_name; dent->len carries the length instead.
+		 * NOTE(review): the copy is not bounded by the capacity of
+		 * d_name - confirm it can hold any name the server may send.
+		 */
+		namelen = strlen(name);
+		strncpy(dent->d_name, name, namelen);
+		dent->len = namelen;
+		free(name, M_TEMP);
+	}
+
+	return (pdu.offset);
+}
diff --git a/sys/fs/p9fs/p9_protocol.h b/sys/fs/p9fs/p9_protocol.h
new file mode 100644
index 000000000000..7ffd7dd67bcf
--- /dev/null
+++ b/sys/fs/p9fs/p9_protocol.h
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* File contains 9P protocol definitions */
+
+#ifndef FS_P9FS_P9_PROTOCOL_H
+#define FS_P9FS_P9_PROTOCOL_H
+
+#include <sys/types.h>
+
+/*
+ * 9P message types.  Each request (T...) has an explicit value and the
+ * matching response (R...) takes the next value.
+ */
+enum p9_cmds_t {
+	P9PROTO_TLERROR = 6,	/* not used */
+	P9PROTO_RLERROR,	/* response for any failed request */
+	P9PROTO_TSTATFS = 8,	/* file system status request */
+	P9PROTO_RSTATFS,	/* file system status response */
+	P9PROTO_TLOPEN = 12,	/* open a file (9P2000.L) */
+	P9PROTO_RLOPEN,		/* response to open request (9P2000.L) */
+	P9PROTO_TLCREATE = 14,	/* prepare for handle for I/O on a new file (9P2000.L) */
+	P9PROTO_RLCREATE,	/* response with file access information (9P2000.L) */
+	P9PROTO_TSYMLINK = 16,	/* symlink creation request */
+	P9PROTO_RSYMLINK,	/* symlink creation response */
+	P9PROTO_TMKNOD = 18,	/* create a special file object request */
+	P9PROTO_RMKNOD,		/* create a special file object response */
+	P9PROTO_TRENAME = 20,	/* rename a file request */
+	P9PROTO_RRENAME,	/* rename a file response */
+	P9PROTO_TREADLINK = 22,	/* request to read value of symbolic link */
+	P9PROTO_RREADLINK,	/* response to read value of symbolic link request */
+	P9PROTO_TGETATTR = 24,	/* get file attributes request */
+	P9PROTO_RGETATTR,	/* get file attributes response */
+	P9PROTO_TSETATTR = 26,	/* set file attributes request */
+	P9PROTO_RSETATTR,	/* set file attributes response */
+	P9PROTO_TXATTRWALK = 30,/* request to read extended attributes */
+	P9PROTO_RXATTRWALK,	/* response from server with attributes */
+	P9PROTO_TXATTRCREATE = 32,/* request to set extended attribute */
+	P9PROTO_RXATTRCREATE,	/* response from server for setting extended attribute */
+	P9PROTO_TREADDIR = 40,	/* request to read a directory */
+	P9PROTO_RREADDIR,	/* response from server for read request */
+	P9PROTO_TFSYNC = 50,	/* request to flush any cached data to disk */
+	P9PROTO_RFSYNC,		/* response when cached data is flushed */
+	P9PROTO_TLOCK = 52,	/* acquire or release a POSIX record lock */
+	P9PROTO_RLOCK,		/* response with the status of the lock */
+	P9PROTO_TGETLOCK = 54,	/* request to check for presence of a POSIX record lock */
+	P9PROTO_RGETLOCK,	/* response with the details of the lock if acquired */
+	P9PROTO_TLINK = 70,	/* request to create hard link */
+	P9PROTO_RLINK,		/* create hard link response */
+	P9PROTO_TMKDIR = 72,	/* create a directory request */
+	P9PROTO_RMKDIR,		/* create a directory response */
+	P9PROTO_TRENAMEAT = 74,	/* request to rename a file or directory */
+	P9PROTO_RRENAMEAT,	/* response to rename request */
+	P9PROTO_TUNLINKAT = 76,	/* unlink a file or directory */
+	P9PROTO_RUNLINKAT,	/* response to unlink request */
+	P9PROTO_TVERSION = 100,	/* request for version handshake */
+	P9PROTO_RVERSION,	/* response for version handshake */
+	P9PROTO_TAUTH = 102,	/* request to establish authentication channel */
+	P9PROTO_RAUTH,		/* response with authentication information */
+	P9PROTO_TATTACH = 104,	/* establish a user access to a file system */
+	P9PROTO_RATTACH,	/* response with top level handle to file hierarchy */
+	P9PROTO_TERROR = 106,	/* not used */
+	P9PROTO_RERROR,		/* response for any failed request */
+	P9PROTO_TFLUSH = 108,	/* request to abort a previous request */
+	P9PROTO_RFLUSH,		/* response when previous request has been cancelled */
+	P9PROTO_TWALK = 110,	/* descend a directory hierarchy */
+	P9PROTO_RWALK,		/* response with new handle for position within hierarchy */
+	P9PROTO_TOPEN = 112,	/* prepare file handle for I/O for an existing file */
+	P9PROTO_ROPEN,		/* response with file access information */
+	P9PROTO_TCREATE = 114,	/* prepare for handle for I/O on a new file */
+	P9PROTO_RCREATE,	/* response with file access information */
+	P9PROTO_TREAD = 116,	/* request to transfer data from a file */
+	P9PROTO_RREAD,		/* response with data requested */
+	P9PROTO_TWRITE = 118,	/* request to transfer data to a file */
+	P9PROTO_RWRITE,		/* response with how much data was written to the file */
+	P9PROTO_TCLUNK = 120,	/* forget about a handle to a file within the File System */
+	P9PROTO_RCLUNK,		/* response from the server for forgetting the file handle */
+	P9PROTO_TREMOVE = 122,	/* request to remove a file */
+	P9PROTO_RREMOVE,	/* response when server has removed the file */
+	P9PROTO_TSTAT = 124,	/* request file entity attributes */
+	P9PROTO_RSTAT,		/* response with file entity attributes */
+	P9PROTO_TWSTAT = 126,	/* request to update file entity attributes */
+	P9PROTO_RWSTAT,		/* response when file entity attributes are updated */
+};
+
+/*
+ * File open modes.  The values 0x00-0x03 select the access mode; the
+ * remaining values are distinct bits.
+ */
+enum p9_open_mode_t {
+	P9PROTO_OREAD = 0x00,	/* open file for reading only */
+	P9PROTO_OWRITE = 0x01,	/* open file for writing only */
+	P9PROTO_ORDWR = 0x02,	/* open file for both reading and writing */
+	P9PROTO_OEXEC = 0x03,	/* open file for execution */
+	P9PROTO_OTRUNC = 0x10,	/* truncate file to zero length before opening it */
+	P9PROTO_OREXEC = 0x20,	/* close the file when exec system call is made */
+	P9PROTO_ORCLOSE = 0x40,	/* remove the file when it is closed */
+	P9PROTO_OAPPEND = 0x80,	/* open the file and seek to the end of the file */
+	P9PROTO_OEXCL = 0x1000,	/* only create a file and not open it */
+};
+
+/* File permissions: one bit per flag, in the upper half of a 32-bit mode */
+enum p9_perm_t {
+	P9PROTO_DMDIR = 0x80000000,	/* permission bit for directories */
+	P9PROTO_DMAPPEND = 0x40000000,	/* permission bit for append-only files */
+	P9PROTO_DMEXCL = 0x20000000,	/* permission bit for exclusive use (only one open handle allowed) */
+	P9PROTO_DMMOUNT = 0x10000000,	/* permission bit for mount points */
+	P9PROTO_DMAUTH = 0x08000000,	/* permission bit for authentication file */
+	P9PROTO_DMTMP = 0x04000000,	/* permission bit for non-backed-up files */
+	P9PROTO_DMSYMLINK = 0x02000000,	/* permission bit for symbolic link (9P2000.u) */
+	P9PROTO_DMLINK = 0x01000000,	/* permission bit for hard-link (9P2000.u) */
+	P9PROTO_DMDEVICE = 0x00800000,	/* permission bit for device files (9P2000.u) */
+	P9PROTO_DMNAMEDPIPE = 0x00200000,/* permission bit for named pipe (9P2000.u) */
+	P9PROTO_DMSOCKET = 0x00100000,	/* permission bit for socket (9P2000.u) */
+	P9PROTO_DMSETUID = 0x00080000,	/* permission bit for setuid (9P2000.u) */
+	P9PROTO_DMSETGID = 0x00040000,	/* permission bit for setgid (9P2000.u) */
+	P9PROTO_DMSETVTX = 0x00010000,	/* permission bit for sticky bit (9P2000.u) */
+};
+
+/*
+ * QID types - they are primarily used to
+ * differentiate semantics for a file system.
+ * Each non-zero value is a single bit of an 8-bit field.
+ */
+enum p9_qid_t {
+	P9PROTO_QTDIR = 0x80,		/* directory */
+	P9PROTO_QTAPPEND = 0x40,	/* append-only */
+	P9PROTO_QTEXCL = 0x20,		/* exclusive use (only one open handle allowed) */
+	P9PROTO_QTMOUNT = 0x10,		/* mount points */
+	P9PROTO_QTAUTH = 0x08,		/* authentication file */
+	P9PROTO_QTTMP = 0x04,		/* non-backed-up files */
+	P9PROTO_QTSYMLINK = 0x02,	/* symbolic links */
+	P9PROTO_QTLINK = 0x01,		/* hard link */
+	P9PROTO_QTFILE = 0x00,		/* normal files */
+};
+
+/* P9 Magic Numbers */
+/* All-ones 32-bit value; presumably the "no fid" sentinel - TODO confirm */
+#define P9PROTO_NOFID	(uint32_t)(~0)
+#define P9_DEFUNAME	"nobody"
+#define P9_DEFANAME	""
+/* All-ones 32-bit value; presumably means "no numeric user" - TODO confirm */
+#define P9_NONUNAME	(uint32_t)(~0)
+/* NOTE(review): looks like the per-walk path element limit - confirm */
+#define P9_MAXWELEM	16
+
+/*
+ * Exchange unit between Qemu and Client.
+ * The 'Q' format in p9_protocol.c encodes and decodes it as "bdq":
+ * type, version, path, in that order.
+ */
+struct p9_qid {
+	uint8_t type;		/* the type of the file */
+	uint32_t version;	/* version number for given path */
+	uint64_t path;		/* the file servers unique id for file */
+};
+
+/*
+ * FS information stat structure.
+ * This is the type of the 'stat' argument of p9_client_statfs().
+ */
+struct p9_statfs {
+	uint32_t type;		/* type of file system */
+	uint32_t bsize;		/* optimal transfer block size */
+	uint64_t blocks;	/* total data blocks in file system */
+	uint64_t bfree;		/* free blocks in fs */
+	uint64_t bavail;	/* free blocks avail to non-superuser */
+	uint64_t files;		/* total file nodes in file system */
+	uint64_t ffree;		/* free file nodes in fs */
+	uint64_t fsid;		/* file system id */
+	uint32_t namelen;	/* maximum length of filenames */
+};
+
+
+/*
+ * File system metadata information.
+ * The 'S' format in p9_protocol.c encodes and decodes the members in
+ * declaration order ("wwdQdddqssss?sddd"); the members from 'extension'
+ * onwards are only processed for 9P2000.u and 9P2000.L.  When decoded, the
+ * string members are M_TEMP allocations.
+ */
+struct p9_wstat {
+	uint16_t size;		/* total byte count of the following data */
+	uint16_t type;		/* type of file */
+	uint32_t dev;		/* id of device containing file */
+	struct p9_qid qid;	/* identifier used by server for file system entity information */
+	uint32_t mode;		/* protection */
+	uint32_t atime;		/* time of last access */
+	uint32_t mtime;		/* time of last modification */
+	uint64_t length;	/* length of file in bytes */
+	char *name;		/* file name */
+	char *uid;		/* user ID of owner */
+	char *gid;		/* group ID of owner */
+	char *muid;		/* name of the user who last modified the file */
+	char *extension;	/* 9p2000.u extensions */
+	uid_t n_uid;		/* 9p2000.u extensions */
+	gid_t n_gid;		/* 9p2000.u extensions */
+	uid_t n_muid;		/* 9p2000.u extensions */
+};
+
+/*
+ * The Linux version of FS information stat structure.
+ * The 'A' read format in p9_protocol.c decodes the members in declaration
+ * order ("qQdugqqqqqqqqqqqqqqq").
+ */
+struct p9_stat_dotl {
+	uint64_t st_result_mask;/* indicates fields that are requested */
+	struct p9_qid qid;	/* identifier used by server for file system entity information */
+	uint32_t st_mode;	/* protection */
+	uid_t st_uid;		/* user ID of owner */
+	gid_t st_gid;		/* group ID of owner */
+	uint64_t st_nlink;	/* number of hard links */
+	uint64_t st_rdev;	/* device ID (if special file) */
+	uint64_t st_size;	/* total size, in bytes */
+	uint64_t st_blksize;	/* blocksize for file system I/O */
+	uint64_t st_blocks;	/* number of 512B blocks allocated */
+	uint64_t st_atime_sec;	/* time of last access, seconds */
+	uint64_t st_atime_nsec;	/* time of last access, nanoseconds */
+	uint64_t st_mtime_sec;	/* time of last modification, seconds */
+	uint64_t st_mtime_nsec;	/* time of last modification, nanoseconds */
+	uint64_t st_ctime_sec;	/* time of last status change, seconds */
+	uint64_t st_ctime_nsec;	/* time of last status change, nanoseconds */
+	uint64_t st_btime_sec;	/* following members are reserved for future use */
+	uint64_t st_btime_nsec;
+	uint64_t st_gen;
+	uint64_t st_data_version;
+};
+
+/*
+ * P9 inode attribute for setattr.
+ * The 'A' write format in p9_protocol.c encodes the members in declaration
+ * order ("ddugqqqqq").
+ */
+struct p9_iattr_dotl {
+	uint32_t valid;		/* bit fields specifying which fields are valid */
+	uint32_t mode;		/* protection */
+	uid_t uid;		/* user id of owner */
+	gid_t gid;		/* group id */
+	uint64_t size;		/* file size */
+	uint64_t atime_sec;	/* last access time in seconds */
+	uint64_t atime_nsec;	/* last access time in nanoseconds */
+	uint64_t mtime_sec;	/* last modification time in seconds */
+	uint64_t mtime_nsec;	/* last modification time in nanoseconds */
+};
+
+/*
+ * 64-bit attribute selection bits.
+ * NOTE(review): presumably used for the getattr request_mask and
+ * p9_stat_dotl.st_result_mask - confirm against the callers.
+ */
+#define P9PROTO_STATS_MODE		0x00000001ULL
+#define P9PROTO_STATS_NLINK		0x00000002ULL
+#define P9PROTO_STATS_UID		0x00000004ULL
+#define P9PROTO_STATS_GID		0x00000008ULL
+#define P9PROTO_STATS_RDEV		0x00000010ULL
+#define P9PROTO_STATS_ATIME		0x00000020ULL
+#define P9PROTO_STATS_MTIME		0x00000040ULL
+#define P9PROTO_STATS_CTIME		0x00000080ULL
+#define P9PROTO_STATS_INO		0x00000100ULL
+#define P9PROTO_STATS_SIZE		0x00000200ULL
+#define P9PROTO_STATS_BLOCKS		0x00000400ULL
+
+#define P9PROTO_STATS_BTIME		0x00000800ULL
+#define P9PROTO_STATS_GEN		0x00001000ULL
+#define P9PROTO_STATS_DATA_VERSION	0x00002000ULL
+
+#define P9PROTO_STATS_BASIC		0x000007ffULL /* Mask for fields up to BLOCKS */
+#define P9PROTO_STATS_ALL		0x00003fffULL /* Mask for All fields above */
+
+/*
+ * Setattr selection bits.
+ * NOTE(review): presumably the values stored in p9_iattr_dotl.valid -
+ * confirm against the callers.
+ * P9PROTO_SETATTR_MASK (0x1bf) covers every flag below except
+ * P9PROTO_SETATTR_CTIME (0x40).
+ */
+#define P9PROTO_SETATTR_MODE		0x00000001UL
+#define P9PROTO_SETATTR_UID		0x00000002UL
+#define P9PROTO_SETATTR_GID		0x00000004UL
+#define P9PROTO_SETATTR_SIZE		0x00000008UL
+#define P9PROTO_SETATTR_ATIME		0x00000010UL
+#define P9PROTO_SETATTR_MTIME		0x00000020UL
+#define P9PROTO_SETATTR_CTIME		0x00000040UL
+#define P9PROTO_SETATTR_ATIME_SET	0x00000080UL
+#define P9PROTO_SETATTR_MTIME_SET	0x00000100UL
+#define P9PROTO_SETATTR_MASK		0x000001bfUL
+
+#define P9PROTO_TGETATTR_BLK		512
+
+#define	P9PROTO_UNLINKAT_REMOVEDIR	0x200
+
+/*
+ * PDU buffer used for SG lists.
+ * Writers in p9_protocol.c append at 'size'; readers consume from 'offset'.
+ */
+struct p9_buffer {
+	uint32_t size;		/* bytes of data in sdata; next write position */
+	uint16_t tag;		/* tag written into the header by p9_buf_prepare() */
+	uint8_t id;		/* message type recorded by p9_buf_prepare() */
+	size_t offset;		/* next read position within sdata */
+	size_t capacity;	/* limit on size when writing */
+	uint8_t *sdata;		/* the message bytes */
+};
+
+#endif /* FS_P9FS_P9_PROTOCOL_H */
diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c
new file mode 100644
index 000000000000..c82d81fedcd7
--- /dev/null
+++ b/sys/fs/p9fs/p9_transport.c
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2022-present Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/kassert.h>
+#include <sys/libkern.h>
+
+#include <fs/p9fs/p9_transport.h>
+
+TAILQ_HEAD(, p9_trans_module) transports;
+
+static void
+p9_transport_init(void)
+{
+	/* Invoked through the SYSINIT below: start with an empty transport list. */
+	TAILQ_INIT(&transports);
+}
+
+SYSINIT(p9_transport, SI_SUB_DRIVERS, SI_ORDER_FIRST, p9_transport_init, NULL);
+
+void
+p9_register_trans(struct p9_trans_module *m)
+{
+	/* Append m to the global transport list; no lock is taken here. */
+	TAILQ_INSERT_TAIL(&transports, m, link);
+}
+        
+void
+p9_unregister_trans(struct p9_trans_module *m)
+{
+	/* Unlink m from the global transport list; no lock is taken here. */
+	TAILQ_REMOVE(&transports, m, link);
+}
+
+struct p9_trans_module *
+p9_get_trans_by_name(char *name)
+{
+	struct p9_trans_module *tm;
+
+	/* First exact name match wins; the cursor is NULL after a full scan. */
+	TAILQ_FOREACH(tm, &transports, link)
+		if (strcmp(tm->name, name) == 0)
+			break;
+	return (tm);
+}
+
diff --git a/sys/fs/p9fs/p9_transport.h b/sys/fs/p9fs/p9_transport.h
new file mode 100644
index 000000000000..143c29f2382e
--- /dev/null
+++ b/sys/fs/p9fs/p9_transport.h
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* Transport definitions */
+#ifndef FS_P9FS_P9_TRANSPORT_H
+#define FS_P9FS_P9_TRANSPORT_H
+
+#include <sys/queue.h>
+
+struct p9_req_t;
+
+/* Transport module interface */
+struct p9_trans_module {
+	TAILQ_ENTRY(p9_trans_module) link;	/* linkage on the registered-transport list */
+	char *name;			/* name of transport; key for p9_get_trans_by_name() */
+	/* member function to create a new connection on this transport */
+	int (*create)(const char *mount_tag, void **handlep);
+	/* member function to terminate a connection on this transport */
+	void (*close) (void *handle);
+	/* member function to issue a request to the transport */
+	int (*request) (void *handle, struct p9_req_t *req);
+	/* member function to cancel a request if it has been sent */
+	int (*cancel) (void *handle, struct p9_req_t *req);
+};
+
+void p9_register_trans(struct p9_trans_module *m);
+void p9_unregister_trans(struct p9_trans_module *m);
+struct p9_trans_module *p9_get_trans_by_name(char *s);
+
+#endif /* FS_P9FS_P9_TRANSPORT_H */
diff --git a/sys/fs/p9fs/p9fs.h b/sys/fs/p9fs/p9fs.h
new file mode 100644
index 000000000000..a270d8b5ce5f
--- /dev/null
+++ b/sys/fs/p9fs/p9fs.h
@@ -0,0 +1,203 @@
+/*-
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* This file has prototypes specific to the p9fs file system */
+
+#ifndef FS_P9FS_P9FS_H
+#define FS_P9FS_P9FS_H
+
+struct p9fs_session;
+
+/* QID: Unique identification for the file being accessed */
+struct p9fs_qid {
+	uint8_t qid_mode;	/* file mode specifying file type */
+	uint32_t qid_version;	/* version of the file */
+	uint64_t qid_path;	/* unique integer among all files in hierarchy */
+};
+
+/*
+ * In-memory copy of the on-disk inode, kept so it can be written back later.
+ * NOTE(review): times are 32-bit here but 64-bit in p9_protocol.h -- confirm.
+ */
+struct p9fs_inode {
+	/* Make it simple first, add more fields later */
+	uint64_t i_size;	/* size of the inode */
+	uint16_t i_type;	/* type of inode */
+	uint32_t i_dev;		/* type of device */
+	uint32_t i_mode;	/* mode of the inode */
+	uint32_t i_atime;	/* time of last access */
+	uint32_t i_mtime;	/* time of last modification */
+	uint32_t i_ctime;	/* time of last status change */
+	uint32_t i_atime_nsec;	/* time of last access in nanoseconds resolution */
+	uint32_t i_mtime_nsec;	/* time of last modification in nanoseconds resolution */
+	uint32_t i_ctime_nsec;	/* time of last status change in nanoseconds resolution */
+	uint64_t i_length;
+	char *i_name;		/* inode name; used as a path component by p9fs_get_full_path() */
+	char *i_uid;		/* inode user id */
+	char *i_gid;		/* inode group id */
+	char *i_muid;
+	char *i_extension;	/* 9p2000.u extensions */
+	uid_t n_uid;		/* 9p2000.u extensions */
+	gid_t n_gid;		/* 9p2000.u extensions */
+	uid_t n_muid;		/* 9p2000.u extensions */
+	/* bookkeeping info on the client. */
+	uint16_t i_links_count;	/* number of references to the inode; see P9FS_*_LINKS */
+	uint64_t i_qid_path;	/* using inode number for reference. */
+	uint64_t i_flags;
+	uint64_t blksize;	/* block size for file system */
+	uint64_t blocks;	/* number of 512B blocks allocated */
+	uint64_t gen;		/* reserved for future use */
+	uint64_t data_version;	/* reserved for future use */
+
+};
+
+#define P9FS_VFID_MTX(_sc) (&(_sc)->vfid_mtx)
+#define P9FS_VFID_LOCK(_sc) mtx_lock(P9FS_VFID_MTX(_sc))
+#define P9FS_VFID_UNLOCK(_sc) mtx_unlock(P9FS_VFID_MTX(_sc))
+#define P9FS_VFID_LOCK_INIT(_sc) mtx_init(P9FS_VFID_MTX(_sc), \
+    "VFID List lock", NULL, MTX_DEF)
+#define P9FS_VFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VFID_MTX(_sc))
+
+#define P9FS_VOFID_MTX(_sc) (&(_sc)->vofid_mtx)
+#define P9FS_VOFID_LOCK(_sc) mtx_lock(P9FS_VOFID_MTX(_sc))
+#define P9FS_VOFID_UNLOCK(_sc) mtx_unlock(P9FS_VOFID_MTX(_sc))
+#define P9FS_VOFID_LOCK_INIT(_sc) mtx_init(P9FS_VOFID_MTX(_sc), \
+    "VOFID List lock", NULL, MTX_DEF)
+#define P9FS_VOFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VOFID_MTX(_sc))
+
+#define VFID	0x01	/* fid_type value: selects a node's vfid_list */
+#define VOFID	0x02	/* fid_type value: selects a node's vofid_list */
+
+/* A Plan9 node. */
+struct p9fs_node {
+	STAILQ_HEAD( ,p9_fid) vfid_list;	/* VFID fids, looked up by uid */
+	struct mtx vfid_mtx;			/* mutex for vfid list */
+	STAILQ_HEAD( ,p9_fid) vofid_list;	/* VOFID fids, looked up by uid and mode */
+	struct mtx vofid_mtx;			/* mutex for vofid list */
+	struct p9fs_node *parent;		/* pointer to parent p9fs node */
+	struct p9fs_qid vqid;			/* the server qid, will be from the host */
+	struct vnode *v_node;			/* vnode for this fs_node. */
+	struct p9fs_inode inode;		/* in memory representation of on-disk information */
+	struct p9fs_session *p9fs_ses;		/* session pointer for this node */
+	STAILQ_ENTRY(p9fs_node) p9fs_node_next;	/* linkage on the session's virt_node_list */
+	uint64_t flags;				/* P9FS_ROOT and, presumably, P9FS_NODE_* bits */
+};
+
+#define P9FS_VTON(vp) ((struct p9fs_node *)(vp)->v_data)
+#define P9FS_NTOV(node) ((node)->v_node)
+#define	VFSTOP9(mp) ((struct p9fs_mount *)(mp)->mnt_data)
+#define QEMU_DIRENTRY_SZ	25
+#define P9FS_NODE_MODIFIED	0x1  /* indicating file change */
+#define P9FS_ROOT		0x2  /* indicating root p9fs node */
+#define P9FS_NODE_DELETED	0x4  /* indicating file or directory delete */
+#define P9FS_NODE_IN_SESSION	0x8  /* p9fs_node is in the session - virt_node_list */
+#define IS_ROOT(node)	((node)->flags & P9FS_ROOT)	/* nonzero for the root p9fs node */
+
+/*
+ * Link-count helpers; `inode` is a pointer to a struct p9fs_inode and
+ * only its i_links_count field is touched.
+ *
+ * Each macro is a single do { } while (0) statement.  None of the
+ * definitions ends in a line-continuation backslash, so they do not
+ * depend on the line that follows them to terminate the macro.
+ */
+#define P9FS_SET_LINKS(inode)	do { (inode)->i_links_count = 1; } while (0)
+
+#define P9FS_INCR_LINKS(inode)	do { (inode)->i_links_count++; } while (0)
+
+#define P9FS_DECR_LINKS(inode)	do { (inode)->i_links_count--; } while (0)
+
+#define P9FS_CLR_LINKS(inode)	do { (inode)->i_links_count = 0; } while (0)
+
+#define P9FS_MTX(_sc) (&(_sc)->p9fs_mtx)
+#define P9FS_LOCK(_sc) mtx_lock(P9FS_MTX(_sc))
+#define P9FS_UNLOCK(_sc) mtx_unlock(P9FS_MTX(_sc))
+#define P9FS_LOCK_INIT(_sc) mtx_init(P9FS_MTX(_sc), \
+    "P9FS session chain lock", NULL, MTX_DEF)
+#define P9FS_LOCK_DESTROY(_sc) mtx_destroy(P9FS_MTX(_sc))
+
+/* Session structure for the FS */
+struct p9fs_session {
+	unsigned char flags;				/* P9FS_PROTO_* and P9_ACCESS_* bits */
+	struct mount *p9fs_mount;			/* mount point */
+	struct p9fs_node rnp;				/* root p9fs node for this session */
+	uid_t uid;					/* the uid that has access */
+	const char *uname;				/* user name to mount as */
+	const char *aname;				/* name of remote file tree being mounted */
+	struct p9_client *clnt;				/* 9p client */
+	struct mtx p9fs_mtx;				/* mutex used for guarding the chain. */
+	STAILQ_HEAD( ,p9fs_node) virt_node_list;	/* list of p9fs nodes in this session */
+	struct p9_fid *mnt_fid;				/* attach fid saved by p9fs_init_session() */
+};
+
+struct p9fs_mount {
+	struct p9fs_session p9fs_session;		/* per instance session information */
+	struct mount *p9fs_mountp;			/* mount point */
+	int mount_tag_len;				/* length of the mount tag */
+	char *mount_tag;				/* mount tag; passed to p9_client_create() */
+};
+
+/* Protocol-version bits stored in p9fs_session.flags */
+enum virt_session_flags {
+	P9FS_PROTO_2000U	= 0x01,
+	P9FS_PROTO_2000L	= 0x02,
+};
+
+/* Session access flags; stored in the same flags field as the bits above */
+#define P9_ACCESS_ANY		0x04	/* single attach for all users */
+#define P9_ACCESS_SINGLE	0x08	/* access to only the user who mounts */
+#define P9_ACCESS_USER		0x10	/* new attach established for every user */
+#define P9_ACCESS_MASK	(P9_ACCESS_ANY|P9_ACCESS_SINGLE|P9_ACCESS_USER)
+
+u_quad_t p9fs_round_filesize_to_bytes(uint64_t filesize, uint64_t bsize);
+u_quad_t p9fs_pow2_filesize_to_bytes(uint64_t filesize, uint64_t bsize);
+
+/* These are all the P9FS specific vops */
+int p9fs_stat_vnode_l(void);
+int p9fs_stat_vnode_dotl(struct p9_stat_dotl *st, struct vnode *vp);
+int p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred);
+int p9fs_proto_dotl(struct p9fs_session *vses);
+struct p9_fid *p9fs_init_session(struct mount *mp, int *error);
+void p9fs_close_session(struct mount *mp);
+void p9fs_prepare_to_close(struct mount *mp);
+void p9fs_complete_close(struct mount *mp);
+int p9fs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp);
+int p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
+    struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
+    char *name);
+int p9fs_node_cmp(struct vnode *vp, void *arg);
+void p9fs_destroy_node(struct p9fs_node **npp);
+void p9fs_dispose_node(struct p9fs_node **npp);
+void p9fs_cleanup(struct p9fs_node *vp);
+void p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids);
+void p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *vfid,
+    int fid_type);
+void p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid,
+    int fid_type);
+struct p9_fid *p9fs_get_fid(struct p9_client *clnt,
+    struct p9fs_node *np, struct ucred *cred, int fid_type, int mode, int *error);
+
+#endif /* FS_P9FS_P9FS_H */
diff --git a/sys/fs/p9fs/p9fs_proto.h b/sys/fs/p9fs/p9fs_proto.h
new file mode 100644
index 000000000000..d78caa686f36
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_proto.h
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+/*
+ * Plan9 filesystem (9P2000.u) protocol definitions.
+ */
+
+#ifndef	FS_P9FS_P9FS_PROTO_H
+#define	FS_P9FS_P9FS_PROTO_H
+
+//#include <dev/virtio/virtio_fs_9p.h>
+
+/* File open modes */
+#define	P9FS_OREAD	0
+#define	P9FS_OWRITE	1
+#define	P9FS_ORDWR	2
+#define	P9FS_OEXEC	3
+#define	P9FS_OTRUNC	0x10
+
+#endif /* FS_P9FS_P9FS_PROTO_H */
diff --git a/sys/fs/p9fs/p9fs_subr.c b/sys/fs/p9fs/p9fs_subr.c
new file mode 100644
index 000000000000..d0f04f6c5e97
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_subr.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+/*-
+ * 9P filesystem subroutines. This file consists of all the Non VFS subroutines.
+ * It contains all of the functions related to the driver submission which form
+ * the upper layer i.e, p9fs driver. This will interact with the client to make
+ * sure we have correct API calls in the header.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/limits.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include "p9fs_proto.h"
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_protocol.h>
+#include <fs/p9fs/p9fs.h>
+
+int
+p9fs_proto_dotl(struct p9fs_session *vses)
+{
+	/* Nonzero (the raw flag bit, not 1) when P9FS_PROTO_2000L is set. */
+	return (vses->flags & P9FS_PROTO_2000L);
+}
+
+/* Initialize a p9fs session; returns the attach fid, or NULL on failure. */
+struct p9_fid *
+p9fs_init_session(struct mount *mp, int *error)
+{
+	struct p9fs_session *vses;
+	struct p9fs_mount *virtmp;
+	struct p9_fid *fid;
+	char *access;
+
+	virtmp = VFSTOP9(mp);
+	vses = &virtmp->p9fs_session;
+	vses->uid = P9_NONUNAME;
+	vses->uname = P9_DEFUNAME;
+	vses->aname = P9_DEFANAME;
+
+	/*
+	 * Create the client structure. Call into the driver to create
+	 * driver structures for the actual IO transfer.
+	 */
+	vses->clnt = p9_client_create(mp, error, virtmp->mount_tag);
+
+	if (vses->clnt == NULL) {
+		P9_DEBUG(ERROR, "%s: p9_client_create failed\n", __func__);
+		return (NULL);
+	}
+	/*
+	 * Find the client version and cache the copy. We will use this copy
+	 * throughout FS layer.
+	 */
+	if (p9_is_proto_dotl(vses->clnt))
+		vses->flags |= P9FS_PROTO_2000L;
+	else if (p9_is_proto_dotu(vses->clnt))
+		vses->flags |= P9FS_PROTO_2000U;
+
+	/* Set the access mode; a missing "access" option means per-user. */
+	access = vfs_getopts(mp->mnt_optnew, "access", error);
+	if (access == NULL)
+		vses->flags |= P9_ACCESS_USER;
+	else if (!strcmp(access, "any"))
+		vses->flags |= P9_ACCESS_ANY;
+	else if (!strcmp(access, "single"))
+		vses->flags |= P9_ACCESS_SINGLE;
+	else if (!strcmp(access, "user"))
+		vses->flags |= P9_ACCESS_USER;
+	else {
+		P9_DEBUG(ERROR, "%s: unknown access mode\n", __func__);
+		*error = EINVAL;
+		goto out;
+	}
+
+	*error = 0;	/* discard whatever vfs_getopts() stored above */
+	/* Attach with the backend host */
+	fid = p9_client_attach(vses->clnt, NULL, vses->uname, P9_NONUNAME,
+	    vses->aname, error);
+	vses->mnt_fid = fid;	/* stored before the error check below */
+
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: attach failed: %d\n", __func__, *error);
+		goto out;
+	}
+	P9_DEBUG(SUBR, "%s: attach successful fid :%p\n", __func__, fid);
+	fid->uid = vses->uid;
+
+	/* initialize the node list and its mutex; only done on success */
+	STAILQ_INIT(&vses->virt_node_list);
+	P9FS_LOCK_INIT(vses);
+
+	P9_DEBUG(SUBR, "%s: INIT session successful\n", __func__);
+
+	return (fid);
+out:
+	p9_client_destroy(vses->clnt);	/* NOTE(review): vses->clnt is not cleared */
+	return (NULL);
+}
+
+/* Begin to terminate a session: unhook parent links, then start disconnecting. */
+void
+p9fs_prepare_to_close(struct mount *mp)
+{
+	struct p9fs_session *ses = &VFSTOP9(mp)->p9fs_session;
+	struct p9fs_node *node, *parent, *next;
+
+	/*
+	 * Break the node->parent references: for every node on the
+	 * session list whose parent is set and is not the node itself,
+	 * clear the pointer and vrele() the former parent's vnode.
+	 */
+	STAILQ_FOREACH_SAFE(node, &ses->virt_node_list, p9fs_node_next, next) {
+		parent = node->parent;
+		if (parent == NULL || parent == node)
+			continue;
+		node->parent = NULL;
+		vrele(P9FS_NTOV(parent));
+	}
+
+	/* We are about to tear down; only clunk is allowed after this. */
+	p9_client_begin_disconnect(ses->clnt);
+}
+
+/* Shutdown a session: complete the disconnect of the mount's 9P client. */
+void
+p9fs_complete_close(struct mount *mp)
+{
+	struct p9fs_session *ses = &VFSTOP9(mp)->p9fs_session;
+
+	/*
+	 * Finish the close.  Only the disconnect is done here; the
+	 * client itself is not destroyed by this function, that is
+	 * left to p9fs_close_session().
+	 */
+	p9_client_disconnect(ses->clnt);
+}
+
+
+/*
+ * Called from unmount: disconnect the client, destroy it, and then tear
+ * down the session chain mutex.
+ */
+void
+p9fs_close_session(struct mount *mp)
+{
+	struct p9fs_session *ses = &VFSTOP9(mp)->p9fs_session;
+
+	/* Disconnect first, while ses->clnt has not been destroyed yet. */
+	p9fs_complete_close(mp);
+	/* Clean up the clnt structure. */
+	p9_client_destroy(ses->clnt);
+	P9FS_LOCK_DESTROY(ses);
+	P9_DEBUG(SUBR, "%s: Clean close session .\n", __func__);
+}
+
+/*
+ * Unlink and clunk every fid on np's vfid list and, unless leave_ofids is
+ * nonzero, every fid on its vofid list.  Neither list mutex is taken here.
+ */
+void
+p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids)
+{
+	struct p9_fid *fid, *tfid;
+
+	STAILQ_FOREACH_SAFE(fid, &np->vfid_list, fid_next, tfid) {
+		STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next);
+		p9_client_clunk(fid);
+	}
+
+	if (!leave_ofids) {
+		STAILQ_FOREACH_SAFE(fid, &np->vofid_list, fid_next, tfid) {
+			STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next);
+			p9_client_clunk(fid);
+		}
+	}
+}
+
+
+/*
+ * Remove a fid from its corresponding fid list, under that list's mutex.
+ * The fid is only unlinked.  Any other fid_type value is silently ignored.
+ */
+void
+p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *fid, int fid_type)
+{
+
+	if (fid_type == VFID) {
+		P9FS_VFID_LOCK(np);
+		STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next);
+		P9FS_VFID_UNLOCK(np);
+	} else if (fid_type == VOFID) {
+		P9FS_VOFID_LOCK(np);
+		STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next);
+		P9FS_VOFID_UNLOCK(np);
+	}
+}
+
+/*
+ * Append a fid to the tail of the corresponding fid list, under that
+ * list's mutex.  Any other fid_type value is silently ignored.
+ */
+void
+p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, int fid_type)
+{
+
+	if (fid_type == VFID) {
+		P9FS_VFID_LOCK(np);
+		STAILQ_INSERT_TAIL(&np->vfid_list, fid, fid_next);
+		P9FS_VFID_UNLOCK(np);
+	} else if (fid_type == VOFID) {
+		P9FS_VOFID_LOCK(np);
+		STAILQ_INSERT_TAIL(&np->vofid_list, fid, fid_next);
+		P9FS_VOFID_UNLOCK(np);
+	}
+}
+
+/* Build the root-to-np name array in *names; returns its length (0: no root found). */
+static int
+p9fs_get_full_path(struct p9fs_node *np, char ***names)
+{
+	int i, n;
+	struct p9fs_node *node;
+	char **wnames;
+
+	n = 0;	/* counts np and each ancestor below the root */
+	for (node = np ; (node != NULL) && !IS_ROOT(node) ; node = node->parent)
+		n++;
+
+	if (node == NULL)	/* chain ended before a root; *names is not written */
+		return (0);
+
+	wnames = malloc(n * sizeof(char *), M_TEMP, M_ZERO|M_WAITOK);
+	/* Fill back to front; entries alias each node's i_name, not copies. */
+	for (i = n-1, node = np; i >= 0 ; i--, node = node->parent)
+		wnames[i] = node->inode.i_name;
+
+	*names = wnames;	/* the caller in this file frees the array with M_TEMP */
+	return (n);
+}
+
+/*
+ * Return TRUE if this fid can be used for the requested mode.
+ */
+static int
+p9fs_compatible_mode(struct p9_fid *fid, int mode)
+{
+	/*
+	 * Return TRUE for an exact match. For OREAD and OWRITE, allow
+	 * existing ORDWR fids to match. Only the low two bits of the
+	 * fid's mode are checked; NOTE(review): the `mode` argument is
+	 * compared unmasked -- confirm callers never pass extra bits.
+	 * TODO: figure out if this is correct for O_APPEND
+	 */
+	int fid_mode = fid->mode & 3;
+	if (fid_mode == mode)
+		return (TRUE);
+	if (fid_mode == P9PROTO_ORDWR)
+		return (mode == P9PROTO_OREAD || mode == P9PROTO_OWRITE);
+	return (FALSE);
+}
+
+/*
+ * Return the first fid on np's fid_type list whose uid matches; for VOFID
+ * the fid must also pass p9fs_compatible_mode().  NULL if none matches.
+ */
+static struct p9_fid *
+p9fs_get_fid_from_uid(struct p9fs_node *np, uid_t uid, int fid_type, int mode)
+{
+	struct p9_fid *fid;
+
+	switch (fid_type) {
+	case VFID:	/* `mode` is not consulted for this list */
+		P9FS_VFID_LOCK(np);
+		STAILQ_FOREACH(fid, &np->vfid_list, fid_next) {
+			if (fid->uid == uid) {
+				P9FS_VFID_UNLOCK(np);	/* lock dropped before returning */
+				return (fid);
+			}
+		}
+		P9FS_VFID_UNLOCK(np);
+		break;
+	case VOFID:
+		P9FS_VOFID_LOCK(np);
+		STAILQ_FOREACH(fid, &np->vofid_list, fid_next) {
+			if (fid->uid == uid && p9fs_compatible_mode(fid, mode)) {
+				P9FS_VOFID_UNLOCK(np);	/* lock dropped before returning */
+				return (fid);
+			}
+		}
+		P9FS_VOFID_UNLOCK(np);
+		break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the fid for np that belongs to the current user (cred's uid, or
+ * the session uid when the session is P9_ACCESS_ANY).  np's own fid list is
+ * searched first; a missing VOFID yields NULL because open creates those.
+ * For a missing VFID the user's root fid is looked up (attaching the user
+ * if needed) and walked from the root down to np; the new fid is added to
+ * np's list.  Attach, path and walk failures return NULL and set *error.
+ */
+struct p9_fid *
+p9fs_get_fid(struct p9_client *clnt, struct p9fs_node *np, struct ucred *cred,
+    int fid_type, int mode, int *error)
+{
+	uid_t uid;
+	struct p9_fid *fid, *oldfid;
+	struct p9fs_node *root;
+	struct p9fs_session *vses;
+	int i, l, clone;
+	char **wnames = NULL;
+	uint16_t nwnames;
+
+	oldfid = NULL;
+	vses = np->p9fs_ses;
+
+	if (vses->flags & P9_ACCESS_ANY)
+		uid = vses->uid;
+	else if (cred)
+		uid = cred->cr_uid;
+	else
+		uid = 0;
+
+	/*
+	 * Search for the fid in corresponding fid list.
+	 * We should return NULL for VOFID if it is not present in the list.
+	 * Because VOFID should have been created during the file open.
+	 * If VFID is not present in the list then we should create one.
+	 */
+	fid = p9fs_get_fid_from_uid(np, uid, fid_type, mode);
+	if (fid != NULL || fid_type == VOFID)
+		return (fid);
+
+	/* Check root if the user is attached */
+	root = &np->p9fs_ses->rnp;
+	fid = p9fs_get_fid_from_uid(root, uid, fid_type, mode);
+	if (fid == NULL) {
+		/* Attach the user */
+		fid = p9_client_attach(clnt, NULL, NULL, uid,
+		    vses->aname, error);
+		if (*error != 0)
+			return (NULL);
+		p9fs_fid_add(root, fid, fid_type);
+	}
+
+	/* If we are looking for root then return it */
+	if (IS_ROOT(np))
+		return (fid);
+
+	/* Get full path from root to p9fs node */
+	nwnames = p9fs_get_full_path(np, &wnames);
+	/* A live node has a parent chain; never link root's fid onto np's list. */
+	KASSERT(nwnames != 0, ("%s: Directory of %s doesn't exist", __func__, np->inode.i_name));
+	if (nwnames == 0) {
+		*error = ENOENT;
+		return (NULL);
+	}
+
+	clone = 1;
+	i = 0;
+	while (i < nwnames) {
+		l = MIN(nwnames - i, P9_MAXWELEM);
+		/* Walk the next l components; only the first round clones. */
+		fid = p9_client_walk(fid, l, wnames + i, clone, error);
+		if (*error != 0) {
+			if (oldfid)
+				p9_client_clunk(oldfid);
+			fid = NULL;
+			goto bail_out;
+		}
+		oldfid = fid;
+		clone = 0;
+		i += l;
+	}
+	p9fs_fid_add(np, fid, fid_type);
+bail_out:
+	free(wnames, M_TEMP);
+	return (fid);
+}
diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c
new file mode 100644
index 000000000000..3451bc052187
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_vfsops.c
@@ -0,0 +1,610 @@
+/*-
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file implements the VFS operations for p9fs: module init/uninit,
+ * mount, unmount, root vnode lookup, statfs and vnode/node creation.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/fnv_hash.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <vm/uma.h>
+
+#include <fs/p9fs/p9fs_proto.h>
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9fs.h>
+
+SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "Plan 9 filesystem");
+
+/* vflush() attempts in p9fs_unmount(). Static now; can be made tunable. */
+#define P9FS_FLUSH_RETRIES 10
+
+static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
+static uma_zone_t p9fs_node_zone;	/* struct p9fs_node */
+uma_zone_t p9fs_io_buffer_zone;		/* P9FS_MTU-sized buffers */
+uma_zone_t p9fs_getattr_zone;		/* struct p9_stat_dotl */
+uma_zone_t p9fs_setattr_zone;		/* struct p9_iattr_dotl */
+uma_zone_t p9fs_pbuf_zone;		/* pbuf zone, see p9fs_init() */
+extern struct vop_vector p9fs_vnops;
+
+/* Option names p9_mount() lets through its vfs_filteropt() check. */
+static const char *p9fs_opts[] = {
+        "from", "trans", "access", NULL
+};
+
+/*
+ * Dispose of a p9fs node: drop its parent reference, detach it from its
+ * vnode and, unless it is the root node, free it.  *npp is set to NULL.
+ */
+void
+p9fs_dispose_node(struct p9fs_node **npp)
+{
+	struct p9fs_node *np = *npp;
+	struct vnode *nvp;
+
+	if (np == NULL)
+		return;
+
+	/* Drop the reference held on the parent; the root is its own parent. */
+	if (np->parent != NULL && np->parent != np)
+		vrele(P9FS_NTOV(np->parent));
+
+	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, np);
+	nvp = P9FS_NTOV(np);
+	nvp->v_data = NULL;
+
+	/* Only nodes of non-root vnodes are given back to the zone. */
+	if ((nvp->v_vflag & VV_ROOT) == 0) {
+		free(np->inode.i_name, M_TEMP);
+		uma_zfree(p9fs_node_zone, np);
+	}
+
+	*npp = NULL;
+}
+
+/* VFS init hook: create the zones p9fs allocates from. Always returns 0. */
+static int
+p9fs_init(struct vfsconf *vfsp)
+{
+	/* Zone for struct p9fs_node, used by p9fs_vget_common() */
+	p9fs_node_zone = uma_zcreate("p9fs node zone",
+	    sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Create the getattr_dotl zone */
+	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
+	    sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Create the setattr_dotl zone */
+	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
+	    sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Create the putpages zone */
+	p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
+
+	/*
+	 * Create the io_buffer zone pool to keep things simpler in case of
+	 * multiple threads. Each thread works with its own so there is no
+	 * contention.
+	 */
+	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
+	    P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	return (0);
+}
+
+/* VFS uninit hook: destroy every zone created in p9fs_init(); returns 0 */
+static int
+p9fs_uninit(struct vfsconf *vfsp)
+{
+
+	uma_zdestroy(p9fs_node_zone);
+	uma_zdestroy(p9fs_io_buffer_zone);
+	uma_zdestroy(p9fs_getattr_zone);
+	uma_zdestroy(p9fs_setattr_zone);
+	uma_zdestroy(p9fs_pbuf_zone);
+
+	return (0);
+}
+
+/* VFS unmount hook: flush the vnodes, close the session, free mount data */
+static int
+p9fs_unmount(struct mount *mp, int mntflags)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	int error, flags, i;
+
+	error = 0;
+	flags = 0;
+	vmp = VFSTOP9(mp);
+	if (vmp == NULL)
+		return (0);
+
+	vses = &vmp->p9fs_session;
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	p9fs_prepare_to_close(mp);
+	for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {
+
+		/* Flush everything on this mount point.*/
+		error = vflush(mp, 1, flags, curthread);
+		/* Done on success; a failed flush is retried only if forced. */
+		if (error == 0 || (mntflags & MNT_FORCE) == 0)
+			break;
+		/* Sleep until interrupted or 1 tick expires. */
+		error = tsleep(&error, PSOCK, "p9unmnt", 1);
+		if (error == EINTR)
+			break;
+		error = EBUSY;	/* what is reported if every retry fails */
+	}
+
+	if (error != 0)
+		goto out;
+	p9fs_close_session(mp);
+	/* Cleanup the mount structure. */
+	free(vmp, M_P9MNT);
+	mp->mnt_data = NULL;
+	return (error);
+out:
+	/* Unmount failed: set the transport status back to connected */
+	vses->clnt->trans_status = P9FS_CONNECT;
+	return (error);
+}
+
+/*
+ * vfs_hash comparison callback: match the vnode's p9fs node against a qid.
+ * Returns 0 on a match and 1 otherwise.  The root vnode matches on the qid
+ * path alone; any other vnode must also agree on type and version.
+ */
+int
+p9fs_node_cmp(struct vnode *vp, void *arg)
+{
+	struct p9_qid *want = arg;
+	struct p9fs_node *np = vp->v_data;
+
+	/* A vnode without a p9fs node, or with another path, never matches. */
+	if (np == NULL || np->vqid.qid_path != want->path)
+		return (1);
+
+	/* The root vnode is identified by its qid path alone. */
+	if ((vp->v_vflag & VV_ROOT) != 0)
+		return (0);
+
+	/* Everything else has to agree on type and version as well. */
+	if (np->vqid.qid_mode != want->type ||
+	    np->vqid.qid_version != want->version)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Cleanup p9fs node
+ *  - Destroy the FID LIST locks
+ *  - Dispose all node knowledge; *npp is NULL on return
+ */
+void
+p9fs_destroy_node(struct p9fs_node **npp)
+{
+	struct p9fs_node *np;
+
+	np = *npp;
+
+	if (np == NULL)
+		return;
+
+	/* Destroy the FID LIST locks */
+	P9FS_VFID_LOCK_DESTROY(np);
+	P9FS_VOFID_LOCK_DESTROY(np);
+
+	/* Pass npp itself so the caller's pointer is cleared, not a copy. */
+	p9fs_dispose_node(npp);
+}
+
+/*
+ * Common code used across p9fs to return vnode for the file represented
+ * by the fid.
+ * Lookup for the vnode in hash_list. This lookup is based on the qid path
+ * which is unique to a file. p9fs_node_cmp is called in this lookup process.
+ * I. If the vnode we are looking for is found in the hash list
+ *    1. Check if the vnode is a valid vnode by reloading its stats
+ *       a. if the reloading of the vnode stats returns error then remove the
+ *          vnode from hash list and return
+ *       b. If reloading of vnode stats returns without any error then, clunk the
+ *          new fid which was created for the vnode as we know that the vnode
+ *          already has a fid associated with it and return the vnode.
+ *          This is to avoid fid leaks
+ * II. If vnode is not found in the hash list then, create new vnode, p9fs
+ *     node and return the vnode
+ */
+int
+p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
+    struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
+    char *name)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	struct vnode *vp;
+	struct p9fs_node *node;
+	struct thread *td;
+	uint32_t hash;
+	int error, error_reload = 0;
+	struct p9fs_inode *inode;
+
+	td = curthread;
+	vmp = VFSTOP9(mp);
+	vses = &vmp->p9fs_session;
+
+	/* Look for vp in the hash_list */
+	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
+	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
+	    &fid->qid);
+	if (error != 0)
+		return (error);
+	else if (vp != NULL) {
+		if (vp->v_vflag & VV_ROOT) {
+			if (np == NULL)
+				p9_client_clunk(fid);
+			*vpp = vp;
+			return (0);
+		}
+		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
+		if (error != 0) {
+			node = vp->v_data;
+			/* Remove stale vnode from hash list */
+			vfs_hash_remove(vp);
+			node->flags |= P9FS_NODE_DELETED;
+
+			vput(vp);
+			*vpp = NULLVP;
+			vp = NULL;
+		} else {
+			*vpp = vp;
+			/* Clunk the new fid if not root */
+			p9_client_clunk(fid);
+			return (0);
+		}
+	}
+
+	/*
+	 * We must promote to an exclusive lock for vnode creation.  This
+	 * can happen if lookup is passed LOCKSHARED.
+	 */
+	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
+		flags &= ~LK_TYPE_MASK;
+		flags |= LK_EXCLUSIVE;
+	}
+
+	/* Allocate a new vnode. */
+	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
+		*vpp = NULLVP;
+		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
+		return (error);
+	}
+
+	/* If we dont have it, create one. */
+	if (np == NULL) {
+		np =  uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
+		/* Initialize the VFID list */
+		P9FS_VFID_LOCK_INIT(np);
+		STAILQ_INIT(&np->vfid_list);
+		p9fs_fid_add(np, fid, VFID);
+
+		/* Initialize the VOFID list */
+		P9FS_VOFID_LOCK_INIT(np);
+		STAILQ_INIT(&np->vofid_list);
+
+		vref(P9FS_NTOV(parent));
+		np->parent = parent;
+		np->p9fs_ses = vses; /* Map the current session */
+		inode = &np->inode;
+		/* Copy the name; M_WAITOK (as above) so malloc never returns NULL */
+		inode->i_name = malloc(strlen(name) + 1, M_TEMP, M_WAITOK | M_ZERO);
+		strlcpy(inode->i_name, name, strlen(name) + 1);
+	} else {
+		vp->v_type = VDIR; /* root vp is a directory */
+		vp->v_vflag |= VV_ROOT;
+		vref(vp); /* Increment a reference on root vnode during mount */
+	}
+
+	vp->v_data = np;
+	np->v_node = vp;
+	inode = &np->inode;
+	inode->i_qid_path = fid->qid.path;
+	P9FS_SET_LINKS(inode);
+
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
+	if (vp->v_type != VFIFO)
+		VN_LOCK_ASHARE(vp);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		/*
+		 * vput(vp) is already called from insmntque_stddtr().
+		 * Just goto 'out' to dispose the node.
+		 */
+		goto out;
+	}
+
+	/* Init the vnode with the disk info*/
+	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
+	if (error != 0) {
+		error_reload = 1;
+		goto out;
+	}
+
+	error = vfs_hash_insert(vp, hash, flags, td, vpp,
+	    p9fs_node_cmp, &fid->qid);
+	if (error != 0) {
+		goto out;
+	}
+
+	if (*vpp == NULL) {
+		P9FS_LOCK(vses);
+		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
+		np->flags |= P9FS_NODE_IN_SESSION;
+		P9FS_UNLOCK(vses);
+
+		*vpp = vp;
+	} else {
+		/*
+		 * Returning matching vp found in hashlist.
+		 * So cleanup the np allocated above in this context.
+		 */
+		if (!IS_ROOT(np)) {
+			p9fs_destroy_node(&np);
+		}
+	}
+
+	return (0);
+out:
+	/* Something went wrong, dispose the node */
+	if (!IS_ROOT(np)) {
+		p9fs_destroy_node(&np);
+	}
+
+	if (error_reload) {
+		vput(vp);
+	}
+
+	*vpp = NULLVP;
+	return (error);
+}
+
+/* Mount worker for p9fs: check options, start the session, init the root */
+static int
+p9_mount(struct mount *mp)
+{
+	struct p9_fid *fid;
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	struct p9fs_node *p9fs_root;
+	int error;
+	char *from;
+	int len;
+
+	/* Verify the validity of mount options */
+	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
+		return (EINVAL);
+
+	/* Extract the NUL terminated mount tag; reject a missing/empty value */
+	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
+	if (error != 0 || from == NULL || len <= 0 || from[len - 1] != '\0')
+		return (EINVAL);
+
+	/* Allocate and initialize the private mount structure. */
+	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
+	mp->mnt_data = vmp;
+	vmp->p9fs_mountp = mp;
+	vmp->mount_tag = from;
+	vmp->mount_tag_len = len;
+	vses = &vmp->p9fs_session;
+	vses->p9fs_mount = mp;
+	p9fs_root = &vses->rnp;
+	/* Hardware iosize from the Qemu */
+	mp->mnt_iosize_max = PAGE_SIZE;
+	/*
+	 * Init the session for the p9fs root. This creates a new root fid and
+	 * attaches the client and server.
+	 */
+	fid = p9fs_init_session(mp, &error);
+	if (fid == NULL) {
+		goto out;
+	}
+
+	P9FS_VFID_LOCK_INIT(p9fs_root);
+	STAILQ_INIT(&p9fs_root->vfid_list);
+	p9fs_fid_add(p9fs_root, fid, VFID);
+	P9FS_VOFID_LOCK_INIT(p9fs_root);
+	STAILQ_INIT(&p9fs_root->vofid_list);
+	p9fs_root->parent = p9fs_root;
+	p9fs_root->flags |= P9FS_ROOT;
+	p9fs_root->p9fs_ses = vses;
+	vfs_getnewfsid(mp);
+	strlcpy(mp->mnt_stat.f_mntfromname, from,
+	    sizeof(mp->mnt_stat.f_mntfromname));
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
+	MNT_IUNLOCK(mp);
+	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
+	/* Mount structures created. */
+
+	return (0);
+out:
+	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
+	if (vmp != NULL) {
+		free(vmp, M_P9MNT);
+		mp->mnt_data = NULL;
+	}
+	return (error);
+}
+
+/* VFS mount hook: new mounts go to p9_mount(), updates are handled here */
+static int
+p9fs_mount(struct mount *mp)
+{
+	int error;
+
+	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+		/* Fresh mount; run the unmount path if it fails. */
+		error = p9_mount(mp);
+		if (error != 0)
+			(void) p9fs_unmount(mp, MNT_FORCE);
+		return (error);
+	}
+
+	/*
+	 * Minimal MNT_UPDATE support: the only change honoured is dropping
+	 * MNT_RDONLY when the new options do not ask for "ro".
+	 */
+	if ((mp->mnt_flag & MNT_RDONLY) != 0 &&
+	    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0)
+		mp->mnt_flag &= ~MNT_RDONLY;
+	return (0);
+}
+
+/*
+ * Retrieve the root vnode of this mount. The root vnode is created the
+ * first time this is called after the mount; subsequent calls return that
+ * same vnode. On failure *vpp is NULLVP and the error is returned.
+ */
+static int
+p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_node *np;
+	struct p9_client *clnt;
+	struct p9_fid *vfid;
+	int error;
+
+	vmp = VFSTOP9(mp);
+	np = &vmp->p9fs_session.rnp;
+	clnt = vmp->p9fs_session.clnt;
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
+
+	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
+
+	if (error != 0) {
+		/*
+		 * While a disconnect is in progress fall back to the mount
+		 * fid, e.g. root unmounting a non-root user's p9fs mount.
+		 */
+		if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
+			vfid = vmp->p9fs_session.mnt_fid;
+		else {
+			*vpp = NULLVP;
+			return (error);
+		}
+	}
+	/* The root node is passed both as the node and as its own parent. */
+	error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	np->v_node = *vpp;
+	return (error);
+}
+
+/*
+ * Retrieve the file system statistics.  A failing statfs request is not
+ * an error for the caller: only default block and I/O sizes are set then.
+ */
+static int
+p9fs_statfs(struct mount *mp, struct statfs *buf)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_node *np;
+	struct p9_client *clnt;
+	struct p9_fid *vfid;
+	struct p9_statfs statfs;
+	int res, error;
+
+	vmp = VFSTOP9(mp);
+	np = &vmp->p9fs_session.rnp;
+	clnt = vmp->p9fs_session.clnt;
+	error = 0;
+
+	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	res = p9_client_statfs(vfid, &statfs);
+	if (res == 0) {
+		buf->f_type = statfs.type;
+		/*
+		 * We have a limit of 4k irrespective of what the
+		 * Qemu server can do.
+		 */
+		if (statfs.bsize > PAGE_SIZE)
+			buf->f_bsize = PAGE_SIZE;
+		else
+			buf->f_bsize = statfs.bsize;
+
+		buf->f_iosize = buf->f_bsize;
+		buf->f_blocks = statfs.blocks;
+		buf->f_bfree = statfs.bfree;
+		buf->f_bavail = statfs.bavail;
+		buf->f_files = statfs.files;
+		buf->f_ffree = statfs.ffree;
+	} else {
+		/* At least set these if the statfs request fails */
+		buf->f_bsize = PAGE_SIZE;
+		buf->f_iosize = buf->f_bsize;   /* XXX */
+	}
+
+	return (0);
+}
+
+static int
+p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
+{
+	/* File handle to vnode translation always fails with EINVAL. */
+	return (EINVAL);
+}
+
+struct vfsops p9fs_vfsops = {
+	.vfs_init  =	p9fs_init,
+	.vfs_uninit =	p9fs_uninit,
+	.vfs_mount =	p9fs_mount,
+	.vfs_unmount =	p9fs_unmount,
+	.vfs_root =	p9fs_root,
+	.vfs_statfs =	p9fs_statfs,
+	.vfs_fhtovp =	p9fs_fhtovp,	/* always fails with EINVAL */
+};
+/* Hand the operations table to VFS_SET() under the name p9fs. */
+VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
+MODULE_VERSION(p9fs, 1);
diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c
new file mode 100644
index 000000000000..227e2b93883e
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_vnops.c
@@ -0,0 +1,2236 @@
+/*
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+*	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* This file contains VFS file ops for the 9P protocol.
+ * This makes the upper layer of the p9fs driver. These functions interact
+ * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
+ * the user file operations are handled here.
+ */
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/rwlock.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vnode_pager.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9fs.h>
+#include <fs/p9fs/p9fs_proto.h>
+
+/* File permissions. */
+#define IEXEC		0000100 /* Executable. */
+#define IWRITE		0000200 /* Writeable. */
+#define IREAD		0000400 /* Readable. */
+#define ISVTX		0001000 /* Sticky bit. */
+#define ISGID		0002000 /* Set-gid. */
+#define ISUID		0004000 /* Set-uid. */
+
+static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
+extern uma_zone_t p9fs_io_buffer_zone;
+extern uma_zone_t p9fs_getattr_zone;
+extern uma_zone_t p9fs_setattr_zone;
+extern uma_zone_t p9fs_pbuf_zone;
+/* Vnode operations vector; p9fs_vget_common() passes it to getnewvnode(). */
+struct vop_vector p9fs_vnops;
+
+static uint32_t p9fs_unix2p9_mode(uint32_t mode);
+
+/*
+ * Set the modification time of the vnode's in-core inode to the current
+ * timestamp; only the seconds part (tv_sec) is kept.
+ */
+static void
+p9fs_itimes(struct vnode *vp)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct timespec now;
+
+	vfs_timestamp(&now);
+	np->inode.i_mtime = now.tv_sec;
+}
+
+/*
+ * Undo what create/vget set up for a p9fs node: unhash the vnode, unlink
+ * the node from the session's node list, purge the name cache, destroy the
+ * VM object, drop every fid and finally free the node itself.
+ */
+void
+p9fs_cleanup(struct p9fs_node *np)
+{
+	struct p9fs_session *ses;
+	struct vnode *nvp;
+	int was_listed;
+
+	if (np == NULL)
+		return;
+
+	ses = np->p9fs_ses;
+	nvp = P9FS_NTOV(np);
+
+	/* Unhash the vnode unless the node is already flagged as deleted. */
+	if ((np->flags & P9FS_NODE_DELETED) == 0)
+		vfs_hash_remove(nvp);
+
+	/* Take the node off the session list; stop here if it was not on it. */
+	P9FS_LOCK(ses);
+	was_listed = (np->flags & P9FS_NODE_IN_SESSION) != 0;
+	if (was_listed) {
+		np->flags &= ~P9FS_NODE_IN_SESSION;
+		STAILQ_REMOVE(&ses->virt_node_list, np, p9fs_node, p9fs_node_next);
+	}
+	P9FS_UNLOCK(ses);
+	if (!was_listed)
+		return;
+
+	/* Drop all name cache entries that refer to this vnode. */
+	cache_purge(nvp);
+
+	/* Destroy the vm object and flush associated pages. */
+	vnode_destroy_vobject(nvp);
+
+	/* Remove all the FID */
+	p9fs_fid_remove_all(np, FALSE);
+
+	/* Destroy the fid list locks and dispose of the node. */
+	p9fs_destroy_node(&np);
+}
+
+/*
+ * Reclaim VOP; it is defined to be called for every vnode.
+ * The actual work happens in p9fs_cleanup(), which drops the fids
+ * and frees the other resources allocated for the p9fs node.
+ * Always returns 0.
+ */
+static int
+p9fs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
+
+	/* p9fs_cleanup() copes with a NULL node. */
+	p9fs_cleanup(np);
+
+	return (0);
+}
+
+/*
+ * Called for vnodes whose usecount is zero: recycle the vnode right away
+ * if its p9fs node is flagged P9FS_NODE_DELETED.  A vnode whose node was
+ * already disposed of (P9FS_VTON() yields NULL) is left alone.
+ */
+static int
+p9fs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+
+	if (np == NULL)
+		return (0);
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
+	if (np->flags & P9FS_NODE_DELETED)
+		vrecycle(vp);
+	return (0);
+}
+
+struct p9fs_lookup_alloc_arg {
+	struct componentname *cnp;	/* component being looked up */
+	struct p9fs_node *dnp;		/* node of the directory searched */
+	struct p9_fid *newfid;		/* fid p9fs_lookup() got from the walk */
+};
+
+/* vn_vget_ino_gen() callback: get the vnode for the fid carried in arg */
+static int
+p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
+{
+	struct p9fs_lookup_alloc_arg *la = arg;
+
+	return (p9fs_vget_common(mp, NULL, la->cnp->cn_lkflags, la->dnp,
+	    la->newfid, vpp, la->cnp->cn_nameptr));
+}
+
+/*
+ * p9fs_lookup is called for every component name that is being searched for.
+ *
+ * I. If component is found on the server, we look for the in-memory
+ *    representation (vnode) of this component in the namecache.
+ *    A. If the node is found in the namecache, we check if the vnode is still
+ *	 valid.
+ *	 1. If it is still valid, return vnode.
+ *	 2. If it is not valid, we remove this vnode from the name cache and
+ *	    create a new vnode for the component and return that vnode.
+ *    B. If the vnode is not found in the namecache, we look for it in the
+ *       hash list.
+ *       1. If the vnode is in the hash list, we check if the vnode is still
+ *	    valid.
+ *	    a. If it is still valid, we add that vnode to the namecache for
+ *	       future lookups and return the vnode.
+ *	    b. If it is not valid, create a new vnode and p9fs node,
+ *	       initialize them and return the vnode.
+ *	 2. If the vnode is not found in the hash list, we create a new vnode
+ *	    and p9fs node, initialize them and return the vnode.
+ * II. If the component is not found on the server, an error code is returned.
+ *     A. For the creation case, we return EJUSTRETURN so VFS can handle it.
+ *     B. For all other cases, ENOENT is returned.
+ */
+static int
+p9fs_lookup(struct vop_lookup_args *ap)
+{
+	struct vnode *dvp;
+	struct vnode **vpp, *vp;
+	struct componentname *cnp;
+	struct p9fs_node *dnp; /* p9fs node of dvp */
+	struct p9fs_node *np;
+	struct p9fs_session *vses;
+	struct mount *mp; /* Get the mount point */
+	struct p9_fid *dvfid, *newfid;
+	uint64_t flags;
+	int error;
+	struct vattr vattr;
+	char tmpchr;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+	dnp = P9FS_VTON(dvp);
+	error = 0;
+	flags = cnp->cn_flags;
+	*vpp = NULLVP;
+
+	if (dnp == NULL)
+		return (ENOENT);
+
+	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
+		vref(dvp);
+		*vpp = dvp;
+		return (0);
+	}
+
+	vses = dnp->p9fs_ses;
+	mp = vses->p9fs_mount;
+
+	/* Refuse to delete/rename the last component on a read-only mount */
+	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
+	if (error)
+		return (error);
+
+	/* Get the directory fid used to walk to the component on the host */
+	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error)
+		return (error);
+
+	/*
+	 * Save the character present at namelen in nameptr string and
+	 * null terminate the string there to get the search name for
+	 * p9_client_walk. This is done to handle when lookup is for "a"
+	 * and component name contains a/b/c
+	 */
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	/*
+	 * If the client_walk fails, it means the file we are looking for does
+	 * not exist. Return EJUSTRETURN for a create/rename or just the error.
+	 */
+	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+	if (error != 0 || newfid == NULL) {
+		/* Clunk the newfid if it is not NULL */
+		if (newfid != NULL)
+			p9_client_clunk(newfid);
+
+		if (error != ENOENT)
+			return (error);
+
+		/* The requested file was not found. */
+		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		    (flags & ISLASTCN)) {
+
+			if (mp->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+
+			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+			    curthread);
+			if (!error) {
+				return (EJUSTRETURN);
+			}
+		}
+		return (error);
+	}
+
+	/* Look for the entry in the component cache*/
+	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
+	if (error > 0 && error != ENOENT) {
+		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
+		goto out;
+	}
+
+	if (error == -1) {
+		vp = *vpp;
+		/* Check if the entry in cache is stale or not */
+		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
+		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
+			goto out;
+		}
+		/*
+		 * The cached vnode is stale (qid mismatch or getattr error):
+		 * purge it and fall through to set up a fresh vnode.
+		 */
+		cache_purge(vp);
+		if (dvp != vp)
+			vput(vp);
+		else
+			vrele(vp);
+
+		*vpp = NULLVP;
+	} else if (error == ENOENT) {
+		if (VN_IS_DOOMED(dvp))
+			goto out;
+		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
+			error = ENOENT;
+			goto out;
+		}
+		cache_purge_negative(dvp);
+	}
+	/* Reset values */
+	error = 0;
+	vp = NULLVP;
+
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	/*
+	 * Looks like we have found an entry. Now take care of all other cases.
+	 */
+	if (flags & ISDOTDOT) {
+		struct p9fs_lookup_alloc_arg p9aa;
+		p9aa.cnp = cnp;
+		p9aa.dnp = dnp;
+		p9aa.newfid = newfid;
+		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
+		if (error)
+			goto out;
+		*vpp = vp;
+	} else {
+		/*
+		 * client_walk is equivalent to searching a component name in a
+		 * directory(fid) here. If new fid is returned, we have found an
+		 * entry for this component name so, go and create the rest of
+		 * the vnode infra(vget_common) for the returned newfid.
+		 */
+		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+		    && (flags & ISLASTCN)) {
+			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+			    curthread);
+			if (error)
+				goto out;
+
+			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+			    dnp, newfid, &vp, cnp->cn_nameptr);
+			if (error)
+				goto out;
+
+			*vpp = vp;
+			np = P9FS_VTON(vp);
+			if ((dnp->inode.i_mode & ISVTX) &&
+			    cnp->cn_cred->cr_uid != 0 &&
+			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
+			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
+				vput(*vpp);
+				*vpp = NULL;
+				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+				return (EPERM);
+			}
+		} else {
+			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+			    dnp, newfid, &vp, cnp->cn_nameptr);
+			if (error)
+				goto out;
+			*vpp = vp;
+		}
+	}
+
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+	/* Store the result in the cache if MAKEENTRY is specified in flags */
+	if ((cnp->cn_flags & MAKEENTRY) != 0)
+		cache_enter(dvp, *vpp, cnp);
+	return (error);
+out:
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	p9_client_clunk(newfid);
+	return (error);
+}
+
+/*
+ * Shared creation path for files, directories and device nodes.  The parent
+ * directory fid is cloned with a zero-element walk and the clone is used to
+ * create the entry on the server with the requested name and permissions.
+ * Unless a hard link is being made, the new entry is then walked to from
+ * the parent and a vnode/p9fs node pair is set up for it through
+ * p9fs_vget_common(), which fills in *vpp.  The cloned fid is clunked
+ * before returning, whether the creation worked or not.
+ *
+ * dnp is the node of the parent directory and cnp names the new entry;
+ * extension, perm and mode are handed to p9_client_file_create() as is.
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ */
+static int
+create_common(struct p9fs_node *dnp, struct componentname *cnp,
+    char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
+{
+	struct p9fs_session *ses = dnp->p9fs_ses;
+	struct mount *mp = ses->p9fs_mount;
+	struct p9_fid *dirfid;
+	struct p9_fid *clonefid;
+	struct p9_fid *newfid = NULL;
+	char saved;
+	int error = 0;
+
+	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
+
+	/* Fid of the parent directory for the caller's credentials. */
+	dirfid = p9fs_get_fid(ses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/* A walk with no names clones the directory fid. */
+	clonefid = p9_client_walk(dirfid, 0, NULL, 1, &error);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Terminate cn_nameptr at cn_namelen while it is handed to the 9P
+	 * client; the overwritten character is put back before returning.
+	 */
+	saved = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	error = p9_client_file_create(clonefid, cnp->cn_nameptr, perm, mode,
+	    extension);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
+		goto out;
+	}
+
+	/* A hard link is complete here; anything else is looked up again. */
+	if ((perm & P9PROTO_DMLINK) == 0) {
+		newfid = p9_client_walk(dirfid, 1, &cnp->cn_nameptr, 1, &error);
+		if (newfid == NULL)
+			goto out;
+
+		/* Set up the vnode and p9fs node; *vpp is filled in here. */
+		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, dnp,
+		    newfid, vpp, cnp->cn_nameptr);
+		if (error != 0)
+			goto out;
+
+		/* Enter the new name into the name cache when asked to. */
+		if ((cnp->cn_flags & MAKEENTRY) != 0)
+			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
+	}
+	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
+	    __func__, *vpp, dnp, (uintmax_t)dirfid->fid);
+
+	/* Success: only the cloned fid is clunked. */
+	if (clonefid != NULL)
+		(void)p9_client_clunk(clonefid);
+
+	cnp->cn_nameptr[cnp->cn_namelen] = saved;
+	return (0);
+out:
+	/* Failure: clunk the clone and, if it was obtained, the new fid. */
+	if (clonefid != NULL)
+		(void)p9_client_clunk(clonefid);
+
+	if (newfid != NULL)
+		(void)p9_client_clunk(newfid);
+
+	cnp->cn_nameptr[cnp->cn_namelen] = saved;
+	return (error);
+}
+
+/*
+ * File creation VOP.  The requested vnode type and mode are converted to
+ * 9P permissions and create_common() does the rest, using open mode
+ * P9PROTO_ORDWR.  On success P9FS_INCR_LINKS() is applied to the parent
+ * directory's inode; otherwise the create_common() error is returned.
+ */
+static int
+p9fs_create(struct vop_create_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct p9fs_node *dnp = P9FS_VTON(dvp);
+	struct p9fs_inode *dinode = &dnp->inode;
+	struct vattr *vap = ap->a_vap;
+	uint32_t mode;
+	uint32_t perm;
+	int error;
+
+	mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	/* create_common() fills in *ap->a_vpp for the new file. */
+	error = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
+	    ap->a_vpp);
+	if (error != 0)
+		return (error);
+
+	/* Account for the new entry in the parent directory. */
+	P9FS_INCR_LINKS(dinode);
+
+	return (0);
+}
+
+/*
+ * Directory creation VOP.  S_IFDIR is added to the requested mode before
+ * it is converted to 9P permissions; create_common() then creates the
+ * directory and P9FS_INCR_LINKS() is applied to the parent on success.
+ */
+static int
+p9fs_mkdir(struct vop_mkdir_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct p9fs_node *dnp = P9FS_VTON(dvp);
+	struct p9fs_inode *dinode = &dnp->inode;
+	struct vattr *vap = ap->a_vap;
+	uint32_t mode;
+	uint32_t perm;
+	int error;
+
+	mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	/* create_common() fills in *ap->a_vpp for the new directory. */
+	error = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
+	    ap->a_vpp);
+	if (error != 0)
+		return (error);
+
+	/* Account for the new entry in the parent directory. */
+	P9FS_INCR_LINKS(dinode);
+
+	return (0);
+}
+
+/*
+ * VOP_MKNOD handler. Converts the requested type and mode into a 9P
+ * permission word and lets create_common() perform the create, passing
+ * P9PROTO_OREAD where p9fs_create() passes P9PROTO_ORDWR. On success the
+ * parent directory's cached inode is updated via P9FS_INCR_LINKS().
+ *
+ * Returns 0 on success or the error reported by create_common().
+ */
+static int
+p9fs_mknod(struct vop_mknod_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *dnp = P9FS_VTON(dvp);
+	struct p9fs_inode *dinode = &dnp->inode;
+	uint32_t mode, perm;
+	int ret;
+
+	/* Combine the vnode type with the permission bits, then translate. */
+	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, ap->a_vpp);
+	if (ret != 0)
+		return (ret);
+
+	/* Account for the new directory entry in the parent. */
+	P9FS_INCR_LINKS(dinode);
+
+	return (0);
+}
+
+/*
+ * Convert kernel open flags (FREAD/FWRITE style) to a 9P open mode. When
+ * "extended" is non-zero, O_EXCL and O_APPEND are carried over as well.
+ */
+static int
+p9fs_uflags_mode(int uflags, int extended)
+{
+	uint32_t ret;
+
+	/* Convert first to O flags.*/
+	uflags = OFLAGS(uflags);
+
+	switch (uflags & 3) {
+	case O_WRONLY:
+		ret = P9PROTO_OWRITE;
+		break;
+	case O_RDWR:
+		ret = P9PROTO_ORDWR;
+		break;
+	case O_RDONLY:
+	default:
+		/* Unknown access mode: never return ret uninitialized. */
+		ret = P9PROTO_OREAD;
+		break;
+	}
+
+	if (extended) {
+		if (uflags & O_EXCL)
+			ret |= P9PROTO_OEXCL;
+		if (uflags & O_APPEND)
+			ret |= P9PROTO_OAPPEND;
+	}
+	return (ret);
+}
+
+/*
+ * VOP_OPEN handler. Refreshes the cached attributes from the server, drops
+ * cached pages if the file was flagged as modified, and then makes sure an
+ * open fid (VOFID) exists for the caller's credentials and open mode. If one
+ * is already present its open count is incremented; otherwise the node's
+ * VFID is cloned with p9_client_walk(), opened on the server and added to
+ * the node, and the VM object for the vnode is created.
+ *
+ * Only VREG, VDIR and VLNK vnodes are accepted; others get EOPNOTSUPP.
+ */
+static int
+p9fs_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_session *vses = np->p9fs_ses;
+	struct p9_fid *vofid, *vfid;
+	size_t filesize;
+	uint32_t mode;
+	int error = 0;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	switch (vp->v_type) {
+	case VREG:
+	case VDIR:
+	case VLNK:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
+	if (error != 0)
+		return (error);
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	/*
+	 * The reload above flags the node when the qid version changed;
+	 * flush the cached pages of a regular file in that case.
+	 */
+	if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
+		error = vinvalbuf(vp, 0, 0, 0);
+		if (error != 0)
+			return (error);
+		np->flags &= ~P9FS_NODE_MODIFIED;
+	}
+
+	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/* Kernel fflags -> 9P open mode. */
+	mode = p9fs_uflags_mode(ap->a_mode, 1);
+
+	/* Reuse an existing open fid for this user and mode when available. */
+	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
+	if (vofid != NULL) {
+		vofid->v_opens++;
+		return (0);
+	}
+
+	/* None yet: clone the VFID and open the clone on the server. */
+	vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
+	if (error != 0)
+		return (error);
+
+	error = p9_client_open(vofid, mode);
+	if (error != 0) {
+		p9_client_clunk(vofid);
+		return (error);
+	}
+
+	/* First open: set the count, create the VM object, add the fid. */
+	vofid->v_opens = 1;
+	filesize = np->inode.i_size;
+	vnode_create_vobject(vp, filesize, ap->a_td);
+	p9fs_fid_add(np, vofid, VOFID);
+
+	return (0);
+}
+
+/*
+ * VOP_CLOSE handler. Looks up the open fid (VOFID) that matches the
+ * caller's credentials and the 9P mode derived from a_fflag, and decrements
+ * its open count. The fid is not clunked here; that is deferred to
+ * p9fs_reclaim. A vnode without a p9fs node, or without a matching open
+ * fid, is accepted silently.
+ * Always returns 0.
+ */
+static int
+p9fs_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_session *vses;
+	struct p9_fid *vofid;
+	int error = 0;
+	int mode;
+
+	/* Nothing to do for a vnode with no p9fs node attached. */
+	if (np == NULL)
+		return (0);
+
+	vses = np->p9fs_ses;
+
+	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
+
+	/* Translate kernel fflags to the 9P mode used to look up open fids. */
+	mode = p9fs_uflags_mode(ap->a_fflag, 1);
+	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
+
+	/* A missing open fid is not treated as an error. */
+	if (vofid != NULL)
+		vofid->v_opens--;
+
+	return (0);
+}
+
+/*
+ * Decide whether the requested access can be attempted at all, based on the
+ * file type in vap and the mount's read-only flag. Returns EROFS for a
+ * modifying access to a directory, symlink or regular file on a read-only
+ * mount, EINVAL for an unrecognized type, and 0 otherwise.
+ */
+static int
+p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
+{
+	switch (vap->va_type) {
+	case VBLK:
+	case VCHR:
+	case VSOCK:
+	case VFIFO:
+		/*
+		 * Special nodes: even on read-only mounted file systems
+		 * these are allowed to be written to if permissions allow.
+		 */
+		return (0);
+	case VDIR:
+	case VLNK:
+	case VREG:
+		/* Normal nodes cannot be modified on a read-only mount. */
+		if ((mode & VMODIFY_PERMS) == 0)
+			return (0);
+		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
+			return (0);
+		return (EROFS);
+	default:
+		/* No idea what this is */
+		return (EINVAL);
+	}
+}
+
+/*
+ * VOP_ACCESS handler. Fetches the file's attributes with VOP_GETATTR(),
+ * rejects accesses that p9fs_check_possible() rules out, and otherwise
+ * defers to the generic vaccess() check using the file's mode, owner and
+ * group.
+ * Returns 0 if access is granted or an errno value if not.
+ */
+static int
+p9fs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct ucred *cred = ap->a_cred;
+	accmode_t accmode = ap->a_accmode;
+	struct vattr vap;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	/* The checks below need the file's type, mode and ownership. */
+	error = VOP_GETATTR(vp, &vap, cred);
+	if (error != 0)
+		return (error);
+
+	/* Screen out read-only mounts and unknown vnode types. */
+	error = p9fs_check_possible(vp, &vap, accmode);
+	if (error != 0)
+		return (error);
+
+	/* Call the Generic Access check in VOPS*/
+	return (vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid,
+	    accmode, cred));
+}
+
+/*
+ * Fetch fresh attributes for vp from the server with p9_client_getattr()
+ * and store them in the node's cached inode via p9fs_stat_vnode_dotl().
+ * An open fid (VOFID, read mode) is used when one exists for cred;
+ * otherwise the node's VFID is used.
+ * Returns 0 on success or an errno value.
+ */
+int
+p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
+{
+	struct p9fs_node *node = P9FS_VTON(vp);
+	struct p9fs_session *vses = node->p9fs_ses;
+	struct p9_stat_dotl *stat;
+	struct p9_fid *vfid;
+	int error = 0;
+
+	/* Prefer an already-open fid; fall back to the node's VFID. */
+	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
+	if (vfid == NULL) {
+		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
+		if (error)
+			return (error);
+	}
+
+	/* Zeroed scratch buffer that receives the getattr reply. */
+	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
+
+	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
+	if (error == 0) {
+		/* Init the vnode with the disk info */
+		p9fs_stat_vnode_dotl(stat, vp);
+	} else {
+		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
+	}
+
+	/* The scratch buffer is released on both paths. */
+	if (stat != NULL)
+		uma_zfree(p9fs_getattr_zone, stat);
+
+	return (error);
+}
+
+/*
+ * VOP_GETATTR handler. Reloads the node's attributes from the server and
+ * then copies the cached inode fields into the caller's struct vattr.
+ * Returns ENOENT if the vnode has no p9fs node, the reload error if the
+ * server request fails, and 0 otherwise.
+ */
+static int
+p9fs_getattr_dotl(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct p9fs_node *node = P9FS_VTON(vp);
+	struct p9fs_inode *inode;
+	int error;
+
+	if (node == NULL)
+		return (ENOENT);
+	inode = &node->inode;
+
+	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));
+
+	/* Reload our stats once to get the right values.*/
+	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
+		return (error);
+	}
+
+	/* Reset vap, then fill in the fields tracked in the cached inode. */
+	VATTR_NULL(vap);
+	/* File type, permissions, ownership and link count. */
+	vap->va_type = IFTOVT(inode->i_mode);
+	vap->va_mode = inode->i_mode;
+	vap->va_uid = inode->n_uid;
+	vap->va_gid = inode->n_gid;
+	vap->va_nlink = inode->i_links_count;
+	vap->va_flags = inode->i_flags;
+
+	/* Identity, versioning and sizes. */
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = inode->i_qid_path;
+	vap->va_gen = inode->gen;
+	vap->va_filerev = inode->data_version;
+	vap->va_size = inode->i_size;
+	vap->va_blocksize = inode->blksize;
+	vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;
+	vap->va_vaflags = 0;
+
+	/* Timestamps (seconds and nanoseconds). */
+	vap->va_atime.tv_sec = inode->i_atime;
+	vap->va_atime.tv_nsec = inode->i_atime_nsec;
+	vap->va_mtime.tv_sec = inode->i_mtime;
+	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
+	vap->va_ctime.tv_sec = inode->i_ctime;
+	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
+	return (0);
+}
+
+/*
+ * Translate a UNIX mode word into 9P permission bits: the low nine
+ * permission bits are kept, and the file type and setuid/setgid/sticky
+ * bits are mapped to their P9PROTO_DM* equivalents.
+ */
+static uint32_t
+p9fs_unix2p9_mode(uint32_t mode)
+{
+	uint32_t p9mode = mode & 0777;
+	/* File type bits first, then the set-id and sticky bits. */
+	if (S_ISDIR(mode))
+		p9mode |= P9PROTO_DMDIR;
+	if (S_ISSOCK(mode))
+		p9mode |= P9PROTO_DMSOCKET;
+	if (S_ISLNK(mode))
+		p9mode |= P9PROTO_DMSYMLINK;
+	if (S_ISFIFO(mode))
+		p9mode |= P9PROTO_DMNAMEDPIPE;
+	p9mode |= (mode & S_ISUID) != 0 ? P9PROTO_DMSETUID : 0;
+	p9mode |= (mode & S_ISGID) != 0 ? P9PROTO_DMSETGID : 0;
+	p9mode |= (mode & S_ISVTX) != 0 ? P9PROTO_DMSETVTX : 0;
+
+	return (p9mode);
+}
+
+/*
+ * Copy a 9P2000.L getattr reply into the node's cached inode, resize the
+ * pager of a regular file whose size changed, re-derive v_type from st_mode
+ * and flag P9FS_NODE_MODIFIED when the qid version changed. Returns 0.
+ */
+int
+p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *inode = &np->inode;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	/* Update the pager size if file size changes on host */
+	if (inode->i_size != stat->st_size) {
+		inode->i_size = stat->st_size;
+		if (vp->v_type == VREG)
+			vnode_pager_setsize(vp, inode->i_size);
+	}
+
+	inode->i_atime = stat->st_atime_sec;
+	inode->i_atime_nsec = stat->st_atime_nsec;
+	inode->i_mtime = stat->st_mtime_sec;
+	inode->i_mtime_nsec = stat->st_mtime_nsec;
+	inode->i_ctime = stat->st_ctime_sec;
+	inode->i_ctime_nsec = stat->st_ctime_nsec;
+	inode->n_uid = stat->st_uid;
+	inode->n_gid = stat->st_gid;
+	inode->i_mode = stat->st_mode;
+	vp->v_type = IFTOVT(inode->i_mode);
+	inode->i_links_count = stat->st_nlink;
+	inode->blksize = stat->st_blksize;
+	inode->blocks = stat->st_blocks;
+	inode->gen = stat->st_gen;
+	inode->data_version = stat->st_data_version;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	/* Flag the node before the stored qid is overwritten below. */
+	if (np->vqid.qid_version != stat->qid.version)
+		np->flags |= P9FS_NODE_MODIFIED;
+	memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
+	return (0);
+}
+
+/*
+ * Fill a 9P2000.L setattr request from the cached inode: mode, owner,
+ * group, size and the access/modification timestamps. The "valid" mask
+ * is not touched here. Always returns 0.
+ */
+static int
+p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
+{
+	p9attr->mode = inode->i_mode;
+	p9attr->uid = inode->n_uid;
+	p9attr->gid = inode->n_gid;
+	p9attr->size = inode->i_size;
+	p9attr->atime_sec = inode->i_atime;
+	p9attr->atime_nsec = inode->i_atime_nsec;
+	p9attr->mtime_sec = inode->i_mtime;
+	p9attr->mtime_nsec = inode->i_mtime_nsec;
+	return (0);
+}
+
+/*
+ * Apply an ownership change to the cached inode; a uid or gid of VNOVAL
+ * keeps the current value. Requires VWRITE_OWNER access, plus
+ * PRIV_VFS_CHOWN when giving the file to another user or to a group the
+ * caller is not a member of. Set-id bits are cleared on an actual change
+ * unless PRIV_VFS_RETAINSUGID is held. Nothing is sent to the server here.
+ */
+static int
+p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *td)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *inode = &np->inode;
+	uid_t ouid;
+	gid_t ogid;
+	int error;
+
+	/* VNOVAL leaves the corresponding id unchanged. */
+	if (uid == (uid_t)VNOVAL)
+		uid = inode->n_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = inode->n_gid;
+
+	/* Changing ownership requires VWRITE_OWNER on the file. */
+	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Giving the file to another user, or to a group the caller does
+	 * not belong to, additionally requires privilege.
+	 */
+	if ((uid != inode->n_uid && uid != cred->cr_uid) ||
+	    (gid != inode->n_gid && !groupmember(gid, cred))) {
+		error = priv_check_cred(cred, PRIV_VFS_CHOWN);
+		if (error != 0)
+			return (error);
+	}
+
+	ouid = inode->n_uid;
+	ogid = inode->n_gid;
+	inode->n_uid = uid;
+	inode->n_gid = gid;
+
+	/* Drop set-id bits on an actual ownership change unless privileged. */
+	if ((inode->i_mode & (ISUID | ISGID)) != 0 &&
+	    (ouid != uid || ogid != gid) &&
+	    priv_check_cred(cred, PRIV_VFS_RETAINSUGID) != 0)
+		inode->i_mode &= ~(ISUID | ISGID);
+
+	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
+	return (0);
+}
+
+/*
+ * Apply a chmod to the cached inode after the usual permission checks:
+ * VADMIN access on the file, PRIV_VFS_STICKYFILE for the sticky bit on a
+ * non-directory, PRIV_VFS_SETGID for setgid with a group the caller is not
+ * in, and PRIV_VFS_ADMIN for setuid on a file the caller does not own.
+ * Only the ALLPERMS bits of i_mode are replaced; the server is not
+ * contacted here.
+ * Returns 0 on success or an errno value.
+ */
+static int
+p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *inode = &np->inode;
+	uint32_t nmode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n",  __func__, vp, mode, cred, td);
+
+	/* Changing permissions requires VADMIN on the file. */
+	error = VOP_ACCESS(vp, VADMIN, cred, td);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Only privileged processes may set the sticky bit on a
+	 * non-directory. This is allowed in jail(8).
+	 */
+	if (vp->v_type != VDIR && (mode & S_ISTXT) != 0 &&
+	    priv_check_cred(cred, PRIV_VFS_STICKYFILE) != 0)
+		return (EFTYPE);
+
+	/* Likewise for setgid when the caller is not in the file's group. */
+	if (!groupmember(inode->n_gid, cred) && (mode & ISGID) != 0) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Deny setting setuid if we are not the file owner. */
+	if ((mode & ISUID) != 0 && inode->n_uid != cred->cr_uid) {
+		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Replace the ALLPERMS bits and keep every other bit of i_mode. */
+	nmode = inode->i_mode;
+	nmode &= ~ALLPERMS;
+	nmode |= (mode & ALLPERMS);
+	inode->i_mode = nmode;
+
+	P9_DEBUG(VOPS, "%s: to mode %x  %d \n ", __func__, nmode, error);
+
+	return (error);
+}
+
+/*
+ * VOP_SETATTR handler for 9P2000.L. Rejects attributes that cannot be set,
+ * applies the requested ownership, mode, size and time changes to the cached
+ * inode, records the affected fields in p9attr->valid and pushes the result
+ * to the server with p9_client_setattr(). A size change that does not reach
+ * the server is rolled back in the cached inode before returning.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+p9fs_setattr_dotl(struct vop_setattr_args *ap)
+{
+	struct vnode *vp;
+	struct vattr *vap;
+	struct p9fs_node *node;
+	struct p9fs_inode *inode;
+	struct ucred *cred;
+	struct thread *td;
+	struct p9_iattr_dotl *p9attr;
+	struct p9fs_session *vses;
+	struct p9_fid *vfid;
+	uint64_t oldfilesize = 0;
+	int error = 0, size_changed = 0;
+
+	vp = ap->a_vp;
+	vap = ap->a_vap;
+	node = P9FS_VTON(vp);
+	inode = &node->inode;
+	cred = ap->a_cred;
+	td = curthread;
+	vses = node->p9fs_ses;
+
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
+		return (EINVAL);
+	}
+	/* Disallow write attempts on read only filesystem */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	/* Setting of flags is not supported */
+	if (vap->va_flags != VNOVAL)
+		return (EOPNOTSUPP);
+
+	/* M_WAITOK allocations sleep instead of failing: no NULL check. */
+	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
+
+	/* Check if we need to change the ownership of the file*/
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
+		    vp, td, vap->va_uid, vap->va_gid);
+
+		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
+		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
+			P9PROTO_SETATTR_MODE;
+		if (error)
+			goto out;
+	}
+
+	/* Check for mode changes */
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
+		    vap->va_mode);
+
+		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
+		p9attr->valid |= P9PROTO_SETATTR_MODE;
+		if (error)
+			goto out;
+	}
+
+	/* Update the size of the file and update mtime */
+	if (vap->va_size != (uint64_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
+		    vp, td, (uintmax_t)vap->va_size);
+		switch (vp->v_type) {
+			case VDIR:
+				error = EISDIR;
+				goto out;
+			case VLNK:
+			case VREG:
+				/* Invalidate cached pages of vp */
+				error = vinvalbuf(vp, 0, 0, 0);
+				if (error)
+					goto out;
+				oldfilesize = inode->i_size;
+				inode->i_size = vap->va_size;
+				size_changed = 1;
+				/* Update the p9fs_inode time */
+				p9fs_itimes(vp);
+				p9attr->valid |= P9PROTO_SETATTR_SIZE |
+				    P9PROTO_SETATTR_ATIME |
+				    P9PROTO_SETATTR_MTIME |
+				    P9PROTO_SETATTR_ATIME_SET |
+				    P9PROTO_SETATTR_MTIME_SET ;
+				break;
+			default:
+				goto out;
+		}
+	} else if (vap->va_atime.tv_sec != VNOVAL ||
+		    vap->va_mtime.tv_sec != VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
+		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
+		    (uintmax_t)vap->va_mtime.tv_sec);
+		/* Update the p9fs_inode times */
+		p9fs_itimes(vp);
+		p9attr->valid |= P9PROTO_SETATTR_ATIME |
+			P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
+			P9PROTO_SETATTR_MTIME_SET;
+	}
+
+	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
+	if (vfid == NULL) {
+		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
+		if (error)
+			goto out;
+	}
+
+	/* Write the inode structure values into p9attr */
+	p9fs_inode_to_iattr(inode, p9attr);
+	error = p9_client_setattr(vfid, p9attr);
+	if (error == 0 && size_changed && vp->v_type == VREG)
+		vnode_pager_setsize(vp, inode->i_size);
+out:
+	/* Undo the cached size if the change never reached the server. */
+	if (error != 0 && size_changed)
+		inode->i_size = oldfilesize;
+	uma_zfree(p9fs_setattr_zone, p9attr);
+	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
+	return (error);
+}
+
+struct open_fid_state {	/* filled in by p9fs_get_open_fid() */
+	struct p9_fid *vofid;	/* open fid found (or created) for the I/O */
+	int fflags;		/* flags passed to VOP_OPEN, reused for VOP_CLOSE */
+	int opened;		/* TRUE if p9fs_get_open_fid() called VOP_OPEN */
+};
+
+/*
+ * Find an open fid for vp matching cr and fflags, opening the vnode via
+ * VOP_OPEN() when none exists (statep->opened then tells
+ * p9fs_release_open_fid() to close it). Returns 0, the open error or EBADF.
+ * TODO: change this to take P9PROTO_* mode and avoid routing through
+ * VOP_OPEN, factoring out implementation of p9fs_open.
+ */
+static int
+p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_session *vses = np->p9fs_ses;
+	struct p9_fid *vofid;
+	int error = 0, mode = p9fs_uflags_mode(fflags, TRUE);
+
+	statep->opened = FALSE;
+	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
+	if (vofid != NULL) {
+		statep->vofid = vofid;
+		return (0);
+	}
+	/* No matching open fid: open the vnode ourselves and look again. */
+	error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
+	if (error)
+		return (error);
+	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
+	if (vofid == NULL)
+		return (EBADF);
+	statep->fflags = fflags;
+	statep->opened = TRUE;
+	statep->vofid = vofid;
+	return (0);
+}
+
+/* Undo p9fs_get_open_fid(): close vp only if statep says it was opened. */
+static void
+p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
+{
+	if (statep->opened)
+		(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
+}
+
+/*
+ * VOP_READ handler for regular files. Data is fetched with p9_client_read()
+ * into a buffer from p9fs_io_buffer_zone and copied out with uiomove(), one
+ * chunk at a time, until the request is satisfied, the cached file size is
+ * reached or the server returns no data. Returns 0 or an errno value.
+ */
+static int
+p9fs_read(struct vop_read_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *np;
+	uint64_t offset, resid, filesize;
+	int64_t ret;
+	uint32_t count;
+	int error;
+	char *io_buffer = NULL;
+	struct open_fid_state ostate;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	np = P9FS_VTON(vp);
+	error = 0;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	if (vp->v_type != VREG)
+		return (EISDIR);
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
+	if (error)
+		return (error);
+
+	/* where in the file are we to start reading */
+	offset = uio->uio_offset;
+	filesize = np->inode.i_size;
+	if (uio->uio_offset >= filesize)
+		goto out;
+
+	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
+	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+	/* Work with a local buffer from the pool for this vop */
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+	while ((resid = uio->uio_resid) > 0) {
+		if (offset >= filesize)
+			break;
+		/* Clamp in 64 bits (as p9fs_write() does) before narrowing. */
+		resid = MIN(resid, filesize - uio->uio_offset);
+		count = MIN(resid, P9FS_IOUNIT);
+		if (count == 0)
+			break;
+
+		/* Copy count bytes into the uio */
+		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
+		/*
+		 * p9_client_read returns the number of bytes read instead
+		 * of an error code; a negative value means an I/O error.
+		 */
+		if (ret < 0) {
+			error = -ret;
+			goto out;
+		}
+		/* No data: stop instead of retrying the same offset forever. */
+		if (ret == 0)
+			break;
+		error = uiomove(io_buffer, ret, uio);
+		if (error != 0)
+			goto out;
+
+		offset += ret;
+	}
+	uio->uio_offset = offset;
+out:
+	uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
+
+	return (error);
+}
+
+/*
+ * VOP_WRITE handler. Data is copied from the uio into a pool buffer in
+ * chunks of at most P9FS_IOUNIT bytes and sent with p9_client_write() until
+ * each chunk is fully written. If a write fails after some data went out,
+ * the unsent bytes are handed back to the uio and a short write results.
+ * The cached file size grows to cover what was actually written.
+ */
+static int
+p9fs_write(struct vop_write_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *np;
+	uint64_t off, offset, resid, bytes_written, file_size;
+	int64_t ret;
+	uint32_t count;
+	int error, ioflag;
+	char *io_buffer = NULL;
+	struct open_fid_state ostate;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	np = P9FS_VTON(vp);
+	ioflag = ap->a_ioflag;
+
+	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
+	if (error)
+		return (error);
+
+	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
+	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+	if (uio->uio_offset < 0) {
+		error = EINVAL;
+		goto out;
+	}
+	if (uio->uio_resid == 0)
+		goto out;
+
+	file_size = np->inode.i_size;
+
+	switch (vp->v_type) {
+	case VREG:
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = file_size;
+		break;
+	case VDIR:
+		/* Leave through "out" so the open fid state is released. */
+		error = EISDIR;
+		goto out;
+	case VLNK:
+		break;
+	default:
+		panic("%s: bad file type vp: %p", __func__, vp);
+	}
+
+	offset = uio->uio_offset;
+	bytes_written = 0;
+
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+	while ((resid = uio->uio_resid) > 0) {
+		off = 0;
+		count = MIN(resid, P9FS_IOUNIT);
+		error = uiomove(io_buffer, count, uio);
+		if (error != 0) {
+			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
+			goto out;
+		}
+
+		/* While count still exists, keep writing.*/
+		while (count > 0) {
+			ret = p9_client_write(ostate.vofid, offset, count,
+			    io_buffer + off);
+			if (ret <= 0) {
+				/* Nothing written at all: report an error. */
+				if (bytes_written == 0) {
+					error = (ret < 0) ? -ret : EIO;
+					goto out;
+				}
+				/* Short write: give the unsent bytes back. */
+				uio->uio_resid += count;
+				uio->uio_offset -= count;
+				goto done;
+			}
+			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
+			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+			off += ret;
+			offset += ret;
+			bytes_written += ret;
+			count -= ret;
+		}
+	}
+done:
+	/* Grow the cached size to cover the last byte actually written. */
+	if (file_size < offset) {
+		np->inode.i_size = offset;
+		vnode_pager_setsize(vp, offset);
+	}
+out:
+	if (io_buffer)
+		uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
+
+	return (error);
+}
+
+/*
+ * Shared implementation of remove and rmdir. Asks the server to unlink
+ * "name" from directory dnp (with P9PROTO_UNLINKAT_REMOVEDIR when the target
+ * vnode is a directory) and then cleans up client-side state for np:
+ * p9fs_fid_remove_all() is called when the cached link count was 1, the
+ * vnode is purged from the name cache and the vfs hash, and the node is
+ * flagged P9FS_NODE_DELETED. Returns 0 or the fid lookup/unlink error.
+ */
+static int
+remove_common(struct p9fs_node *dnp, struct p9fs_node *np, const char *name,
+    struct ucred *cred)
+{
+	struct p9fs_session *vses = np->p9fs_ses;
+	struct vnode *vp = P9FS_NTOV(np);
+	struct p9_fid *vfid;
+	int error = 0, unlink_flags;
+
+	vfid = p9fs_get_fid(vses->clnt, dnp, cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	unlink_flags = 0;
+	if (np->v_node->v_type == VDIR)
+		unlink_flags = P9PROTO_UNLINKAT_REMOVEDIR;
+	error = p9_client_unlink(vfid, name, unlink_flags);
+	if (error != 0)
+		return (error);
+
+	/* Only one link was cached: drop the fids tracked for this node. */
+	if (np->inode.i_links_count == 1)
+		p9fs_fid_remove_all(np, TRUE);
+
+	/* Invalidate all entries of vnode from name cache and hash list. */
+	cache_purge(vp);
+	vfs_hash_remove(vp);
+
+	np->flags |= P9FS_NODE_DELETED;
+	return (0);
+}
+
+/*
+ * VOP_REMOVE handler. Directories are refused with EISDIR; anything else
+ * is unlinked through remove_common() using the component name from cnp.
+ * On success the parent directory's cached inode is adjusted with
+ * P9FS_DECR_LINKS().
+ */
+static int
+p9fs_remove(struct vop_remove_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_node *dnp = P9FS_VTON(ap->a_dvp);
+	struct p9fs_inode *dinode = &dnp->inode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
+
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
+	if (error != 0)
+		return (error);
+
+	/* The unlink succeeded; reflect that in the parent's cached inode. */
+	P9FS_DECR_LINKS(dinode);
+
+	return (0);
+}
+
+/*
+ * VOP_RMDIR handler. Delegates to remove_common(), which picks the
+ * directory form of unlink from the vnode type, and on success adjusts
+ * the parent directory's cached inode with P9FS_DECR_LINKS().
+ * Returns 0 or the error from remove_common().
+ */
+static int
+p9fs_rmdir(struct vop_rmdir_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_node *dnp = P9FS_VTON(ap->a_dvp);
+	struct p9fs_inode *dinode = &dnp->inode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
+
+	error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
+	if (error != 0)
+		return (error);
+
+	/* The unlink succeeded; reflect that in the parent's cached inode. */
+	P9FS_DECR_LINKS(dinode);
+
+	return (0);
+}
+
+/*
+ * VOP_SYMLINK handler. Creates the link on the server with
+ * p9_create_symlink(), walks from the directory fid to the new name to
+ * obtain a fid for it and builds the vnode with p9fs_vget_common(). The
+ * component name is temporarily NUL-terminated in place and restored
+ * before returning. Returns 0 on success or an errno value.
+ */
+static int
+p9fs_symlink(struct vop_symlink_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vattr *vap = ap->a_vap;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *dnp = P9FS_VTON(dvp);
+	struct p9fs_session *vses = dnp->p9fs_ses;
+	struct mount *mp = vses->p9fs_mount;
+	struct p9_fid *dvfid, *newfid = NULL;
+	char *symtgt = (char*)(uintptr_t) ap->a_target;
+	gid_t gid = vap->va_gid;
+	char tmpchr;
+	int error = 0;
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	/*
+	 * Save the character present at namelen in nameptr string and
+	 * null terminate the character to get the search name for p9_dir_walk
+	 */
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	/* Fid of the directory that will contain the link. */
+	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		goto out;
+
+	/* Create the link on the server. */
+	error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
+	if (error != 0)
+		goto out;
+
+	/* Walk to the new name so a vnode can be created for it. */
+	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+	if (newfid == NULL)
+		goto out;
+
+	error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+	    dnp, newfid, vpp, cnp->cn_nameptr);
+	if (error != 0)
+		goto out;
+
+	/* Enter the new vnode in the name cache when the lookup asked for it. */
+	if ((cnp->cn_flags & MAKEENTRY) != 0)
+		cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
+
+	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
+	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
+
+	/*
+	 * Success path: newfid was handed to p9fs_vget_common() and is not
+	 * clunked here. Put back the character overwritten above.
+	 */
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	return (error);
+
+out:
+	/* Failure path: clunk the walked fid if one was obtained. */
+	if (newfid != NULL)
+		p9_client_clunk(newfid);
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	return (error);
+}
+
+/*
+ * VOP_LINK handler. Creates a hard link named by cnp in directory tdvp that
+ * refers to vp, using p9_create_hardlink() on the two nodes' VFIDs, and
+ * updates the linked inode through P9FS_INCR_LINKS() on success.
+ * Returns 0 or the error from the fid lookups or the server request.
+ */
+static int
+p9fs_link(struct vop_link_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *dnp = P9FS_VTON(tdvp);
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *inode = &np->inode;
+	struct p9fs_session *vses = np->p9fs_ses;
+	struct p9_fid *dvfid, *oldvfid;
+	int error = 0;
+
+	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
+
+	/* Fid of the directory that will hold the new name. */
+	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/* Fid of the existing file being linked to. */
+	oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
+	if (error != 0)
+		return (error);
+
+	/* Increment ref count on the inode */
+	P9FS_INCR_LINKS(inode);
+
+	return (0);
+}
+
+/*
+ * VOP_READLINK handler. Fetches the link target from the server with
+ * p9_readlink() and copies strlen(target) bytes (no terminating NUL) into
+ * the caller's uio. Returns 0 on success or an errno value, including any
+ * error reported by uiomove().
+ */
+static int
+p9fs_readlink(struct vop_readlink_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct p9fs_node *dnp = P9FS_VTON(vp);
+	struct p9fs_session *vses = dnp->p9fs_ses;
+	struct p9_fid *dvfid;
+	char *target;
+	int error = 0, len;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	error = p9_readlink(dvfid, &target);
+	if (error != 0)
+		return (error);
+
+	/* NOTE(review): target appears to be allocated by p9_readlink() and */
+	/* is never released here - confirm ownership and free if needed. */
+	len = strlen(target);
+	/* Propagate a failed copy-out instead of always reporting success. */
+	return (uiomove(target, len, uio));
+}
+
+/*
+ * VOP_READDIR handler. Directory data is read from the server in requests
+ * of P9FS_IOUNIT bytes, each 9P entry is decoded with p9_dirent_read(),
+ * converted to a struct dirent and copied to the caller's uio. uio_offset is
+ * left at the d_off of the last entry transferred.
+ */
+static int
+p9fs_readdir(struct vop_readdir_args *ap)
+{
+	struct uio *uio;
+	struct vnode *vp;
+	struct dirent cde;
+	int64_t offset;
+	uint64_t diroffset, startoffset;
+	struct p9fs_node *np;
+	int error;
+	int32_t count;
+	struct p9_client *clnt;
+	struct p9_dirent dent;
+	char *io_buffer;
+	struct p9_fid *vofid;
+
+	uio = ap->a_uio;
+	vp = ap->a_vp;
+	np = P9FS_VTON(ap->a_vp);
+	error = 0;
+	clnt = np->p9fs_ses->clnt;
+
+	P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);
+
+	if (ap->a_uio->uio_iov->iov_len <= 0)
+		return (EINVAL);
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
+	if (vofid == NULL) {
+		P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
+		return (EBADF);
+	}
+
+	if (ap->a_eofflag != NULL)
+		*ap->a_eofflag = 0;
+
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);
+	/* We haven't reached the end yet. read more. */
+	diroffset = uio->uio_offset;
+	while (uio->uio_resid >= sizeof(struct dirent)) {
+		/*
+		 * We need to read more data as what is indicated by filesize because
+		 * filesize is based on data stored in struct dirent structure but
+		 * we read data in struct p9_dirent format which has different size.
+		 * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
+		 * it into struct dirent structure and send it back.
+		 */
+		count = P9FS_IOUNIT;
+		bzero(io_buffer, P9FS_MTU);
+		count = p9_client_readdir(vofid, (char *)io_buffer,
+		    diroffset, count);
+		if (count == 0) {
+			if (ap->a_eofflag != NULL)
+				*ap->a_eofflag = 1;
+			break;
+		}
+		if (count < 0) {
+			error = EIO;
+			goto out;
+		}
+
+		offset = 0;
+		startoffset = diroffset;
+		while (offset + QEMU_DIRENTRY_SZ <= count) {
+			/*
+			 * Decode one p9_dirent from the reply buffer and
+			 * convert it to a FreeBSD struct dirent.
+			 */
+			bzero(&dent, sizeof(dent));
+			offset = p9_dirent_read(clnt, io_buffer, offset, count,
+				&dent);
+			if (offset < 0 || offset > count) {
+				error = EIO;
+				goto out;
+			}
+			/* The length comes from the server: never overrun d_name. */
+			if (dent.len >= sizeof(cde.d_name)) {
+				error = EIO;
+				goto out;
+			}
+
+			bzero(&cde, sizeof(cde));
+			strncpy(cde.d_name, dent.d_name, dent.len);
+			cde.d_fileno = dent.qid.path;
+			cde.d_type = dent.d_type;
+			cde.d_namlen = dent.len;
+			cde.d_reclen = GENERIC_DIRSIZ(&cde);
+
+			/*
+			 * If there isn't enough space in the uio to return a
+			 * whole dirent, break off read
+			 */
+			if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
+				break;
+
+			/* Transfer */
+			error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
+			if (error != 0) {
+				error = EIO;
+				goto out;
+			}
+			diroffset = dent.d_off;
+		}
+		/* No entry consumed: stop rather than re-read the same data. */
+		if (diroffset == startoffset)
+			break;
+	}
+	/* Pass on last transferred offset */
+	uio->uio_offset = diroffset;
+out:
+	uma_zfree(p9fs_io_buffer_zone, io_buffer);
+
+	return (error);
+}
+
+/*
+ * Perform the buffer I/O described by bp on vp via the open fid vofid, staging
+ * data in P9FS_IOUNIT-sized chunks, then bufdone() it.  cr is currently unused.
+ */
+static void
+p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
+{
+	struct uio *uiov;
+	struct iovec io;
+	int error;
+	uint64_t off, offset, filesize, resid;
+	uint32_t count;
+	int64_t ret;
+	struct p9fs_node *np;
+	char *io_buffer;
+
+	error = 0;
+	np = P9FS_VTON(vp);
+
+	filesize = np->inode.i_size;
+	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
+	uiov->uio_iov = &io;
+	uiov->uio_iovcnt = 1;
+	uiov->uio_segflg = UIO_SYSSPACE;
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+
+	if (bp->b_iocmd == BIO_READ) {
+		io.iov_len = uiov->uio_resid = bp->b_bcount;
+		io.iov_base = bp->b_data;
+		uiov->uio_rw = UIO_READ;
+
+		switch (vp->v_type) {
+		case VREG:
+		{
+			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+			/* Pre-zero the buffer so bytes past EOF read as zeroes. */
+			if (bp->b_bcount > 0)
+				bzero(bp->b_data, bp->b_bcount);
+			/* where in the file are we to start reading */
+			offset = uiov->uio_offset;
+			if (offset >= filesize)
+				goto out;
+
+			while ((resid = uiov->uio_resid) > 0) {
+				if (offset >= filesize)
+					break;
+				/* Cap at P9FS_IOUNIT like the write path; MIN() is 64-bit safe. */
+				count = MIN(MIN(filesize - offset, resid), P9FS_IOUNIT);
+				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
+				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
+				/* Copy count bytes into the uio */
+				ret = p9_client_read(vofid, offset, count, io_buffer);
+				if (ret < 0) {
+					/* Never hand a negative length to uiomove(). */
+					error = EIO;
+					goto out;
+				}
+				if (ret == 0)
+					break;	/* no progress: stop instead of spinning */
+				error = uiomove(io_buffer, ret, uiov);
+				if (error != 0)
+					goto out;
+				offset += ret;
+			}
+			break;
+		}
+		default:
+			printf("vfs:  type %x unexpected\n", vp->v_type);
+			break;
+		}
+	} else {
+		if (bp->b_dirtyend > bp->b_dirtyoff) {
+			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
+			/* NOTE(review): the read path scales b_blkno by DEV_BSIZE -- confirm. */
+			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
+			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+			uiov->uio_rw = UIO_WRITE;
+
+			if (uiov->uio_offset < 0) {
+				error = EINVAL;
+				goto out;
+			}
+
+			if (uiov->uio_resid == 0)
+				goto out;
+
+			offset = uiov->uio_offset;
+
+			while ((resid = uiov->uio_resid) > 0) {
+				off = 0;
+				count = MIN(resid, P9FS_IOUNIT);
+				error = uiomove(io_buffer, count, uiov);
+				if (error != 0)
+					goto out;
+
+				while (count > 0) {
+					/* Copy count bytes from the uio */
+					ret = p9_client_write(vofid, offset, count,
+					    io_buffer + off);
+					if (ret <= 0) {
+						/* Fail the buf; a 0-byte write would spin forever. */
+						error = EIO;
+						goto out;
+					}
+
+					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
+					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
+					off += ret;
+					offset += ret;
+					count -= ret;
+				}
+			}
+
+			/* Update the fields in the node to reflect the change */
+			if (filesize < uiov->uio_offset + uiov->uio_resid) {
+				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
+				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
+				/* update the modified timers. */
+				p9fs_itimes(vp);
+			}
+		} else {
+			bp->b_resid = 0;
+			goto out1;
+		}
+	}
+out:
+	/* Set the error */
+	if (error != 0) {
+		bp->b_error = error;
+		bp->b_ioflags |= BIO_ERROR;
+	}
+	bp->b_resid = uiov->uio_resid;
+out1:
+	bufdone(bp);
+	uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	free(uiov, M_P9UIOV);
+}
+
+/*
+ * VOP_STRATEGY: obtain an open fid suited to the direction of the request
+ * and let p9fs_doio() move the data.  Errors are recorded in the buf itself,
+ * so the VOP always returns 0.
+ */
+static int
+p9fs_strategy(struct vop_strategy_args *ap)
+{
+	struct open_fid_state ostate;
+	struct vnode *vp = ap->a_vp;
+	struct buf *bp = ap->a_bp;
+	struct ucred *cr;
+	int error, fmode;
+
+	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
+
+	/* Reads use the read credential and FREAD; anything else writes. */
+	if (bp->b_iocmd == BIO_READ) {
+		cr = bp->b_rcred;
+		fmode = FREAD;
+	} else {
+		cr = bp->b_wcred;
+		fmode = FWRITE;
+	}
+
+	error = p9fs_get_open_fid(vp, fmode, cr, &ostate);
+	if (error == 0) {
+		p9fs_doio(vp, bp, ostate.vofid, cr);
+		p9fs_release_open_fid(vp, cr, &ostate);
+		return (0);
+	}
+
+	P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
+	bp->b_error = error;
+	bp->b_ioflags |= BIO_ERROR;
+	bufdone(bp);
+	return (0);
+}
+
+/* Rename fvp (named by fcnp in fdvp) to tcnp in tdvp using a 9P renameat request. */
+static int
+p9fs_rename(struct vop_rename_args *ap)
+{
+	struct vnode *tvp;
+	struct vnode *tdvp;
+	struct vnode *fvp;
+	struct vnode *fdvp;
+	struct componentname *tcnp;
+	struct componentname *fcnp;
+	struct p9fs_node *tdnode;
+	struct p9fs_node *fdnode;
+	struct p9fs_inode *fdinode;
+	struct p9fs_node *fnode;
+	struct p9fs_inode *finode;
+	struct p9fs_session *vses;
+	struct p9fs_node *tnode;
+	struct p9fs_inode *tinode;
+	struct p9_fid *olddirvfid, *newdirvfid ;
+	int error;
+
+	tvp = ap->a_tvp;
+	tdvp = ap->a_tdvp;
+	fvp = ap->a_fvp;
+	fdvp = ap->a_fdvp;
+	tcnp = ap->a_tcnp;
+	fcnp = ap->a_fcnp;
+	tdnode = P9FS_VTON(tdvp);
+	fdnode = P9FS_VTON(fdvp);
+	fdinode = &fdnode->inode;
+	fnode = P9FS_VTON(fvp);
+	finode = &fnode->inode;
+	vses = fnode->p9fs_ses;
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);
+
+	/* Refuse a rename whose source and target live on different mounts. */
+	if (fvp->v_mount != tdvp->v_mount ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+	/* Source and target are the same vnode: error stays 0 and the rename proceeds. */
+	if (fvp == tvp)
+		error = 0;
+
+	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		goto out;
+	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		goto out;
+
+	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * If the renamed object is a directory, drop a link on the source
+	 * directory and purge its cached names (and the target directory's
+	 * when an existing directory is being overwritten).
+	 */
+	if (fvp->v_type == VDIR) {
+		if (tvp && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		P9FS_DECR_LINKS(fdinode);
+		cache_purge(fdvp);
+	}
+
+	/* Hold fvp's exclusive vnode lock only around its link-count decrement. */
+	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
+		goto out;
+	P9FS_DECR_LINKS(finode);	/* NOTE(review): confirm a rename should drop fvp's link count */
+	VOP_UNLOCK(fvp);
+
+	if (tvp) {
+		tnode = P9FS_VTON(tvp);
+		tinode = &tnode->inode;
+		P9FS_DECR_LINKS(tinode);	/* an existing target loses a link */
+	}
+
+out:	/* Common exit for success and failure: vput()/vrele() the vnodes passed in. */
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	return (error);
+}
+
+/*
+ * Write a run of VM pages back to the file: the pages are mapped into kernel
+ * virtual address space and handed to VOP_WRITE() as a single system-space uio.
+ * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
+ */
+static int
+p9fs_putpages(struct vop_putpages_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct thread *td = curthread;
+	struct ucred *cred = curthread->td_ucred;
+	vm_page_t *pages = ap->a_m;
+	int *rtvals = ap->a_rtvals;
+	int count = ap->a_count;
+	int npages = btoc(count);
+	off_t offset = IDX_TO_OFF(pages[0]->pindex);
+	struct uio uio;
+	struct iovec iov;
+	struct buf *bp;
+	vm_offset_t kva;
+	int error, i;
+
+	/* Assume failure for every page until the write has gone through. */
+	for (i = 0; i < npages; i++)
+		rtvals[i] = VM_PAGER_ERROR;
+
+	/*
+	 * When putting pages, do not extend file past EOF: trim the byte
+	 * count to what lies below the current file size.
+	 */
+	if (offset + count > np->inode.i_size) {
+		count = np->inode.i_size - offset;
+		if (count < 0)
+			count = 0;
+	}
+
+	/*
+	 * Use a pbuf's b_data address as the KVA window for the pages.
+	 */
+	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
+	kva = (vm_offset_t) bp->b_data;
+	pmap_qenter(kva, pages, npages);
+
+	VM_CNT_INC(v_vnodeout);
+	VM_CNT_ADD(v_vnodepgsout, count);
+
+	/* One iovec covering the mapped pages, written at the pages' offset. */
+	iov.iov_base = (caddr_t) kva;
+	iov.iov_len = count;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_td = td;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_resid = count;
+	uio.uio_offset = offset;
+
+	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);
+
+	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
+	    cred);
+
+	pmap_qremove(kva, npages);
+	uma_zfree(p9fs_pbuf_zone, bp);
+
+	/* Only a successful write lets the pager helper update pages and rtvals. */
+	if (error == 0)
+		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
+		    np->inode.i_size - offset, npages * PAGE_SIZE);
+
+	return (rtvals[0]);
+}
+
+struct vop_vector p9fs_vnops = {	/* p9fs vnode operations, sorted by name */
+	.vop_default =		&default_vnodeops,
+	.vop_access =		p9fs_access,
+	.vop_close =		p9fs_close,
+	.vop_create =		p9fs_create,
+	.vop_getattr =		p9fs_getattr_dotl,
+	.vop_inactive =		p9fs_inactive,
+	.vop_link =		p9fs_link,
+	.vop_lookup =		p9fs_lookup,
+	.vop_mkdir =		p9fs_mkdir,
+	.vop_mknod =		p9fs_mknod,
+	.vop_open =		p9fs_open,
+	.vop_putpages =		p9fs_putpages,
+	.vop_read =		p9fs_read,
+	.vop_readdir =		p9fs_readdir,
+	.vop_readlink =		p9fs_readlink,
+	.vop_reclaim =		p9fs_reclaim,
+	.vop_remove =		p9fs_remove,
+	.vop_rename =		p9fs_rename,
+	.vop_rmdir =		p9fs_rmdir,
+	.vop_setattr =		p9fs_setattr_dotl,
+	.vop_strategy =		p9fs_strategy,
+	.vop_symlink =		p9fs_symlink,
+	.vop_write =		p9fs_write,
+};
+VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
index 6ef725ee0ee7..0020b8f8a8d8 100644
--- a/sys/fs/procfs/procfs_mem.c
+++ b/sys/fs/procfs/procfs_mem.c
@@ -41,6 +41,7 @@
 #include <sys/ptrace.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
+#include <sys/priv.h>
 
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/procfs/procfs.h>
diff --git a/sys/fs/procfs/procfs_osrel.c b/sys/fs/procfs/procfs_osrel.c
index fd6a4d7e0eea..0102090de4da 100644
--- a/sys/fs/procfs/procfs_osrel.c
+++ b/sys/fs/procfs/procfs_osrel.c
@@ -45,9 +45,11 @@ procfs_doosrel(PFS_FILL_ARGS)
 
 	if (uio == NULL)
 		return (EOPNOTSUPP);
-	if (uio->uio_rw == UIO_READ) {
+	switch (uio->uio_rw) {
+	case UIO_READ:
 		sbuf_printf(sb, "%d\n", p->p_osrel);
-	} else {
+		break;
+	case UIO_WRITE:
 		sbuf_trim(sb);
 		sbuf_finish(sb);
 		pp = sbuf_data(sb);
@@ -62,6 +64,7 @@ procfs_doosrel(PFS_FILL_ARGS)
 			osrel = ov;
 		}
 		p->p_osrel = osrel;
+		break;
 	}
 	return (0);
 }
diff --git a/sys/fs/procfs/procfs_rlimit.c b/sys/fs/procfs/procfs_rlimit.c
index 83e11f44b3f8..6be933ac6e44 100644
--- a/sys/fs/procfs/procfs_rlimit.c
+++ b/sys/fs/procfs/procfs_rlimit.c
@@ -57,6 +57,9 @@
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/procfs/procfs.h>
 
+_Static_assert(nitems(rlimit_ident) == RLIM_NLIMITS,
+    "resource.h RLIMIT_IDENT needs update");
+
 int
 procfs_doprocrlimit(PFS_FILL_ARGS)
 {
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
index 9c2f42a45102..38070e0946bb 100644
--- a/sys/fs/procfs/procfs_status.c
+++ b/sys/fs/procfs/procfs_status.c
@@ -61,6 +61,7 @@
 int
 procfs_doprocstatus(PFS_FILL_ARGS)
 {
+	struct timeval start, ut, st;
 	struct session *sess;
 	struct thread *tdfirst;
 	struct tty *tp;
@@ -121,21 +122,16 @@ procfs_doprocstatus(PFS_FILL_ARGS)
 		wmesg = "nochan";
 	thread_unlock(tdfirst);
 
-	if (p->p_flag & P_INMEM) {
-		struct timeval start, ut, st;
-
-		PROC_STATLOCK(p);
-		calcru(p, &ut, &st);
-		PROC_STATUNLOCK(p);
-		start = p->p_stats->p_start;
-		getboottime(&boottime);
-		timevaladd(&start, &boottime);
-		sbuf_printf(sb, " %jd,%ld %jd,%ld %jd,%ld",
-		    (intmax_t)start.tv_sec, start.tv_usec,
-		    (intmax_t)ut.tv_sec, ut.tv_usec,
-		    (intmax_t)st.tv_sec, st.tv_usec);
-	} else
-		sbuf_printf(sb, " -1,-1 -1,-1 -1,-1");
+	PROC_STATLOCK(p);
+	calcru(p, &ut, &st);
+	PROC_STATUNLOCK(p);
+	start = p->p_stats->p_start;
+	getboottime(&boottime);
+	timevaladd(&start, &boottime);
+	sbuf_printf(sb, " %jd,%ld %jd,%ld %jd,%ld",
+	    (intmax_t)start.tv_sec, start.tv_usec,
+	    (intmax_t)ut.tv_sec, ut.tv_usec,
+	    (intmax_t)st.tv_sec, st.tv_usec);
 
 	sbuf_printf(sb, " %s", wmesg);
 
diff --git a/sys/fs/pseudofs/pseudofs.c b/sys/fs/pseudofs/pseudofs.c
index eb4ca8a82456..ef45f96a6192 100644
--- a/sys/fs/pseudofs/pseudofs.c
+++ b/sys/fs/pseudofs/pseudofs.c
@@ -98,12 +98,10 @@ pfs_alloc_node(struct pfs_info *pi, const char *name, pfs_type_t type)
 /*
  * Add a node to a directory
  */
-static void
+static int
 pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
 {
-#ifdef INVARIANTS
 	struct pfs_node *iter;
-#endif
 
 	KASSERT(parent != NULL,
 	    ("%s(): parent is NULL", __func__));
@@ -123,8 +121,6 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
 			KASSERT(iter->pn_type != pfstype_procdir,
 			    ("%s(): nested process directories", __func__));
 	for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
-		KASSERT(strcmp(pn->pn_name, iter->pn_name) != 0,
-		    ("%s(): homonymous siblings", __func__));
 		if (pn->pn_type == pfstype_procdir)
 			KASSERT(iter->pn_type != pfstype_procdir,
 			    ("%s(): sibling process directories", __func__));
@@ -133,8 +129,19 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
 
 	pn->pn_parent = parent;
 	pfs_fileno_alloc(pn);
-
 	pfs_lock(parent);
+	for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
+		if (strcmp(pn->pn_name, iter->pn_name) != 0)
+			continue;
+		printf("pfs_add_node: homonymous siblings: '%s/%s' type %d",
+		    parent->pn_name, pn->pn_name, pn->pn_type);
+		/* Do not detach, because we are not yet attached. */
+		pn->pn_parent = NULL;
+		pfs_unlock(parent);
+		return (EEXIST);
+	}
+
+
 	if ((parent->pn_flags & PFS_PROCDEP) != 0)
 		pn->pn_flags |= PFS_PROCDEP;
 	if (parent->pn_nodes == NULL) {
@@ -151,10 +158,11 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
 		parent->pn_last_node = pn;
 	}
 	pfs_unlock(parent);
+	return (0);
 }
 
 /*
- * Detach a node from its aprent
+ * Detach a node from its parent
  */
 static void
 pfs_detach_node(struct pfs_node *pn)
@@ -196,6 +204,7 @@ static int
 pfs_fixup_dir_flags(struct pfs_node *parent, int flags)
 {
 	struct pfs_node *dot, *dotdot;
+	int rc;
 
 	dot = pfs_alloc_node_flags(parent->pn_info, ".", pfstype_this, flags);
 	if (dot == NULL)
@@ -205,9 +214,14 @@ pfs_fixup_dir_flags(struct pfs_node *parent, int flags)
 		pfs_destroy(dot);
 		return (ENOMEM);
 	}
-	pfs_add_node(parent, dot);
-	pfs_add_node(parent, dotdot);
-	return (0);
+	rc = pfs_add_node(parent, dot);
+	if (rc == 0)
+		rc = pfs_add_node(parent, dotdot);
+	if (rc != 0) {
+		pfs_destroy(dot);
+		pfs_destroy(dotdot);
+	}
+	return (rc);
 }
 
 static void
@@ -236,11 +250,12 @@ pfs_create_dir(struct pfs_node *parent, const char *name,
 	pn->pn_vis = vis;
 	pn->pn_destroy = destroy;
 	pn->pn_flags = flags;
-	pfs_add_node(parent, pn);
-	rc = pfs_fixup_dir_flags(pn, flags);
-	if (rc) {
+	rc = pfs_add_node(parent, pn);
+	if (rc == 0)
+		rc = pfs_fixup_dir_flags(pn, flags);
+	if (rc != 0) {
 		pfs_destroy(pn);
-		return (NULL);
+		pn = NULL;
 	}
 	return (pn);
 }
@@ -263,8 +278,10 @@ pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
 	pn->pn_vis = vis;
 	pn->pn_destroy = destroy;
 	pn->pn_flags = flags;
-	pfs_add_node(parent, pn);
-
+	if (pfs_add_node(parent, pn) != 0) {
+		pfs_destroy(pn);
+		pn = NULL;
+	}
 	return (pn);
 }
 
@@ -286,7 +303,10 @@ pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
 	pn->pn_vis = vis;
 	pn->pn_destroy = destroy;
 	pn->pn_flags = flags;
-	pfs_add_node(parent, pn);
+	if (pfs_add_node(parent, pn) != 0) {
+		pfs_destroy(pn);
+		pn = NULL;
+	}
 
 	return (pn);
 }
diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c
index 324f38abd10e..35454998fc8e 100644
--- a/sys/fs/smbfs/smbfs_io.c
+++ b/sys/fs/smbfs/smbfs_io.c
@@ -629,7 +629,7 @@ smbfs_vinvalbuf(struct vnode *vp, struct thread *td)
 
 	while (np->n_flag & NFLUSHINPROG) {
 		np->n_flag |= NFLUSHWANT;
-		error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", 2 * hz);
+		error = tsleep(&np->n_flag, PRIBIO, "smfsvinv", 2 * hz);
 		error = smb_td_intr(td);
 		if (error == EINTR)
 			return EINTR;
diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c
index 1e7dcafb1121..5d412cabadb8 100644
--- a/sys/fs/smbfs/smbfs_vnops.c
+++ b/sys/fs/smbfs/smbfs_vnops.c
@@ -810,6 +810,9 @@ smbfs_pathconf(struct vop_pathconf_args *ap)
 	    case _PC_NO_TRUNC:
 		*retval = 1;
 		break;
+	    case _PC_HAS_HIDDENSYSTEM:
+		*retval = 1;
+		break;
 	    default:
 		error = vop_stdpathconf(ap);
 	}
@@ -1051,7 +1054,7 @@ smbfs_lookup(struct vop_lookup_args *ap)
 	struct smbfattr fattr, *fap;
 	struct smb_cred *scred;
 	char *name = cnp->cn_nameptr;
-	int flags = cnp->cn_flags;
+	uint64_t flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	int nmlen = cnp->cn_namelen;
 	int error, islastcn, isdot;
diff --git a/sys/fs/tarfs/tarfs.h b/sys/fs/tarfs/tarfs.h
index ff1985e488cd..46fa8b55b3ad 100644
--- a/sys/fs/tarfs/tarfs.h
+++ b/sys/fs/tarfs/tarfs.h
@@ -74,7 +74,7 @@ struct tarfs_node {
 	struct timespec		 mtime;
 	struct timespec		 ctime;
 	struct timespec		 birthtime;
-	unsigned long		 gen;
+	uint32_t		 gen;
 
 	/* Block map */
 	size_t			 nblk;
@@ -161,10 +161,9 @@ struct tarfs_zio {
 };
 
 struct tarfs_fid {
-	u_short			 len;	/* length of data in bytes */
-	u_short			 data0;	/* force alignment */
-	ino_t			 ino;
-	unsigned long		 gen;
+	u_short		 len;	/* length of data in bytes */
+	uint32_t	 gen;
+	ino_t		 ino;
 };
 
 #define	TARFS_NODE_LOCK(tnp) \
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
index 8c97fab185fc..afb8e05f5929 100644
--- a/sys/fs/tarfs/tarfs_vnops.c
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -668,6 +668,8 @@ tarfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct tarfs_fid *tfp;
 	struct tarfs_node *tnp;
+	_Static_assert(sizeof(struct tarfs_fid) <= sizeof(struct fid),
+	    "struct tarfs_fid cannot be larger than struct fid");
 
 	tfp = (struct tarfs_fid *)ap->a_fhp;
 	tnp = VP_TO_TARFS_NODE(ap->a_vp);
diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index c28f3a02a7bf..52307cc7c7b2 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -292,6 +292,15 @@ struct tmpfs_node {
 			 */
 			off_t			tn_readdir_lastn;
 			struct tmpfs_dirent *	tn_readdir_lastp;
+
+			/*
+			 * Total size of whiteout directory entries.  This
+			 * must be a multiple of sizeof(struct tmpfs_dirent)
+			 * and is used to determine whether a directory is
+			 * empty (excluding whiteout entries) during rename/
+			 * rmdir operations.
+			 */
+			off_t			tn_wht_size;	/* (v) */
 		} tn_dir;
 
 		/* Valid when tn_type == VLNK. */
@@ -439,11 +448,10 @@ struct tmpfs_mount {
  * NFS code.
  */
 struct tmpfs_fid_data {
+	unsigned short		tfd_len;
 	ino_t			tfd_id;
 	unsigned long		tfd_gen;
-};
-_Static_assert(sizeof(struct tmpfs_fid_data) <= MAXFIDSZ,
-    "(struct tmpfs_fid_data) is larger than (struct fid).fid_data");
+} __packed;
 
 struct tmpfs_dir_cursor {
 	struct tmpfs_dirent	*tdc_current;
@@ -484,6 +492,7 @@ int	tmpfs_dir_getdents(struct tmpfs_mount *, struct tmpfs_node *,
 	    struct uio *, int, uint64_t *, int *);
 int	tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
 void	tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
+void	tmpfs_dir_clear_whiteouts(struct vnode *);
 int	tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
 int	tmpfs_reg_punch_hole(struct vnode *vp, off_t *, off_t *);
 int	tmpfs_chflags(struct vnode *, u_long, struct ucred *, struct thread *);
@@ -533,6 +542,8 @@ tmpfs_update(struct vnode *vp)
 #define TMPFS_VALIDATE_DIR(node) do { \
 	MPASS((node)->tn_type == VDIR); \
 	MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+	MPASS((node)->tn_dir.tn_wht_size % sizeof(struct tmpfs_dirent) == 0); \
+	MPASS((node)->tn_dir.tn_wht_size <= (node)->tn_size); \
 } while (0)
 
 /*
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 9bdcc4575511..1237f6b92cdb 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -120,7 +120,7 @@ tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
 	/*
 	 * Forced unmount?
 	 */
-	if (vp == NULL) {
+	if (vp == NULL || vp->v_object == NULL) {
 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
 		    ("object %p with OBJ_TMPFS_VREF but without vnode",
 		    object));
@@ -183,6 +183,9 @@ tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
 	KASSERT((object->flags & OBJ_ANON) == 0,
 	    ("%s: object %p with OBJ_ANON", __func__, object));
 	old = object->un_pager.swp.writemappings;
+	KASSERT(old >= (vm_ooffset_t)end - start,
+	    ("tmpfs obj %p writecount %jx dec %jx", object, (uintmax_t)old,
+	    (uintmax_t)((vm_ooffset_t)end - start)));
 	object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
 	new = object->un_pager.swp.writemappings;
 	tmpfs_pager_writecount_recalc(object, old, new);
@@ -346,7 +349,7 @@ tmpfs_node_init(void *mem, int size, int flags)
 
 	node = mem;
 	node->tn_id = 0;
-	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF);
+	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF | MTX_NEW);
 	node->tn_gen = arc4random();
 	return (0);
 }
@@ -425,7 +428,7 @@ sysctl_mem_percent(SYSCTL_HANDLER_ARGS)
 	if ((unsigned) percent > 100)
 		return (EINVAL);
 
-	*(long *)arg1 = percent;
+	*(int *)arg1 = percent;
 	tmpfs_set_reserve_from_percent();
 	return (0);
 }
@@ -440,7 +443,7 @@ tmpfs_set_reserve_from_percent(void)
 }
 
 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_percent,
-    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_mem_percent, 0,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, &tmpfs_mem_percent, 0,
     sysctl_mem_percent, "I",
     "Percent of available memory that can be used if no size limit");
 
@@ -490,50 +493,11 @@ static int
 tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
     int end, boolean_t ignerr)
 {
-	vm_page_t m;
-	int rv, error;
-
-	VM_OBJECT_ASSERT_WLOCKED(object);
-	KASSERT(base >= 0, ("%s: base %d", __func__, base));
-	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
-	    end));
-	error = 0;
-
-retry:
-	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
-	if (m != NULL) {
-		MPASS(vm_page_all_valid(m));
-	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
-		m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL |
-		    VM_ALLOC_WAITFAIL);
-		if (m == NULL)
-			goto retry;
-		vm_object_pip_add(object, 1);
-		VM_OBJECT_WUNLOCK(object);
-		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
-		VM_OBJECT_WLOCK(object);
-		vm_object_pip_wakeup(object);
-		if (rv == VM_PAGER_OK) {
-			/*
-			 * Since the page was not resident, and therefore not
-			 * recently accessed, immediately enqueue it for
-			 * asynchronous laundering.  The current operation is
-			 * not regarded as an access.
-			 */
-			vm_page_launder(m);
-		} else {
-			vm_page_free(m);
-			m = NULL;
-			if (!ignerr)
-				error = EIO;
-		}
-	}
-	if (m != NULL) {
-		pmap_zero_page_area(m, base, end - base);
-		vm_page_set_dirty(m);
-		vm_page_xunbusy(m);
-	}
+	int error;
 
+	error = vm_page_grab_zero_partial(object, idx, base, end);
+	if (ignerr)
+		error = 0;
 	return (error);
 }
 
@@ -643,6 +607,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype)
 		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
 		nnode->tn_dir.tn_readdir_lastn = 0;
 		nnode->tn_dir.tn_readdir_lastp = NULL;
+		nnode->tn_dir.tn_wht_size = 0;
 		nnode->tn_links++;
 		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
 		nnode->tn_dir.tn_parent->tn_links++;
@@ -954,6 +919,8 @@ tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
 
 	VM_OBJECT_WLOCK(obj);
 	VI_LOCK(vp);
+	vp->v_object = NULL;
+
 	/*
 	 * May be going through forced unmount.
 	 */
@@ -1094,15 +1061,19 @@ loop:
 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
 		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
 		    __func__, object));
-		KASSERT(object->un_pager.swp.writemappings == 0,
-		    ("%s: object %p has writemappings",
-		    __func__, object));
 		VI_LOCK(vp);
 		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
 		vp->v_object = object;
 		vn_irflag_set_locked(vp, (tm->tm_pgread ? VIRF_PGREAD : 0) |
 		    VIRF_TEXT_REF);
 		VI_UNLOCK(vp);
+		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
+		    ("leaked OBJ_TMPFS_VREF"));
+		if (object->un_pager.swp.writemappings > 0) {
+			vrefact(vp);
+			vlazy(vp);
+			vm_object_set_flag(object, OBJ_TMPFS_VREF);
+		}
 		VM_OBJECT_WUNLOCK(object);
 		break;
 	case VDIR:
@@ -1822,13 +1793,16 @@ int
 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
 	int error;
 
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		return (error);
+	dnode = VP_TO_TMPFS_DIR(dvp);
 	tmpfs_dir_attach(dvp, de);
+	dnode->tn_dir.tn_wht_size += sizeof(*de);
 	return (0);
 }
 
@@ -1836,14 +1810,44 @@ void
 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
 
-	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	de = tmpfs_dir_lookup(dnode, NULL, cnp);
 	MPASS(de != NULL && de->td_node == NULL);
+	MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de));
+	dnode->tn_dir.tn_wht_size -= sizeof(*de);
 	tmpfs_dir_detach(dvp, de);
 	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
 }
 
 /*
+ * Frees any dirents still associated with the directory represented
+ * by dvp in preparation for the removal of the directory.  This is
+ * required when removing a directory which contains only whiteout
+ * entries.
+ */
+void
+tmpfs_dir_clear_whiteouts(struct vnode *dvp)
+{
+	struct tmpfs_node *dir = VP_TO_TMPFS_DIR(dvp);
+	struct tmpfs_dir_cursor cursor;
+	struct tmpfs_dirent *ent;
+
+	ent = tmpfs_dir_first(dir, &cursor);
+	while (ent != NULL) {
+		KASSERT(ent->td_node == NULL, ("%s: non-whiteout dirent %p",
+		    __func__, ent));
+		dir->tn_dir.tn_wht_size -= sizeof(*ent);
+		tmpfs_dir_detach(dvp, ent);
+		tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), ent);
+		ent = tmpfs_dir_first(dir, &cursor);
+	}
+	MPASS(dir->tn_size == 0);
+	MPASS(dir->tn_dir.tn_wht_size == 0);
+}
+
+/*
  * Resizes the aobj associated with the regular file pointed to by 'vp' to the
  * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
  * 'newsize' must be positive.
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index 32eb9c958df1..431893b77bb9 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -208,7 +208,7 @@ again:
 			continue;
 		}
 		vm = vmspace_acquire_ref(p);
-		_PHOLD_LITE(p);
+		_PHOLD(p);
 		PROC_UNLOCK(p);
 		if (vm == NULL) {
 			PRELE(p);
@@ -585,29 +585,25 @@ static int
 tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
     struct vnode **vpp)
 {
-	struct tmpfs_fid_data tfd;
+	struct tmpfs_fid_data *tfd;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	int error;
 
-	if (fhp->fid_len != sizeof(tfd))
+	if (fhp->fid_len != sizeof(*tfd))
 		return (EINVAL);
 
-	/*
-	 * Copy from fid_data onto the stack to avoid unaligned pointer use.
-	 * See the comment in sys/mount.h on struct fid for details.
-	 */
-	memcpy(&tfd, fhp->fid_data, fhp->fid_len);
+	tfd = (struct tmpfs_fid_data *)fhp;
 
 	tmp = VFS_TO_TMPFS(mp);
 
-	if (tfd.tfd_id >= tmp->tm_nodes_max)
+	if (tfd->tfd_id >= tmp->tm_nodes_max)
 		return (EINVAL);
 
 	TMPFS_LOCK(tmp);
 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
-		if (node->tn_id == tfd.tfd_id &&
-		    node->tn_gen == tfd.tfd_gen) {
+		if (node->tn_id == tfd->tfd_id &&
+		    node->tn_gen == tfd->tfd_gen) {
 			tmpfs_ref_node(node);
 			break;
 		}
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 718cfef6bfa3..9d2a587b177a 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -476,6 +476,7 @@ tmpfs_stat(struct vop_stat_args *v)
 	sb->st_blksize = PAGE_SIZE;
 	sb->st_flags = node->tn_flags;
 	sb->st_gen = node->tn_gen;
+	sb->st_filerev = 0;
 	if (vp->v_type == VREG) {
 #ifdef __ILP32__
 		vm_object_t obj = node->tn_reg.tn_aobj;
@@ -1078,7 +1079,9 @@ tmpfs_rename(struct vop_rename_args *v)
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
-			if (tnode->tn_size > 0) {
+			if (tnode->tn_size != 0 &&
+			    ((tcnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+			    tnode->tn_size > tnode->tn_dir.tn_wht_size)) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
@@ -1239,6 +1242,16 @@ tmpfs_rename(struct vop_rename_args *v)
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
+		/*
+		 * If we are overwriting a directory, per the ENOTEMPTY check
+		 * above it must either be empty or contain only whiteout
+		 * entries.  In the latter case (which can only happen if
+		 * IGNOREWHITEOUT was passed in tcnp->cn_flags), clear the
+		 * whiteout entries to avoid leaking memory.
+		 */
+		if (tnode->tn_type == VDIR && tnode->tn_size > 0)
+			tmpfs_dir_clear_whiteouts(tvp);
+
 		/* Update node's ctime because of possible hardlinks. */
 		tnode->tn_status |= TMPFS_NODE_CHANGED;
 		tmpfs_update(tvp);
@@ -1309,6 +1322,7 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
+	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
@@ -1320,13 +1334,18 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
-	/* Directories with more than two entries ('.' and '..') cannot be
-	 * removed. */
-	 if (node->tn_size > 0) {
-		 error = ENOTEMPTY;
-		 goto out;
-	 }
+	/*
+	 * Directories with more than two non-whiteout entries ('.' and '..')
+	 * cannot be removed.
+	 */
+	if (node->tn_size != 0 &&
+	    ((cnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+	    node->tn_size > node->tn_dir.tn_wht_size)) {
+		error = ENOTEMPTY;
+		goto out;
+	}
 
+	/* Check flags to see if we are allowed to remove the directory. */
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
@@ -1334,27 +1353,31 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
-	  * We checked for that above so this is safe now. */
+	 * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
-	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
+	de = tmpfs_dir_lookup(dnode, node, cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
-	    v->a_cnp->cn_nameptr,
-	    v->a_cnp->cn_namelen));
-
-	/* Check flags to see if we are allowed to remove the directory. */
-	if ((dnode->tn_flags & APPEND) != 0 ||
-	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
-		error = EPERM;
-		goto out;
-	}
+	    cnp->cn_nameptr,
+	    cnp->cn_namelen));
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
-	if (v->a_cnp->cn_flags & DOWHITEOUT)
-		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
+
+	/*
+	 * If we are removing a directory, per the ENOTEMPTY check above it
+	 * must either be empty or contain only whiteout entries.  In the
+	 * latter case (which can only happen if IGNOREWHITEOUT was passed
+	 * in cnp->cn_flags), clear the whiteout entries to avoid leaking
+	 * memory.
+	 */
+	if (node->tn_size > 0)
+		tmpfs_dir_clear_whiteouts(vp);
+
+	if (cnp->cn_flags & DOWHITEOUT)
+		tmpfs_dir_whiteout_add(dvp, cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
@@ -1668,6 +1691,10 @@ tmpfs_pathconf(struct vop_pathconf_args *v)
 		*retval = PAGE_SIZE;
 		break;
 
+	case _PC_HAS_HIDDENSYSTEM:
+		*retval = 1;
+		break;
+
 	default:
 		error = vop_stdpathconf(v);
 	}
@@ -1684,21 +1711,15 @@ vop_vptofh {
 };
 */
 {
-	struct tmpfs_fid_data tfd;
+	struct tmpfs_fid_data *const tfd = (struct tmpfs_fid_data *)ap->a_fhp;
 	struct tmpfs_node *node;
-	struct fid *fhp;
+	_Static_assert(sizeof(struct tmpfs_fid_data) <= sizeof(struct fid),
+	    "struct tmpfs_fid_data cannot be larger than struct fid");
 
 	node = VP_TO_TMPFS_NODE(ap->a_vp);
-	fhp = ap->a_fhp;
-	fhp->fid_len = sizeof(tfd);
-
-	/*
-	 * Copy into fid_data from the stack to avoid unaligned pointer use.
-	 * See the comment in sys/mount.h on struct fid for details.
-	 */
-	tfd.tfd_id = node->tn_id;
-	tfd.tfd_gen = node->tn_gen;
-	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
+	tfd->tfd_len = sizeof(*tfd);
+	tfd->tfd_gen = node->tn_gen;
+	tfd->tfd_id = node->tn_id;
 
 	return (0);
 }
@@ -2070,31 +2091,10 @@ tmpfs_setextattr(struct vop_setextattr_args *ap)
 static off_t
 tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
 {
-	vm_page_t m;
-	vm_pindex_t p, p_m, p_swp;
-
-	p = OFF_TO_IDX(noff);
-	m = vm_page_find_least(obj, p);
-
-	/*
-	 * Microoptimize the most common case for SEEK_DATA, where
-	 * there is no hole and the page is resident.
-	 */
-	if (m != NULL && vm_page_any_valid(m) && m->pindex == p)
-		return (noff);
-
-	p_swp = swap_pager_find_least(obj, p);
-	if (p_swp == p)
-		return (noff);
-
-	p_m = m == NULL ? obj->size : m->pindex;
-	return (IDX_TO_OFF(MIN(p_m, p_swp)));
-}
+	vm_pindex_t p;
 
-static off_t
-tmpfs_seek_next(off_t noff)
-{
-	return (noff + PAGE_SIZE - (noff & PAGE_MASK));
+	p = swap_pager_seek_data(obj, OFF_TO_IDX(noff));
+	return (p == OFF_TO_IDX(noff) ? noff : IDX_TO_OFF(p));
 }
 
 static int
@@ -2111,30 +2111,8 @@ tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
 static off_t
 tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
 {
-	vm_page_t m;
-	vm_pindex_t p, p_swp;
-
-	for (;; noff = tmpfs_seek_next(noff)) {
-		/*
-		 * Walk over the largest sequential run of the valid pages.
-		 */
-		for (m = vm_page_lookup(obj, OFF_TO_IDX(noff));
-		    m != NULL && vm_page_any_valid(m);
-		    m = vm_page_next(m), noff = tmpfs_seek_next(noff))
-			;
 
-		/*
-		 * Found a hole in the object's page queue.  Check if
-		 * there is a hole in the swap at the same place.
-		 */
-		p = OFF_TO_IDX(noff);
-		p_swp = swap_pager_find_least(obj, p);
-		if (p_swp != p) {
-			noff = IDX_TO_OFF(p);
-			break;
-		}
-	}
-	return (noff);
+	return (IDX_TO_OFF(swap_pager_seek_hole(obj, OFF_TO_IDX(noff))));
 }
 
 static int
diff --git a/sys/fs/udf/ecma167-udf.h b/sys/fs/udf/ecma167-udf.h
index 839bbec08254..19e114763cac 100644
--- a/sys/fs/udf/ecma167-udf.h
+++ b/sys/fs/udf/ecma167-udf.h
@@ -243,7 +243,7 @@ struct part_map_spare {
 	uint8_t			n_st;	/* Number of Sparing Tables */
 	uint8_t			reserved1;
 	uint32_t		st_size;
-	uint32_t		st_loc[1];
+	uint32_t		st_loc[];
 } __packed;
 
 union udf_pmap {
@@ -266,7 +266,7 @@ struct udf_sparing_table {
 	uint16_t		rt_l;	/* Relocation Table len */
 	uint8_t			reserved[2];
 	uint32_t		seq_num;
-	struct spare_map_entry	entries[1];
+	struct spare_map_entry	entries[];
 } __packed;
 
 /* Partition Descriptor [3/10.5] */
diff --git a/sys/fs/udf/udf_vfsops.c b/sys/fs/udf/udf_vfsops.c
index 866d0172f745..c5ef1f686093 100644
--- a/sys/fs/udf/udf_vfsops.c
+++ b/sys/fs/udf/udf_vfsops.c
@@ -32,7 +32,7 @@
 /*
  * Ok, here's how it goes.  The UDF specs are pretty clear on how each data
  * structure is made up, but not very clear on how they relate to each other.
- * Here is the skinny... This demostrates a filesystem with one file in the
+ * Here is the skinny... This demonstrates a filesystem with one file in the
  * root directory.  Subdirectories are treated just as normal files, but they
  * have File Id Descriptors of their children as their file data.  As for the
  * Anchor Volume Descriptor Pointer, it can exist in two of the following three
@@ -81,6 +81,7 @@
 #include <sys/fcntl.h>
 #include <sys/iconv.h>
 #include <sys/kernel.h>
+#include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
@@ -729,7 +730,7 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 	struct ifid *ifhp;
 	struct vnode *nvp;
 	struct udf_node *np;
-	off_t fsize;
+	uint64_t fsize;
 	int error;
 
 	ifhp = (struct ifid *)fhp;
@@ -741,6 +742,10 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 
 	np = VTON(nvp);
 	fsize = le64toh(np->fentry->inf_len);
+	if (fsize > OFF_MAX) {
+		*vpp = NULLVP;
+		return (EIO);
+	}
 
 	*vpp = nvp;
 	vnode_create_vobject(*vpp, fsize, curthread);
diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c
index f230ca0c72fa..37889241e8c3 100644
--- a/sys/fs/udf/udf_vnops.c
+++ b/sys/fs/udf/udf_vnops.c
@@ -39,6 +39,7 @@
 #include <sys/conf.h>
 #include <sys/buf.h>
 #include <sys/iconv.h>
+#include <sys/limits.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
@@ -182,11 +183,14 @@ udf_access(struct vop_access_args *a)
 }
 
 static int
-udf_open(struct vop_open_args *ap) {
+udf_open(struct vop_open_args *ap)
+{
 	struct udf_node *np = VTON(ap->a_vp);
-	off_t fsize;
+	uint64_t fsize;
 
 	fsize = le64toh(np->fentry->inf_len);
+	if (fsize > OFF_MAX)
+		return (EIO);
 	vnode_create_vobject(ap->a_vp, fsize, ap->a_td);
 	return 0;
 }
@@ -314,12 +318,13 @@ udf_getattr(struct vop_getattr_args *a)
 		 * that directories consume at least one logical block,
 		 * make it appear so.
 		 */
-		if (fentry->logblks_rec != 0) {
-			vap->va_size =
-			    le64toh(fentry->logblks_rec) * node->udfmp->bsize;
-		} else {
+		vap->va_size = le64toh(fentry->logblks_rec);
+		if (vap->va_size == 0)
 			vap->va_size = node->udfmp->bsize;
-		}
+		else if (vap->va_size > UINT64_MAX / node->udfmp->bsize)
+			vap->va_size = UINT64_MAX;
+		else
+			vap->va_size *= node->udfmp->bsize;
 	} else {
 		vap->va_size = le64toh(fentry->inf_len);
 	}
@@ -446,6 +451,7 @@ udf_read(struct vop_read_args *ap)
 	struct buf *bp;
 	uint8_t *data;
 	daddr_t lbn, rablock;
+	uint64_t len;
 	off_t diff, fsize;
 	ssize_t n;
 	int error = 0;
@@ -471,7 +477,12 @@ udf_read(struct vop_read_args *ap)
 		return (error);
 	}
 
-	fsize = le64toh(node->fentry->inf_len);
+	len = le64toh(node->fentry->inf_len);
+	if (len > OFF_MAX) {
+		/* too big, just cap to the requested length */
+		len = uio->uio_resid;
+	}
+	fsize = len;
 	udfmp = node->udfmp;
 	do {
 		lbn = lblkno(udfmp, uio->uio_offset);
@@ -783,6 +794,7 @@ udf_readdir(struct vop_readdir_args *a)
 	struct udf_uiodir uiodir;
 	struct udf_dirstream *ds;
 	uint64_t *cookies = NULL;
+	uint64_t len;
 	int ncookies;
 	int error = 0;
 
@@ -800,8 +812,6 @@ udf_readdir(struct vop_readdir_args *a)
 		 */
 		ncookies = uio->uio_resid / 8;
 		cookies = malloc(sizeof(*cookies) * ncookies, M_TEMP, M_WAITOK);
-		if (cookies == NULL)
-			return (ENOMEM);
 		uiodir.ncookies = ncookies;
 		uiodir.cookies = cookies;
 		uiodir.acookies = 0;
@@ -813,8 +823,12 @@ udf_readdir(struct vop_readdir_args *a)
 	 * Iterate through the file id descriptors.  Give the parent dir
 	 * entry special attention.
 	 */
-	ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len),
-	    node->udfmp);
+	len = le64toh(node->fentry->inf_len);
+	if (len > INT_MAX) {
+		/* too big, just cap to INT_MAX */
+		len = INT_MAX;
+	}
+	ds = udf_opendir(node, uio->uio_offset, len, node->udfmp);
 
 	while ((fid = udf_getfid(ds)) != NULL) {
 		/* XXX Should we return an error on a bad fid? */
@@ -906,7 +920,8 @@ udf_readlink(struct vop_readlink_args *ap)
 	struct udf_node *node;
 	void *buf;
 	char *cp;
-	int error, len, root;
+	uint64_t len;
+	int error, root;
 
 	/*
 	 * A symbolic link in UDF is a list of variable-length path
@@ -916,6 +931,8 @@ udf_readlink(struct vop_readlink_args *ap)
 	vp = ap->a_vp;
 	node = VTON(vp);
 	len = le64toh(node->fentry->inf_len);
+	if (len > MAXPATHLEN)
+		return (EIO);
 	buf = malloc(len, M_DEVBUF, M_WAITOK);
 	iov[0].iov_len = len;
 	iov[0].iov_base = buf;
@@ -1118,13 +1135,14 @@ udf_lookup(struct vop_cachedlookup_args *a)
 	struct udf_mnt *udfmp;
 	struct fileid_desc *fid = NULL;
 	struct udf_dirstream *ds;
+	uint64_t fsize;
 	u_long nameiop;
 	u_long flags;
 	char *nameptr;
 	long namelen;
 	ino_t id = 0;
 	int offset, error = 0;
-	int fsize, lkflags, ltype, numdirpasses;
+	int lkflags, ltype, numdirpasses;
 
 	dvp = a->a_dvp;
 	node = VTON(dvp);
@@ -1135,6 +1153,10 @@ udf_lookup(struct vop_cachedlookup_args *a)
 	nameptr = a->a_cnp->cn_nameptr;
 	namelen = a->a_cnp->cn_namelen;
 	fsize = le64toh(node->fentry->inf_len);
+	if (fsize > INT_MAX) {
+		/* too big, just cap to INT_MAX */
+		fsize = INT_MAX;
+	}
 
 	/*
 	 * If this is a LOOKUP and we've already partially searched through
@@ -1276,6 +1298,8 @@ udf_vptofh(struct vop_vptofh_args *a)
 {
 	struct udf_node *node;
 	struct ifid *ifhp;
+	_Static_assert(sizeof(struct ifid) <= sizeof(struct fid),
+	    "struct ifid cannot be larger than struct fid");
 
 	node = VTON(a->a_vp);
 	ifhp = (struct ifid *)a->a_fhp;
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
index 467db3b29ff8..0bd1894a2195 100644
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -97,15 +97,17 @@ struct unionfs_node {
 
 	char           *un_path;		/* path */
 	int		un_pathlen;		/* strlen of path */
-	int		un_flag;		/* unionfs node flag */
-};
 
-/*
- * unionfs node flags
- * It needs the vnode with exclusive lock, when changing the un_flag variable.
- */
-#define UNIONFS_OPENEXTL	0x01	/* openextattr (lower) */
-#define UNIONFS_OPENEXTU	0x02	/* openextattr (upper) */
+	/*
+	 * unionfs node flags
+	 * Changing these flags requires the vnode to be locked exclusive.
+	 */
+	#define UNIONFS_OPENEXTL		0x01	/* openextattr (lower) */
+	#define UNIONFS_OPENEXTU		0x02	/* openextattr (upper) */
+	#define UNIONFS_COPY_IN_PROGRESS	0x04	/* copy/dir shadow in progress */
+	#define UNIONFS_LOOKUP_IN_PROGRESS	0x08
+	unsigned int	un_flag;		/* unionfs node flag */
+};
 
 extern struct vop_vector unionfs_vnodeops;
 
@@ -131,34 +133,32 @@ int	unionfs_uninit(struct vfsconf *);
 int	unionfs_nodeget(struct mount *, struct vnode *, struct vnode *,
 	    struct vnode *, struct vnode **, struct componentname *);
 void	unionfs_noderem(struct vnode *);
+struct unionfs_node_status *	unionfs_find_node_status(struct unionfs_node *,
+	    struct thread *td);
 void	unionfs_get_node_status(struct unionfs_node *, struct thread *,
 	    struct unionfs_node_status **);
 void	unionfs_tryrem_node_status(struct unionfs_node *,
 	    struct unionfs_node_status *);
 int	unionfs_check_rmdir(struct vnode *, struct ucred *, struct thread *td);
-int	unionfs_copyfile(struct unionfs_node *, int, struct ucred *,
+int	unionfs_copyfile(struct vnode *, int, struct ucred *,
 	    struct thread *);
 void	unionfs_create_uppervattr_core(struct unionfs_mount *, struct vattr *,
 	    struct vattr *, struct thread *);
 int	unionfs_create_uppervattr(struct unionfs_mount *, struct vnode *,
 	    struct vattr *, struct ucred *, struct thread *);
-int	unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *,
-	    struct unionfs_node *, struct componentname *, struct thread *);
+int	unionfs_mkshadowdir(struct vnode *, struct vnode *,
+	    struct componentname *, struct thread *);
 int	unionfs_mkwhiteout(struct vnode *, struct vnode *,
 	    struct componentname *, struct thread *, char *, int);
 int	unionfs_relookup(struct vnode *, struct vnode **,
 	    struct componentname *, struct componentname *, struct thread *,
 	    char *, int, u_long);
-int	unionfs_relookup_for_create(struct vnode *, struct componentname *,
-	    struct thread *);
-int	unionfs_relookup_for_delete(struct vnode *, struct componentname *,
-	    struct thread *);
-int	unionfs_relookup_for_rename(struct vnode *, struct componentname *,
-	    struct thread *);
 void	unionfs_forward_vop_start_pair(struct vnode *, int *,
 	    struct vnode *, int *);
 bool	unionfs_forward_vop_finish_pair(struct vnode *, struct vnode *, int,
 	    struct vnode *, struct vnode *, int);
+int	unionfs_set_in_progress_flag(struct vnode *, unsigned int);
+void	unionfs_clear_in_progress_flag(struct vnode *, unsigned int);
 
 static inline void
 unionfs_forward_vop_start(struct vnode *basevp, int *lkflags)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index bb57f3d56ade..edcc6716b674 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -203,19 +203,19 @@ unionfs_ins_cached_vnode(struct unionfs_node *uncp,
 	struct unionfs_node_hashhead *hd;
 	struct vnode *vp;
 
-	ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
-	ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
-	KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR,
-	    ("%s: v_type != VDIR", __func__));
-	KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR,
-	    ("%s: v_type != VDIR", __func__));
-
 	vp = NULLVP;
 	VI_LOCK(dvp);
-	if (uncp->un_uppervp != NULL)
+	if (uncp->un_uppervp != NULLVP) {
+		ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
+		KASSERT(uncp->un_uppervp->v_type == VDIR,
+		    ("%s: v_type != VDIR", __func__));
 		vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp);
-	else if (uncp->un_lowervp != NULL)
+	} else if (uncp->un_lowervp != NULLVP) {
+		ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
+		KASSERT(uncp->un_lowervp->v_type == VDIR,
+		    ("%s: v_type != VDIR", __func__));
 		vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp);
+	}
 	if (vp == NULLVP) {
 		hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ?
 		    uncp->un_uppervp : uncp->un_lowervp));
@@ -276,9 +276,11 @@ unionfs_nodeget_cleanup(struct vnode *vp, struct unionfs_node *unp)
 
 	if (unp->un_dvp != NULLVP)
 		vrele(unp->un_dvp);
-	if (unp->un_uppervp != NULLVP)
+	if (unp->un_uppervp != NULLVP) {
 		vput(unp->un_uppervp);
-	if (unp->un_lowervp != NULLVP)
+		if (unp->un_lowervp != NULLVP)
+			vrele(unp->un_lowervp);
+	} else if (unp->un_lowervp != NULLVP)
 		vput(unp->un_lowervp);
 	if (unp->un_hashtbl != NULL)
 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
@@ -314,7 +316,7 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
 	*vpp = NULLVP;
 
 	if (uppervp == NULLVP && lowervp == NULLVP)
-		panic("%s: upper and lower is null", __func__);
+		panic("%s: upper and lower are both null", __func__);
 
 	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
 
@@ -327,7 +329,9 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
 		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp);
 		if (vp != NULLVP) {
 			*vpp = vp;
-			goto unionfs_nodeget_out;
+			if (lkflags != 0)
+				vn_lock(*vpp, lkflags | LK_RETRY);
+			return (0);
 		}
 	}
 
@@ -385,27 +389,47 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
 	KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
 	    ("%s: NULL dvp for non-root vp %p", __func__, vp));
 
-	vn_lock_pair(lowervp, false, LK_EXCLUSIVE, uppervp, false,
-	    LK_EXCLUSIVE);
+
+	/*
+	 * NOTE: There is still a possibility for cross-filesystem locking here.
+	 * If dvp has an upper FS component and is locked, while the new vnode
+	 * created here only has a lower-layer FS component, then we will end
+	 * up taking a lower-FS lock while holding an upper-FS lock.
+	 * That situation could be dealt with here using vn_lock_pair().
+	 * However, that would only address one instance out of many in which
+	 * a child vnode lock is taken while holding a lock on its parent
+	 * directory. This is done in many places in common VFS code, as well as
+	 * a few places within unionfs (which could lead to the same cross-FS
+	 * locking issue if, for example, the upper FS is another nested unionfs
+	 * instance).  Additionally, it is unclear under what circumstances this
+	 * specific lock sequence (a directory on one FS followed by a child of
+	 * its 'peer' directory on another FS) would present the practical
+	 * possibility of deadlock due to some other agent on the system
+	 * attempting to lock those two specific vnodes in the opposite order.
+	 */
+	if (uppervp != NULLVP)
+		vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY);
+	else
+		vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
 	error = insmntque1(vp, mp);
 	if (error != 0) {
 		unionfs_nodeget_cleanup(vp, unp);
 		return (error);
 	}
-	if (lowervp != NULL && VN_IS_DOOMED(lowervp)) {
-		vput(lowervp);
-		unp->un_lowervp = lowervp = NULL;
-	}
-	if (uppervp != NULL && VN_IS_DOOMED(uppervp)) {
-		vput(uppervp);
-		unp->un_uppervp = uppervp = NULL;
-		if (lowervp != NULLVP)
-			vp->v_vnlock = lowervp->v_vnlock;
-	}
-	if (lowervp == NULL && uppervp == NULL) {
-		unionfs_nodeget_cleanup(vp, unp);
-		return (ENOENT);
-	}
+	/*
+	 * lowervp and uppervp should only be doomed by a forced unmount of
+	 * their respective filesystems, but that can only happen if the
+	 * unionfs instance is first unmounted.  We also effectively hold the
+	 * lock on the new unionfs vnode at this point.  Therefore, if a
+	 * unionfs umount has not yet reached the point at which the above
+	 * insmntque1() would fail, then its vflush() call will end up
+	 * blocked on our vnode lock, effectively also preventing unmount
+	 * of the underlying filesystems.
+	 */
+	VNASSERT(lowervp == NULLVP || !VN_IS_DOOMED(lowervp), vp,
+	    ("%s: doomed lowervp %p", __func__, lowervp));
+	VNASSERT(uppervp == NULLVP || !VN_IS_DOOMED(uppervp), vp,
+	    ("%s: doomed uppervp %p", __func__, uppervp));
 
 	vn_set_state(vp, VSTATE_CONSTRUCTED);
 
@@ -413,18 +437,16 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
 		*vpp = unionfs_ins_cached_vnode(unp, dvp);
 	if (*vpp != NULLVP) {
 		unionfs_nodeget_cleanup(vp, unp);
-		vp = *vpp;
-	} else {
-		if (uppervp != NULL)
-			VOP_UNLOCK(uppervp);
-		if (lowervp != NULL)
-			VOP_UNLOCK(lowervp);
+		if (lkflags != 0)
+			vn_lock(*vpp, lkflags | LK_RETRY);
+		return (0);
+	} else
 		*vpp = vp;
-	}
 
-unionfs_nodeget_out:
-	if (lkflags & LK_TYPE_MASK)
-		vn_lock(vp, lkflags | LK_RETRY);
+	if ((lkflags & LK_SHARED) != 0)
+		vn_lock(vp, LK_DOWNGRADE);
+	else if ((lkflags & LK_EXCLUSIVE) == 0)
+		VOP_UNLOCK(vp);
 
 	return (0);
 }
@@ -443,6 +465,7 @@ unionfs_noderem(struct vnode *vp)
 	struct vnode   *dvp;
 	int		count;
 	int		writerefs;
+	bool		unlock_lvp;
 
 	/*
 	 * The root vnode lock may be recursed during unmount, because
@@ -455,18 +478,36 @@ unionfs_noderem(struct vnode *vp)
 	 */
 	KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0,
 	    ("%s: vnode %p locked recursively", __func__, vp));
+
+	unp = VTOUNIONFS(vp);
+	VNASSERT(unp != NULL, vp, ("%s: already reclaimed", __func__));
+	lvp = unp->un_lowervp;
+	uvp = unp->un_uppervp;
+	dvp = unp->un_dvp;
+	unlock_lvp = (uvp == NULLVP);
+
+	/*
+	 * Lock the lower vnode in addition to the upper vnode lock in order
+	 * to synchronize against any unionfs_lock() operation which may still
+	 * hold the lower vnode lock.  We do not need to do this for the root
+	 * vnode, as the root vnode should always have both upper and lower
+	 * base vnodes for its entire lifecycle, so unionfs_lock() should
+	 * never attempt to lock its lower vnode in the first place.
+	 * Moreover, during unmount of a non-"below" unionfs mount, the lower
+	 * root vnode will already be locked as it is the covered vnode.
+	 */
+	if (uvp != NULLVP && lvp != NULLVP && (vp->v_vflag & VV_ROOT) == 0) {
+		vn_lock_pair(uvp, true, LK_EXCLUSIVE, lvp, false, LK_EXCLUSIVE);
+		unlock_lvp = true;
+	}
+
 	if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
 		panic("%s: failed to acquire lock for vnode lock", __func__);
-
 	/*
 	 * Use the interlock to protect the clearing of v_data to
 	 * prevent faults in unionfs_lock().
 	 */
 	VI_LOCK(vp);
-	unp = VTOUNIONFS(vp);
-	lvp = unp->un_lowervp;
-	uvp = unp->un_uppervp;
-	dvp = unp->un_dvp;
 	unp->un_lowervp = unp->un_uppervp = NULLVP;
 	vp->v_vnlock = &(vp->v_lock);
 	vp->v_data = NULL;
@@ -502,18 +543,16 @@ unionfs_noderem(struct vnode *vp)
 		    ("%s: write reference without upper vnode", __func__));
 		VOP_ADD_WRITECOUNT(uvp, -writerefs);
 	}
-	if (lvp != NULLVP)
-		VOP_UNLOCK(lvp);
 	if (uvp != NULLVP)
-		VOP_UNLOCK(uvp);
+		vput(uvp);
+	if (unlock_lvp)
+		vput(lvp);
+	else if (lvp != NULLVP)
+		vrele(lvp);
 
 	if (dvp != NULLVP)
 		unionfs_rem_cached_vnode(unp, dvp);
 
-	if (lvp != NULLVP)
-		vrele(lvp);
-	if (uvp != NULLVP)
-		vrele(uvp);
 	if (unp->un_path != NULL) {
 		free(unp->un_path, M_UNIONFSPATH);
 		unp->un_path = NULL;
@@ -539,35 +578,52 @@ unionfs_noderem(struct vnode *vp)
 }
 
 /*
- * Get the unionfs node status object for the vnode corresponding to unp,
- * for the process that owns td.  Allocate a new status object if one
- * does not already exist.
+ * Find the unionfs node status object for the vnode corresponding to unp,
+ * for the process that owns td.  Return NULL if no such object exists.
  */
-void
-unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
-    struct unionfs_node_status **unspp)
+struct unionfs_node_status *
+unionfs_find_node_status(struct unionfs_node *unp, struct thread *td)
 {
 	struct unionfs_node_status *unsp;
 	pid_t pid;
 
 	pid = td->td_proc->p_pid;
 
-	KASSERT(NULL != unspp, ("%s: NULL status", __func__));
 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
 
 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
 		if (unsp->uns_pid == pid) {
-			*unspp = unsp;
-			return;
+			return (unsp);
 		}
 	}
 
-	/* create a new unionfs node status */
-	unsp = malloc(sizeof(struct unionfs_node_status),
-	    M_TEMP, M_WAITOK | M_ZERO);
+	return (NULL);
+}
+
+/*
+ * Get the unionfs node status object for the vnode corresponding to unp,
+ * for the process that owns td.  Allocate a new status object if one
+ * does not already exist.
+ */
+void
+unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
+    struct unionfs_node_status **unspp)
+{
+	struct unionfs_node_status *unsp;
+	pid_t pid;
+
+	pid = td->td_proc->p_pid;
 
-	unsp->uns_pid = pid;
-	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
+	KASSERT(NULL != unspp, ("%s: NULL status", __func__));
+	unsp = unionfs_find_node_status(unp, td);
+	if (unsp == NULL) {
+		/* create a new unionfs node status */
+		unsp = malloc(sizeof(struct unionfs_node_status),
+		    M_TEMP, M_WAITOK | M_ZERO);
+
+		unsp->uns_pid = pid;
+		LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
+	}
 
 	*unspp = unsp;
 }
@@ -697,110 +753,6 @@ unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
 }
 
 /*
- * relookup for CREATE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- *
- * If it called 'unionfs_copyfile' function by unionfs_link etc,
- * VOP_LOOKUP information is broken.
- * So it need relookup in order to create link etc.
- */
-int
-unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
-    struct thread *td)
-{
-	struct vnode *udvp;
-	struct vnode *vp;
-	struct componentname cn;
-	int error;
-
-	udvp = UNIONFSVPTOUPPERVP(dvp);
-	vp = NULLVP;
-
-	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
-	    cnp->cn_namelen, CREATE);
-	if (error)
-		return (error);
-
-	if (vp != NULLVP) {
-		if (udvp == vp)
-			vrele(vp);
-		else
-			vput(vp);
-
-		error = EEXIST;
-	}
-
-	return (error);
-}
-
-/*
- * relookup for DELETE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
-    struct thread *td)
-{
-	struct vnode *udvp;
-	struct vnode *vp;
-	struct componentname cn;
-	int error;
-
-	udvp = UNIONFSVPTOUPPERVP(dvp);
-	vp = NULLVP;
-
-	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
-	    cnp->cn_namelen, DELETE);
-	if (error)
-		return (error);
-
-	if (vp == NULLVP)
-		error = ENOENT;
-	else {
-		if (udvp == vp)
-			vrele(vp);
-		else
-			vput(vp);
-	}
-
-	return (error);
-}
-
-/*
- * relookup for RENAME namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
-    struct thread *td)
-{
-	struct vnode *udvp;
-	struct vnode *vp;
-	struct componentname cn;
-	int error;
-
-	udvp = UNIONFSVPTOUPPERVP(dvp);
-	vp = NULLVP;
-
-	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
-	    cnp->cn_namelen, RENAME);
-	if (error)
-		return (error);
-
-	if (vp != NULLVP) {
-		if (udvp == vp)
-			vrele(vp);
-		else
-			vput(vp);
-	}
-
-	return (error);
-}
-
-/*
  * Update the unionfs_node.
  * 
  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
@@ -836,6 +788,8 @@ unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
 	vp->v_vnlock = uvp->v_vnlock;
 	VI_UNLOCK(vp);
 
+	for (count = 0; count < lockrec + 1; count++)
+		VOP_UNLOCK(lvp);
 	/*
 	 * Re-cache the unionfs vnode against the upper vnode
 	 */
@@ -851,18 +805,87 @@ unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
 }
 
 /*
+ * Mark a unionfs operation as being in progress, sleeping if the
+ * same operation is already in progress.
+ * This is useful, for example, during copy-up operations in which
+ * we may drop the target vnode lock, but we want to avoid the
+ * possibility of a concurrent copy-up on the same vnode triggering
+ * a spurious failure.
+ */
+int
+unionfs_set_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+	struct unionfs_node *unp;
+	int error;
+
+	error = 0;
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	VI_LOCK(vp);
+	unp = VTOUNIONFS(vp);
+	while (error == 0 && (unp->un_flag & flag) != 0) {
+		VOP_UNLOCK(vp);
+		error = msleep(vp, VI_MTX(vp), PCATCH | PDROP, "unioncp", 0);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		VI_LOCK(vp);
+		if (error == 0) {
+			/*
+			 * If we waited on a concurrent copy-up and that
+			 * copy-up was successful, return a non-fatal
+			 * indication that the desired operation is already
+			 * complete.  If we waited on a concurrent lookup,
+			 * return ERELOOKUP to indicate the VFS cache should
+			 * be re-queried to avoid creating a duplicate unionfs
+			 * vnode.
+			 */
+			unp = VTOUNIONFS(vp);
+			if (unp == NULL)
+				error = ENOENT;
+			else if (flag == UNIONFS_COPY_IN_PROGRESS &&
+			    unp->un_uppervp != NULLVP)
+				error = EJUSTRETURN;
+			else if (flag == UNIONFS_LOOKUP_IN_PROGRESS)
+				error = ERELOOKUP;
+		}
+	}
+	if (error == 0)
+		unp->un_flag |= flag;
+	VI_UNLOCK(vp);
+
+	return (error);
+}
+
+void
+unionfs_clear_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+	struct unionfs_node *unp;
+
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	unp = VTOUNIONFS(vp);
+	VI_LOCK(vp);
+	if (unp != NULL) {	/* no node data left: nothing to clear */
+		VNASSERT((unp->un_flag & flag) != 0, vp,
+		    ("%s: copy not in progress", __func__));
+		unp->un_flag &= ~flag;
+	}
+	wakeup(vp);	/* wake sleepers in unionfs_set_in_progress_flag() */
+	VI_UNLOCK(vp);
+}
+
+/*
  * Create a new shadow dir.
  * 
- * udvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
  * 
  * If no error returned, unp will be updated.
  */
 int
-unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
-    struct unionfs_node *unp, struct componentname *cnp, struct thread *td)
+unionfs_mkshadowdir(struct vnode *dvp, struct vnode *vp,
+    struct componentname *cnp, struct thread *td)
 {
 	struct vnode   *lvp;
 	struct vnode   *uvp;
+	struct vnode   *udvp;
 	struct vattr	va;
 	struct vattr	lva;
 	struct nameidata nd;
@@ -870,10 +893,25 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 	struct ucred   *cred;
 	struct ucred   *credbk;
 	struct uidinfo *rootinfo;
+	struct unionfs_mount *ump;
+	struct unionfs_node *dunp;
+	struct unionfs_node *unp;
 	int		error;
 
+	ASSERT_VOP_ELOCKED(dvp, __func__);
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	ump = MOUNTTOUNIONFSMOUNT(vp->v_mount);
+	unp = VTOUNIONFS(vp);
 	if (unp->un_uppervp != NULLVP)
 		return (EEXIST);
+	dunp = VTOUNIONFS(dvp);
+	udvp = dunp->un_uppervp;
+
+	error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+	if (error == EJUSTRETURN)
+		return (0);
+	else if (error != 0)
+		return (error);
 
 	lvp = unp->un_lowervp;
 	uvp = NULLVP;
@@ -882,11 +920,6 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 	/* Authority change to root */
 	rootinfo = uifind((uid_t)0);
 	cred = crdup(cnp->cn_cred);
-	/*
-	 * The calls to chgproccnt() are needed to compensate for change_ruid()
-	 * calling chgproccnt().
-	 */
-	chgproccnt(cred->cr_ruidinfo, 1, 0);
 	change_euid(cred, rootinfo);
 	change_ruid(cred, rootinfo);
 	change_svuid(cred, (uid_t)0);
@@ -897,11 +930,29 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 	NDPREINIT(&nd);
 
 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
-		goto unionfs_mkshadowdir_abort;
+		goto unionfs_mkshadowdir_finish;
 
+	vref(udvp);
+	VOP_UNLOCK(vp);
 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td,
-	    cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
-		goto unionfs_mkshadowdir_abort;
+	    cnp->cn_nameptr, cnp->cn_namelen, CREATE))) {
+		/*
+		 * When handling error cases here, we drop udvp's lock and
+		 * then jump to exit code that relocks dvp, which in most
+		 * cases will effectively relock udvp.  However, this is
+		 * not guaranteed to be the case, as various calls made
+		 * here (such as unionfs_relookup() above and VOP_MKDIR()
+		 * below) may unlock and then relock udvp, allowing dvp to
+		 * be reclaimed in the meantime.  In such a situation dvp
+		 * will no longer share its lock with udvp.  Since
+		 * performance isn't a concern for these error cases, it
+		 * makes more sense to reuse the common code that locks
+		 * dvp on exit than to explicitly check for reclamation
+		 * of dvp.
+		 */
+		vput(udvp);
+		goto unionfs_mkshadowdir_relock;
+	}
 	if (uvp != NULLVP) {
 		if (udvp == uvp)
 			vrele(uvp);
@@ -909,11 +960,14 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 			vput(uvp);
 
 		error = EEXIST;
-		goto unionfs_mkshadowdir_abort;
+		vput(udvp);
+		goto unionfs_mkshadowdir_relock;
 	}
 
-	if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
-		goto unionfs_mkshadowdir_abort;
+	if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) {
+		vput(udvp);
+		goto unionfs_mkshadowdir_relock;
+	}
 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
 
 	/*
@@ -924,7 +978,7 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 	 * component.  This *should* be fine, as cn_namelen will still
 	 * correctly indicate the length of only the current component,
 	 * but ZFS in particular does not respect cn_namelen in its VOP_MKDIR
-	 * implementation
+	 * implementation.
 	 * Note that this assumes nd.ni_cnd.cn_pnbuf was allocated by
 	 * something like a local namei() operation and the temporary
 	 * NUL-termination will not have an effect on other threads.
@@ -934,29 +988,59 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
 	*pathend = '\0';
 	error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va);
 	*pathend = pathterm;
-
-	if (!error) {
-		/*
-		 * XXX The bug which cannot set uid/gid was corrected.
-		 * Ignore errors.
-		 */
-		va.va_type = VNON;
-		VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
-
+	if (error != 0) {
 		/*
-		 * VOP_SETATTR() may transiently drop uvp's lock, so it's
-		 * important to call it before unionfs_node_update() transfers
-		 * the unionfs vnode's lock from lvp to uvp; otherwise the
-		 * unionfs vnode itself would be transiently unlocked and
-		 * potentially doomed.
+		 * See the comment after unionfs_relookup() above for an
+		 * explanation of why we unlock udvp here only to relock
+		 * dvp on exit.
 		 */
-		unionfs_node_update(unp, uvp, td);
+		vput(udvp);
+		vn_finished_write(mp);
+		goto unionfs_mkshadowdir_relock;
 	}
+
+	/*
+	 * XXX The bug which cannot set uid/gid was corrected.
+	 * Ignore errors.
+	 */
+	va.va_type = VNON;
+	/*
+	 * VOP_SETATTR() may transiently drop uvp's lock, so it's
+	 * important to call it before unionfs_node_update() transfers
+	 * the unionfs vnode's lock from lvp to uvp; otherwise the
+	 * unionfs vnode itself would be transiently unlocked and
+	 * potentially doomed.
+	 */
+	VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
+
+	/*
+	 * uvp may become doomed during VOP_VPUT_PAIR() if the implementation
+	 * must temporarily drop uvp's lock.  However, since we hold a
+	 * reference to uvp from the VOP_MKDIR() call above, this would require
+	 * a forcible unmount of uvp's filesystem, which in turn can only
+	 * happen if our unionfs instance is first forcibly unmounted.  We'll
+	 * therefore catch this case in the NULL check of unp below.
+	 */
+	VOP_VPUT_PAIR(udvp, &uvp, false);
 	vn_finished_write(mp);
+	vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+	unp = VTOUNIONFS(vp);
+	if (unp == NULL) {
+		vput(uvp);
+		error = ENOENT;
+	} else
+		unionfs_node_update(unp, uvp, td);
+	VOP_UNLOCK(vp);
+
+unionfs_mkshadowdir_relock:
+	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	if (error == 0 && (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)))
+		error = ENOENT;
 
-unionfs_mkshadowdir_abort:
+unionfs_mkshadowdir_finish:
+	unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
 	cnp->cn_cred = credbk;
-	chgproccnt(cred->cr_ruidinfo, -1, 0);
 	crfree(cred);
 
 	return (error);
@@ -1116,23 +1200,31 @@ unionfs_forward_vop_finish_pair(
 /*
  * Create a new whiteout.
  * 
- * udvp and dvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
  */
 int
-unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
+unionfs_mkwhiteout(struct vnode *dvp, struct vnode *vp,
     struct componentname *cnp, struct thread *td, char *path, int pathlen)
 {
+	struct vnode   *udvp;
 	struct vnode   *wvp;
 	struct nameidata nd;
 	struct mount   *mp;
 	int		error;
-	int		lkflags;
+	bool		dvp_locked;
+
+	ASSERT_VOP_ELOCKED(dvp, __func__);
+	ASSERT_VOP_ELOCKED(vp, __func__);
 
+	udvp = VTOUNIONFS(dvp)->un_uppervp;
 	wvp = NULLVP;
 	NDPREINIT(&nd);
+	vref(udvp);
+	VOP_UNLOCK(vp);
 	if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path,
 	    pathlen, CREATE))) {
-		return (error);
+		goto unionfs_mkwhiteout_cleanup;
 	}
 	if (wvp != NULLVP) {
 		if (udvp == wvp)
@@ -1140,18 +1232,27 @@ unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
 		else
 			vput(wvp);
 
-		return (EEXIST);
+		if (nd.ni_cnd.cn_flags & ISWHITEOUT)
+			error = 0;
+		else
+			error = EEXIST;
+		goto unionfs_mkwhiteout_cleanup;
 	}
 
 	if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
-		goto unionfs_mkwhiteout_free_out;
-	unionfs_forward_vop_start(udvp, &lkflags);
+		goto unionfs_mkwhiteout_cleanup;
 	error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE);
-	unionfs_forward_vop_finish(dvp, udvp, lkflags);
-
 	vn_finished_write(mp);
 
-unionfs_mkwhiteout_free_out:
+unionfs_mkwhiteout_cleanup:
+	if (VTOUNIONFS(dvp) == NULL) {
+		vput(udvp);
+		dvp_locked = false;
+	} else {
+		vrele(udvp);
+		dvp_locked = true;
+	}
+	vn_lock_pair(dvp, dvp_locked, LK_EXCLUSIVE, vp, false, LK_EXCLUSIVE);
 	return (error);
 }
 
@@ -1165,10 +1266,11 @@ unionfs_mkwhiteout_free_out:
  */
 static int
 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
-    struct unionfs_node *unp, struct vattr *uvap, struct thread *td)
+    struct vnode *vp, struct vattr *uvap, struct thread *td)
 {
 	struct unionfs_mount *ump;
-	struct vnode   *vp;
+	struct unionfs_node *unp;
+	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct ucred   *cred;
 	struct vattr	lva;
@@ -1176,8 +1278,10 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
 	int		fmode;
 	int		error;
 
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	unp = VTOUNIONFS(vp);
 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
-	vp = NULLVP;
+	uvp = NULLVP;
 	lvp = unp->un_lowervp;
 	cred = td->td_ucred;
 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
@@ -1200,42 +1304,39 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
 	NDPREINIT(&nd);
 
 	vref(udvp);
-	if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd, false)) != 0)
-		goto unionfs_vn_create_on_upper_free_out2;
-	vrele(udvp);
+	VOP_UNLOCK(vp);
+	if ((error = vfs_relookup(udvp, &uvp, &nd.ni_cnd, false)) != 0) {
+		vrele(udvp);
+		return (error);
+	}
 
-	if (vp != NULLVP) {
-		if (vp == udvp)
-			vrele(vp);
+	if (uvp != NULLVP) {
+		if (uvp == udvp)
+			vrele(uvp);
 		else
-			vput(vp);
+			vput(uvp);
 		error = EEXIST;
-		goto unionfs_vn_create_on_upper_free_out1;
+		goto unionfs_vn_create_on_upper_cleanup;
 	}
 
-	if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0)
-		goto unionfs_vn_create_on_upper_free_out1;
+	if ((error = VOP_CREATE(udvp, &uvp, &nd.ni_cnd, uvap)) != 0)
+		goto unionfs_vn_create_on_upper_cleanup;
 
-	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
-		vput(vp);
-		goto unionfs_vn_create_on_upper_free_out1;
+	if ((error = VOP_OPEN(uvp, fmode, cred, td, NULL)) != 0) {
+		vput(uvp);
+		goto unionfs_vn_create_on_upper_cleanup;
 	}
-	error = VOP_ADD_WRITECOUNT(vp, 1);
+	error = VOP_ADD_WRITECOUNT(uvp, 1);
 	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
-	    __func__, vp, vp->v_writecount);
+	    __func__, uvp, uvp->v_writecount);
 	if (error == 0) {
-		*vpp = vp;
+		*vpp = uvp;
 	} else {
-		VOP_CLOSE(vp, fmode, cred, td);
+		VOP_CLOSE(uvp, fmode, cred, td);
 	}
 
-unionfs_vn_create_on_upper_free_out1:
-	VOP_UNLOCK(udvp);
-
-unionfs_vn_create_on_upper_free_out2:
-	KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path,
-	    ("%s: cn_pnbuf changed", __func__));
-
+unionfs_vn_create_on_upper_cleanup:
+	vput(udvp);
 	return (error);
 }
 
@@ -1310,13 +1411,18 @@ unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
  * 
  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
  * docopy.
+ *
+ * vp is a unionfs vnode that should be locked on entry and will be
+ * locked on return.
  * 
  * If no error returned, unp will be updated.
  */
 int
-unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
+unionfs_copyfile(struct vnode *vp, int docopy, struct ucred *cred,
     struct thread *td)
 {
+	struct unionfs_node *unp;
+	struct unionfs_node *dunp;
 	struct mount   *mp;
 	struct vnode   *udvp;
 	struct vnode   *lvp;
@@ -1324,6 +1430,8 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
 	struct vattr	uva;
 	int		error;
 
+	ASSERT_VOP_ELOCKED(vp, __func__);
+	unp = VTOUNIONFS(vp);
 	lvp = unp->un_lowervp;
 	uvp = NULLVP;
 
@@ -1333,22 +1441,51 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
 		return (EINVAL);
 	if (unp->un_uppervp != NULLVP)
 		return (EEXIST);
-	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
+
+	udvp = NULLVP;
+	VI_LOCK(unp->un_dvp);
+	dunp = VTOUNIONFS(unp->un_dvp);
+	if (dunp != NULL)
+		udvp = dunp->un_uppervp;
+	VI_UNLOCK(unp->un_dvp);
+
 	if (udvp == NULLVP)
 		return (EROFS);
 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
+	ASSERT_VOP_UNLOCKED(udvp, __func__);
+
+	error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+	if (error == EJUSTRETURN)
+		return (0);
+	else if (error != 0)
+		return (error);
 
 	error = VOP_ACCESS(lvp, VREAD, cred, td);
 	if (error != 0)
-		return (error);
+		goto unionfs_copyfile_cleanup;
 
 	if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)) != 0)
-		return (error);
-	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
+		goto unionfs_copyfile_cleanup;
+	error = unionfs_vn_create_on_upper(&uvp, udvp, vp, &uva, td);
 	if (error != 0) {
 		vn_finished_write(mp);
-		return (error);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		goto unionfs_copyfile_cleanup;
+	}
+
+	/*
+	 * Note that it's still possible for e.g. VOP_WRITE to relock
+	 * uvp below while holding vp[=lvp] locked.  Replacing
+	 * unionfs_copyfile_core with vn_generic_copy_file_range() will
+	 * allow us to avoid the problem by moving this vn_lock_pair()
+	 * call much later.
+	 */
+	vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+	unp = VTOUNIONFS(vp);
+	if (unp == NULL) {
+		error = ENOENT;
+		goto unionfs_copyfile_cleanup;
 	}
 
 	if (docopy != 0) {
@@ -1369,18 +1506,30 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
 		/* Reset the attributes. Ignore errors. */
 		uva.va_type = VNON;
 		VOP_SETATTR(uvp, &uva, cred);
+		unionfs_node_update(unp, uvp, td);
 	}
 
-	unionfs_node_update(unp, uvp, td);
-
+unionfs_copyfile_cleanup:
+	unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
 	return (error);
 }
 
 /*
- * It checks whether vp can rmdir. (check empty)
+ * Determine if the unionfs view of a directory is empty such that
+ * an rmdir operation can be permitted.
+ *
+ * We assume the VOP_RMDIR() against the upper layer vnode will take
+ * care of this check for us where the upper FS is concerned, so here
+ * we concentrate on the lower FS.  We need to check for the presence
+ * of files other than "." and ".." in the lower FS directory and
+ * then cross-check any files we find against the upper FS to see if
+ * a whiteout is present (in which case we treat the lower file as
+ * non-present).
+ *
+ * The logic here is based heavily on vn_dir_check_empty().
  *
- * vp is unionfs vnode.
- * vp should be locked.
+ * vp should be a locked unionfs node, and vp's lowervp should also be
+ * locked.
  */
 int
 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
@@ -1388,115 +1537,127 @@ unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *tvp;
+	char *dirbuf;
+	size_t dirbuflen, len;
+	off_t off;
 	struct dirent  *dp;
-	struct dirent  *edp;
 	struct componentname cn;
-	struct iovec	iov;
-	struct uio	uio;
 	struct vattr	va;
 	int		error;
 	int		eofflag;
-	int		lookuperr;
-
-	/*
-	 * The size of buf needs to be larger than DIRBLKSIZ.
-	 */
-	char		buf[256 * 6];
-
-	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	eofflag = 0;
-	uvp = UNIONFSVPTOUPPERVP(vp);
 	lvp = UNIONFSVPTOLOWERVP(vp);
+	uvp = UNIONFSVPTOUPPERVP(vp);
+
+	/*
+	 * Note that the locking here still isn't ideal: We expect the caller
+	 * to hold both the upper and lower layer locks as well as the upper
+	 * parent directory lock, which it can do in a manner that avoids
+	 * deadlock.  However, if the cross-check logic below needs to call
+	 * VOP_LOOKUP(), that may relock the upper vnode and lock any found
+	 * child vnode in a way that doesn't protect against deadlock given
+	 * the other held locks.  Beyond that, the various other VOPs we issue
+	 * below, such as VOP_OPEN() and VOP_READDIR(), may also re-lock the
+	 * lower vnode.
+	 * We might instead just hand off between the upper vnode lock
+	 * (and its parent directory lock) and the lower vnode lock as needed,
+	 * so that the lower lock is never held at the same time as the upper
+	 * locks, but that opens up a wider window in which the upper
+	 * directory (and also the lower directory if it isn't truly
+	 * read-only) may change while the relevant lock is dropped.  But
+	 * since re-locking may happen here and open up such a window anyway,
+	 * perhaps that is a worthwhile tradeoff?  Or perhaps we can ultimately
+	 * do sufficient tracking of empty state within the unionfs vnode
+	 * (in conjunction with upcalls from the lower FSes to notify us
+	 * of out-of-band state changes) that we can avoid these costly checks
+	 * altogether.
+	 */
+	ASSERT_VOP_LOCKED(lvp, __func__);
+	ASSERT_VOP_ELOCKED(uvp, __func__);
 
-	/* check opaque */
 	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
 		return (error);
 	if (va.va_flags & OPAQUE)
 		return (0);
 
-	/* open vnode */
 #ifdef MAC
-	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
+	if ((error = mac_vnode_check_open(cred, lvp, VEXEC | VREAD)) != 0)
 		return (error);
 #endif
-	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
+	if ((error = VOP_ACCESS(lvp, VEXEC | VREAD, cred, td)) != 0)
+		return (error);
+	if ((error = VOP_OPEN(lvp, FREAD, cred, td, NULL)) != 0)
 		return (error);
-	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
+	if ((error = VOP_GETATTR(lvp, &va, cred)) != 0)
 		return (error);
 
-	uio.uio_rw = UIO_READ;
-	uio.uio_segflg = UIO_SYSSPACE;
-	uio.uio_td = td;
-	uio.uio_offset = 0;
+	dirbuflen = max(DEV_BSIZE, GENERIC_MAXDIRSIZ);
+	if (dirbuflen < va.va_blocksize)
+		dirbuflen = va.va_blocksize;
+	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
 
-#ifdef MAC
-	error = mac_vnode_check_readdir(td->td_ucred, lvp);
-#endif
-	while (!error && !eofflag) {
-		iov.iov_base = buf;
-		iov.iov_len = sizeof(buf);
-		uio.uio_iov = &iov;
-		uio.uio_iovcnt = 1;
-		uio.uio_resid = iov.iov_len;
+	len = 0;
+	off = 0;
+	eofflag = 0;
 
-		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
+	for (;;) {
+		error = vn_dir_next_dirent(lvp, td, dirbuf, dirbuflen,
+		    &dp, &len, &off, &eofflag);
 		if (error != 0)
 			break;
-		KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf),
-		    ("%s: empty read from lower FS", __func__));
-
-		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
-		for (dp = (struct dirent*)buf; !error && dp < edp;
-		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
-			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
-			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
-			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
-				continue;
-
-			cn.cn_namelen = dp->d_namlen;
-			cn.cn_pnbuf = NULL;
-			cn.cn_nameptr = dp->d_name;
-			cn.cn_nameiop = LOOKUP;
-			cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
-			cn.cn_lkflags = LK_EXCLUSIVE;
-			cn.cn_cred = cred;
-
-			/*
-			 * check entry in lower.
-			 * Sometimes, readdir function returns
-			 * wrong entry.
-			 */
-			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
 
-			if (!lookuperr)
-				vput(tvp);
-			else
-				continue; /* skip entry */
-
-			/*
-			 * check entry
-			 * If it has no exist/whiteout entry in upper,
-			 * directory is not empty.
-			 */
-			cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
-			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
+		if (len == 0) {
+			/* EOF */
+			error = 0;
+			break;
+		}
 
-			if (!lookuperr)
-				vput(tvp);
+		if (dp->d_type == DT_WHT)
+			continue;
 
-			/* ignore exist or whiteout entry */
-			if (!lookuperr ||
-			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
-				continue;
+		/*
+		 * Any file in the directory which is not '.' or '..' indicates
+		 * the directory is not empty.
+		 */
+		switch (dp->d_namlen) {
+		case 2:
+			if (dp->d_name[1] != '.') {
+				/* Can't be '..' (nor '.') */
+				break;
+			}
+			/* FALLTHROUGH */
+		case 1:
+			if (dp->d_name[0] != '.') {
+				/* Can't be '..' nor '.' */
+				break;
+			}
+			continue;
+		default:
+			break;
+		}
 
+		cn.cn_namelen = dp->d_namlen;
+		cn.cn_pnbuf = NULL;
+		cn.cn_nameptr = dp->d_name;
+		cn.cn_nameiop = LOOKUP;
+		cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
+		cn.cn_lkflags = LK_EXCLUSIVE;
+		cn.cn_cred = cred;
+
+		error = VOP_LOOKUP(uvp, &tvp, &cn);
+		if (tvp != NULLVP)
+			vput(tvp);
+		if (error != 0 && error != ENOENT && error != EJUSTRETURN)
+			break;
+		else if ((cn.cn_flags & ISWHITEOUT) == 0) {
 			error = ENOTEMPTY;
-		}
+			break;
+		} else
+			error = 0;
 	}
 
-	/* close vnode */
-	VOP_CLOSE(vp, FREAD, cred, td);
-
+	VOP_CLOSE(lvp, FREAD, cred, td);
+	free(dirbuf, M_TEMP);
 	return (error);
 }
-
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
index cb55c2dd6474..9342317ad08e 100644
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -327,18 +327,15 @@ unionfs_domount(struct mount *mp)
 	 * unionfs_lock()) and the mountpoint's busy count.  Without this,
 	 * unmount will lock the covered vnode lock (directly through the
 	 * covered vnode) and wait for the busy count to drain, while a
-	 * concurrent lookup will increment the busy count and then lock
+	 * concurrent lookup will increment the busy count and then may lock
 	 * the covered vnode lock (indirectly through unionfs_lock()).
 	 *
-	 * Note that we can't yet use this facility for the 'below' case
-	 * in which the upper vnode is the covered vnode, because that would
-	 * introduce a different LOR in which the cross-mount lookup would
-	 * effectively hold the upper vnode lock before acquiring the lower
-	 * vnode lock, while an unrelated lock operation would still acquire
-	 * the lower vnode lock before the upper vnode lock, which is the
-	 * order unionfs currently requires.
+	 * Note that this is only needed for the 'below' case in which the
+	 * upper vnode is also the covered vnode, because unionfs_lock()
+	 * only locks the upper vnode as long as both lower and upper vnodes
+	 * are present (which they will always be for the unionfs mount root).
 	 */
-	if (!below) {
+	if (below) {
 		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
 		mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
 		VOP_UNLOCK(mp->mnt_vnodecovered);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index aa2a7273825a..03130f0ca949 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -76,6 +76,21 @@
 	VNASSERT(((vp)->v_op == &unionfs_vnodeops), vp, \
 	    ("%s: non-unionfs vnode", __func__))
 
+static bool
+unionfs_lookup_isroot(struct componentname *cnp, struct vnode *dvp)
+{
+	struct nameidata *ndp;
+
+	if (dvp == NULL)
+		return (false);
+	if ((dvp->v_vflag & VV_ROOT) != 0)
+		return (true);
+	ndp = vfs_lookup_nameidata(cnp);
+	if (ndp == NULL)
+		return (false);
+	return (vfs_lookup_isroot(ndp, dvp));
+}
+
 static int
 unionfs_lookup(struct vop_cachedlookup_args *ap)
 {
@@ -84,13 +99,12 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 	struct vattr	va;
 	struct componentname *cnp;
 	struct thread  *td;
+	uint64_t	cnflags;
 	u_long		nameiop;
-	u_long		cnflags, cnflagsbk;
-	int		iswhiteout;
 	int		lockflag;
-	int		error , uerror, lerror;
+	int		lkflags;
+	int		error, uerror, lerror;
 
-	iswhiteout = 0;
 	lockflag = 0;
 	error = uerror = lerror = ENOENT;
 	cnp = ap->a_cnp;
@@ -120,87 +134,185 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 		return (EROFS);
 
 	/*
+	 * Note that a lookup is in-flight, and block if another lookup
+	 * is already in-flight against dvp.  This is done because we may
+	 * end up dropping dvp's lock to look up a lower vnode or to create
+	 * a shadow directory, opening up the possibility of parallel lookups
+	 * against the same directory creating duplicate unionfs vnodes for
+	 * the same file(s).  Note that if this function encounters an
+	 * in-progress lookup for the directory, it will block until the
+	 * lookup is complete and then return ERELOOKUP to allow any
+	 * existing unionfs vnode to be loaded from the VFS cache.
+	 * This is really a hack; filesystems that support MNTK_LOOKUP_SHARED
+	 * (which unionfs currently doesn't) seem to deal with this by using
+	 * the vfs_hash_* functions to manage a per-mount vnode cache keyed
+	 * by the inode number (or some roughly equivalent unique ID
+	 * usually associated with the storage medium).  It may make sense
+	 * for unionfs to adopt something similar as a replacement for its
+	 * current half-baked directory-only cache implementation, particularly
+	 * if we want to support MNTK_LOOKUP_SHARED here.
+	 */
+	error = unionfs_set_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
+	if (error != 0)
+		return (error);
+	/*
 	 * lookup dotdot
 	 */
 	if (cnflags & ISDOTDOT) {
-		if (LOOKUP != nameiop && udvp == NULLVP)
-			return (EROFS);
+		if (LOOKUP != nameiop && udvp == NULLVP) {
+			error = EROFS;
+			goto unionfs_lookup_return;
+		}
 
-		if (udvp != NULLVP) {
-			dtmpvp = udvp;
-			if (ldvp != NULLVP)
-				VOP_UNLOCK(ldvp);
+		if (unionfs_lookup_isroot(cnp, udvp) ||
+		    unionfs_lookup_isroot(cnp, ldvp)) {
+			error = ENOENT;
+			goto unionfs_lookup_return;
 		}
+
+		if (udvp != NULLVP)
+			dtmpvp = udvp;
 		else
 			dtmpvp = ldvp;
 
+		unionfs_forward_vop_start(dtmpvp, &lkflags);
 		error = VOP_LOOKUP(dtmpvp, &vp, cnp);
+		unionfs_forward_vop_finish(dvp, dtmpvp, lkflags);
 
-		if (dtmpvp == udvp && ldvp != NULLVP) {
-			VOP_UNLOCK(udvp);
-			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-			dunp = VTOUNIONFS(dvp);
-			if (error == 0 && dunp == NULL)
-				error = ENOENT;
-		}
+		/*
+		 * Drop the lock and reference on vp.  If the lookup was
+		 * successful, we'll either need to exchange vp's lock and
+		 * reference for the unionfs parent vnode's lock and
+		 * reference, or (if dvp was reclaimed) we'll need to drop
+		 * vp's lock and reference to return early.
+		 */
+		if (vp != NULLVP)
+			vput(vp);
+		dunp = VTOUNIONFS(dvp);
+		if (error == 0 && dunp == NULL)
+			error = ENOENT;
 
 		if (error == 0) {
-			/*
-			 * Exchange lock and reference from vp to
-			 * dunp->un_dvp. vp is upper/lower vnode, but it
-			 * will need to return the unionfs vnode.
-			 */
-			if (nameiop == DELETE  || nameiop == RENAME ||
-			    (cnp->cn_lkflags & LK_TYPE_MASK))
-				VOP_UNLOCK(vp);
-			vrele(vp);
-
 			dtmpvp = dunp->un_dvp;
 			vref(dtmpvp);
 			VOP_UNLOCK(dvp);
 			*(ap->a_vpp) = dtmpvp;
 
-			if (nameiop == DELETE || nameiop == RENAME)
-				vn_lock(dtmpvp, LK_EXCLUSIVE | LK_RETRY);
-			else if (cnp->cn_lkflags & LK_TYPE_MASK)
-				vn_lock(dtmpvp, cnp->cn_lkflags |
-				    LK_RETRY);
+			vn_lock(dtmpvp, cnp->cn_lkflags | LK_RETRY);
 
+			if (VN_IS_DOOMED(dtmpvp)) {
+				vput(dtmpvp);
+				*(ap->a_vpp) = NULLVP;
+				error = ENOENT;
+			}
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-		} else if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
-			cache_enter(dvp, NULLVP, cnp);
+		}
 
-		goto unionfs_lookup_return;
+		goto unionfs_lookup_cleanup;
 	}
 
 	/*
+	 * Lookup lower layer.  We do this before looking up the upper
+	 * layer, as we may drop the upper parent directory's lock, and we
+	 * want to ensure the upper parent remains locked from the point of
+	 * lookup through any ensuing VOP that may require it to be locked.
+	 * The cost of this is that we may end up performing an unnecessary
+	 * lower layer lookup if a whiteout is present in the upper layer.
+	 */
+	if (ldvp != NULLVP && !(cnflags & DOWHITEOUT)) {
+		struct componentname lcn;
+		bool is_dot;
+
+		if (udvp != NULLVP) {
+			vref(ldvp);
+			VOP_UNLOCK(dvp);
+			vn_lock(ldvp, LK_EXCLUSIVE | LK_RETRY);
+		}
+
+		lcn = *cnp;
+		/* op is always LOOKUP */
+		lcn.cn_nameiop = LOOKUP;
+		lcn.cn_flags = cnflags;
+		is_dot = false;
+
+		if (udvp == NULLVP)
+			unionfs_forward_vop_start(ldvp, &lkflags);
+		lerror = VOP_LOOKUP(ldvp, &lvp, &lcn);
+		if (udvp == NULLVP &&
+		    unionfs_forward_vop_finish(dvp, ldvp, lkflags)) {
+			if (lvp != NULLVP)
+				VOP_UNLOCK(lvp);
+			error =  ENOENT;
+			goto unionfs_lookup_cleanup;
+		}
+
+		if (udvp == NULLVP)
+			cnp->cn_flags = lcn.cn_flags;
+
+		if (lerror == 0) {
+			if (ldvp == lvp) {	/* is dot */
+				vrele(lvp);
+				*(ap->a_vpp) = dvp;
+				vref(dvp);
+				is_dot = true;
+				error = lerror;
+			} else if (lvp != NULLVP)
+				VOP_UNLOCK(lvp);
+		}
+
+		if (udvp != NULLVP) {
+			vput(ldvp);
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+			if (VN_IS_DOOMED(dvp))
+				error = ENOENT;
+		}
+		if (is_dot)
+			goto unionfs_lookup_return;
+		else if (error != 0)
+			goto unionfs_lookup_cleanup;
+	}
+	/*
 	 * lookup upper layer
 	 */
 	if (udvp != NULLVP) {
+		bool iswhiteout = false;
+
+		unionfs_forward_vop_start(udvp, &lkflags);
 		uerror = VOP_LOOKUP(udvp, &uvp, cnp);
+		if (unionfs_forward_vop_finish(dvp, udvp, lkflags)) {
+			if (uvp != NULLVP)
+				VOP_UNLOCK(uvp);
+			error = ENOENT;
+			goto unionfs_lookup_cleanup;
+		}
 
 		if (uerror == 0) {
 			if (udvp == uvp) {	/* is dot */
+				if (lvp != NULLVP)
+					vrele(lvp);
 				vrele(uvp);
 				*(ap->a_vpp) = dvp;
 				vref(dvp);
 
 				error = uerror;
 				goto unionfs_lookup_return;
-			}
-			if (nameiop == DELETE || nameiop == RENAME ||
-			    (cnp->cn_lkflags & LK_TYPE_MASK))
+			} else if (uvp != NULLVP)
 				VOP_UNLOCK(uvp);
 		}
 
 		/* check whiteout */
-		if (uerror == ENOENT || uerror == EJUSTRETURN)
-			if (cnp->cn_flags & ISWHITEOUT)
-				iswhiteout = 1;	/* don't lookup lower */
-		if (iswhiteout == 0 && ldvp != NULLVP)
-			if (!VOP_GETATTR(udvp, &va, cnp->cn_cred) &&
-			    (va.va_flags & OPAQUE))
-				iswhiteout = 1;	/* don't lookup lower */
+		if ((uerror == ENOENT || uerror == EJUSTRETURN) &&
+		    (cnp->cn_flags & ISWHITEOUT))
+			iswhiteout = true;
+		else if (VOP_GETATTR(udvp, &va, cnp->cn_cred) == 0 &&
+		    (va.va_flags & OPAQUE))
+			iswhiteout = true;
+
+		if (iswhiteout && lvp != NULLVP) {
+			vrele(lvp);
+			lvp = NULLVP;
+		}
+
 #if 0
 		UNIONFS_INTERNAL_DEBUG(
 		    "unionfs_lookup: debug: whiteout=%d, path=%s\n",
@@ -209,39 +321,6 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 	}
 
 	/*
-	 * lookup lower layer
-	 */
-	if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) {
-		/* always op is LOOKUP */
-		cnp->cn_nameiop = LOOKUP;
-		cnflagsbk = cnp->cn_flags;
-		cnp->cn_flags = cnflags;
-
-		lerror = VOP_LOOKUP(ldvp, &lvp, cnp);
-
-		cnp->cn_nameiop = nameiop;
-		if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN))
-			cnp->cn_flags = cnflagsbk;
-
-		if (lerror == 0) {
-			if (ldvp == lvp) {	/* is dot */
-				if (uvp != NULLVP)
-					vrele(uvp);	/* no need? */
-				vrele(lvp);
-				*(ap->a_vpp) = dvp;
-				vref(dvp);
-
-				UNIONFS_INTERNAL_DEBUG(
-				    "unionfs_lookup: leave (%d)\n", lerror);
-
-				return (lerror);
-			}
-			if (cnp->cn_lkflags & LK_TYPE_MASK)
-				VOP_UNLOCK(lvp);
-		}
-	}
-
-	/*
 	 * check lookup result
 	 */
 	if (uvp == NULLVP && lvp == NULLVP) {
@@ -280,8 +359,7 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 		if (unp == NULL)
 			error = ENOENT;
 		else
-			error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
-			    udvp, unp, cnp, td);
+			error = unionfs_mkshadowdir(dvp, vp, cnp, td);
 		if (lockflag != 0)
 			VOP_UNLOCK(vp);
 		if (error != 0) {
@@ -293,6 +371,10 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 				vrele(vp);
 			goto unionfs_lookup_cleanup;
 		}
+		/*
+		 * TODO: Since unionfs_mkshadowdir() relocks udvp after
+		 * creating the new directory, return ERELOOKUP here?
+		 */
 		if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED)
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 	}
@@ -313,9 +395,12 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
 			    "unionfs_lookup: Unable to create unionfs vnode.");
 			goto unionfs_lookup_cleanup;
 		}
-		if ((nameiop == DELETE || nameiop == RENAME) &&
-		    (cnp->cn_lkflags & LK_TYPE_MASK) == 0)
-			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	}
+
+	if (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)) {
+		error = ENOENT;
+		vput(vp);
+		goto unionfs_lookup_cleanup;
 	}
 
 	*(ap->a_vpp) = vp;
@@ -329,10 +414,12 @@ unionfs_lookup_cleanup:
 	if (lvp != NULLVP)
 		vrele(lvp);
 
-	if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
+	if (error == ENOENT && (cnflags & MAKEENTRY) != 0 &&
+	    !VN_IS_DOOMED(dvp))
 		cache_enter(dvp, NULLVP, cnp);
 
 unionfs_lookup_return:
+	unionfs_clear_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
 
@@ -492,6 +579,61 @@ unionfs_downgrade_lock(struct vnode *vp, enum unionfs_lkupgrade status)
 		vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
 }
 
+/*
+ * Exchange the default (upper vnode) lock on a unionfs vnode for the lower
+ * vnode lock, in support of operations that require access to the lower vnode
+ * even when an upper vnode is present.  We don't use vn_lock_pair() to hold
+ * both vnodes at the same time, primarily because the caller may proceed
+ * to issue VOPs to the lower layer which re-lock or perform other operations
+ * which may not be safe in the presence of a locked vnode from another FS.
+ * Moreover, vn_lock_pair()'s deadlock resolution approach can introduce
+ * additional overhead that isn't necessary on these paths.
+ *
+ * vp must be a locked unionfs vnode; the lock state of this vnode is
+ * returned through *lkflags for later use in unionfs_unlock_lvp().
+ *
+ * Returns the locked lower vnode, or NULL if the lower vnode (and therefore
+ * also the unionfs vnode above it) has been doomed.
+ */
+static struct vnode *
+unionfs_lock_lvp(struct vnode *vp, int *lkflags)
+{
+	struct unionfs_node *unp;
+	struct vnode *lvp;
+
+	unp = VTOUNIONFS(vp);
+	lvp = unp->un_lowervp;
+	ASSERT_VOP_LOCKED(vp, __func__);
+	ASSERT_VOP_UNLOCKED(lvp, __func__);
+	*lkflags = VOP_ISLOCKED(vp);
+	vref(lvp);
+	VOP_UNLOCK(vp);
+	vn_lock(lvp, *lkflags | LK_RETRY);
+	if (VN_IS_DOOMED(lvp)) {
+		vput(lvp);
+		lvp = NULLVP;
+		vn_lock(vp, *lkflags | LK_RETRY);
+	}
+	return (lvp);
+}
+
+/*
+ * Undo a previous call to unionfs_lock_lvp(), restoring the default lock
+ * on the unionfs vnode.  This function reloads and returns the vnode
+ * private data for the unionfs vnode, which will be NULL if the unionfs
+ * vnode became doomed while its lock was dropped.  The caller must check
+ * for this case.
+ */
+static struct unionfs_node *
+unionfs_unlock_lvp(struct vnode *vp, struct vnode *lvp, int lkflags)
+{
+	ASSERT_VOP_LOCKED(lvp, __func__);
+	ASSERT_VOP_UNLOCKED(vp, __func__);
+	vput(lvp);
+	vn_lock(vp, lkflags | LK_RETRY);
+	return (VTOUNIONFS(vp));
+}
+
 static int
 unionfs_open(struct vop_open_args *ap)
 {
@@ -504,7 +646,9 @@ unionfs_open(struct vop_open_args *ap)
 	struct ucred   *cred;
 	struct thread  *td;
 	int		error;
+	int		lkflags;
 	enum unionfs_lkupgrade lkstatus;
+	bool		lock_lvp, open_lvp;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n");
 
@@ -515,6 +659,7 @@ unionfs_open(struct vop_open_args *ap)
 	targetvp = NULLVP;
 	cred = ap->a_cred;
 	td = ap->a_td;
+	open_lvp = lock_lvp = false;
 
 	/*
 	 * The executable loader path may call this function with vp locked
@@ -546,10 +691,12 @@ unionfs_open(struct vop_open_args *ap)
 	if (targetvp == NULLVP) {
 		if (uvp == NULLVP) {
 			if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) {
-				error = unionfs_copyfile(unp,
+				error = unionfs_copyfile(vp,
 				    !(ap->a_mode & O_TRUNC), cred, td);
-				if (error != 0)
+				if (error != 0) {
+					unp = VTOUNIONFS(vp);
 					goto unionfs_open_abort;
+				}
 				targetvp = uvp = unp->un_uppervp;
 			} else
 				targetvp = lvp;
@@ -557,30 +704,69 @@ unionfs_open(struct vop_open_args *ap)
 			targetvp = uvp;
 	}
 
+	if (targetvp == uvp && uvp->v_type == VDIR && lvp != NULLVP &&
+	    unsp->uns_lower_opencnt <= 0)
+		open_lvp = true;
+	else if (targetvp == lvp && uvp != NULLVP)
+		lock_lvp = true;
+
+	if (lock_lvp) {
+		unp = NULL;
+		lvp = unionfs_lock_lvp(vp, &lkflags);
+		if (lvp == NULLVP) {
+			error = ENOENT;
+			goto unionfs_open_abort;
+		}
+	} else
+		unionfs_forward_vop_start(targetvp, &lkflags);
+
 	error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp);
-	if (error == 0) {
-		if (targetvp == uvp) {
-			if (uvp->v_type == VDIR && lvp != NULLVP &&
-			    unsp->uns_lower_opencnt <= 0) {
-				/* open lower for readdir */
-				error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
-				if (error != 0) {
-					VOP_CLOSE(uvp, ap->a_mode, cred, td);
-					goto unionfs_open_abort;
-				}
-				unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
-				unsp->uns_lower_opencnt++;
+
+	if (lock_lvp) {
+		unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+		if (unp == NULL && error == 0)
+			error = ENOENT;
+	} else if (unionfs_forward_vop_finish(vp, targetvp, lkflags))
+		error = error ? error : ENOENT;
+
+	if (error != 0)
+		goto unionfs_open_abort;
+
+	if (targetvp == uvp) {
+		if (open_lvp) {
+			unp = NULL;
+			lvp = unionfs_lock_lvp(vp, &lkflags);
+			if (lvp == NULLVP) {
+				error = ENOENT;
+				goto unionfs_open_abort;
 			}
-			unsp->uns_upper_opencnt++;
-		} else {
+			/* open lower for readdir */
+			error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
+			unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+			if (unp == NULL) {
+				error = error ? error : ENOENT;
+				goto unionfs_open_abort;
+			}
+			if (error != 0) {
+				unionfs_forward_vop_start(uvp, &lkflags);
+				VOP_CLOSE(uvp, ap->a_mode, cred, td);
+				if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+					unp = NULL;
+				goto unionfs_open_abort;
+			}
+			unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
 			unsp->uns_lower_opencnt++;
-			unsp->uns_lower_openmode = ap->a_mode;
 		}
-		vp->v_object = targetvp->v_object;
+		unsp->uns_upper_opencnt++;
+	} else {
+		unsp->uns_lower_opencnt++;
+		unsp->uns_lower_openmode = ap->a_mode;
 	}
+	vp->v_object = targetvp->v_object;
 
 unionfs_open_abort:
-	if (error != 0)
+
+	if (error != 0 && unp != NULL)
 		unionfs_tryrem_node_status(unp, unsp);
 
 unionfs_open_cleanup:
@@ -599,9 +785,13 @@ unionfs_close(struct vop_close_args *ap)
 	struct ucred   *cred;
 	struct thread  *td;
 	struct vnode   *vp;
+	struct vnode   *uvp;
+	struct vnode   *lvp;
 	struct vnode   *ovp;
 	int		error;
+	int		lkflags;
 	enum unionfs_lkupgrade lkstatus;
+	bool		lock_lvp;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
 
@@ -611,6 +801,7 @@ unionfs_close(struct vop_close_args *ap)
 	cred = ap->a_cred;
 	td = ap->a_td;
 	error = 0;
+	lock_lvp = false;
 
 	/*
 	 * If the vnode is reclaimed while upgrading, we can't safely use unp
@@ -621,44 +812,77 @@ unionfs_close(struct vop_close_args *ap)
 		goto unionfs_close_cleanup;
 
 	unp = VTOUNIONFS(vp);
-	unionfs_get_node_status(unp, td, &unsp);
+	lvp = unp->un_lowervp;
+	uvp = unp->un_uppervp;
+	unsp = unionfs_find_node_status(unp, td);
 
-	if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) {
+	if (unsp == NULL ||
+	    (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0)) {
 #ifdef DIAGNOSTIC
-		printf("unionfs_close: warning: open count is 0\n");
+		if (unsp != NULL)
+			printf("unionfs_close: warning: open count is 0\n");
 #endif
-		if (unp->un_uppervp != NULLVP)
-			ovp = unp->un_uppervp;
+		if (uvp != NULLVP)
+			ovp = uvp;
 		else
-			ovp = unp->un_lowervp;
+			ovp = lvp;
 	} else if (unsp->uns_upper_opencnt > 0)
-		ovp = unp->un_uppervp;
+		ovp = uvp;
 	else
-		ovp = unp->un_lowervp;
+		ovp = lvp;
+
+	if (ovp == lvp && uvp != NULLVP) {
+		lock_lvp = true;
+		unp = NULL;
+		lvp = unionfs_lock_lvp(vp, &lkflags);
+		if (lvp == NULLVP) {
+			error = ENOENT;
+			goto unionfs_close_abort;
+		}
+	} else
+		unionfs_forward_vop_start(ovp, &lkflags);
 
 	error = VOP_CLOSE(ovp, ap->a_fflag, cred, td);
 
+	if (lock_lvp) {
+		unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+		if (unp == NULL && error == 0)
+			error = ENOENT;
+	} else if (unionfs_forward_vop_finish(vp, ovp, lkflags))
+		error = error ? error : ENOENT;
+
 	if (error != 0)
 		goto unionfs_close_abort;
 
 	vp->v_object = ovp->v_object;
 
-	if (ovp == unp->un_uppervp) {
-		unsp->uns_upper_opencnt--;
-		if (unsp->uns_upper_opencnt == 0) {
+	if (ovp == uvp) {
+		if (unsp != NULL && ((--unsp->uns_upper_opencnt) == 0)) {
 			if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) {
-				VOP_CLOSE(unp->un_lowervp, FREAD, cred, td);
+				unp = NULL;
+				lvp = unionfs_lock_lvp(vp, &lkflags);
+				if (lvp == NULLVP) {
+					error = ENOENT;
+					goto unionfs_close_abort;
+				}
+				VOP_CLOSE(lvp, FREAD, cred, td);
+				unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+				if (unp == NULL) {
+					error = ENOENT;
+					goto unionfs_close_abort;
+				}
 				unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR;
 				unsp->uns_lower_opencnt--;
 			}
 			if (unsp->uns_lower_opencnt > 0)
-				vp->v_object = unp->un_lowervp->v_object;
+				vp->v_object = lvp->v_object;
 		}
-	} else
+	} else if (unsp != NULL)
 		unsp->uns_lower_opencnt--;
 
 unionfs_close_abort:
-	unionfs_tryrem_node_status(unp, unsp);
+	if (unp != NULL && unsp != NULL)
+		unionfs_tryrem_node_status(unp, unsp);
 
 unionfs_close_cleanup:
 	unionfs_downgrade_lock(vp, lkstatus);
@@ -883,7 +1107,7 @@ unionfs_setattr(struct vop_setattr_args *ap)
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
-		error = unionfs_copyfile(unp, (vap->va_size != 0),
+		error = unionfs_copyfile(ap->a_vp, (vap->va_size != 0),
 		    ap->a_cred, td);
 		if (error != 0)
 			return (error);
@@ -1078,8 +1302,10 @@ unionfs_remove(struct vop_remove_args *ap)
 		error = VOP_REMOVE(udvp, uvp, cnp);
 		unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
 		    ap->a_vp, uvp, uvp_lkflags);
-	} else if (lvp != NULLVP)
-		error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen);
+	} else if (lvp != NULLVP) {
+		error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
+		    path, pathlen);
+	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
 
@@ -1096,7 +1322,6 @@ unionfs_link(struct vop_link_args *ap)
 	struct componentname *cnp;
 	struct thread  *td;
 	int		error;
-	int		needrelookup;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n");
 
@@ -1104,7 +1329,6 @@ unionfs_link(struct vop_link_args *ap)
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
-	needrelookup = 0;
 	dunp = VTOUNIONFS(ap->a_tdvp);
 	unp = NULL;
 	udvp = dunp->un_uppervp;
@@ -1121,16 +1345,15 @@ unionfs_link(struct vop_link_args *ap)
 		if (ap->a_vp->v_type != VREG)
 			return (EOPNOTSUPP);
 
-		error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
-		if (error != 0)
-			return (error);
-		needrelookup = 1;
+		VOP_UNLOCK(ap->a_tdvp);
+		error = unionfs_copyfile(ap->a_vp, 1, cnp->cn_cred, td);
+		vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY);
+		if (error == 0)
+			error = ERELOOKUP;
+		return (error);
 	}
 	uvp = unp->un_uppervp;
 
-	if (needrelookup != 0)
-		error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
-
 	if (error == 0) {
 		int udvp_lkflags, uvp_lkflags;
 		unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
@@ -1154,8 +1377,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	struct vnode   *tdvp;
 	struct vnode   *tvp;
 	struct componentname *tcnp;
-	struct vnode   *ltdvp;
-	struct vnode   *ltvp;
 	struct thread  *td;
 
 	/* rename target vnodes */
@@ -1164,7 +1385,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	struct vnode   *rtdvp;
 	struct vnode   *rtvp;
 
-	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	int		error;
 
@@ -1177,8 +1397,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	tdvp = ap->a_tdvp;
 	tvp = ap->a_tvp;
 	tcnp = ap->a_tcnp;
-	ltdvp = NULLVP;
-	ltvp = NULLVP;
 	td = curthread;
 	rfdvp = fdvp;
 	rfvp = fvp;
@@ -1238,7 +1456,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n",
 	    fvp, unp->un_uppervp, unp->un_lowervp);
 #endif
-	ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
 	/*
 	 * If we only have a lower vnode, copy the source file to the upper
 	 * FS so that the rename operation can be issued against the upper FS.
@@ -1282,10 +1499,10 @@ unionfs_rename(struct vop_rename_args *ap)
 		else if (unp->un_uppervp == NULLVP) {
 			switch (fvp->v_type) {
 			case VREG:
-				error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
+				error = unionfs_copyfile(fvp, 1, fcnp->cn_cred, td);
 				break;
 			case VDIR:
-				error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
+				error = unionfs_mkshadowdir(fdvp, fvp, fcnp, td);
 				break;
 			default:
 				error = ENODEV;
@@ -1327,7 +1544,6 @@ unionfs_rename(struct vop_rename_args *ap)
 		goto unionfs_rename_abort;
 	}
 	rtdvp = unp->un_uppervp;
-	ltdvp = unp->un_lowervp;
 	vref(rtdvp);
 
 	if (tvp != NULLVP) {
@@ -1348,7 +1564,6 @@ unionfs_rename(struct vop_rename_args *ap)
 				goto unionfs_rename_abort;
 			}
 			rtvp = unp->un_uppervp;
-			ltvp = unp->un_lowervp;
 			vref(rtvp);
 		}
 	}
@@ -1365,12 +1580,8 @@ unionfs_rename(struct vop_rename_args *ap)
 			cache_purge(fdvp);
 	}
 
-	if (ltdvp != NULLVP)
-		VOP_UNLOCK(ltdvp);
 	if (tdvp != rtdvp)
 		vrele(tdvp);
-	if (ltvp != NULLVP)
-		VOP_UNLOCK(ltvp);
 	if (tvp != rtvp && tvp != NULLVP) {
 		if (rtvp == NULLVP)
 			vput(tvp);
@@ -1504,43 +1715,55 @@ unionfs_rmdir(struct vop_rmdir_args *ap)
 
 	if (uvp != NULLVP) {
 		if (lvp != NULLVP) {
+			/*
+			 * We need to keep dvp and vp's upper vnodes locked
+			 * going into the VOP_RMDIR() call, but the empty
+			 * directory check also requires the lower vnode lock.
+			 * For this third, cross-filesystem lock we use an
+			 * approach similar to that of various FS' VOP_RENAME
+			 * implementations (which require 2-4 vnode locks).
+			 * First we attempt a NOWAIT acquisition, then if
+			 * that fails we drop the other two vnode locks,
+			 * acquire lvp's lock in the normal fashion to reduce
+			 * the likelihood of spinning on it in the future,
+			 * then drop it, reacquire the other locks, and return
+			 * ERELOOKUP to re-drive the lookup in case the dvp->
+			 * vp relationship has changed.
+			 */
+			if (vn_lock(lvp, LK_SHARED | LK_NOWAIT) != 0) {
+				VOP_UNLOCK(ap->a_vp);
+				VOP_UNLOCK(ap->a_dvp);
+				vn_lock(lvp, LK_SHARED | LK_RETRY);
+				VOP_UNLOCK(lvp);
+				vn_lock(ap->a_dvp, LK_EXCLUSIVE | LK_RETRY);
+				vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+				return (ERELOOKUP);
+			}
 			error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td);
+			/*
+			 * It's possible for a direct operation on the lower FS
+			 * to make the lower directory non-empty after we drop
+			 * the lock, but it's also possible for the upper-layer
+			 * VOP_RMDIR to relock udvp/uvp which would lead to
+			 * LOR if we kept lvp locked across that call.
+			 */
+			VOP_UNLOCK(lvp);
 			if (error != 0)
 				return (error);
 		}
 		ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
 		if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
-			cnp->cn_flags |= DOWHITEOUT;
-		/*
-		 * The relookup path will need to relock the parent dvp and
-		 * possibly the vp as well.  Locking is expected to be done
-		 * in parent->child order; drop the lock on vp to avoid LOR
-		 * and potential recursion on vp's lock.
-		 * vp is expected to remain referenced during VOP_RMDIR(),
-		 * so vref/vrele should not be necessary here.
-		 */
-		VOP_UNLOCK(ap->a_vp);
-		VNPASS(vrefcnt(ap->a_vp) > 0, ap->a_vp);
-		error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td);
-		vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
-		/*
-		 * VOP_RMDIR is dispatched against udvp, so if uvp became
-		 * doomed while the lock was dropped above the target
-		 * filesystem may not be able to cope.
-		 */
-		if (error == 0 && VN_IS_DOOMED(uvp))
-			error = ENOENT;
-		if (error == 0) {
-			int udvp_lkflags, uvp_lkflags;
-			unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
-			    uvp, &uvp_lkflags);
-			error = VOP_RMDIR(udvp, uvp, cnp);
-			unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
-			    ap->a_vp, uvp, uvp_lkflags);
-		}
-	} else if (lvp != NULLVP)
-		error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td,
+			cnp->cn_flags |= (DOWHITEOUT | IGNOREWHITEOUT);
+		int udvp_lkflags, uvp_lkflags;
+		unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
+		    uvp, &uvp_lkflags);
+		error = VOP_RMDIR(udvp, uvp, cnp);
+		unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
+		    ap->a_vp, uvp, uvp_lkflags);
+	} else if (lvp != NULLVP) {
+		error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
 		    unp->un_path, unp->un_pathlen);
+	}
 
 	if (error == 0) {
 		cache_purge(ap->a_dvp);
@@ -1613,6 +1836,7 @@ unionfs_readdir(struct vop_readdir_args *ap)
 	uint64_t	*cookies_bk;
 	int		error;
 	int		eofflag;
+	int		lkflags;
 	int		ncookies_bk;
 	int		uio_offset_bk;
 	enum unionfs_lkupgrade lkstatus;
@@ -1668,18 +1892,26 @@ unionfs_readdir(struct vop_readdir_args *ap)
 
 	/* upper only */
 	if (uvp != NULLVP && lvp == NULLVP) {
+		unionfs_forward_vop_start(uvp, &lkflags);
 		error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
 		    ap->a_ncookies, ap->a_cookies);
-		unsp->uns_readdir_status = 0;
+		if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+			error = error ? error : ENOENT;
+		else
+			unsp->uns_readdir_status = 0;
 
 		goto unionfs_readdir_exit;
 	}
 
 	/* lower only */
 	if (uvp == NULLVP && lvp != NULLVP) {
+		unionfs_forward_vop_start(lvp, &lkflags);
 		error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
 		    ap->a_ncookies, ap->a_cookies);
-		unsp->uns_readdir_status = 2;
+		if (unionfs_forward_vop_finish(vp, lvp, lkflags))
+			error = error ? error : ENOENT;
+		else
+			unsp->uns_readdir_status = 2;
 
 		goto unionfs_readdir_exit;
 	}
@@ -1689,14 +1921,17 @@ unionfs_readdir(struct vop_readdir_args *ap)
 	 */
 	KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp"));
 	KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp"));
+
 	if (uio->uio_offset == 0)
 		unsp->uns_readdir_status = 0;
 
 	if (unsp->uns_readdir_status == 0) {
 		/* read upper */
+		unionfs_forward_vop_start(uvp, &lkflags);
 		error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag,
 				    ap->a_ncookies, ap->a_cookies);
-
+		if (unionfs_forward_vop_finish(vp, uvp, lkflags) && error == 0)
+			error = ENOENT;
 		if (error != 0 || eofflag == 0)
 			goto unionfs_readdir_exit;
 		unsp->uns_readdir_status = 1;
@@ -1735,14 +1970,22 @@ unionfs_readdir(struct vop_readdir_args *ap)
 		uio->uio_offset = 0;
 	}
 
-	if (lvp == NULLVP) {
-		error = EBADF;
+	lvp = unionfs_lock_lvp(vp, &lkflags);
+	if (lvp == NULL) {
+		error = ENOENT;
 		goto unionfs_readdir_exit;
 	}
+
 	/* read lower */
 	error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
 			    ap->a_ncookies, ap->a_cookies);
 
+
+	unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+	if (unp == NULL && error == 0)
+		error = ENOENT;
+
+
 	/*
 	 * We can't return an uio_offset of 0: this would trigger an
 	 * infinite loop, because the next call to unionfs_readdir would
@@ -1907,96 +2150,49 @@ unionfs_print(struct vop_print_args *ap)
 }
 
 static int
-unionfs_get_llt_revlock(struct vnode *vp, int flags)
-{
-	int revlock;
-
-	revlock = 0;
-
-	switch (flags & LK_TYPE_MASK) {
-	case LK_SHARED:
-		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
-			revlock = LK_UPGRADE;
-		else
-			revlock = LK_RELEASE;
-		break;
-	case LK_EXCLUSIVE:
-	case LK_UPGRADE:
-		revlock = LK_RELEASE;
-		break;
-	case LK_DOWNGRADE:
-		revlock = LK_UPGRADE;
-		break;
-	default:
-		break;
-	}
-
-	return (revlock);
-}
-
-/*
- * The state of an acquired lock is adjusted similarly to
- * the time of error generating. 
- * flags: LK_RELEASE or LK_UPGRADE
- */
-static void
-unionfs_revlock(struct vnode *vp, int flags)
-{
-	if (flags & LK_RELEASE)
-		VOP_UNLOCK_FLAGS(vp, flags);
-	else {
-		/* UPGRADE */
-		if (vn_lock(vp, flags) != 0)
-			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-	}
-}
-
-static int
 unionfs_lock(struct vop_lock1_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
-	struct vnode   *uvp;
-	struct vnode   *lvp;
+	struct vnode   *tvp;
 	int		error;
 	int		flags;
-	int		revlock;
-	int		interlock;
-	int		uhold;
+	bool		lvp_locked;
 
-	/*
-	 * TODO: rework the unionfs locking scheme.
-	 * It's not guaranteed to be safe to blindly lock two vnodes on
-	 * different mounts as is done here.  Further, the entanglement
-	 * of locking both vnodes with the various options that can be
-	 * passed to VOP_LOCK() makes this code hard to reason about.
-	 * Instead, consider locking only the upper vnode, or the lower
-	 * vnode is the upper is not present, and taking separate measures
-	 * to lock both vnodes in the few cases when that is needed.
-	 */
 	error = 0;
-	interlock = 1;
-	uhold = 0;
 	flags = ap->a_flags;
 	vp = ap->a_vp;
 
 	if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
 		return (VOP_UNLOCK_FLAGS(vp, flags | LK_RELEASE));
 
+unionfs_lock_restart:
+	/*
+	 * We currently need the interlock here to ensure we can safely
+	 * access the unionfs vnode's private data.  We may be able to
+	 * eliminate this extra locking by instead using vfs_smr_enter()
+	 * and vn_load_v_data_smr() here in conjunction with an SMR UMA
+	 * zone for unionfs nodes.
+	 */
 	if ((flags & LK_INTERLOCK) == 0)
 		VI_LOCK(vp);
+	else
+		flags &= ~LK_INTERLOCK;
 
 	unp = VTOUNIONFS(vp);
-	if (unp == NULL)
-		goto unionfs_lock_null_vnode;
-
-	KASSERT_UNIONFS_VNODE(ap->a_vp);
-
-	lvp = unp->un_lowervp;
-	uvp = unp->un_uppervp;
+	if (unp == NULL) {
+		VI_UNLOCK(vp);
+		ap->a_flags = flags;
+		return (vop_stdlock(ap));
+	}
 
-	if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0)
-		panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+	if (unp->un_uppervp != NULL) {
+		tvp = unp->un_uppervp;
+		lvp_locked = false;
+	} else {
+		tvp = unp->un_lowervp;
+		lvp_locked = true;
+	}
 
 	/*
 	 * During unmount, the root vnode lock may be taken recursively,
@@ -2009,150 +2205,77 @@ unionfs_lock(struct vop_lock1_args *ap)
 	    (vp->v_vflag & VV_ROOT) != 0)
 		flags |= LK_CANRECURSE;
 
-	if (lvp != NULLVP) {
-		if (uvp != NULLVP && flags & LK_UPGRADE) {
+	vholdnz(tvp);
+	VI_UNLOCK(vp);
+	error = VOP_LOCK(tvp, flags);
+	vdrop(tvp);
+	if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) {
+		/*
+		 * After dropping the interlock above, there exists a window
+		 * in which another thread may acquire the lower vnode lock
+		 * and then either doom the unionfs vnode or create an upper
+		 * vnode.  In either case, we will effectively be holding the
+		 * wrong lock, so we must drop the lower vnode lock and
+		 * restart the lock operation.
+		 *
+		 * If unp is not already NULL, we assume that we can safely
+		 * access it because we currently hold lvp's lock.
+		 * unionfs_noderem() acquires lvp's lock before freeing
+		 * the vnode private data, ensuring it can't be concurrently
+		 * freed while we are using it here.  Likewise,
+		 * unionfs_node_update() acquires lvp's lock before installing
+		 * an upper vnode.  Without those guarantees, we would need to
+		 * reacquire the vnode interlock here.
+		 * Note that unionfs_noderem() doesn't acquire lvp's lock if
+		 * this is the root vnode, but the root vnode should always
+		 * have an upper vnode and therefore we should never use its
+		 * lower vnode lock here.
+		 */
+		unp = VTOUNIONFS(vp);
+		if (unp == NULL || unp->un_uppervp != NULLVP) {
+			VOP_UNLOCK(tvp);
 			/*
-			 * Share Lock is once released and a deadlock is
-			 * avoided.
+			 * If we previously held the lock, the upgrade may
+			 * have temporarily dropped the lock, in which case
+			 * concurrent dooming or copy-up will necessitate
+			 * acquiring a different lock.  Since we never held
+			 * the new lock, LK_UPGRADE must be cleared here to
+			 * avoid triggering a lockmgr panic.
 			 */
-			vholdnz(uvp);
-			uhold = 1;
-			VOP_UNLOCK(uvp);
-		}
-		VI_LOCK_FLAGS(lvp, MTX_DUPOK);
-		flags |= LK_INTERLOCK;
-		vholdl(lvp);
-
-		VI_UNLOCK(vp);
-		ap->a_flags &= ~LK_INTERLOCK;
-
-		error = VOP_LOCK(lvp, flags);
-
-		VI_LOCK(vp);
-		unp = VTOUNIONFS(vp);
-		if (unp == NULL) {
-			/* vnode is released. */
-			VI_UNLOCK(vp);
-			if (error == 0)
-				VOP_UNLOCK(lvp);
-			vdrop(lvp);
-			if (uhold != 0)
-				vdrop(uvp);
-			goto unionfs_lock_fallback;
+			if (flags & LK_UPGRADE)
+				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
+			VNASSERT((flags & LK_DOWNGRADE) == 0, vp,
+			    ("%s: vnode doomed during downgrade", __func__));
+			goto unionfs_lock_restart;
 		}
 	}
 
-	if (error == 0 && uvp != NULLVP) {
-		if (uhold && flags & LK_UPGRADE) {
-			flags &= ~LK_TYPE_MASK;
-			flags |= LK_EXCLUSIVE;
-		}
-		VI_LOCK_FLAGS(uvp, MTX_DUPOK);
-		flags |= LK_INTERLOCK;
-		if (uhold == 0) {
-			vholdl(uvp);
-			uhold = 1;
-		}
-
-		VI_UNLOCK(vp);
-		ap->a_flags &= ~LK_INTERLOCK;
-
-		error = VOP_LOCK(uvp, flags);
-
-		VI_LOCK(vp);
-		unp = VTOUNIONFS(vp);
-		if (unp == NULL) {
-			/* vnode is released. */
-			VI_UNLOCK(vp);
-			if (error == 0)
-				VOP_UNLOCK(uvp);
-			vdrop(uvp);
-			if (lvp != NULLVP) {
-				VOP_UNLOCK(lvp);
-				vdrop(lvp);
-			}
-			goto unionfs_lock_fallback;
-		}
-		if (error != 0 && lvp != NULLVP) {
-			/* rollback */
-			VI_UNLOCK(vp);
-			unionfs_revlock(lvp, revlock);
-			interlock = 0;
-		}
-	}
-
-	if (interlock)
-		VI_UNLOCK(vp);
-	if (lvp != NULLVP)
-		vdrop(lvp);
-	if (uhold != 0)
-		vdrop(uvp);
-
 	return (error);
-
-unionfs_lock_null_vnode:
-	ap->a_flags |= LK_INTERLOCK;
-	return (vop_stdlock(ap));
-
-unionfs_lock_fallback:
-	/*
-	 * If we reach this point, we've discovered the unionfs vnode
-	 * has been reclaimed while the upper/lower vnode locks were
-	 * temporarily dropped.  Such temporary droppage may happen
-	 * during the course of an LK_UPGRADE operation itself, and in
-	 * that case LK_UPGRADE must be cleared as the unionfs vnode's
-	 * lock has been reset to point to the standard v_lock field,
-	 * which has not previously been held.
-	 */
-	if (flags & LK_UPGRADE) {
-		ap->a_flags &= ~LK_TYPE_MASK;
-		ap->a_flags |= LK_EXCLUSIVE;
-	}
-	return (vop_stdlock(ap));
 }
 
 static int
 unionfs_unlock(struct vop_unlock_args *ap)
 {
 	struct vnode   *vp;
-	struct vnode   *lvp;
-	struct vnode   *uvp;
+	struct vnode   *tvp;
 	struct unionfs_node *unp;
 	int		error;
-	int		uhold;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
-	error = 0;
-	uhold = 0;
 	vp = ap->a_vp;
 
 	unp = VTOUNIONFS(vp);
 	if (unp == NULL)
-		goto unionfs_unlock_null_vnode;
-	lvp = unp->un_lowervp;
-	uvp = unp->un_uppervp;
+		return (vop_stdunlock(ap));
 
-	if (lvp != NULLVP) {
-		vholdnz(lvp);
-		error = VOP_UNLOCK(lvp);
-	}
+	tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp);
 
-	if (error == 0 && uvp != NULLVP) {
-		vholdnz(uvp);
-		uhold = 1;
-		error = VOP_UNLOCK(uvp);
-	}
+	vholdnz(tvp);
+	error = VOP_UNLOCK(tvp);
+	vdrop(tvp);
 
-	if (lvp != NULLVP)
-		vdrop(lvp);
-	if (uhold != 0)
-		vdrop(uvp);
-
-	return error;
-
-unionfs_unlock_null_vnode:
-	return (vop_stdunlock(ap));
+	return (error);
 }
 
 static int
@@ -2192,7 +2315,7 @@ unionfs_advlock(struct vop_advlock_args *ap)
 	uvp = unp->un_uppervp;
 
 	if (uvp == NULLVP) {
-		error = unionfs_copyfile(unp, 1, td->td_ucred, td);
+		error = unionfs_copyfile(ap->a_vp, 1, td->td_ucred, td);
 		if (error != 0)
 			goto unionfs_advlock_abort;
 		uvp = unp->un_uppervp;
@@ -2294,7 +2417,7 @@ unionfs_setacl(struct vop_setacl_args *ap)
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
-		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+		if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
 			return (error);
 		uvp = unp->un_uppervp;
 	}
@@ -2467,9 +2590,10 @@ unionfs_setextattr(struct vop_setextattr_args *ap)
 	if (ovp == lvp && lvp->v_type == VREG) {
 		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
 		if (uvp == NULLVP &&
-		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+		    (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
 unionfs_setextattr_reopen:
-			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			unp = VTOUNIONFS(ap->a_vp);
+			if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
 			    VOP_OPENEXTATTR(lvp, cred, td)) {
 #ifdef DIAGNOSTIC
 				panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2561,9 +2685,10 @@ unionfs_deleteextattr(struct vop_deleteextattr_args *ap)
 	if (ovp == lvp && lvp->v_type == VREG) {
 		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
 		if (uvp == NULLVP &&
-		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+		    (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
 unionfs_deleteextattr_reopen:
-			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+			unp = VTOUNIONFS(ap->a_vp);
+			if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
 			    VOP_OPENEXTATTR(lvp, cred, td)) {
 #ifdef DIAGNOSTIC
 				panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2613,7 +2738,7 @@ unionfs_setlabel(struct vop_setlabel_args *ap)
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
-		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+		if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
 			return (error);
 		uvp = unp->un_uppervp;
 	}
@@ -2665,7 +2790,7 @@ static int
 unionfs_vput_pair(struct vop_vput_pair_args *ap)
 {
 	struct mount *mp;
-	struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp;
+	struct vnode *dvp, *vp, **vpp, *lvp, *uvp, *tvp, *tdvp, *tempvp;
 	struct unionfs_node *dunp, *unp;
 	int error, res;
 
@@ -2674,11 +2799,14 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
 	vp = NULLVP;
 	lvp = NULLVP;
 	uvp = NULLVP;
+	tvp = NULLVP;
 	unp = NULL;
 
 	dunp = VTOUNIONFS(dvp);
-	udvp = dunp->un_uppervp;
-	ldvp = dunp->un_lowervp;
+	if (dunp->un_uppervp != NULL)
+		tdvp = dunp->un_uppervp;
+	else
+		tdvp = dunp->un_lowervp;
 
 	/*
 	 * Underlying vnodes should be locked because the encompassing unionfs
@@ -2686,10 +2814,7 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
 	 * only be on the unionfs node.  Reference them now so that the vput()s
 	 * performed by VOP_VPUT_PAIR() will have a reference to drop.
 	 */
-	if (udvp != NULLVP)
-		vref(udvp);
-	if (ldvp != NULLVP)
-		vref(ldvp);
+	vref(tdvp);
 
 	if (vpp != NULL)
 		vp = *vpp;
@@ -2699,9 +2824,10 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
 		uvp = unp->un_uppervp;
 		lvp = unp->un_lowervp;
 		if (uvp != NULLVP)
-			vref(uvp);
-		if (lvp != NULLVP)
-			vref(lvp);
+			tvp = uvp;
+		else
+			tvp = lvp;
+		vref(tvp);
 
 		/*
 		 * If we're being asked to return a locked child vnode, then
@@ -2721,31 +2847,19 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
 		}
 	}
 
-	/*
-	 * TODO: Because unionfs_lock() locks both the lower and upper vnodes
-	 * (if available), we must also call VOP_VPUT_PAIR() on both the lower
-	 * and upper parent/child pairs.  If unionfs_lock() is reworked to lock
-	 * only a single vnode, this code will need to change to also only
-	 * operate on one vnode pair.
-	 */
-	ASSERT_VOP_LOCKED(ldvp, __func__);
-	ASSERT_VOP_LOCKED(udvp, __func__);
-	ASSERT_VOP_LOCKED(lvp, __func__);
-	ASSERT_VOP_LOCKED(uvp, __func__);
-
-	KASSERT(lvp == NULLVP || ldvp != NULLVP,
-	    ("%s: NULL ldvp with non-NULL lvp", __func__));
-	if (ldvp != NULLVP)
-		res = VOP_VPUT_PAIR(ldvp, lvp != NULLVP ? &lvp : NULL, true);
-	KASSERT(uvp == NULLVP || udvp != NULLVP,
-	    ("%s: NULL udvp with non-NULL uvp", __func__));
-	if (udvp != NULLVP)
-		res = VOP_VPUT_PAIR(udvp, uvp != NULLVP ? &uvp : NULL, true);
-
-	ASSERT_VOP_UNLOCKED(ldvp, __func__);
-	ASSERT_VOP_UNLOCKED(udvp, __func__);
-	ASSERT_VOP_UNLOCKED(lvp, __func__);
-	ASSERT_VOP_UNLOCKED(uvp, __func__);
+	ASSERT_VOP_LOCKED(tdvp, __func__);
+	ASSERT_VOP_LOCKED(tvp, __func__);
+
+	if (tdvp == dunp->un_uppervp && tvp != NULLVP && tvp == lvp) {
+		vput(tvp);
+		vput(tdvp);
+		res = 0;
+	} else {
+		res = VOP_VPUT_PAIR(tdvp, tvp != NULLVP ? &tvp : NULL, true);
+	}
+
+	ASSERT_VOP_UNLOCKED(tdvp, __func__);
+	ASSERT_VOP_UNLOCKED(tvp, __func__);
 
 	/*
 	 * VOP_VPUT_PAIR() dropped the references we added to the underlying