about summary refs log tree commit diff
path: root/sys/fs
diff options
context:
space:
mode:
Diffstat (limited to 'sys/fs')
-rw-r--r-- sys/fs/cd9660/cd9660_lookup.c 8
-rw-r--r-- sys/fs/cd9660/cd9660_node.c 6
-rw-r--r-- sys/fs/cd9660/cd9660_node.h 2
-rw-r--r-- sys/fs/cd9660/cd9660_rrip.c 2
-rw-r--r-- sys/fs/cd9660/cd9660_vfsops.c 10
-rw-r--r-- sys/fs/cd9660/cd9660_vnops.c 5
-rw-r--r-- sys/fs/cd9660/iso.h 23
-rw-r--r-- sys/fs/cd9660/iso_rrip.h 4
-rw-r--r-- sys/fs/cuse/cuse.c 6
-rw-r--r-- sys/fs/devfs/devfs_devs.c 10
-rw-r--r-- sys/fs/devfs/devfs_vnops.c 9
-rw-r--r-- sys/fs/ext2fs/ext2_extents.c 2
-rw-r--r-- sys/fs/ext2fs/ext2_vfsops.c 2
-rw-r--r-- sys/fs/ext2fs/ext2_vnops.c 2
-rw-r--r-- sys/fs/ext2fs/inode.h 8
-rw-r--r-- sys/fs/fdescfs/fdesc_vnops.c 13
-rw-r--r-- sys/fs/fuse/fuse_device.c 49
-rw-r--r-- sys/fs/fuse/fuse_file.c 9
-rw-r--r-- sys/fs/fuse/fuse_file.h 2
-rw-r--r-- sys/fs/fuse/fuse_internal.c 11
-rw-r--r-- sys/fs/fuse/fuse_io.c 2
-rw-r--r-- sys/fs/fuse/fuse_ipc.c 9
-rw-r--r-- sys/fs/fuse/fuse_ipc.h 4
-rw-r--r-- sys/fs/fuse/fuse_kernel.h 132
-rw-r--r-- sys/fs/fuse/fuse_node.c 21
-rw-r--r-- sys/fs/fuse/fuse_vfsops.c 42
-rw-r--r-- sys/fs/fuse/fuse_vnops.c 320
-rw-r--r-- sys/fs/msdosfs/denode.h 4
-rw-r--r-- sys/fs/msdosfs/fat.h 4
-rw-r--r-- sys/fs/msdosfs/msdosfs_conv.c 11
-rw-r--r-- sys/fs/msdosfs/msdosfs_lookup.c 5
-rw-r--r-- sys/fs/msdosfs/msdosfs_vfsops.c 7
-rw-r--r-- sys/fs/msdosfs/msdosfs_vnops.c 37
-rw-r--r-- sys/fs/msdosfs/msdosfsmount.h 18
-rw-r--r-- sys/fs/nfs/nfs.h 2
-rw-r--r-- sys/fs/nfs/nfs_commonacl.c 66
-rw-r--r-- sys/fs/nfs/nfs_commonkrpc.c 42
-rw-r--r-- sys/fs/nfs/nfs_commonport.c 3
-rw-r--r-- sys/fs/nfs/nfs_commonsubs.c 337
-rw-r--r-- sys/fs/nfs/nfs_var.h 46
-rw-r--r-- sys/fs/nfs/nfscl.h 9
-rw-r--r-- sys/fs/nfs/nfsclstate.h 4
-rw-r--r-- sys/fs/nfs/nfsport.h 14
-rw-r--r-- sys/fs/nfs/nfsproto.h 54
-rw-r--r-- sys/fs/nfs/nfsrvstate.h 2
-rw-r--r-- sys/fs/nfsclient/nfs_clbio.c 2
-rw-r--r-- sys/fs/nfsclient/nfs_clcomsubs.c 3
-rw-r--r-- sys/fs/nfsclient/nfs_clnode.c 24
-rw-r--r-- sys/fs/nfsclient/nfs_clport.c 3
-rw-r--r-- sys/fs/nfsclient/nfs_clrpcops.c 542
-rw-r--r-- sys/fs/nfsclient/nfs_clstate.c 247
-rw-r--r-- sys/fs/nfsclient/nfs_clsubs.c 13
-rw-r--r-- sys/fs/nfsclient/nfs_clvfsops.c 23
-rw-r--r-- sys/fs/nfsclient/nfs_clvnops.c 510
-rw-r--r-- sys/fs/nfsclient/nfsnode.h 1
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdcache.c 6
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdport.c 386
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdserv.c 289
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdsocket.c 8
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdstate.c 698
-rw-r--r-- sys/fs/nfsserver/nfs_nfsdsubs.c 21
-rw-r--r-- sys/fs/nullfs/null_subr.c 4
-rw-r--r-- sys/fs/nullfs/null_vfsops.c 5
-rw-r--r-- sys/fs/nullfs/null_vnops.c 69
-rw-r--r-- sys/fs/p9fs/p9_client.c 1332
-rw-r--r-- sys/fs/p9fs/p9_client.h 169
-rw-r--r-- sys/fs/p9fs/p9_debug.h 45
-rw-r--r-- sys/fs/p9fs/p9_protocol.c 632
-rw-r--r-- sys/fs/p9fs/p9_protocol.h 282
-rw-r--r-- sys/fs/p9fs/p9_transport.c 70
-rw-r--r-- sys/fs/p9fs/p9_transport.h 53
-rw-r--r-- sys/fs/p9fs/p9fs.h 203
-rw-r--r-- sys/fs/p9fs/p9fs_proto.h 42
-rw-r--r-- sys/fs/p9fs/p9fs_subr.c 411
-rw-r--r-- sys/fs/p9fs/p9fs_vfsops.c 610
-rw-r--r-- sys/fs/p9fs/p9fs_vnops.c 2236
-rw-r--r-- sys/fs/procfs/procfs_mem.c 1
-rw-r--r-- sys/fs/procfs/procfs_osrel.c 7
-rw-r--r-- sys/fs/procfs/procfs_rlimit.c 3
-rw-r--r-- sys/fs/procfs/procfs_status.c 26
-rw-r--r-- sys/fs/pseudofs/pseudofs.c 54
-rw-r--r-- sys/fs/smbfs/smbfs_io.c 2
-rw-r--r-- sys/fs/smbfs/smbfs_vnops.c 5
-rw-r--r-- sys/fs/tarfs/tarfs.h 9
-rw-r--r-- sys/fs/tarfs/tarfs_vnops.c 2
-rw-r--r-- sys/fs/tmpfs/tmpfs.h 17
-rw-r--r-- sys/fs/tmpfs/tmpfs_subr.c 106
-rw-r--r-- sys/fs/tmpfs/tmpfs_vfsops.c 18
-rw-r--r-- sys/fs/tmpfs/tmpfs_vnops.c 136
-rw-r--r-- sys/fs/udf/ecma167-udf.h 4
-rw-r--r-- sys/fs/udf/udf_vfsops.c 9
-rw-r--r-- sys/fs/udf/udf_vnops.c 52
-rw-r--r-- sys/fs/unionfs/union.h 34
-rw-r--r-- sys/fs/unionfs/union_subr.c 817
-rw-r--r-- sys/fs/unionfs/union_vfsops.c 15
-rw-r--r-- sys/fs/unionfs/union_vnops.c 938
96 files changed, 10404 insertions, 2168 deletions
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
index 569ee631416c..75fcdc9152cd 100644
--- a/sys/fs/cd9660/cd9660_lookup.c
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -47,8 +47,8 @@
#include <fs/cd9660/iso_rrip.h>
struct cd9660_ino_alloc_arg {
- cd_ino_t ino;
- cd_ino_t i_ino;
+ ino_t ino;
+ ino_t i_ino;
struct iso_directory_record *ep;
};
@@ -115,7 +115,7 @@ cd9660_lookup(struct vop_cachedlookup_args *ap)
struct cd9660_ino_alloc_arg dd_arg;
u_long bmask; /* block offset mask */
int error;
- cd_ino_t ino, i_ino;
+ ino_t ino, i_ino;
int ltype, reclen;
u_short namelen;
int isoflags;
@@ -125,7 +125,7 @@ cd9660_lookup(struct vop_cachedlookup_args *ap)
char *name;
struct vnode **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;
- int flags = cnp->cn_flags;
+ uint64_t flags = cnp->cn_flags;
int nameiop = cnp->cn_nameiop;
ep2 = ep = NULL;
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
index 67270b40f2b0..ce6ec3aa7a1c 100644
--- a/sys/fs/cd9660/cd9660_node.c
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -281,10 +281,10 @@ cd9660_tstamp_conv17(u_char *pi, struct timespec *pu)
return cd9660_tstamp_conv7(buf, pu, ISO_FTYPE_DEFAULT);
}
-cd_ino_t
+ino_t
isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp)
{
- cd_ino_t ino;
+ ino_t ino;
/*
* Note there is an inverse calculation in
@@ -293,7 +293,7 @@ isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp)
* and also a calculation of the isodir pointer
* from an inode in cd9660_vnops.c:cd9660_readlink()
*/
- ino = ((cd_ino_t)isonum_733(isodir->extent) +
+ ino = ((ino_t)isonum_733(isodir->extent) +
isonum_711(isodir->ext_attr_length)) << imp->im_bshift;
return ino;
}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
index 9dc84dd57c0e..6021c1681c5d 100644
--- a/sys/fs/cd9660/cd9660_node.h
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -56,7 +56,7 @@ typedef struct {
struct iso_node {
struct vnode *i_vnode; /* vnode associated with this inode */
- cd_ino_t i_number; /* the identity of the inode */
+ ino_t i_number; /* the identity of the inode */
/* we use the actual starting block of the file */
struct iso_mnt *i_mnt; /* filesystem associated with this inode */
struct lockf *i_lockf; /* head of byte-level lock list */
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
index 26825062d25a..d0b0008d10b2 100644
--- a/sys/fs/cd9660/cd9660_rrip.c
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -593,7 +593,7 @@ static RRIP_TABLE rrip_table_getname[] = {
int
cd9660_rrip_getname(struct iso_directory_record *isodir, char *outbuf,
- u_short *outlen, cd_ino_t *inump, struct iso_mnt *imp)
+ u_short *outlen, ino_t *inump, struct iso_mnt *imp)
{
ISO_RRIP_ANALYZE analyze;
RRIP_TABLE *tab;
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
index f067453d3458..b4db4c4f7331 100644
--- a/sys/fs/cd9660/cd9660_vfsops.c
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -394,7 +394,7 @@ iso_mountfs(struct vnode *devvp, struct mount *mp)
isomp->im_mountp = mp;
isomp->im_dev = dev;
isomp->im_devvp = devvp;
- isomp->im_fmask = isomp->im_dmask = ACCESSPERMS;
+ isomp->im_fmask = isomp->im_dmask = ALLPERMS;
vfs_flagopt(mp->mnt_optnew, "norrip", &isomp->im_flags, ISOFSMNT_NORRIP);
vfs_flagopt(mp->mnt_optnew, "gens", &isomp->im_flags, ISOFSMNT_GENS);
@@ -560,7 +560,7 @@ cd9660_root(struct mount *mp, int flags, struct vnode **vpp)
struct iso_mnt *imp = VFSTOISOFS(mp);
struct iso_directory_record *dp =
(struct iso_directory_record *)imp->root;
- cd_ino_t ino = isodirino(dp, imp);
+ ino_t ino = isodirino(dp, imp);
/*
* With RRIP we must use the `.' entry of the root directory.
@@ -660,15 +660,15 @@ static int
cd9660_vfs_hash_cmp(struct vnode *vp, void *pino)
{
struct iso_node *ip;
- cd_ino_t ino;
+ ino_t ino;
ip = VTOI(vp);
- ino = *(cd_ino_t *)pino;
+ ino = *(ino_t *)pino;
return (ip->i_number != ino);
}
int
-cd9660_vget_internal(struct mount *mp, cd_ino_t ino, int flags,
+cd9660_vget_internal(struct mount *mp, ino_t ino, int flags,
struct vnode **vpp, int relocated, struct iso_directory_record *isodir)
{
struct iso_mnt *imp;
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
index 33ca58472490..c4d0e6ba7b30 100644
--- a/sys/fs/cd9660/cd9660_vnops.c
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -443,7 +443,7 @@ cd9660_readdir(struct vop_readdir_args *ap)
u_short namelen;
u_int ncookies = 0;
uint64_t *cookies = NULL;
- cd_ino_t ino;
+ ino_t ino;
dp = VTOI(vdp);
imp = dp->i_mnt;
@@ -758,6 +758,9 @@ cd9660_pathconf(struct vop_pathconf_args *ap)
/* NOTREACHED */
}
+_Static_assert(sizeof(struct ifid) <= sizeof(struct fid),
+ "struct ifid must be no larger than struct fid");
+
/*
* Vnode pointer to File handle
*/
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
index a9733f62c077..40047cc92de6 100644
--- a/sys/fs/cd9660/iso.h
+++ b/sys/fs/cd9660/iso.h
@@ -212,21 +212,12 @@ struct iso_extended_attributes {
u_char len_au [ISODCL (247, 250)]; /* 723 */
};
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_WANT_ISO_MNT)
/* CD-ROM Format type */
enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP,
ISO_FTYPE_JOLIET, ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA };
-#ifndef ISOFSMNT_ROOT
-#define ISOFSMNT_ROOT 0
-#endif
-
-/*
- * When ino_t becomes 64-bit, we can remove this definition in favor of ino_t.
- */
-typedef __uint64_t cd_ino_t;
-
struct iso_mnt {
uint64_t im_flags;
@@ -262,12 +253,16 @@ struct iso_mnt {
void *im_l2d;
};
+#endif /* defined(_KERNEL) || defined(_WANT_ISO_MNT) */
+
+#ifdef _KERNEL
+
struct ifid {
u_short ifid_len;
u_short ifid_pad;
- cd_ino_t ifid_ino;
+ ino_t ifid_ino;
long ifid_start;
-};
+} __packed;
#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data))
@@ -276,7 +271,7 @@ struct ifid {
#define lblkno(imp, loc) ((loc) >> (imp)->im_bshift)
#define blksize(imp, ip, lbn) ((imp)->logical_block_size)
-int cd9660_vget_internal(struct mount *, cd_ino_t, int, struct vnode **, int,
+int cd9660_vget_internal(struct mount *, ino_t , int, struct vnode **, int,
struct iso_directory_record *);
#define cd9660_sysctl ((int (*)(int *, u_int, void *, size_t *, void *, \
size_t, struct proc *))eopnotsupp)
@@ -287,7 +282,7 @@ extern struct vop_vector cd9660_fifoops;
int isochar(u_char *, u_char *, int, u_short *, int *, int, void *);
int isofncmp(u_char *, int, u_char *, int, int, int, void *, void *);
void isofntrans(u_char *, int, u_char *, u_short *, int, int, int, int, void *);
-cd_ino_t isodirino(struct iso_directory_record *, struct iso_mnt *);
+ino_t isodirino(struct iso_directory_record *, struct iso_mnt *);
u_short sgetrune(const char *, size_t, char const **, int, void *);
#endif /* _KERNEL */
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
index bea0811eccf4..5a75beb08d93 100644
--- a/sys/fs/cd9660/iso_rrip.h
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -63,7 +63,7 @@ typedef struct {
off_t iso_ce_off; /* offset of continuation area */
int iso_ce_len; /* length of continuation area */
struct iso_mnt *imp; /* mount structure */
- cd_ino_t *inump; /* inode number pointer */
+ ino_t *inump; /* inode number pointer */
char *outbuf; /* name/symbolic link output area */
u_short *outlen; /* length of above */
u_short maxlen; /* maximum length of above */
@@ -76,7 +76,7 @@ int cd9660_rrip_analyze(struct iso_directory_record *isodir,
struct iso_node *inop, struct iso_mnt *imp);
int cd9660_rrip_getname(struct iso_directory_record *isodir,
char *outbuf, u_short *outlen,
- cd_ino_t *inump, struct iso_mnt *imp);
+ ino_t *inump, struct iso_mnt *imp);
int cd9660_rrip_getsymname(struct iso_directory_record *isodir,
char *outbuf, u_short *outlen,
struct iso_mnt *imp);
diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
index 9ef234c35427..d63a7d4691cf 100644
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -191,13 +191,13 @@ static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
-static struct filterops cuse_client_kqfilter_read_ops = {
+static const struct filterops cuse_client_kqfilter_read_ops = {
.f_isfd = 1,
.f_detach = cuse_client_kqfilter_read_detach,
.f_event = cuse_client_kqfilter_read_event,
};
-static struct filterops cuse_client_kqfilter_write_ops = {
+static const struct filterops cuse_client_kqfilter_write_ops = {
.f_isfd = 1,
.f_detach = cuse_client_kqfilter_write_detach,
.f_event = cuse_client_kqfilter_write_event,
@@ -332,7 +332,7 @@ cuse_kern_uninit(void *arg)
mtx_destroy(&cuse_global_mtx);
}
-SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
+SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, NULL);
static int
cuse_server_get(struct cuse_server **ppcs)
diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c
index db879efe803a..124f9f0449af 100644
--- a/sys/fs/devfs/devfs_devs.c
+++ b/sys/fs/devfs/devfs_devs.c
@@ -86,6 +86,9 @@ sysctl_devname(SYSCTL_HANDLER_ARGS)
struct cdev_priv *cdp;
struct cdev *dev;
+ if (req->newptr == NULL)
+ return (EINVAL);
+
#ifdef COMPAT_FREEBSD11
if (req->newlen == sizeof(ud_compat)) {
error = SYSCTL_IN(req, &ud_compat, sizeof(ud_compat));
@@ -118,11 +121,8 @@ SYSCTL_PROC(_kern, OID_AUTO, devname,
CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
NULL, 0, sysctl_devname, "", "devname(3) handler");
-SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD,
- SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)");
-
-SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD,
- SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");
+SYSCTL_SIZEOF_STRUCT(cdev);
+SYSCTL_SIZEOF_STRUCT(cdev_priv);
struct cdev *
devfs_alloc(int flags)
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index a35f6dbf9520..1d744e6593c0 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -66,7 +66,7 @@
static struct vop_vector devfs_vnodeops;
static struct vop_vector devfs_specops;
-static struct fileops devfs_ops_f;
+static const struct fileops devfs_ops_f;
#include <fs/devfs/devfs.h>
#include <fs/devfs/devfs_int.h>
@@ -555,8 +555,7 @@ loop:
if (devfs_allocv_drop_refs(0, dmp, de)) {
vput(vp);
return (ENOENT);
- }
- else if (VN_IS_DOOMED(vp)) {
+ } else if (VN_IS_DOOMED(vp)) {
mtx_lock(&devfs_de_interlock);
if (de->de_vnode == vp) {
de->de_vnode = NULL;
@@ -1516,6 +1515,8 @@ devfs_readdir(struct vop_readdir_args *ap)
*/
if (tmp_ncookies != NULL)
ap->a_ncookies = tmp_ncookies;
+ if (dd == NULL && error == 0 && ap->a_eofflag != NULL)
+ *ap->a_eofflag = 1;
return (error);
}
@@ -2038,7 +2039,7 @@ devfs_cmp_f(struct file *fp1, struct file *fp2, struct thread *td)
return (kcmp_cmp((uintptr_t)fp1->f_data, (uintptr_t)fp2->f_data));
}
-static struct fileops devfs_ops_f = {
+static const struct fileops devfs_ops_f = {
.fo_read = devfs_read_f,
.fo_write = devfs_write_f,
.fo_truncate = devfs_truncate_f,
diff --git a/sys/fs/ext2fs/ext2_extents.c b/sys/fs/ext2fs/ext2_extents.c
index 3ae1da4fe6b7..146aa48f6743 100644
--- a/sys/fs/ext2fs/ext2_extents.c
+++ b/sys/fs/ext2fs/ext2_extents.c
@@ -711,7 +711,7 @@ ext4_ext_tree_init(struct inode *ip)
ip->i_flag |= IN_E4EXTENTS;
- memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
+ memset(ip->i_data, 0, sizeof(ip->i_data));
ehp = (struct ext4_extent_header *)ip->i_data;
ehp->eh_magic = htole16(EXT4_EXT_MAGIC);
ehp->eh_max = htole16(ext4_ext_space_root(ip));
diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c
index bffbf4546f37..9e7a03fffd71 100644
--- a/sys/fs/ext2fs/ext2_vfsops.c
+++ b/sys/fs/ext2fs/ext2_vfsops.c
@@ -1345,7 +1345,7 @@ ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
return (ESTALE);
}
*vpp = nvp;
- vnode_create_vobject(*vpp, 0, curthread);
+ vnode_create_vobject(*vpp, ip->i_size, curthread);
return (0);
}
diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c
index dfbb11f75421..064c10bd18b2 100644
--- a/sys/fs/ext2fs/ext2_vnops.c
+++ b/sys/fs/ext2fs/ext2_vnops.c
@@ -1889,6 +1889,8 @@ ext2_vptofh(struct vop_vptofh_args *ap)
{
struct inode *ip;
struct ufid *ufhp;
+ _Static_assert(sizeof(struct ufid) <= sizeof(struct fid),
+ "struct ufid cannot be larger than struct fid");
ip = VTOI(ap->a_vp);
ufhp = (struct ufid *)ap->a_fhp;
diff --git a/sys/fs/ext2fs/inode.h b/sys/fs/ext2fs/inode.h
index 9ee1b5672da6..c45339bfde40 100644
--- a/sys/fs/ext2fs/inode.h
+++ b/sys/fs/ext2fs/inode.h
@@ -187,10 +187,10 @@ struct indir {
/* This overlays the fid structure (see mount.h). */
struct ufid {
- uint16_t ufid_len; /* Length of structure. */
- uint16_t ufid_pad; /* Force 32-bit alignment. */
- ino_t ufid_ino; /* File number (ino). */
- uint32_t ufid_gen; /* Generation number. */
+ uint16_t ufid_len; /* Length of structure. */
+ uint16_t ufid_pad; /* Force 32-bit alignment. */
+ uint32_t ufid_gen; /* Generation number. */
+ ino_t ufid_ino; /* File number (ino). */
};
#endif /* _KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 9ec80794e795..58a22b8bdc50 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -502,7 +502,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
} else {
error = getvnode_path(td, fd,
- cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+ cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
}
if (error) {
/*
@@ -547,6 +547,8 @@ fdesc_readdir(struct vop_readdir_args *ap)
fmp = VFSTOFDESC(ap->a_vp->v_mount);
if (ap->a_ncookies != NULL)
*ap->a_ncookies = 0;
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 0;
off = (int)uio->uio_offset;
if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
@@ -559,7 +561,12 @@ fdesc_readdir(struct vop_readdir_args *ap)
fcnt = i - 2; /* The first two nodes are `.' and `..' */
FILEDESC_SLOCK(fdp);
- while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
+ while (uio->uio_resid >= UIO_MX) {
+ if (i >= fdp->fd_nfiles + 2) {
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 1;
+ break;
+ }
bzero((caddr_t)dp, UIO_MX);
switch (i) {
case 0: /* `.' */
@@ -639,7 +646,7 @@ fdesc_readlink(struct vop_readlink_args *va)
VOP_UNLOCK(vn);
td = curthread;
- error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL);
+ error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL);
if (error != 0)
goto out;
diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
index 892793993ecc..57b3559731f7 100644
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -82,6 +82,8 @@
#include <sys/sysctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
#include "fuse.h"
#include "fuse_internal.h"
@@ -120,13 +122,13 @@ static int fuse_device_filt_read(struct knote *kn, long hint);
static int fuse_device_filt_write(struct knote *kn, long hint);
static void fuse_device_filt_detach(struct knote *kn);
-struct filterops fuse_device_rfiltops = {
+static const struct filterops fuse_device_rfiltops = {
.f_isfd = 1,
.f_detach = fuse_device_filt_detach,
.f_event = fuse_device_filt_read,
};
-struct filterops fuse_device_wfiltops = {
+static const struct filterops fuse_device_wfiltops = {
.f_isfd = 1,
.f_event = fuse_device_filt_write,
};
@@ -152,7 +154,7 @@ fdata_dtor(void *arg)
FUSE_LOCK();
fuse_lck_mtx_lock(fdata->aw_mtx);
/* wakup poll()ers */
- selwakeuppri(&fdata->ks_rsel, PZERO + 1);
+ selwakeuppri(&fdata->ks_rsel, PZERO);
/* Don't let syscall handlers wait in vain */
while ((tick = fuse_aw_pop(fdata))) {
fuse_lck_mtx_lock(tick->tk_aw_mtx);
@@ -193,7 +195,7 @@ fuse_device_filter(struct cdev *dev, struct knote *kn)
kn->kn_fop = &fuse_device_wfiltops;
error = 0;
} else if (error == 0) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "Unsupported kevent filter");
kn->kn_data = error;
}
@@ -319,7 +321,7 @@ again:
"we know early on that reader should be kicked so we "
"don't wait for news");
fuse_lck_mtx_unlock(data->ms_mtx);
- return (ENODEV);
+ return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
}
if (!(tick = fuse_ms_pop(data))) {
/* check if we may block */
@@ -331,7 +333,10 @@ again:
err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
if (err != 0) {
fuse_lck_mtx_unlock(data->ms_mtx);
- return (fdata_get_dead(data) ? ENODEV : err);
+ if (fdata_get_dead(data))
+ err = EXTERROR(ENODEV,
+ "This FUSE session is about to be closed");
+ return (err);
}
tick = fuse_ms_pop(data);
}
@@ -361,8 +366,8 @@ again:
FUSE_ASSERT_MS_DONE(tick);
fuse_ticket_drop(tick);
}
- return (ENODEV); /* This should make the daemon get off
- * of us */
+ /* This should make the daemon get off of us */
+ return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
}
SDT_PROBE2(fusefs, , device, trace, 1,
"fuse device read message successfully");
@@ -385,7 +390,7 @@ again:
fdata_set_dead(data);
SDT_PROBE2(fusefs, , device, trace, 2,
"daemon is stupid, kick it off...");
- err = ENODEV;
+ err = EXTERROR(ENODEV, "Partial read attempted");
} else {
err = uiomove(buf, buflen, uio);
}
@@ -403,12 +408,14 @@ fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
SDT_PROBE2(fusefs, , device, trace, 1,
"Format error: body size "
"differs from size claimed by header");
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Format error: body size "
+ "differs from size claimed by header"));
}
if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
SDT_PROBE2(fusefs, , device, trace, 1,
"Format error: non zero error but message had a body");
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Format error: non zero error, "
+ "but message had a body"));
}
return (0);
@@ -439,13 +446,12 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
err = devfs_get_cdevpriv((void **)&data);
if (err != 0)
return (err);
- mp = data->mp;
if (uio->uio_resid < sizeof(struct fuse_out_header)) {
SDT_PROBE2(fusefs, , device, trace, 1,
"fuse_device_write got less than a header!");
fdata_set_dead(data);
- return (EINVAL);
+ return (EXTERROR(EINVAL, "fuse_device_write got less than a header!"));
}
if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
return (err);
@@ -453,7 +459,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
if (data->linux_errnos != 0 && ohead.error != 0) {
err = -ohead.error;
if (err < 0 || err >= nitems(linux_to_bsd_errtbl))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Unknown Linux errno", err));
/* '-', because it will get flipped again below */
ohead.error = -linux_to_bsd_errtbl[err];
@@ -521,7 +527,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
memcpy(&tick->tk_aw_ohead, &ohead,
sizeof(ohead));
tick->tk_aw_handler(tick, uio);
- err = EINVAL;
+ err = EXTERROR(EINVAL, "Unknown errno", ohead.error);
} else {
memcpy(&tick->tk_aw_ohead, &ohead,
sizeof(ohead));
@@ -542,6 +548,13 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
} else if (ohead.unique == 0){
/* unique == 0 means asynchronous notification */
SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
+ mp = data->mp;
+ vfs_ref(mp);
+ err = vfs_busy(mp, 0);
+ vfs_rel(mp);
+ if (err)
+ return (err);
+
switch (ohead.error) {
case FUSE_NOTIFY_INVAL_ENTRY:
err = fuse_internal_invalidate_entry(mp, uio);
@@ -564,8 +577,10 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
/* Unimplemented. See comments in fuse_vnops */
default:
/* Not implemented */
- err = ENOSYS;
+ err = EXTERROR(ENOSYS, "Unimplemented FUSE notification code",
+ ohead.error);
}
+ vfs_unbusy(mp);
} else {
/* no callback at all! */
SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket,
@@ -582,7 +597,7 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
*/
err = 0;
} else {
- err = EINVAL;
+ err = EXTERROR(EINVAL, "FUSE ticket is missing");
}
}
diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
index 88de12d59425..5f5819c2ccae 100644
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -122,7 +122,6 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred)
{
struct mount *mp = vnode_mount(vp);
- struct fuse_data *data = fuse_get_mpdata(mp);
struct fuse_dispatcher fdi;
const struct fuse_open_out default_foo = {
.fh = 0,
@@ -132,12 +131,10 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
struct fuse_open_in *foi = NULL;
const struct fuse_open_out *foo;
fufh_type_t fufh_type;
- int dataflags = data->dataflags;
int err = 0;
int oflags = 0;
int op = FUSE_OPEN;
int relop = FUSE_RELEASE;
- int fsess_no_op_support = FSESS_NO_OPEN_SUPPORT;
fufh_type = fflags_2_fufh_type(a_mode);
oflags = fufh_type_2_fflags(fufh_type);
@@ -145,12 +142,11 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
if (vnode_isdir(vp)) {
op = FUSE_OPENDIR;
relop = FUSE_RELEASEDIR;
- fsess_no_op_support = FSESS_NO_OPENDIR_SUPPORT;
/* vn_open_vnode already rejects FWRITE on directories */
MPASS(fufh_type == FUFH_RDONLY || fufh_type == FUFH_EXEC);
}
fdisp_init(&fdi, sizeof(*foi));
- if (fsess_not_impl(mp, op) && dataflags & fsess_no_op_support) {
+ if (fsess_not_impl(mp, op)) {
/* The operation implicitly succeeds */
foo = &default_foo;
} else {
@@ -160,7 +156,7 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
foi->flags = oflags;
err = fdisp_wait_answ(&fdi);
- if (err == ENOSYS && dataflags & fsess_no_op_support) {
+ if (err == ENOSYS) {
/* The operation implicitly succeeds */
foo = &default_foo;
fsess_set_notimpl(mp, op);
@@ -174,6 +170,7 @@ fuse_filehandle_open(struct vnode *vp, int a_mode,
goto out;
} else {
foo = fdi.answ;
+ fsess_set_impl(mp, op);
}
}
diff --git a/sys/fs/fuse/fuse_file.h b/sys/fs/fuse/fuse_file.h
index 2a90e66d1b23..232132473953 100644
--- a/sys/fs/fuse/fuse_file.h
+++ b/sys/fs/fuse/fuse_file.h
@@ -139,7 +139,7 @@ struct fuse_filehandle {
/*
* flags returned by FUSE_OPEN
- * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE
+ * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE, FOPEN_NOFLUSH
* Unsupported:
* FOPEN_NONSEEKABLE: Adding support would require a new per-file
* or per-vnode attribute, which would have to be checked by
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index 29d88fc942f4..61fe2ed032f6 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -282,12 +282,12 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
* dirty writes! That's a server bug.
*/
if (fuse_libabi_geq(data, 7, 23)) {
- msg = "writeback cache incoherent!."
+ msg = "writeback cache incoherent! "
"To prevent data corruption, disable "
"the writeback cache according to your "
"FUSE server's documentation.";
} else {
- msg = "writeback cache incoherent!."
+ msg = "writeback cache incoherent! "
"To prevent data corruption, disable "
"the writeback cache by setting "
"vfs.fusefs.data_cache_mode to 0 or 1.";
@@ -979,6 +979,9 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
struct fuse_data *data = tick->tk_data;
struct fuse_init_out *fiio = NULL;
+ if (fdata_get_dead(data))
+ goto out;
+
if ((err = tick->tk_aw_ohead.error)) {
goto out;
}
@@ -1010,10 +1013,6 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
data->dataflags |= FSESS_POSIX_LOCKS;
if (fiio->flags & FUSE_EXPORT_SUPPORT)
data->dataflags |= FSESS_EXPORT_SUPPORT;
- if (fiio->flags & FUSE_NO_OPEN_SUPPORT)
- data->dataflags |= FSESS_NO_OPEN_SUPPORT;
- if (fiio->flags & FUSE_NO_OPENDIR_SUPPORT)
- data->dataflags |= FSESS_NO_OPENDIR_SUPPORT;
/*
* Don't bother to check FUSE_BIG_WRITES, because it's
* redundant with max_write
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 00b348814642..0760d7641c7d 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -932,7 +932,7 @@ fuse_io_invalbuf(struct vnode *vp, struct thread *td)
if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
return EIO;
fvdat->flag |= FN_FLUSHWANT;
- tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
+ tsleep(&fvdat->flag, PRIBIO, "fusevinv", 2 * hz);
error = 0;
if (p != NULL) {
PROC_LOCK(p);
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
index f1f9f801bf4d..0b6048644d32 100644
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -443,11 +443,6 @@ retry:
if (err == EWOULDBLOCK) {
SDT_PROBE2(fusefs, , ipc, trace, 3,
"fticket_wait_answer: EWOULDBLOCK");
-#ifdef XXXIP /* die conditionally */
- if (!fdata_get_dead(data)) {
- fdata_set_dead(data);
- }
-#endif
err = ETIMEDOUT;
fticket_set_answered(ftick);
} else if ((err == EINTR || err == ERESTART)) {
@@ -593,7 +588,7 @@ fdata_set_dead(struct fuse_data *data)
fuse_lck_mtx_lock(data->ms_mtx);
data->dataflags |= FSESS_DEAD;
wakeup_one(data);
- selwakeuppri(&data->ks_rsel, PZERO + 1);
+ selwakeuppri(&data->ks_rsel, PZERO);
wakeup(&data->ticketer);
fuse_lck_mtx_unlock(data->ms_mtx);
FUSE_UNLOCK();
@@ -669,7 +664,7 @@ fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
else
fuse_ms_push(ftick);
wakeup_one(ftick->tk_data);
- selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
+ selwakeuppri(&ftick->tk_data->ks_rsel, PZERO);
KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
}
diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h
index 0ec556138be0..3bfc859dbac9 100644
--- a/sys/fs/fuse/fuse_ipc.h
+++ b/sys/fs/fuse/fuse_ipc.h
@@ -227,8 +227,6 @@ struct fuse_data {
/* (and being observed by the daemon) */
#define FSESS_PUSH_SYMLINKS_IN 0x0020 /* prefix absolute symlinks with mp */
#define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */
-#define FSESS_NO_OPEN_SUPPORT 0x0080 /* can elide FUSE_OPEN ops */
-#define FSESS_NO_OPENDIR_SUPPORT 0x0100 /* can elide FUSE_OPENDIR ops */
#define FSESS_ASYNC_READ 0x1000 /* allow multiple reads of some file */
#define FSESS_POSIX_LOCKS 0x2000 /* daemon supports POSIX locks */
#define FSESS_EXPORT_SUPPORT 0x10000 /* daemon supports NFS-style lookups */
@@ -240,6 +238,8 @@ struct fuse_data {
#define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000 /* WB cache incoherent */
#define FSESS_WARN_ILLEGAL_INODE 0x800000 /* Illegal inode for new file */
#define FSESS_WARN_READLINK_EMBEDDED_NUL 0x1000000 /* corrupt READLINK output */
+#define FSESS_WARN_DOT_LOOKUP 0x2000000 /* Inconsistent . LOOKUP response */
+#define FSESS_WARN_INODE_MISMATCH 0x4000000 /* ino != nodeid */
#define FSESS_MNTOPTS_MASK ( \
FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \
FSESS_DEFAULT_PERMISSIONS | FSESS_INTR)
diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h
index ad93a26adaab..942448b47365 100644
--- a/sys/fs/fuse/fuse_kernel.h
+++ b/sys/fs/fuse/fuse_kernel.h
@@ -161,6 +161,33 @@
* - add FOPEN_CACHE_DIR
* - add FUSE_MAX_PAGES, add max_pages to init_out
* - add FUSE_CACHE_SYMLINKS
+ *
+ * 7.29
+ * - add FUSE_NO_OPENDIR_SUPPORT flag
+ *
+ * 7.30
+ * - add FUSE_EXPLICIT_INVAL_DATA
+ * - add FUSE_IOCTL_COMPAT_X32
+ *
+ * 7.31
+ * - add FUSE_WRITE_KILL_PRIV flag
+ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ *
+ * 7.32
+ * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
+ *
+ * 7.33
+ * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
+ * - add FUSE_OPEN_KILL_SUIDGID
+ * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
+ * - add FUSE_SETXATTR_ACL_KILL_SGID
+ *
+ * 7.34
+ * - add FUSE_SYNCFS
+ *
+ * 7.35
+ * - add FOPEN_NOFLUSH
*/
#ifndef _FUSE_FUSE_KERNEL_H
@@ -196,7 +223,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 29
+#define FUSE_KERNEL_MINOR_VERSION 35
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -220,7 +247,7 @@ struct fuse_attr {
uint32_t gid;
uint32_t rdev;
uint32_t blksize;
- uint32_t padding;
+ uint32_t flags;
};
struct fuse_kstatfs {
@@ -257,6 +284,7 @@ struct fuse_file_lock {
#define FATTR_MTIME_NOW (1 << 8)
#define FATTR_LOCKOWNER (1 << 9)
#define FATTR_CTIME (1 << 10)
+#define FATTR_KILL_SUIDGID (1 << 11)
/**
* Flags returned by the OPEN request
@@ -265,11 +293,15 @@ struct fuse_file_lock {
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
* FOPEN_NONSEEKABLE: the file is not seekable
* FOPEN_CACHE_DIR: allow caching this directory
+ * FOPEN_STREAM: the file is stream-like (no file position at all)
+ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
#define FOPEN_NONSEEKABLE (1 << 2)
#define FOPEN_CACHE_DIR (1 << 3)
+#define FOPEN_STREAM (1 << 4)
+#define FOPEN_NOFLUSH (1 << 5)
/**
* INIT request/reply flags
@@ -299,6 +331,17 @@ struct fuse_file_lock {
* FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
* FUSE_CACHE_SYMLINKS: cache READLINK responses
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
+ * foffset and moffset fields in struct
+ * fuse_setupmapping_out and fuse_removemapping_one.
+ * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
+ * FUSE_HANDLE_KILLPRIV_V2: fs kills suid/sgid/cap on write/chown/trunc.
+ * Upon write/truncate suid/sgid is only killed if caller
+ * does not have CAP_FSETID. Additionally upon
+ * write/truncate sgid is killed only if file has group
+ * execute permission. (Same as Linux VFS behavior).
+ * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -325,6 +368,11 @@ struct fuse_file_lock {
#define FUSE_MAX_PAGES (1 << 22)
#define FUSE_CACHE_SYMLINKS (1 << 23)
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT (1 << 26)
+#define FUSE_SUBMOUNTS (1 << 27)
+#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
+#define FUSE_SETXATTR_EXT (1 << 29)
#ifdef linux
/**
@@ -356,9 +404,14 @@ struct fuse_file_lock {
*
* FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
* FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits
*/
#define FUSE_WRITE_CACHE (1 << 0)
#define FUSE_WRITE_LOCKOWNER (1 << 1)
+#define FUSE_WRITE_KILL_SUIDGID (1 << 2)
+
+/* Obsolete alias; this flag implies killing suid/sgid only. */
+#define FUSE_WRITE_KILL_PRIV FUSE_WRITE_KILL_SUIDGID
/**
* Read flags
@@ -373,6 +426,7 @@ struct fuse_file_lock {
* FUSE_IOCTL_RETRY: retry with new iovecs
* FUSE_IOCTL_32BIT: 32bit ioctl
* FUSE_IOCTL_DIR: is a directory
+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t)
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
@@ -381,6 +435,7 @@ struct fuse_file_lock {
#define FUSE_IOCTL_RETRY (1 << 2)
#define FUSE_IOCTL_32BIT (1 << 3)
#define FUSE_IOCTL_DIR (1 << 4)
+#define FUSE_IOCTL_COMPAT_X32 (1 << 5)
#define FUSE_IOCTL_MAX_IOV 256
@@ -404,6 +459,25 @@ struct fuse_file_lock {
#define FUSE_FALLOC_FL_KEEP_SIZE 0x1
#define FUSE_FALLOC_FL_PUNCH_HOLE 0x2
+/**
+ * fuse_attr flags
+ *
+ * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ */
+#define FUSE_ATTR_SUBMOUNT (1 << 0)
+
+/**
+ * Open flags
+ * FUSE_OPEN_KILL_SUIDGID: Kill suid and sgid if executable
+ */
+#define FUSE_OPEN_KILL_SUIDGID (1 << 0)
+
+/**
+ * setxattr flags
+ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set
+ */
+#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0)
+
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
@@ -450,10 +524,16 @@ enum fuse_opcode {
FUSE_RENAME2 = 45,
FUSE_LSEEK = 46,
FUSE_COPY_FILE_RANGE = 47,
+ FUSE_SETUPMAPPING = 48,
+ FUSE_REMOVEMAPPING = 49,
+ FUSE_SYNCFS = 50,
#ifdef linux
/* CUSE specific operations */
CUSE_INIT = 4096,
+ /* Reserved opcodes: helpful to detect structure endian-ness */
+ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */
+ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */
#endif /* linux */
};
@@ -561,14 +641,14 @@ struct fuse_setattr_in {
struct fuse_open_in {
uint32_t flags;
- uint32_t unused;
+ uint32_t open_flags; /* FUSE_OPEN_... */
};
struct fuse_create_in {
uint32_t flags;
uint32_t mode;
uint32_t umask;
- uint32_t padding;
+ uint32_t open_flags; /* FUSE_OPEN_... */
};
struct fuse_open_out {
@@ -630,9 +710,13 @@ struct fuse_fsync_in {
uint32_t padding;
};
+#define FUSE_COMPAT_SETXATTR_IN_SIZE 8
+
struct fuse_setxattr_in {
uint32_t size;
uint32_t flags;
+ uint32_t setxattr_flags;
+ uint32_t padding;
};
struct fuse_listxattr_in {
@@ -692,7 +776,7 @@ struct fuse_init_out {
uint32_t max_write;
uint32_t time_gran;
uint16_t max_pages;
- uint16_t padding;
+ uint16_t map_alignment;
uint32_t unused[8];
};
@@ -863,6 +947,10 @@ struct fuse_notify_retrieve_in {
uint64_t dummy4;
};
+/* Device ioctls: */
+#define FUSE_DEV_IOC_MAGIC 229
+#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+
struct fuse_lseek_in {
uint64_t fh;
uint64_t offset;
@@ -884,4 +972,38 @@ struct fuse_copy_file_range_in {
uint64_t flags;
};
+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
+struct fuse_setupmapping_in {
+ /* An already open handle */
+ uint64_t fh;
+ /* Offset into the file to start the mapping */
+ uint64_t foffset;
+ /* Length of mapping required */
+ uint64_t len;
+ /* Flags, FUSE_SETUPMAPPING_FLAG_* */
+ uint64_t flags;
+ /* Offset in Memory Window */
+ uint64_t moffset;
+};
+
+struct fuse_removemapping_in {
+ /* number of fuse_removemapping_one entries that follow */
+ uint32_t count;
+};
+
+struct fuse_removemapping_one {
+ /* Offset into the dax window at which to start the unmapping */
+ uint64_t moffset;
+ /* Length of mapping required */
+ uint64_t len;
+};
+
+#define FUSE_REMOVEMAPPING_MAX_ENTRY \
+ (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+
+struct fuse_syncfs_in {
+ uint64_t padding;
+};
+
#endif /* _FUSE_FUSE_KERNEL_H */
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
index 777519450954..742dc66bcafc 100644
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -297,6 +297,8 @@ fuse_vnode_get(struct mount *mp,
__enum_uint8(vtype) vtyp)
{
struct thread *td = curthread;
+ bool exportable = fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT;
+
/*
* feo should only be NULL for the root directory, which (when libfuse
* is used) always has generation 0
@@ -309,6 +311,23 @@ fuse_vnode_get(struct mount *mp,
"Assigned same inode to both parent and child.");
return EIO;
}
+ if (feo && feo->nodeid != feo->attr.ino && exportable) {
+ /*
+ * NFS servers (both kernelspace and userspace) rely on
+ * VFS_VGET to lookup inodes. But that's only possible if the
+ * file's inode number matches its nodeid, which isn't
+ * necessarily the case for FUSE. If they don't match, then we
+ * can complete the current operation, but future VFS_VGET
+ * operations will almost certainly return spurious results.
+ * Warn the operator.
+ *
+ * But only warn the operator if the file system reports
+ * NFS-compatibility, because that's the only time that this
+ * matters, and dumb fuse servers abound.
+ */
+ fuse_warn(fuse_get_mpdata(mp), FSESS_WARN_INODE_MISMATCH,
+ "file has different inode number and nodeid.");
+ }
err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp);
if (err) {
@@ -354,7 +373,7 @@ void
fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td)
{
if (vnode_vtype(vp) == VREG)
- vnode_create_vobject(vp, 0, td);
+ vnode_create_vobject(vp, VNODE_NO_SIZE, td);
}
int
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index e088f92bf5bf..1b858a988289 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -81,6 +81,8 @@
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/fcntl.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
#include "fuse.h"
#include "fuse_node.h"
@@ -272,7 +274,7 @@ fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
int error;
if (!(fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "NFS-style lookups are not supported"));
error = VFS_VGET(mp, ffhp->nid, LK_EXCLUSIVE, &nvp);
if (error) {
@@ -286,7 +288,7 @@ fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
return (ESTALE);
}
*vpp = nvp;
- vnode_create_vobject(*vpp, 0, curthread);
+ vnode_create_vobject(*vpp, VNODE_NO_SIZE, curthread);
return (0);
}
@@ -321,11 +323,11 @@ fuse_vfsop_mount(struct mount *mp)
opts = mp->mnt_optnew;
if (!opts)
- return EINVAL;
+ return (EXTERROR(EINVAL, "Mount options were not supplied"));
/* `fspath' contains the mount point (eg. /mnt/fuse/sshfs); REQUIRED */
if (!vfs_getopts(opts, "fspath", &err))
- return err;
+ return (EXTERROR(err, "Mount options are missing 'fspath'"));
/*
* With the help of underscored options the mount program
@@ -358,11 +360,12 @@ fuse_vfsop_mount(struct mount *mp)
/* `from' contains the device name (eg. /dev/fuse0); REQUIRED */
fspec = vfs_getopts(opts, "from", &err);
if (!fspec)
- return err;
+ return (EXTERROR(err, "Mount options are missing 'from'"));
/* `fd' contains the filedescriptor for this session; REQUIRED */
if (vfs_scanopt(opts, "fd", "%d", &fd) != 1)
- return EINVAL;
+ return (EXTERROR(EINVAL, "Mount options contain an invalid value "
+ "for 'fd'"));
err = fuse_getdevice(fspec, td, &fdev);
if (err != 0)
@@ -398,11 +401,17 @@ fuse_vfsop_mount(struct mount *mp)
/* Sanity + permission checks */
if (!data->daemoncred)
panic("fuse daemon found, but identity unknown");
- if (mntopts & FSESS_DAEMON_CAN_SPY)
+ if (mntopts & FSESS_DAEMON_CAN_SPY) {
err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER);
- if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid)
+ EXTERROR(err, "FUSE daemon requires privileges "
+ "due to 'allow_other' option");
+ }
+ if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) {
/* are we allowed to do the first mount? */
err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER);
+ EXTERROR(err, "Mounting as a user that is different from the FUSE "
+ "daemon's requires privileges");
+ }
if (err) {
FUSE_UNLOCK();
goto out;
@@ -549,7 +558,7 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
* nullfs mount of a fusefs file system.
*/
SDT_PROBE1(fusefs, , vfsops, invalidate_without_export, mp);
- return (EOPNOTSUPP);
+ return (EXTERROR(EOPNOTSUPP, "NFS-style lookups are not supported"));
}
error = fuse_internal_get_cached_vnode(mp, ino, flags, vpp);
@@ -565,15 +574,28 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
error = fdisp_wait_answ(&fdi);
if (error)
- return error;
+ goto out;
feo = (struct fuse_entry_out *)fdi.answ;
+
if (feo->nodeid == 0) {
/* zero nodeid means ENOENT and cache it */
error = ENOENT;
goto out;
}
+ if (feo->nodeid != nodeid) {
+ /*
+ * Something is very wrong with the server if "foo/." has a
+ * different inode number than "foo".
+ */
+ static const char exterr[] = "Inconsistent LOOKUP response: "
+ "\"FILE/.\" has a different inode number than \"FILE\".";
+ fuse_warn(data, FSESS_WARN_DOT_LOOKUP, exterr);
+ error = EXTERROR(EIO, exterr);
+ goto out;
+ }
+
vtyp = IFTOVT(feo->attr.mode);
error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
if (error)
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 3f8f3322162a..ae28617537fd 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -89,6 +89,8 @@
#include <sys/buf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -289,6 +291,10 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
if (err)
return err;
+ if (fufh->fuse_open_flags & FOPEN_NOFLUSH &&
+ (!fsess_opt_writeback(vnode_mount(vp))))
+ return (0);
+
fdisp_init(&fdi, sizeof(*ffi));
fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
ffi = fdi.indata;
@@ -395,6 +401,9 @@ fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred,
err = fdisp_wait_answ(&fdi);
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_LSEEK);
+ } else if (err == ENXIO) {
+ /* Note: ENXIO means "no more hole/data regions until EOF" */
+ fsess_set_impl(mp, FUSE_LSEEK);
} else if (err == 0) {
fsess_set_impl(mp, FUSE_LSEEK);
flso = fdi.answ;
@@ -432,7 +441,8 @@ fuse_vnop_access(struct vop_access_args *ap)
if (vnode_isvroot(vp)) {
return 0;
}
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!(data->dataflags & FSESS_INITED)) {
if (vnode_isvroot(vp)) {
@@ -441,7 +451,8 @@ fuse_vnop_access(struct vop_access_args *ap)
return 0;
}
}
- return EBADF;
+ return (EXTERROR(EBADF, "Access denied until FUSE session "
+ "is initialized"));
}
if (vnode_islnk(vp)) {
return 0;
@@ -482,7 +493,8 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
switch(ap->a_op) {
@@ -499,7 +511,7 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
op = FUSE_SETLK;
break;
default:
- return EINVAL;
+ return (EXTERROR(EINVAL, "Unsupported lock flags"));
}
if (!(dataflags & FSESS_POSIX_LOCKS))
@@ -527,14 +539,14 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
size = vattr.va_size;
if (size > OFF_MAX ||
(fl->l_start > 0 && size > OFF_MAX - fl->l_start)) {
- err = EOVERFLOW;
+ err = EXTERROR(EOVERFLOW, "Offset is too large");
goto out;
}
start = size + fl->l_start;
break;
default:
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Unsupported offset type"));
}
err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
@@ -596,15 +608,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
switch (vp->v_type) {
case VFIFO:
return (ESPIPE);
case VLNK:
case VREG:
- if (vfs_isrdonly(mp))
- return (EROFS);
break;
default:
return (ENODEV);
@@ -614,7 +625,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
return (EROFS);
if (fsess_not_impl(mp, FUSE_FALLOCATE))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "This server does not implement "
+ "FUSE_FALLOCATE"));
io.uio_offset = *offset;
io.uio_resid = *len;
@@ -644,13 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_FALLOCATE);
- err = EINVAL;
+ err = EXTERROR(EINVAL, "This server does not implement "
+ "FUSE_FALLOCATE");
} else if (err == EOPNOTSUPP) {
/*
* The file system server does not support FUSE_FALLOCATE with
* the supplied mode for this particular file.
*/
- err = EINVAL;
+ err = EXTERROR(EINVAL, "This file can't be pre-allocated");
} else if (!err) {
*offset += *len;
*len = 0;
@@ -696,7 +709,8 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
int maxrun;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
mp = vnode_mount(vp);
@@ -793,6 +807,9 @@ fuse_vnop_close(struct vop_close_args *ap)
if (fflag & IO_NDELAY)
return 0;
+ if (cred == NULL)
+ cred = td->td_ucred;
+
err = fuse_flush(vp, cred, pid, fflag);
if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
struct vattr vap;
@@ -860,19 +877,21 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
pid_t pid;
int err;
- err = ENOSYS;
if (mp == NULL || mp != vnode_mount(outvp))
- goto fallback;
+ return (EXTERROR(ENOSYS, "Mount points do not match"));
if (incred->cr_uid != outcred->cr_uid)
- goto fallback;
+ return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+ "support different credentials for infd and outfd"));
if (incred->cr_groups[0] != outcred->cr_groups[0])
- goto fallback;
+ return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+ "support different credentials for infd and outfd"));
/* Caller busied mp, mnt_data can be safely accessed. */
if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
- goto fallback;
+ return (EXTERROR(ENOSYS, "This daemon does not "
+ "implement COPY_FILE_RANGE"));
if (ap->a_fsizetd == NULL)
td = curthread;
@@ -882,7 +901,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE);
if (invp->v_data == NULL || outvp->v_data == NULL) {
- err = EBADF;
+ err = EXTERROR(EBADF, "vnode got reclaimed");
goto unlock;
}
@@ -946,7 +965,6 @@ unlock:
if (err == ENOSYS)
fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
-fallback:
/*
* No need to call vn_rlimit_fsizex_res before return, since the uio is
@@ -1014,7 +1032,8 @@ fuse_vnop_create(struct vop_create_args *ap)
int flags;
if (fuse_isdeadfs(dvp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
/* FUSE expects sockets to be created with FUSE_MKNOD */
if (vap->va_type == VSOCK)
@@ -1030,7 +1049,7 @@ fuse_vnop_create(struct vop_create_args *ap)
bzero(&fdi, sizeof(fdi));
if (vap->va_type != VREG)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Only regular files can be created"));
if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
/* Fallback to FUSE_MKNOD/FUSE_OPEN */
@@ -1211,8 +1230,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
if (!(dataflags & FSESS_INITED)) {
if (!vnode_isvroot(vp)) {
fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
- err = ENOTCONN;
- return err;
+ return (EXTERROR(ENOTCONN, "FUSE daemon is not "
+ "initialized"));
} else {
goto fake;
}
@@ -1341,10 +1360,11 @@ fuse_vnop_link(struct vop_link_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vnode_mount(tdvp) != vnode_mount(vp)) {
- return EXDEV;
+ return (EXDEV);
}
/*
@@ -1354,7 +1374,7 @@ fuse_vnop_link(struct vop_link_args *ap)
* validating that nlink does not overflow.
*/
if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
- return EMLINK;
+ return (EMLINK);
fli.oldnodeid = VTOI(vp);
fdisp_init(&fdi, 0);
@@ -1366,12 +1386,13 @@ fuse_vnop_link(struct vop_link_args *ap)
feo = fdi.answ;
if (fli.oldnodeid != feo->nodeid) {
+ static const char exterr[] = "Server assigned wrong inode "
+ "for a hard link.";
struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
- fuse_warn(data, FSESS_WARN_ILLEGAL_INODE,
- "Assigned wrong inode for a hard link.");
+ fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr);
fuse_vnode_clear_attr_cache(vp);
fuse_vnode_clear_attr_cache(tdvp);
- err = EIO;
+ err = EXTERROR(EIO, exterr);
goto out;
}
@@ -1428,8 +1449,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
struct timespec now;
int nameiop = cnp->cn_nameiop;
- int flags = cnp->cn_flags;
- int islastcn = flags & ISLASTCN;
+ bool isdotdot = cnp->cn_flags & ISDOTDOT;
+ bool islastcn = cnp->cn_flags & ISLASTCN;
struct mount *mp = vnode_mount(dvp);
struct fuse_data *data = fuse_get_mpdata(mp);
int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
@@ -1448,7 +1469,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
if (fuse_isdeadfs(dvp)) {
*vpp = NULL;
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!vnode_isdir(dvp))
return ENOTDIR;
@@ -1462,14 +1484,14 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
return err;
is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
- if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT))
- {
+ if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
/*
* Since the file system doesn't support ".." lookups,
* we have no way to find this entry.
*/
- return ESTALE;
+ return (EXTERROR(ESTALE, "This server does not support "
+ "'..' lookups"));
}
nid = VTOFUD(dvp)->parent_nid;
if (nid == 0)
@@ -1577,7 +1599,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
}
} else {
/* Entry was found */
- if (flags & ISDOTDOT) {
+ if (isdotdot) {
struct fuse_lookup_alloc_arg flaa;
flaa.nid = nid;
@@ -1592,11 +1614,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
vref(dvp);
*vpp = dvp;
} else {
+ static const char exterr[] = "Server assigned "
+ "same inode to both parent and child.";
fuse_warn(fuse_get_mpdata(mp),
- FSESS_WARN_ILLEGAL_INODE,
- "Assigned same inode to both parent and "
- "child.");
- err = EIO;
+ FSESS_WARN_ILLEGAL_INODE, exterr);
+ err = EXTERROR(EIO, exterr);
}
} else {
@@ -1684,7 +1706,8 @@ fuse_vnop_mkdir(struct vop_mkdir_args *ap)
struct fuse_mkdir_in fmdi;
if (fuse_isdeadfs(dvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
fmdi.umask = curthread->td_proc->p_pd->pd_cmask;
@@ -1711,7 +1734,8 @@ fuse_vnop_mknod(struct vop_mknod_args *ap)
struct vattr *vap = ap->a_vap;
if (fuse_isdeadfs(dvp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
return fuse_internal_mknod(dvp, vpp, cnp, vap);
}
@@ -1735,11 +1759,13 @@ fuse_vnop_open(struct vop_open_args *ap)
pid_t pid = td->td_proc->p_pid;
if (fuse_isdeadfs(vp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
- return (EOPNOTSUPP);
+ return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type",
+ vp->v_type));
if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
- return EINVAL;
+ return (EXTERROR(EINVAL, "Illegal mode", a_mode));
if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
fuse_vnode_open(vp, 0, td);
@@ -1754,6 +1780,9 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
{
struct vnode *vp = ap->a_vp;
struct mount *mp;
+ struct fuse_filehandle *fufh;
+ int err;
+ bool closefufh = false;
switch (ap->a_name) {
case _PC_FILESIZEBITS:
@@ -1783,22 +1812,45 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
!fsess_not_impl(mp, FUSE_LSEEK)) {
off_t offset = 0;
- /* Issue a FUSE_LSEEK to find out if it's implemented */
- fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred,
- curthread->td_proc->p_pid, &offset, SEEK_DATA);
+ /*
+ * Issue a FUSE_LSEEK to find out if it's supported.
+ * Use SEEK_DATA instead of SEEK_HOLE, because the
+ * latter generally requires sequential scans of file
+ * metadata, which can be slow.
+ */
+ err = fuse_vnop_do_lseek(vp, curthread,
+ curthread->td_ucred, curthread->td_proc->p_pid,
+ &offset, SEEK_DATA);
+ if (err == EBADF) {
+ /*
+ * pathconf() doesn't necessarily open the
+ * file. So we may need to do it here.
+ */
+ err = fuse_filehandle_open(vp, FREAD, &fufh,
+ curthread, curthread->td_ucred);
+ if (err == 0) {
+ closefufh = true;
+ err = fuse_vnop_do_lseek(vp, curthread,
+ curthread->td_ucred,
+ curthread->td_proc->p_pid, &offset,
+ SEEK_DATA);
+ }
+ if (closefufh)
+ fuse_filehandle_close(vp, fufh,
+ curthread, curthread->td_ucred);
+ }
+
}
if (fsess_is_impl(mp, FUSE_LSEEK)) {
*ap->a_retval = 1;
return (0);
+ } else if (fsess_not_impl(mp, FUSE_LSEEK)) {
+ /* FUSE_LSEEK is not implemented */
+ return (EXTERROR(EINVAL, "This server does not "
+ "implement FUSE_LSEEK"));
} else {
- /*
- * Probably FUSE_LSEEK is not implemented. It might
- * be, if the FUSE_LSEEK above returned an error like
- * EACCES, but in that case we can't tell, so it's
- * safest to report EINVAL anyway.
- */
- return (EINVAL);
+ return (err);
}
default:
return (vop_stdpathconf(ap));
@@ -1830,7 +1882,8 @@ fuse_vnop_read(struct vop_read_args *ap)
MPASS(vp->v_type == VREG || vp->v_type == VDIR);
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -1907,20 +1960,18 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
if (ap->a_eofflag)
*ap->a_eofflag = 0;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
- }
- if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */
- (uio_resid(uio) < sizeof(struct dirent))) {
- return EINVAL;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
+ if (uio_resid(uio) < sizeof(struct dirent))
+ return (EXTERROR(EINVAL, "Buffer is too small"));
tresid = uio->uio_resid;
err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) {
- KASSERT(fuse_get_mpdata(mp)->dataflags
- & FSESS_NO_OPENDIR_SUPPORT,
- ("FUSE file systems that don't set "
- "FUSE_NO_OPENDIR_SUPPORT should not be exported"));
+ KASSERT(!fsess_is_impl(mp, FUSE_OPENDIR),
+ ("FUSE file systems that implement "
+ "FUSE_OPENDIR should not be exported"));
/*
* nfsd will do VOP_READDIR without first doing VOP_OPEN. We
* must implicitly open the directory here.
@@ -1983,7 +2034,8 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!vnode_islnk(vp)) {
return EINVAL;
@@ -1994,10 +2046,11 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
goto out;
}
if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) {
+ static const char exterr[] = "Server returned an embedded NUL "
+ "from FUSE_READLINK.";
struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
- fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL,
- "Returned an embedded NUL from FUSE_READLINK.");
- err = EIO;
+ fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr);
+ err = EXTERROR(EIO, exterr);
goto out;
}
if (((char *)fdi.answ)[0] == '/' &&
@@ -2081,10 +2134,11 @@ fuse_vnop_remove(struct vop_remove_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vnode_isdir(vp)) {
- return EPERM;
+ return (EXTERROR(EPERM, "vnode is a directory"));
}
err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
@@ -2117,12 +2171,13 @@ fuse_vnop_rename(struct vop_rename_args *ap)
int err = 0;
if (fuse_isdeadfs(fdvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (fvp->v_mount != tdvp->v_mount ||
(tvp && fvp->v_mount != tvp->v_mount)) {
SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
- err = EXDEV;
+ err = EXTERROR(EXDEV, "Cross-device rename");
goto out;
}
cache_purge(fvp);
@@ -2193,10 +2248,12 @@ fuse_vnop_rmdir(struct vop_rmdir_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp) == VTOFUD(dvp)) {
- return EINVAL;
+ return (EXTERROR(EINVAL, "Directory to be removed "
+ "contains itself"));
}
err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
@@ -2233,7 +2290,8 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vap->va_uid != (uid_t)VNOVAL) {
@@ -2248,19 +2306,15 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
return (err2);
if (vap->va_uid != old_va.va_uid)
return err;
- else
- accmode |= VADMIN;
drop_suid = true;
- } else
- accmode |= VADMIN;
- } else
- accmode |= VADMIN;
+ }
+ }
+ accmode |= VADMIN;
}
if (vap->va_gid != (gid_t)VNOVAL) {
if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
drop_suid = true;
- if (checkperm && !groupmember(vap->va_gid, cred))
- {
+ if (checkperm && !groupmember(vap->va_gid, cred)) {
/*
* Non-root users may only chgrp to one of their own
* groups
@@ -2274,11 +2328,9 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
return (err2);
if (vap->va_gid != old_va.va_gid)
return err;
- accmode |= VADMIN;
- } else
- accmode |= VADMIN;
- } else
- accmode |= VADMIN;
+ }
+ }
+ accmode |= VADMIN;
}
if (vap->va_size != VNOVAL) {
switch (vp->v_type) {
@@ -2404,7 +2456,8 @@ fuse_vnop_symlink(struct vop_symlink_args *ap)
size_t len;
if (fuse_isdeadfs(dvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
/*
* Unlike the other creator type calls, here we have to create a message
@@ -2450,7 +2503,8 @@ fuse_vnop_write(struct vop_write_args *ap)
MPASS(vp->v_type == VREG || vp->v_type == VDIR);
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -2603,10 +2657,12 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_GETXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "extended attributes"));
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
if (err)
@@ -2644,7 +2700,8 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
if (err != 0) {
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_GETXATTR);
- err = EOPNOTSUPP;
+ err = (EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement extended attributes"));
}
goto out;
}
@@ -2683,16 +2740,19 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
struct mount *mp = vnode_mount(vp);
struct thread *td = ap->a_td;
struct ucred *cred = ap->a_cred;
+ size_t struct_size = FUSE_COMPAT_SETXATTR_IN_SIZE;
char *prefix;
size_t len;
char *attr_str;
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_SETXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "setting extended attributes"));
if (vfs_isrdonly(mp))
return EROFS;
@@ -2704,9 +2764,11 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
* return EOPNOTSUPP.
*/
if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
- return (EOPNOTSUPP);
+ return (EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement removing extended attributes"));
else
- return (EINVAL);
+ return (EXTERROR(EINVAL, "DELETEEXTATTR should be used "
+ "to remove extattrs"));
}
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
@@ -2723,17 +2785,26 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
len = strlen(prefix) + sizeof(extattr_namespace_separator) +
strlen(ap->a_name) + 1;
- fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
+ /* older FUSE servers use a smaller fuse_setxattr_in struct */
+ if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33))
+ struct_size = sizeof(*set_xattr_in);
+
+ fdisp_init(&fdi, len + struct_size + uio->uio_resid);
fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
set_xattr_in = fdi.indata;
set_xattr_in->size = uio->uio_resid;
- attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
+ if (fuse_libabi_geq(fuse_get_mpdata(mp), 7, 33)) {
+ set_xattr_in->setxattr_flags = 0;
+ set_xattr_in->padding = 0;
+ }
+
+ attr_str = (char *)fdi.indata + struct_size;
snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
ap->a_name);
- err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
+ err = uiomove((char *)fdi.indata + struct_size + len,
uio->uio_resid, uio);
if (err != 0) {
goto out;
@@ -2743,7 +2814,8 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_SETXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "setting extended attributes");
}
if (err == ERESTART) {
/* Can't restart after calling uiomove */
@@ -2854,10 +2926,12 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_LISTXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "extended attributes"));
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
if (err)
@@ -2885,7 +2959,8 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
if (err != 0) {
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_LISTXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement extended attributes");
}
goto out;
}
@@ -2985,7 +3060,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap)
bool closefufh = false;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (vfs_isrdonly(mp))
return (EROFS);
@@ -3053,8 +3129,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap)
false);
}
-out:
fdisp_destroy(&fdi);
+out:
if (closefufh)
fuse_filehandle_close(vp, fufh, curthread, cred);
@@ -3091,10 +3167,12 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "removing extended attributes"));
if (vfs_isrdonly(mp))
return EROFS;
@@ -3123,7 +3201,8 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
err = fdisp_wait_answ(&fdi);
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "removing extended attributes");
}
fdisp_destroy(&fdi);
@@ -3177,25 +3256,27 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
/* NFS requires lookups for "." and ".." */
SDT_PROBE2(fusefs, , vnops, trace, 1,
"VOP_VPTOFH without FUSE_EXPORT_SUPPORT");
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server is "
+ "missing FUSE_EXPORT_SUPPORT"));
}
if ((mp->mnt_flag & MNT_EXPORTED) &&
- !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT))
+ fsess_is_impl(mp, FUSE_OPENDIR))
{
/*
* NFS is stateless, so nfsd must reopen a directory on every
* call to VOP_READDIR, passing in the d_off field from the
- * final dirent of the previous invocation. But without
- * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not
+ * final dirent of the previous invocation. But if the server
+ * implements FUSE_OPENDIR, the FUSE protocol does not
* guarantee that d_off will be valid after a directory is
* closed and reopened. So prohibit exporting FUSE file
- * systems that don't set that flag.
+ * systems that implement FUSE_OPENDIR.
*
* But userspace NFS servers don't have this problem.
*/
SDT_PROBE2(fusefs, , vnops, trace, 1,
- "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT");
- return EOPNOTSUPP;
+ "VOP_VPTOFH with FUSE_OPENDIR");
+ return (EXTERROR(EOPNOTSUPP, "This server implements "
+ "FUSE_OPENDIR so is not compatible with getfh"));
}
err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
@@ -3209,6 +3290,7 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
if (fvdat->generation <= UINT32_MAX)
fhp->gen = fvdat->generation;
else
- return EOVERFLOW;
+ return (EXTERROR(EOVERFLOW, "inode generation "
+ "number overflow"));
return (0);
}
diff --git a/sys/fs/msdosfs/denode.h b/sys/fs/msdosfs/denode.h
index 0d31b0583fa6..e6928fb46052 100644
--- a/sys/fs/msdosfs/denode.h
+++ b/sys/fs/msdosfs/denode.h
@@ -212,7 +212,7 @@ struct denode {
((dep)->de_Attributes & ATTR_DIRECTORY) ? 0 : (dep)->de_FileSize), \
putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16))
-#if defined(_KERNEL) || defined(MAKEFS)
+#if defined(_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
#define VTODE(vp) ((struct denode *)(vp)->v_data)
#define DETOV(de) ((de)->de_vnode)
@@ -294,5 +294,5 @@ int removede(struct denode *pdep, struct denode *dep);
int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred);
int doscheckpath( struct denode *source, struct denode *target,
daddr_t *wait_scn);
-#endif /* _KERNEL || MAKEFS */
+#endif /* _KERNEL || _WANT_MSDOSFS_INTERNALS */
#endif /* !_FS_MSDOSFS_DENODE_H_ */
diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h
index a88bfb94e91d..344cd5a9416d 100644
--- a/sys/fs/msdosfs/fat.h
+++ b/sys/fs/msdosfs/fat.h
@@ -81,7 +81,7 @@
#define MSDOSFSEOF(pmp, cn) ((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS)
-#if defined (_KERNEL) || defined(MAKEFS)
+#if defined (_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
/*
* These are the values for the function argument to the function
* fatentry().
@@ -110,5 +110,5 @@ markvoldirty(struct msdosfsmount *pmp, bool dirty)
return (markvoldirty_upgrade(pmp, dirty, false));
}
-#endif /* _KERNEL || MAKEFS */
+#endif /* _KERNEL || _WANT_MSDOSFS_INTERNALS */
#endif /* !_FS_MSDOSFS_FAT_H_ */
diff --git a/sys/fs/msdosfs/msdosfs_conv.c b/sys/fs/msdosfs/msdosfs_conv.c
index da4848169173..208b64930e61 100644
--- a/sys/fs/msdosfs/msdosfs_conv.c
+++ b/sys/fs/msdosfs/msdosfs_conv.c
@@ -797,19 +797,24 @@ mbsadjpos(const char **instr, size_t inlen, size_t outlen, int weight, int flag,
static u_char *
dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
{
- u_char c, *outp;
- size_t len, olen;
+ u_char c, *outp, *outp1;
+ size_t i, len, olen;
outp = outbuf;
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
olen = len = 4;
+ outp1 = outp;
if (lower & (LCASE_BASE | LCASE_EXT))
msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr,
ilen, (char **)&outp, &olen, KICONV_LOWER);
else
msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr,
ilen, (char **)&outp, &olen);
+ for (i = 0; i < outp - outp1; i++) {
+ if (outp1[i] == '/')
+ outp1[i] = '?';
+ }
len -= olen;
/*
@@ -826,6 +831,8 @@ dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struc
c = dos2unix[c];
if (lower & (LCASE_BASE | LCASE_EXT))
c = u2l[c];
+ if (c == '/')
+ c = '?';
*outp++ = c;
outbuf[1] = '\0';
}
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
index 2a90339d0878..8ab6d35a2685 100644
--- a/sys/fs/msdosfs/msdosfs_lookup.c
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -198,7 +198,9 @@ msdosfs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname
switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
cnp->cn_namelen, 0, pmp)) {
case 0:
- return (EINVAL);
+ if (nameiop == CREATE || nameiop == RENAME)
+ return (EINVAL);
+ return (ENOENT);
case 1:
break;
case 2:
@@ -843,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn)
*wait_scn = 0;
pmp = target->de_pmp;
- lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED);
KASSERT(pmp == source->de_pmp,
("doscheckpath: source and target on different filesystems"));
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
index 258c701bd300..4431d36c8a8e 100644
--- a/sys/fs/msdosfs/msdosfs_vfsops.c
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp)
pmp->pm_bo = bo;
lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0);
- lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0);
TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp);
@@ -722,7 +721,9 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp)
}
}
- clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv ;
+ clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv;
+ if (clusters >= (CLUST_RSRVD & pmp->pm_fatmask))
+ clusters = CLUST_RSRVD & pmp->pm_fatmask;
if (pmp->pm_maxcluster >= clusters) {
#ifdef MSDOSFS_DEBUG
printf("Warning: number of clusters (%ld) exceeds FAT "
@@ -869,7 +870,6 @@ error_exit:
}
if (pmp != NULL) {
lockdestroy(&pmp->pm_fatlock);
- lockdestroy(&pmp->pm_checkpath_lock);
free(pmp->pm_inusemap, M_MSDOSFSFAT);
free(pmp, M_MSDOSFSMNT);
mp->mnt_data = NULL;
@@ -969,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags)
dev_rel(pmp->pm_dev);
free(pmp->pm_inusemap, M_MSDOSFSFAT);
lockdestroy(&pmp->pm_fatlock);
- lockdestroy(&pmp->pm_checkpath_lock);
free(pmp, M_MSDOSFSMNT);
mp->mnt_data = NULL;
return (error);
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
index 078ea5e52312..33e0d94954d7 100644
--- a/sys/fs/msdosfs/msdosfs_vnops.c
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap)
struct denode *fdip, *fip, *tdip, *tip, *nip;
u_char toname[12], oldname[11];
u_long to_diroffset;
- bool checkpath_locked, doingdirectory, newparent;
+ bool doingdirectory, newparent;
int error;
u_long cn, pcl, blkoff;
daddr_t bn, wait_scn, scn;
@@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap)
if (tvp != NULL && tvp != tdvp)
VOP_UNLOCK(tvp);
- checkpath_locked = false;
-
relock:
doingdirectory = newparent = false;
@@ -1108,12 +1106,8 @@ relock:
if (doingdirectory && newparent) {
if (error != 0) /* write access check above */
goto unlock;
- lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
- checkpath_locked = true;
error = doscheckpath(fip, tdip, &wait_scn);
if (wait_scn != 0) {
- lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
- checkpath_locked = false;
VOP_UNLOCK(fdvp);
VOP_UNLOCK(tdvp);
VOP_UNLOCK(fvp);
@@ -1276,8 +1270,6 @@ relock:
cache_purge(fvp);
unlock:
- if (checkpath_locked)
- lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
vput(fdvp);
vput(fvp);
if (tvp != NULL) {
@@ -1289,7 +1281,6 @@ unlock:
vput(tdvp);
return (error);
releout:
- MPASS(!checkpath_locked);
vrele(tdvp);
if (tvp != NULL)
vrele(tvp);
@@ -1530,6 +1521,9 @@ msdosfs_readdir(struct vop_readdir_args *ap)
ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
#endif
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 0;
+
/*
* msdosfs_readdir() won't operate properly on regular files since
* it does i/o only with the filesystem vnode, and hence can
@@ -1623,8 +1617,11 @@ msdosfs_readdir(struct vop_readdir_args *ap)
on = (offset - bias) & pmp->pm_crbomask;
n = min(pmp->pm_bpcluster - on, uio->uio_resid);
diff = dep->de_FileSize - (offset - bias);
- if (diff <= 0)
- break;
+ if (diff <= 0) {
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 1;
+ goto out;
+ }
n = min(n, diff);
error = pcbmap(dep, lbn, &bn, &cn, &blsize);
if (error)
@@ -1655,6 +1652,8 @@ msdosfs_readdir(struct vop_readdir_args *ap)
*/
if (dentp->deName[0] == SLOT_EMPTY) {
brelse(bp);
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 1;
goto out;
}
/*
@@ -1752,15 +1751,6 @@ out:
uio->uio_offset = off;
- /*
- * Set the eofflag (NFS uses it)
- */
- if (ap->a_eofflag) {
- if (dep->de_FileSize - (offset - bias) <= 0)
- *ap->a_eofflag = 1;
- else
- *ap->a_eofflag = 0;
- }
return (error);
}
@@ -1951,6 +1941,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap)
case _PC_NO_TRUNC:
*ap->a_retval = 0;
return (0);
+ case _PC_HAS_HIDDENSYSTEM:
+ *ap->a_retval = 1;
+ return (0);
default:
return (vop_stdpathconf(ap));
}
@@ -1962,6 +1955,8 @@ msdosfs_vptofh(struct vop_vptofh_args *ap)
{
struct denode *dep;
struct defid *defhp;
+ _Static_assert(sizeof(struct defid) <= sizeof(struct fid),
+ "struct defid cannot be larger than struct fid");
dep = VTODE(ap->a_vp);
defhp = (struct defid *)ap->a_fhp;
diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h
index 8f15bc2eaf42..04e6b75bea2a 100644
--- a/sys/fs/msdosfs/msdosfsmount.h
+++ b/sys/fs/msdosfs/msdosfsmount.h
@@ -52,14 +52,17 @@
#ifndef _MSDOSFS_MSDOSFSMOUNT_H_
#define _MSDOSFS_MSDOSFSMOUNT_H_
-#if defined (_KERNEL) || defined(MAKEFS)
+#if defined(_KERNEL) || defined(_WANT_MSDOSFS_INTERNALS)
#include <sys/types.h>
-#ifndef MAKEFS
+#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/lockmgr.h>
-#include <sys/_task.h>
+#else
+#include <sys/_lock.h>
+#include <sys/_lockmgr.h>
#endif
+#include <sys/_task.h>
#include <sys/tree.h>
#ifdef MALLOC_DECLARE
@@ -114,11 +117,8 @@ struct msdosfsmount {
void *pm_w2u; /* Unicode->Local iconv handle */
void *pm_u2d; /* Unicode->DOS iconv handle */
void *pm_d2u; /* DOS->Local iconv handle */
-#ifndef MAKEFS
struct lock pm_fatlock; /* lockmgr protecting allocations */
- struct lock pm_checkpath_lock; /* protects doscheckpath result */
struct task pm_rw2ro_task; /* context for emergency remount ro */
-#endif
};
/*
@@ -245,9 +245,9 @@ struct msdosfs_fileno {
#define MSDOSFS_ASSERT_MP_LOCKED(pmp) \
lockmgr_assert(&(pmp)->pm_fatlock, KA_XLOCKED)
-#endif /* _KERNEL || MAKEFS */
+#endif /* _KERNEL || _WANT_MSDOSFS_INTERNALS */
-#ifndef MAKEFS
+#ifdef _KERNEL
/*
* Arguments to mount MSDOS filesystems.
*/
@@ -265,7 +265,7 @@ struct msdosfs_args {
char *cs_local; /* Local Charset */
mode_t dirmask; /* dir mask to be applied for msdosfs perms */
};
-#endif /* MAKEFS */
+#endif /* _KERNEL */
/*
* Msdosfs mount options:
diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h
index 9b09520b3257..e6a125b388a8 100644
--- a/sys/fs/nfs/nfs.h
+++ b/sys/fs/nfs/nfs.h
@@ -865,6 +865,8 @@ struct nfsslot {
/* Enumerated type for nfsuserd state. */
typedef enum { NOTRUNNING=0, STARTSTOP=1, RUNNING=2 } nfsuserd_state;
+typedef enum { UNKNOWN=0, DELETED=1, NLINK_ZERO=2, VALID=3 } nfsremove_status;
+
#endif /* _KERNEL */
#endif /* _NFS_NFS_H */
diff --git a/sys/fs/nfs/nfs_commonacl.c b/sys/fs/nfs/nfs_commonacl.c
index 55e6f89dd8ec..bba1d8821a9b 100644
--- a/sys/fs/nfs/nfs_commonacl.c
+++ b/sys/fs/nfs/nfs_commonacl.c
@@ -65,7 +65,7 @@ nfsrv_dissectace(struct nfsrv_descript *nd, struct acl_entry *acep,
goto nfsmout;
} else if (len == 0) {
/* Netapp filers return a 0 length who for nil users */
- acep->ae_tag = ACL_UNDEFINED_TAG;
+ acep->ae_tag = ACL_EVERYONE; /* Avoid panics. */
acep->ae_id = ACL_UNDEFINED_ID;
acep->ae_perm = (acl_perm_t)0;
acep->ae_entry_type = ACL_ENTRY_TYPE_DENY;
@@ -352,32 +352,7 @@ nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen,
if (ace->ae_perm & ACL_SYNCHRONIZE)
acemask |= NFSV4ACE_SYNCHRONIZE;
} else {
- if (ace->ae_perm & ACL_READ_DATA)
- acemask |= NFSV4ACE_READDATA;
- if (ace->ae_perm & ACL_WRITE_DATA)
- acemask |= NFSV4ACE_WRITEDATA;
- if (ace->ae_perm & ACL_APPEND_DATA)
- acemask |= NFSV4ACE_APPENDDATA;
- if (ace->ae_perm & ACL_READ_NAMED_ATTRS)
- acemask |= NFSV4ACE_READNAMEDATTR;
- if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS)
- acemask |= NFSV4ACE_WRITENAMEDATTR;
- if (ace->ae_perm & ACL_EXECUTE)
- acemask |= NFSV4ACE_EXECUTE;
- if (ace->ae_perm & ACL_READ_ATTRIBUTES)
- acemask |= NFSV4ACE_READATTRIBUTES;
- if (ace->ae_perm & ACL_WRITE_ATTRIBUTES)
- acemask |= NFSV4ACE_WRITEATTRIBUTES;
- if (ace->ae_perm & ACL_DELETE)
- acemask |= NFSV4ACE_DELETE;
- if (ace->ae_perm & ACL_READ_ACL)
- acemask |= NFSV4ACE_READACL;
- if (ace->ae_perm & ACL_WRITE_ACL)
- acemask |= NFSV4ACE_WRITEACL;
- if (ace->ae_perm & ACL_WRITE_OWNER)
- acemask |= NFSV4ACE_WRITEOWNER;
- if (ace->ae_perm & ACL_SYNCHRONIZE)
- acemask |= NFSV4ACE_SYNCHRONIZE;
+ acemask = nfs_aceperm(ace->ae_perm);
}
*tl++ = txdr_unsigned(acemask);
*tl++ = txdr_unsigned(namelen);
@@ -388,6 +363,43 @@ nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen,
}
/*
+ * Translate a FreeBSD ACL permission mask (ae_perm) into the matching
+ * NFSv4 ACL acemask4 bits, using the regular-file meaning of each bit.
+ * Bits of ae_perm that have no entry in the table below are ignored.
+ */
+uint32_t
+nfs_aceperm(acl_perm_t ae_perm)
+{
+	/* One row per permission: FreeBSD bit -> NFSv4 acemask4 bit. */
+	static const struct {
+		acl_perm_t	perm;
+		uint32_t	ace;
+	} permtab[] = {
+		{ ACL_READ_DATA,		NFSV4ACE_READDATA },
+		{ ACL_WRITE_DATA,		NFSV4ACE_WRITEDATA },
+		{ ACL_APPEND_DATA,		NFSV4ACE_APPENDDATA },
+		{ ACL_READ_NAMED_ATTRS,		NFSV4ACE_READNAMEDATTR },
+		{ ACL_WRITE_NAMED_ATTRS,	NFSV4ACE_WRITENAMEDATTR },
+		{ ACL_EXECUTE,			NFSV4ACE_EXECUTE },
+		{ ACL_READ_ATTRIBUTES,		NFSV4ACE_READATTRIBUTES },
+		{ ACL_WRITE_ATTRIBUTES,		NFSV4ACE_WRITEATTRIBUTES },
+		{ ACL_DELETE,			NFSV4ACE_DELETE },
+		{ ACL_READ_ACL,			NFSV4ACE_READACL },
+		{ ACL_WRITE_ACL,		NFSV4ACE_WRITEACL },
+		{ ACL_WRITE_OWNER,		NFSV4ACE_WRITEOWNER },
+		{ ACL_SYNCHRONIZE,		NFSV4ACE_SYNCHRONIZE },
+	};
+	uint32_t acemask;
+	size_t idx;
+
+	acemask = 0;
+	for (idx = 0; idx < sizeof(permtab) / sizeof(permtab[0]); idx++) {
+		if ((ae_perm & permtab[idx].perm) != 0)
+			acemask |= permtab[idx].ace;
+	}
+	return (acemask);
+}
+
+/*
* Build an NFSv4 ACL.
*/
int
diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c
index e5c658ce76d2..0ae3b94bef89 100644
--- a/sys/fs/nfs/nfs_commonkrpc.c
+++ b/sys/fs/nfs/nfs_commonkrpc.c
@@ -670,7 +670,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
{
- uint32_t retseq, retval, slotseq, *tl;
+ uint32_t retseq, retval, retval0, slotseq, *tl;
int i = 0, j = 0, opcnt, set_sigset = 0, slot;
int error = 0, usegssname = 0, secflavour = AUTH_SYS;
int freeslot, maxslot, reterr, slotpos, timeo;
@@ -1039,7 +1039,7 @@ tryagain:
sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
mtx_unlock(&sep->nfsess_mtx);
/* And free the slot. */
- nfsv4_freeslot(sep, nd->nd_slotid, false);
+ nfsv4_freeslot(sep, nd->nd_slotid, true);
}
if (stat == RPC_INTR)
error = EINTR;
@@ -1192,15 +1192,22 @@ tryagain:
if (retseq != sep->nfsess_slotseq[slot])
printf("retseq diff 0x%x\n",
retseq);
- retval = fxdr_unsigned(uint32_t, *++tl);
+ retval0 = fxdr_unsigned(uint32_t,*tl++);
+ retval = fxdr_unsigned(uint32_t, *tl);
if ((retval + 1) < sep->nfsess_foreslots
- )
+ ) {
sep->nfsess_foreslots = (retval
+ 1);
- else if ((retval + 1) >
- sep->nfsess_foreslots)
- sep->nfsess_foreslots = (retval
- < 64) ? (retval + 1) : 64;
+ nfs_resetslots(sep);
+ } else if ((retval + 1) >
+ sep->nfsess_foreslots) {
+ if (retval0 > retval)
+ printf("Sess:highest > "
+ "target_highest\n");
+ sep->nfsess_foreslots =
+ (retval < NFSV4_SLOTS) ?
+ (retval + 1) : NFSV4_SLOTS;
+ }
}
mtx_unlock(&sep->nfsess_mtx);
@@ -1464,6 +1471,25 @@ nfsmout:
}
/*
+ * Reset slots above nfsess_foreslots that are not busy.
+ * For every slot index from nfsess_foreslots up to NFSV4_SLOTS - 1 whose
+ * bit is clear in both nfsess_slots and nfsess_badslots, the slot's
+ * sequence number is set back to 0.
+ * The caller must hold nfsess_mtx (asserted below).
+ */
+void
+nfs_resetslots(struct nfsclsession *sep)
+{
+	int i;
+	uint64_t bitval;
+
+	mtx_assert(&sep->nfsess_mtx, MA_OWNED);
+	for (i = sep->nfsess_foreslots; i < NFSV4_SLOTS; i++) {
+		/*
+		 * Build the mask from a 64-bit one.  Shifting a plain int
+		 * by 31 or more bits is undefined and could never select
+		 * the upper slots.  Computing it per slot also means no
+		 * shift is done when the loop body does not run.
+		 * NOTE(review): assumes NFSV4_SLOTS does not exceed 64.
+		 */
+		bitval = (0x1ULL << i);
+		if ((sep->nfsess_slots & bitval) == 0 &&
+		    (sep->nfsess_badslots & bitval) == 0)
+			sep->nfsess_slotseq[i] = 0;
+	}
+}
+
+/*
* Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
* wait for all requests to complete. This is used by forced unmounts
* to terminate any outstanding RPCs.
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index 2db9af5b9ea9..0c94f4e7dc52 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -258,7 +258,8 @@ newnfs_copycred(struct nfscred *nfscr, struct ucred *cr)
KASSERT(nfscr->nfsc_ngroups >= 0,
("newnfs_copycred: negative nfsc_ngroups"));
cr->cr_uid = nfscr->nfsc_uid;
- crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups);
+ crsetgroups_fallback(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups,
+ GID_NOGROUP);
}
/*
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index 3c9af40253ad..a957315aaa12 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -135,7 +135,7 @@ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = {
{ 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookupp */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* NVerify */
{ 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Open */
- { 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenAttr */
+ { 1, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* OpenAttr */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenConfirm */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenDowngrade */
{ 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutFH */
@@ -219,18 +219,19 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL;
static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 1, 0, 0, 0, 0, 0 };
+ 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 };
/* local functions */
static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep);
static void nfsv4_wanted(struct nfsv4lock *lp);
static uint32_t nfsv4_filesavail(struct statfs *, struct mount *);
-static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len);
static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name);
static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser);
static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **,
int *, int *);
static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *);
+static uint32_t vtonfsv4_type(struct vattr *);
+static __enum_uint8(vtype) nfsv4tov_type(uint32_t, uint16_t *);
static struct {
int op;
@@ -250,10 +251,10 @@ static struct {
{ NFSV4OP_CREATE, 5, "Create", 6, },
{ NFSV4OP_CREATE, 1, "Create", 6, },
{ NFSV4OP_CREATE, 3, "Create", 6, },
+ { NFSV4OP_REMOVE, 3, "Remove", 6, },
{ NFSV4OP_REMOVE, 1, "Remove", 6, },
- { NFSV4OP_REMOVE, 1, "Remove", 6, },
- { NFSV4OP_SAVEFH, 5, "Rename", 6, },
- { NFSV4OP_SAVEFH, 4, "Link", 4, },
+ { NFSV4OP_SAVEFH, 7, "Rename", 6, },
+ { NFSV4OP_SAVEFH, 6, "Link", 4, },
{ NFSV4OP_READDIR, 2, "Readdir", 7, },
{ NFSV4OP_READDIR, 2, "Readdir", 7, },
{ NFSV4OP_GETATTR, 1, "Getattr", 7, },
@@ -308,6 +309,7 @@ static struct {
{ NFSV4OP_DEALLOCATE, 2, "Deallocate", 10, },
{ NFSV4OP_LAYOUTERROR, 1, "LayoutError", 11, },
{ NFSV4OP_VERIFY, 3, "AppendWrite", 11, },
+ { NFSV4OP_OPENATTR, 3, "OpenAttr", 8, },
};
/*
@@ -317,7 +319,7 @@ static int nfs_bigrequest[NFSV42_NPROCS] = {
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
- 0, 1
+ 0, 1, 0
};
/*
@@ -610,32 +612,43 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap,
break;
case ND_NFSV4:
NFSZERO_ATTRBIT(&attrbits);
- if (vap->va_mode != (mode_t)VNOVAL)
- NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE);
+ np = NULL;
+ if (strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0)
+ np = VTONFS(vp);
+ if (vap->va_mode != (mode_t)VNOVAL) {
+ if ((flags & NFSSATTR_NEWFILE) != 0 && np != NULL &&
+ NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+ NFSATTRBIT_MODEUMASK))
+ NFSSETBIT_ATTRBIT(&attrbits,
+ NFSATTRBIT_MODEUMASK);
+ else
+ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE);
+ }
if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL)
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL)
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP);
if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL)
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
+ if ((flags & NFSSATTR_FULL) && vap->va_flags != VNOVAL) {
+ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+ }
if (vap->va_atime.tv_sec != VNOVAL)
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET);
if (vap->va_mtime.tv_sec != VNOVAL)
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET);
- if (vap->va_birthtime.tv_sec != VNOVAL &&
- strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0) {
- /*
- * We can only test for support of TimeCreate if
- * the "vp" argument is for an NFS vnode.
- */
- np = VTONFS(vp);
- if (NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
- NFSATTRBIT_TIMECREATE))
- NFSSETBIT_ATTRBIT(&attrbits,
- NFSATTRBIT_TIMECREATE);
- }
+ /*
+ * We can only test for support of TimeCreate if
+ * the "vp" argument is for an NFS vnode.
+ */
+ if (vap->va_birthtime.tv_sec != VNOVAL && np != NULL &&
+ NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+ NFSATTRBIT_TIMECREATE))
+ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
(void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0,
- &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
+ &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL,
+ false, false, false);
break;
}
}
@@ -980,6 +993,17 @@ nfsm_fhtom(struct nfsmount *nmp, struct nfsrv_descript *nd, u_int8_t *fhp,
(nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0) {
fhp = nmp->nm_fh;
size = nmp->nm_fhsize;
+ } else if (size >= NFSX_FHMAX + NFSX_V4NAMEDDIRFH &&
+ size <= NFSX_FHMAX + NFSX_V4NAMEDATTRFH) {
+ size -= (NFSX_FHMAX - NFSX_MYFH);
+ NFSM_BUILD(tl, uint32_t *, NFSX_MYFH +
+ 2 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(size);
+ NFSBCOPY(fhp, tl, NFSX_MYFH);
+ tl += (NFSX_MYFH / NFSX_UNSIGNED);
+ *tl = 0;
+ bytesize = NFSX_MYFH + 2 * NFSX_UNSIGNED;
+ break;
}
fullsiz = NFSM_RNDUP(size);
if (set_true) {
@@ -1277,7 +1301,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize,
struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp,
struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp,
- u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred)
+ u_int32_t *leasep, u_int32_t *rderrp, bool *has_namedattrp,
+ NFSPROC_T *p, struct ucred *cred)
{
u_int32_t *tl;
int i = 0, j, k, l = 0, m, bitpos, attrsum = 0;
@@ -1293,6 +1318,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
gid_t gid;
u_int32_t freenum = 0, tuint;
u_int64_t uquad = 0, thyp, thyp2;
+ uint16_t tui16;
+ long has_pathconf;
#ifdef QUOTA
struct dqblk dqb;
uid_t savuid;
@@ -1316,6 +1343,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
* Just set default values to some of the important ones.
*/
if (nap != NULL) {
+ VATTR_NULL(&nap->na_vattr);
nap->na_type = VREG;
nap->na_mode = 0;
nap->na_rdev = (NFSDEV_T)0;
@@ -1365,6 +1393,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
sfp->sf_tbytes = UINT64_MAX;
sfp->sf_abytes = UINT64_MAX;
}
+ if (has_namedattrp != NULL)
+ *has_namedattrp = false;
}
/*
@@ -1397,6 +1427,16 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL);
NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT);
}
+ /* Some filesystems do not support uf_hidden */
+ if (vp == NULL || VOP_PATHCONF(vp,
+ _PC_HAS_HIDDENSYSTEM, &has_pathconf) != 0)
+ has_pathconf = 0;
+ if (has_pathconf == 0) {
+ NFSCLRBIT_ATTRBIT(&checkattrbits,
+ NFSATTRBIT_HIDDEN);
+ NFSCLRBIT_ATTRBIT(&checkattrbits,
+ NFSATTRBIT_SYSTEM);
+ }
if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits)
|| retnotsup)
*retcmpp = NFSERR_NOTSAME;
@@ -1407,11 +1447,16 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
if (compare) {
if (!(*retcmpp)) {
- if (nap->na_type != nfsv34tov_type(*tl))
+ tui16 = 0;
+ if (nap->na_type != nfsv4tov_type(*tl,
+ &tui16) ||
+ ((nap->na_bsdflags & SFBSD_NAMEDATTR) ^
+ tui16) != 0)
*retcmpp = NFSERR_NOTSAME;
}
} else if (nap != NULL) {
- nap->na_type = nfsv34tov_type(*tl);
+ nap->na_type = nfsv4tov_type(*tl,
+ &nap->na_bsdflags);
}
attrsum += NFSX_UNSIGNED;
break;
@@ -1490,9 +1535,23 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
break;
case NFSATTRBIT_NAMEDATTR:
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- if (compare && !(*retcmpp)) {
- if (*tl != newnfs_false)
- *retcmpp = NFSERR_NOTSAME;
+ if (compare) {
+ if (!(*retcmpp)) {
+ if (vp == NULL || VOP_PATHCONF(vp,
+ _PC_HAS_NAMEDATTR, &has_pathconf)
+ != 0)
+ has_pathconf = 0;
+ if ((has_pathconf != 0 &&
+ *tl != newnfs_true) ||
+ (has_pathconf == 0 &&
+ *tl != newnfs_false))
+ *retcmpp = NFSERR_NOTSAME;
+ }
+ } else if (has_namedattrp != NULL) {
+ if (*tl == newnfs_true)
+ *has_namedattrp = true;
+ else
+ *has_namedattrp = false;
}
attrsum += NFSX_UNSIGNED;
break;
@@ -1666,6 +1725,8 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
goto nfsmout;
tfhsize = tnfhp->nfh_len;
if (compare) {
+ if (tfhsize > NFSX_MYFH)
+ tfhsize = NFSX_MYFH;
if (!(*retcmpp) &&
!NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize,
fhp, fhsize))
@@ -1745,9 +1806,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
free(cp2, M_NFSSTRING);
break;
case NFSATTRBIT_HIDDEN:
- NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- if (compare && !(*retcmpp))
- *retcmpp = NFSERR_ATTRNOTSUPP;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (compare) {
+ if (!(*retcmpp) && ((*tl == newnfs_true &&
+ (nap->na_flags & UF_HIDDEN) == 0) ||
+ (*tl == newnfs_false &&
+ (nap->na_flags & UF_HIDDEN) != 0)))
+ *retcmpp = NFSERR_NOTSAME;
+ } else if (nap != NULL) {
+ if (*tl == newnfs_true)
+ nap->na_flags |= UF_HIDDEN;
+ }
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_HOMOGENEOUS:
@@ -2119,9 +2188,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
attrsum += NFSX_HYPER;
break;
case NFSATTRBIT_SYSTEM:
- NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- if (compare && !(*retcmpp))
- *retcmpp = NFSERR_ATTRNOTSUPP;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (compare) {
+ if (!(*retcmpp) && ((*tl == newnfs_true &&
+ (nap->na_flags & UF_SYSTEM) == 0) ||
+ (*tl == newnfs_false &&
+ (nap->na_flags & UF_SYSTEM) != 0)))
+ *retcmpp = NFSERR_NOTSAME;
+ } else if (nap != NULL) {
+ if (*tl == newnfs_true)
+ nap->na_flags |= UF_SYSTEM;
+ }
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_TIMEACCESS:
@@ -2297,6 +2374,23 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
if (compare && !(*retcmpp) && i != nfs_srvmaxio)
*retcmpp = NFSERR_NOTSAME;
break;
+ case NFSATTRBIT_CHANGEATTRTYPE:
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (compare) {
+ if (!(*retcmpp)) {
+ tuint = NFSV4CHANGETYPE_UNDEFINED;
+ if ((vp->v_mount->mnt_vfc->vfc_flags &
+ VFCF_FILEREVINC) != 0)
+ tuint = NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS;
+ else if ((vp->v_mount->mnt_vfc->vfc_flags &
+ VFCF_FILEREVCT) != 0)
+ tuint = NFSV4CHANGETYPE_TIME_METADATA;
+ if (fxdr_unsigned(uint32_t, *tl) != tuint)
+ *retcmpp = NFSERR_NOTSAME;
+ }
+ }
+ attrsum += NFSX_UNSIGNED;
+ break;
default:
printf("EEK! nfsv4_loadattr unknown attr=%d\n",
bitpos);
@@ -2553,7 +2647,8 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror,
nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram,
int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno,
- struct statfs *pnfssf)
+ struct statfs *pnfssf, bool xattrsupp, bool has_hiddensystem,
+ bool has_namedattr)
{
int bitpos, retnum = 0;
u_int32_t *tl;
@@ -2567,8 +2662,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
struct nfsfsinfo fsinf;
struct timespec temptime;
NFSACL_T *aclp, *naclp = NULL;
- size_t atsiz;
- bool xattrsupp;
+ short irflag;
#ifdef QUOTA
struct dqblk dqb;
uid_t savuid;
@@ -2652,18 +2746,6 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
}
}
- /* Check to see if Extended Attributes are supported. */
- xattrsupp = false;
- if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) {
- if (NFSVOPLOCK(vp, LK_SHARED) == 0) {
- error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER,
- "xxx", NULL, &atsiz, cred, p);
- NFSVOPUNLOCK(vp);
- if (error != EOPNOTSUPP)
- xattrsupp = true;
- }
- }
-
/*
* Put out the attribute bitmap for the ones being filled in
* and get the field for the number of attributes returned.
@@ -2685,11 +2767,15 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT);
NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL);
}
+ if (!has_hiddensystem) {
+ NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+ NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+ }
retnum += nfsrv_putattrbit(nd, &attrbits);
break;
case NFSATTRBIT_TYPE:
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
- *tl = vtonfsv34_type(vap->va_type);
+ *tl = vtonfsv4_type(vap);
retnum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_FHEXPIRETYPE:
@@ -2725,7 +2811,10 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
break;
case NFSATTRBIT_NAMEDATTR:
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
- *tl = newnfs_false;
+ if (has_namedattr)
+ *tl = newnfs_true;
+ else
+ *tl = newnfs_false;
retnum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_FSID:
@@ -2786,7 +2875,15 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
retnum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_FILEHANDLE:
- retnum += nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, 0, 0);
+ siz = 0;
+ if (vp != NULL) {
+ irflag = vn_irflag_read(vp);
+ if ((irflag & VIRF_NAMEDDIR) != 0)
+ siz = NFSX_FHMAX + 2;
+ else if ((irflag & VIRF_NAMEDATTR) != 0)
+ siz = NFSX_FHMAX + 3;
+ }
+ retnum += nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, siz, 0);
break;
case NFSATTRBIT_FILEID:
NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER);
@@ -2819,6 +2916,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
*tl = 0;
retnum += 2 * NFSX_UNSIGNED;
break;
+ case NFSATTRBIT_HIDDEN:
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ if ((vap->va_flags & UF_HIDDEN) != 0)
+ *tl = newnfs_true;
+ else
+ *tl = newnfs_false;
+ retnum += NFSX_UNSIGNED;
+ break;
case NFSATTRBIT_HOMOGENEOUS:
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS)
@@ -3008,6 +3113,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
txdr_hyper(vap->va_bytes, tl);
retnum += NFSX_HYPER;
break;
+ case NFSATTRBIT_SYSTEM:
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ if ((vap->va_flags & UF_SYSTEM) != 0)
+ *tl = newnfs_true;
+ else
+ *tl = newnfs_false;
+ retnum += NFSX_UNSIGNED;
+ break;
case NFSATTRBIT_TIMEACCESS:
NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME);
txdr_nfsv4time(&vap->va_atime, tl);
@@ -3109,6 +3222,33 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
*tl = newnfs_false;
retnum += NFSX_UNSIGNED;
break;
+ case NFSATTRBIT_MODEUMASK:
+ NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ /*
+ * Since FreeBSD applies the umask above the VFS/VOP,
+ * there is no umask to handle here. If FreeBSD
+ * moves handling of umask to below the VFS/VOP,
+ * this could change.
+ */
+ *tl++ = vtonfsv34_mode(vap->va_mode);
+ *tl = 0;
+ retnum += 2 * NFSX_UNSIGNED;
+ break;
+ case NFSATTRBIT_CHANGEATTRTYPE:
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4CHANGETYPE_UNDEFINED);
+ if (mp != NULL) {
+ if ((mp->mnt_vfc->vfc_flags &
+ VFCF_FILEREVINC) != 0)
+ *tl = txdr_unsigned(
+ NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS);
+ else if ((mp->mnt_vfc->vfc_flags &
+ VFCF_FILEREVCT) != 0)
+ *tl = txdr_unsigned(
+ NFSV4CHANGETYPE_TIME_METADATA);
+ }
+ retnum += NFSX_UNSIGNED;
+ break;
default:
printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos);
}
@@ -3419,13 +3559,13 @@ tryagain:
/*
* If an '@' is found and the domain name matches, search for
* the name with dns stripped off.
- * Mixed case alpahbetics will match for the domain name, but
- * all upper case will not.
+ * The match for alphabetics is now case insensitive,
+ * since RFC8881 defines this string as a DNS domain name.
*/
if (cnt == 0 && i < len && i > 0 &&
(len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) &&
- !nfsrv_cmpmixedcase(cp,
- NFSD_VNET(nfsrv_dnsname), NFSD_VNET(nfsrv_dnsnamelen))) {
+ strncasecmp(cp, NFSD_VNET(nfsrv_dnsname),
+ NFSD_VNET(nfsrv_dnsnamelen)) == 0) {
len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1);
*(cp - 1) = '\0';
}
@@ -3646,8 +3786,8 @@ tryagain:
*/
if (cnt == 0 && i < len && i > 0 &&
(len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) &&
- !nfsrv_cmpmixedcase(cp,
- NFSD_VNET(nfsrv_dnsname), NFSD_VNET(nfsrv_dnsnamelen))) {
+ strncasecmp(cp, NFSD_VNET(nfsrv_dnsname),
+ NFSD_VNET(nfsrv_dnsnamelen)) == 0) {
len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1);
*(cp - 1) = '\0';
}
@@ -3696,35 +3836,6 @@ out:
}
/*
- * Cmp len chars, allowing mixed case in the first argument to match lower
- * case in the second, but not if the first argument is all upper case.
- * Return 0 for a match, 1 otherwise.
- */
-static int
-nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len)
-{
- int i;
- u_char tmp;
- int fndlower = 0;
-
- for (i = 0; i < len; i++) {
- if (*cp >= 'A' && *cp <= 'Z') {
- tmp = *cp++ + ('a' - 'A');
- } else {
- tmp = *cp++;
- if (tmp >= 'a' && tmp <= 'z')
- fndlower = 1;
- }
- if (tmp != *cp2++)
- return (1);
- }
- if (fndlower)
- return (0);
- else
- return (1);
-}
-
-/*
* Set the port for the nfsuserd.
*/
int
@@ -4032,8 +4143,9 @@ nfssvc_idname(struct nfsd_idargs *nidp)
*/
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid;
- crsetgroups(cr, nidp->nid_ngroup, grps);
- cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0];
+ crsetgroups_fallback(cr, nidp->nid_ngroup, grps,
+ GID_NOGROUP);
+ cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
#ifdef MAC
@@ -4644,7 +4756,7 @@ newnfs_sndlock(int *flagp)
ts.tv_sec = 0;
ts.tv_nsec = 0;
(void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR,
- PZERO - 1, "nfsndlck", &ts);
+ PVFS, "nfsndlck", &ts);
}
*flagp |= NFSR_SNDLOCK;
NFSUNLOCKSOCK();
@@ -5025,6 +5137,8 @@ nfsv4_freeslot(struct nfsclsession *sep, int slot, bool resetseq)
mtx_lock(&sep->nfsess_mtx);
if (resetseq)
sep->nfsess_slotseq[slot]--;
+ else if (slot > sep->nfsess_foreslots)
+ sep->nfsess_slotseq[slot] = 0;
if ((bitval & sep->nfsess_slots) == 0)
printf("freeing free slot!!\n");
sep->nfsess_slots &= ~bitval;
@@ -5154,3 +5268,46 @@ nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclsession *tsep,
m_freem(nd->nd_mrep);
return (error);
}
+
+/*
+ * Translate a vnode type into an NFSv4 type, including the named
+ * attribute types.
+ */
+static uint32_t
+vtonfsv4_type(struct vattr *vap)
+{
+ nfstype ntyp;
+
+ if (vap->va_type >= 9)
+ ntyp = NFNON;
+ else
+ ntyp = nfsv34_type[vap->va_type];
+ if ((vap->va_bsdflags & SFBSD_NAMEDATTR) != 0) {
+ if (ntyp == NFDIR)
+ ntyp = NFATTRDIR;
+ else if (ntyp == NFREG)
+ ntyp = NFNAMEDATTR;
+ }
+ return (txdr_unsigned((uint32_t)ntyp));
+}
+
+/*
+ * Translate an NFS type to a vnode type.
+ */
+static __enum_uint8(vtype)
+nfsv4tov_type(uint32_t ntyp, uint16_t *bsdflags)
+{
+ __enum_uint8(vtype) vtyp;
+
+ ntyp = fxdr_unsigned(uint32_t, ntyp) % (NFNAMEDATTR + 1);
+ if (ntyp == NFATTRDIR) {
+ vtyp = VDIR;
+ *bsdflags |= SFBSD_NAMEDATTR;
+ } else if (ntyp == NFNAMEDATTR) {
+ vtyp = VREG;
+ *bsdflags |= SFBSD_NAMEDATTR;
+ } else {
+ vtyp = nv34tov_type[ntyp];
+ }
+ return (vtyp);
+}
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 950e0c097457..54f60a753c50 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -169,6 +169,7 @@ int nfsrv_mdscopymr(char *, char *, char *, char *, int *, char *, NFSPROC_T *,
struct vnode **, struct vnode **, struct pnfsdsfile **, struct nfsdevice **,
struct nfsdevice **);
void nfsrv_marknospc(char *, bool);
+void nfsrv_removedeleg(fhandle_t *, struct nfsrv_descript *, NFSPROC_T *);
/* nfs_nfsdserv.c */
int nfsrvd_access(struct nfsrv_descript *, int,
@@ -340,7 +341,7 @@ int nfsv4_loadattr(struct nfsrv_descript *, vnode_t,
struct nfsvattr *, struct nfsfh **, fhandle_t *, int,
struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *,
struct nfsfsinfo *, NFSACL_T *,
- int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *);
+ int, int *, u_int32_t *, u_int32_t *, bool *, NFSPROC_T *, struct ucred *);
int nfsv4_lock(struct nfsv4lock *, int, int *, struct mtx *, struct mount *);
void nfsv4_unlock(struct nfsv4lock *, int);
void nfsv4_relref(struct nfsv4lock *);
@@ -394,8 +395,9 @@ int nfsrv_putopbit(struct nfsrv_descript *, nfsopbit_t *);
void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int,
struct nfsvattr *);
int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *,
- struct vattr *, fhandle_t *, int, nfsattrbit_t *,
- struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *);
+ struct vattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *,
+ NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *, bool, bool,
+ bool);
void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *);
struct mbuf *nfsrv_adj(struct mbuf *, int, int);
void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *);
@@ -438,6 +440,7 @@ int nfs_supportsnfsv4acls(vnode_t);
/* nfs_commonacl.c */
int nfsrv_dissectace(struct nfsrv_descript *, struct acl_entry *,
bool, int *, int *, NFSPROC_T *);
+uint32_t nfs_aceperm(acl_perm_t);
int nfsrv_buildacl(struct nfsrv_descript *, NFSACL_T *, __enum_uint8(vtype),
NFSPROC_T *);
int nfsrv_compareacl(NFSACL_T *, NFSACL_T *);
@@ -481,11 +484,13 @@ int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t,
int nfsrpc_create(vnode_t, char *, int, struct vattr *, nfsquad_t,
int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
struct nfsfh **, int *, int *);
-int nfsrpc_remove(vnode_t, char *, int, vnode_t, struct ucred *, NFSPROC_T *,
- struct nfsvattr *, int *);
-int nfsrpc_rename(vnode_t, vnode_t, char *, int, vnode_t, vnode_t, char *, int,
- struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
- int *, int *);
+int nfsrpc_remove(struct vnode *, char *, int, struct vnode *,
+ struct nfsvattr *, int *, nfsremove_status *, struct nfsvattr *, int *,
+ struct ucred *, NFSPROC_T *);
+int nfsrpc_rename(struct vnode *, struct vnode *, char *, int, struct vnode *,
+ struct vnode *, char *, int, nfsremove_status *, struct nfsvattr *,
+ struct nfsvattr *, int *, int *, struct nfsvattr *, int *, struct ucred *,
+ NFSPROC_T *);
int nfsrpc_link(vnode_t, vnode_t, char *, int,
struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
int *, int *);
@@ -515,7 +520,7 @@ int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *, uint32_t *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *);
-int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *,
+int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, bool *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *,
NFSPROC_T *);
@@ -568,6 +573,9 @@ int nfsrpc_listextattr(vnode_t, uint64_t *, struct uio *, size_t *, bool *,
int nfsrpc_rmextattr(vnode_t, const char *, struct nfsvattr *, int *,
struct ucred *, NFSPROC_T *);
void nfsrpc_bindconnsess(CLIENT *, void *, struct ucred *);
+int nfsrpc_openattr(struct nfsmount *, struct vnode *, uint8_t *, int,
+ bool, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsfh **,
+ int *);
/* nfs_clstate.c */
int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
@@ -606,12 +614,12 @@ int nfscl_doclose(vnode_t, struct nfsclclient **, NFSPROC_T *);
int nfsrpc_doclose(struct nfsmount *, struct nfsclopen *, NFSPROC_T *, bool,
bool);
int nfscl_deleg(mount_t, struct nfsclclient *, u_int8_t *, int,
- struct ucred *, NFSPROC_T *, struct nfscldeleg **);
+ struct ucred *, NFSPROC_T *, struct nfscldeleg *);
void nfscl_lockinit(struct nfsv4lock *);
void nfscl_lockexcl(struct nfsv4lock *, void *);
void nfscl_lockunlock(struct nfsv4lock *);
void nfscl_lockderef(struct nfsv4lock *);
-void nfscl_delegreturnvp(vnode_t, NFSPROC_T *);
+void nfscl_delegreturnvp(struct vnode *, bool, NFSPROC_T *);
void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
int);
@@ -626,7 +634,7 @@ int nfscl_renamedeleg(vnode_t, nfsv4stateid_t *, int *, vnode_t,
nfsv4stateid_t *, int *, NFSPROC_T *);
void nfscl_reclaimnode(vnode_t);
void nfscl_newnode(vnode_t);
-void nfscl_delegmodtime(vnode_t);
+void nfscl_delegmodtime(struct vnode *, struct timespec *);
void nfscl_deleggetmodtime(vnode_t, struct timespec *);
int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
struct nfsmount *, NFSPROC_T *);
@@ -651,6 +659,8 @@ void nfscl_freelayout(struct nfscllayout *);
void nfscl_freeflayout(struct nfsclflayout *);
void nfscl_freedevinfo(struct nfscldevinfo *);
int nfscl_layoutcommit(vnode_t, NFSPROC_T *);
+int nfscl_delegacecheck(struct vnode *, accmode_t, struct ucred *);
+void nfscl_startdelegrecall(struct nfsclclient *, struct nfsfh *);
/* nfs_clport.c */
int nfscl_nget(mount_t, vnode_t, struct nfsfh *,
@@ -707,12 +717,12 @@ int nfsvno_symlink(struct nameidata *, struct nfsvattr *, char *, int, int,
uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *);
int nfsvno_getsymlink(struct nfsrv_descript *, struct nfsvattr *,
NFSPROC_T *, char **, int *);
-int nfsvno_removesub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
- struct nfsexstuff *);
+int nfsvno_removesub(struct nameidata *, bool, struct nfsrv_descript *,
+ NFSPROC_T *, struct nfsexstuff *);
int nfsvno_rmdirsub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
struct nfsexstuff *);
-int nfsvno_rename(struct nameidata *, struct nameidata *, u_int32_t,
- u_int32_t, struct ucred *, NFSPROC_T *);
+int nfsvno_rename(struct nameidata *, struct nameidata *,
+ struct nfsrv_descript *, NFSPROC_T *);
int nfsvno_link(struct nameidata *, vnode_t, nfsquad_t, struct ucred *,
NFSPROC_T *, struct nfsexstuff *);
int nfsvno_fsync(vnode_t, u_int64_t, int, struct ucred *, NFSPROC_T *);
@@ -726,7 +736,8 @@ int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *,
NFSPROC_T *);
int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t,
struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *,
- struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t);
+ struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, bool, bool,
+ bool);
int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
NFSACL_T *, NFSPROC_T *);
int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
@@ -780,6 +791,7 @@ int newnfs_request(struct nfsrv_descript *, struct nfsmount *,
struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *,
struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *,
struct nfsclsession *);
+void nfs_resetslots(struct nfsclsession *);
int newnfs_connect(struct nfsmount *, struct nfssockreq *,
struct ucred *, NFSPROC_T *, int, bool, struct __rpc_client **);
void newnfs_disconnect(struct nfsmount *, struct nfssockreq *);
diff --git a/sys/fs/nfs/nfscl.h b/sys/fs/nfs/nfscl.h
index a52b9e433145..3b1445e1923c 100644
--- a/sys/fs/nfs/nfscl.h
+++ b/sys/fs/nfs/nfscl.h
@@ -68,10 +68,11 @@ struct nfsv4node {
* These flag bits are used for the argument to nfscl_fillsattr() to
* indicate special handling of the attributes.
*/
-#define NFSSATTR_FULL 0x1
-#define NFSSATTR_SIZE0 0x2
-#define NFSSATTR_SIZENEG1 0x4
-#define NFSSATTR_SIZERDEV 0x8
+#define NFSSATTR_FULL 0x01
+#define NFSSATTR_SIZE0 0x02
+#define NFSSATTR_SIZENEG1 0x04
+#define NFSSATTR_SIZERDEV 0x08
+#define NFSSATTR_NEWFILE 0x10
/* Use this macro for debug printfs. */
#define NFSCL_DEBUG(level, ...) do { \
diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h
index d9f5ed13b54f..92669ff8d1aa 100644
--- a/sys/fs/nfs/nfsclstate.h
+++ b/sys/fs/nfs/nfsclstate.h
@@ -116,6 +116,10 @@ struct nfsclclient {
struct proc *nfsc_renewthread;
struct nfsmount *nfsc_nmp;
time_t nfsc_expire;
+ int nfsc_delegcnt;
+ int nfsc_deleghighwater;
+ int nfsc_layoutcnt;
+ int nfsc_layouthighwater;
u_int32_t nfsc_clientidrev;
u_int32_t nfsc_rev;
u_int32_t nfsc_renew;
diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h
index 0b16ba9b85a8..c30b46261df0 100644
--- a/sys/fs/nfs/nfsport.h
+++ b/sys/fs/nfs/nfsport.h
@@ -439,10 +439,13 @@
/* Do an NFSv4 Verify+Write. */
#define NFSPROC_APPENDWRITE 69
+/* Do an NFSv4 Openattr. */
+#define NFSPROC_OPENATTR 70
+
/*
* Must be defined as one higher than the last NFSv4.2 Proc# above.
*/
-#define NFSV42_NPROCS 70
+#define NFSV42_NPROCS 71
/* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */
#define NFSV42_OLDNPROCS 69
@@ -474,7 +477,7 @@ struct nfsstatsv1 {
uint64_t readlink_bios;
uint64_t biocache_readdirs;
uint64_t readdir_bios;
- uint64_t rpccnt[NFSV42_NPROCS + 10];
+ uint64_t rpccnt[NFSV42_NPROCS + 9];
uint64_t rpcretries;
uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
uint64_t srvlayouts;
@@ -690,6 +693,7 @@ struct nfsvattr {
#define na_bytes na_vattr.va_bytes
#define na_filerev na_vattr.va_filerev
#define na_vaflags na_vattr.va_vaflags
+#define na_bsdflags na_vattr.va_bsdflags
#include <fs/nfsclient/nfsnode.h>
@@ -1180,9 +1184,11 @@ struct nfsreq {
*/
#ifdef VV_DISABLEDELEG
#define NFSVNO_DELEGOK(v) \
- ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0)
+ ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0 || \
+ (vn_irflag_read(v) & VIRF_NAMEDATTR) == 0)
#else
-#define NFSVNO_DELEGOK(v) (1)
+#define NFSVNO_DELEGOK(v) \
+ ((v) == NULL || (vn_irflag_read(v) & VIRF_NAMEDATTR) == 0)
#endif
/*
diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h
index cef886755d5a..cb5a80e8df73 100644
--- a/sys/fs/nfs/nfsproto.h
+++ b/sys/fs/nfs/nfsproto.h
@@ -275,6 +275,8 @@
#define NFSX_V4SESSIONID 16
#define NFSX_V4DEVICEID 16
#define NFSX_V4PNFSFH (sizeof(fhandle_t) + 1)
+#define NFSX_V4NAMEDDIRFH 2
+#define NFSX_V4NAMEDATTRFH 3
#define NFSX_V4FILELAYOUT (4 * NFSX_UNSIGNED + NFSX_V4DEVICEID + \
NFSX_HYPER + NFSM_RNDUP(NFSX_V4PNFSFH))
#define NFSX_V4FLEXLAYOUT(m) (NFSX_HYPER + 3 * NFSX_UNSIGNED + \
@@ -406,10 +408,13 @@
/* Do an NFSv4 Verify+Write. */
#define NFSPROC_APPENDWRITE 69
+/* Do an NFSv4 Openattr. */
+#define NFSPROC_OPENATTR 70
+
/*
* Must be defined as one higher than the last NFSv4.2 Proc# above.
*/
-#define NFSV42_NPROCS 70
+#define NFSV42_NPROCS 71
/* Value of NFSV42_NPROCS for old nfsstats structure. (Always 69) */
#define NFSV42_OLDNPROCS 69
@@ -619,6 +624,8 @@
#define NFSV4OPEN_WDCONTENTION 0x00100000
#define NFSV4OPEN_WDNOTWANTED 0x00200000
#define NFSV4OPEN_WDSUPPFTYPE 0x00400000
+#define NFSV4OPEN_WDNOTSUPPDOWNGRADE 0x00800000
+#define NFSV4OPEN_WDNOTSUPPUPGRADE 0x01000000
/*
* NFS V4 File Handle types
@@ -742,6 +749,17 @@
#define NFSSECINFONONAME_CURFH 0
#define NFSSECINFONONAME_PARENT 1
+/* Bits for CB_RECALL_ANY. */
+#define NFSRCA4_RDATA_DLG 0x00000001
+#define NFSRCA4_WDATA_DLG 0x00000002
+#define NFSRCA4_DIR_DLG 0x00000004
+#define NFSRCA4_FILE_LAYOUT 0x00000008
+#define NFSRCA4_BLK_LAYOUT 0x00000010
+#define NFSRCA4_OBJ_LAYOUT_MIN 0x00000100
+#define NFSRCA4_OBJ_LAYOUT_MAX 0x00000200
+#define NFSRCA4_FF_LAYOUT_READ 0x00010000
+#define NFSRCA4_FF_LAYOUT_RW 0x00020000
+
#if defined(_KERNEL) || defined(KERNEL)
/* Conversion macros */
#define vtonfsv2_mode(t,m) \
@@ -1002,7 +1020,7 @@ struct nfsv3_sattr {
#define NFSATTRBIT_SPACEFREED 78
#define NFSATTRBIT_CHANGEATTRTYPE 79
#define NFSATTRBIT_SECLABEL 80
-/* Not sure what attribute bit #81 is? */
+#define NFSATTRBIT_MODEUMASK 81
#define NFSATTRBIT_XATTRSUPPORT 82
#define NFSATTRBM_SUPPORTEDATTRS 0x00000001
@@ -1086,7 +1104,7 @@ struct nfsv3_sattr {
#define NFSATTRBM_SPACEFREED 0x00004000
#define NFSATTRBM_CHANGEATTRTYPE 0x00008000
#define NFSATTRBM_SECLABEL 0x00010000
-/* Not sure what attribute bit#81/0x00020000 is? */
+#define NFSATTRBM_MODEUMASK 0x00020000
#define NFSATTRBM_XATTRSUPPORT 0x00040000
#define NFSATTRBIT_MAX 83
@@ -1124,6 +1142,7 @@ struct nfsv3_sattr {
NFSATTRBM_FILESFREE | \
NFSATTRBM_FILESTOTAL | \
NFSATTRBM_FSLOCATIONS | \
+ NFSATTRBM_HIDDEN | \
NFSATTRBM_HOMOGENEOUS | \
NFSATTRBM_MAXFILESIZE | \
NFSATTRBM_MAXLINK | \
@@ -1145,6 +1164,7 @@ struct nfsv3_sattr {
NFSATTRBM_SPACEFREE | \
NFSATTRBM_SPACETOTAL | \
NFSATTRBM_SPACEUSED | \
+ NFSATTRBM_SYSTEM | \
NFSATTRBM_TIMEACCESS | \
NFSATTRBM_TIMECREATE | \
NFSATTRBM_TIMEDELTA | \
@@ -1174,6 +1194,7 @@ struct nfsv3_sattr {
NFSATTRBM_LAYOUTBLKSIZE | \
NFSATTRBM_LAYOUTALIGNMENT | \
NFSATTRBM_SUPPATTREXCLCREAT | \
+ NFSATTRBM_CHANGEATTRTYPE | \
NFSATTRBM_XATTRSUPPORT)
/*
@@ -1181,7 +1202,8 @@ struct nfsv3_sattr {
*/
#define NFSATTRBIT_SUPPSETONLY1 (NFSATTRBM_TIMEACCESSSET | \
NFSATTRBM_TIMEMODIFYSET)
-#define NFSATTRBIT_SUPPSETONLY2 (NFSATTRBM_MODESETMASKED)
+#define NFSATTRBIT_SUPPSETONLY2 (NFSATTRBM_MODESETMASKED | \
+ NFSATTRBM_MODEUMASK)
/*
* NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31
@@ -1190,16 +1212,19 @@ struct nfsv3_sattr {
*/
#define NFSATTRBIT_SETABLE0 \
(NFSATTRBM_SIZE | \
+ NFSATTRBM_HIDDEN | \
NFSATTRBM_ACL)
#define NFSATTRBIT_SETABLE1 \
(NFSATTRBM_MODE | \
NFSATTRBM_OWNER | \
NFSATTRBM_OWNERGROUP | \
- NFSATTRBM_TIMECREATE | \
+ NFSATTRBM_SYSTEM | \
+ NFSATTRBM_TIMECREATE | \
NFSATTRBM_TIMEACCESSSET | \
NFSATTRBM_TIMEMODIFYSET)
#define NFSATTRBIT_SETABLE2 \
- (NFSATTRBM_MODESETMASKED)
+ (NFSATTRBM_MODESETMASKED | \
+ NFSATTRBM_MODEUMASK)
/*
* NFSATTRBIT_NFSV41 - Attributes only supported by NFSv4.1.
@@ -1216,7 +1241,10 @@ struct nfsv3_sattr {
/*
* NFSATTRBIT_NFSV42 - Attributes only supported by NFSv4.2.
*/
-#define NFSATTRBIT_NFSV42_2 NFSATTRBM_XATTRSUPPORT
+#define NFSATTRBIT_NFSV42_2 \
+ (NFSATTRBM_CHANGEATTRTYPE | \
+ NFSATTRBM_XATTRSUPPORT | \
+ NFSATTRBM_MODEUMASK)
/*
* Set of attributes that the getattr vnode op needs.
@@ -1230,6 +1258,7 @@ struct nfsv3_sattr {
NFSATTRBM_SIZE | \
NFSATTRBM_FSID | \
NFSATTRBM_FILEID | \
+ NFSATTRBM_HIDDEN | \
NFSATTRBM_MAXREAD)
/*
@@ -1242,6 +1271,7 @@ struct nfsv3_sattr {
NFSATTRBM_OWNERGROUP | \
NFSATTRBM_RAWDEV | \
NFSATTRBM_SPACEUSED | \
+ NFSATTRBM_SYSTEM | \
NFSATTRBM_TIMEACCESS | \
NFSATTRBM_TIMECREATE | \
NFSATTRBM_TIMEMETADATA | \
@@ -1264,6 +1294,7 @@ struct nfsv3_sattr {
NFSATTRBM_SIZE | \
NFSATTRBM_FSID | \
NFSATTRBM_FILEID | \
+ NFSATTRBM_HIDDEN | \
NFSATTRBM_MAXREAD)
/*
@@ -1274,6 +1305,7 @@ struct nfsv3_sattr {
NFSATTRBM_NUMLINKS | \
NFSATTRBM_RAWDEV | \
NFSATTRBM_SPACEUSED | \
+ NFSATTRBM_SYSTEM | \
NFSATTRBM_TIMEACCESS | \
NFSATTRBM_TIMECREATE | \
NFSATTRBM_TIMEMETADATA | \
@@ -1390,6 +1422,7 @@ struct nfsv3_sattr {
* NFSGETATTRBIT_PATHCONF0 - bits 0<->31
*/
#define NFSGETATTRBIT_PATHCONF0 (NFSATTRBIT_GETATTR0 | \
+ NFSATTRBM_NAMEDATTR | \
NFSATTRBM_CASEINSENSITIVE | \
NFSATTRBM_CASEPRESERVING | \
NFSATTRBM_CHOWNRESTRICTED | \
@@ -1651,4 +1684,11 @@ typedef struct nfsv4stateid nfsv4stateid_t;
#define NFSV4SXATTR_CREATE 1
#define NFSV4SXATTR_REPLACE 2
+/* Values for ChangeAttrType (RFC-7862). */
+#define NFSV4CHANGETYPE_MONOTONIC_INCR 0
+#define NFSV4CHANGETYPE_VERS_COUNTER 1
+#define NFSV4CHANGETYPE_VERS_COUNTER_NOPNFS 2
+#define NFSV4CHANGETYPE_TIME_METADATA 3
+#define NFSV4CHANGETYPE_UNDEFINED 4
+
#endif /* _NFS_NFSPROTO_H_ */
diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h
index da214ae9d4e9..cc19ed6fa1d2 100644
--- a/sys/fs/nfs/nfsrvstate.h
+++ b/sys/fs/nfs/nfsrvstate.h
@@ -333,7 +333,7 @@ struct nfsf_rec {
u_int32_t numboots; /* Number of boottimes */
};
-void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *);
+void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *, bool, SVCXPRT **);
void nfsrv_freedeleglist(struct nfsstatehead *);
/*
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index c691e797aa01..e181bf593e23 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -366,7 +366,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
bool old_lock;
/*
- * Ensure the exclusove access to the node before checking
+ * Ensure the exclusive access to the node before checking
* whether the cache is consistent.
*/
old_lock = ncl_excl_start(vp);
diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c
index 270f39d03c90..bca0bdcd0df1 100644
--- a/sys/fs/nfsclient/nfs_clcomsubs.c
+++ b/sys/fs/nfsclient/nfs_clcomsubs.c
@@ -271,7 +271,8 @@ nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap)
if (nd->nd_flag & ND_NFSV4) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL,
- NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
+ NULL);
} else if (nd->nd_flag & ND_NFSV3) {
NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR);
nap->na_type = nfsv34tov_type(fp->fa_type);
diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c
index be2024730cf0..f85f961d424e 100644
--- a/sys/fs/nfsclient/nfs_clnode.c
+++ b/sys/fs/nfsclient/nfs_clnode.c
@@ -205,7 +205,7 @@ nfs_freesillyrename(void *arg, __unused int pending)
}
static void
-ncl_releasesillyrename(struct vnode *vp, struct thread *td)
+ncl_releasesillyrename(struct vnode *vp, bool flushed, struct thread *td)
{
struct nfsnode *np;
struct sillyrename *sp;
@@ -220,7 +220,8 @@ ncl_releasesillyrename(struct vnode *vp, struct thread *td)
sp = NULL;
if (sp != NULL) {
NFSUNLOCKNODE(np);
- (void) ncl_vinvalbuf(vp, 0, td, 1);
+ if (flushed)
+ (void)ncl_vinvalbuf(vp, 0, td, 1);
/*
* Remove the silly file that was rename'd earlier
*/
@@ -238,9 +239,13 @@ ncl_inactive(struct vop_inactive_args *ap)
struct vnode *vp = ap->a_vp;
struct nfsnode *np;
struct thread *td;
+ struct nfsmount *nmp;
+ bool flushed;
td = curthread;
np = VTONFS(vp);
+ nmp = VFSTONFS(vp->v_mount);
+ flushed = true;
if (NFS_ISV4(vp) && vp->v_type == VREG) {
NFSLOCKNODE(np);
np->n_openstateid = NULL;
@@ -251,13 +256,18 @@ ncl_inactive(struct vop_inactive_args *ap)
* buffers/pages must be flushed before the close, so that the
* stateid is available for the writes.
*/
- vnode_pager_clean_sync(vp);
- (void)ncl_flush(vp, MNT_WAIT, td, 1, 0);
+ if ((nmp->nm_flag & NFSMNT_NOCTO) == 0 || !NFSHASNFSV4N(nmp) ||
+ nfscl_mustflush(vp) != 0) {
+ vnode_pager_clean_sync(vp);
+ (void)ncl_flush(vp, MNT_WAIT, td, 1, 0);
+ } else {
+ flushed = false;
+ }
(void)nfsrpc_close(vp, 1, td);
}
NFSLOCKNODE(np);
- ncl_releasesillyrename(vp, td);
+ ncl_releasesillyrename(vp, flushed, td);
/*
* NMODIFIED means that there might be dirty/stale buffers
@@ -294,7 +304,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
nfs_reclaim_p(ap);
NFSLOCKNODE(np);
- ncl_releasesillyrename(vp, td);
+ ncl_releasesillyrename(vp, true, td);
if (NFS_ISV4(vp) && vp->v_type == VREG) {
np->n_openstateid = NULL;
@@ -315,7 +325,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0) {
MNT_IUNLOCK(mp);
- nfscl_delegreturnvp(vp, td);
+ nfscl_delegreturnvp(vp, true, td);
} else
MNT_IUNLOCK(mp);
} else
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 4e3a699fb170..b25d967982a1 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -828,7 +828,7 @@ nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp,
== (ND_NFSV4 | ND_V4WCCATTR)) {
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
if (error)
return (error);
/*
@@ -1489,3 +1489,4 @@ MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfscl, krpc, 1, 1, 1);
MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfscl, xdr, 1, 1, 1);
+MODULE_DEPEND(nfscl, acl_nfs4, 1, 1, 1);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 8c5532268287..2f3c59b68518 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -142,6 +142,7 @@ static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
int *, int *);
+static bool nfscl_invalidfname(bool, char *, int);
static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
struct nfscllockowner *, u_int64_t, u_int64_t,
u_int32_t, struct ucred *, NFSPROC_T *, int);
@@ -389,13 +390,25 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
mode |= NFSV4OPEN_ACCESSREAD;
if (amode & FWRITE)
mode |= NFSV4OPEN_ACCESSWRITE;
+ if (NFSHASNFSV4N(nmp)) {
+ if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+ nfs_numnfscbd > 0 &&
+ (vn_irflag_read(vp) & VIRF_NAMEDATTR) == 0) {
+ if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
+ mode |= NFSV4OPEN_WANTWRITEDELEG;
+ else
+ mode |= NFSV4OPEN_WANTANYDELEG;
+ } else
+ mode |= NFSV4OPEN_WANTNODELEG;
+ }
nfhp = np->n_fhp;
retrycnt = 0;
do {
dp = NULL;
- error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
- cred, p, NULL, &op, &newone, &ret, 1, true);
+ error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len,
+ (mode & NFSV4OPEN_ACCESSBOTH), 1, cred, p, NULL,
+ &op, &newone, &ret, 1, true);
if (error) {
return (error);
}
@@ -440,7 +453,7 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
NFSUNLOCKNODE(np);
(void) nfscl_deleg(nmp->nm_mountp,
op->nfso_own->nfsow_clp,
- nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
+ nfhp->nfh_fh, nfhp->nfh_len, cred, p, dp);
}
} else if (NFSHASNFSV4N(nmp)) {
/*
@@ -473,7 +486,7 @@ nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
NFSUNLOCKNODE(np);
(void) nfscl_deleg(nmp->nm_mountp,
op->nfso_own->nfsow_clp,
- nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
+ nfhp->nfh_fh, nfhp->nfh_len, cred, p, dp);
}
} else {
error = EIO;
@@ -547,7 +560,8 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
cred);
NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
- *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
+ *tl++ = txdr_unsigned(mode & (NFSV4OPEN_ACCESSBOTH |
+ NFSV4OPEN_WANTDELEGMASK));
*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
tsep = nfsmnt_mdssession(nmp);
*tl++ = tsep->nfsess_clientid.lval[0];
@@ -664,6 +678,13 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
&ret, &acesize, p);
if (error)
goto nfsmout;
+ } else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+ NFSHASNFSV4N(nmp)) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ deleg = fxdr_unsigned(uint32_t, *tl);
+ if (deleg == NFSV4OPEN_CONTENTION ||
+ deleg == NFSV4OPEN_RESOURCE)
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
} else if (deleg != NFSV4OPEN_DELEGATENONE) {
error = NFSERR_BADXDR;
goto nfsmout;
@@ -675,7 +696,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
("nfsrpc_openrpc: Getattr repstat"));
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, NULL, p, cred);
+ NULL, NULL, NULL, NULL, p, cred);
if (error)
goto nfsmout;
}
@@ -1334,7 +1355,7 @@ nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
if ((nd->nd_flag & ND_NFSV4) != 0)
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
else
error = nfsm_loadattr(nd, nap);
} else
@@ -1546,7 +1567,7 @@ nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(NFSV4OP_OPEN);
*tl++ = 0; /* seqid, ignored. */
- *tl++ = txdr_unsigned(openmode);
+ *tl++ = txdr_unsigned(openmode | NFSV4OPEN_WANTNODELEG);
*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
*tl++ = 0; /* ClientID, ignored. */
*tl = 0;
@@ -1668,6 +1689,13 @@ nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
ndp->nfsdl_stateid.other[0] = *tl++;
ndp->nfsdl_stateid.other[1] = *tl++;
ndp->nfsdl_stateid.other[2] = *tl++;
+ } else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+ NFSHASNFSV4N(nmp)) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ deleg = fxdr_unsigned(uint32_t, *tl);
+ if (deleg == NFSV4OPEN_CONTENTION ||
+ deleg == NFSV4OPEN_RESOURCE)
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
} else if (deleg != NFSV4OPEN_DELEGATENONE) {
error = NFSERR_BADXDR;
goto nfsmout;
@@ -2396,7 +2424,7 @@ nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
*tl = vtonfsv34_type(vtyp);
}
if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
if ((nd->nd_flag & ND_NFSV3) &&
(vtyp == VCHR || vtyp == VBLK)) {
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
@@ -2484,7 +2512,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
*/
if (dp != NULL)
(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
- (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
+ (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, dp);
nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
@@ -2595,8 +2623,17 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
*/
NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(owp->nfsow_seqid);
- *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
- NFSV4OPEN_ACCESSREAD);
+ if (NFSHASNFSV4N(nmp)) {
+ if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+ nfs_numnfscbd > 0)
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTWRITEDELEG);
+ else
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTNODELEG);
+ } else
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD);
*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
tsep = nfsmnt_mdssession(nmp);
*tl++ = tsep->nfsess_clientid.lval[0];
@@ -2609,14 +2646,16 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
if (NFSHASSESSPERSIST(nmp)) {
/* Use GUARDED for persistent sessions. */
*tl = txdr_unsigned(NFSCREATE_GUARDED);
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE,
+ 0);
} else {
/* Otherwise, use EXCLUSIVE4_1. */
*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
*tl++ = cverf.lval[0];
*tl = cverf.lval[1];
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE,
+ 0);
}
} else {
/* NFSv4.0 */
@@ -2627,7 +2666,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
}
} else {
*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
}
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
@@ -2714,6 +2753,13 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
&ret, &acesize, p);
if (error)
goto nfsmout;
+ } else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+ NFSHASNFSV4N(nmp)) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ deleg = fxdr_unsigned(uint32_t, *tl);
+ if (deleg == NFSV4OPEN_CONTENTION ||
+ deleg == NFSV4OPEN_RESOURCE)
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
} else if (deleg != NFSV4OPEN_DELEGATENONE) {
error = NFSERR_BADXDR;
goto nfsmout;
@@ -2813,22 +2859,28 @@ nfsmout:
* Nfs remove rpc
*/
int
-nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
- struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
+nfsrpc_remove(struct vnode *dvp, char *name, int namelen, struct vnode *vp,
+ struct nfsvattr *nap, int *attrflagp, nfsremove_status *file_status,
+ struct nfsvattr *dnap, int *dattrflagp, struct ucred *cred, NFSPROC_T *p)
{
- u_int32_t *tl;
+ uint32_t *tl;
struct nfsrv_descript nfsd, *nd = &nfsd;
struct nfsnode *np;
struct nfsmount *nmp;
nfsv4stateid_t dstateid;
- int error, ret = 0, i;
+ nfsattrbit_t attrbits;
+ int error, i, ret;
*dattrflagp = 0;
+ *attrflagp = 0;
+ *file_status = UNKNOWN;
+ ret = 0;
if (namelen > NFS_MAXNAMLEN)
return (ENAMETOOLONG);
nmp = VFSTONFS(dvp->v_mount);
tryagain:
- if (NFSHASNFSV4(nmp) && ret == 0) {
+ if (NFSHASNFSV4(nmp) && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+ !NFSHASNFSV4N(nmp)) && ret == 0) {
ret = nfscl_removedeleg(vp, p, &dstateid);
if (ret == 1) {
NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
@@ -2853,9 +2905,19 @@ tryagain:
}
if (ret == 0)
NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
- (void) nfsm_strtom(nd, name, namelen);
+ (void)nfsm_strtom(nd, name, namelen);
+ if (ret == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4OP_PUTFH);
+ np = VTONFS(vp);
+ (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ NFSGETATTR_ATTRBIT(&attrbits);
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ (void)nfsrv_putattrbit(nd, &attrbits);
+ }
error = nfscl_request(nd, dvp, p, cred);
- if (error)
+ if (error != 0)
return (error);
if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
/* For NFSv4, parse out any Delereturn replies. */
@@ -2878,7 +2940,41 @@ tryagain:
}
error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
}
- if (nd->nd_repstat && !error)
+ if (ret == 0 && (nd->nd_flag & (ND_NFSV4 |
+ ND_NOMOREDATA)) == ND_NFSV4) {
+ /* Parse out the Remove reply for NFSPROC_REMOVE. */
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + 2 * NFSX_HYPER);
+ /* No use for change info for now. */
+ /* The Remove succeeded. */
+ nd->nd_repstat = 0;
+ }
+ if (ret == 0 && (nd->nd_flag & (ND_NFSV4 |
+ ND_NOMOREDATA)) == ND_NFSV4) {
+ /* Parse out the PutFH, Getattr for NFSPROC_REMOVE. */
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ if (*(tl + 1) != 0) {
+ i = fxdr_unsigned(int, *(tl + 1));
+ if (i == NFSERR_STALE)
+ *file_status = DELETED;
+ } else {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ if (*(tl + 1) != 0) {
+ i = fxdr_unsigned(int, *(tl + 1));
+ if (i == NFSERR_STALE)
+ *file_status = DELETED;
+ } else {
+ error = nfsm_loadattr(nd, nap);
+ if (error == 0) {
+ *attrflagp = 1;
+ if (nap->na_nlink == 0)
+ *file_status = NLINK_ZERO;
+ else
+ *file_status = VALID;
+ }
+ }
+ }
+ }
+ if (nd->nd_repstat != 0 && error == 0)
error = nd->nd_repstat;
nfsmout:
m_freem(nd->nd_mrep);
@@ -2889,12 +2985,14 @@ nfsmout:
* Do an nfs rename rpc.
*/
int
-nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
- vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
- NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
- int *fattrflagp, int *tattrflagp)
+nfsrpc_rename(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
+ int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
+ int tnamelen, nfsremove_status *tvp_status, struct nfsvattr *fnap,
+ struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp,
+ struct nfsvattr *tvpnap, int *tvpattrflagp, struct ucred *cred,
+ NFSPROC_T *p)
{
- u_int32_t *tl;
+ uint32_t *tl;
struct nfsrv_descript nfsd, *nd = &nfsd;
struct nfsmount *nmp;
struct nfsnode *np;
@@ -2904,11 +3002,14 @@ nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
*fattrflagp = 0;
*tattrflagp = 0;
+ *tvpattrflagp = 0;
+ *tvp_status = UNKNOWN;
nmp = VFSTONFS(fdvp->v_mount);
if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
return (ENAMETOOLONG);
tryagain:
- if (NFSHASNFSV4(nmp) && ret == 0) {
+ if (NFSHASNFSV4(nmp) && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+ !NFSHASNFSV4N(nmp)) && ret == 0) {
ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
&tdstateid, &gottd, p);
if (gotfd && gottd) {
@@ -2961,29 +3062,44 @@ tryagain:
}
if (ret == 0)
NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
- if (nd->nd_flag & ND_NFSV4) {
+ if ((nd->nd_flag & ND_NFSV4) != 0) {
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OP_GETATTR);
NFSWCCATTR_ATTRBIT(&attrbits);
- (void) nfsrv_putattrbit(nd, &attrbits);
+ (void)nfsrv_putattrbit(nd, &attrbits);
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OP_PUTFH);
(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
VTONFS(tdvp)->n_fhp->nfh_len, 0);
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OP_GETATTR);
- (void) nfsrv_putattrbit(nd, &attrbits);
+ (void)nfsrv_putattrbit(nd, &attrbits);
nd->nd_flag |= ND_V4WCCATTR;
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OP_RENAME);
}
- (void) nfsm_strtom(nd, fnameptr, fnamelen);
- if (!(nd->nd_flag & ND_NFSV4))
+ (void)nfsm_strtom(nd, fnameptr, fnamelen);
+ if ((nd->nd_flag & ND_NFSV4) == 0)
(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
VTONFS(tdvp)->n_fhp->nfh_len, 0);
- (void) nfsm_strtom(nd, tnameptr, tnamelen);
+ (void)nfsm_strtom(nd, tnameptr, tnamelen);
+ if (ret == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ /* When tvp == NULL, it doesn't matter which dvp is used. */
+ *tl = txdr_unsigned(NFSV4OP_PUTFH);
+ if (tvp != NULL)
+ (void)nfsm_fhtom(nmp, nd, VTONFS(tvp)->n_fhp->nfh_fh,
+ VTONFS(tvp)->n_fhp->nfh_len, 0);
+ else
+ (void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
+ VTONFS(tdvp)->n_fhp->nfh_len, 0);
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ NFSGETATTR_ATTRBIT(&attrbits);
+ (void)nfsrv_putattrbit(nd, &attrbits);
+ }
error = nfscl_request(nd, fdvp, p, cred);
- if (error)
+ if (error != 0)
return (error);
if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
/* For NFSv4, parse out any Delereturn replies. */
@@ -2999,7 +3115,7 @@ tryagain:
for (i = 0; i < (ret * 2); i++) {
if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
ND_NFSV4) {
- NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
if (*(tl + 1)) {
if (i == 1 && ret > 1) {
/*
@@ -3019,23 +3135,57 @@ tryagain:
}
/* Now, the first wcc attribute reply. */
if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
- NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
if (*(tl + 1))
nd->nd_flag |= ND_NOMOREDATA;
}
error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
/* and the second wcc attribute reply. */
if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
- !error) {
- NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+ error == 0) {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
if (*(tl + 1))
nd->nd_flag |= ND_NOMOREDATA;
}
- if (!error)
+ if (error == 0)
error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
NULL, NULL);
}
- if (nd->nd_repstat && !error)
+ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
+ ret == 0 && error == 0) {
+ /* Parse out the rename successful reply. */
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED +
+ 4 * NFSX_HYPER);
+ nd->nd_repstat = 0; /* Rename succeeded. */
+ /* Parse PutFH reply for tvp. */
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ if (*(tl + 1) != 0) {
+ if (tvp != NULL) {
+ i = fxdr_unsigned(int, *(tl + 1));
+ if (i == NFSERR_STALE)
+ *tvp_status = DELETED;
+ }
+ } else {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ if (*(tl + 1) != 0) {
+ if (tvp != NULL) {
+ i = fxdr_unsigned(int, *(tl + 1));
+ if (i == NFSERR_STALE)
+ *tvp_status = DELETED;
+ }
+ } else {
+ error = nfsm_loadattr(nd, tvpnap);
+ if (error == 0 && tvp != NULL) {
+ *tvpattrflagp = 1;
+ if (tvpnap->na_nlink == 0)
+ *tvp_status = NLINK_ZERO;
+ else
+ *tvp_status = VALID;
+ }
+ }
+ }
+ }
+ if (nd->nd_repstat != 0 && error == 0)
error = nd->nd_repstat;
nfsmout:
m_freem(nd->nd_mrep);
@@ -3068,14 +3218,19 @@ nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
VTONFS(dvp)->n_fhp->nfh_len, 0);
if (nd->nd_flag & ND_NFSV4) {
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
- *tl = txdr_unsigned(NFSV4OP_GETATTR);
- NFSWCCATTR_ATTRBIT(&attrbits);
- (void) nfsrv_putattrbit(nd, &attrbits);
- nd->nd_flag |= ND_V4WCCATTR;
- NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OP_LINK);
}
(void) nfsm_strtom(nd, name, namelen);
+ if (nd->nd_flag & ND_NFSV4) {
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ NFSGETATTR_ATTRBIT(&attrbits);
+ (void)nfsrv_putattrbit(nd, &attrbits);
+ NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ (void)nfsrv_putattrbit(nd, &attrbits);
+ }
error = nfscl_request(nd, vp, p, cred);
if (error)
return (error);
@@ -3084,19 +3239,28 @@ nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
if (!error)
error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
NULL, NULL);
- } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
+ } else if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
/*
- * First, parse out the PutFH and Getattr result.
+ * First, parse out the PutFH and Link results.
*/
- NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
- if (!(*(tl + 1)))
- NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
- if (*(tl + 1))
+ NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED +
+ 2 * NFSX_HYPER);
+ if (*(tl + 3))
nd->nd_flag |= ND_NOMOREDATA;
/*
- * Get the pre-op attributes.
+ * Get the directory post-op attributes.
*/
- error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
+ if ((nd->nd_flag & ND_NOMOREDATA) == 0)
+ error = nfscl_postop_attr(nd, dnap, dattrflagp);
+ if (error == 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
+ /* Get rid of the RestoreFH reply. */
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ if (*(tl + 1))
+ nd->nd_flag |= ND_NOMOREDATA;
+ }
+ /* Get the file's post-op attributes. */
+ if (error == 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)
+ error = nfscl_postop_attr(nd, nap, attrflagp);
}
if (nd->nd_repstat && !error)
error = nd->nd_repstat;
@@ -3195,7 +3359,7 @@ nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
*tl = txdr_unsigned(NFDIR);
}
(void) nfsm_strtom(nd, name, namelen);
- nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1 | NFSSATTR_NEWFILE, 0);
if (nd->nd_flag & ND_NFSV4) {
NFSGETATTR_ATTRBIT(&attrbits);
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
@@ -3280,6 +3444,31 @@ nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
}
/*
+ * Check the file name in a Readdir reply; returns true if it is NOT valid.
+ */
+static bool
+nfscl_invalidfname(bool is_v4, char *name, int len)
+{
+ int i;
+ char *cp;
+
+ if (is_v4 && ((len == 1 && name[0] == '.') || /* dot entry, NFSv4 only */
+ (len == 2 && name[0] == '.' && name[1] == '.'))) { /* dotdot entry */
+ printf("Readdir NFSv4 reply has dot or dotdot in it\n");
+ return (true);
+ }
+ cp = name;
+ for (i = 0; i < len; i++, cp++) { /* scan exactly len bytes of name */
+ if (*cp == '/' || *cp == '\0') { /* slash or nul inside the name */
+ printf("Readdir reply file name had imbedded / or nul"
+ " byte\n");
+ return (true);
+ }
+ }
+ return (false); /* none of the rejected cases matched */
+}
+
+/*
* Readdir rpc.
* Always returns with either uio_resid unchanged, if you are at the
* end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
@@ -3327,10 +3516,13 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
nfsattrbit_t attrbits, dattrbits;
u_int32_t rderr, *tl2 = NULL;
size_t tresid;
+ bool validentry;
KASSERT(uiop->uio_iovcnt == 1 &&
(uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
("nfs readdirrpc bad uio"));
+ KASSERT(uiop->uio_segflg == UIO_SYSSPACE,
+ ("nfsrpc_readdir: uio userspace"));
ncookie.lval[0] = ncookie.lval[1] = 0;
/*
* There is no point in reading a lot more than uio_resid, however
@@ -3405,7 +3597,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
nfsva.na_mntonfileno = UINT64_MAX;
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, NULL, p, cred);
+ NULL, NULL, NULL, NULL, p, cred);
if (error) {
dotdotfileid = dotfileid;
} else if (gotmnton) {
@@ -3550,6 +3742,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
/* loop through the dir entries, doctoring them to 4bsd form */
while (more_dirs && bigenough) {
+ validentry = true;
if (nd->nd_flag & ND_NFSV4) {
NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
ncookie.lval[0] = *tl++;
@@ -3588,6 +3781,17 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
uiop->uio_resid)
bigenough = 0;
if (bigenough) {
+ struct iovec saviov;
+ off_t savoff;
+ ssize_t savresid;
+ int savblksiz;
+
+ saviov.iov_base = uiop->uio_iov->iov_base;
+ saviov.iov_len = uiop->uio_iov->iov_len;
+ savoff = uiop->uio_offset;
+ savresid = uiop->uio_resid;
+ savblksiz = blksiz;
+
dp = (struct dirent *)uiop->uio_iov->iov_base;
dp->d_pad0 = dp->d_pad1 = 0;
dp->d_off = 0;
@@ -3603,20 +3807,36 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
uiop->uio_iov->iov_base =
(char *)uiop->uio_iov->iov_base + DIRHDSIZ;
uiop->uio_iov->iov_len -= DIRHDSIZ;
+ cp = uiop->uio_iov->iov_base;
error = nfsm_mbufuio(nd, uiop, len);
if (error)
goto nfsmout;
- cp = uiop->uio_iov->iov_base;
- tlen -= len;
- NFSBZERO(cp, tlen);
- cp += tlen; /* points to cookie storage */
- tl2 = (u_int32_t *)cp;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + tlen +
- NFSX_HYPER;
- uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
- uiop->uio_resid -= tlen + NFSX_HYPER;
- uiop->uio_offset += (tlen + NFSX_HYPER);
+ /* Check for an invalid file name. */
+ if (nfscl_invalidfname(
+ (nd->nd_flag & ND_NFSV4) != 0, cp, len)) {
+ /* Skip over this entry. */
+ uiop->uio_iov->iov_base =
+ saviov.iov_base;
+ uiop->uio_iov->iov_len =
+ saviov.iov_len;
+ uiop->uio_offset = savoff;
+ uiop->uio_resid = savresid;
+ blksiz = savblksiz;
+ validentry = false;
+ } else {
+ cp = uiop->uio_iov->iov_base;
+ tlen -= len;
+ NFSBZERO(cp, tlen);
+ cp += tlen; /* points to cookie store */
+ tl2 = (u_int32_t *)cp;
+ uiop->uio_iov->iov_base =
+ (char *)uiop->uio_iov->iov_base +
+ tlen + NFSX_HYPER;
+ uiop->uio_iov->iov_len -= tlen +
+ NFSX_HYPER;
+ uiop->uio_resid -= tlen + NFSX_HYPER;
+ uiop->uio_offset += (tlen + NFSX_HYPER);
+ }
} else {
error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
if (error)
@@ -3627,7 +3847,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
nfsva.na_mntonfileno = UINT64_MAX;
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, &rderr, p, cred);
+ NULL, NULL, &rderr, NULL, p, cred);
if (error)
goto nfsmout;
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
@@ -3640,7 +3860,7 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
ncookie.lval[0] = 0;
ncookie.lval[1] = *tl++;
}
- if (bigenough) {
+ if (bigenough && validentry) {
if (nd->nd_flag & ND_NFSV4) {
if (rderr) {
dp->d_fileno = 0;
@@ -3777,11 +3997,16 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
size_t tresid;
u_int32_t *tl2 = NULL, rderr;
struct timespec dctime, ts;
- bool attr_ok;
+ bool attr_ok, named_dir, validentry;
KASSERT(uiop->uio_iovcnt == 1 &&
(uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
("nfs readdirplusrpc bad uio"));
+ KASSERT(uiop->uio_segflg == UIO_SYSSPACE,
+ ("nfsrpc_readdirplus: uio userspace"));
+ named_dir = false;
+ if ((vp->v_irflag & VIRF_NAMEDDIR) != 0)
+ named_dir = true;
ncookie.lval[0] = ncookie.lval[1] = 0;
timespecclear(&dctime);
*attrflagp = 0;
@@ -3847,7 +4072,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
nfsva.na_mntonfileno = UINT64_MAX;
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, NULL, p, cred);
+ NULL, NULL, NULL, NULL, p, cred);
if (error) {
dotdotfileid = dotfileid;
} else if (gotmnton) {
@@ -3933,6 +4158,13 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
NFSATTRBIT_TIMECREATE))
NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
+ if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
+ NFSATTRBIT_HIDDEN) ||
+ !NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
+ NFSATTRBIT_SYSTEM)) {
+ NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN);
+ NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM);
+ }
}
/*
@@ -3986,6 +4218,7 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
/* loop through the dir entries, doctoring them to 4bsd form */
while (more_dirs && bigenough) {
+ validentry = true;
NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
if (nd->nd_flag & ND_NFSV4) {
ncookie.lval[0] = *tl++;
@@ -4017,6 +4250,17 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
uiop->uio_resid)
bigenough = 0;
if (bigenough) {
+ struct iovec saviov;
+ off_t savoff;
+ ssize_t savresid;
+ int savblksiz;
+
+ saviov.iov_base = uiop->uio_iov->iov_base;
+ saviov.iov_len = uiop->uio_iov->iov_len;
+ savoff = uiop->uio_offset;
+ savresid = uiop->uio_resid;
+ savblksiz = blksiz;
+
dp = (struct dirent *)uiop->uio_iov->iov_base;
dp->d_pad0 = dp->d_pad1 = 0;
dp->d_off = 0;
@@ -4035,25 +4279,42 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
cnp->cn_nameptr = uiop->uio_iov->iov_base;
cnp->cn_namelen = len;
NFSCNHASHZERO(cnp);
+ cp = uiop->uio_iov->iov_base;
error = nfsm_mbufuio(nd, uiop, len);
if (error)
goto nfsmout;
- cp = uiop->uio_iov->iov_base;
- tlen -= len;
- NFSBZERO(cp, tlen);
- cp += tlen; /* points to cookie storage */
- tl2 = (u_int32_t *)cp;
- if (len == 2 && cnp->cn_nameptr[0] == '.' &&
- cnp->cn_nameptr[1] == '.')
- isdotdot = 1;
- else
- isdotdot = 0;
- uiop->uio_iov->iov_base =
- (char *)uiop->uio_iov->iov_base + tlen +
- NFSX_HYPER;
- uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
- uiop->uio_resid -= tlen + NFSX_HYPER;
- uiop->uio_offset += (tlen + NFSX_HYPER);
+ /* Check for an invalid file name. */
+ if (nfscl_invalidfname(
+ (nd->nd_flag & ND_NFSV4) != 0, cp, len)) {
+ /* Skip over this entry. */
+ uiop->uio_iov->iov_base =
+ saviov.iov_base;
+ uiop->uio_iov->iov_len =
+ saviov.iov_len;
+ uiop->uio_offset = savoff;
+ uiop->uio_resid = savresid;
+ blksiz = savblksiz;
+ validentry = false;
+ } else {
+ cp = uiop->uio_iov->iov_base;
+ tlen -= len;
+ NFSBZERO(cp, tlen);
+ cp += tlen; /* points to cookie store */
+ tl2 = (u_int32_t *)cp;
+ if (len == 2 &&
+ cnp->cn_nameptr[0] == '.' &&
+ cnp->cn_nameptr[1] == '.')
+ isdotdot = 1;
+ else
+ isdotdot = 0;
+ uiop->uio_iov->iov_base =
+ (char *)uiop->uio_iov->iov_base +
+ tlen + NFSX_HYPER;
+ uiop->uio_iov->iov_len -= tlen +
+ NFSX_HYPER;
+ uiop->uio_resid -= tlen + NFSX_HYPER;
+ uiop->uio_offset += (tlen + NFSX_HYPER);
+ }
} else {
error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
if (error)
@@ -4085,12 +4346,12 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
nfsva.na_mntonfileno = 0xffffffff;
error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, &rderr, p, cred);
+ NULL, NULL, &rderr, NULL, p, cred);
if (error)
goto nfsmout;
}
- if (bigenough) {
+ if (bigenough && validentry) {
if (nd->nd_flag & ND_NFSV4) {
if (rderr) {
dp->d_fileno = 0;
@@ -4190,7 +4451,8 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
if (cnp->cn_namelen <= NCHNAMLEN &&
ndp->ni_dvp != ndp->ni_vp &&
(newvp->v_type != VDIR ||
- dctime.tv_sec != 0)) {
+ dctime.tv_sec != 0) &&
+ !named_dir) {
cache_enter_time_flags(ndp->ni_dvp,
ndp->ni_vp, cnp,
&nfsva.na_ctime,
@@ -4747,7 +5009,7 @@ nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
if (nd->nd_repstat == 0) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
- p, cred);
+ NULL, p, cred);
if (!error) {
nmp->nm_fsid[0] = nap->na_filesid[0];
nmp->nm_fsid[1] = nap->na_filesid[1];
@@ -4800,7 +5062,7 @@ nfsmout:
* nfs pathconf rpc
*/
int
-nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
+nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, bool *has_namedattrp,
struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
{
struct nfsrv_descript nfsd, *nd = &nfsd;
@@ -4810,6 +5072,7 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
int error;
struct nfsnode *np;
+ *has_namedattrp = false;
*attrflagp = 0;
nmp = VFSTONFS(vp->v_mount);
if (NFSHASNFSV4(nmp)) {
@@ -4836,8 +5099,8 @@ nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
return (error);
if (nd->nd_repstat == 0) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
- pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
- cred);
+ pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
+ has_namedattrp, p, cred);
if (!error)
*attrflagp = 1;
} else {
@@ -5132,7 +5395,7 @@ nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
return (error);
if (!nd->nd_repstat)
error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
- NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
+ NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, NULL, p, cred);
else
error = nd->nd_repstat;
m_freem(nd->nd_mrep);
@@ -5173,7 +5436,8 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
NFSZERO_ATTRBIT(&attrbits);
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
- &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
+ &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL, false, false,
+ false);
error = nfscl_request(nd, vp, p, cred);
if (error)
return (error);
@@ -8109,7 +8373,8 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
0, 0, cred);
NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
- *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
+ *tl++ = txdr_unsigned(mode & (NFSV4OPEN_ACCESSBOTH |
+ NFSV4OPEN_WANTDELEGMASK));
*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
tsep = nfsmnt_mdssession(nmp);
*tl++ = tsep->nfsess_clientid.lval[0];
@@ -8210,6 +8475,13 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
&ret, &acesize, p);
if (error != 0)
goto nfsmout;
+ } else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+ NFSHASNFSV4N(nmp)) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ deleg = fxdr_unsigned(uint32_t, *tl);
+ if (deleg == NFSV4OPEN_CONTENTION ||
+ deleg == NFSV4OPEN_RESOURCE)
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
} else if (deleg != NFSV4OPEN_DELEGATENONE) {
error = NFSERR_BADXDR;
goto nfsmout;
@@ -8224,7 +8496,7 @@ nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
if (*++tl == 0) {
error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
- NULL, NULL, NULL, p, cred);
+ NULL, NULL, NULL, NULL, p, cred);
if (error != 0)
goto nfsmout;
if (ndp != NULL) {
@@ -8301,8 +8573,17 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
*/
NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(owp->nfsow_seqid);
- *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
- NFSV4OPEN_ACCESSREAD);
+ if (NFSHASNFSV4N(nmp)) {
+ if (!NFSHASPNFS(nmp) && nfscl_enablecallb != 0 &&
+ nfs_numnfscbd > 0)
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTWRITEDELEG);
+ else
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD | NFSV4OPEN_WANTNODELEG);
+ } else
+ *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
+ NFSV4OPEN_ACCESSREAD);
*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
tsep = nfsmnt_mdssession(nmp);
*tl++ = tsep->nfsess_clientid.lval[0];
@@ -8314,18 +8595,18 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
if (NFSHASSESSPERSIST(nmp)) {
/* Use GUARDED for persistent sessions. */
*tl = txdr_unsigned(NFSCREATE_GUARDED);
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
} else {
/* Otherwise, use EXCLUSIVE4_1. */
*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
*tl++ = cverf.lval[0];
*tl = cverf.lval[1];
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
}
} else {
*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
- nfscl_fillsattr(nd, vap, dvp, 0, 0);
+ nfscl_fillsattr(nd, vap, dvp, NFSSATTR_NEWFILE, 0);
}
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
@@ -8421,6 +8702,13 @@ nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
&ret, &acesize, p);
if (error != 0)
goto nfsmout;
+ } else if (deleg == NFSV4OPEN_DELEGATENONEEXT &&
+ NFSHASNFSV4N(nmp)) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ deleg = fxdr_unsigned(uint32_t, *tl);
+ if (deleg == NFSV4OPEN_CONTENTION ||
+ deleg == NFSV4OPEN_RESOURCE)
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
} else if (deleg != NFSV4OPEN_DELEGATENONE) {
error = NFSERR_BADXDR;
goto nfsmout;
@@ -9258,7 +9546,7 @@ nfsm_split(struct mbuf *mp, uint64_t xfer)
if (pgno == m->m_epg_npgs)
panic("nfsm_split: eroneous ext_pgs mbuf");
- m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
+ m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs, 0);
m2->m_epg_flags |= EPG_FLAG_ANON;
/*
@@ -9381,6 +9669,50 @@ nfsmout:
}
/*
+ * nfs openattr rpc: request is OpenAttr(createit), GetFH, Getattr.
+ */
+int
+nfsrpc_openattr(struct nfsmount *nmp, struct vnode *vp, uint8_t *fhp, int fhlen,
+ bool createit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
+ struct nfsfh **nfhpp, int *attrflagp)
+{
+ uint32_t *tl;
+ struct nfsrv_descript nfsd, *nd = &nfsd;
+ nfsattrbit_t attrbits;
+ int error = 0;
+
+ *attrflagp = 0; /* cleared before any early return */
+ nfscl_reqstart(nd, NFSPROC_OPENATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
+ cred);
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); /* one word: the createit flag */
+ if (createit)
+ *tl = newnfs_true;
+ else
+ *tl = newnfs_false;
+ NFSGETATTR_ATTRBIT(&attrbits);
+ NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* two following op numbers */
+ *tl++ = txdr_unsigned(NFSV4OP_GETFH);
+ *tl = txdr_unsigned(NFSV4OP_GETATTR);
+ (void)nfsrv_putattrbit(nd, &attrbits);
+ error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
+ NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+ if (error != 0)
+ return (error); /* NOTE(review): nd_mrep is not freed on this path - presumably unset on failure; confirm */
+ if (nd->nd_repstat == 0) {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* presumably GetFH op number and status - confirm */
+ error = nfsm_getfh(nd, nfhpp);
+ if (error != 0)
+ goto nfsmout;
+ error = nfscl_postop_attr(nd, nap, attrflagp);
+ }
+nfsmout:
+ m_freem(nd->nd_mrep);
+ if (error == 0 && nd->nd_repstat != 0) /* a local parse error takes precedence over the reply status */
+ error = nd->nd_repstat;
+ return (error);
+}
+
+/*
* Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
* locked vnode.
*/
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 9fbaa6e63a56..99a781640c53 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -93,11 +93,7 @@ NFSREQSPINLOCK;
NFSCLSTATEMUTEX;
int nfscl_inited = 0;
struct nfsclhead nfsclhead; /* Head of clientid list */
-int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
-int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
-static int nfscl_delegcnt = 0;
-static int nfscl_layoutcnt = 0;
static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
struct nfscllockowner **, struct nfsclopen **);
@@ -433,25 +429,13 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
*/
int
nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
- int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
+ int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg *dp)
{
- struct nfscldeleg *dp = *dpp, *tdp;
+ struct nfscldeleg *tdp;
struct nfsmount *nmp;
KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
nmp = VFSTONFS(mp);
- /*
- * First, if we have received a Read delegation for a file on a
- * read/write file system, just return it, because they aren't
- * useful, imho.
- */
- if (dp != NULL && !NFSMNT_RDONLY(mp) &&
- (dp->nfsdl_flags & NFSCLDL_READ)) {
- nfscl_trydelegreturn(dp, cred, nmp, p);
- free(dp, M_NFSCLDELEG);
- *dpp = NULL;
- return (0);
- }
/*
* Since a delegation might be added to the mount,
@@ -470,26 +454,40 @@ nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
NFSUNLOCKCLSTATE();
return (NFSERR_BADSTATEID);
}
- *dpp = NULL;
TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
nfsdl_hash);
dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
nfsstatsv1.cldelegates++;
- nfscl_delegcnt++;
+ clp->nfsc_delegcnt++;
} else {
/*
- * Delegation already exists, what do we do if a new one??
+ * A delegation already exists. If the new one is a Write
+ * delegation and the old one a Read delegation, return the
+ * Read delegation. Otherwise, return the new delegation.
*/
if (dp != NULL) {
- printf("Deleg already exists!\n");
- free(dp, M_NFSCLDELEG);
- *dpp = NULL;
+ if ((dp->nfsdl_flags & NFSCLDL_WRITE) != 0 &&
+ (tdp->nfsdl_flags & NFSCLDL_READ) != 0) {
+ TAILQ_REMOVE(&clp->nfsc_deleg, tdp, nfsdl_list);
+ LIST_REMOVE(tdp, nfsdl_hash);
+ TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
+ nfsdl_list);
+ LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp,
+ fhlen), dp, nfsdl_hash);
+ dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
+ } else {
+ tdp = dp; /* Return this one. */
+ }
} else {
- *dpp = tdp;
+ tdp = NULL;
}
}
NFSUNLOCKCLSTATE();
+ if (tdp != NULL) {
+ nfscl_trydelegreturn(tdp, cred, nmp, p);
+ free(tdp, M_NFSCLDELEG);
+ }
return (0);
}
@@ -918,6 +916,10 @@ nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
LIST_INIT(&clp->nfsc_layouthash[i]);
clp->nfsc_flags = NFSCLFLAGS_INITED;
+ clp->nfsc_delegcnt = 0;
+ clp->nfsc_deleghighwater = NFSCLDELEGHIGHWATER;
+ clp->nfsc_layoutcnt = 0;
+ clp->nfsc_layouthighwater = NFSCLLAYOUTHIGHWATER;
clp->nfsc_clientidrev = 1;
clp->nfsc_cbident = nfscl_nextcbident();
nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
@@ -1632,7 +1634,7 @@ nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
}
if (dp != NULL)
nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
- op->nfso_fhlen, cred, p, &dp);
+ op->nfso_fhlen, cred, p, dp);
}
/*
@@ -1750,10 +1752,10 @@ nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
TAILQ_REMOVE(hdp, dp, nfsdl_list);
LIST_REMOVE(dp, nfsdl_hash);
+ dp->nfsdl_clp->nfsc_delegcnt--;
if (freeit)
free(dp, M_NFSCLDELEG);
nfsstatsv1.cldelegates--;
- nfscl_delegcnt--;
}
/*
@@ -2863,7 +2865,7 @@ tryagain:
nfsdl_list);
LIST_REMOVE(dp, nfsdl_hash);
TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
- nfscl_delegcnt--;
+ clp->nfsc_delegcnt--;
nfsstatsv1.cldelegates--;
}
NFSLOCKCLSTATE();
@@ -2893,7 +2895,8 @@ tryagain:
* The tailq list is in LRU order.
*/
dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
- while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
+ while (clp->nfsc_delegcnt > clp->nfsc_deleghighwater &&
+ dp != NULL) {
ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
dp->nfsdl_rwlock.nfslock_lock == 0 &&
@@ -2920,7 +2923,7 @@ tryagain:
TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
LIST_REMOVE(dp, nfsdl_hash);
TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
- nfscl_delegcnt--;
+ clp->nfsc_delegcnt--;
nfsstatsv1.cldelegates--;
}
}
@@ -2976,13 +2979,14 @@ tryagain2:
lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
while (lyp != NULL) {
nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
- if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
+ if ((lyp->nfsly_timestamp < NFSD_MONOSEC ||
+ clp->nfsc_layoutcnt > clp->nfsc_layouthighwater) &&
(lyp->nfsly_flags & (NFSLY_RECALL |
NFSLY_RETONCLOSE)) == 0 &&
lyp->nfsly_lock.nfslock_usecnt == 0 &&
lyp->nfsly_lock.nfslock_lock == 0) {
NFSCL_DEBUG(4, "ret stale lay=%d\n",
- nfscl_layoutcnt);
+ clp->nfsc_layoutcnt);
recallp = malloc(sizeof(*recallp),
M_NFSLAYRECALL, M_NOWAIT);
if (recallp == NULL)
@@ -3504,7 +3508,7 @@ nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p,
* Return any delegation for this vp.
*/
void
-nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
+nfscl_delegreturnvp(struct vnode *vp, bool retdeleg, NFSPROC_T *p)
{
struct nfsclclient *clp;
struct nfscldeleg *dp;
@@ -3527,12 +3531,15 @@ nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
if (clp != NULL)
dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
np->n_fhp->nfh_len);
- if (dp != NULL) {
+ if (dp != NULL &&
+ (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0) {
nfscl_cleandeleg(dp);
nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
NFSUNLOCKCLSTATE();
- newnfs_copycred(&dp->nfsdl_cred, cred);
- nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
+ if (retdeleg) {
+ newnfs_copycred(&dp->nfsdl_cred, cred);
+ nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
+ }
free(dp, M_NFSCLDELEG);
} else
NFSUNLOCKCLSTATE();
@@ -3694,7 +3701,7 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
if (!error)
(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
- (uint64_t)0, NULL);
+ (uint64_t)0, NULL, false, false, false);
break;
case NFSV4OP_CBRECALL:
NFSCL_DEBUG(4, "cbrecall\n");
@@ -3712,18 +3719,10 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
clp = nfscl_getclnt(cbident);
else
clp = nfscl_getclntsess(sessionid);
- if (clp != NULL) {
- dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
- nfhp->nfh_len);
- if (dp != NULL && (dp->nfsdl_flags &
- NFSCLDL_DELEGRET) == 0) {
- dp->nfsdl_flags |=
- NFSCLDL_RECALL;
- wakeup((caddr_t)clp);
- }
- } else {
+ if (clp != NULL)
+ nfscl_startdelegrecall(clp, nfhp);
+ else
error = NFSERR_SERVERFAULT;
- }
NFSUNLOCKCLSTATE();
}
if (nfhp != NULL)
@@ -3933,6 +3932,77 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
}
break;
+ case NFSV4OP_CBRECALLSLOT:
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ highslot = fxdr_unsigned(uint32_t, *tl);
+ NFSLOCKCLSTATE();
+ clp = nfscl_getclntsess(sessionid);
+ if (clp == NULL)
+ error = NFSERR_SERVERFAULT;
+ if (error == 0) {
+ tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+ mtx_lock(&tsep->nfsess_mtx);
+ if ((highslot + 1) < tsep->nfsess_foreslots) {
+ tsep->nfsess_foreslots = (highslot + 1);
+ nfs_resetslots(tsep);
+ }
+ mtx_unlock(&tsep->nfsess_mtx);
+ }
+ NFSUNLOCKCLSTATE();
+ break;
+ case NFSV4OP_CBRECALLANY:
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ i = fxdr_unsigned(int, *tl++);
+ j = fxdr_unsigned(int, *tl);
+ if (i < 0 || j != 1)
+ error = NFSERR_BADXDR;
+ if (error == 0) {
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ j = fxdr_unsigned(int, *tl);
+ if (i < 100)
+ i = 100;
+ else if (i > 100000)
+ i = 100000;
+ NFSLOCKCLSTATE();
+ clp = nfscl_getclntsess(sessionid);
+ if (clp == NULL)
+ error = NFSERR_SERVERFAULT;
+ if (((j & NFSRCA4_RDATA_DLG) != 0 ||
+ (j & NFSRCA4_WDATA_DLG) != 0) &&
+ error == 0 && i <
+ clp->nfsc_deleghighwater)
+ clp->nfsc_deleghighwater = i;
+ if (error == 0 &&
+ ((!NFSHASFLEXFILE(clp->nfsc_nmp) &&
+ (j & NFSRCA4_FILE_LAYOUT) != 0 &&
+ i < clp->nfsc_layouthighwater) ||
+ (NFSHASFLEXFILE(clp->nfsc_nmp) &&
+ (j & (NFSRCA4_FF_LAYOUT_READ |
+ NFSRCA4_FF_LAYOUT_RW)) != 0 &&
+ i < clp->nfsc_layouthighwater)))
+ clp->nfsc_layouthighwater = i;
+ NFSUNLOCKCLSTATE();
+ }
+ break;
+ case NFSV4OP_CBNOTIFY:
+ case NFSV4OP_CBRECALLOBJAVAIL:
+ case NFSV4OP_CBNOTIFYLOCK:
+ /*
+ * These callbacks are not necessarily optional,
+ * so I think it is better to reply NFS_OK than
+ * NFSERR_NOTSUPP.
+ * All provide information for which the FreeBSD client
+ * does not currently have a use.
+ * I am not sure if any of these could be generated
+ * by a NFSv4.1/4.2 server for this client?
+ */
+ error = 0;
+ NFSCL_DEBUG(1, "unsupp callback %d\n", op);
+ break;
+ case NFSV4OP_CBPUSHDELEG:
+ error = NFSERR_REJECTDELEG;
+ NFSCL_DEBUG(1, "unsupp callback %d\n", op);
+ break;
default:
if (i == 0 && minorvers != NFSV4_MINORVERSION)
error = NFSERR_OPNOTINSESS;
@@ -4647,7 +4717,7 @@ nfscl_mustflush(vnode_t vp)
np = VTONFS(vp);
nmp = VFSTONFS(vp->v_mount);
- if (!NFSHASNFSV4(nmp))
+ if (!NFSHASNFSV4(nmp) || vp->v_type != VREG)
return (1);
NFSLOCKMNT(nmp);
if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
@@ -4687,7 +4757,7 @@ nfscl_nodeleg(vnode_t vp, int writedeleg)
np = VTONFS(vp);
nmp = VFSTONFS(vp->v_mount);
- if (!NFSHASNFSV4(nmp))
+ if (!NFSHASNFSV4(nmp) || vp->v_type != VREG)
return (1);
NFSLOCKMNT(nmp);
if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
@@ -5099,7 +5169,7 @@ nfscl_newnode(vnode_t vp)
* to the local clock time.
*/
void
-nfscl_delegmodtime(vnode_t vp)
+nfscl_delegmodtime(struct vnode *vp, struct timespec *mtime)
{
struct nfsclclient *clp;
struct nfscldeleg *dp;
@@ -5123,7 +5193,10 @@ nfscl_delegmodtime(vnode_t vp)
}
dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
- nanotime(&dp->nfsdl_modtime);
+ if (mtime != NULL)
+ dp->nfsdl_modtime = *mtime;
+ else
+ nanotime(&dp->nfsdl_modtime);
dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
}
NFSUNLOCKCLSTATE();
@@ -5266,7 +5339,7 @@ nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
nfsly_hash);
lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
- nfscl_layoutcnt++;
+ clp->nfsc_layoutcnt++;
nfsstatsv1.cllayouts++;
} else {
if (retonclose != 0)
@@ -5641,7 +5714,7 @@ nfscl_freelayout(struct nfscllayout *layp)
LIST_REMOVE(rp, nfsrecly_list);
free(rp, M_NFSLAYRECALL);
}
- nfscl_layoutcnt--;
+ layp->nfsly_clp->nfsc_layoutcnt--;
nfsstatsv1.cllayouts--;
free(layp, M_NFSLAYOUT);
}
@@ -5879,3 +5952,69 @@ tryagain:
NFSUNLOCKCLSTATE();
return (0);
}
+
+/*
+ * Check access against a delegation ace.
+ * Return EINVAL for any case where the check cannot be completed.
+ *
+ * vp is the vnode being checked, accmode the access being requested and
+ * cred the credentials it is requested for.
+ * The check is only attempted when both NFSHASNFSV4() and NFSHASNFSV4N()
+ * are true for the mount, vp is a regular file (VREG) and the mount has
+ * NFSMNTP_DELEGISSUED set in nm_privflag.
+ * If a delegation is found for vp's file handle that has neither
+ * NFSCLDL_RECALL nor NFSCLDL_DELEGRET set, its nfsdl_ace is copied into a
+ * one entry ACL and evaluated by vaccess_acl_nfs4(), using the uid and gid
+ * from np->n_vattr.
+ * Returns 0 or EACCES when vaccess_acl_nfs4() gives one of those answers.
+ * Every other outcome, including any other error from vaccess_acl_nfs4(),
+ * is reported as EINVAL.
+ * The temporary ACL is freed on every return path after it is allocated.
+ */
+int
+nfscl_delegacecheck(struct vnode *vp, accmode_t accmode, struct ucred *cred)
+{
+ struct nfsclclient *clp;
+ struct nfscldeleg *dp;
+ struct nfsnode *np;
+ struct nfsmount *nmp;
+ struct acl *aclp;
+ int error;
+
+ np = VTONFS(vp);
+ nmp = VFSTONFS(vp->v_mount);
+ if (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) || vp->v_type != VREG)
+ return (EINVAL);
+ NFSLOCKMNT(nmp);
+ if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
+ NFSUNLOCKMNT(nmp);
+ return (EINVAL);
+ }
+ NFSUNLOCKMNT(nmp);
+ aclp = acl_alloc(M_WAITOK); /* Done before the state lock is taken; presumably since M_WAITOK can sleep. */
+ NFSLOCKCLSTATE();
+ clp = nfscl_findcl(nmp);
+ if (clp == NULL) {
+ NFSUNLOCKCLSTATE();
+ acl_free(aclp);
+ return (EINVAL);
+ }
+ dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
+ if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL |
+ NFSCLDL_DELEGRET)) == 0) {
+ memcpy(&aclp->acl_entry[0], &dp->nfsdl_ace,
+ sizeof(struct acl_entry));
+ NFSUNLOCKCLSTATE(); /* The ace has been copied; dp is not used below. */
+ aclp->acl_cnt = 1;
+ error = vaccess_acl_nfs4(vp->v_type, np->n_vattr.na_uid,
+ np->n_vattr.na_gid, aclp, accmode, cred);
+ acl_free(aclp);
+ if (error == 0 || error == EACCES)
+ return (error);
+ } else {
+ NFSUNLOCKCLSTATE();
+ acl_free(aclp);
+ }
+ return (EINVAL); /* No usable delegation or no definite answer. */
+}
+
+/*
+ * Start the recall of a delegation. Called for CB_RECALL and REMOVE
+ * when nlink == 0 after the REMOVE.
+ *
+ * clp is the client the delegation belongs to and nfhp the file handle
+ * used to look it up. If a delegation is found that does not already
+ * have NFSCLDL_DELEGRET set, NFSCLDL_RECALL is set on it and wakeup() is
+ * called on clp. Nothing is done when no such delegation exists.
+ * No locking is done here. NOTE(review): the CB_RECALL caller in
+ * nfscl_docb() calls this just before NFSUNLOCKCLSTATE(), so the NFSCL
+ * state lock is presumably required to be held; confirm for other callers.
+ */
+void
+nfscl_startdelegrecall(struct nfsclclient *clp, struct nfsfh *nfhp)
+{
+ struct nfscldeleg *dp;
+
+ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
+ if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) {
+ dp->nfsdl_flags |= NFSCLDL_RECALL;
+ wakeup((caddr_t)clp); /* Wake any thread sleeping on the client. */
+ }
+}
diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c
index 80ab979d22d7..ae9fa51947cc 100644
--- a/sys/fs/nfsclient/nfs_clsubs.c
+++ b/sys/fs/nfsclient/nfs_clsubs.c
@@ -54,6 +54,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/malloc.h>
+#include <sys/stdarg.h>
#include <sys/syscall.h>
#include <sys/sysproto.h>
#include <sys/taskqueue.h>
@@ -71,12 +72,6 @@
#include <netinet/in.h>
-/*
- * Note that stdarg.h and the ANSI style va_start macro is used for both
- * ANSI and traditional C compilers.
- */
-#include <machine/stdarg.h>
-
extern struct mtx ncl_iod_mutex;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
@@ -188,7 +183,7 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper)
np = VTONFS(vp);
vap = &np->n_vattr.na_vattr;
nmp = VFSTONFS(vp->v_mount);
- mustflush = nfscl_mustflush(vp); /* must be before mtx_lock() */
+ mustflush = nfscl_nodeleg(vp, 0); /* must be before mtx_lock() */
NFSLOCKNODE(np);
/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
timeo = (time_second - np->n_mtime.tv_sec) / 10;
@@ -221,8 +216,8 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper)
(time_second - np->n_attrstamp), timeo);
#endif
- if ((time_second - np->n_attrstamp) >= timeo &&
- (mustflush != 0 || np->n_attrstamp == 0)) {
+ if (mustflush != 0 && (np->n_attrstamp == 0 ||
+ time_second - np->n_attrstamp >= timeo)) {
nfsstatsv1.attrcache_misses++;
NFSUNLOCKNODE(np);
KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c
index c050eef7d4c3..0bd05c03885b 100644
--- a/sys/fs/nfsclient/nfs_clvfsops.c
+++ b/sys/fs/nfsclient/nfs_clvfsops.c
@@ -415,7 +415,7 @@ ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
}
/*
- * Mount a remote root fs via. nfs. This depends on the info in the
+ * Mount a remote root fs via nfs. This depends on the info in the
* nfs_diskless structure that has been filled in properly by some primary
* bootstrap.
* It goes something like this:
@@ -1524,12 +1524,14 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
#endif
NFSCL_DEBUG(3, "in mnt\n");
+ CURVNET_SET(CRED_TO_VNET(cred));
clp = NULL;
if (mp->mnt_flag & MNT_UPDATE) {
nmp = VFSTONFS(mp);
printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
free(nam, M_SONAME);
free(tlscertname, M_NEWNFSMNT);
+ CURVNET_RESTORE();
return (0);
} else {
/* NFS-over-TLS requires that rpctls be functioning. */
@@ -1544,6 +1546,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
if (error != 0) {
free(nam, M_SONAME);
free(tlscertname, M_NEWNFSMNT);
+ CURVNET_RESTORE();
return (error);
}
}
@@ -1798,12 +1801,18 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
if (argp->flags & NFSMNT_NFSV3)
ncl_fsinfo(nmp, *vpp, cred, td);
- /* Mark if the mount point supports NFSv4 ACLs. */
- if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
- ret == 0 &&
- NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
+ /*
+ * Mark if the mount point supports NFSv4 ACLs and
+ * named attributes.
+ */
+ if ((argp->flags & NFSMNT_NFSV4) != 0) {
MNT_ILOCK(mp);
- mp->mnt_flag |= MNT_NFS4ACLS;
+ if (ret == 0 && nfsrv_useacl != 0 &&
+ NFSISSET_ATTRBIT(&nfsva.na_suppattr,
+ NFSATTRBIT_ACL))
+ mp->mnt_flag |= MNT_NFS4ACLS;
+ if (nmp->nm_minorvers > 0)
+ mp->mnt_flag |= MNT_NAMEDATTR;
MNT_IUNLOCK(mp);
}
@@ -1816,6 +1825,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
*/
NFSVOPUNLOCK(*vpp);
vfs_cache_root_set(mp, *vpp);
+ CURVNET_RESTORE();
return (0);
}
error = EIO;
@@ -1844,6 +1854,7 @@ bad:
free(nmp->nm_tlscertname, M_NEWNFSMNT);
free(nmp, M_NEWNFSMNT);
free(nam, M_SONAME);
+ CURVNET_RESTORE();
return (error);
}
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 76a3cdf9281e..fa451887e73e 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -106,6 +106,7 @@ uint32_t nfscl_accesscache_load_done_id;
extern struct nfsstatsv1 nfsstatsv1;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
+NFSCLSTATEMUTEX;
MALLOC_DECLARE(M_NEWNFSREQ);
static vop_read_t nfsfifo_read;
@@ -113,6 +114,8 @@ static vop_write_t nfsfifo_write;
static vop_close_t nfsfifo_close;
static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
struct thread *);
+static int nfs_get_namedattrdir(struct vnode *, struct componentname *,
+ struct vnode **);
static vop_lookup_t nfs_lookup;
static vop_create_t nfs_create;
static vop_mknod_t nfs_mknod;
@@ -248,10 +251,13 @@ VFS_VOP_VECTOR_REGISTER(newnfs_fifoops);
static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
- int namelen, struct ucred *cred, struct thread *td);
+ int namelen, struct ucred *cred, struct thread *td, bool silly);
+static void nfs_removestatus(struct vnode *vp, nfsremove_status file_status,
+ bool silly, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
- char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
+ char *tnameptr, int tnamelen, bool silly, struct ucred *cred,
+ struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
struct componentname *scnp, struct sillyrename *sp);
@@ -474,6 +480,18 @@ nfs_access(struct vop_access_args *ap)
break;
}
}
+
+ /*
+ * For NFSv4, check for a delegation with an Allow ACE, to see
+ * if that permits access.
+ */
+ if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) != 0) {
+ error = nfscl_delegacecheck(vp, ap->a_accmode, ap->a_cred);
+ if (error == 0)
+ return (error);
+ error = 0;
+ }
+
/*
* For nfs v3 or v4, check to see if we have done this recently, and if
* so return our cached result instead of making an ACCESS call.
@@ -827,9 +845,11 @@ nfs_close(struct vop_close_args *ap)
struct ucred *cred;
int error = 0, ret, localcred = 0;
int fmode = ap->a_fflag;
+ struct nfsmount *nmp;
if (NFSCL_FORCEDISM(vp->v_mount))
return (0);
+ nmp = VFSTONFS(vp->v_mount);
/*
* During shutdown, a_cred isn't valid, so just use root.
*/
@@ -883,7 +903,9 @@ nfs_close(struct vop_close_args *ap)
error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
/* np->n_flag &= ~NMODIFIED; */
} else if (NFS_ISV4(vp)) {
- if (nfscl_mustflush(vp) != 0) {
+ if (!NFSHASNFSV4N(nmp) ||
+ (nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+ nfscl_mustflush(vp) != 0) {
int cm = newnfs_commit_on_close ? 1 : 0;
if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
@@ -925,7 +947,7 @@ nfs_close(struct vop_close_args *ap)
* is the cause of some caching/coherency issue that might
* crop up.)
*/
- if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
+ if (nmp->nm_negnametimeo == 0) {
np->n_attrstamp = 0;
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
}
@@ -940,9 +962,9 @@ nfs_close(struct vop_close_args *ap)
/*
* Get attributes so "change" is up to date.
*/
- if (error == 0 && nfscl_mustflush(vp) != 0 &&
+ if (error == 0 && nfscl_nodeleg(vp, 0) != 0 &&
vp->v_type == VREG &&
- (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
+ (nmp->nm_flag & NFSMNT_NOCTO) == 0) {
ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva);
if (!ret) {
np->n_change = nfsva.na_filerev;
@@ -1023,8 +1045,9 @@ nfs_getattr(struct vop_getattr_args *ap)
return (0);
}
}
+
error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva);
- if (!error)
+ if (error == 0)
error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0);
if (!error) {
/*
@@ -1051,21 +1074,29 @@ nfs_setattr(struct vop_setattr_args *ap)
int error = 0;
u_quad_t tsize;
struct timespec ts;
+ struct nfsmount *nmp;
#ifndef nolint
tsize = (u_quad_t)0;
#endif
/*
- * Setting of flags and marking of atimes are not supported.
+ * Only the setting of UF_HIDDEN and UF_SYSTEM is supported, and
+ * only for NFSv4 servers that support them.
*/
- if (vap->va_flags != VNOVAL)
+ nmp = VFSTONFS(vp->v_mount);
+ if (vap->va_flags != VNOVAL && (!NFSHASNFSV4(nmp) ||
+ (vap->va_flags & ~(UF_HIDDEN | UF_SYSTEM)) != 0 ||
+ ((vap->va_flags & UF_HIDDEN) != 0 &&
+ !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_HIDDEN)) ||
+ ((vap->va_flags & UF_SYSTEM) != 0 &&
+ !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_SYSTEM))))
return (EOPNOTSUPP);
/*
* Disallow write attempts if the filesystem is mounted read-only.
*/
- if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+ if ((vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
vap->va_mtime.tv_sec != VNOVAL ||
vap->va_birthtime.tv_sec != VNOVAL ||
@@ -1120,7 +1151,7 @@ nfs_setattr(struct vop_setattr_args *ap)
* Call nfscl_delegmodtime() to set the modify time
* locally, as required.
*/
- nfscl_delegmodtime(vp);
+ nfscl_delegmodtime(vp, NULL);
} else
NFSUNLOCKNODE(np);
/*
@@ -1158,6 +1189,8 @@ nfs_setattr(struct vop_setattr_args *ap)
NFSUNLOCKNODE(np);
}
}
+ if (vap->va_mtime.tv_sec != VNOVAL && error == 0)
+ nfscl_delegmodtime(vp, &vap->va_mtime);
return (error);
}
@@ -1192,6 +1225,40 @@ nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
}
/*
+ * Get a named attribute directory for the vnode.
+ *
+ * vp is the vnode whose named attribute directory is wanted and cnp the
+ * componentname of the lookup/create in progress; its cn_flags,
+ * cn_cred and cn_lkflags are used.
+ * On success 0 is returned and *vpp is the directory's vnode, acquired
+ * by nfscl_nget() with cnp->cn_lkflags and marked VIRF_NAMEDDIR.
+ * (The callers in this file release it with vput(), so it is presumably
+ * returned locked and referenced; confirm against nfscl_nget().)
+ * On failure the error is returned and *vpp is left NULL. A
+ * NFSERR_NOTSUPP reply from nfsrpc_openattr() is reported as ENOATTR.
+ * NOTE(review): nfs_lookup() keeps cn_flags in a uint64_t, so confirm the
+ * parameter of nfsrpc_openattr() that receives
+ * (cnp->cn_flags & CREATENAMED) cannot truncate that value.
+ */
+static int
+nfs_get_namedattrdir(struct vnode *vp, struct componentname *cnp,
+ struct vnode **vpp)
+{
+ struct nfsfh *nfhp;
+ struct nfsnode *np;
+ struct vnode *newvp;
+ struct nfsvattr nfsva;
+ int attrflag, error;
+
+ attrflag = 0;
+ *vpp = NULL;
+ np = VTONFS(vp); /* At this point np is the nfsnode of vp itself. */
+ error = nfsrpc_openattr(VFSTONFS(vp->v_mount), vp, np->n_fhp->nfh_fh,
+ np->n_fhp->nfh_len, (cnp->cn_flags & CREATENAMED),
+ cnp->cn_cred, curthread, &nfsva, &nfhp, &attrflag);
+ if (error == NFSERR_NOTSUPP)
+ error = ENOATTR;
+ if (error == 0)
+ error = nfscl_nget(vp->v_mount, vp, nfhp, cnp, curthread, &np,
+ cnp->cn_lkflags); /* np now refers to the new directory node. */
+ if (error != 0)
+ return (error);
+ newvp = NFSTOV(np);
+ vn_irflag_set_cond(newvp, VIRF_NAMEDDIR);
+ if (attrflag != 0) /* Only load attributes if the RPC returned them. */
+ (void)nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+ *vpp = newvp;
+ return (0);
+}
+
+/*
* nfs lookup call, one step at a time...
* First look in cache
* If not found, unlock the directory nfsnode and do the rpc
@@ -1203,7 +1270,7 @@ nfs_lookup(struct vop_lookup_args *ap)
struct vnode *dvp = ap->a_dvp;
struct vnode **vpp = ap->a_vpp;
struct mount *mp = dvp->v_mount;
- int flags = cnp->cn_flags;
+ uint64_t flags = cnp->cn_flags;
struct vnode *newvp;
struct nfsmount *nmp;
struct nfsnode *np, *newnp;
@@ -1214,15 +1281,57 @@ nfs_lookup(struct vop_lookup_args *ap)
struct vattr vattr;
struct timespec nctime, ts;
uint32_t openmode;
+ bool is_nameddir, needs_nameddir, opennamed;
+ dattrflag = 0;
*vpp = NULLVP;
+ nmp = VFSTONFS(mp);
+ opennamed = (flags & (OPENNAMED | ISLASTCN)) == (OPENNAMED | ISLASTCN);
+ if (opennamed && (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp)))
+ return (ENOATTR);
+ is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) != 0;
+ if ((is_nameddir && (flags & ISLASTCN) == 0 && (cnp->cn_namelen > 1 ||
+ *cnp->cn_nameptr != '.')) ||
+ (opennamed && !is_nameddir && (flags & ISDOTDOT) != 0))
+ return (ENOATTR);
if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
return (EROFS);
+ np = VTONFS(dvp);
+
+ needs_nameddir = false;
+ if (opennamed || is_nameddir) {
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (!is_nameddir)
+ needs_nameddir = true;
+ }
+
+ /*
+ * If the named attribute directory is needed, acquire it now.
+ */
+ newvp = NULLVP;
+ if (needs_nameddir) {
+ KASSERT(np->n_v4 == NULL, ("nfs_lookup: O_NAMEDATTR when"
+ " n_v4 not NULL"));
+ error = nfs_get_namedattrdir(dvp, cnp, &newvp);
+ if (error != 0)
+ goto handle_error;
+ if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
+ *vpp = newvp;
+ return (0);
+ }
+ dvp = newvp;
+ np = VTONFS(dvp);
+ newvp = NULLVP;
+ } else if (opennamed && cnp->cn_namelen == 1 &&
+ *cnp->cn_nameptr == '.') {
+ VREF(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+
if (dvp->v_type != VDIR)
return (ENOTDIR);
- nmp = VFSTONFS(mp);
- np = VTONFS(dvp);
/* For NFSv4, wait until any remove is done. */
NFSLOCKNODE(np);
@@ -1235,80 +1344,91 @@ nfs_lookup(struct vop_lookup_args *ap)
error = vn_dir_check_exec(dvp, cnp);
if (error != 0)
return (error);
- error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
- if (error > 0 && error != ENOENT)
- return (error);
- if (error == -1) {
- /*
- * Lookups of "." are special and always return the
- * current directory. cache_lookup() already handles
- * associated locking bookkeeping, etc.
- */
- if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
- return (0);
- }
- /*
- * We only accept a positive hit in the cache if the
- * change time of the file matches our cached copy.
- * Otherwise, we discard the cache entry and fallback
- * to doing a lookup RPC. We also only trust cache
- * entries for less than nm_nametimeo seconds.
- *
- * To better handle stale file handles and attributes,
- * clear the attribute cache of this node if it is a
- * leaf component, part of an open() call, and not
- * locally modified before fetching the attributes.
- * This should allow stale file handles to be detected
- * here where we can fall back to a LOOKUP RPC to
- * recover rather than having nfs_open() detect the
- * stale file handle and failing open(2) with ESTALE.
- */
- newvp = *vpp;
- newnp = VTONFS(newvp);
- if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
- (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
- !(newnp->n_flag & NMODIFIED)) {
- NFSLOCKNODE(newnp);
- newnp->n_attrstamp = 0;
- KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
- NFSUNLOCKNODE(newnp);
- }
- if (nfscl_nodeleg(newvp, 0) == 0 ||
- ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
- VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
- timespeccmp(&vattr.va_ctime, &nctime, ==))) {
- NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
- return (0);
- }
- cache_purge(newvp);
- if (dvp != newvp)
- vput(newvp);
- else
- vrele(newvp);
- *vpp = NULLVP;
- } else if (error == ENOENT) {
- if (VN_IS_DOOMED(dvp))
- return (ENOENT);
- /*
- * We only accept a negative hit in the cache if the
- * modification time of the parent directory matches
- * the cached copy in the name cache entry.
- * Otherwise, we discard all of the negative cache
- * entries for this directory. We also only trust
- * negative cache entries for up to nm_negnametimeo
- * seconds.
- */
- if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
- VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
- timespeccmp(&vattr.va_mtime, &nctime, ==)) {
- NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
- return (ENOENT);
+ if (!opennamed && !is_nameddir) {
+ error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
+ if (error > 0 && error != ENOENT)
+ return (error);
+ if (error == -1) {
+ /*
+ * Lookups of "." are special and always return the
+ * current directory. cache_lookup() already handles
+ * associated locking bookkeeping, etc.
+ */
+ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
+ return (0);
+ }
+
+ /*
+ * We only accept a positive hit in the cache if the
+ * change time of the file matches our cached copy.
+ * Otherwise, we discard the cache entry and fallback
+ * to doing a lookup RPC. We also only trust cache
+ * entries for less than nm_nametimeo seconds.
+ *
+ * To better handle stale file handles and attributes,
+ * clear the attribute cache of this node if it is a
+ * leaf component, part of an open() call, and not
+ * locally modified before fetching the attributes.
+ * This should allow stale file handles to be detected
+ * here where we can fall back to a LOOKUP RPC to
+ * recover rather than having nfs_open() detect the
+ * stale file handle and failing open(2) with ESTALE.
+ */
+ newvp = *vpp;
+ newnp = VTONFS(newvp);
+ if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
+ (flags & (ISLASTCN | ISOPEN)) ==
+ (ISLASTCN | ISOPEN) &&
+ !(newnp->n_flag & NMODIFIED)) {
+ NFSLOCKNODE(newnp);
+ newnp->n_attrstamp = 0;
+ KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
+ NFSUNLOCKNODE(newnp);
+ }
+ if (nfscl_nodeleg(newvp, 0) == 0 ||
+ ((u_int)(ticks - ncticks) <
+ (nmp->nm_nametimeo * hz) &&
+ VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
+ timespeccmp(&vattr.va_ctime, &nctime, ==))) {
+ NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
+ return (0);
+ }
+ cache_purge(newvp);
+ if (dvp != newvp)
+ vput(newvp);
+ else
+ vrele(newvp);
+ *vpp = NULLVP;
+ } else if (error == ENOENT) {
+ if (VN_IS_DOOMED(dvp))
+ return (ENOENT);
+ /*
+ * We only accept a negative hit in the cache if the
+ * modification time of the parent directory matches
+ * the cached copy in the name cache entry.
+ * Otherwise, we discard all of the negative cache
+ * entries for this directory. We also only trust
+ * negative cache entries for up to nm_negnametimeo
+ * seconds.
+ */
+ if ((u_int)(ticks - ncticks) <
+ (nmp->nm_negnametimeo * hz) &&
+ VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
+ timespeccmp(&vattr.va_mtime, &nctime, ==)) {
+ NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
+ return (ENOENT);
+ }
+ cache_purge_negative(dvp);
}
- cache_purge_negative(dvp);
}
openmode = 0;
+#if 0
+ /*
+ * The use of LookupOpen breaks some builds. It is disabled
+ * until that is fixed.
+ */
/*
* If this is an NFSv4.1/4.2 mount using the "oneopenown" mount
* option, it is possible to do the Open operation in the same
@@ -1321,13 +1441,14 @@ nfs_lookup(struct vop_lookup_args *ap)
if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) &&
(nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
(!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) &&
- (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) {
+ (flags & (ISLASTCN | ISOPEN | OPENNAMED))) == (ISLASTCN | ISOPEN)) {
if ((flags & OPENREAD) != 0)
openmode |= NFSV4OPEN_ACCESSREAD;
if ((flags & OPENWRITE) != 0)
openmode |= NFSV4OPEN_ACCESSWRITE;
}
NFSUNLOCKMNT(nmp);
+#endif
newvp = NULLVP;
NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
@@ -1337,6 +1458,11 @@ nfs_lookup(struct vop_lookup_args *ap)
openmode);
if (dattrflag)
(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
+ if (needs_nameddir) {
+ vput(dvp);
+ dvp = ap->a_dvp;
+ }
+handle_error:
if (error) {
if (newvp != NULLVP) {
vput(newvp);
@@ -1345,13 +1471,14 @@ nfs_lookup(struct vop_lookup_args *ap)
if (error != ENOENT) {
if (NFS_ISV4(dvp))
- error = nfscl_maperr(td, error, (uid_t)0,
- (gid_t)0);
+ error = nfscl_maperr(td, error,
+ (uid_t)0, (gid_t)0);
return (error);
}
/* The requested file was not found. */
- if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ if ((cnp->cn_nameiop == CREATE ||
+ cnp->cn_nameiop == RENAME) &&
(flags & ISLASTCN)) {
/*
* XXX: UFS does a full VOP_ACCESS(dvp,
@@ -1392,7 +1519,8 @@ nfs_lookup(struct vop_lookup_args *ap)
free(nfhp, M_NFSFH);
return (EISDIR);
}
- error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, LK_EXCLUSIVE);
+ error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
+ LK_EXCLUSIVE);
if (error)
return (error);
newvp = NFSTOV(np);
@@ -1413,7 +1541,8 @@ nfs_lookup(struct vop_lookup_args *ap)
}
NFSUNLOCKNODE(np);
if (attrflag)
- (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+ (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+ 0, 1);
*vpp = newvp;
return (0);
}
@@ -1454,19 +1583,23 @@ nfs_lookup(struct vop_lookup_args *ap)
if (error != 0)
return (error);
if (attrflag)
- (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+ (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+ 0, 1);
} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
free(nfhp, M_NFSFH);
VREF(dvp);
newvp = dvp;
if (attrflag)
- (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
+ (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
+ 0, 1);
} else {
error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
cnp->cn_lkflags);
if (error)
return (error);
newvp = NFSTOV(np);
+ if (opennamed)
+ vn_irflag_set_cond(newvp, VIRF_NAMEDATTR);
/*
* If n_localmodtime >= time before RPC, then
* a file modification operation, such as
@@ -1484,8 +1617,10 @@ nfs_lookup(struct vop_lookup_args *ap)
}
NFSUNLOCKNODE(np);
if (attrflag)
- (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
- else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
+ (void)nfscl_loadattrcache(&newvp, &nfsva, NULL,
+ 0, 1);
+ else if ((flags & (ISLASTCN | ISOPEN)) ==
+ (ISLASTCN | ISOPEN) &&
!(np->n_flag & NMODIFIED)) {
/*
* Flush the attribute cache when opening a
@@ -1746,6 +1881,7 @@ nfs_create(struct vop_create_args *ap)
nfsquad_t cverf;
int error = 0, attrflag, dattrflag, fmode = 0;
struct vattr vattr;
+ bool is_nameddir, needs_nameddir, opennamed;
/*
* Oops, not for me..
@@ -1759,6 +1895,32 @@ nfs_create(struct vop_create_args *ap)
fmode |= O_EXCL;
dnp = VTONFS(dvp);
nmp = VFSTONFS(dvp->v_mount);
+ needs_nameddir = false;
+ if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) {
+ opennamed = (cnp->cn_flags & (OPENNAMED | ISLASTCN)) ==
+ (OPENNAMED | ISLASTCN);
+ is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) != 0;
+ if (opennamed || is_nameddir) {
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (!is_nameddir)
+ needs_nameddir = true;
+ }
+ }
+
+ /*
+ * If the named attribute directory is needed, acquire it now.
+ */
+ if (needs_nameddir) {
+ KASSERT(dnp->n_v4 == NULL, ("nfs_create: O_NAMEDATTR when"
+ " n_v4 not NULL"));
+ error = nfs_get_namedattrdir(dvp, cnp, &newvp);
+ if (error != 0)
+ return (error);
+ dvp = newvp;
+ dnp = VTONFS(dvp);
+ newvp = NULL;
+ }
+
again:
/* For NFSv4, wait until any remove is done. */
NFSLOCKNODE(dnp);
@@ -1841,6 +2003,8 @@ again:
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
}
NFSUNLOCKNODE(dnp);
+ if (needs_nameddir)
+ vput(dvp);
return (error);
}
@@ -1864,6 +2028,7 @@ nfs_remove(struct vop_remove_args *ap)
struct nfsnode *np = VTONFS(vp);
int error = 0;
struct vattr vattr;
+ struct nfsmount *nmp;
KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
if (vp->v_type == VDIR)
@@ -1871,6 +2036,7 @@ nfs_remove(struct vop_remove_args *ap)
else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
vattr.va_nlink > 1)) {
+ nmp = VFSTONFS(vp->v_mount);
/*
* Purge the name cache so that the chance of a lookup for
* the name succeeding while the remove is in progress is
@@ -1882,12 +2048,19 @@ nfs_remove(struct vop_remove_args *ap)
/*
* throw away biocache buffers, mainly to avoid
* unnecessary delayed writes later.
+ * Flushing here would be more correct for the case
+ * where nfs_close() did not do a flush. However, it
+ * could be a large performance hit for some servers
+ * and only matters when the file name being removed is
+ * one of multiple hard links.
*/
- error = ncl_vinvalbuf(vp, 0, curthread, 1);
+ if (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
+ (nmp->nm_flag & NFSMNT_NOCTO) == 0)
+ error = ncl_vinvalbuf(vp, 0, curthread, 1);
if (error != EINTR && error != EIO)
/* Do the rpc */
error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
- cnp->cn_namelen, cnp->cn_cred, curthread);
+ cnp->cn_namelen, cnp->cn_cred, curthread, false);
/*
* Kludge City: If the first reply to the remove rpc is lost..
* the reply to the retransmitted request will be ENOENT
@@ -1918,7 +2091,32 @@ ncl_removeit(struct sillyrename *sp, struct vnode *vp)
if (sp->s_dvp->v_type == VBAD)
return (0);
return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
- sp->s_cred, NULL));
+ sp->s_cred, NULL, true));
+}
+
+/*
+ * Act on the nfsremove_status value handed back by the remove or
+ * rename RPC function for the file's vnode.
+ *   NLINK_ZERO - get rid of any delegation, then throw away the
+ *                buffer cache blocks.
+ *   DELETED    - throw away the buffer cache blocks.
+ *   VALID      - nothing to do, delegation is still ok.
+ * Any other value is ignored.  The "silly" argument is accepted but
+ * is not examined by this function.
+ */
+static void
+nfs_removestatus(struct vnode *vp, nfsremove_status file_status,
+    bool silly, struct thread *td)
+{
+
+	/* Only NLINK_ZERO gives up the delegation. */
+	if (file_status == NLINK_ZERO)
+		nfscl_delegreturnvp(vp, false, td);
+
+	/* NLINK_ZERO and DELETED both discard the cached buffers. */
+	if (file_status == NLINK_ZERO || file_status == DELETED)
+		(void)ncl_vinvalbuf(vp, 0, td, 1);
}
/*
@@ -1926,17 +2124,20 @@ ncl_removeit(struct sillyrename *sp, struct vnode *vp)
*/
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
- int namelen, struct ucred *cred, struct thread *td)
+ int namelen, struct ucred *cred, struct thread *td, bool silly)
{
- struct nfsvattr dnfsva;
+ struct nfsvattr dnfsva, nfsva;
struct nfsnode *dnp = VTONFS(dvp);
- int error = 0, dattrflag;
+ struct nfsmount *nmp;
+ int attrflag, error = 0, dattrflag;
+ nfsremove_status file_status;
+ nmp = VFSTONFS(dvp->v_mount);
NFSLOCKNODE(dnp);
dnp->n_flag |= NREMOVEINPROG;
NFSUNLOCKNODE(dnp);
- error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
- &dattrflag);
+ error = nfsrpc_remove(dvp, name, namelen, vp, &nfsva, &attrflag,
+ &file_status, &dnfsva, &dattrflag, cred, td);
NFSLOCKNODE(dnp);
if ((dnp->n_flag & NREMOVEWANT)) {
dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
@@ -1946,11 +2147,19 @@ nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
dnp->n_flag &= ~NREMOVEINPROG;
NFSUNLOCKNODE(dnp);
}
- if (dattrflag)
+
+ if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) {
+ if (file_status != DELETED && attrflag != 0)
+ (void)nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
+ if ((nmp->nm_flag & NFSMNT_NOCTO) != 0)
+ nfs_removestatus(vp, file_status, silly, td);
+ }
+
+ if (dattrflag != 0)
(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
NFSLOCKNODE(dnp);
dnp->n_flag |= NMODIFIED;
- if (!dattrflag) {
+ if (dattrflag == 0) {
dnp->n_attrstamp = 0;
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
}
@@ -1975,6 +2184,7 @@ nfs_rename(struct vop_rename_args *ap)
struct nfsnode *fnp = VTONFS(ap->a_fvp);
struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
struct nfsv4node *newv4 = NULL;
+ struct nfsmount *nmp;
int error;
/* Check for cross-device rename */
@@ -1983,6 +2193,7 @@ nfs_rename(struct vop_rename_args *ap)
error = EXDEV;
goto out;
}
+ nmp = VFSTONFS(fvp->v_mount);
if (fvp == tvp) {
printf("nfs_rename: fvp == tvp (can't happen)\n");
@@ -2005,11 +2216,15 @@ nfs_rename(struct vop_rename_args *ap)
* that was written back to our cache earlier. Not checking for
* this condition can result in potential (silent) data loss.
*/
- error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
+ if ((nmp->nm_flag & NFSMNT_NOCTO) == 0 || !NFSHASNFSV4(nmp) ||
+ !NFSHASNFSV4N(nmp) || nfscl_mustflush(fvp) != 0)
+ error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
NFSVOPUNLOCK(fvp);
- if (!error && tvp)
+ if (error == 0 && tvp != NULL && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
+ !NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
+ nfscl_mustflush(tvp) != 0))
error = VOP_FSYNC(tvp, MNT_WAIT, curthread);
- if (error)
+ if (error != 0)
goto out;
/*
@@ -2024,7 +2239,7 @@ nfs_rename(struct vop_rename_args *ap)
}
error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
- tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
+ tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, false, tcnp->cn_cred,
curthread);
if (error == 0 && NFS_ISV4(tdvp)) {
@@ -2093,7 +2308,7 @@ nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
{
return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
- sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
+ sdvp, NULL, sp->s_name, sp->s_namlen, true, scnp->cn_cred,
curthread));
}
@@ -2103,16 +2318,19 @@ nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
static int
nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
- int tnamelen, struct ucred *cred, struct thread *td)
+ int tnamelen, bool silly, struct ucred *cred, struct thread *td)
{
- struct nfsvattr fnfsva, tnfsva;
+ struct nfsvattr fnfsva, tnfsva, tvpnfsva;
struct nfsnode *fdnp = VTONFS(fdvp);
struct nfsnode *tdnp = VTONFS(tdvp);
- int error = 0, fattrflag, tattrflag;
+ struct nfsmount *nmp;
+ int error = 0, fattrflag, tattrflag, tvpattrflag;
+ nfsremove_status tvp_status;
+ nmp = VFSTONFS(fdvp->v_mount);
error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
- tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
- &tattrflag);
+ tnameptr, tnamelen, &tvp_status, &fnfsva, &tnfsva, &fattrflag,
+ &tattrflag, &tvpnfsva, &tvpattrflag, cred, td);
NFSLOCKNODE(fdnp);
fdnp->n_flag |= NMODIFIED;
if (fattrflag != 0) {
@@ -2133,6 +2351,15 @@ nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
NFSUNLOCKNODE(tdnp);
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
}
+
+ if (tvp != NULL) {
+ if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp) &&
+ (nmp->nm_flag & NFSMNT_NOCTO) != 0)
+ nfs_removestatus(tvp, tvp_status, silly, td);
+ if (!silly && tvpattrflag != 0)
+ (void)nfscl_loadattrcache(&tvp, &tvpnfsva, NULL, 0, 1);
+ }
+
if (error && NFS_ISV4(fdvp))
error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
return (error);
@@ -2156,7 +2383,9 @@ nfs_link(struct vop_link_args *ap)
* doesn't get "out of sync" with the server.
* XXX There should be a better way!
*/
+#ifdef notnow
VOP_FSYNC(vp, MNT_WAIT, curthread);
+#endif
error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag);
@@ -4367,25 +4596,48 @@ nfs_pathconf(struct vop_pathconf_args *ap)
struct nfsmount *nmp;
struct thread *td = curthread;
off_t off;
- bool eof;
+ bool eof, has_namedattr, named_enabled;
int attrflag, error;
+ struct nfsnode *np;
+ nmp = VFSTONFS(vp->v_mount);
+ np = VTONFS(vp);
+ named_enabled = false;
+ has_namedattr = false;
if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
ap->a_name == _PC_NO_TRUNC)) ||
- (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
+ (NFS_ISV4(vp) && (ap->a_name == _PC_ACL_NFS4 ||
+ ap->a_name == _PC_HAS_NAMEDATTR))) {
/*
* Since only the above 4 a_names are returned by the NFSv3
* Pathconf RPC, there is no point in doing it for others.
* For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
- * be used for _PC_NFS4_ACL as well.
+ * be used for _PC_ACL_NFS4 and _PC_HAS_NAMEDATTR as well.
*/
- error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
- &attrflag);
+ error = nfsrpc_pathconf(vp, &pc, &has_namedattr, td->td_ucred,
+ td, &nfsva, &attrflag);
if (attrflag != 0)
(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
if (error != 0)
return (error);
+ } else if (NFS_ISV4(vp) && ap->a_name == _PC_NAMEDATTR_ENABLED &&
+ (np->n_flag & NNAMEDNOTSUPP) == 0) {
+ struct nfsfh *nfhp;
+
+ error = nfsrpc_openattr(nmp, vp, np->n_fhp->nfh_fh,
+ np->n_fhp->nfh_len, false, td->td_ucred, td, &nfsva, &nfhp,
+ &attrflag);
+ named_enabled = true;
+ if (error == 0) {
+ free(nfhp, M_NFSFH);
+ } else if (error == NFSERR_NOTSUPP) {
+ named_enabled = false;
+ NFSLOCKNODE(np);
+ np->n_flag |= NNAMEDNOTSUPP;
+ NFSUNLOCKNODE(np);
+ }
+ error = 0;
} else {
/*
* For NFSv2 (or NFSv3 when not one of the above 4 a_names),
@@ -4468,7 +4720,6 @@ nfs_pathconf(struct vop_pathconf_args *ap)
case _PC_MIN_HOLE_SIZE:
/* Only some NFSv4.2 servers support Seek for Holes. */
*ap->a_retval = 0;
- nmp = VFSTONFS(vp->v_mount);
if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) {
/*
* NFSv4.2 doesn't have an attribute for hole size,
@@ -4499,6 +4750,27 @@ nfs_pathconf(struct vop_pathconf_args *ap)
mtx_unlock(&nmp->nm_mtx);
}
break;
+ case _PC_NAMEDATTR_ENABLED:
+ if (named_enabled)
+ *ap->a_retval = 1;
+ else
+ *ap->a_retval = 0;
+ break;
+ case _PC_HAS_NAMEDATTR:
+ if (has_namedattr)
+ *ap->a_retval = 1;
+ else
+ *ap->a_retval = 0;
+ break;
+ case _PC_HAS_HIDDENSYSTEM:
+ if (NFS_ISV4(vp) && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+ NFSATTRBIT_HIDDEN) &&
+ NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr,
+ NFSATTRBIT_SYSTEM))
+ *ap->a_retval = 1;
+ else
+ *ap->a_retval = 0;
+ break;
default:
error = vop_stdpathconf(ap);
diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h
index cc1959b7bf79..9b2627015612 100644
--- a/sys/fs/nfsclient/nfsnode.h
+++ b/sys/fs/nfsclient/nfsnode.h
@@ -162,6 +162,7 @@ struct nfsnode {
#define NDSCOMMIT 0x00100000 /* Commit is done via the DS. */
#define NVNSETSZSKIP 0x00200000 /* Skipped vnode_pager_setsize() */
#define NMIGHTBELOCKED 0x00400000 /* Might be file locked. */
+#define NNAMEDNOTSUPP 0x00800000 /* Openattr is not supported. */
/*
* Convert between nfsnode pointers and vnode pointers
diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c
index bf0ff4e84d98..de72187bbb91 100644
--- a/sys/fs/nfsserver/nfs_nfsdcache.c
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c
@@ -392,7 +392,7 @@ loop:
nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto loop;
}
@@ -678,7 +678,7 @@ tryagain:
rp = hitrp;
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto tryagain;
}
@@ -750,7 +750,7 @@ nfsrc_lock(struct nfsrvcache *rp)
mtx_assert(mutex, MA_OWNED);
while ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
+ (void)mtx_sleep(rp, mutex, PVFS, "nfsrc", 0);
}
rp->rc_flag |= RC_LOCKED;
}
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 6f5b2855bcf0..4f0d5946d6b9 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -69,6 +69,7 @@ extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
extern int nfs_bufpackets;
extern u_long sb_max_adj;
+extern struct nfsv4lock nfsv4rootfs_lock;
NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst);
@@ -121,7 +122,6 @@ extern struct nfsdevicehead nfsrv_devidhead;
/* Map d_type to vnode type. */
static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR,
VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON };
-#define NFS_DTYPETOVTYPE(t) ((t) <= DT_WHT ? dtype_to_vnode[(t)] : VNON)
static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
struct iovec **);
@@ -129,6 +129,7 @@ static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
struct mbuf **, struct iovec **);
static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
int *);
+static void nfs_dtypetovtype(struct nfsvattr *, struct vnode *, uint8_t);
static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
NFSPROC_T *);
static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
@@ -178,8 +179,6 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
&nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
-SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
- &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
0, "Debug level for NFS server");
NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid);
@@ -189,6 +188,10 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid,
static int nfsrv_pnfsgetdsattr = 1;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
&nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
+static bool nfsrv_recalldeleg = false;
+SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, recalldeleg, CTLFLAG_RW,
+ &nfsrv_recalldeleg, 0,
+ "When set remove/rename recalls delegations for same client");
/*
* nfsrv_dsdirsize can only be increased and only when the nfsd threads are
@@ -294,6 +297,38 @@ SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
+/*
+ * Sysctl handler for vfs.nfsd.enable_locallocks.
+ *
+ * A private copy of nfsrv_dolocallocks is passed to sysctl_handle_int(),
+ * so a read never touches the live variable.  The handler returns early
+ * when sysctl_handle_int() fails, when no new value was supplied
+ * (req->newptr == NULL) or when the new value equals the current one.
+ * A change requested from within a jail is rejected with EINVAL.
+ *
+ * The live variable is only stored while nfsv4rootfs_lock is held.
+ * NOTE(review): the second argument of 1 to nfsv4_lock() presumably
+ * requests the exclusive form of the lock; confirm against nfsv4_lock().
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)
+{
+	int error, igotlock, newdolocallocks;
+
+	newdolocallocks = nfsrv_dolocallocks;
+	error = sysctl_handle_int(oidp, &newdolocallocks, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (newdolocallocks == nfsrv_dolocallocks)
+		return (0);
+	if (jailed(curthread->td_ucred))
+		return (EINVAL);
+
+	/* Keep retrying until nfsv4_lock() reports the lock was acquired. */
+	NFSLOCKV4ROOTMUTEX();
+	do {
+		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
+		    NFSV4ROOTLOCKMUTEXPTR, NULL);
+	} while (!igotlock);
+	NFSUNLOCKV4ROOTMUTEX();
+
+	nfsrv_dolocallocks = newdolocallocks;
+
+	NFSLOCKV4ROOTMUTEX();
+	nfsv4_unlock(&nfsv4rootfs_lock, 0);
+	NFSUNLOCKV4ROOTMUTEX();
+	return (0);
+}
+/*
+ * The OID is a signed int (CTLTYPE_INT), so advertise the matching "I"
+ * format rather than "IU" (unsigned int).
+ */
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+    sysctl_dolocallocks, "I", "Enable nfsd to acquire local locks on files");
+
#define MAX_REORDERED_RPC 16
#define NUM_HEURISTIC 1031
#define NHUSE_INIT 64
@@ -413,6 +448,8 @@ nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
gotattr = 1;
}
+ nvap->na_bsdflags = 0;
+ nvap->na_flags = 0;
error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
if (lockedit != 0)
NFSVOPUNLOCK(vp);
@@ -1451,32 +1488,61 @@ nfsmout:
* Remove a non-directory object.
*/
int
-nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
+nfsvno_removesub(struct nameidata *ndp, bool is_v4, struct nfsrv_descript *nd,
struct thread *p, struct nfsexstuff *exp)
{
- struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
- int error = 0, mirrorcnt;
+ struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp;
+ struct mount *mp;
+ int error = 0, mirrorcnt, ret;
char fname[PNFS_FILENAME_LEN + 1];
fhandle_t fh;
vp = ndp->ni_vp;
dsdvp[0] = NULL;
- if (vp->v_type == VDIR)
+ if (vp->v_type == VDIR) {
error = NFSERR_ISDIR;
- else if (is_v4)
- error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
- p);
+ } else if (is_v4) {
+ if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+ error = nfsrv_checkremove(vp, 1, NULL,
+ (nfsquad_t)((u_quad_t)0), p);
+ else
+ error = nfsrv_checkremove(vp, 1, NULL, nd->nd_clientid,
+ p);
+ }
if (error == 0)
nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
if (!error)
error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
if (error == 0 && dsdvp[0] != NULL)
nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
+ if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0)
+ error = nfsvno_getfh(vp, &fh, p);
if (ndp->ni_dvp == vp)
vrele(ndp->ni_dvp);
else
vput(ndp->ni_dvp);
vput(vp);
+
+ /* Use ret to determine if the file still exists. */
+ if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
+ mp = vfs_busyfs(&fh.fh_fsid);
+ if (mp != NULL) {
+ /* Find out if the file still exists. */
+ ret = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &newvp);
+ if (ret == 0)
+ vput(newvp);
+ else
+ ret = ESTALE;
+ vfs_unbusy(mp);
+ } else {
+ ret = ESTALE;
+ }
+ if (ret == ESTALE) {
+ /* Get rid of any delegation. */
+ nfsrv_removedeleg(&fh, nd, p);
+ }
+ }
+
nfsvno_relpathbuf(ndp);
NFSEXITCODE(error);
return (error);
@@ -1527,33 +1593,34 @@ out:
*/
int
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
- u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
+ struct nfsrv_descript *nd, struct thread *p)
{
- struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
- int error = 0, mirrorcnt;
+ struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp;
+ struct mount *mp;
+ int error = 0, mirrorcnt, ret;
char fname[PNFS_FILENAME_LEN + 1];
- fhandle_t fh;
+ fhandle_t fh, fh2;
dsdvp[0] = NULL;
fvp = fromndp->ni_vp;
- if (ndstat) {
+ if (nd->nd_repstat != 0) {
vrele(fromndp->ni_dvp);
vrele(fvp);
- error = ndstat;
+ error = nd->nd_repstat;
goto out1;
}
tdvp = tondp->ni_dvp;
tvp = tondp->ni_vp;
if (tvp != NULL) {
if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
- error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
+ error = (nd->nd_flag & ND_NFSV2) ? EISDIR : EEXIST;
goto out;
} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
- error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
+ error = (nd->nd_flag & ND_NFSV2) ? ENOTDIR : EEXIST;
goto out;
}
if (tvp->v_type == VDIR && tvp->v_mountedhere) {
- error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+ error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
goto out;
}
@@ -1572,35 +1639,45 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
}
}
if (fvp->v_type == VDIR && fvp->v_mountedhere) {
- error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+ error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
goto out;
}
if (fvp->v_mount != tdvp->v_mount) {
- error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
+ error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
goto out;
}
if (fvp == tdvp) {
- error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
+ error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
goto out;
}
if (fvp == tvp) {
/*
- * If source and destination are the same, there is nothing to
- * do. Set error to -1 to indicate this.
+ * If source and destination are the same, there is
+ * nothing to do. Set error to EJUSTRETURN to indicate
+ * this.
*/
- error = -1;
+ error = EJUSTRETURN;
goto out;
}
- if (ndflag & ND_NFSV4) {
+ if (nd->nd_flag & ND_NFSV4) {
if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
- error = nfsrv_checkremove(fvp, 0, NULL,
- (nfsquad_t)((u_quad_t)0), p);
+ if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+ error = nfsrv_checkremove(fvp, 0, NULL,
+ (nfsquad_t)((u_quad_t)0), p);
+ else
+ error = nfsrv_checkremove(fvp, 0, NULL,
+ nd->nd_clientid, p);
NFSVOPUNLOCK(fvp);
} else
error = EPERM;
- if (tvp && !error)
- error = nfsrv_checkremove(tvp, 1, NULL,
- (nfsquad_t)((u_quad_t)0), p);
+ if (tvp && !error) {
+ if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0)
+ error = nfsrv_checkremove(tvp, 1, NULL,
+ (nfsquad_t)((u_quad_t)0), p);
+ else
+ error = nfsrv_checkremove(tvp, 1, NULL,
+ nd->nd_clientid, p);
+ }
} else {
/*
* For NFSv2 and NFSv3, try to get rid of the delegation, so
@@ -1612,15 +1689,35 @@ nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
nfsd_recalldelegation(fvp, p);
}
if (error == 0 && tvp != NULL) {
- nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
+ if ((nd->nd_flag & ND_NFSV41) != 0)
+ error = nfsvno_getfh(tvp, &fh2, p);
+ if (error == 0)
+ nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname,
+ &fh);
NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
" dsdvp=%p\n", dsdvp[0]);
}
out:
- if (!error) {
+ mp = NULL;
+ if (error == 0) {
+ error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp);
+ if (error == 0) {
+ if (mp == NULL) {
+ error = ENOENT;
+ } else {
+ error = lockmgr(&mp->mnt_renamelock,
+ LK_EXCLUSIVE | LK_NOWAIT, NULL);
+ if (error != 0)
+ error = ERELOOKUP;
+ }
+ }
+ }
+ if (error == 0) {
error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
&fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
&tondp->ni_cnd);
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ vfs_rel(mp);
} else {
if (tdvp == tvp)
vrele(tdvp);
@@ -1630,8 +1727,13 @@ out:
vput(tvp);
vrele(fromndp->ni_dvp);
vrele(fvp);
- if (error == -1)
+ if (error == EJUSTRETURN) {
error = 0;
+ } else if (error == ERELOOKUP && mp != NULL) {
+ lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0);
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ vfs_rel(mp);
+ }
}
/*
@@ -1644,6 +1746,26 @@ out:
NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
}
+ /* Use ret to determine if the file still exists. */
+ if ((nd->nd_flag & ND_NFSV41) != 0 && error == 0) {
+ mp = vfs_busyfs(&fh2.fh_fsid);
+ if (mp != NULL) {
+ /* Find out if the file still exists. */
+ ret = VFS_FHTOVP(mp, &fh2.fh_fid, LK_SHARED, &newvp);
+ if (ret == 0)
+ vput(newvp);
+ else
+ ret = ESTALE;
+ vfs_unbusy(mp);
+ } else {
+ ret = ESTALE;
+ }
+ if (ret == ESTALE) {
+ /* Get rid of any delegation. */
+ nfsrv_removedeleg(&fh2, nd, p);
+ }
+ }
+
nfsvno_relpathbuf(tondp);
out1:
nfsvno_relpathbuf(fromndp);
@@ -1990,7 +2112,8 @@ int
nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
struct ucred *cred, struct thread *p, int isdgram, int reterr,
- int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
+ int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno,
+ bool xattrsupp, bool has_hiddensystem, bool has_namedattr)
{
struct statfs *sf;
int error;
@@ -2009,12 +2132,29 @@ nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
}
error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
- mounted_on_fileno, sf);
+ mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr);
free(sf, M_TEMP);
NFSEXITCODE2(0, nd);
return (error);
}
+/*
+ * Set the vnode type in *nvap for a directory entry whose dirent
+ * d_type is "dtype".
+ * When vp has VIRF_NAMEDDIR set, the type is always VREG and
+ * SFBSD_NAMEDATTR is or'd into na_bsdflags.  Otherwise the type is
+ * looked up in dtype_to_vnode[], with VNON used for any d_type
+ * beyond DT_WHT.
+ */
+static void
+nfs_dtypetovtype(struct nfsvattr *nvap, struct vnode *vp, uint8_t dtype)
+{
+
+	if ((vn_irflag_read(vp) & VIRF_NAMEDDIR) != 0) {
+		nvap->na_type = VREG;
+		nvap->na_bsdflags |= SFBSD_NAMEDATTR;
+		return;
+	}
+
+	/* Bound the index before using the lookup table. */
+	nvap->na_type = (dtype <= DT_WHT) ? dtype_to_vnode[dtype] : VNON;
+}
+
+
/* Since the Readdir vnode ops vary, put the entire functions in here. */
/*
* nfs readdir service
@@ -2309,7 +2449,7 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
struct nfsvattr nva, at, *nvap = &nva;
struct mbuf *mb0, *mb1;
struct nfsreferral *refp;
- int nlen, r, error = 0, getret = 1, usevget = 1;
+ int nlen, r, error = 0, getret = 1, ret, usevget = 1;
int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
caddr_t bpos0, bpos1;
u_int64_t off, toff, verf __unused;
@@ -2323,6 +2463,9 @@ nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
uint64_t mounted_on_fileno;
struct thread *p = curthread;
int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
+ size_t atsiz;
+ long pathval;
+ bool has_hiddensystem, has_namedattr, xattrsupp;
if (nd->nd_repstat) {
nfsrv_postopattr(nd, getret, &at);
@@ -2634,6 +2777,10 @@ again:
LK_SHARED, &nvp);
else
r = EOPNOTSUPP;
+ if (r == 0 && (vn_irflag_read(vp) &
+ VIRF_NAMEDDIR) != 0)
+ vn_irflag_set_cond(nvp,
+ VIRF_NAMEDATTR);
if (r == EOPNOTSUPP) {
if (usevget) {
usevget = 0;
@@ -2648,6 +2795,10 @@ again:
cn.cn_namelen = nlen;
cn.cn_flags = ISLASTCN |
NOFOLLOW | LOCKLEAF;
+ if ((vn_irflag_read(vp) &
+ VIRF_NAMEDDIR) != 0)
+ cn.cn_flags |=
+ OPENNAMED;
if (nlen == 2 &&
dp->d_name[0] == '.' &&
dp->d_name[1] == '.')
@@ -2765,7 +2916,7 @@ again:
/* Only need Type and/or Fileid. */
VATTR_NULL(&nvap->na_vattr);
nvap->na_fileid = dp->d_fileno;
- nvap->na_type = NFS_DTYPETOVTYPE(dp->d_type);
+ nfs_dtypetovtype(nvap, vp, dp->d_type);
}
/*
@@ -2789,9 +2940,32 @@ again:
*tl++ = newnfs_true;
txdr_hyper(*cookiep, tl);
dirlen += nfsm_strtom(nd, dp->d_name, nlen);
+ xattrsupp = false;
+ has_hiddensystem = false;
+ has_namedattr = false;
if (nvp != NULL) {
supports_nfsv4acls =
nfs_supportsnfsv4acls(nvp);
+ if (NFSISSET_ATTRBIT(&attrbits,
+ NFSATTRBIT_XATTRSUPPORT)) {
+ ret = VOP_GETEXTATTR(nvp,
+ EXTATTR_NAMESPACE_USER,
+ "xxx", NULL, &atsiz,
+ nd->nd_cred, p);
+ xattrsupp = ret != EOPNOTSUPP;
+ }
+ if (VOP_PATHCONF(nvp,
+ _PC_HAS_HIDDENSYSTEM, &pathval) !=
+ 0)
+ pathval = 0;
+ has_hiddensystem = pathval > 0;
+ pathval = 0;
+ if (NFSISSET_ATTRBIT(&attrbits,
+ NFSATTRBIT_NAMEDATTR) &&
+ VOP_PATHCONF(nvp, _PC_HAS_NAMEDATTR,
+ &pathval) != 0)
+ pathval = 0;
+ has_namedattr = pathval > 0;
NFSVOPUNLOCK(nvp);
} else
supports_nfsv4acls = 0;
@@ -2811,13 +2985,15 @@ again:
nvp, nvap, &nfh, r, &rderrbits,
nd->nd_cred, p, isdgram, 0,
supports_nfsv4acls, at_root,
- mounted_on_fileno);
+ mounted_on_fileno, xattrsupp,
+ has_hiddensystem, has_namedattr);
} else {
dirlen += nfsvno_fillattr(nd, new_mp,
nvp, nvap, &nfh, r, &attrbits,
nd->nd_cred, p, isdgram, 0,
supports_nfsv4acls, at_root,
- mounted_on_fileno);
+ mounted_on_fileno, xattrsupp,
+ has_hiddensystem, has_namedattr);
}
if (nvp != NULL)
vrele(nvp);
@@ -2995,12 +3171,17 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
/*
* Loop around getting the setable attributes. If an unsupported
* one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
+ * Once nd_repstat != 0, do not set the attribute value, but keep
+ * parsing the attribute(s).
*/
if (retnotsup) {
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
bitpos = NFSATTRBIT_MAX;
} else {
bitpos = 0;
+ if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) ||
+ NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SYSTEM))
+ nvap->na_flags = 0;
}
moderet = 0;
for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
@@ -3012,12 +3193,13 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
switch (bitpos) {
case NFSATTRBIT_SIZE:
NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
- if (vp != NULL && vp->v_type != VREG) {
- error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
- NFSERR_INVAL;
- goto nfsmout;
+ if (!nd->nd_repstat) {
+ if (vp != NULL && vp->v_type != VREG)
+ nd->nd_repstat = (vp->v_type == VDIR) ?
+ NFSERR_ISDIR : NFSERR_INVAL;
+ else
+ nvap->na_size = fxdr_hyper(tl);
}
- nvap->na_size = fxdr_hyper(tl);
attrsum += NFSX_HYPER;
break;
case NFSATTRBIT_ACL:
@@ -3036,9 +3218,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_HIDDEN:
- NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- if (!nd->nd_repstat)
- nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (nd->nd_repstat == 0) {
+ if (*tl == newnfs_true)
+ nvap->na_flags |= UF_HIDDEN;
+ }
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_MIMETYPE:
@@ -3054,7 +3238,8 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
case NFSATTRBIT_MODE:
moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- nvap->na_mode = nfstov_mode(*tl);
+ if (!nd->nd_repstat)
+ nvap->na_mode = nfstov_mode(*tl);
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_OWNER:
@@ -3112,9 +3297,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
break;
case NFSATTRBIT_SYSTEM:
- NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- if (!nd->nd_repstat)
- nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (nd->nd_repstat == 0) {
+ if (*tl == newnfs_true)
+ nvap->na_flags |= UF_SYSTEM;
+ }
attrsum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_TIMEACCESSSET:
@@ -3122,10 +3309,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
attrsum += NFSX_UNSIGNED;
if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
- fxdr_nfsv4time(tl, &nvap->na_atime);
+ if (!nd->nd_repstat)
+ fxdr_nfsv4time(tl, &nvap->na_atime);
toclient = 1;
attrsum += NFSX_V4TIME;
- } else {
+ } else if (!nd->nd_repstat) {
vfs_timestamp(&nvap->na_atime);
nvap->na_vaflags |= VA_UTIMES_NULL;
}
@@ -3138,7 +3326,8 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
break;
case NFSATTRBIT_TIMECREATE:
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
- fxdr_nfsv4time(tl, &nvap->na_btime);
+ if (!nd->nd_repstat)
+ fxdr_nfsv4time(tl, &nvap->na_btime);
attrsum += NFSX_V4TIME;
break;
case NFSATTRBIT_TIMEMODIFYSET:
@@ -3146,10 +3335,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
attrsum += NFSX_UNSIGNED;
if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
- fxdr_nfsv4time(tl, &nvap->na_mtime);
+ if (!nd->nd_repstat)
+ fxdr_nfsv4time(tl, &nvap->na_mtime);
nvap->na_vaflags &= ~VA_UTIMES_NULL;
attrsum += NFSX_V4TIME;
- } else {
+ } else if (!nd->nd_repstat) {
vfs_timestamp(&nvap->na_mtime);
if (!toclient)
nvap->na_vaflags |= VA_UTIMES_NULL;
@@ -3167,18 +3357,40 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
* specified and this attribute cannot be done in the
* same Setattr operation.
*/
- if ((nd->nd_flag & ND_NFSV41) == 0)
- nd->nd_repstat = NFSERR_ATTRNOTSUPP;
- else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
- vp == NULL)
- nd->nd_repstat = NFSERR_INVAL;
- else if (moderet == 0)
- moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
- if (moderet == 0)
- nvap->na_mode = (mode & mask) |
- (va.va_mode & ~mask);
- else
- nd->nd_repstat = moderet;
+ if (!nd->nd_repstat) {
+ if ((nd->nd_flag & ND_NFSV41) == 0)
+ nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+ else if ((mode & ~07777) != 0 ||
+ (mask & ~07777) != 0 || vp == NULL)
+ nd->nd_repstat = NFSERR_INVAL;
+ else if (moderet == 0)
+ moderet = VOP_GETATTR(vp, &va,
+ nd->nd_cred);
+ if (moderet == 0)
+ nvap->na_mode = (mode & mask) |
+ (va.va_mode & ~mask);
+ else
+ nd->nd_repstat = moderet;
+ }
+ attrsum += 2 * NFSX_UNSIGNED;
+ break;
+ case NFSATTRBIT_MODEUMASK:
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ mode = fxdr_unsigned(u_short, *tl++);
+ mask = fxdr_unsigned(u_short, *tl);
+ /*
+ * If moderet != 0, mode has already been done.
+ * If vp != NULL, this is not a file object creation.
+ */
+ if (!nd->nd_repstat) {
+ if ((nd->nd_flag & ND_NFSV42) == 0)
+ nd->nd_repstat = NFSERR_ATTRNOTSUPP;
+ else if ((mask & ~0777) != 0 || vp != NULL ||
+ moderet != 0)
+ nd->nd_repstat = NFSERR_INVAL;
+ else
+ nvap->na_mode = (mode & ~mask);
+ }
attrsum += 2 * NFSX_UNSIGNED;
break;
default:
@@ -3193,7 +3405,7 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
/*
* some clients pad the attrlist, so we need to skip over the
- * padding.
+ * padding. This also skips over unparsed non-supported attributes.
*/
if (attrsum > attrsize) {
error = NFSERR_BADXDR;
@@ -3251,7 +3463,11 @@ nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
NFSVNO_EXPORTANON(exp) ||
(nd->nd_flag & ND_AUTHNONE) != 0) {
nd->nd_cred->cr_uid = credanon->cr_uid;
- nd->nd_cred->cr_gid = credanon->cr_gid;
+ /*
+ * 'credanon' is already a 'struct ucred' that was built
+ * internally with calls to crsetgroups_fallback(), so
+ * we don't need a fallback here.
+ */
crsetgroups(nd->nd_cred, credanon->cr_ngroups,
credanon->cr_groups);
} else if ((nd->nd_flag & ND_GSS) == 0) {
@@ -3398,6 +3614,15 @@ nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
&credanon);
vfs_unbusy(mp);
+ if (nd->nd_repstat == 0 &&
+ nfp->nfsrvfh_len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
+ nfp->nfsrvfh_len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
+ if (nfp->nfsrvfh_len == NFSX_MYFH + NFSX_V4NAMEDDIRFH)
+ vn_irflag_set_cond(*vpp, VIRF_NAMEDDIR);
+ else
+ vn_irflag_set_cond(*vpp, VIRF_NAMEDATTR);
+ }
+
/*
* For NFSv4 without a pseudo root fs, unexported file handles
* can be returned, so that Lookup works everywhere.
@@ -5464,7 +5689,7 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
(ND_NFSV4 | ND_V4WCCATTR)) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
- NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
if (error != 0)
goto nfsmout;
@@ -5495,7 +5720,7 @@ nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
if (error == 0) {
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
- NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
}
NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
@@ -5661,7 +5886,7 @@ nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
if (nd->nd_repstat == 0) {
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
- NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
} else
error = nd->nd_repstat;
NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
@@ -5828,7 +6053,7 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
(ND_NFSV4 | ND_V4WCCATTR)) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
- NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
if (error != 0)
goto nfsmout;
@@ -5842,7 +6067,7 @@ nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
if (nd->nd_repstat == 0) {
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
- NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
} else
error = nd->nd_repstat;
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
@@ -5990,7 +6215,7 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
(ND_NFSV4 | ND_V4WCCATTR)) {
error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
- NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL);
NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
if (error != 0)
goto nfsmout;
@@ -6014,7 +6239,8 @@ nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
if (error == 0) {
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
- NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL,
+ NULL);
}
NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
nfsmout:
@@ -6159,7 +6385,7 @@ nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
* the same type (VREG).
*/
nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
- NULL, 0, 0, 0, 0, 0, NULL);
+ NULL, 0, 0, 0, 0, 0, NULL, false, false, false);
error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
if (error != 0) {
@@ -6303,7 +6529,7 @@ nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
if (nd->nd_repstat == 0) {
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
/*
* We can only save the updated values in the extended
* attribute if the vp is exclusively locked.
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 0c8bda6dc6a6..9eebcda548c6 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -64,6 +64,7 @@ extern u_long sb_max_adj;
extern int nfsrv_pnfsatime;
extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
+extern int nfsrv_issuedelegs;
static int nfs_async = 0;
SYSCTL_DECL(_vfs_nfsd);
@@ -240,7 +241,7 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
{
struct nfsvattr nva;
fhandle_t fh;
- int at_root = 0, error = 0, supports_nfsv4acls;
+ int at_root = 0, error = 0, ret, supports_nfsv4acls;
struct nfsreferral *refp;
nfsattrbit_t attrbits, tmpbits;
struct mount *mp;
@@ -249,6 +250,9 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
uint64_t mounted_on_fileno = 0;
accmode_t accmode;
struct thread *p = curthread;
+ size_t atsiz;
+ long pathval;
+ bool has_hiddensystem, has_namedattr, xattrsupp;
if (nd->nd_repstat)
goto out;
@@ -306,6 +310,26 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
&nva, &attrbits, p);
if (nd->nd_repstat == 0) {
supports_nfsv4acls = nfs_supportsnfsv4acls(vp);
+ xattrsupp = false;
+ if (NFSISSET_ATTRBIT(&attrbits,
+ NFSATTRBIT_XATTRSUPPORT)) {
+ ret = VOP_GETEXTATTR(vp,
+ EXTATTR_NAMESPACE_USER,
+ "xxx", NULL, &atsiz, nd->nd_cred,
+ p);
+ xattrsupp = ret != EOPNOTSUPP;
+ }
+ if (VOP_PATHCONF(vp, _PC_HAS_HIDDENSYSTEM,
+ &pathval) != 0)
+ pathval = 0;
+ has_hiddensystem = pathval > 0;
+ pathval = 0;
+ if (NFSISSET_ATTRBIT(&attrbits,
+ NFSATTRBIT_NAMEDATTR) &&
+ VOP_PATHCONF(vp, _PC_HAS_NAMEDATTR,
+ &pathval) != 0)
+ pathval = 0;
+ has_namedattr = pathval > 0;
mp = vp->v_mount;
if (nfsrv_enable_crossmntpt != 0 &&
vp->v_type == VDIR &&
@@ -339,7 +363,9 @@ nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
(void)nfsvno_fillattr(nd, mp, vp, &nva,
&fh, 0, &attrbits, nd->nd_cred, p,
isdgram, 1, supports_nfsv4acls,
- at_root, mounted_on_fileno);
+ at_root, mounted_on_fileno,
+ xattrsupp, has_hiddensystem,
+ has_namedattr);
vfs_unbusy(mp);
}
vrele(vp);
@@ -375,6 +401,7 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
NFSACL_T *aclp = NULL;
struct thread *p = curthread;
+ NFSZERO_ATTRBIT(&retbits);
if (nd->nd_repstat) {
nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
goto out;
@@ -401,9 +428,10 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
if (error)
goto nfsmout;
- /* For NFSv4, only va_uid is used from nva2. */
- NFSZERO_ATTRBIT(&retbits);
+ /* For NFSv4, only va_uid and va_flags are used from nva2. */
NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER);
+ NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN);
+ NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM);
preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits);
if (!nd->nd_repstat)
nd->nd_repstat = preat_ret;
@@ -462,6 +490,9 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
&nva, &attrbits, exp, p);
if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
+ u_long oldflags;
+
+ oldflags = nva2.na_flags;
/*
* For V4, try setting the attributes in sets, so that the
* reply bitmap will be correct for an error case.
@@ -531,6 +562,32 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED);
}
}
+ if (!nd->nd_repstat &&
+ (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN) ||
+ NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM))) {
+ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN)) {
+ if ((nva.na_flags & UF_HIDDEN) != 0)
+ oldflags |= UF_HIDDEN;
+ else
+ oldflags &= ~UF_HIDDEN;
+ }
+ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM)) {
+ if ((nva.na_flags & UF_SYSTEM) != 0)
+ oldflags |= UF_SYSTEM;
+ else
+ oldflags &= ~UF_SYSTEM;
+ }
+ NFSVNO_ATTRINIT(&nva2);
+ NFSVNO_SETATTRVAL(&nva2, flags, oldflags);
+ nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
+ exp);
+ if (!nd->nd_repstat) {
+ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN))
+ NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN);
+ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM))
+ NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM);
+ }
+ }
#ifdef NFS4_ACL_EXTATTR_NAME
if (!nd->nd_repstat && aclp->acl_cnt > 0 &&
@@ -595,6 +652,8 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
char *bufp;
u_long *hashp;
struct thread *p = curthread;
+ struct componentname *cnp;
+ short irflag;
if (nd->nd_repstat) {
nfsrv_postopattr(nd, dattr_ret, &dattr);
@@ -611,8 +670,12 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
goto out;
}
- NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
- LOCKLEAF);
+ cnp = &named.ni_cnd;
+ irflag = vn_irflag_read(dp);
+ if ((irflag & VIRF_NAMEDDIR) != 0)
+ NFSNAMEICNDSET(cnp, nd->nd_cred, LOOKUP, LOCKLEAF | OPENNAMED);
+ else
+ NFSNAMEICNDSET(cnp, nd->nd_cred, LOOKUP, LOCKLEAF);
nfsvno_setpathbuf(&named, &bufp, &hashp);
error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
if (error) {
@@ -621,6 +684,10 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
goto out;
}
if (!nd->nd_repstat) {
+ /* Don't set OPENNAMED for Lookupp (".."). */
+ if (cnp->cn_namelen == 2 && *cnp->cn_pnbuf == '.' &&
+ *(cnp->cn_pnbuf + 1) == '.')
+ cnp->cn_flags &= ~OPENNAMED;
nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
} else {
vrele(dp);
@@ -1348,6 +1415,18 @@ nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram,
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
vtyp = nfsv34tov_type(*tl);
nfs4type = fxdr_unsigned(nfstype, *tl);
+ if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0) {
+ /*
+ * Don't allow creation of non-regular file objects
+ * in a named attribute directory.
+ */
+ nd->nd_repstat = NFSERR_INVAL;
+ vrele(dp);
+#ifdef NFS4_ACL_EXTATTR_NAME
+ acl_free(aclp);
+#endif
+ goto out;
+ }
switch (nfs4type) {
case NFLNK:
error = nfsvno_getsymlink(nd, &nva, p, &pathcp,
@@ -1577,14 +1656,14 @@ nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram,
nd->nd_repstat = nfsvno_rmdirsub(&named, 1,
nd->nd_cred, p, exp);
else
- nd->nd_repstat = nfsvno_removesub(&named, 1,
- nd->nd_cred, p, exp);
+ nd->nd_repstat = nfsvno_removesub(&named, true,
+ nd, p, exp);
} else if (nd->nd_procnum == NFSPROC_RMDIR) {
nd->nd_repstat = nfsvno_rmdirsub(&named, 0,
nd->nd_cred, p, exp);
} else {
- nd->nd_repstat = nfsvno_removesub(&named, 0,
- nd->nd_cred, p, exp);
+ nd->nd_repstat = nfsvno_removesub(&named, false, nd, p,
+ exp);
}
}
if (!(nd->nd_flag & ND_NFSV2)) {
@@ -1680,8 +1759,7 @@ nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
}
/* If this is the same file handle, just VREF() the vnode. */
- if (tfh.nfsrvfh_len == NFSX_MYFH &&
- !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
+ if (!NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
VREF(dp);
tdp = dp;
tnes = *exp;
@@ -1749,8 +1827,7 @@ nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
if (fromnd.ni_vp->v_type == VDIR)
tond.ni_cnd.cn_flags |= WILLBEDIR;
nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, &tdirp);
- nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat,
- nd->nd_flag, nd->nd_cred, p);
+ nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd, p);
if (fdirp)
fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL);
if (tdirp)
@@ -1804,8 +1881,15 @@ nfsrvd_link(struct nfsrv_descript *nd, int isdgram,
nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
goto out;
}
+ if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0 ||
+ (tovp != NULL &&
+ (vn_irflag_read(tovp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0)) {
+ nd->nd_repstat = NFSERR_INVAL;
+ if (tovp != NULL)
+ vrele(tovp);
+ }
NFSVOPUNLOCK(vp);
- if (vp->v_type == VDIR) {
+ if (!nd->nd_repstat && vp->v_type == VDIR) {
if (nd->nd_flag & ND_NFSV4)
nd->nd_repstat = NFSERR_ISDIR;
else
@@ -2829,7 +2913,7 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
int how = NFSCREATE_UNCHECKED;
int32_t cverf[2], tverf[2] = { 0, 0 };
vnode_t vp = NULL, dirp = NULL;
- struct nfsvattr nva, dirfor, diraft;
+ struct nfsvattr nva, dirfor, diraft, nva2;
struct nameidata named;
nfsv4stateid_t stateid, delegstateid;
nfsattrbit_t attrbits;
@@ -2839,6 +2923,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
NFSACL_T *aclp = NULL;
struct thread *p = curthread;
bool done_namei;
+ __enum_uint8_decl(wdelegace) { USENONE, USEMODE, USENFSV4ACL }
+ delegace;
#ifdef NFS4_ACL_EXTATTR_NAME
aclp = acl_alloc(M_WAITOK);
@@ -2846,6 +2932,7 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
#endif
NFSZERO_ATTRBIT(&attrbits);
done_namei = false;
+ delegace = USEMODE;
named.ni_cnd.cn_nameiop = 0;
NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
i = fxdr_unsigned(int, *(tl + 5));
@@ -2971,6 +3058,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
cverf[0] = *tl++;
cverf[1] = *tl;
+ if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0)
+ nd->nd_repstat = NFSERR_INVAL;
break;
case NFSCREATE_EXCLUSIVE41:
NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
@@ -2979,7 +3068,8 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p);
if (error != 0)
goto nfsmout;
- if (NFSISSET_ATTRBIT(&attrbits,
+ if ((vn_irflag_read(dp) & VIRF_NAMEDDIR) != 0 ||
+ NFSISSET_ATTRBIT(&attrbits,
NFSATTRBIT_TIMEACCESSSET))
nd->nd_repstat = NFSERR_INVAL;
/*
@@ -3076,11 +3166,23 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
}
break;
case NFSCREATE_EXCLUSIVE:
- exclusive_flag = 1;
if (nd->nd_repstat == 0 && named.ni_vp == NULL)
nva.na_mode = 0;
- break;
+ /* FALLTHROUGH */
case NFSCREATE_EXCLUSIVE41:
+ if (nd->nd_repstat == 0 && named.ni_vp != NULL) {
+ nd->nd_repstat = nfsvno_getattr(named.ni_vp,
+ &nva2, nd, p, 1, NULL);
+ if (nd->nd_repstat == 0) {
+ tverf[0] = nva2.na_atime.tv_sec;
+ tverf[1] = nva2.na_atime.tv_nsec;
+ if (cverf[0] != tverf[0] ||
+ cverf[1] != tverf[1])
+ nd->nd_repstat = EEXIST;
+ }
+ if (nd->nd_repstat != 0)
+ done_namei = true;
+ }
exclusive_flag = 1;
break;
}
@@ -3170,16 +3272,27 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
NFSACCCHK_VPISLOCKED, NULL);
}
- if (!nd->nd_repstat) {
+ if (!nd->nd_repstat)
nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
- if (!nd->nd_repstat) {
- tverf[0] = nva.na_atime.tv_sec;
- tverf[1] = nva.na_atime.tv_nsec;
+
+ if (nd->nd_repstat == 0 && aclp != NULL && nfsrv_issuedelegs != 0 &&
+ (dp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) {
+ if (aclp->acl_cnt == 0 && create == NFSV4OPEN_NOCREATE) {
+ int retacl;
+
+ /* We do not yet have an ACL, so try and get one. */
+ retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp,
+ nd->nd_cred, p);
+ if (retacl != 0 && retacl != ENOATTR &&
+ retacl != EOPNOTSUPP && retacl != EINVAL)
+ delegace = USENONE;
+ else if (retacl == 0 && aclp->acl_cnt > 0)
+ delegace = USENFSV4ACL;
+ } else if (aclp->acl_cnt > 0 && create == NFSV4OPEN_CREATE) {
+ delegace = USENFSV4ACL;
}
}
- if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] ||
- cverf[1] != tverf[1]))
- nd->nd_repstat = EEXIST;
+
/*
* Do the open locking/delegation stuff.
*/
@@ -3244,6 +3357,13 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE);
*tl = newnfs_false;
+ } else if ((rflags &
+ NFSV4OPEN_WDNOTSUPPDOWNGRADE) != 0) {
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4OPEN_NOTSUPPDOWNGRADE);
+ } else if ((rflags & NFSV4OPEN_WDNOTSUPPUPGRADE) != 0) {
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(NFSV4OPEN_NOTSUPPUPGRADE);
} else {
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
@@ -3265,18 +3385,56 @@ nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
*tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE);
txdr_hyper(nva.na_size, tl);
}
- NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
- *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
- *tl++ = txdr_unsigned(0x0);
- acemask = NFSV4ACE_ALLFILESMASK;
- if (nva.na_mode & S_IRUSR)
- acemask |= NFSV4ACE_READMASK;
- if (nva.na_mode & S_IWUSR)
- acemask |= NFSV4ACE_WRITEMASK;
- if (nva.na_mode & S_IXUSR)
- acemask |= NFSV4ACE_EXECUTEMASK;
- *tl = txdr_unsigned(acemask);
- (void) nfsm_strtom(nd, "OWNER@", 6);
+
+ /* Set up the write delegation ACE. */
+ NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+ if (delegace == USENFSV4ACL) {
+ int j;
+
+ for (j = 0; j < aclp->acl_cnt; j++) {
+ if (aclp->acl_entry[j].ae_tag ==
+ ACL_USER_OBJ ||
+ aclp->acl_entry[j].ae_entry_type !=
+ ACL_ENTRY_TYPE_ALLOW)
+ break;
+ }
+ if (j < aclp->acl_cnt &&
+ aclp->acl_entry[j].ae_tag ==
+ ACL_USER_OBJ &&
+ aclp->acl_entry[j].ae_entry_type ==
+ ACL_ENTRY_TYPE_ALLOW) {
+ /* Use this ACE. */
+ *tl++ = txdr_unsigned(
+ NFSV4ACE_ALLOWEDTYPE);
+ *tl++ = txdr_unsigned(0x0);
+ *tl = txdr_unsigned(
+ nfs_aceperm(
+ aclp->acl_entry[j].ae_perm));
+ (void)nfsm_strtom(nd, "OWNER@", 6);
+ } else
+ delegace = USENONE;
+ }
+ if (delegace == USENONE) {
+ /* Don't allow anything. */
+ *tl++ = 0x0;
+ *tl++ = 0x0;
+ *tl = 0x0;
+ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+ *tl = 0;
+ } else if (delegace == USEMODE) {
+ /* Build from mode. */
+ *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
+ *tl++ = txdr_unsigned(0x0);
+ acemask = NFSV4ACE_ALLFILESMASK;
+ if (nva.na_mode & S_IRUSR)
+ acemask |= NFSV4ACE_READMASK;
+ if (nva.na_mode & S_IWUSR)
+ acemask |= NFSV4ACE_WRITEMASK;
+ if (nva.na_mode & S_IXUSR)
+ acemask |= NFSV4ACE_EXECUTEMASK;
+ *tl = txdr_unsigned(acemask);
+ (void)nfsm_strtom(nd, "OWNER@", 6);
+ }
}
*vpp = vp;
} else if (vp) {
@@ -3466,11 +3624,20 @@ nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram,
{
fhandle_t fh;
struct thread *p = curthread;
+ int siz;
+ short irflag;
nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
+ irflag = vn_irflag_read(vp);
vput(vp);
- if (!nd->nd_repstat)
- (void)nfsm_fhtom(NULL, nd, (u_int8_t *)&fh, 0, 0);
+ if (nd->nd_repstat == 0) {
+ siz = 0;
+ if ((irflag & VIRF_NAMEDDIR) != 0)
+ siz = NFSX_FHMAX + NFSX_V4NAMEDDIRFH;
+ else if ((irflag & VIRF_NAMEDATTR) != 0)
+ siz = NFSX_FHMAX + NFSX_V4NAMEDATTRFH;
+ (void)nfsm_fhtom(NULL, nd, (u_int8_t *)&fh, siz, 0);
+ }
NFSEXITCODE2(0, nd);
return (0);
}
@@ -4180,7 +4347,8 @@ nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
if (!nd->nd_repstat) {
nfsvno_getfs(&fs, isdgram);
error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL,
- sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred);
+ sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, NULL, p,
+ nd->nd_cred);
if (!error) {
if (nd->nd_procnum == NFSV4OP_NVERIFY) {
if (ret == 0)
@@ -4202,15 +4370,42 @@ nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
*/
int
nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram,
- vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp,
+ struct vnode *dp, struct vnode **vpp, __unused fhandle_t *fhp,
__unused struct nfsexstuff *exp)
{
- u_int32_t *tl;
- int error = 0, createdir __unused;
+ uint32_t *tl;
+ struct componentname cn;
+ int error = 0;
- NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
- createdir = fxdr_unsigned(int, *tl);
- nd->nd_repstat = NFSERR_NOTSUPP;
+ NFSNAMEICNDSET(&cn, nd->nd_cred, LOOKUP, OPENNAMED | ISLASTCN |
+ NOFOLLOW | LOCKLEAF);
+ cn.cn_nameptr = ".";
+ cn.cn_namelen = 1;
+ cn.cn_lkflags = LK_SHARED;
+ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+ if (*tl == newnfs_true)
+ cn.cn_flags |= CREATENAMED;
+
+ nd->nd_repstat = vn_lock(dp, LK_SHARED);
+ if (nd->nd_repstat != 0)
+ goto nfsmout;
+
+ if ((dp->v_mount->mnt_flag & MNT_NAMEDATTR) == 0)
+ nd->nd_repstat = NFSERR_NOTSUPP;
+ if (nd->nd_repstat == 0 && (vn_irflag_read(dp) & (VIRF_NAMEDDIR |
+ VIRF_NAMEDATTR)) != 0)
+ nd->nd_repstat = NFSERR_WRONGTYPE;
+ if (nd->nd_repstat == 0) {
+ nd->nd_repstat = VOP_LOOKUP(dp, vpp, &cn);
+ if (nd->nd_repstat == ENOATTR)
+ nd->nd_repstat = NFSERR_NOENT;
+ }
+ if (nd->nd_repstat == 0)
+ NFSVOPUNLOCK(*vpp);
+
+ vput(dp);
+ NFSEXITCODE2(0, nd);
+ return (0);
nfsmout:
vrele(dp);
NFSEXITCODE2(error, nd);
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index 1f50634405d0..d1b6198ba0e1 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -797,7 +797,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag,
!LIST_EMPTY(&clp->lc_deleg))
nfsrv_writestable(clp->lc_id,
clp->lc_idlen, NFSNST_REVOKE, p);
- nfsrv_cleanclient(clp, p);
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
LIST_REMOVE(clp, lc_hash);
@@ -1422,13 +1422,11 @@ static struct ucred *
nfsrv_createrootcred(void)
{
struct ucred *cr;
- gid_t grp;
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = UID_ROOT;
- grp = GID_WHEEL;
- crsetgroups(cr, 1, &grp);
- cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0];
+ crsetgroups_fallback(cr, 0, NULL, GID_WHEEL);
+ cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
#ifdef MAC
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index c73840277022..2e27817389dd 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -115,6 +115,11 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
&nfsrv_flexlinuxhack, 0,
"For Linux clients, hack around Flex File Layout bug");
+NFSD_VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false;
+SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace,
+ CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_disable_grace),
+ 0, "Disable grace for testing");
+
/*
* Hash lists for nfs V4.
*/
@@ -139,7 +144,7 @@ static void nfsrv_dumpaclient(struct nfsclient *clp,
struct nfsd_dumpclients *dumpp);
static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
NFSPROC_T *p);
-static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
+static void nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
NFSPROC_T *p);
static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
NFSPROC_T *p);
@@ -204,7 +209,7 @@ static void nfsrv_locklf(struct nfslockfile *lfp);
static void nfsrv_unlocklf(struct nfslockfile *lfp);
static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
- uint8_t *sessionid);
+ uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp);
static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
int dont_replycache, struct nfsdsession **sepp, int *slotposp);
static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
@@ -240,6 +245,50 @@ static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
struct nfsclient *clp);
+static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
+ struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
+ u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
+ struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
+ nfsv4stateid_t *delegstateidp);
+static void nfsrv_clientlock(bool mlocked);
+static void nfsrv_clientunlock(bool mlocked);
+
+/*
+ * Lock the client structure: state mutex if mlocked, else exclusive nfsd lock.
+ */
+static void
+nfsrv_clientlock(bool mlocked)
+{
+ int igotlock;
+
+ if (mlocked) {
+ NFSLOCKSTATE();
+ } else {
+ NFSLOCKV4ROOTMUTEX();
+ nfsv4_relref(&nfsv4rootfs_lock);
+ do { /* Retry until nfsv4_lock() returns non-zero. */
+ igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
+ NFSV4ROOTLOCKMUTEXPTR, NULL);
+ } while (!igotlock);
+ NFSUNLOCKV4ROOTMUTEX();
+ }
+}
+
+/*
+ * Undo nfsrv_clientlock(); pass the same mlocked value that was used to lock.
+ */
+static void
+nfsrv_clientunlock(bool mlocked)
+{
+
+ if (mlocked) {
+ NFSUNLOCKSTATE();
+ } else {
+ NFSLOCKV4ROOTMUTEX();
+ nfsv4_unlock(&nfsv4rootfs_lock, 1);
+ NFSUNLOCKV4ROOTMUTEX();
+ }
+}
/*
* Scan the client list for a match and either return the current one,
@@ -261,7 +310,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
struct sockaddr_in6 *sin6, *rin6;
#endif
struct nfsdsession *sep, *nsep;
- int zapit = 0, gotit, hasstate = 0, igotlock;
+ SVCXPRT *old_xprt;
+ struct nfssessionhead old_sess;
+ int zapit = 0, gotit, hasstate = 0;
+ bool mlocked;
static u_int64_t confirm_index = 0;
/*
@@ -289,14 +341,11 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
*/
new_clp->lc_program = 0;
+ mlocked = true;
+ if (nfsrv_dolocallocks != 0)
+ mlocked = false;
/* Lock out other nfsd threads */
- NFSLOCKV4ROOTMUTEX();
- nfsv4_relref(&nfsv4rootfs_lock);
- do {
- igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
- NFSV4ROOTLOCKMUTEXPTR, NULL);
- } while (!igotlock);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientlock(mlocked);
/*
* Search for a match in the client list.
@@ -313,6 +362,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
if (gotit == 0)
i++;
}
+ old_xprt = NULL;
if (!gotit ||
(clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
@@ -320,9 +370,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
* For NFSv4.1, if confirmp->lval[1] is non-zero, the
* client is trying to update a confirmed clientid.
*/
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
confirmp->lval[1] = 0;
error = NFSERR_NOENT;
goto out;
@@ -332,7 +380,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
*/
if (i != nfsrv_clienthashsize) {
LIST_REMOVE(clp, lc_hash);
- nfsrv_cleanclient(clp, p);
+ if (mlocked)
+ nfsrv_cleanclient(clp, p, true, &old_xprt);
+ else
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
zapit = 1;
@@ -367,11 +418,12 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
NFSD_VNET(nfsstatsv1_p)->srvclients++;
nfsrv_openpluslock++;
nfsrv_clients++;
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
- if (zapit)
+ nfsrv_clientunlock(mlocked);
+ if (zapit != 0) {
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
nfsrv_zapclient(clp, p);
+ }
*new_clpp = NULL;
goto out;
}
@@ -385,7 +437,10 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
*/
if (clp->lc_expiry < NFSD_MONOSEC &&
(!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
- nfsrv_cleanclient(clp, p);
+ if (mlocked)
+ nfsrv_cleanclient(clp, p, true, &old_xprt);
+ else
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
}
@@ -430,9 +485,9 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
break;
#endif
}
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
error = NFSERR_CLIDINUSE;
goto out;
}
@@ -442,17 +497,12 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
/*
* If the verifier has changed, the client has rebooted
* and a new client id is issued. The old state info
- * can be thrown away once the SETCLIENTID_CONFIRM occurs.
+ * can be thrown away once the SetClientID_Confirm or
+ * Create_Session that confirms the clientid occurs.
*/
LIST_REMOVE(clp, lc_hash);
- /* Get rid of all sessions on this clientid. */
- LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
- ret = nfsrv_freesession(NULL, sep, NULL);
- if (ret != 0)
- printf("nfsrv_setclient: verifier changed free"
- " session failed=%d\n", ret);
- }
+ LIST_NEWHEAD(&old_sess, &clp->lc_session, sess_list);
new_clp->lc_flags |= LCL_NEEDSCONFIRM;
if ((nd->nd_flag & ND_NFSV41) != 0) {
@@ -496,21 +546,31 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
NFSD_VNET(nfsstatsv1_p)->srvclients++;
nfsrv_openpluslock++;
nfsrv_clients++;
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ if (!mlocked) {
+ nfsrv_clientunlock(mlocked);
+ NFSLOCKSTATE();
+ }
/*
* Must wait until any outstanding callback on the old clp
* completes.
*/
- NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsd clp", 10 * hz);
}
NFSUNLOCKSTATE();
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
+ /* Get rid of all sessions on this clientid. */
+ LIST_FOREACH_SAFE(sep, &old_sess, sess_list, nsep) {
+ ret = nfsrv_freesession(NULL, sep, NULL, false, NULL);
+ if (ret != 0)
+ printf("nfsrv_setclient: verifier changed free"
+ " session failed=%d\n", ret);
+ }
+
nfsrv_zapclient(clp, p);
*new_clpp = NULL;
goto out;
@@ -562,24 +622,31 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
nfsrv_openpluslock++;
nfsrv_clients++;
}
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ if (!mlocked)
+ nfsrv_clientunlock(mlocked);
if ((nd->nd_flag & ND_NFSV41) == 0) {
/*
* Must wait until any outstanding callback on the old clp
* completes.
*/
- NFSLOCKSTATE();
+ if (!mlocked)
+ NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsdclp", 10 * hz);
}
NFSUNLOCKSTATE();
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
nfsrv_zapclient(clp, p);
*new_clpp = NULL;
+ } else {
+ if (mlocked)
+ NFSUNLOCKSTATE();
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
}
out:
@@ -599,11 +666,13 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
struct nfsstate *stp;
int i;
struct nfsclienthashhead *hp;
- int error = 0, igotlock, doneok;
+ int error = 0, doneok, igotlock;
struct nfssessionhash *shp;
struct nfsdsession *sep;
uint64_t sessid[2];
- bool sess_replay;
+ CLIENT *client;
+ SVCXPRT *old_xprt;
+ bool mlocked, sess_replay;
static uint64_t next_sess = 0;
if (clpp)
@@ -620,13 +689,27 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
* already held. Otherwise, we need to get either that or,
* for the case of Confirm, lock out the nfsd threads.
*/
+ client = NULL;
+ old_xprt = NULL;
+ mlocked = true;
+ if (nfsrv_dolocallocks != 0)
+ mlocked = false;
if (opflags & CLOPS_CONFIRM) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_relref(&nfsv4rootfs_lock);
- do {
- igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
- NFSV4ROOTLOCKMUTEXPTR, NULL);
- } while (!igotlock);
+ if (nsep != NULL &&
+ (nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
+ client = (struct __rpc_client *)
+ clnt_bck_create(nd->nd_xprt->xp_socket,
+ cbprogram, NFSV4_CBVERS);
+ if (mlocked) {
+ nfsrv_clientlock(mlocked);
+ } else {
+ NFSLOCKV4ROOTMUTEX();
+ nfsv4_relref(&nfsv4rootfs_lock);
+ do {
+ igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1,
+ NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
+ } while (!igotlock);
+ }
/*
* Create a new sessionid here, since we need to do it where
* there is a mutex held to serialize update of next_sess.
@@ -635,7 +718,8 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
sessid[0] = ++next_sess;
sessid[1] = clientid.qval;
}
- NFSUNLOCKV4ROOTMUTEX();
+ if (!mlocked)
+ NFSUNLOCKV4ROOTMUTEX();
} else if (opflags != CLOPS_RENEW) {
NFSLOCKSTATE();
}
@@ -672,9 +756,9 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
}
if (error) {
if (opflags & CLOPS_CONFIRM) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
+ if (client != NULL)
+ CLNT_RELEASE(client);
} else if (opflags != CLOPS_RENEW) {
NFSUNLOCKSTATE();
}
@@ -719,7 +803,10 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
* for an Open with CLAIM_DELEGATE_PREV unless in
* grace, but get rid of the rest of the state.
*/
- nfsrv_cleanclient(clp, p);
+ if (mlocked)
+ nfsrv_cleanclient(clp, p, true, &old_xprt);
+ else
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_olddeleg);
if (nfsrv_checkgrace(nd, clp, 0)) {
/* In grace, so just delete delegations */
@@ -743,10 +830,10 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
/* Hold a reference on the xprt for a backchannel. */
if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
!= 0 && !sess_replay) {
- if (clp->lc_req.nr_client == NULL)
- clp->lc_req.nr_client = (struct __rpc_client *)
- clnt_bck_create(nd->nd_xprt->xp_socket,
- cbprogram, NFSV4_CBVERS);
+ if (clp->lc_req.nr_client == NULL) {
+ clp->lc_req.nr_client = client;
+ client = NULL;
+ }
if (clp->lc_req.nr_client != NULL) {
SVC_ACQUIRE(nd->nd_xprt);
CLNT_ACQUIRE(clp->lc_req.nr_client);
@@ -763,13 +850,15 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
NFSX_V4SESSIONID);
if (!sess_replay) {
shp = NFSSESSIONHASH(nsep->sess_sessionid);
- NFSLOCKSTATE();
+ if (!mlocked)
+ NFSLOCKSTATE();
NFSLOCKSESSION(shp);
LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
nsep->sess_clp = clp;
NFSUNLOCKSESSION(shp);
- NFSUNLOCKSTATE();
+ if (!mlocked)
+ NFSUNLOCKSTATE();
}
}
}
@@ -803,9 +892,11 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
clp->lc_expiry = nfsrv_leaseexpiry();
}
if (opflags & CLOPS_CONFIRM) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
+ if (client != NULL)
+ CLNT_RELEASE(client);
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
} else if (opflags != CLOPS_RENEW) {
NFSUNLOCKSTATE();
}
@@ -825,21 +916,20 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
{
struct nfsclient *clp;
struct nfsclienthashhead *hp;
- int error = 0, i, igotlock;
+ SVCXPRT *old_xprt;
+ int error = 0, i;
+ bool mlocked;
if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
error = NFSERR_STALECLIENTID;
goto out;
}
+ mlocked = true;
+ if (nfsrv_dolocallocks != 0)
+ mlocked = false;
/* Lock out other nfsd threads */
- NFSLOCKV4ROOTMUTEX();
- nfsv4_relref(&nfsv4rootfs_lock);
- do {
- igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
- NFSV4ROOTLOCKMUTEXPTR, NULL);
- } while (igotlock == 0);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientlock(mlocked);
hp = NFSCLIENTHASH(clientid);
LIST_FOREACH(clp, hp, lc_hash) {
@@ -847,9 +937,7 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
break;
}
if (clp == NULL) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
/* Just return ok, since it is gone. */
goto out;
}
@@ -857,9 +945,7 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
/* Check for the SP4_MACH_CRED case. */
error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
if (error != 0) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
goto out;
}
@@ -872,28 +958,28 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
/* Scan for state on the clientid. */
for (i = 0; i < nfsrv_statehashsize; i++)
if (!LIST_EMPTY(&clp->lc_stateid[i])) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
error = NFSERR_CLIENTIDBUSY;
goto out;
}
if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
error = NFSERR_CLIENTIDBUSY;
goto out;
}
/* Destroy the clientid and return ok. */
- nfsrv_cleanclient(clp, p);
+ old_xprt = NULL;
+ if (mlocked)
+ nfsrv_cleanclient(clp, p, true, &old_xprt);
+ else
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
LIST_REMOVE(clp, lc_hash);
- NFSLOCKV4ROOTMUTEX();
- nfsv4_unlock(&nfsv4rootfs_lock, 1);
- NFSUNLOCKV4ROOTMUTEX();
+ nfsrv_clientunlock(mlocked);
+ if (old_xprt != NULL)
+ SVC_RELEASE(old_xprt);
nfsrv_zapclient(clp, p);
out:
NFSEXITCODE2(error, nd);
@@ -956,7 +1042,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
*/
clp->lc_flags &= ~LCL_CALLBACKSON;
clp->lc_flags |= LCL_ADMINREVOKED;
- nfsrv_cleanclient(clp, p);
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
NFSLOCKV4ROOTMUTEX();
@@ -1376,16 +1462,22 @@ nfsrv_servertimer(void *arg __unused)
* there are no other active nfsd threads.
*/
void
-nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
+nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p, bool locked,
+ SVCXPRT **old_xprtp)
{
struct nfsstate *stp, *nstp;
struct nfsdsession *sep, *nsep;
- LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
- nfsrv_freeopenowner(stp, 1, p);
+ LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
+ if (locked)
+ nfsrv_freeopenowner(stp, 0, p);
+ else
+ nfsrv_freeopenowner(stp, 1, p);
+ }
if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
- (void)nfsrv_freesession(NULL, sep, NULL);
+ (void)nfsrv_freesession(NULL, sep, NULL, locked,
+ old_xprtp);
}
/*
@@ -1479,7 +1571,7 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
while (nstp != LIST_END(&stp->ls_open)) {
tstp = nstp;
nstp = LIST_NEXT(nstp, ls_list);
- (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
+ nfsrv_freeopen(tstp, NULL, cansleep, p);
}
if (stp->ls_op)
nfsrvd_derefcache(stp->ls_op);
@@ -1494,12 +1586,11 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
* are no other opens on the file.
- * Returns 1 if it free'd the nfslockfile, 0 otherwise.
+ * No value is returned.
*/
-static int
+static void
nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
{
struct nfsstate *nstp, *tstp;
struct nfslockfile *lfp;
- int ret;
LIST_REMOVE(stp, ls_hash);
LIST_REMOVE(stp, ls_list);
@@ -1508,35 +1599,46 @@ nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
lfp = stp->ls_lfp;
/*
* Now, free all lockowners associated with this open.
+ * Note that, if vp != NULL, nfsrv_freelockowner() will
+ * not call nfsrv_freeallnfslocks(), so it needs to be called, below.
*/
LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
nfsrv_freelockowner(tstp, vp, cansleep, p);
+ if (vp != NULL) {
+ KASSERT(cansleep != 0, ("nfsrv_freeopen: cansleep == 0"));
+ mtx_assert(NFSSTATEMUTEXPTR, MA_OWNED);
+ /*
+ * Only called with vp != NULL for Close when
+ * vfs.nfsd.enable_locallocks != 0.
+ * Lock the lfp so that it will not go away and do the
+ * nfsrv_freeallnfslocks() call that was not done by
+ * nfsrv_freelockowner().
+ */
+ nfsrv_locklf(lfp);
+ NFSUNLOCKSTATE();
+ NFSVOPUNLOCK(vp);
+ nfsrv_freeallnfslocks(stp, vp, cansleep, p);
+ NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+ NFSLOCKSTATE();
+ nfsrv_unlocklf(lfp);
+ }
+
/*
* The nfslockfile is freed here if there are no locks
* associated with the open.
* If there are locks associated with the open, the
* nfslockfile structure can be freed via nfsrv_freelockowner().
- * Acquire the state mutex to avoid races with calls to
- * nfsrv_getlockfile().
*/
- if (cansleep != 0)
- NFSLOCKSTATE();
if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
lfp->lf_usecount == 0 &&
- (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
+ nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
nfsrv_freenfslockfile(lfp);
- ret = 1;
- } else
- ret = 0;
- if (cansleep != 0)
- NFSUNLOCKSTATE();
free(stp, M_NFSDSTATE);
NFSD_VNET(nfsstatsv1_p)->srvopens--;
nfsrv_openpluslock--;
- return (ret);
}
/*
@@ -1549,7 +1651,8 @@ nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
LIST_REMOVE(stp, ls_hash);
LIST_REMOVE(stp, ls_list);
- nfsrv_freeallnfslocks(stp, vp, cansleep, p);
+ if (vp == NULL)
+ nfsrv_freeallnfslocks(stp, vp, cansleep, p);
if (stp->ls_op)
nfsrvd_derefcache(stp->ls_op);
free(stp, M_NFSDSTATE);
@@ -2648,6 +2751,8 @@ tryagain:
* considered a conflict since the client with a read delegation
* could have done an Open with ReadAccess and WriteDeny
* locally and then not have checked for the WriteDeny.)
+ * The exception is a NFSv4.1/4.2 client that has requested
+ * an atomic upgrade to a write delegation.
* Don't check for a Reclaim, since that will be dealt with
* by nfsrv_openctrl().
*/
@@ -2657,9 +2762,10 @@ tryagain:
while (stp != LIST_END(&lfp->lf_deleg)) {
nstp = LIST_NEXT(stp, ls_file);
if ((readonly && stp->ls_clp != clp &&
- (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
+ (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
(!readonly && (stp->ls_clp != clp ||
- (stp->ls_flags & NFSLCK_DELEGREAD)))) {
+ ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+ (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
ret = nfsrv_delegconflict(stp, &haslock, p, vp);
if (ret) {
/*
@@ -2944,6 +3050,8 @@ tryagain:
* considered a conflict since the client with a read delegation
* could have done an Open with ReadAccess and WriteDeny
* locally and then not have checked for the WriteDeny.)
+ * The exception is a NFSv4.1/4.2 client that has requested
+ * an atomic upgrade to a write delegation.
*/
if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
stp = LIST_FIRST(&lfp->lf_deleg);
@@ -2951,12 +3059,15 @@ tryagain:
nstp = LIST_NEXT(stp, ls_file);
if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
writedeleg = 0;
- else
+ else if (stp->ls_clp != clp ||
+ (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
+ (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
delegate = 0;
if ((readonly && stp->ls_clp != clp &&
- (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
+ (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
(!readonly && (stp->ls_clp != clp ||
- (stp->ls_flags & NFSLCK_DELEGREAD)))) {
+ ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+ (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
if (new_stp->ls_flags & NFSLCK_RECLAIM) {
delegate = 2;
} else {
@@ -3204,47 +3315,9 @@ tryagain:
/*
* This is where we can choose to issue a delegation.
*/
- if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
- *rflagsp |= NFSV4OPEN_WDNOTWANTED;
- else if (nfsrv_issuedelegs == 0)
- *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
- else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
- *rflagsp |= NFSV4OPEN_WDRESOURCE;
- else if (delegate == 0 || writedeleg == 0 ||
- NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
- nfsrv_writedelegifpos == 0) ||
- !NFSVNO_DELEGOK(vp) ||
- (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
- (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
- LCL_CALLBACKSON)
- *rflagsp |= NFSV4OPEN_WDCONTENTION;
- else {
- new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
- new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
- = clp->lc_clientid.lval[0];
- new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
- = clp->lc_clientid.lval[1];
- new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
- = nfsrv_nextstateindex(clp);
- new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
- NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
- *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
- new_deleg->ls_uid = new_stp->ls_uid;
- new_deleg->ls_lfp = lfp;
- new_deleg->ls_clp = clp;
- new_deleg->ls_filerev = filerev;
- new_deleg->ls_compref = nd->nd_compref;
- new_deleg->ls_lastrecall = 0;
- nfsrv_writedelegcnt++;
- LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
- LIST_INSERT_HEAD(NFSSTATEHASH(clp,
- new_deleg->ls_stateid), new_deleg, ls_hash);
- LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
- new_deleg = NULL;
- NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
- nfsrv_openpluslock++;
- nfsrv_delegatecnt++;
- }
+ nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+ readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+ new_stp, lfp, rflagsp, delegstateidp);
} else {
new_open->ls_stateid.seqid = 1;
new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
@@ -3269,52 +3342,9 @@ tryagain:
/*
* This is where we can choose to issue a delegation.
*/
- if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
- *rflagsp |= NFSV4OPEN_WDNOTWANTED;
- else if (nfsrv_issuedelegs == 0)
- *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
- else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
- *rflagsp |= NFSV4OPEN_WDRESOURCE;
- else if (delegate == 0 || (writedeleg == 0 &&
- readonly == 0) || !NFSVNO_DELEGOK(vp) ||
- (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
- LCL_CALLBACKSON)
- *rflagsp |= NFSV4OPEN_WDCONTENTION;
- else {
- new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
- new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
- = clp->lc_clientid.lval[0];
- new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
- = clp->lc_clientid.lval[1];
- new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
- = nfsrv_nextstateindex(clp);
- if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
- (nfsrv_writedelegifpos || !readonly) &&
- (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
- new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
- NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
- *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
- nfsrv_writedelegcnt++;
- } else {
- new_deleg->ls_flags = (NFSLCK_DELEGREAD |
- NFSLCK_READACCESS);
- *rflagsp |= NFSV4OPEN_READDELEGATE;
- }
- new_deleg->ls_uid = new_stp->ls_uid;
- new_deleg->ls_lfp = lfp;
- new_deleg->ls_clp = clp;
- new_deleg->ls_filerev = filerev;
- new_deleg->ls_compref = nd->nd_compref;
- new_deleg->ls_lastrecall = 0;
- LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
- LIST_INSERT_HEAD(NFSSTATEHASH(clp,
- new_deleg->ls_stateid), new_deleg, ls_hash);
- LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
- new_deleg = NULL;
- NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
- nfsrv_openpluslock++;
- nfsrv_delegatecnt++;
- }
+ nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+ readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+ new_stp, lfp, rflagsp, delegstateidp);
}
} else {
/*
@@ -3337,78 +3367,28 @@ tryagain:
if (new_stp->ls_flags & NFSLCK_RECLAIM) {
new_stp->ls_flags = 0;
} else if ((nd->nd_flag & ND_NFSV41) != 0) {
- /* NFSv4.1 never needs confirmation. */
- new_stp->ls_flags = 0;
+ /*
+ * This is where we can choose to issue a delegation.
+ */
+ nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
+ readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
+ new_stp, lfp, rflagsp, delegstateidp);
+ /* NFSv4.1 never needs confirmation. */
+ new_stp->ls_flags = 0;
- /*
- * This is where we can choose to issue a delegation.
- */
- if (delegate && nfsrv_issuedelegs &&
- (writedeleg || readonly) &&
- (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
- LCL_CALLBACKSON &&
- !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
- NFSVNO_DELEGOK(vp) &&
- ((nd->nd_flag & ND_NFSV41) == 0 ||
- (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
- new_deleg->ls_stateid.seqid =
- delegstateidp->seqid = 1;
- new_deleg->ls_stateid.other[0] =
- delegstateidp->other[0]
- = clp->lc_clientid.lval[0];
- new_deleg->ls_stateid.other[1] =
- delegstateidp->other[1]
- = clp->lc_clientid.lval[1];
- new_deleg->ls_stateid.other[2] =
- delegstateidp->other[2]
- = nfsrv_nextstateindex(clp);
- if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
- (nfsrv_writedelegifpos || !readonly) &&
- ((nd->nd_flag & ND_NFSV41) == 0 ||
- (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
- 0)) {
- new_deleg->ls_flags =
- (NFSLCK_DELEGWRITE |
- NFSLCK_READACCESS |
- NFSLCK_WRITEACCESS);
- *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
- nfsrv_writedelegcnt++;
- } else {
- new_deleg->ls_flags =
- (NFSLCK_DELEGREAD |
- NFSLCK_READACCESS);
- *rflagsp |= NFSV4OPEN_READDELEGATE;
- }
- new_deleg->ls_uid = new_stp->ls_uid;
- new_deleg->ls_lfp = lfp;
- new_deleg->ls_clp = clp;
- new_deleg->ls_filerev = filerev;
- new_deleg->ls_compref = nd->nd_compref;
- new_deleg->ls_lastrecall = 0;
- LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
- ls_file);
- LIST_INSERT_HEAD(NFSSTATEHASH(clp,
- new_deleg->ls_stateid), new_deleg, ls_hash);
- LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
- ls_list);
- new_deleg = NULL;
- NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
- nfsrv_openpluslock++;
- nfsrv_delegatecnt++;
- }
- /*
- * Since NFSv4.1 never does an OpenConfirm, the first
- * open state will be acquired here.
- */
- if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
- clp->lc_flags |= LCL_STAMPEDSTABLE;
- len = clp->lc_idlen;
- NFSBCOPY(clp->lc_id, clidp, len);
- gotstate = 1;
- }
+ /*
+ * Since NFSv4.1 never does an OpenConfirm, the first
+ * open state will be acquired here.
+ */
+ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
+ clp->lc_flags |= LCL_STAMPEDSTABLE;
+ len = clp->lc_idlen;
+ NFSBCOPY(clp->lc_id, clidp, len);
+ gotstate = 1;
+ }
} else {
- *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
- new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
+ *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
+ new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
}
nfsrvd_refcache(new_stp->ls_op);
new_stp->ls_noopens = 0;
@@ -3467,7 +3447,6 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
{
struct nfsstate *stp;
struct nfsclient *clp;
- struct nfslockfile *lfp;
u_int32_t bits;
int error = 0, gotstate = 0, len = 0;
u_char *clidp = NULL;
@@ -3562,9 +3541,7 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
NFSBCOPY(clp->lc_id, clidp, len);
gotstate = 1;
}
- NFSUNLOCKSTATE();
} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
- lfp = stp->ls_lfp;
if (retwriteaccessp != NULL) {
if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
*retwriteaccessp = 1;
@@ -3572,20 +3549,10 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
*retwriteaccessp = 0;
}
if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
- /* Get the lf lock */
- nfsrv_locklf(lfp);
- NFSUNLOCKSTATE();
ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
- NFSVOPUNLOCK(vp);
- if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
- NFSLOCKSTATE();
- nfsrv_unlocklf(lfp);
- NFSUNLOCKSTATE();
- }
- NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+ nfsrv_freeopen(stp, vp, 1, p);
} else {
- (void) nfsrv_freeopen(stp, NULL, 0, p);
- NFSUNLOCKSTATE();
+ nfsrv_freeopen(stp, NULL, 0, p);
}
} else {
/*
@@ -3603,8 +3570,8 @@ nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
if ((nd->nd_flag & ND_NFSV41) != 0 &&
stp->ls_stateid.seqid == 0)
stp->ls_stateid.seqid = 1;
- NFSUNLOCKSTATE();
}
+ NFSUNLOCKSTATE();
/*
* If the client just confirmed its first open, write a timestamp
@@ -4419,11 +4386,13 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
* ReclaimComplete. If so, grace can end now.
*/
notreclaimed = 0;
- LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
- nst_list) {
- if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
- notreclaimed = 1;
- break;
+ if (!NFSD_VNET(nfsd_disable_grace)) {
+ LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
+ nst_list) {
+ if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
+ notreclaimed = 1;
+ break;
+ }
}
}
if (notreclaimed == 0)
@@ -4616,7 +4585,7 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
if (procnum != NFSV4PROC_CBNULL)
nfsv4_freeslot(&sep->sess_cbsess, slotpos,
true);
- nfsrv_freesession(NULL, sep, NULL);
+ nfsrv_freesession(NULL, sep, NULL, false, NULL);
} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
error = newnfs_connect(NULL, &clp->lc_req, cred,
NULL, 1, dotls, &clp->lc_req.nr_client);
@@ -4665,7 +4634,7 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
nfsv4_freeslot(&sep->sess_cbsess, slotpos,
true);
}
- nfsrv_freesession(NULL, sep, NULL);
+ nfsrv_freesession(NULL, sep, NULL, false, NULL);
} else
error = newnfs_request(nd, NULL, clp, &clp->lc_req,
NULL, NULL, cred, clp->lc_program,
@@ -4706,7 +4675,7 @@ errout:
} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
- p, NULL);
+ NULL, p, NULL);
m_freem(nd->nd_mrep);
}
NFSLOCKSTATE();
@@ -5179,6 +5148,11 @@ nfsrv_markreclaim(struct nfsclient *clp)
* Now, just set the flag.
*/
sp->nst_flag |= NFSNST_RECLAIMED;
+
+ /*
+ * Free up any old delegations.
+ */
+ nfsrv_freedeleglist(&clp->lc_olddeleg);
}
/*
@@ -5263,7 +5237,7 @@ nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
*/
nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
nfsrv_backupstable();
- nfsrv_cleanclient(clp, p);
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
LIST_REMOVE(clp, lc_hash);
@@ -5455,7 +5429,7 @@ nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
nfsrv_backupstable();
if (clp->lc_expiry < NFSD_MONOSEC) {
- nfsrv_cleanclient(clp, p);
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
LIST_REMOVE(clp, lc_hash);
@@ -6262,7 +6236,7 @@ nfsrv_throwawayallstate(NFSPROC_T *p)
for (i = 0; i < nfsrv_clienthashsize; i++) {
LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
nclp) {
- nfsrv_cleanclient(clp, p);
+ nfsrv_cleanclient(clp, p, false, NULL);
nfsrv_freedeleglist(&clp->lc_deleg);
nfsrv_freedeleglist(&clp->lc_olddeleg);
free(clp->lc_stateid, M_NFSDCLIENT);
@@ -6485,7 +6459,7 @@ nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
} while (igotlock == 0);
NFSUNLOCKV4ROOTMUTEX();
- error = nfsrv_freesession(nd, NULL, sessionid);
+ error = nfsrv_freesession(nd, NULL, sessionid, false, NULL);
if (error == 0 && samesess != 0)
nd->nd_flag &= ~ND_HASSEQUENCE;
@@ -6581,12 +6555,13 @@ out:
*/
static int
nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
- uint8_t *sessionid)
+ uint8_t *sessionid, bool locked, SVCXPRT **old_xprtp)
{
struct nfssessionhash *shp;
int i;
- NFSLOCKSTATE();
+ if (!locked)
+ NFSLOCKSTATE();
if (sep == NULL) {
shp = NFSSESSIONHASH(sessionid);
NFSLOCKSESSION(shp);
@@ -6600,28 +6575,36 @@ nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
nd, sep->sess_clp) != 0) {
NFSUNLOCKSESSION(shp);
- NFSUNLOCKSTATE();
+ if (!locked)
+ NFSUNLOCKSTATE();
return (NFSERR_AUTHERR | AUTH_TOOWEAK);
}
sep->sess_refcnt--;
if (sep->sess_refcnt > 0) {
NFSUNLOCKSESSION(shp);
- NFSUNLOCKSTATE();
+ if (!locked)
+ NFSUNLOCKSTATE();
return (NFSERR_BACKCHANBUSY);
}
LIST_REMOVE(sep, sess_hash);
LIST_REMOVE(sep, sess_list);
}
NFSUNLOCKSESSION(shp);
- NFSUNLOCKSTATE();
+ if (!locked)
+ NFSUNLOCKSTATE();
if (sep == NULL)
return (NFSERR_BADSESSION);
for (i = 0; i < NFSV4_SLOTS; i++)
if (sep->sess_slots[i].nfssl_reply != NULL)
m_freem(sep->sess_slots[i].nfssl_reply);
- if (sep->sess_cbsess.nfsess_xprt != NULL)
- SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
+ if (!locked) {
+ if (sep->sess_cbsess.nfsess_xprt != NULL)
+ SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
+ if (old_xprtp != NULL)
+ *old_xprtp = NULL;
+ } else if (old_xprtp != NULL)
+ *old_xprtp = sep->sess_cbsess.nfsess_xprt;
free(sep, M_NFSDSESSION);
return (0);
}
@@ -8943,3 +8926,112 @@ nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
return (0);
return (NFSERR_AUTHERR | AUTH_TOOWEAK);
}
+
+/*
+ * Issue a delegation or, when one is not issued, set *rflagsp to say
+ * why not.
+ * On entry *new_delegp is the nfsstate to use for a newly issued
+ * delegation.  It is set to NULL only when that structure has been
+ * linked into lfp->lf_deleg, the client's state hash and clp->lc_deleg.
+ * When no delegation is issued, one of the NFSV4OPEN_WD* flags is or'd
+ * into *rflagsp.  When one is issued, NFSV4OPEN_WRITEDELEGATE or
+ * NFSV4OPEN_READDELEGATE is or'd into *rflagsp and *delegstateidp is
+ * filled in with the delegation's stateid.
+ * rdonly is the caller's "export is read-only" indication (the calls
+ * in this file pass NFSVNO_EXRDONLY(exp)); a non-zero value prevents a
+ * new write delegation from being chosen.
+ */
+static void
+nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
+ struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
+ u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
+ struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
+ nfsv4stateid_t *delegstateidp)
+{
+ struct nfsstate *up_deleg, *new_deleg;
+
+ new_deleg = *new_delegp;
+ up_deleg = LIST_FIRST(&lfp->lf_deleg); /* only the first entry is examined */
+ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
+ *rflagsp |= NFSV4OPEN_WDNOTWANTED;
+ else if (nfsrv_issuedelegs == 0)
+ *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
+ else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
+ *rflagsp |= NFSV4OPEN_WDRESOURCE;
+ else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
+ (writedeleg == 0 && (readonly == 0 ||
+ (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
+ (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
+ LCL_CALLBACKSON) {
+ /*
+ * Is this a downgrade attempt?
+ * That is, this client already holds a write delegation (first
+ * entry on lf_deleg) and the open asked for a read delegation.
+ */
+ if (up_deleg != NULL && up_deleg->ls_clp == clp &&
+ (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
+ (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
+ *rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
+ else
+ *rflagsp |= NFSV4OPEN_WDCONTENTION;
+ } else if (up_deleg != NULL &&
+ (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
+ (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
+ /*
+ * This is an atomic upgrade.
+ * The existing read delegation is converted in place to a write
+ * delegation and its stateid, with the seqid incremented, is
+ * returned.  *new_delegp is left untouched in this case.
+ */
+ up_deleg->ls_stateid.seqid++;
+ delegstateidp->seqid = up_deleg->ls_stateid.seqid;
+ delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
+ delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
+ delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
+ up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
+ NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
+ *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+ nfsrv_writedelegcnt++;
+ } else {
+ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
+ new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
+ = clp->lc_clientid.lval[0];
+ new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
+ = clp->lc_clientid.lval[1];
+ new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
+ = nfsrv_nextstateindex(clp);
+ if (writedeleg && !rdonly &&
+ (nfsrv_writedelegifpos || !readonly) &&
+ (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
+ NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) { /* not "read delegation only" */
+ new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
+ NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
+ *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+ nfsrv_writedelegcnt++;
+ } else {
+ new_deleg->ls_flags = (NFSLCK_DELEGREAD |
+ NFSLCK_READACCESS);
+ *rflagsp |= NFSV4OPEN_READDELEGATE;
+ }
+ new_deleg->ls_uid = new_stp->ls_uid;
+ new_deleg->ls_lfp = lfp;
+ new_deleg->ls_clp = clp;
+ new_deleg->ls_filerev = filerev;
+ new_deleg->ls_compref = nd->nd_compref;
+ new_deleg->ls_lastrecall = 0;
+ LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
+ LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
+ new_deleg, ls_hash);
+ LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
+ *new_delegp = NULL; /* structure is now owned by the lists above */
+ NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
+ nfsrv_openpluslock++;
+ nfsrv_delegatecnt++;
+ }
+}
+
+/*
+ * Find and remove any delegations for the fh.
+ * The client for nd->nd_clientid and the nfslockfile for fhp are looked
+ * up between NFSLOCKSTATE() and NFSUNLOCKSTATE().  If either lookup
+ * returns an error, no delegations are freed and, since the function
+ * is void, the error is not reported to the caller.
+ */
+void
+nfsrv_removedeleg(fhandle_t *fhp, struct nfsrv_descript *nd, NFSPROC_T *p)
+{
+ struct nfsclient *clp;
+ struct nfsstate *stp, *nstp;
+ struct nfslockfile *lfp;
+ int error;
+
+ NFSLOCKSTATE();
+ error = nfsrv_getclient(nd->nd_clientid, CLOPS_RENEW, &clp, NULL,
+ (nfsquad_t)((u_quad_t)0), 0, nd, p);
+ if (error == 0)
+ error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, fhp, 0);
+ /*
+ * Now we must free any delegations.
+ * The _SAFE iterator is used because nfsrv_freedeleg() is called on
+ * the entry being visited.
+ * NOTE(review): every entry on lfp->lf_deleg is freed without
+ * comparing stp->ls_clp against clp -- confirm callers rely on this.
+ */
+ if (error == 0) {
+ LIST_FOREACH_SAFE(stp, &lfp->lf_deleg, ls_file, nstp)
+ nfsrv_freedeleg(stp);
+ }
+ NFSUNLOCKSTATE();
+}
diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c b/sys/fs/nfsserver/nfs_nfsdsubs.c
index 0d7e4c73fe69..b09ec1b3a062 100644
--- a/sys/fs/nfsserver/nfs_nfsdsubs.c
+++ b/sys/fs/nfsserver/nfs_nfsdsubs.c
@@ -57,9 +57,6 @@ NFSD_VNET_DECLARE(int, nfs_rootfhset);
NFSD_VNET_DECLARE(uid_t, nfsrv_defaultuid);
NFSD_VNET_DECLARE(gid_t, nfsrv_defaultgid);
-NFSD_VNET_DEFINE(struct nfsdontlisthead, nfsrv_dontlisthead);
-
-
char nfs_v2pubfh[NFSX_V2FH];
struct nfsdontlisthead nfsrv_dontlisthead;
struct nfslayouthead nfsrv_recalllisthead;
@@ -1476,8 +1473,9 @@ int
nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
{
u_int32_t *tl;
- int error = 0, len, copylen;
+ int error = 0, len, copylen, namedlen;
+ namedlen = 0;
if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
len = fxdr_unsigned(int, *tl);
@@ -1493,6 +1491,11 @@ nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
copylen = NFSX_MYFH;
len = NFSM_RNDUP(len);
nd->nd_flag |= ND_DSSERVER;
+ } else if (len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
+ len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
+ copylen = NFSX_MYFH;
+ namedlen = len;
+ len = NFSM_RNDUP(len);
} else if (len < NFSRV_MINFH || len > NFSRV_MAXFH) {
if (nd->nd_flag & ND_NFSV4) {
if (len > 0 && len <= NFSX_V4FHMAX) {
@@ -1527,7 +1530,10 @@ nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp)
goto nfsmout;
}
NFSBCOPY(tl, (caddr_t)fhp->nfsrvfh_data, copylen);
- fhp->nfsrvfh_len = copylen;
+ if (namedlen > 0)
+ fhp->nfsrvfh_len = namedlen;
+ else
+ fhp->nfsrvfh_len = copylen;
nfsmout:
NFSEXITCODE2(error, nd);
return (error);
@@ -1623,7 +1629,7 @@ nfsrv_checkuidgid(struct nfsrv_descript *nd, struct nfsvattr *nvap)
if (nd->nd_cred->cr_uid == 0)
goto out;
if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) ||
- (NFSVNO_ISSETGID(nvap) && nvap->na_gid != nd->nd_cred->cr_gid &&
+ (NFSVNO_ISSETGID(nvap) &&
!groupmember(nvap->na_gid, nd->nd_cred)))
error = NFSERR_PERM;
@@ -1682,8 +1688,7 @@ nfsrv_fixattr(struct nfsrv_descript *nd, vnode_t vp,
}
if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) &&
NFSVNO_ISSETGID(nvap)) {
- if (nvap->na_gid == nd->nd_cred->cr_gid ||
- groupmember(nvap->na_gid, nd->nd_cred)) {
+ if (groupmember(nvap->na_gid, nd->nd_cred)) {
nd->nd_cred->cr_uid = 0;
nva.na_gid = nvap->na_gid;
change++;
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 0356877eaf05..7dcc83880bb9 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
vp->v_object = lowervp->v_object;
vn_irflag_set(vp, VIRF_PGREAD);
}
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY);
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY_PARENT);
if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
vp->v_vflag |= VV_ROOT;
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 7ab1fb6c1a25..4cddf24a5745 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -365,12 +365,7 @@ nullfs_statfs(struct mount *mp, struct statfs *sbp)
return (error);
}
- /* now copy across the "interesting" information and fake the rest */
sbp->f_type = mstat->f_type;
- sbp->f_flags &= MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID | MNT_UNION |
- MNT_NOSYMFOLLOW | MNT_AUTOMOUNTED | MNT_EXPORTED | MNT_IGNORE;
- mstat->f_flags &= ~(MNT_ROOTFS | MNT_AUTOMOUNTED | MNT_EXPORTED);
- sbp->f_flags |= mstat->f_flags;
sbp->f_bsize = mstat->f_bsize;
sbp->f_iosize = mstat->f_iosize;
sbp->f_blocks = mstat->f_blocks;
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 4747b1dd5b82..74c1a8f3acb6 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
/*
+ * Synchronize inotify flags with the lower vnode:
+ * - If the upper vnode has the flag set and the lower does not, then the lower
+ * vnode is unwatched and the upper vnode does not need to go through
+ * VOP_INOTIFY.
+ * - If the lower vnode is watched, then the upper vnode should go through
+ * VOP_INOTIFY, so copy the flag up.
+ *
+ * vp is the upper (nullfs) vnode and is the only vnode modified; lvp is
+ * the lower vnode and is only read.  flag is the single irflag bit to
+ * synchronize; null_bypass() calls this once with VIRF_INOTIFY and once
+ * with VIRF_INOTIFY_PARENT.
+ *
+ * NOTE(review): in the else-if arm the upper flag has just been read as
+ * clear, so the nested re-read looks redundant unless it is meant to
+ * tolerate a concurrent update -- confirm.
+ */
+static void
+null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag)
+{
+ if ((vn_irflag_read(vp) & flag) != 0) {
+ if (__predict_false((vn_irflag_read(lvp) & flag) == 0))
+ vn_irflag_unset(vp, flag);
+ } else if ((vn_irflag_read(lvp) & flag) != 0) {
+ if (__predict_false((vn_irflag_read(vp) & flag) == 0))
+ vn_irflag_set(vp, flag);
+ }
+}
+
+/*
* This is the 10-Apr-92 bypass routine.
* This version has been optimized for speed, throwing away some
* safety checks. It should still always work, but it's not as
@@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap)
lvp = *(vps_p[i]);
/*
- * Get rid of the transient hold on lvp.
+ * Get rid of the transient hold on lvp. Copy inotify
+ * flags up in case something is watching the lower
+ * layer.
+ *
* If lowervp was unlocked during VOP
* operation, nullfs upper vnode could have
* been reclaimed, which changes its v_vnlock
@@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap)
* upper (reclaimed) vnode.
*/
if (lvp != NULLVP) {
+ null_copy_inotify(old_vps[i], lvp,
+ VIRF_INOTIFY);
+ null_copy_inotify(old_vps[i], lvp,
+ VIRF_INOTIFY_PARENT);
if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE &&
old_vps[i]->v_vnlock != lvp->v_vnlock) {
VOP_UNLOCK(lvp);
@@ -385,7 +412,7 @@ null_lookup(struct vop_lookup_args *ap)
{
struct componentname *cnp = ap->a_cnp;
struct vnode *dvp = ap->a_dvp;
- int flags = cnp->cn_flags;
+ uint64_t flags = cnp->cn_flags;
struct vnode *vp, *ldvp, *lvp;
struct mount *mp;
int error;
@@ -403,17 +430,25 @@ null_lookup(struct vop_lookup_args *ap)
/*
* Renames in the lower mounts might create an inconsistent
- * configuration where lower vnode is moved out of the
- * directory tree remounted by our null mount. Do not try to
- * handle it fancy, just avoid VOP_LOOKUP() with DOTDOT name
- * which cannot be handled by VOP, at least passing over lower
- * root.
+ * configuration where lower vnode is moved out of the directory tree
+ * remounted by our null mount.
+ *
+ * Do not try to handle it fancy, just avoid VOP_LOOKUP() with DOTDOT
+ * name which cannot be handled by the VOP.
*/
- if ((ldvp->v_vflag & VV_ROOT) != 0 && (flags & ISDOTDOT) != 0) {
- KASSERT((dvp->v_vflag & VV_ROOT) == 0,
- ("ldvp %p fl %#x dvp %p fl %#x flags %#x",
- ldvp, ldvp->v_vflag, dvp, dvp->v_vflag, flags));
- return (ENOENT);
+ if ((flags & ISDOTDOT) != 0) {
+ struct nameidata *ndp;
+
+ if ((ldvp->v_vflag & VV_ROOT) != 0) {
+ KASSERT((dvp->v_vflag & VV_ROOT) == 0,
+ ("ldvp %p fl %#x dvp %p fl %#x flags %#jx",
+ ldvp, ldvp->v_vflag, dvp, dvp->v_vflag,
+ (uintmax_t)flags));
+ return (ENOENT);
+ }
+ ndp = vfs_lookup_nameidata(cnp);
+ if (ndp != NULL && vfs_lookup_isroot(ndp, ldvp))
+ return (ENOENT);
}
/*
@@ -528,7 +563,7 @@ null_setattr(struct vop_setattr_args *ap)
}
}
- return (null_bypass((struct vop_generic_args *)ap));
+ return (null_bypass(&ap->a_gen));
}
/*
@@ -539,7 +574,7 @@ null_stat(struct vop_stat_args *ap)
{
int error;
- if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
+ if ((error = null_bypass(&ap->a_gen)) != 0)
return (error);
ap->a_sb->st_dev = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
@@ -551,7 +586,7 @@ null_getattr(struct vop_getattr_args *ap)
{
int error;
- if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
+ if ((error = null_bypass(&ap->a_gen)) != 0)
return (error);
ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
@@ -584,7 +619,7 @@ null_access(struct vop_access_args *ap)
break;
}
}
- return (null_bypass((struct vop_generic_args *)ap));
+ return (null_bypass(&ap->a_gen));
}
static int
@@ -610,7 +645,7 @@ null_accessx(struct vop_accessx_args *ap)
break;
}
}
- return (null_bypass((struct vop_generic_args *)ap));
+ return (null_bypass(&ap->a_gen));
}
/*
diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c
new file mode 100644
index 000000000000..547de98c4c03
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.c
@@ -0,0 +1,1332 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the 9P client functions that prepare messages to be sent
+ * to the server. Every fileop typically has a function defined here to
+ * interact with the host.
+ */
+
+#include <vm/uma.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_transport.h>
+
+#define QEMU_HEADER 7
+#define P9FS_MAX_FID_CNT (1024 * 1024 * 1024)
+#define P9FS_ROOT_FID_NO 2
+#define P9FS_MIN_TAG 1
+#define P9FS_MAX_TAG 65535
+#define WSTAT_SIZE 47
+#define WSTAT_EXTENSION_SIZE 14
+
+static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure");
+static uma_zone_t p9fs_fid_zone;
+static uma_zone_t p9fs_req_zone;
+static uma_zone_t p9fs_buf_zone;
+
+SYSCTL_DECL(_vfs_p9fs);
+int p9_debug_level = 0;
+SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW,
+ &p9_debug_level, 0, "p9fs debug logging");
+
+static struct p9_req_t *p9_get_request(struct p9_client *c, int *error);
+static struct p9_req_t *p9_client_request(
+ struct p9_client *c, int8_t type, int *error, const char *fmt, ...);
+
+/* Return non-zero when the client is using the 9P2000.L (Linux) protocol. */
+inline int
+p9_is_proto_dotl(struct p9_client *clnt)
+{
+	int is_dotl;
+
+	is_dotl = (clnt->proto_version == p9_proto_2000L);
+	return (is_dotl);
+}
+
+/* Return non-zero when the client is using the 9P2000.u (Unix) protocol. */
+inline int
+p9_is_proto_dotu(struct p9_client *clnt)
+{
+	int is_dotu;
+
+	is_dotu = (clnt->proto_version == p9_proto_2000u);
+	return (is_dotu);
+}
+
+/*
+ * Parse the mount options into the client structure: select the transport
+ * named by the "trans" option and fill in the protocol defaults.  Always
+ * returns 0; the caller checks clnt->ops to detect a missing transport.
+ */
+static int
+p9_parse_opts(struct mount *mp, struct p9_client *clnt)
+{
+	char *trans;
+	int len;
+
+	/* virtio is the only transport available for now, so fall back to it. */
+	if (vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len) ==
+	    ENOENT)
+		trans = "virtio";
+
+	/* Protocol version and message size are fixed defaults for now. */
+	clnt->msize = 8192;
+	clnt->proto_version = p9_proto_2000L;
+
+	/* Look up the transport callbacks by name. */
+	clnt->ops = p9_get_trans_by_name(trans);
+
+	return (0);
+}
+
+/*
+ * Allocate a zeroed buffer from the buffer zone for a request or a response.
+ * The payload area (sdata) directly follows the p9_buffer header inside the
+ * same allocation.
+ */
+static struct p9_buffer *
+p9_buffer_alloc(int alloc_msize)
+{
+	struct p9_buffer *buf;
+
+	buf = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO);
+	buf->sdata = (char *)(buf + 1);
+	buf->capacity = alloc_msize;
+	buf->size = 0;
+	buf->offset = 0;
+
+	return (buf);
+}
+
+/*
+ * Return a request/response buffer to the buffer zone and clear the caller's
+ * pointer.  Header and payload share one allocation, so a single zone free
+ * releases both.
+ */
+static void
+p9_buffer_free(struct p9_buffer **buf)
+{
+	struct p9_buffer *victim;
+
+	victim = *buf;
+	*buf = NULL;
+	uma_zfree(p9fs_buf_zone, victim);
+}
+
+/*
+ * Release a request: give its tag back to the tag pool (if one was assigned),
+ * free the transmit and receive buffers, then free the request itself.
+ */
+static void
+p9_free_req(struct p9_client *clnt, struct p9_req_t *req)
+{
+	struct p9_buffer *txbuf;
+
+	txbuf = req->tc;
+	if (txbuf != NULL) {
+		/* The tag lives in the transmit buffer. */
+		if (txbuf->tag != P9_NOTAG)
+			p9_tag_destroy(clnt, txbuf->tag);
+		p9_buffer_free(&req->tc);
+	}
+	if (req->rc != NULL)
+		p9_buffer_free(&req->rc);
+
+	uma_zfree(p9fs_req_zone, req);
+}
+
+/*
+ * Allocate a request together with its transmit and receive buffers and
+ * reserve a tag for it.
+ *
+ * Returns the request with *error set to 0, or NULL with *error set to EAGAIN
+ * when no tag is available.  *error is written on every path so that callers
+ * can test it even if they did not initialize it.
+ */
+static struct p9_req_t *
+p9_get_request(struct p9_client *clnt, int *error)
+{
+	struct p9_req_t *req;
+	int alloc_msize;
+	uint16_t tag;
+
+	alloc_msize = P9FS_MTU;
+
+	req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO);
+	req->tc = p9_buffer_alloc(alloc_msize);
+	req->rc = p9_buffer_alloc(alloc_msize);
+
+	tag = p9_tag_create(clnt);
+	if (tag == P9_NOTAG) {
+		*error = EAGAIN;
+		/* Mark the buffer tagless so p9_free_req() skips the tag release. */
+		req->tc->tag = P9_NOTAG;
+		p9_free_req(clnt, req);
+		return (NULL);
+	}
+	req->tc->tag = tag;
+	*error = 0;
+	return (req);
+}
+
+/*
+ * Decode the header (size, type, tag) at the start of a response buffer and
+ * record the values in the buffer itself.  Returns 0 or the decode error.
+ */
+static int
+p9_parse_receive(struct p9_buffer *buf, struct p9_client *clnt)
+{
+	int32_t size;
+	int16_t tag;
+	int8_t type;
+	int error;
+
+	buf->offset = 0;
+
+	/* An unset size means only the QEMU-sized header is assumed present. */
+	if (buf->size == 0)
+		buf->size = QEMU_HEADER;
+
+	/* Initial header: size, type and tag, in that order. */
+	error = p9_buf_readf(buf, 0, "dbw", &size, &type, &tag);
+	if (error == 0) {
+		buf->size = size;
+		buf->id = type;
+		buf->tag = tag;
+		P9_DEBUG(TRANS, "%s: size=%d type: %d tag: %d\n",
+		    __func__, buf->size, buf->id, buf->tag);
+	}
+	return (error);
+}
+
+/*
+ * Pre-process a response: parse its header and, if the server answered with
+ * RERROR or RLERROR, decode and return the error code it carries.  Returns 0
+ * for any other reply type so the caller can go on to decode the payload.
+ */
+static int
+p9_client_check_return(struct p9_client *c, struct p9_req_t *req)
+{
+	char *ename;
+	int ecode;
+	int error;
+
+	/* Look at what arrived in the receive buffer. */
+	error = p9_parse_receive(req->rc, c);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * The error reply is encoded differently by the Unix (RERROR: name
+	 * string plus optional code) and Linux (RLERROR: code only) dialects.
+	 * A non-zero code is only logged at debug level because lookups of
+	 * missing files routinely end up here.
+	 */
+	switch (req->rc->id) {
+	case P9PROTO_RERROR:
+		error = p9_buf_readf(req->rc, c->proto_version, "s?d", &ename, &ecode);
+		if (error != 0)
+			goto out;
+		error = ecode;
+		if (error != 0) {
+			P9_DEBUG(PROTO, "RERROR error %d ename %s\n",
+			    error, ename);
+		}
+		/* The decoder allocated the name string; release it. */
+		free(ename, M_TEMP);
+		return (error);
+	case P9PROTO_RLERROR:
+		error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode);
+		if (error != 0)
+			goto out;
+		error = ecode;
+		if (error != 0)
+			P9_DEBUG(PROTO, "RLERROR error %d\n", error);
+		return (error);
+	default:
+		/* Not an error reply; nothing more to pre-process. */
+		return (0);
+	}
+
+out:
+	P9_DEBUG(ERROR, "couldn't parse receive buffer error%d\n", error);
+	return (error);
+}
+
+/* State machine helper: mark the client as disconnected. */
+void
+p9_client_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_DISCONNECT;
+}
+
+/* State machine helper: mark the client as starting its teardown. */
+void
+p9_client_begin_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_BEGIN_DISCONNECT;
+}
+
+/*
+ * Allocate a request and marshal a message of the given type into its
+ * transmit buffer using the format string and argument list.
+ *
+ * Returns the prepared request with *error set to 0, or NULL with *error set
+ * to the failure code.  Requests are refused with EIO once the client is
+ * disconnected; during teardown only TCLUNK messages are let through.
+ */
+static struct p9_req_t *
+p9_client_prepare_req(struct p9_client *c, int8_t type,
+    int req_size, int *error, const char *fmt, __va_list ap)
+{
+	struct p9_req_t *req;
+
+	P9_DEBUG(TRANS, "%s: client %p op %d\n", __func__, c, type);
+
+	/*
+	 * Before we start with the request, check if its possible to finish
+	 * this request. We are allowed to submit the request only if there
+	 * are no close sessions happening or else there can be race. If the
+	 * status is Disconnected, we stop any requests coming in after that.
+	 */
+	if (c->trans_status == P9FS_DISCONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/* Allow only cleanup clunk messages once teardown has started. */
+	if ((c->trans_status == P9FS_BEGIN_DISCONNECT) &&
+	    (type != P9PROTO_TCLUNK)) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/*
+	 * Allocate buffer for transferring and receiving data from host.
+	 * Test the returned pointer rather than *error: the caller's error
+	 * variable may hold a stale value on entry, and treating that as an
+	 * allocation failure would leak the request and its tag.
+	 */
+	req = p9_get_request(c, error);
+	if (req == NULL) {
+		P9_DEBUG(ERROR, "%s: request allocation failed.\n", __func__);
+		return (NULL);
+	}
+
+	/* Marshall the data according to QEMU standards */
+	*error = p9_buf_prepare(req->tc, type);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n",
+		    __func__, *error);
+		goto out;
+	}
+
+	*error = p9_buf_finalize(c, req->tc);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_finalize failed: %d \n",
+		    __func__, *error);
+		goto out;
+	}
+
+	return (req);
+out:
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/*
+ * Build a request, hand it to the transport and pre-process the reply.
+ * On success the request (with the reply in req->rc) is returned and *error
+ * is 0; on any failure the request is released, NULL is returned and *error
+ * holds the reason, including an error code reported by the server.
+ */
+static struct p9_req_t *
+p9_client_request(struct p9_client *c, int8_t type, int *error,
+    const char *fmt, ...)
+{
+	struct p9_req_t *req;
+	va_list args;
+
+	va_start(args, fmt);
+	req = p9_client_prepare_req(c, type, c->msize, error, fmt, args);
+	va_end(args);
+
+	/* The request could not be allocated or marshalled. */
+	if (*error != 0)
+		return (NULL);
+
+	/* Submit through the transport, then inspect the reply header. */
+	*error = c->ops->request(c->handle, req);
+	if (*error == 0) {
+		*error = p9_client_check_return(c, req);
+		if (*error == 0)
+			return (req);
+	} else {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, *error);
+	}
+
+	p9_free_req(c, req);
+	return (NULL);
+}
+
+/*
+ * Reserve a tag from the client's tag pool.  Returns P9_NOTAG when the pool
+ * has no units left.
+ */
+uint16_t
+p9_tag_create(struct p9_client *clnt)
+{
+	int unit;
+
+	unit = alloc_unr(&clnt->tagpool);
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, unit);
+
+	/* alloc_unr() reports an exhausted pool with -1. */
+	return (unit == -1 ? P9_NOTAG : unit);
+}
+
+/* Give a tag back to the client's tag pool. */
+void
+p9_tag_destroy(struct p9_client *clnt, uint16_t tag)
+{
+	struct unrhdr *pool;
+
+	P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag);
+
+	pool = &clnt->tagpool;
+	free_unr(pool, tag);
+}
+
+/*
+ * Allocate a fid structure and reserve a fid number from the client's fid
+ * pool.  The new fid starts with mode and uid set to -1.  Returns NULL when
+ * the pool has no numbers left.
+ */
+struct p9_fid *
+p9_fid_create(struct p9_client *clnt)
+{
+	struct p9_fid *newfid;
+
+	newfid = uma_zalloc(p9fs_fid_zone, M_WAITOK | M_ZERO);
+	newfid->fid = alloc_unr(&clnt->fidpool);
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, newfid->fid);
+
+	/* alloc_unr() reports an exhausted pool with -1. */
+	if (newfid->fid != -1) {
+		newfid->clnt = clnt;
+		newfid->uid = -1;
+		newfid->mode = -1;
+		return (newfid);
+	}
+
+	uma_zfree(p9fs_fid_zone, newfid);
+	return (NULL);
+}
+
+/*
+ * Release a fid: return its number to the owning client's fid pool and free
+ * the structure.
+ */
+void
+p9_fid_destroy(struct p9_fid *fid)
+{
+	struct p9_client *owner;
+
+	P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid);
+
+	owner = fid->clnt;
+	free_unr(&owner->fidpool, fid->fid);
+	uma_zfree(p9fs_fid_zone, fid);
+}
+
+/*
+ * Negotiate the 9P protocol version and message size with the server.
+ *
+ * Sends TVERSION carrying the client's msize and the version string that
+ * matches c->proto_version, then records the version the server answered
+ * with and lowers c->msize if the server offered a smaller one.
+ *
+ * Returns 0 on success, EINVAL for an unknown c->proto_version, ENOMEM when
+ * the server replies with an unrecognized version string, or the error from
+ * the request/decode step.
+ */
+int
+p9_client_version(struct p9_client *c)
+{
+	int error;
+	struct p9_req_t *req;
+	const char *wanted;
+	char *version;
+	int msize;
+
+	error = 0;
+
+	P9_DEBUG(PROTO, "TVERSION msize %d protocol %d\n",
+	    c->msize, c->proto_version);
+
+	switch (c->proto_version) {
+	case p9_proto_2000L:
+		wanted = "9P2000.L";
+		break;
+	case p9_proto_2000u:
+		wanted = "9P2000.u";
+		break;
+	case p9_proto_legacy:
+		wanted = "9P2000";
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds",
+	    c->msize, wanted);
+
+	/* Always return the relevant error code */
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RVERSION msize %d %s\n", msize, version);
+
+	/* The longer names must be tested before the bare "9P2000" prefix. */
+	if (!strncmp(version, "9P2000.L", 8))
+		c->proto_version = p9_proto_2000L;
+	else if (!strncmp(version, "9P2000.u", 8))
+		c->proto_version = p9_proto_2000u;
+	else if (!strncmp(version, "9P2000", 6))
+		c->proto_version = p9_proto_legacy;
+	else
+		error = ENOMEM;
+
+	/* limit the msize .*/
+	if (error == 0 && msize < c->msize)
+		c->msize = msize;
+
+	/*
+	 * The "s" decoder hands back an allocated string (the error path in
+	 * p9_client_check_return() frees its counterpart with M_TEMP), so it
+	 * must be released once the comparison is done.
+	 */
+	free(version, M_TEMP);
+out:
+	p9_free_req(c, req);
+	return (error);
+}
+
+/*
+ * Create the UMA zones used by the client.  Called once from module
+ * initialization so the zones are set up a single time.
+ */
+void
+p9_init_zones(void)
+{
+	const size_t bufsize = sizeof(struct p9_buffer) + P9FS_MTU;
+
+	/* Zone for fid structures. */
+	p9fs_fid_zone = uma_zcreate("p9fs fid zone", sizeof(struct p9_fid),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone for request structures. */
+	p9fs_req_zone = uma_zcreate("p9fs req zone", sizeof(struct p9_req_t),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone for buffers: header plus an MTU-sized payload area. */
+	p9fs_buf_zone = uma_zcreate("p9fs buf zone", bufsize,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+/* Destroy the fid, request and buffer zones created by p9_init_zones(). */
+void
+p9_destroy_zones(void)
+{
+	uma_zone_t zones[] = { p9fs_fid_zone, p9fs_req_zone, p9fs_buf_zone };
+	size_t i;
+
+	for (i = 0; i < sizeof(zones) / sizeof(zones[0]); i++)
+		uma_zdestroy(zones[i]);
+}
+
+/*
+ * Create a client for a mount: parse the options, set up the fid and tag
+ * pools, open the transport for mount_tag and negotiate the protocol version.
+ *
+ * Returns the new client with *error set to 0.  On failure returns NULL with
+ * *error set, after undoing whatever had been set up so far (transport
+ * handle, unit pools, mutex) so nothing is leaked.
+ */
+struct p9_client *
+p9_client_create(struct mount *mp, int *error, const char *mount_tag)
+{
+	struct p9_client *clnt;
+
+	clnt = malloc(sizeof(struct p9_client), M_P9CLNT, M_WAITOK | M_ZERO);
+	mtx_init(&clnt->clnt_mtx, "p9clnt", NULL, MTX_DEF);
+
+	/* Parse should have set trans_mod */
+	*error = p9_parse_opts(mp, clnt);
+	if (*error != 0)
+		goto out_mtx;
+
+	if (clnt->ops == NULL) {
+		*error = EINVAL;
+		P9_DEBUG(ERROR, "%s: no transport\n", __func__);
+		goto out_mtx;
+	}
+
+	/* All the structures from here are protected by the lock clnt_mtx */
+	init_unrhdr(&clnt->fidpool, P9FS_ROOT_FID_NO, P9FS_MAX_FID_CNT,
+	    &clnt->clnt_mtx);
+	init_unrhdr(&clnt->tagpool, P9FS_MIN_TAG, P9FS_MAX_TAG,
+	    &clnt->clnt_mtx);
+
+	P9_DEBUG(TRANS, "%s: clnt %p trans %p msize %d protocol %d\n",
+	    __func__, clnt, clnt->ops, clnt->msize, clnt->proto_version);
+
+	*error = clnt->ops->create(mount_tag, &clnt->handle);
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: transport create failed .%d \n",
+		    __func__, *error);
+		goto out_pools;
+	}
+	clnt->trans_status = P9FS_CONNECT;
+
+	*error = p9_client_version(clnt);
+	if (*error != 0)
+		goto out_trans;
+
+	P9_DEBUG(TRANS, "%s: client creation succeeded.\n", __func__);
+	return (clnt);
+
+	/* Unwind in the reverse order of setup. */
+out_trans:
+	clnt->ops->close(clnt->handle);
+out_pools:
+	clear_unrhdr(&clnt->tagpool);
+	clear_unrhdr(&clnt->fidpool);
+out_mtx:
+	mtx_destroy(&clnt->clnt_mtx);
+	free(clnt, M_P9CLNT);
+	return (NULL);
+}
+
+/*
+ * Destroy the client: close the transport, clear the fid and tag pools,
+ * destroy the mutex that protected them and free the client structure.
+ */
+void
+p9_client_destroy(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: client %p\n", __func__, clnt);
+	clnt->ops->close(clnt->handle);
+
+	P9_DEBUG(TRANS, "%s : Destroying fidpool\n", __func__);
+	clear_unrhdr(&clnt->fidpool);
+
+	P9_DEBUG(TRANS, "%s : Destroying tagpool\n", __func__);
+	clear_unrhdr(&clnt->tagpool);
+
+	/* The pools reference clnt_mtx, so it is destroyed only after them. */
+	mtx_destroy(&clnt->clnt_mtx);
+
+	free(clnt, M_P9CLNT);
+}
+
+/*
+ * Attach a user to the file system: allocate a fid, send TATTACH for it and
+ * store the qid returned by the server in the fid.  The request is always
+ * sent with P9PROTO_NOFID as the auth fid; the afid argument is not used.
+ *
+ * Returns the new fid, or NULL with *error set.
+ */
+struct p9_fid *
+p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
+    const char *uname, uid_t n_uname, const char *aname, int *error)
+{
+	struct p9_qid rootqid;
+	struct p9_fid *fid;
+	struct p9_req_t *req;
+
+	P9_DEBUG(PROTO, "TATTACH uname=%s aname=%s, n_uname=%d\n",
+	    uname, aname, n_uname);
+
+	fid = p9_fid_create(clnt);
+	if (fid == NULL) {
+		*error = ENOMEM;
+		return (NULL);
+	}
+	fid->uid = n_uname;
+
+	req = p9_client_request(clnt, P9PROTO_TATTACH, error, "ddssd", fid->fid,
+	    P9PROTO_NOFID, uname, aname, n_uname);
+	if (*error == 0) {
+		*error = p9_buf_readf(req->rc, clnt->proto_version, "Q",
+		    &rootqid);
+		if (*error == 0) {
+			P9_DEBUG(PROTO, "RATTACH qid %x.%llx.%x\n",
+			    rootqid.type, (unsigned long long)rootqid.path,
+			    rootqid.version);
+
+			memmove(&fid->qid, &rootqid, sizeof(struct p9_qid));
+			p9_free_req(clnt, req);
+			return (fid);
+		}
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n",
+		    __func__, *error);
+	}
+
+	/* Failure: drop the request (if any) and the fid allocated above. */
+	if (req != NULL)
+		p9_free_req(clnt, req);
+	p9_fid_destroy(fid);
+
+	return (NULL);
+}
+
+/*
+ * Ask the server to remove the file or directory referenced by fid.
+ * Returns 0 or the error from the request.  The local fid structure is not
+ * released here.
+ */
+int
+p9_client_remove(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	P9_DEBUG(PROTO, "TREMOVE fid %d\n", fid->fid);
+
+	clnt = fid->clnt;
+	error = 0;
+
+	req = p9_client_request(clnt, P9PROTO_TREMOVE, &error, "d", fid->fid);
+	if (error == 0)
+		p9_free_req(clnt, req);
+	else
+		P9_DEBUG(PROTO, "RREMOVE fid %d\n", fid->fid);
+
+	return (error);
+}
+
+/*
+ * Send TUNLINKAT to remove the entry "name" from the directory referenced by
+ * dfid, passing flags through to the server.  Returns 0 or the request error.
+ */
+int
+p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	clnt = dfid->clnt;
+	error = 0;
+
+	req = p9_client_request(clnt, P9PROTO_TUNLINKAT, &error, "dsd",
+	    dfid->fid, name, flags);
+	if (error == 0)
+		p9_free_req(clnt, req);
+	else
+		P9_DEBUG(PROTO, "RUNLINKAT fid %d\n", dfid->fid);
+
+	return (error);
+}
+
+/*
+ * Tell the server that the file represented by fid is no longer needed by
+ * the client; every fid allocated on the server needs a clunk to be
+ * destroyed.  The local fid is destroyed whether or not the request
+ * succeeded.  A NULL fid is logged and treated as success.
+ */
+int
+p9_client_clunk(struct p9_fid *fid)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	if (fid == NULL) {
+		P9_DEBUG(ERROR, "%s: clunk with NULL fid is bad\n", __func__);
+		return (0);
+	}
+
+	P9_DEBUG(PROTO, "TCLUNK fid %d \n", fid->fid);
+
+	error = 0;
+	clnt = fid->clnt;
+	req = p9_client_request(clnt, P9PROTO_TCLUNK, &error, "d", fid->fid);
+	if (req != NULL) {
+		P9_DEBUG(PROTO, "RCLUNK fid %d\n", fid->fid);
+		p9_free_req(clnt, req);
+	}
+
+	/* Release the local fid regardless of the outcome. */
+	p9_fid_destroy(fid);
+	return (error);
+}
+
+/*
+ * Walk nwnames path components starting from oldfid.  This is used on
+ * lookups, and also to obtain a fresh fid for a file that is about to be
+ * opened.
+ *
+ * When clone is non-zero a new fid is allocated and returned; otherwise
+ * oldfid itself is walked and returned.  With nwnames == 0 the resulting fid
+ * takes oldfid's qid.  Returns NULL with *error set on failure; a reply with
+ * fewer qids than requested names is reported as ENOENT.
+ */
+struct p9_fid *
+p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames,
+    int clone, int *error)
+{
+	struct p9_client *clnt;
+	struct p9_fid *fid;
+	struct p9_qid *wqids;
+	struct p9_req_t *req;
+	uint16_t nwqids, count;
+
+	clnt = oldfid->clnt;
+	wqids = NULL;
+	nwqids = 0;
+
+	/*
+	 * Before, we go and create fid, make sure we are not tearing
+	 * down. Only then we create.
+	 * Allow only cleanup clunk messages once we are starting to teardown.
+	 */
+	if (clnt->trans_status != P9FS_CONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	if (clone) {
+		fid = p9_fid_create(clnt);
+		if (fid == NULL) {
+			*error = ENOMEM;
+			return (NULL);
+		}
+		fid->uid = oldfid->uid;
+	} else
+		fid = oldfid;
+
+	/*
+	 * Only index the name array when it has at least one entry; with
+	 * nwnames == 0 the expression nwnames - 1 would be -1.
+	 */
+	P9_DEBUG(PROTO, "TWALK fids %d,%d nwnames %u wname %s\n",
+	    oldfid->fid, fid->fid, nwnames,
+	    (wnames != NULL && nwnames > 0) ? wnames[nwnames - 1] : NULL);
+
+	/*
+	 * The newfid is for the component in search. We are preallocating as
+	 * qemu on other side allocates or returns a fid if it sees a match
+	 */
+	req = p9_client_request(clnt, P9PROTO_TWALK, error, "ddT", oldfid->fid,
+	    fid->fid, wnames, nwnames);
+	if (*error != 0) {
+		if (fid != oldfid)
+			p9_fid_destroy(fid);
+		return (NULL);
+	}
+
+	*error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids,
+	    &wqids);
+	if (*error != 0)
+		goto out;
+
+	P9_DEBUG(PROTO, "RWALK nwqid %d:\n", nwqids);
+
+	/* Fewer qids than names means some component was not found. */
+	if (nwqids != nwnames) {
+		*error = ENOENT;
+		goto out;
+	}
+
+	for (count = 0; count < nwqids; count++)
+		P9_DEBUG(TRANS, "%s: [%d] %x.%llx.%x\n",
+		    __func__, count, wqids[count].type,
+		    (unsigned long long)wqids[count].path,
+		    wqids[count].version);
+
+	/* The fid now names the last component walked. */
+	if (nwnames)
+		memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
+	else
+		fid->qid = oldfid->qid;
+
+	p9_free_req(clnt, req);
+	free(wqids, M_TEMP);
+	return (fid);
+
+out:
+	p9_free_req(clnt, req);
+	if (wqids)
+		free(wqids, M_TEMP);
+	/* Only a fid allocated here is clunked; oldfid stays with the caller. */
+	if (fid && fid != oldfid)
+		p9_client_clunk(fid);
+	return (NULL);
+}
+
+/*
+ * Open the file referenced by fid with the given mode, using TLOPEN for the
+ * 9P2000.L protocol and TOPEN otherwise.  On success the qid and mtu from
+ * the reply and the mode are stored in the fid.  Returns EINVAL if the fid
+ * already has a mode set.
+ */
+int
+p9_client_open(struct p9_fid *fid, int mode)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int dotl, error, mtu;
+
+	clnt = fid->clnt;
+	dotl = p9_is_proto_dotl(clnt);
+	error = 0;
+	mtu = 0;
+
+	P9_DEBUG(PROTO, "%s fid %d mode %d\n",
+	    dotl ? "TLOPEN" : "TOPEN",
+	    fid->fid, mode);
+
+	/* A mode other than -1 means the fid has been opened before. */
+	if (fid->mode != -1)
+		return (EINVAL);
+
+	if (dotl) {
+		req = p9_client_request(clnt, P9PROTO_TLOPEN, &error, "dd",
+		    fid->fid, mode);
+	} else {
+		req = p9_client_request(clnt, P9PROTO_TOPEN, &error, "db",
+		    fid->fid, mode);
+	}
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid,
+	    &mtu);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "%s qid %x.%llx.%x mtu %x\n",
+		    dotl ? "RLOPEN" : "ROPEN",
+		    (fid->qid).type, (unsigned long long)(fid->qid).path,
+		    (fid->qid).version, mtu);
+
+		fid->mode = mode;
+		fid->mtu = mtu;
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Read directory entries for fid starting at offset into data, which must
+ * hold at least count bytes.  The request size is capped by the fid's mtu
+ * and the client's msize.
+ *
+ * Returns the number of bytes copied into data, or -error on failure.  The
+ * byte count reported by the server is clamped to the size that was
+ * requested so a misbehaving server cannot overrun the caller's buffer.
+ */
+int
+p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset,
+    uint32_t count)
+{
+	int error;
+	uint32_t rsize;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+
+	P9_DEBUG(PROTO, "TREADDIR fid %d offset %llu count %d\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	error = 0;
+	rsize = fid->mtu;
+	clnt = fid->clnt;
+
+	if (rsize == 0 || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	if (count < rsize)
+		rsize = count;
+
+	req = p9_client_request(clnt, P9PROTO_TREADDIR, &error, "dqd",
+	    fid->fid, offset, rsize);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: couldn't allocate req in client_readdir\n",
+		    __func__);
+		return (-error);
+	}
+
+	/* count is reused to receive the length reported by the server. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count,
+	    &dataptr);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p0_buf_readf failed: %d\n",
+		    __func__, error);
+		p9_free_req(clnt, req);
+		return (-error);
+	}
+
+	/* Never copy more than was asked for (rsize <= caller's count). */
+	if (count > rsize) {
+		P9_DEBUG(PROTO, "RREADDIR count (%u > %u)\n", count, rsize);
+		count = rsize;
+	}
+
+	P9_DEBUG(PROTO, "RREADDIR count %u\n", count);
+
+	/* Copy back the data into the input buffer. */
+	memmove(data, dataptr, count);
+	p9_free_req(clnt, req);
+	return (count);
+}
+
+/*
+ * Read up to count bytes at offset from the file referenced by fid into
+ * data.  The request size is capped by the fid's mtu and the client's msize,
+ * and the length reported by the server is clamped to the requested size.
+ *
+ * Returns the number of bytes read, or -error on failure so that callers can
+ * tell error codes from byte counts.  A reply carrying zero bytes is
+ * reported as -EIO.
+ */
+int
+p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	char *dataptr;
+	int error, rsize;
+
+	clnt = fid->clnt;
+	rsize = fid->mtu;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TREAD fid %d offset %llu %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	if (!rsize || rsize > clnt->msize)
+		rsize = clnt->msize;
+
+	if (count < rsize)
+		rsize = count;
+
+	/* At this stage, we only have 8K buffers so only transfer */
+	req = p9_client_request(clnt, P9PROTO_TREAD, &error, "dqd", fid->fid,
+	    offset, rsize);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request\n", __func__);
+		return (-error);
+	}
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count,
+	    &dataptr);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n",
+		    __func__, error);
+		goto out;
+	}
+
+	if (rsize < count) {
+		P9_DEBUG(PROTO, "RREAD count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
+
+	P9_DEBUG(PROTO, "RREAD count %d\n", count);
+
+	if (count == 0) {
+		/*
+		 * Keep error positive here: the out label negates it, and a
+		 * pre-negated value would come back as +EIO and be mistaken
+		 * for a byte count.
+		 */
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error in client_read \n", __func__);
+		goto out;
+	}
+
+	/* Copy back the data into the input buffer. */
+	memmove(data, dataptr, count);
+	p9_free_req(clnt, req);
+	return (count);
+out:
+	p9_free_req(clnt, req);
+	return (-error);
+}
+
+/*
+ * Write up to count bytes from data at offset to the file referenced by fid.
+ * The amount sent is capped by the fid's mtu and the client's msize.
+ *
+ * Returns the number of bytes written as reported by the server (never more
+ * than was sent), or -error on failure so that callers can tell error codes
+ * from byte counts.
+ */
+
+int
+p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error, ret, rsize;
+
+	clnt = fid->clnt;
+	error = 0;
+	ret = 0;
+
+	P9_DEBUG(PROTO, "TWRITE fid %d offset %llu %u\n",
+	    fid->fid, (unsigned long long) offset, count);
+
+	/* Cap the transfer at the smaller of the fid mtu and client msize. */
+	rsize = fid->mtu;
+	if (rsize == 0 || rsize > clnt->msize)
+		rsize = clnt->msize;
+	if (count > rsize)
+		count = rsize;
+
+	/*
+	 * The payload is sent as a data blob; a zero-copy path could switch
+	 * this to a direct uio copy.
+	 */
+	req = p9_client_request(clnt, P9PROTO_TWRITE, &error, "dqD", fid->fid,
+	    offset, count, data);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed allocate request: %d\n",
+		    __func__, error);
+		return (-error);
+	}
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n",
+		    __func__, error);
+		p9_free_req(clnt, req);
+		return (-error);
+	}
+
+	/* Do not report more bytes than were actually sent. */
+	if (count < ret) {
+		P9_DEBUG(PROTO, "RWRITE count (%d > %d)\n", count, ret);
+		ret = count;
+	}
+	P9_DEBUG(PROTO, "RWRITE count %d\n", ret);
+
+	/* NOTE(review): this tests the sent count, not ret -- confirm intent. */
+	if (count == 0) {
+		error = EIO;
+		P9_DEBUG(ERROR, "%s: EIO error\n", __func__);
+		p9_free_req(clnt, req);
+		return (-error);
+	}
+
+	p9_free_req(clnt, req);
+	return (ret);
+}
+
+
+/*
+ * Send TCREATE to create "name" under the directory referenced by fid with
+ * the given permissions, open mode and extension string.  On success the
+ * mode and the mtu from the reply are stored in the fid.  Returns EINVAL if
+ * the fid already has a mode set.
+ */
+int
+p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
+    char *extension)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	struct p9_qid newqid;
+	int error, mtu;
+
+	P9_DEBUG(PROTO, "TCREATE fid %d name %s perm %d mode %d\n",
+	    fid->fid, name, perm, mode);
+
+	error = 0;
+	clnt = fid->clnt;
+
+	/* A mode other than -1 means the fid has been opened before. */
+	if (fid->mode != -1)
+		return (EINVAL);
+
+	req = p9_client_request(clnt, P9PROTO_TCREATE, &error, "dsdb?s",
+	    fid->fid, name, perm, mode, extension);
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &newqid, &mtu);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "RCREATE qid %x.%jx.%x mtu %x\n",
+		    newqid.type, (uintmax_t)newqid.path, newqid.version, mtu);
+		fid->mode = mode;
+		fid->mtu = mtu;
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Send TSTATFS for fid and decode the reply into *stat.  Returns 0 or the
+ * error from the request or the decode step.
+ */
+int
+p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	clnt = fid->clnt;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TSTATFS fid %d\n", fid->fid);
+
+	req = p9_client_request(clnt, P9PROTO_TSTATFS, &error, "d", fid->fid);
+	if (error != 0)
+		return (error);
+
+	/* Fields are decoded in wire order. */
+	error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd",
+	    &stat->type, &stat->bsize, &stat->blocks, &stat->bfree,
+	    &stat->bavail, &stat->files, &stat->ffree, &stat->fsid,
+	    &stat->namelen);
+	if (error == 0) {
+		P9_DEBUG(PROTO, "RSTATFS fid %d type 0x%jx bsize %ju "
+		    "blocks %ju bfree %ju bavail %ju files %ju ffree %ju "
+		    "fsid %ju namelen %ju\n",
+		    fid->fid, (uintmax_t)stat->type,
+		    (uintmax_t)stat->bsize, (uintmax_t)stat->blocks,
+		    (uintmax_t)stat->bfree, (uintmax_t)stat->bavail,
+		    (uintmax_t)stat->files, (uintmax_t)stat->ffree,
+		    (uintmax_t)stat->fsid, (uintmax_t)stat->namelen);
+	}
+
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Rename "oldname" in the directory referenced by oldfid to "newname" in the
+ * directory referenced by newfid.  TRENAMEAT is used rather than TRENAME
+ * because that is the rename variant the QEMU server supports.  Returns 0 or
+ * the request error.
+ */
+int
+p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid,
+    char *newname)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	P9_DEBUG(PROTO, "TRENAMEAT oldfid %d oldname %s newfid %d newfid %s",
+	    oldfid->fid, oldname, newfid->fid, newname);
+
+	clnt = oldfid->clnt;
+	error = 0;
+
+	req = p9_client_request(clnt, P9PROTO_TRENAMEAT, &error, "dsds",
+	    oldfid->fid, oldname, newfid->fid, newname);
+	if (error == 0)
+		p9_free_req(clnt, req);
+
+	return (error);
+}
+
+/*
+ * Send TSYMLINK to create the symbolic link "name", pointing at symtgt, in
+ * the directory referenced by fid.  Returns 0 or the error from the request
+ * or the decode step; the request is released on every path.
+ */
+int
+p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid)
+{
+	int error;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+	struct p9_qid qid;
+
+	error = 0;
+	clnt = fid->clnt;
+
+	P9_DEBUG(PROTO, "TSYMLINK fid %d name %s\n", fid->fid, name);
+
+	req = p9_client_request(clnt, P9PROTO_TSYMLINK, &error, "dssd",
+	    fid->fid, name, symtgt, gid);
+
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RSYMLINK qid %x.%jx.%x\n",
+	    qid.type, (uintmax_t)qid.path, qid.version);
+
+out:
+	/* Release the request (and its tag) on success and failure alike. */
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Send TLINK to create the hard link "name" in the directory referenced by
+ * dfid, referring to the file referenced by oldfid.  Returns 0 or the
+ * request error.
+ */
+int
+p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int error;
+
+	clnt = dfid->clnt;
+	error = 0;
+
+	P9_DEBUG(PROTO, "TLINK dfid %d oldfid %d name %s\n",
+	    dfid->fid, oldfid->fid, name);
+
+	req = p9_client_request(clnt, P9PROTO_TLINK, &error, "dds", dfid->fid,
+	    oldfid->fid, name);
+	if (error == 0) {
+		p9_free_req(clnt, req);
+		return (0);
+	}
+
+	return (error);
+}
+
+/*
+ * Send TREADLINK for fid and hand the decoded link target back through
+ * *target.  Returns 0 or the error from the request or the decode step; the
+ * request is released on every path.
+ */
+int
+p9_readlink(struct p9_fid *fid, char **target)
+{
+	int error;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	error = 0;
+	clnt = fid->clnt;
+
+	P9_DEBUG(PROTO, "TREADLINK fid %d\n", fid->fid);
+
+	req = p9_client_request(clnt, P9PROTO_TREADLINK, &error, "d", fid->fid);
+	if (error != 0)
+		return (error);
+
+	error = p9_buf_readf(req->rc, clnt->proto_version, "s", target);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error);
+		goto out;
+	}
+
+	P9_DEBUG(PROTO, "RREADLINK target %s \n", *target);
+
+out:
+	/* Release the request (and its tag) on success and failure alike. */
+	p9_free_req(clnt, req);
+	return (error);
+}
+
+/*
+ * Send TGETATTR for fid with the given request mask and decode the reply
+ * into *stat_dotl.  Returns 0 or the error from the request or the decode
+ * step.
+ */
+int
+p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
+    uint64_t request_mask)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int err;
+
+	err = 0;
+
+	P9_DEBUG(PROTO, "TGETATTR fid %d mask %ju\n",
+	    fid->fid, (uintmax_t)request_mask);
+
+	clnt = fid->clnt;
+	req = p9_client_request(clnt, P9PROTO_TGETATTR, &err, "dq", fid->fid,
+	    request_mask);
+	if (req == NULL) {
+		P9_DEBUG(ERROR, "%s: allocation failed %d", __func__, err);
+		return (err);
+	}
+
+	err = p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl);
+	if (err != 0) {
+		P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err);
+		p9_free_req(clnt, req);
+		return (err);
+	}
+
+	/* The reply is fully decoded; only stat_dotl is logged below. */
+	p9_free_req(clnt, req);
+	P9_DEBUG(PROTO, "RGETATTR fid %d qid %x.%jx.%x st_mode %8.8x "
+	    "uid %d gid %d nlink %ju rdev %jx st_size %jx blksize %ju "
+	    "blocks %ju st_atime_sec %ju, st_atime_nsec %ju "
+	    "st_mtime_sec %ju, st_mtime_nsec %ju st_ctime_sec %ju "
+	    "st_ctime_nsec %ju st_btime_sec %ju, st_btime_nsec %ju "
+	    "st_stat %ju, st_data_version %ju \n", fid->fid,
+	    stat_dotl->qid.type, (uintmax_t)stat_dotl->qid.path,
+	    stat_dotl->qid.version, stat_dotl->st_mode, stat_dotl->st_uid,
+	    stat_dotl->st_gid, (uintmax_t)stat_dotl->st_nlink,
+	    (uintmax_t)stat_dotl->st_rdev, (uintmax_t)stat_dotl->st_size,
+	    (uintmax_t)stat_dotl->st_blksize,
+	    (uintmax_t)stat_dotl->st_blocks, (uintmax_t)stat_dotl->st_atime_sec,
+	    (uintmax_t)stat_dotl->st_atime_nsec, (uintmax_t)stat_dotl->st_mtime_sec,
+	    (uintmax_t)stat_dotl->st_mtime_nsec, (uintmax_t)stat_dotl->st_ctime_sec,
+	    (uintmax_t)stat_dotl->st_ctime_nsec, (uintmax_t)stat_dotl->st_btime_sec,
+	    (uintmax_t)stat_dotl->st_btime_nsec, (uintmax_t)stat_dotl->st_gen,
+	    (uintmax_t)stat_dotl->st_data_version);
+
+	return (err);
+}
+
+/*
+ * Send TSETATTR for fid with the attributes in *p9attr.  Returns 0 or the
+ * error from the request.
+ */
+int
+p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
+{
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+	int err;
+
+	err = 0;
+
+	P9_DEBUG(PROTO, "TSETATTR fid %d"
+	    " valid %x mode %x uid %d gid %d size %ju"
+	    " atime_sec %ju atime_nsec %ju"
+	    " mtime_sec %ju mtime_nsec %ju\n",
+	    fid->fid,
+	    p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+	    (uintmax_t)p9attr->size, (uintmax_t)p9attr->atime_sec,
+	    (uintmax_t)p9attr->atime_nsec, (uintmax_t)p9attr->mtime_sec,
+	    (uintmax_t)p9attr->mtime_nsec);
+
+	clnt = fid->clnt;
+
+	/* Any failure inside p9_client_request() shows up as req == NULL. */
+	req = p9_client_request(clnt, P9PROTO_TSETATTR, &err, "dA", fid->fid,
+	    p9attr);
+	if (req != NULL)
+		p9_free_req(clnt, req);
+	else
+		P9_DEBUG(ERROR, "%s: allocation failed %d\n", __func__, err);
+
+	return (err);
+}
+
diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h
new file mode 100644
index 000000000000..4eb82c0232f4
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* 9P client definitions */
+
+#ifndef FS_P9FS_P9_CLIENT_H
+#define FS_P9FS_P9_CLIENT_H
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/_unrhdr.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/dirent.h>
+#include <sys/stdarg.h>
+
+#include <fs/p9fs/p9_protocol.h>
+
+/* 9P protocol versions */
+enum p9_proto_versions {
+ p9_proto_legacy, /* legacy version */
+ p9_proto_2000u, /* Unix version */
+ p9_proto_2000L, /* Linux version */
+};
+
+/* P9 Request exchanged between Host and Guest */
+struct p9_req_t {
+ struct p9_buffer *tc; /* request buffer */
+ struct p9_buffer *rc; /* response buffer */
+};
+
+/* 9P transport status */
+enum transport_status {
+ P9FS_CONNECT, /* transport is connected */
+ P9FS_BEGIN_DISCONNECT,/* transport has begun to disconnect */
+ P9FS_DISCONNECT, /* transport has been disconnected */
+};
+
+/* This is set by QEMU so we will oblige */
+#define P9FS_MTU 8192
+
+/*
+ * Even though we have a 8k buffer, Qemu is typically doing 8168
+ * because of a HDR of 24. Use that amount for transfers so that we don't
+ * drop anything.
+ */
+#define P9FS_IOUNIT (P9FS_MTU - 24)
+#define P9FS_DIRENT_LEN 256
+#define P9_NOTAG 0
+
+/* Client state information */
+struct p9_client {
+ struct p9_trans_module *ops; /* module API instantiated with this client */
+ void *handle; /* module-specific client handle */
+ struct mtx clnt_mtx; /* mutex to lock the client */
+ struct mtx req_mtx; /* mutex to lock the request buffer */
+ struct cv req_cv; /* condition variable on which to wake up thread */
+ unsigned int msize; /* maximum data size */
+ unsigned char proto_version; /* 9P version to use (enum p9_proto_versions value) */
+ struct unrhdr fidpool; /* fid handle accounting for session */
+ struct unrhdr tagpool; /* transaction id accounting for session */
+ enum transport_status trans_status; /* transport instance state */
+};
+
+/* The main fid structure which keeps track of the file.*/
+struct p9_fid {
+ struct p9_client *clnt; /* the instantiating 9P client */
+ uint32_t fid; /* numeric identifier */
+ int mode; /* current mode of this fid */
+ struct p9_qid qid; /* server identifier */
+ uint32_t mtu; /* max transferable unit at a time */
+ uid_t uid; /* numeric uid of the local user who owns this handle */
+ int v_opens; /* keep count on the number of opens called with this file handle */
+ STAILQ_ENTRY(p9_fid) fid_next; /* points to next fid in the list */
+};
+
+/* Directory entry structure */
+struct p9_dirent {
+ struct p9_qid qid; /* 9P server qid for this dirent */
+ uint64_t d_off; /* offset to the next dirent */
+ unsigned char d_type; /* file type */
+ char d_name[P9FS_DIRENT_LEN]; /* file name */
+ int len; /* length of the name, as stored by p9_dirent_read() */
+};
+
+void p9_init_zones(void);
+void p9_destroy_zones(void);
+
+/* Session and client Init Ops */
+struct p9_client *p9_client_create(struct mount *mp, int *error,
+ const char *mount_tag);
+void p9_client_destroy(struct p9_client *clnt);
+struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *fid,
+ const char *uname, uid_t n_uname, const char *aname, int *error);
+
+/* FILE OPS - These are individually called from the specific vop function */
+
+int p9_client_open(struct p9_fid *fid, int mode);
+int p9_client_close(struct p9_fid *fid);
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames,
+ char **wnames, int clone, int *error);
+struct p9_fid *p9_fid_create(struct p9_client *clnt);
+void p9_fid_destroy(struct p9_fid *fid);
+uint16_t p9_tag_create(struct p9_client *clnt);
+void p9_tag_destroy(struct p9_client *clnt, uint16_t tag);
+int p9_client_clunk(struct p9_fid *fid);
+int p9_client_version(struct p9_client *clnt);
+int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count);
+int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
+ char *extension);
+int p9_client_remove(struct p9_fid *fid);
+int p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags);
+int p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
+ struct p9_dirent *dirent);
+int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat);
+int p9_client_statread(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+int p9_is_proto_dotu(struct p9_client *clnt);
+int p9_is_proto_dotl(struct p9_client *clnt);
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
+int p9stat_read(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+void p9_client_disconnect(struct p9_client *clnt);
+void p9_client_begin_disconnect(struct p9_client *clnt);
+int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid);
+int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name);
+int p9_readlink(struct p9_fid *fid, char **target);
+int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname);
+int p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
+ uint64_t request_mask);
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr);
+
+int p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
+ va_list ap);
+int p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...);
+int p9_buf_prepare(struct p9_buffer *buf, int8_t type);
+int p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf);
+void p9_buf_reset(struct p9_buffer *buf);
+
+#endif /* FS_P9FS_P9_CLIENT_H */
diff --git a/sys/fs/p9fs/p9_debug.h b/sys/fs/p9fs/p9_debug.h
new file mode 100644
index 000000000000..463b009d00ad
--- /dev/null
+++ b/sys/fs/p9fs/p9_debug.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef FS_P9FS_P9_DEBUG_H
+#define FS_P9FS_P9_DEBUG_H
+
+extern int p9_debug_level; /* All debugs on now */
+
+/* 9P debug flags */
+#define P9_DEBUG_TRANS 0x0001 /* Trace transport */
+#define P9_DEBUG_SUBR 0x0002 /* Trace driver submissions */
+#define P9_DEBUG_LPROTO 0x0004 /* Low level protocol tracing */
+#define P9_DEBUG_PROTO 0x0008 /* High level protocol tracing */
+#define P9_DEBUG_VOPS 0x0010 /* VOPs tracing */
+#define P9_DEBUG_ERROR 0x0020 /* verbose error messages */
+/* printf() the message only when the P9_DEBUG_<category> bit is set in p9_debug_level. */
+#define P9_DEBUG(category, fmt, ...) do { \
+ if ((p9_debug_level & P9_DEBUG_##category) != 0) \
+ printf(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#endif /* FS_P9FS_P9_DEBUG_H */
diff --git a/sys/fs/p9fs/p9_protocol.c b/sys/fs/p9fs/p9_protocol.c
new file mode 100644
index 000000000000..e0045f67993d
--- /dev/null
+++ b/sys/fs/p9fs/p9_protocol.c
@@ -0,0 +1,632 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * 9P Protocol Support Code
+ * This file provides the standard for the FS interactions with the server
+ * interface as it can understand only this protocol. The details of the
+ * protocol can be found here
+ * XXX (link to protocol details page on FreeBSD wiki)
+ */
+
+#include <sys/types.h>
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_protocol.h>
+
+#define P9FS_MAXLEN 255
+
+static int p9_buf_writef(struct p9_buffer *buf, int proto_version,
+ const char *fmt, ...);
+static void stat_free(struct p9_wstat *sbuf);
+
+static void
+stat_free(struct p9_wstat *stbuf)
+{
+/* Release the five M_TEMP string members of stbuf; stbuf itself is not freed. */
+ free(stbuf->name, M_TEMP);
+ free(stbuf->uid, M_TEMP);
+ free(stbuf->gid, M_TEMP);
+ free(stbuf->muid, M_TEMP);
+ free(stbuf->extension, M_TEMP);
+}
+
+static size_t
+buf_read(struct p9_buffer *buf, void *data, size_t size)
+{
+	/* Copy what is left in buf (capped at size); return the shortfall. */
+	size_t ncopy = min(buf->size - buf->offset, size);
+	const void *src = &buf->sdata[buf->offset];
+
+	memcpy(data, src, ncopy);
+	buf->offset += ncopy;
+
+	return (size - ncopy);
+}
+
+static size_t
+buf_write(struct p9_buffer *buf, const void *data, size_t size)
+{
+	/* Append what fits below capacity (capped at size); return the shortfall. */
+	size_t ncopy = min(buf->capacity - buf->size, size);
+	void *dst = &buf->sdata[buf->size];
+
+	memcpy(dst, data, ncopy);
+	buf->size += ncopy;
+
+	return (size - ncopy);
+}
+
+/*
+ * Main buf_read routine. This copies the data from the buffer into the
+ * respective values based on the data type.
+ * Returns 0, EFAULT (buffer exhausted) or ENOMEM (allocation failed). Here
+ * b - int8_t
+ * w - int16_t
+ * d - int32_t
+ * q - int64_t
+ * s - string (allocated with M_TEMP; the caller frees it)
+ * u - uid
+ * g - gid
+ * Q - qid
+ * S - stat
+ * A - getattr (9P2000.L)
+ * D - data blob (int32_t size followed by void *, results are not freed)
+ * T - array of strings (int16_t count, followed by strings)
+ * R - array of qids (int16_t count, followed by qids)
+ * ? - return if version is not .u or .l
+ */
+static int
+p9_buf_vreadf(struct p9_buffer *buf, int proto_version, const char *fmt,
+    va_list ap)
+{
+	const char *ptr;
+	int error;
+
+	error = 0;
+	/* buf_read() returns a byte shortfall, not an errno; map it to EFAULT. */
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':
+		{
+			int8_t *val = va_arg(ap, int8_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'w':
+		{
+			int16_t *val = va_arg(ap, int16_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'd':
+		{
+			int32_t *val = va_arg(ap, int32_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 'q':
+		{
+			int64_t *val = va_arg(ap, int64_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+		}
+		case 's':
+		{
+			char **sptr_p = va_arg(ap, char **);
+			uint16_t len;
+			char *sptr;
+
+			error = buf_read(buf, &len, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error)
+				break;
+			/* M_NOWAIT may fail: never copy into a NULL buffer. */
+			sptr = malloc(len + 1, M_TEMP, M_NOWAIT | M_ZERO);
+			if (sptr == NULL) {
+				error = ENOMEM;
+			} else if (buf_read(buf, sptr, len)) {
+				error = EFAULT;
+				free(sptr, M_TEMP);
+			} else {
+				sptr[len] = 0;
+				*sptr_p = sptr;
+			}
+			break;
+		}
+		case 'u':
+		{
+			uid_t *val = va_arg(ap, uid_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'g':
+		{
+			gid_t *val = va_arg(ap, gid_t *);
+
+			if (buf_read(buf, val, sizeof(*val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'Q':
+		{
+			struct p9_qid *qid = va_arg(ap, struct p9_qid *);
+
+			error = p9_buf_readf(buf, proto_version, "bdq",
+			    &qid->type, &qid->version, &qid->path);
+
+			break;
+		}
+		case 'S':
+		{
+			struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *);
+
+			error = p9_buf_readf(buf, proto_version, "wwdQdddqssss?sddd",
+			    &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid,
+			    &stbuf->mode, &stbuf->atime, &stbuf->mtime, &stbuf->length,
+			    &stbuf->name, &stbuf->uid, &stbuf->gid, &stbuf->muid,
+			    &stbuf->extension, &stbuf->n_uid, &stbuf->n_gid, &stbuf->n_muid);
+			/* On failure drop whatever strings the stat buffer holds. */
+			if (error != 0)
+				stat_free(stbuf);
+			break;
+		}
+		case 'A':
+		{
+			struct p9_stat_dotl *stbuf = va_arg(ap, struct p9_stat_dotl *);
+
+			error = p9_buf_readf(buf, proto_version, "qQdugqqqqqqqqqqqqqqq",
+			    &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode,
+			    &stbuf->st_uid,&stbuf->st_gid, &stbuf->st_nlink,
+			    &stbuf->st_rdev, &stbuf->st_size, &stbuf->st_blksize,
+			    &stbuf->st_blocks, &stbuf->st_atime_sec,
+			    &stbuf->st_atime_nsec, &stbuf->st_mtime_sec,
+			    &stbuf->st_mtime_nsec, &stbuf->st_ctime_sec,
+			    &stbuf->st_ctime_nsec, &stbuf->st_btime_sec,
+			    &stbuf->st_btime_nsec, &stbuf->st_gen,
+			    &stbuf->st_data_version);
+
+			break;
+		}
+		case 'D':
+		{
+			uint32_t *count = va_arg(ap, uint32_t *);
+			void **data = va_arg(ap, void **);
+
+			error = buf_read(buf, count, sizeof(uint32_t)) ? EFAULT : 0;
+			if (error == 0) {
+				*count = MIN(*count, buf->size - buf->offset);
+				*data = &buf->sdata[buf->offset];
+			}
+			break;
+		}
+		case 'T':
+		{
+			uint16_t *nwname_p = va_arg(ap, uint16_t *);
+			char ***wnames_p = va_arg(ap, char ***);
+			uint16_t nwname;
+			char **wnames;
+			int i;
+
+			error = buf_read(buf, nwname_p, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error != 0)
+				break;
+			/* A failed M_NOWAIT allocation skips the loop and the per-name frees. */
+			nwname = *nwname_p;
+			wnames = malloc(sizeof(char *) * nwname, M_TEMP, M_NOWAIT | M_ZERO);
+			if (wnames == NULL)
+				error = ENOMEM;
+			for (i = 0; i < nwname && (error == 0); i++)
+				error = p9_buf_readf(buf, proto_version, "s", &wnames[i]);
+			if (error != 0) {
+				for (i = 0; wnames != NULL && i < nwname; i++)
+					free(wnames[i], M_TEMP);
+				free(wnames, M_TEMP);
+			} else
+				*wnames_p = wnames;
+			break;
+		}
+		case 'R':
+		{
+			uint16_t *nwqid_p = va_arg(ap, uint16_t *);
+			struct p9_qid **wqids_p = va_arg(ap, struct p9_qid **);
+			uint16_t nwqid;
+			struct p9_qid *wqids;
+			int i;
+
+			wqids = NULL;
+			error = buf_read(buf, nwqid_p, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error != 0)
+				break;
+
+			nwqid = *nwqid_p;
+			wqids = malloc(nwqid * sizeof(struct p9_qid), M_TEMP, M_NOWAIT | M_ZERO);
+			if (wqids == NULL) {
+				error = ENOMEM;
+				break;
+			}
+			for (i = 0; i < nwqid && (error == 0); i++)
+				error = p9_buf_readf(buf, proto_version, "Q", &(wqids)[i]);
+
+			if (error != 0) {
+				free(wqids, M_TEMP);
+			} else
+				*wqids_p = wqids;
+
+			break;
+		}
+		case '?':
+		{
+			if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L))
+				return (0);
+			break;
+		}
+		default:
+			break;
+		}
+
+		if (error != 0)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Main buf_write routine. This copies the data into the buffer from the
+ * respective values based on the data type.
+ * Returns 0 on success or EFAULT when the buffer runs out of space. Here
+ * b - int8_t
+ * w - int16_t
+ * d - int32_t
+ * q - int64_t
+ * s - string
+ * u - uid
+ * g - gid
+ * Q - qid
+ * S - stat
+ * D - data blob (int32_t size followed by void *, results are not freed)
+ * T - array of strings (int16_t count, followed by strings)
+ * W - string of a specific length
+ * R - array of qids (int16_t count, followed by qids)
+ * A - setattr (9P2000.L)
+ * ? - return if version is not .u or .l
+ */
+
+int
+p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
+    va_list ap)
+{
+	const char *ptr;
+	int error;
+
+	error = 0;
+	/* buf_write() returns a byte shortfall, not an errno; map it to EFAULT. */
+	for (ptr = fmt; *ptr; ptr++) {
+		switch (*ptr) {
+		case 'b':
+		{
+			int8_t val = va_arg(ap, int);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'w':
+		{
+			int16_t val = va_arg(ap, int);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'd':
+		{
+			int32_t val = va_arg(ap, int32_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+		}
+		case 'q':
+		{
+			int64_t val = va_arg(ap, int64_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+
+			break;
+		}
+		case 's':
+		{
+			const char *sptr = va_arg(ap, const char *);
+			uint16_t len = 0;
+
+			if (sptr)
+				len = MIN(strlen(sptr), P9FS_MAXLEN);
+			/* Strings are capped at P9FS_MAXLEN bytes; NULL is sent as length 0. */
+			error = buf_write(buf, &len, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error == 0 && buf_write(buf, sptr, len))
+				error = EFAULT;
+			break;
+		}
+		case 'u':
+		{
+			uid_t val = va_arg(ap, uid_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'g':
+		{
+			gid_t val = va_arg(ap, gid_t);
+
+			if (buf_write(buf, &val, sizeof(val)))
+				error = EFAULT;
+			break;
+
+		}
+		case 'Q':
+		{
+			const struct p9_qid *qid = va_arg(ap, const struct p9_qid *);
+
+			error = p9_buf_writef(buf, proto_version, "bdq",
+			    qid->type, qid->version, qid->path);
+			break;
+		}
+		case 'S':
+		{
+			struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *);
+
+			error = p9_buf_writef(buf, proto_version,
+			    "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid,
+			    stbuf->mode, stbuf->atime, stbuf->mtime, stbuf->length, stbuf->name,
+			    stbuf->uid, stbuf->gid, stbuf->muid, stbuf->extension, stbuf->n_uid,
+			    stbuf->n_gid, stbuf->n_muid);
+			/* NOTE(review): this frees the caller's strings on failure; confirm callers expect it. */
+			if (error != 0)
+				stat_free(stbuf);
+
+			break;
+		}
+		case 'D':
+		{
+			uint32_t count = va_arg(ap, uint32_t);
+			void *data = va_arg(ap, void *);
+
+			error = buf_write(buf, &count, sizeof(uint32_t)) ? EFAULT : 0;
+			if ((error == 0) && buf_write(buf, data, count))
+				error = EFAULT;
+
+			break;
+		}
+		case 'T':
+		{
+			char **wnames = va_arg(ap, char **);
+			uint16_t nwnames = va_arg(ap, int);
+
+			error = buf_write(buf, &nwnames, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error == 0) {
+				int i = 0;
+				for (i = 0; i < nwnames; i++) {
+					error = p9_buf_writef(buf, proto_version, "s", wnames[i]);
+					if (error != 0)
+						break;
+				}
+			}
+			break;
+		}
+		case 'W':
+		{
+			const char *sptr = va_arg(ap, const char*);
+			uint16_t len = va_arg(ap, int);
+
+			error = buf_write(buf, &len, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error == 0 && buf_write(buf, sptr, len))
+				error = EFAULT;
+			break;
+
+		}
+		case 'R':
+		{
+			uint16_t nwqid = va_arg(ap, int);
+			struct p9_qid *wqids = va_arg(ap, struct p9_qid *);
+			int i;
+
+			error = buf_write(buf, &nwqid, sizeof(uint16_t)) ? EFAULT : 0;
+			if (error == 0) {
+
+				for (i = 0; i < nwqid; i++) {
+					error = p9_buf_writef(buf, proto_version, "Q", &wqids[i]);
+					if (error != 0)
+						break;
+				}
+			}
+			break;
+		}
+		case 'A':
+		{
+			struct p9_iattr_dotl *p9attr = va_arg(ap, struct p9_iattr_dotl *);
+
+			error = p9_buf_writef(buf, proto_version, "ddugqqqqq",
+			    p9attr->valid, p9attr->mode, p9attr->uid,
+			    p9attr->gid, p9attr->size, p9attr->atime_sec,
+			    p9attr->atime_nsec, p9attr->mtime_sec,
+			    p9attr->mtime_nsec);
+
+			break;
+		}
+		case '?':
+		{
+			if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L))
+				return (0);
+			break;
+		}
+		default:
+			break;
+		}
+
+		if (error != 0)
+			break;
+	}
+
+	return (error);
+}
+
+/* Variadic front end: collects the arguments and hands them to p9_buf_vreadf(). */
+int
+p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...)
+{
+	va_list args;
+	int error;
+
+	va_start(args, fmt);
+	error = p9_buf_vreadf(buf, proto_version, fmt, args);
+	va_end(args);
+
+	return (error);
+}
+
+/* Variadic front end: collects the arguments and hands them to p9_buf_vwritef(). */
+static int
+p9_buf_writef(struct p9_buffer *buf, int proto_version, const char *fmt, ...)
+{
+	va_list args;
+	int error;
+
+	va_start(args, fmt);
+	error = p9_buf_vwritef(buf, proto_version, fmt, args);
+	va_end(args);
+
+	return (error);
+}
+
+/* Parse one stat record ("S" format) from the raw bytes in buf into st. */
+int
+p9stat_read(struct p9_client *clnt, char *buf, size_t len, struct p9_wstat *st)
+{
+	struct p9_buffer view;
+	int error;
+
+	/* Wrap the caller's bytes in a temporary buffer that is already full. */
+	view.sdata = buf;
+	view.offset = 0;
+	view.capacity = len;
+	view.size = len;
+
+	error = p9_buf_readf(&view, clnt->proto_version, "S", st);
+	if (error != 0)
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
+
+	return (error);
+}
+
+/*
+ * P9 header preparation routine. Records the message type in buf->id and writes
+ * a 32-bit zero, the type byte and buf->tag ("dbw") into the buffer.
+ */
+int
+p9_buf_prepare(struct p9_buffer *buf, int8_t type)
+{
+ buf->id = type;
+ return (p9_buf_writef(buf, 0, "dbw", 0, type, buf->tag));
+}
+
+/*
+ * Final write to the buffer: stores the total message size as a 32-bit value at
+ * the start of the buffer, leaving buf->size unchanged. Returns the write status.
+ */
+int
+p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf)
+{
+	int total, ret;
+
+	/* Rewind so the "d" write lands at offset 0, then restore the length. */
+	total = buf->size;
+	buf->size = 0;
+	ret = p9_buf_writef(buf, 0, "d", total);
+	buf->size = total;
+
+	P9_DEBUG(LPROTO, "%s: size=%d type: %d tag: %d\n",
+	    __func__, buf->size, buf->id, buf->tag);
+
+	return (ret);
+}
+
+/* Empty the buffer: clear both the read offset and the stored length. */
+void
+p9_buf_reset(struct p9_buffer *buf)
+{
+
+ buf->offset = 0;
+ buf->size = 0;
+}
+
+/*
+ * Parse one directory entry ("Qqbs") from buf, starting at byte offset start,
+ * into dent; the name is truncated to fit d_name. Returns the offset reached.
+ */
+int
+p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
+    struct p9_dirent *dent)
+{
+	struct p9_buffer msg_buf;
+	int ret;
+	char *nameptr;
+	uint16_t sle;
+
+	msg_buf.size = len;
+	msg_buf.capacity = len;
+	msg_buf.sdata = buf;
+	msg_buf.offset = start;
+
+	ret = p9_buf_readf(&msg_buf, clnt->proto_version, "Qqbs", &dent->qid,
+	    &dent->d_off, &dent->d_type, &nameptr);
+	if (ret) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, ret);
+		goto out;
+	}
+	/* The wire length is 16 bits: bound the copy to d_name and NUL-terminate. */
+	sle = MIN(strlen(nameptr), sizeof(dent->d_name) - 1);
+	strlcpy(dent->d_name, nameptr, sizeof(dent->d_name));
+	dent->len = sle;
+	free(nameptr, M_TEMP);
+out:
+	return (msg_buf.offset);
+}
diff --git a/sys/fs/p9fs/p9_protocol.h b/sys/fs/p9fs/p9_protocol.h
new file mode 100644
index 000000000000..7ffd7dd67bcf
--- /dev/null
+++ b/sys/fs/p9fs/p9_protocol.h
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* File contains 9P protocol definitions */
+
+#ifndef FS_P9FS_P9_PROTOCOL_H
+#define FS_P9FS_P9_PROTOCOL_H
+
+#include <sys/types.h>
+
+/* 9P message types */
+enum p9_cmds_t {
+ P9PROTO_TLERROR = 6, /* not used */
+ P9PROTO_RLERROR, /* response for any failed request */
+ P9PROTO_TSTATFS = 8, /* file system status request */
+ P9PROTO_RSTATFS, /* file system status response */
+ P9PROTO_TLOPEN = 12, /* open a file (9P2000.L) */
+ P9PROTO_RLOPEN, /* response to open request (9P2000.L) */
+ P9PROTO_TLCREATE = 14, /* prepare for handle for I/O on a new file (9P2000.L) */
+ P9PROTO_RLCREATE, /* response with file access information (9P2000.L) */
+ P9PROTO_TSYMLINK = 16, /* symlink creation request */
+ P9PROTO_RSYMLINK, /* symlink creation response */
+ P9PROTO_TMKNOD = 18, /* create a special file object request */
+ P9PROTO_RMKNOD, /* create a special file object response */
+ P9PROTO_TRENAME = 20, /* rename a file request */
+ P9PROTO_RRENAME, /* rename a file response */
+ P9PROTO_TREADLINK = 22, /* request to read value of symbolic link */
+ P9PROTO_RREADLINK, /* response to read value of symbolic link request */
+ P9PROTO_TGETATTR = 24, /* get file attributes request */
+ P9PROTO_RGETATTR, /* get file attributes response */
+ P9PROTO_TSETATTR = 26, /* set file attributes request */
+ P9PROTO_RSETATTR, /* set file attributes response */
+ P9PROTO_TXATTRWALK = 30,/* request to read extended attributes */
+ P9PROTO_RXATTRWALK, /* response from server with attributes */
+ P9PROTO_TXATTRCREATE = 32,/* request to set extended attribute */
+ P9PROTO_RXATTRCREATE, /* response from server for setting extended attribute */
+ P9PROTO_TREADDIR = 40, /* request to read a directory */
+ P9PROTO_RREADDIR, /* response from server for read request */
+ P9PROTO_TFSYNC = 50, /* request to flush any cached data to disk */
+ P9PROTO_RFSYNC, /* response when cached data is flushed */
+ P9PROTO_TLOCK = 52, /* acquire or release a POSIX record lock */
+ P9PROTO_RLOCK, /* response with the status of the lock */
+ P9PROTO_TGETLOCK = 54, /* request to check for presence of a POSIX record lock */
+ P9PROTO_RGETLOCK, /* response with the details of the lock if acquired */
+ P9PROTO_TLINK = 70, /* request to create hard link */
+ P9PROTO_RLINK, /* create hard link response */
+ P9PROTO_TMKDIR = 72, /* create a directory request */
+ P9PROTO_RMKDIR, /* create a directory response */
+ P9PROTO_TRENAMEAT = 74, /* request to rename a file or directory */
+ P9PROTO_RRENAMEAT, /* response to rename request */
+ P9PROTO_TUNLINKAT = 76, /* unlink a file or directory */
+ P9PROTO_RUNLINKAT, /* response to unlink request */
+ P9PROTO_TVERSION = 100, /* request for version handshake */
+ P9PROTO_RVERSION, /* response for version handshake */
+ P9PROTO_TAUTH = 102, /* request to establish authentication channel */
+ P9PROTO_RAUTH, /* response with authentication information */
+ P9PROTO_TATTACH = 104, /* establish a user access to a file system*/
+ P9PROTO_RATTACH, /* response with top level handle to file hierarchy */
+ P9PROTO_TERROR = 106, /* not used */
+ P9PROTO_RERROR, /* response for any failed request */
+ P9PROTO_TFLUSH = 108, /* request to abort a previous request */
+ P9PROTO_RFLUSH, /* response when previous request has been cancelled */
+ P9PROTO_TWALK = 110, /* descend a directory hierarchy */
+ P9PROTO_RWALK, /* response with new handle for position within hierarchy */
+ P9PROTO_TOPEN = 112, /* prepare file handle for I/O for an existing file */
+ P9PROTO_ROPEN, /* response with file access information */
+ P9PROTO_TCREATE = 114, /* prepare for handle for I/O on a new file */
+ P9PROTO_RCREATE, /* response with file access information */
+ P9PROTO_TREAD = 116, /* request to transfer data from a file */
+ P9PROTO_RREAD, /* response with data requested */
+ P9PROTO_TWRITE = 118, /* request to transfer data to a file */
+ P9PROTO_RWRITE, /* response with how much data was written to the file */
+ P9PROTO_TCLUNK = 120, /* forget about a handle to a file within the File System */
+ P9PROTO_RCLUNK, /* response from the server for forgetting the file handle */
+ P9PROTO_TREMOVE = 122, /* request to remove a file */
+ P9PROTO_RREMOVE, /* response when server has removed the file */
+ P9PROTO_TSTAT = 124, /* request file entity attributes */
+ P9PROTO_RSTAT, /* response with file entity attributes */
+ P9PROTO_TWSTAT = 126, /* request to update file entity attributes */
+ P9PROTO_RWSTAT, /* response when file entity attributes are updated */
+};
+
+/* File Open Modes */
+enum p9_open_mode_t {
+ P9PROTO_OREAD = 0x00, /* open file for reading only */
+ P9PROTO_OWRITE = 0x01, /* open file for writing only */
+ P9PROTO_ORDWR = 0x02, /* open file for both reading and writing */
+ P9PROTO_OEXEC = 0x03, /* open file for execution */
+ P9PROTO_OTRUNC = 0x10, /* truncate file to zero length before opening it */
+ P9PROTO_OREXEC = 0x20, /* close the file when exec system call is made */
+ P9PROTO_ORCLOSE = 0x40, /* remove the file when it is closed */
+ P9PROTO_OAPPEND = 0x80, /* open the file and seek to the end of the file */
+ P9PROTO_OEXCL = 0x1000, /* only create a file and not open it */
+};
+
+/* File Permissions */
+enum p9_perm_t {
+ P9PROTO_DMDIR = 0x80000000, /* permission bit for directories */
+ P9PROTO_DMAPPEND = 0x40000000, /* permission bit for append-only files */
+ P9PROTO_DMEXCL = 0x20000000, /* permission bit for exclusive use (only one open handle allowed) */
+ P9PROTO_DMMOUNT = 0x10000000, /* permission bit for mount points */
+ P9PROTO_DMAUTH = 0x08000000, /* permission bit for authentication file */
+ P9PROTO_DMTMP = 0x04000000, /* permission bit for non-backed-up files */
+ P9PROTO_DMSYMLINK = 0x02000000, /* permission bit for symbolic link (9P2000.u) */
+ P9PROTO_DMLINK = 0x01000000, /* permission bit for hard-link (9P2000.u) */
+ P9PROTO_DMDEVICE = 0x00800000, /* permission bit for device files (9P2000.u) */
+ P9PROTO_DMNAMEDPIPE = 0x00200000,/* permission bit for named pipe (9P2000.u) */
+ P9PROTO_DMSOCKET = 0x00100000, /* permission bit for socket (9P2000.u) */
+ P9PROTO_DMSETUID = 0x00080000, /* permission bit for setuid (9P2000.u) */
+ P9PROTO_DMSETGID = 0x00040000, /* permission bit for setgid (9P2000.u) */
+ P9PROTO_DMSETVTX = 0x00010000, /* permission bit for sticky bit (9P2000.u) */
+};
+
+/*
+ * QID types - they are primarily used to
+ * differentiate semantics for a file system
+ */
+enum p9_qid_t {
+ P9PROTO_QTDIR = 0x80, /* directory */
+ P9PROTO_QTAPPEND = 0x40, /* append-only */
+ P9PROTO_QTEXCL = 0x20, /* exclusive use (only one open handle allowed)*/
+ P9PROTO_QTMOUNT = 0x10, /* mount points */
+ P9PROTO_QTAUTH = 0x08, /* authentication file */
+ P9PROTO_QTTMP = 0x04, /* non-backed-up files */
+ P9PROTO_QTSYMLINK = 0x02, /* symbolic links */
+ P9PROTO_QTLINK = 0x01, /* hard link */
+ P9PROTO_QTFILE = 0x00, /* normal files */
+};
+
+/*
+ * P9 magic numbers.
+ *
+ * The cast expressions are wrapped in an outer pair of parentheses so the
+ * macros expand to a single primary expression wherever they are used.
+ */
+#define P9PROTO_NOFID	((uint32_t)~0)	/* all-ones 32-bit value: "no fid" */
+#define P9_DEFUNAME	"nobody"	/* default user name */
+#define P9_DEFANAME	""		/* default attach name */
+#define P9_NONUNAME	((uint32_t)~0)	/* all-ones 32-bit value: "no numeric uid" */
+#define P9_MAXWELEM	16		/* upper bound on names passed to one walk call */
+
+/* Exchange unit between Qemu and Client: identifies one file on the server */
+struct p9_qid {
+ uint8_t type; /* the type of the file (see enum p9_qid_t) */
+ uint32_t version; /* version number for given path */
+ uint64_t path; /* the file server's unique id for the file */
+};
+
+/* File system information (statfs-style) structure */
+struct p9_statfs {
+ uint32_t type; /* type of file system */
+ uint32_t bsize; /* optimal transfer block size */
+ uint64_t blocks; /* total data blocks in file system */
+ uint64_t bfree; /* free blocks in fs */
+ uint64_t bavail; /* free blocks available to non-superuser */
+ uint64_t files; /* total file nodes in file system */
+ uint64_t ffree; /* free file nodes in fs */
+ uint64_t fsid; /* file system id */
+ uint32_t namelen; /* maximum length of filenames */
+};
+
+
+/* File system metadata information (per-file stat record) */
+struct p9_wstat {
+ uint16_t size; /* total byte count of the following data */
+ uint16_t type; /* type of file */
+ uint32_t dev; /* id of device containing file */
+ struct p9_qid qid; /* identifier used by server for file system entity information */
+ uint32_t mode; /* protection */
+ uint32_t atime; /* time of last access */
+ uint32_t mtime; /* time of last modification */
+ uint64_t length; /* length of file in bytes */
+ char *name; /* file name */
+ char *uid; /* user ID of owner, as a string */
+ char *gid; /* group ID of owner, as a string */
+ char *muid; /* name of the user who last modified the file */
+ char *extension; /* 9p2000.u extensions */
+ uid_t n_uid; /* 9p2000.u extensions: numeric counterpart of uid */
+ gid_t n_gid; /* 9p2000.u extensions: numeric counterpart of gid */
+ uid_t n_muid; /* 9p2000.u extensions: numeric counterpart of muid */
+};
+
+/* The Linux version of the FS information stat structure */
+struct p9_stat_dotl {
+ uint64_t st_result_mask;/* indicates fields that are requested (P9PROTO_STATS_* bits, presumably - confirm) */
+ struct p9_qid qid; /* identifier used by server for file system entity information */
+ uint32_t st_mode; /* protection */
+ uid_t st_uid; /* user ID of owner */
+ gid_t st_gid; /* group ID of owner */
+ uint64_t st_nlink; /* number of hard links */
+ uint64_t st_rdev; /* device ID (if special file) */
+ uint64_t st_size; /* total size, in bytes */
+ uint64_t st_blksize; /* blocksize for file system I/O */
+ uint64_t st_blocks; /* number of 512B blocks allocated */
+ uint64_t st_atime_sec; /* time of last access, seconds */
+ uint64_t st_atime_nsec; /* time of last access, nanoseconds */
+ uint64_t st_mtime_sec; /* time of last modification, seconds */
+ uint64_t st_mtime_nsec; /* time of last modification, nanoseconds */
+ uint64_t st_ctime_sec; /* time of last status change, seconds */
+ uint64_t st_ctime_nsec; /* time of last status change, nanoseconds */
+ uint64_t st_btime_sec; /* this and the following members are reserved for future use */
+ uint64_t st_btime_nsec;
+ uint64_t st_gen;
+ uint64_t st_data_version;
+};
+
+/* P9 inode attributes for setattr */
+struct p9_iattr_dotl {
+ uint32_t valid; /* bit field specifying which fields are valid (P9PROTO_SETATTR_* bits, presumably - confirm) */
+ uint32_t mode; /* protection */
+ uid_t uid; /* user id of owner */
+ gid_t gid; /* group id */
+ uint64_t size; /* file size */
+ uint64_t atime_sec; /* last access time in seconds */
+ uint64_t atime_nsec; /* last access time in nanoseconds */
+ uint64_t mtime_sec; /* last modification time in seconds */
+ uint64_t mtime_nsec; /* last modification time in nanoseconds */
+};
+
+#define P9PROTO_STATS_MODE 0x00000001ULL /* first of the one-bit-per-attribute stat flags */
+#define P9PROTO_STATS_NLINK 0x00000002ULL
+#define P9PROTO_STATS_UID 0x00000004ULL
+#define P9PROTO_STATS_GID 0x00000008ULL
+#define P9PROTO_STATS_RDEV 0x00000010ULL
+#define P9PROTO_STATS_ATIME 0x00000020ULL
+#define P9PROTO_STATS_MTIME 0x00000040ULL
+#define P9PROTO_STATS_CTIME 0x00000080ULL
+#define P9PROTO_STATS_INO 0x00000100ULL
+#define P9PROTO_STATS_SIZE 0x00000200ULL
+#define P9PROTO_STATS_BLOCKS 0x00000400ULL
+
+#define P9PROTO_STATS_BTIME 0x00000800ULL
+#define P9PROTO_STATS_GEN 0x00001000ULL
+#define P9PROTO_STATS_DATA_VERSION 0x00002000ULL
+
+#define P9PROTO_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS (bits 0-10) */
+#define P9PROTO_STATS_ALL 0x00003fffULL /* Mask for all fields above (bits 0-13) */
+
+#define P9PROTO_SETATTR_MODE 0x00000001UL /* first of the one-bit-per-attribute setattr flags */
+#define P9PROTO_SETATTR_UID 0x00000002UL
+#define P9PROTO_SETATTR_GID 0x00000004UL
+#define P9PROTO_SETATTR_SIZE 0x00000008UL
+#define P9PROTO_SETATTR_ATIME 0x00000010UL
+#define P9PROTO_SETATTR_MTIME 0x00000020UL
+#define P9PROTO_SETATTR_CTIME 0x00000040UL
+#define P9PROTO_SETATTR_ATIME_SET 0x00000080UL
+#define P9PROTO_SETATTR_MTIME_SET 0x00000100UL
+#define P9PROTO_SETATTR_MASK 0x000001bfUL /* every SETATTR bit above except CTIME (0x40) */
+
+#define P9PROTO_TGETATTR_BLK 512 /* presumably the 512B unit st_blocks is counted in - confirm */
+
+#define P9PROTO_UNLINKAT_REMOVEDIR 0x200 /* NOTE(review): looks like the unlinkat "remove directory" flag - confirm */
+
+/* PDU buffer used for SG lists. */
+struct p9_buffer {
+ uint32_t size; /* presumably the PDU length - confirm against users */
+ uint16_t tag; /* presumably the 9P request tag - confirm */
+ uint8_t id; /* presumably the 9P message type - confirm */
+ size_t offset; /* presumably the current position within sdata - confirm */
+ size_t capacity; /* presumably the allocated size of sdata - confirm */
+ uint8_t *sdata; /* byte storage for the PDU */
+};
+
+#endif /* FS_P9FS_P9_PROTOCOL_H */
diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c
new file mode 100644
index 000000000000..c82d81fedcd7
--- /dev/null
+++ b/sys/fs/p9fs/p9_transport.c
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2022-present Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/kassert.h>
+#include <sys/libkern.h>
+
+#include <fs/p9fs/p9_transport.h>
+
+/* Every transport module registered with p9fs, in registration order. */
+TAILQ_HEAD(, p9_trans_module) transports;
+
+/*
+ * Put the transport list into its empty state.  Run through the SYSINIT
+ * below at SI_SUB_DRIVERS / SI_ORDER_FIRST.
+ */
+static void
+p9_transport_init(void)
+{
+
+	TAILQ_INIT(&transports);
+}
+
+SYSINIT(p9_transport, SI_SUB_DRIVERS, SI_ORDER_FIRST, p9_transport_init, NULL);
+
+/*
+ * Make transport module m known to p9fs by appending it to the transport
+ * list.  No lock is taken here.
+ */
+void
+p9_register_trans(struct p9_trans_module *m)
+{
+
+	TAILQ_INSERT_TAIL(&transports, m, link);
+}
+
+/*
+ * Unlink transport module m from the transport list.  No lock is taken
+ * here.
+ */
+void
+p9_unregister_trans(struct p9_trans_module *m)
+{
+
+	TAILQ_REMOVE(&transports, m, link);
+}
+
+/*
+ * Look up a registered transport module by exact name match.
+ * Returns the first module whose name equals the argument, or NULL if the
+ * list holds no such module.
+ */
+struct p9_trans_module *
+p9_get_trans_by_name(char *name)
+{
+	struct p9_trans_module *tm;
+
+	for (tm = TAILQ_FIRST(&transports); tm != NULL;
+	    tm = TAILQ_NEXT(tm, link)) {
+		if (strcmp(tm->name, name) != 0)
+			continue;
+		return (tm);
+	}
+	return (NULL);
+}
+
diff --git a/sys/fs/p9fs/p9_transport.h b/sys/fs/p9fs/p9_transport.h
new file mode 100644
index 000000000000..143c29f2382e
--- /dev/null
+++ b/sys/fs/p9fs/p9_transport.h
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* Transport definitions */
+#ifndef FS_P9FS_P9_TRANSPORT_H
+#define FS_P9FS_P9_TRANSPORT_H
+
+#include <sys/queue.h>
+
+struct p9_req_t;
+
+/* Transport module interface */
+struct p9_trans_module {
+ TAILQ_ENTRY(p9_trans_module) link;
+ char *name; /* name of transport */
+ /* member function to create a new connection on this transport */
+ int (*create)(const char *mount_tag, void **handlep);
+ /* member function to terminate a connection on this transport */
+ void (*close) (void *handle);
+ /* member function to issue a request to the transport */
+ int (*request) (void *handle, struct p9_req_t *req);
+ /* member function to cancel a request if it has been sent */
+ int (*cancel) (void *handle, struct p9_req_t *req);
+};
+
+void p9_register_trans(struct p9_trans_module *m);
+void p9_unregister_trans(struct p9_trans_module *m);
+struct p9_trans_module *p9_get_trans_by_name(char *s);
+
+#endif /* FS_P9FS_P9_TRANSPORT_H */
diff --git a/sys/fs/p9fs/p9fs.h b/sys/fs/p9fs/p9fs.h
new file mode 100644
index 000000000000..a270d8b5ce5f
--- /dev/null
+++ b/sys/fs/p9fs/p9fs.h
@@ -0,0 +1,203 @@
+/*-
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* This file has prototypes specific to the p9fs file system */
+
+#ifndef FS_P9FS_P9FS_H
+#define FS_P9FS_P9FS_H
+
+struct p9fs_session;
+
+/* QID: Unique identification for the file being accessed */
+struct p9fs_qid {
+ uint8_t qid_mode; /* file mode specifying file type */
+ uint32_t qid_version; /* version of the file */
+ uint64_t qid_path; /* unique integer among all files in hierarchy */
+};
+
+/*
+ * The in-memory representation of the on-disk inode. Save the current
+ * fields to write them back later.
+ */
+struct p9fs_inode {
+ /* Make it simple first, add more fields later */
+ uint64_t i_size; /* size of the inode */
+ uint16_t i_type; /* type of inode */
+ uint32_t i_dev; /* type of device */
+ uint32_t i_mode; /* mode of the inode */
+ uint32_t i_atime; /* time of last access */
+ uint32_t i_mtime; /* time of last modification */
+ uint32_t i_ctime; /* time of last status change */
+ uint32_t i_atime_nsec; /* time of last access in nanoseconds resolution */
+ uint32_t i_mtime_nsec; /* time of last modification in nanoseconds resolution */
+ uint32_t i_ctime_nsec; /* time of last status change in nanoseconds resolution */
+ uint64_t i_length; /* NOTE(review): relation to i_size is not evident here - confirm */
+ char *i_name; /* inode name */
+ char *i_uid; /* inode user id, as a string */
+ char *i_gid; /* inode group id, as a string */
+ char *i_muid; /* presumably the user who last modified the file - confirm */
+ char *i_extension; /* 9p2000.u extensions */
+ uid_t n_uid; /* 9p2000.u extensions */
+ gid_t n_gid; /* 9p2000.u extensions */
+ uid_t n_muid; /* 9p2000.u extensions */
+ /* bookkeeping info on the client. */
+ uint16_t i_links_count; /* number of references to the inode */
+ uint64_t i_qid_path; /* using inode number for reference. */
+ uint64_t i_flags; /* NOTE(review): meaning of the bits is not visible here */
+ uint64_t blksize; /* block size for file system */
+ uint64_t blocks; /* number of 512B blocks allocated */
+ uint64_t gen; /* reserved for future use */
+ uint64_t data_version; /* reserved for future use */
+
+};
+
+#define P9FS_VFID_MTX(_sc) (&(_sc)->vfid_mtx) /* address of the vfid_mtx member of _sc */
+#define P9FS_VFID_LOCK(_sc) mtx_lock(P9FS_VFID_MTX(_sc))
+#define P9FS_VFID_UNLOCK(_sc) mtx_unlock(P9FS_VFID_MTX(_sc))
+#define P9FS_VFID_LOCK_INIT(_sc) mtx_init(P9FS_VFID_MTX(_sc), \
+ "VFID List lock", NULL, MTX_DEF)
+#define P9FS_VFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VFID_MTX(_sc))
+
+#define P9FS_VOFID_MTX(_sc) (&(_sc)->vofid_mtx) /* address of the vofid_mtx member of _sc */
+#define P9FS_VOFID_LOCK(_sc) mtx_lock(P9FS_VOFID_MTX(_sc))
+#define P9FS_VOFID_UNLOCK(_sc) mtx_unlock(P9FS_VOFID_MTX(_sc))
+#define P9FS_VOFID_LOCK_INIT(_sc) mtx_init(P9FS_VOFID_MTX(_sc), \
+ "VOFID List lock", NULL, MTX_DEF)
+#define P9FS_VOFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VOFID_MTX(_sc))
+
+#define VFID 0x01 /* fid_type value selecting a node's vfid_list */
+#define VOFID 0x02 /* fid_type value selecting a node's vofid_list */
+
+/* A Plan9 node: the per-file state p9fs keeps alongside a vnode. */
+struct p9fs_node {
+ STAILQ_HEAD( ,p9_fid) vfid_list; /* vfids for this node, searched by uid */
+ struct mtx vfid_mtx; /* mutex for vfid list */
+ STAILQ_HEAD( ,p9_fid) vofid_list; /* vofids for this node, searched by uid and open mode */
+ struct mtx vofid_mtx; /* mutex for vofid list */
+ struct p9fs_node *parent; /* pointer to parent p9fs node */
+ struct p9fs_qid vqid; /* the server qid, will be from the host */
+ struct vnode *v_node; /* vnode for this fs_node. */
+ struct p9fs_inode inode; /* in-memory representation of on-disk information */
+ struct p9fs_session *p9fs_ses; /* session pointer for this node */
+ STAILQ_ENTRY(p9fs_node) p9fs_node_next; /* linkage on the session's virt_node_list */
+ uint64_t flags; /* P9FS_ROOT is tested here by IS_ROOT() */
+};
+
+/* Conversions: vnode -> p9fs node, p9fs node -> vnode, mount -> p9fs mount. */
+#define P9FS_VTON(vp)		((struct p9fs_node *)(vp)->v_data)
+#define P9FS_NTOV(node)		((node)->v_node)
+#define VFSTOP9(mp)		((struct p9fs_mount *)(mp)->mnt_data)
+#define QEMU_DIRENTRY_SZ	25
+/* Node state bits; IS_ROOT() tests P9FS_ROOT in the node's flags member. */
+#define P9FS_NODE_MODIFIED	0x1 /* indicating file change */
+#define P9FS_ROOT		0x2 /* indicating root p9fs node */
+#define P9FS_NODE_DELETED	0x4 /* indicating file or directory delete */
+#define P9FS_NODE_IN_SESSION	0x8 /* p9fs_node is in the session - virt_node_list */
+/*
+ * Non-zero when the node is the root p9fs node.  The argument is
+ * parenthesized so that any pointer-valued expression may be passed.
+ */
+#define IS_ROOT(node)		((node)->flags & P9FS_ROOT)
+
+/*
+ * Link-count helpers for struct p9fs_inode.  Each expands to a single
+ * statement (do { } while (0)) and ends on its own line with no trailing
+ * continuation, so the line after the macro is never spliced into it.
+ */
+#define P9FS_SET_LINKS(inode) do {					\
+	(inode)->i_links_count = 1;					\
+} while (0)
+
+#define P9FS_INCR_LINKS(inode) do {					\
+	(inode)->i_links_count++;					\
+} while (0)
+
+#define P9FS_DECR_LINKS(inode) do {					\
+	(inode)->i_links_count--;					\
+} while (0)
+
+#define P9FS_CLR_LINKS(inode) do {					\
+	(inode)->i_links_count = 0;					\
+} while (0)
+
+#define P9FS_MTX(_sc) (&(_sc)->p9fs_mtx) /* address of the p9fs_mtx member of _sc */
+#define P9FS_LOCK(_sc) mtx_lock(P9FS_MTX(_sc))
+#define P9FS_UNLOCK(_sc) mtx_unlock(P9FS_MTX(_sc))
+#define P9FS_LOCK_INIT(_sc) mtx_init(P9FS_MTX(_sc), \
+ "P9FS session chain lock", NULL, MTX_DEF)
+#define P9FS_LOCK_DESTROY(_sc) mtx_destroy(P9FS_MTX(_sc))
+
+/* Session structure for the FS */
+struct p9fs_session {
+ unsigned char flags; /* session flags: protocol version (P9FS_PROTO_*) and access mode (P9_ACCESS_*) bits */
+ struct mount *p9fs_mount; /* mount point */
+ struct p9fs_node rnp; /* root p9fs node for this session */
+ uid_t uid; /* the uid that has access */
+ const char *uname; /* user name to mount as */
+ const char *aname; /* name of remote file tree being mounted */
+ struct p9_client *clnt; /* 9p client */
+ struct mtx p9fs_mtx; /* mutex used for guarding the chain. */
+ STAILQ_HEAD( ,p9fs_node) virt_node_list; /* list of p9fs nodes in this session */
+ struct p9_fid *mnt_fid; /* to save nobody's fid for unmounting as root user */
+};
+
+struct p9fs_mount { /* per-mount data; VFSTOP9() casts a mount's mnt_data to this type */
+ struct p9fs_session p9fs_session; /* per instance session information */
+ struct mount *p9fs_mountp; /* mount point */
+ int mount_tag_len; /* length of the mount tag */
+ char *mount_tag; /* mount tag used */
+};
+
+/*
+ * All session flags based on 9p versions.  These share the session's
+ * flags byte with the P9_ACCESS_* bits below, so the values must not overlap.
+ */
+enum virt_session_flags {
+ P9FS_PROTO_2000U = 0x01,
+ P9FS_PROTO_2000L = 0x02,
+};
+
+/* Session access flags */
+#define P9_ACCESS_ANY 0x04 /* single attach for all users */
+#define P9_ACCESS_SINGLE 0x08 /* access to only the user who mounts */
+#define P9_ACCESS_USER 0x10 /* new attach established for every user */
+#define P9_ACCESS_MASK (P9_ACCESS_ANY|P9_ACCESS_SINGLE|P9_ACCESS_USER)
+
+u_quad_t p9fs_round_filesize_to_bytes(uint64_t filesize, uint64_t bsize);
+u_quad_t p9fs_pow2_filesize_to_bytes(uint64_t filesize, uint64_t bsize);
+
+/* These are all the P9FS specific vops */
+int p9fs_stat_vnode_l(void);
+int p9fs_stat_vnode_dotl(struct p9_stat_dotl *st, struct vnode *vp);
+int p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred);
+int p9fs_proto_dotl(struct p9fs_session *vses);
+struct p9_fid *p9fs_init_session(struct mount *mp, int *error);
+void p9fs_close_session(struct mount *mp);
+void p9fs_prepare_to_close(struct mount *mp);
+void p9fs_complete_close(struct mount *mp);
+int p9fs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp);
+int p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
+ struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
+ char *name);
+int p9fs_node_cmp(struct vnode *vp, void *arg);
+void p9fs_destroy_node(struct p9fs_node **npp);
+void p9fs_dispose_node(struct p9fs_node **npp);
+void p9fs_cleanup(struct p9fs_node *vp);
+void p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids);
+void p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *vfid,
+ int fid_type);
+void p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid,
+ int fid_type);
+struct p9_fid *p9fs_get_fid(struct p9_client *clnt,
+ struct p9fs_node *np, struct ucred *cred, int fid_type, int mode, int *error);
+
+#endif /* FS_P9FS_P9FS_H */
diff --git a/sys/fs/p9fs/p9fs_proto.h b/sys/fs/p9fs/p9fs_proto.h
new file mode 100644
index 000000000000..d78caa686f36
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_proto.h
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+/*
+ * Plan9 filesystem (9P2000.u) protocol definitions.
+ */
+
+#ifndef FS_P9FS_P9FS_PROTO_H
+#define FS_P9FS_P9FS_PROTO_H
+
+//#include <dev/virtio/virtio_fs_9p.h>
+
+/* File open modes (values match the P9PROTO_O* open-mode constants) */
+#define P9FS_OREAD 0
+#define P9FS_OWRITE 1
+#define P9FS_ORDWR 2
+#define P9FS_OEXEC 3
+#define P9FS_OTRUNC 0x10
+
+#endif /* FS_P9FS_P9FS_PROTO_H */
diff --git a/sys/fs/p9fs/p9fs_subr.c b/sys/fs/p9fs/p9fs_subr.c
new file mode 100644
index 000000000000..d0f04f6c5e97
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_subr.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+/*-
+ * 9P filesystem subroutines. This file consists of all the Non VFS subroutines.
+ * It contains all of the functions related to the driver submission which form
+ * the upper layer i.e, p9fs driver. This will interact with the client to make
+ * sure we have correct API calls in the header.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/limits.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include "p9fs_proto.h"
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_protocol.h>
+#include <fs/p9fs/p9fs.h>
+
+/*
+ * Report whether the P9FS_PROTO_2000L bit is set in the session flags.
+ * Returns that bit (non-zero) when set and 0 otherwise.
+ */
+int
+p9fs_proto_dotl(struct p9fs_session *vses)
+{
+	int dotl;
+
+	dotl = vses->flags & P9FS_PROTO_2000L;
+	return (dotl);
+}
+
+/*
+ * Initialize the p9fs session embedded in the mount's p9fs_mount data.
+ *
+ * Creates the 9P client, records the protocol version and the requested
+ * "access" mount option in the session flags, and attaches to the server.
+ * Returns the fid produced by the attach.  On failure returns NULL; an
+ * unrecognized access option sets *error to EINVAL, and the client is
+ * destroyed again on every failure after its creation.
+ */
+struct p9_fid *
+p9fs_init_session(struct mount *mp, int *error)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_session *ses;
+	struct p9_fid *afid;
+	char *acc;
+
+	vmp = VFSTOP9(mp);
+	ses = &vmp->p9fs_session;
+	ses->uid = P9_NONUNAME;
+	ses->uname = P9_DEFUNAME;
+	ses->aname = P9_DEFANAME;
+
+	/*
+	 * Build the client; this calls down into the transport driver to
+	 * set up what is needed for the actual I/O.
+	 */
+	ses->clnt = p9_client_create(mp, error, vmp->mount_tag);
+	if (ses->clnt == NULL) {
+		P9_DEBUG(ERROR, "%s: p9_client_create failed\n", __func__);
+		return (NULL);
+	}
+
+	/* Cache the client's protocol version for use by the FS layer. */
+	if (p9_is_proto_dotl(ses->clnt))
+		ses->flags |= P9FS_PROTO_2000L;
+	else if (p9_is_proto_dotu(ses->clnt))
+		ses->flags |= P9FS_PROTO_2000U;
+
+	/* A missing "access" option means per-user access. */
+	acc = vfs_getopts(mp->mnt_optnew, "access", error);
+	if (acc == NULL || strcmp(acc, "user") == 0)
+		ses->flags |= P9_ACCESS_USER;
+	else if (strcmp(acc, "any") == 0)
+		ses->flags |= P9_ACCESS_ANY;
+	else if (strcmp(acc, "single") == 0)
+		ses->flags |= P9_ACCESS_SINGLE;
+	else {
+		P9_DEBUG(ERROR, "%s: unknown access mode\n", __func__);
+		*error = EINVAL;
+		goto fail;
+	}
+
+	/* Discard whatever vfs_getopts() left in *error, then attach. */
+	*error = 0;
+	afid = p9_client_attach(ses->clnt, NULL, ses->uname, P9_NONUNAME,
+	    ses->aname, error);
+	ses->mnt_fid = afid;
+	if (*error != 0) {
+		P9_DEBUG(ERROR, "%s: attach failed: %d\n", __func__, *error);
+		goto fail;
+	}
+	P9_DEBUG(SUBR, "%s: attach successful fid :%p\n", __func__, afid);
+	afid->uid = ses->uid;
+
+	/* Set up the (empty) node list of the session and its lock. */
+	STAILQ_INIT(&ses->virt_node_list);
+	P9FS_LOCK_INIT(ses);
+
+	P9_DEBUG(SUBR, "%s: INIT session successful\n", __func__);
+	return (afid);
+
+fail:
+	p9_client_destroy(ses->clnt);
+	return (NULL);
+}
+
+/*
+ * First phase of session teardown: drop the reference every node holds on
+ * its parent's vnode, then tell the client that a disconnect is starting.
+ */
+void
+p9fs_prepare_to_close(struct mount *mp)
+{
+	struct p9fs_session *ses;
+	struct p9fs_node *cur, *next, *par;
+
+	ses = &VFSTOP9(mp)->p9fs_session;
+
+	/* Break the node->parent links; a node that is its own parent is skipped. */
+	STAILQ_FOREACH_SAFE(cur, &ses->virt_node_list, p9fs_node_next, next) {
+		par = cur->parent;
+		if (par == NULL || par == cur)
+			continue;
+		cur->parent = NULL;
+		vrele(P9FS_NTOV(par));
+	}
+
+	/* From here on nothing other than clunk is allowed on the client. */
+	p9_client_begin_disconnect(ses->clnt);
+}
+
+/* Final phase of session shutdown: disconnect the session's 9P client. */
+void
+p9fs_complete_close(struct mount *mp)
+{
+	struct p9fs_session *ses;
+
+	ses = &VFSTOP9(mp)->p9fs_session;
+	p9_client_disconnect(ses->clnt);
+}
+
+
+/*
+ * Called from unmount.  Finishes the close, destroys the client and then
+ * the session lock.
+ */
+void
+p9fs_close_session(struct mount *mp)
+{
+	struct p9fs_session *ses;
+
+	ses = &VFSTOP9(mp)->p9fs_session;
+
+	p9fs_complete_close(mp);
+	/* Release the client structure before tearing down the lock. */
+	p9_client_destroy(ses->clnt);
+	P9FS_LOCK_DESTROY(ses);
+	P9_DEBUG(SUBR, "%s: Clean close session .\n", __func__);
+}
+
+/*
+ * Unlink and clunk every fid on the node's vfid list and, unless
+ * leave_ofids is non-zero, every fid on its vofid list as well.
+ * The list mutexes are not taken here.
+ */
+void
+p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids)
+{
+	struct p9_fid *head;
+
+	/* Pop from the front until the list is empty. */
+	while ((head = STAILQ_FIRST(&np->vfid_list)) != NULL) {
+		STAILQ_REMOVE_HEAD(&np->vfid_list, fid_next);
+		p9_client_clunk(head);
+	}
+
+	if (leave_ofids)
+		return;
+
+	while ((head = STAILQ_FIRST(&np->vofid_list)) != NULL) {
+		STAILQ_REMOVE_HEAD(&np->vofid_list, fid_next);
+		p9_client_clunk(head);
+	}
+}
+
+
+/*
+ * Unlink a fid from the node list selected by fid_type (VFID or VOFID),
+ * holding that list's mutex.  Any other fid_type is a no-op.  The fid is
+ * not clunked here.
+ */
+void
+p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *fid, int fid_type)
+{
+
+	if (fid_type == VFID) {
+		P9FS_VFID_LOCK(np);
+		STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next);
+		P9FS_VFID_UNLOCK(np);
+	} else if (fid_type == VOFID) {
+		P9FS_VOFID_LOCK(np);
+		STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next);
+		P9FS_VOFID_UNLOCK(np);
+	}
+}
+
+/*
+ * Append a fid to the node list selected by fid_type (VFID or VOFID),
+ * holding that list's mutex.  Any other fid_type is a no-op.
+ */
+void
+p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, int fid_type)
+{
+
+	if (fid_type == VFID) {
+		P9FS_VFID_LOCK(np);
+		STAILQ_INSERT_TAIL(&np->vfid_list, fid, fid_next);
+		P9FS_VFID_UNLOCK(np);
+	} else if (fid_type == VOFID) {
+		P9FS_VOFID_LOCK(np);
+		STAILQ_INSERT_TAIL(&np->vofid_list, fid, fid_next);
+		P9FS_VOFID_UNLOCK(np);
+	}
+}
+
+/*
+ * Build the list of path components leading from the root node down to np.
+ *
+ * On success stores a freshly allocated (M_TEMP) array in *names, ordered
+ * from the component just below the root to np's own name, and returns the
+ * number of entries.  The entries point at the nodes' i_name strings; they
+ * are not copied.  Returns 0 and leaves *names untouched when the parent
+ * chain ends before a root node is reached.
+ */
+static int
+p9fs_get_full_path(struct p9fs_node *np, char ***names)
+{
+	struct p9fs_node *cur;
+	char **path;
+	int depth, slot;
+
+	/* Count the nodes between np (inclusive) and the root (exclusive). */
+	depth = 0;
+	cur = np;
+	while (cur != NULL && !IS_ROOT(cur)) {
+		depth++;
+		cur = cur->parent;
+	}
+
+	if (cur == NULL)
+		return (0);
+
+	path = malloc(depth * sizeof(char *), M_TEMP, M_ZERO|M_WAITOK);
+
+	/* Fill from the last slot backwards while climbing towards the root. */
+	cur = np;
+	slot = depth;
+	while (slot-- > 0) {
+		path[slot] = cur->inode.i_name;
+		cur = cur->parent;
+	}
+
+	*names = path;
+	return (depth);
+}
+
+/*
+ * Return TRUE if this fid can be used for the requested mode.
+ *
+ * Only the low two bits of the fid's mode are considered.  An exact match
+ * is compatible, and a fid opened ORDWR also satisfies OREAD and OWRITE
+ * requests.
+ *
+ * TODO: figure out if this is correct for O_APPEND
+ */
+static int
+p9fs_compatible_mode(struct p9_fid *fid, int mode)
+{
+	int have;
+
+	have = fid->mode & 3;
+	if (have == mode)
+		return (TRUE);
+	if (have != P9PROTO_ORDWR)
+		return (FALSE);
+	return (mode == P9PROTO_OREAD || mode == P9PROTO_OWRITE);
+}
+
+/*
+ * Find the first fid on a node that belongs to uid.
+ *
+ * VFID searches the vfid list by uid only; VOFID searches the vofid list
+ * and additionally requires p9fs_compatible_mode() to accept the fid for
+ * the requested mode.  The matching list mutex is held during the scan.
+ * Returns NULL when nothing matches or fid_type is neither value.
+ */
+static struct p9_fid *
+p9fs_get_fid_from_uid(struct p9fs_node *np, uid_t uid, int fid_type, int mode)
+{
+	struct p9_fid *fid;
+
+	fid = NULL;
+	if (fid_type == VFID) {
+		P9FS_VFID_LOCK(np);
+		/* fid is NULL again if the scan runs off the end of the list. */
+		STAILQ_FOREACH(fid, &np->vfid_list, fid_next) {
+			if (fid->uid == uid)
+				break;
+		}
+		P9FS_VFID_UNLOCK(np);
+	} else if (fid_type == VOFID) {
+		P9FS_VOFID_LOCK(np);
+		STAILQ_FOREACH(fid, &np->vofid_list, fid_next) {
+			if (fid->uid == uid && p9fs_compatible_mode(fid, mode))
+				break;
+		}
+		P9FS_VOFID_UNLOCK(np);
+	}
+
+	return (fid);
+}
+
+/*
+ * Return the fid structure for a file corresponding to the current user id.
+ *
+ * The fid list of the p9fs node is searched first.  For VOFID the result of
+ * that search is returned as is (possibly NULL), because an open fid must
+ * have been created when the file was opened.  For VFID a missing fid is
+ * created: the user is attached first if the root node has no fid for this
+ * uid yet, and then the path from the root to the node is walked and the
+ * resulting fid is added to the node's fid list.
+ *
+ * clnt:     9P client used for the attach.
+ * np:       node a fid is wanted for.
+ * cred:     credentials supplying the uid; with a NULL cred uid 0 is used,
+ *           and with P9_ACCESS_ANY the session uid is used instead.
+ * fid_type: VFID or VOFID.
+ * mode:     open mode; only consulted for VOFID lookups.
+ * error:    receives the status of the attach/walk calls that are made.
+ *
+ * Returns the fid, or NULL when none exists (VOFID) or the attach or a
+ * walk failed.
+ */
+struct p9_fid *
+p9fs_get_fid(struct p9_client *clnt, struct p9fs_node *np, struct ucred *cred,
+    int fid_type, int mode, int *error)
+{
+	uid_t uid;
+	struct p9_fid *fid, *oldfid;
+	struct p9fs_node *root;
+	struct p9fs_session *vses;
+	int i, l, clone;
+	char **wnames = NULL;
+	uint16_t nwnames;
+
+	oldfid = NULL;
+	vses = np->p9fs_ses;
+
+	if (vses->flags & P9_ACCESS_ANY)
+		uid = vses->uid;
+	else if (cred)
+		uid = cred->cr_uid;
+	else
+		uid = 0;
+
+	/*
+	 * Search for the fid in corresponding fid list.
+	 * We should return NULL for VOFID if it is not present in the list.
+	 * Because VOFID should have been created during the file open.
+	 * If VFID is not present in the list then we should create one.
+	 */
+	fid = p9fs_get_fid_from_uid(np, uid, fid_type, mode);
+	if (fid != NULL || fid_type == VOFID)
+		return (fid);
+
+	/* Check root if the user is attached */
+	root = &np->p9fs_ses->rnp;
+	fid = p9fs_get_fid_from_uid(root, uid, fid_type, mode);
+	if (fid == NULL) {
+		/* Attach the user */
+		fid = p9_client_attach(clnt, NULL, NULL, uid,
+		    vses->aname, error);
+		if (*error != 0)
+			return (NULL);
+		p9fs_fid_add(root, fid, fid_type);
+	}
+
+	/* If we are looking for root then return it */
+	if (IS_ROOT(np))
+		return (fid);
+
+	/* Get full path from root to p9fs node */
+	nwnames = p9fs_get_full_path(np, &wnames);
+
+	/*
+	 * Could not get full path.
+	 * If p9fs node is not deleted, parent should exist.
+	 */
+	KASSERT(nwnames != 0, ("%s: Directory of %s doesn't exist", __func__, np->inode.i_name));
+
+	/*
+	 * Walk the path in chunks of at most P9_MAXWELEM names.  The first
+	 * walk clones the user's root fid; later walks move that clone.
+	 * Each chunk must start at &wnames[i]: passing wnames itself would
+	 * walk the first chunk again for paths deeper than P9_MAXWELEM.
+	 */
+	clone = 1;
+	i = 0;
+	while (i < nwnames) {
+		l = MIN(nwnames - i, P9_MAXWELEM);
+
+		fid = p9_client_walk(fid, l, &wnames[i], clone, error);
+		if (*error != 0) {
+			/* Only a fid cloned by an earlier chunk is ours to clunk. */
+			if (oldfid)
+				p9_client_clunk(oldfid);
+			fid = NULL;
+			goto bail_out;
+		}
+		oldfid = fid;
+		clone = 0;
+		i += l;
+	}
+	p9fs_fid_add(np, fid, fid_type);
+bail_out:
+	free(wnames, M_TEMP);
+	return (fid);
+}
diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c
new file mode 100644
index 000000000000..3451bc052187
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_vfsops.c
@@ -0,0 +1,610 @@
+/*-
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the VFS operations for p9fs, including mount, unmount,
+ * initialize, etc.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/fnv_hash.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <vm/uma.h>
+
+#include <fs/p9fs/p9fs_proto.h>
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9fs.h>
+
+/* Root of the vfs.p9fs sysctl tree. */
+SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "Plan 9 filesystem");
+
+/*
+ * Upper bound on the vflush() attempts made by a forced unmount.
+ * This count is static now. Can be made tunable later.
+ */
+#define P9FS_FLUSH_RETRIES 10
+
+static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
+/* UMA zones created in p9fs_init() and destroyed in p9fs_uninit(). */
+static uma_zone_t p9fs_node_zone;
+uma_zone_t p9fs_io_buffer_zone;
+uma_zone_t p9fs_getattr_zone;
+uma_zone_t p9fs_setattr_zone;
+uma_zone_t p9fs_pbuf_zone;
+extern struct vop_vector p9fs_vnops;
+
+/* Mount options accepted by p9_mount(); anything else is rejected. */
+static const char *p9fs_opts[] = {
+ "from", "trans", "access", NULL
+};
+
+/* Dispose p9fs node, freeing it to the UMA zone */
+void
+p9fs_dispose_node(struct p9fs_node **npp)
+{
+	struct p9fs_node *np = *npp;
+	struct vnode *nvp;
+
+	/* Nothing to dispose of. */
+	if (np == NULL)
+		return;
+
+	/* Release the parent's vnode unless the node is its own parent. */
+	if (np->parent != NULL && np->parent != np)
+		vrele(P9FS_NTOV(np->parent));
+
+	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, np);
+
+	/* Detach the node from its vnode. */
+	nvp = P9FS_NTOV(np);
+	nvp->v_data = NULL;
+
+	/* The name and the node itself are freed for non-root vnodes only. */
+	if ((nvp->v_vflag & VV_ROOT) == 0) {
+		free(np->inode.i_name, M_TEMP);
+		uma_zfree(p9fs_node_zone, np);
+	}
+
+	/* Clear the caller's pointer. */
+	*npp = NULL;
+}
+
+/* Initialize memory allocation */
+static int
+p9fs_init(struct vfsconf *vfsp)
+{
+
+	/* Zone sized for struct p9fs_node. */
+	p9fs_node_zone = uma_zcreate("p9fs node zone",
+	    sizeof(struct p9fs_node),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone sized for struct p9_stat_dotl (getattr). */
+	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
+	    sizeof(struct p9_stat_dotl),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Zone sized for struct p9_iattr_dotl (setattr). */
+	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
+	    sizeof(struct p9_iattr_dotl),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	/* Secondary pbuf zone used by putpages. */
+	p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
+
+	/*
+	 * Pool of P9FS_MTU sized I/O buffers.  Every thread takes its own
+	 * buffer from the pool, which keeps things simple when several
+	 * threads are active and avoids contention between them.
+	 */
+	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
+	    P9FS_MTU,
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+	return (0);
+}
+
+/* Destroy all the allocated memory */
+static int
+p9fs_uninit(struct vfsconf *vfsp)
+{
+
+	/* Tear down every zone set up by p9fs_init(). */
+	uma_zdestroy(p9fs_node_zone);
+	uma_zdestroy(p9fs_io_buffer_zone);
+	uma_zdestroy(p9fs_getattr_zone);
+	uma_zdestroy(p9fs_setattr_zone);
+	uma_zdestroy(p9fs_pbuf_zone);
+	return (0);
+}
+
+/* Function to umount p9fs */
+static int
+p9fs_unmount(struct mount *mp, int mntflags)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	int attempt, error, flush_flags;
+
+	/* No private mount data: nothing to tear down. */
+	vmp = VFSTOP9(mp);
+	if (vmp == NULL)
+		return (0);
+	vses = &vmp->p9fs_session;
+
+	flush_flags = 0;
+	if ((mntflags & MNT_FORCE) != 0)
+		flush_flags |= FORCECLOSE;
+	error = 0;
+
+	p9fs_prepare_to_close(mp);
+
+	/*
+	 * Flush every vnode on this mount point.  Only a forced unmount
+	 * retries, sleeping one tick between attempts; an interrupted sleep
+	 * ends the loop with EINTR, running out of attempts leaves EBUSY.
+	 */
+	for (attempt = 0; attempt < P9FS_FLUSH_RETRIES; attempt++) {
+		error = vflush(mp, 1, flush_flags, curthread);
+		if (error == 0 || (mntflags & MNT_FORCE) == 0)
+			break;
+
+		error = tsleep(&error, PSOCK, "p9unmnt", 1);
+		if (error == EINTR)
+			break;
+		error = EBUSY;
+	}
+
+	if (error != 0) {
+		/* Restore the transport status flag in case of error. */
+		vses->clnt->trans_status = P9FS_CONNECT;
+		return (error);
+	}
+
+	/* Flush succeeded: close the session and free the mount structure. */
+	p9fs_close_session(mp);
+	free(vmp, M_P9MNT);
+	mp->mnt_data = NULL;
+	return (0);
+}
+
+/*
+ * Compare qid stored in p9fs node
+ * Return 1 if does not match otherwise return 0
+ */
+int
+p9fs_node_cmp(struct vnode *vp, void *arg)
+{
+	struct p9fs_node *node = vp->v_data;
+	struct p9_qid *wanted = (struct p9_qid *)arg;
+
+	/* A vnode without a p9fs node never matches. */
+	if (node == NULL)
+		return (1);
+
+	/* The qid path must always agree. */
+	if (node->vqid.qid_path != wanted->path)
+		return (1);
+
+	/* For the root vnode the path alone decides. */
+	if (vp->v_vflag & VV_ROOT)
+		return (0);
+
+	/* Everything else must also agree on type and version. */
+	if (node->vqid.qid_mode == wanted->type &&
+	    node->vqid.qid_version == wanted->version)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Cleanup p9fs node
+ * - Destroy the FID LIST locks
+ * - Dispose all node knowledge
+ */
+void
+p9fs_destroy_node(struct p9fs_node **npp)
+{
+	struct p9fs_node *victim = *npp;
+
+	if (victim == NULL)
+		return;
+
+	/* Tear down the locks protecting the VFID and VOFID lists. */
+	P9FS_VFID_LOCK_DESTROY(victim);
+	P9FS_VOFID_LOCK_DESTROY(victim);
+
+	/*
+	 * Dispose of the node.  Only the local copy of the pointer is handed
+	 * down, so the caller's *npp is left unchanged.
+	 */
+	p9fs_dispose_node(&victim);
+}
+
+/*
+ * Common code used across p9fs to return the vnode for the file represented
+ * by the fid.
+ *
+ * The vnode is looked up in the VFS hash list, keyed on a hash of the qid
+ * path, which is unique to a file; p9fs_node_cmp is the comparison callback.
+ * I. If the vnode is found in the hash list:
+ *    1. The root vnode is returned as is (the fid is clunked only when no
+ *       node was passed in).
+ *    2. Any other vnode is validated by reloading its stats:
+ *       a. If the reload fails, the vnode is removed from the hash list,
+ *          marked P9FS_NODE_DELETED and released; a new vnode is then
+ *          created as in II.
+ *       b. If the reload succeeds, the new fid is clunked, since the vnode
+ *          already has a fid associated with it, and the vnode is returned.
+ *          This is to avoid fid leaks.
+ * II. If the vnode is not found, a new vnode (and, when np is NULL, a new
+ *     p9fs node) is created and returned.
+ *
+ * mp     - mount the vnode belongs to
+ * np     - node to attach to the new vnode; a non-NULL np is treated as the
+ *          root node.  NULL to allocate a new node.
+ * flags  - lock flags handed to vfs_hash_get()/vfs_hash_insert()
+ * parent - parent node; its vnode is referenced when a node is allocated
+ * fid    - fid whose qid identifies the file
+ * vpp    - out: the resulting vnode; set to NULLVP on failures that occur
+ *          after the hash lookup
+ * name   - file name copied into the new node's inode (used only when np is
+ *          NULL)
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
+    struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
+    char *name)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	struct vnode *vp;
+	struct p9fs_node *node;
+	struct thread *td;
+	uint32_t hash;
+	int error, error_reload = 0;
+	struct p9fs_inode *inode;
+	size_t namesize;
+
+	td = curthread;
+	vmp = VFSTOP9(mp);
+	vses = &vmp->p9fs_session;
+
+	/* Look for vp in the hash_list */
+	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
+	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
+	    &fid->qid);
+	if (error != 0)
+		return (error);
+	else if (vp != NULL) {
+		if (vp->v_vflag & VV_ROOT) {
+			if (np == NULL)
+				p9_client_clunk(fid);
+			*vpp = vp;
+			return (0);
+		}
+		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
+		if (error != 0) {
+			node = vp->v_data;
+			/* Remove stale vnode from hash list */
+			vfs_hash_remove(vp);
+			node->flags |= P9FS_NODE_DELETED;
+
+			vput(vp);
+			*vpp = NULLVP;
+			vp = NULL;
+		} else {
+			*vpp = vp;
+			/* Clunk the new fid if not root */
+			p9_client_clunk(fid);
+			return (0);
+		}
+	}
+
+	/*
+	 * We must promote to an exclusive lock for vnode creation.  This
+	 * can happen if lookup is passed LOCKSHARED.
+	 */
+	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
+		flags &= ~LK_TYPE_MASK;
+		flags |= LK_EXCLUSIVE;
+	}
+
+	/* Allocate a new vnode. */
+	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
+		*vpp = NULLVP;
+		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
+		return (error);
+	}
+
+	/* If we dont have it, create one. */
+	if (np == NULL) {
+		np = uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
+		/* Initialize the VFID list */
+		P9FS_VFID_LOCK_INIT(np);
+		STAILQ_INIT(&np->vfid_list);
+		p9fs_fid_add(np, fid, VFID);
+
+		/* Initialize the VOFID list */
+		P9FS_VOFID_LOCK_INIT(np);
+		STAILQ_INIT(&np->vofid_list);
+
+		vref(P9FS_NTOV(parent));
+		np->parent = parent;
+		np->p9fs_ses = vses; /* Map the current session */
+		inode = &np->inode;
+		/*
+		 * Fill the name of the file in inode.  The node above is
+		 * already allocated with M_WAITOK, so sleeping is permitted
+		 * here; M_WAITOK guarantees a non-NULL buffer for the copy.
+		 */
+		namesize = strlen(name) + 1;
+		inode->i_name = malloc(namesize, M_TEMP, M_WAITOK | M_ZERO);
+		strlcpy(inode->i_name, name, namesize);
+	} else {
+		vp->v_type = VDIR; /* root vp is a directory */
+		vp->v_vflag |= VV_ROOT;
+		vref(vp); /* Increment a reference on root vnode during mount */
+	}
+
+	/* Tie the vnode and the p9fs node together. */
+	vp->v_data = np;
+	np->v_node = vp;
+	inode = &np->inode;
+	inode->i_qid_path = fid->qid.path;
+	P9FS_SET_LINKS(inode);
+
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
+	if (vp->v_type != VFIFO)
+		VN_LOCK_ASHARE(vp);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		/*
+		 * vput(vp) is already called from insmntque_stddtr().
+		 * Just goto 'out' to dispose the node.
+		 */
+		goto out;
+	}
+
+	/* Init the vnode with the disk info */
+	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
+	if (error != 0) {
+		/* The vnode is still ours here; 'out' must vput() it. */
+		error_reload = 1;
+		goto out;
+	}
+
+	error = vfs_hash_insert(vp, hash, flags, td, vpp,
+	    p9fs_node_cmp, &fid->qid);
+	if (error != 0) {
+		goto out;
+	}
+
+	if (*vpp == NULL) {
+		/* Our vnode was inserted: track the node in the session. */
+		P9FS_LOCK(vses);
+		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
+		np->flags |= P9FS_NODE_IN_SESSION;
+		P9FS_UNLOCK(vses);
+
+		*vpp = vp;
+	} else {
+		/*
+		 * Returning matching vp found in hashlist.
+		 * So cleanup the np allocated above in this context.
+		 */
+		if (!IS_ROOT(np)) {
+			p9fs_destroy_node(&np);
+		}
+	}
+
+	return (0);
+out:
+	/* Something went wrong, dispose the node */
+	if (!IS_ROOT(np)) {
+		p9fs_destroy_node(&np);
+	}
+
+	if (error_reload) {
+		vput(vp);
+	}
+
+	*vpp = NULLVP;
+	return (error);
+}
+
+/*
+ * Main mount function for p9fs.
+ *
+ * Validates the mount options, allocates the private mount structure,
+ * initializes the session (which yields the root fid) and sets up the root
+ * p9fs node embedded in the session.
+ *
+ * Returns 0 on success, EINVAL for unknown options or a missing, empty or
+ * unterminated "from" mount tag, or the error reported by
+ * p9fs_init_session().  On a session failure the mount structure is freed
+ * and mp->mnt_data is reset to NULL.
+ */
+static int
+p9_mount(struct mount *mp)
+{
+	struct p9_fid *fid;
+	struct p9fs_mount *vmp;
+	struct p9fs_session *vses;
+	struct p9fs_node *p9fs_root;
+	int error;
+	char *from;
+	int len;
+
+	from = NULL;
+	len = 0;
+
+	/* Verify the validity of mount options */
+	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
+		return (EINVAL);
+
+	/*
+	 * Extract NULL terminated mount tag from mount options.  An option
+	 * without a value must be rejected before from[len - 1] is examined.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
+	if (error != 0 || from == NULL || len <= 0 || from[len - 1] != '\0')
+		return (EINVAL);
+
+	/* Allocate and initialize the private mount structure. */
+	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
+	mp->mnt_data = vmp;
+	vmp->p9fs_mountp = mp;
+	vmp->mount_tag = from;
+	vmp->mount_tag_len = len;
+	vses = &vmp->p9fs_session;
+	vses->p9fs_mount = mp;
+	p9fs_root = &vses->rnp;
+	/* Hardware iosize from the Qemu */
+	mp->mnt_iosize_max = PAGE_SIZE;
+	/*
+	 * Init the session for the p9fs root.  This creates a new root fid and
+	 * attaches the client and server.
+	 */
+	fid = p9fs_init_session(mp, &error);
+	if (fid == NULL) {
+		goto out;
+	}
+
+	/* Set up the root node: fid lists, self-parenting and session. */
+	P9FS_VFID_LOCK_INIT(p9fs_root);
+	STAILQ_INIT(&p9fs_root->vfid_list);
+	p9fs_fid_add(p9fs_root, fid, VFID);
+	P9FS_VOFID_LOCK_INIT(p9fs_root);
+	STAILQ_INIT(&p9fs_root->vofid_list);
+	p9fs_root->parent = p9fs_root;
+	p9fs_root->flags |= P9FS_ROOT;
+	p9fs_root->p9fs_ses = vses;
+	vfs_getnewfsid(mp);
+	strlcpy(mp->mnt_stat.f_mntfromname, from,
+	    sizeof(mp->mnt_stat.f_mntfromname));
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
+	MNT_IUNLOCK(mp);
+	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
+	/* Mount structures created. */
+
+	return (0);
+out:
+	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
+	/* vmp comes from an M_WAITOK allocation, so it is never NULL here. */
+	free(vmp, M_P9MNT);
+	mp->mnt_data = NULL;
+	return (error);
+}
+
+/* Mount entry point */
+static int
+p9fs_mount(struct mount *mp)
+{
+	int rc;
+
+	/*
+	 * MNT_UPDATE gets minimal support only: a read-only mount may be
+	 * switched to read-write when "ro" is absent from the new options.
+	 */
+	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
+		if ((mp->mnt_flag & MNT_RDONLY) != 0 &&
+		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0))
+			mp->mnt_flag &= ~MNT_RDONLY;
+		return (0);
+	}
+
+	/* Fresh mount; on failure run the forced unmount path. */
+	rc = p9_mount(mp);
+	if (rc != 0)
+		(void) p9fs_unmount(mp, MNT_FORCE);
+	return (rc);
+}
+
+/*
+ * Retrieve the root vnode of this mount. After filesystem is mounted, the root
+ * vnode is created for the first time. Subsequent calls to p9fs root will
+ * return the same vnode created during mount.
+ */
+static int
+p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_node *rootnp;
+	struct p9_client *clnt;
+	struct p9_fid *rootfid;
+	int error;
+
+	vmp = VFSTOP9(mp);
+	rootnp = &vmp->p9fs_session.rnp;
+	clnt = vmp->p9fs_session.clnt;
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: node=%p name=%s\n", __func__, rootnp,
+	    rootnp->inode.i_name);
+
+	rootfid = p9fs_get_fid(clnt, rootnp, curthread->td_ucred, VFID, -1,
+	    &error);
+	if (error != 0) {
+		/*
+		 * The only tolerated failure is a missing fid while the
+		 * transport is being disconnected: the mount fid then stands
+		 * in as the root fid.  This is used while unmounting as root
+		 * when a non-root user has mounted p9fs.
+		 */
+		if (rootfid != NULL ||
+		    clnt->trans_status != P9FS_BEGIN_DISCONNECT) {
+			*vpp = NULLVP;
+			return (error);
+		}
+		rootfid = vmp->p9fs_session.mnt_fid;
+	}
+
+	/* The root node acts as its own parent. */
+	error = p9fs_vget_common(mp, rootnp, lkflags, rootnp, rootfid, vpp,
+	    NULL);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+
+	rootnp->v_node = *vpp;
+	return (0);
+}
+
+/* Retrieve the file system statistics */
+static int
+p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
+{
+	struct p9fs_mount *vmp;
+	struct p9fs_node *rootnp;
+	struct p9_client *clnt;
+	struct p9_fid *rootfid;
+	struct p9_statfs st;
+	int error;
+
+	vmp = VFSTOP9(mp);
+	rootnp = &vmp->p9fs_session.rnp;
+	clnt = vmp->p9fs_session.clnt;
+	error = 0;
+
+	/* The request is issued on the root node's fid. */
+	rootfid = p9fs_get_fid(clnt, rootnp, curthread->td_ucred, VFID, -1,
+	    &error);
+	if (error != 0)
+		return (error);
+
+	if (p9_client_statfs(rootfid, &st) != 0) {
+		/* At least report the block sizes if the request fails. */
+		buf->f_bsize = PAGE_SIZE;
+		buf->f_iosize = buf->f_bsize; /* XXX */
+		return (0);
+	}
+
+	buf->f_type = st.type;
+
+	/*
+	 * The block size is capped at PAGE_SIZE irrespective of what the
+	 * Qemu server can do.
+	 */
+	buf->f_bsize = st.bsize;
+	if (st.bsize > PAGE_SIZE)
+		buf->f_bsize = PAGE_SIZE;
+	buf->f_iosize = buf->f_bsize;
+
+	buf->f_blocks = st.blocks;
+	buf->f_bfree = st.bfree;
+	buf->f_bavail = st.bavail;
+	buf->f_files = st.files;
+	buf->f_ffree = st.ffree;
+
+	return (0);
+}
+
+/*
+ * File handle to vnode conversion.  No lookup is performed here; the
+ * function always returns EINVAL.
+ */
+static int
+p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
+{
+
+ return (EINVAL);
+}
+
+/* VFS operations vector for p9fs. */
+struct vfsops p9fs_vfsops = {
+ .vfs_init = p9fs_init,
+ .vfs_uninit = p9fs_uninit,
+ .vfs_mount = p9fs_mount,
+ .vfs_unmount = p9fs_unmount,
+ .vfs_root = p9fs_root,
+ .vfs_statfs = p9fs_statfs,
+ .vfs_fhtovp = p9fs_fhtovp,
+};
+
+/* Register the filesystem under the name "p9fs" with the VFCF_JAIL flag. */
+VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
+MODULE_VERSION(p9fs, 1);
diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c
new file mode 100644
index 000000000000..227e2b93883e
--- /dev/null
+++ b/sys/fs/p9fs/p9fs_vnops.c
@@ -0,0 +1,2236 @@
+/*
+ * Copyright (c) 2017-2020 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* This file contains VFS file ops for the 9P protocol.
+ * This makes the upper layer of the p9fs driver. These functions interact
+ * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
+ * the user file operations are handled here.
+ */
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/rwlock.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vnode_pager.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9fs.h>
+#include <fs/p9fs/p9fs_proto.h>
+
+/* File permission bits (octal); ISVTX is tested in p9fs_lookup(). */
+#define IEXEC 0000100 /* Executable. */
+#define IWRITE 0000200 /* Writeable. */
+#define IREAD 0000400 /* Readable. */
+#define ISVTX 0001000 /* Sticky bit. */
+#define ISGID 0002000 /* Set-gid. */
+#define ISUID 0004000 /* Set-uid. */
+
+static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
+/* UMA zones declared here as extern; defined outside this file. */
+extern uma_zone_t p9fs_io_buffer_zone;
+extern uma_zone_t p9fs_getattr_zone;
+extern uma_zone_t p9fs_setattr_zone;
+extern uma_zone_t p9fs_pbuf_zone;
+/* For the root vnode's vnops. */
+struct vop_vector p9fs_vnops;
+
+/* Forward declaration; used by the create/mkdir/mknod VOPs below. */
+static uint32_t p9fs_unix2p9_mode(uint32_t mode);
+
+static void
+p9fs_itimes(struct vnode *vp)
+{
+	struct p9fs_node *np;
+	struct timespec now;
+
+	/* Stamp the inode's mtime with the seconds of the current VFS time. */
+	np = P9FS_VTON(vp);
+	vfs_timestamp(&now);
+	np->inode.i_mtime = now.tv_sec;
+}
+
+/*
+ * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
+ * The cleanup includes invalidating all cache entries for the vnode,
+ * destroying the vobject, removing vnode from hashlist, removing p9fs node
+ * from the list of session p9fs nodes, and disposing of the p9fs node.
+ * Basically it is doing a reverse of what a create/vget does.
+ */
+void
+p9fs_cleanup(struct p9fs_node *np)
+{
+	struct p9fs_session *vses;
+	struct vnode *vp;
+
+	if (np == NULL)
+		return;
+
+	vses = np->p9fs_ses;
+	vp = P9FS_NTOV(np);
+
+	/* A node already marked deleted has left the hash list earlier. */
+	if ((np->flags & P9FS_NODE_DELETED) == 0)
+		vfs_hash_remove(vp);
+
+	/*
+	 * Unlink the node from the session list.  A node that is not on the
+	 * list gets no further cleanup.
+	 */
+	P9FS_LOCK(vses);
+	if ((np->flags & P9FS_NODE_IN_SESSION) == 0) {
+		P9FS_UNLOCK(vses);
+		return;
+	}
+	np->flags &= ~P9FS_NODE_IN_SESSION;
+	STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
+	P9FS_UNLOCK(vses);
+
+	/* Drop every name cache entry that refers to this vnode. */
+	cache_purge(vp);
+
+	/* Get rid of the vm object together with its pages. */
+	vnode_destroy_vobject(vp);
+
+	/* Remove all the fids held by the node. */
+	p9fs_fid_remove_all(np, FALSE);
+
+	/* Finally destroy the node itself. */
+	p9fs_destroy_node(&np);
+}
+
+/*
+ * Reclaim VOP is defined to be called for every vnode. This starts off
+ * the cleanup by clunking(remove the fid on the server) and calls
+ * p9fs_cleanup to free all the resources allocated for p9fs node.
+ */
+static int
+p9fs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
+
+	/* All of the teardown lives in p9fs_cleanup(). */
+	p9fs_cleanup(np);
+	return (0);
+}
+
+/*
+ * recycle vnodes which are no longer referenced i.e, their usecount is zero
+ */
+static int
+p9fs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
+
+	/* Only vnodes whose node is marked deleted are recycled here. */
+	if ((np->flags & P9FS_NODE_DELETED) != 0)
+		vrecycle(vp);
+	return (0);
+}
+
+/* Arguments handed from p9fs_lookup() to p9fs_lookup_alloc(). */
+struct p9fs_lookup_alloc_arg {
+ struct componentname *cnp; /* component being looked up */
+ struct p9fs_node *dnp; /* node of the directory searched */
+ struct p9_fid *newfid; /* fid obtained by walking to the component */
+};
+
+/* Callback for vn_get_ino */
+static int
+p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
+{
+	struct p9fs_lookup_alloc_arg *args = arg;
+	struct componentname *cnp = args->cnp;
+
+	/*
+	 * The lock flags come from the component name; the lkflags argument
+	 * is not used.
+	 */
+	return (p9fs_vget_common(mp, NULL, cnp->cn_lkflags, args->dnp,
+	    args->newfid, vpp, cnp->cn_nameptr));
+}
+
+/*
+ * p9fs_lookup is called for every component name that is being searched for.
+ *
+ * I. If component is found on the server, we look for the in-memory
+ * representation (vnode) of this component in namecache.
+ * A. If the node is found in the namecache, we check if the vnode is still
+ * valid.
+ * 1. If it is still valid, return vnode.
+ * 2. If it is not valid, we remove this vnode from the name cache and
+ * create a new vnode for the component and return that vnode.
+ * B. If the vnode is not found in the namecache, we look for it in the
+ * hash list.
+ * 1. If the vnode is in the hash list, we check if the vnode is still
+ * valid.
+ * a. If it is still valid, we add that vnode to the namecache for
+ * future lookups and return the vnode.
+ * b. If it is not valid, create a new vnode and p9fs node,
+ * initialize them and return the vnode.
+ * 2. If the vnode is not found in the hash list, we create a new vnode
+ * and p9fs node, initialize them and return the vnode.
+ * II. If the component is not found on the server, an error code is returned.
+ * A. For the creation case, we return EJUSTRETURN so VFS can handle it.
+ * B. For all other cases, ENOENT is returned.
+ */
+static int
+p9fs_lookup(struct vop_lookup_args *ap)
+{
+ struct vnode *dvp;
+ struct vnode **vpp, *vp;
+ struct componentname *cnp;
+ struct p9fs_node *dnp; /* directory p9fs node */
+ struct p9fs_node *np;
+ struct p9fs_session *vses;
+ struct mount *mp; /* mount point of the session */
+ struct p9_fid *dvfid, *newfid;
+ uint64_t flags;
+ int error;
+ struct vattr vattr;
+ char tmpchr;
+
+ dvp = ap->a_dvp;
+ vpp = ap->a_vpp;
+ cnp = ap->a_cnp;
+ dnp = P9FS_VTON(dvp);
+ error = 0;
+ flags = cnp->cn_flags;
+ *vpp = NULLVP;
+
+ /* The directory vnode has no p9fs node attached. */
+ if (dnp == NULL)
+ return (ENOENT);
+
+ /* "." resolves to the directory itself, with an extra reference. */
+ if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
+ vref(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+
+ vses = dnp->p9fs_ses;
+ mp = vses->p9fs_mount;
+
+ /* Do the cache part ourselves */
+ if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ return (EROFS);
+
+ if (dvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ /* Search permission on the directory is required. */
+ error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
+ if (error)
+ return (error);
+
+ /* Do the directory walk on host to check if file exist */
+ dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+ if (error)
+ return (error);
+
+ /*
+ * Save the character present at namelen in nameptr string and
+ * null terminate the character to get the search name for p9_dir_walk
+ * This is done to handle when lookup is for "a" and component
+ * name contains a/b/c
+ */
+ tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+ cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+ /*
+ * If the client_walk fails, it means the file we are looking for does
+ * not exist. Either let the caller create the file, if the flags allow
+ * it, or just return the error.
+ */
+ newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+
+ /* Put the saved character back right after the walk. */
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+ if (error != 0 || newfid == NULL) {
+ /* Clunk the newfid if it is not NULL */
+ if (newfid != NULL)
+ p9_client_clunk(newfid);
+
+ if (error != ENOENT)
+ return (error);
+
+ /* The requested file was not found. */
+ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ (flags & ISLASTCN)) {
+
+ if (mp->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+
+ /* Write access lets VFS go ahead and create the entry. */
+ error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+ curthread);
+ if (!error) {
+ return (EJUSTRETURN);
+ }
+ }
+ return (error);
+ }
+
+ /* Look for the entry in the component cache*/
+ error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
+ if (error > 0 && error != ENOENT) {
+ P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
+ goto out;
+ }
+
+ /* -1 is handled as a cache hit, with the vnode in *vpp. */
+ if (error == -1) {
+ vp = *vpp;
+ /*
+ * Check if the entry in cache is stale or not. A valid entry
+ * leaves through 'out' with error == 0, so the freshly walked
+ * fid is clunked there and the cached vnode stays in *vpp.
+ */
+ if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
+ ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
+ goto out;
+ }
+ /*
+ * Either the qid did not match or getattr returned an error:
+ * purge the cached vnode and release it.
+ */
+ cache_purge(vp);
+ if (dvp != vp)
+ vput(vp);
+ else
+ vrele(vp);
+
+ *vpp = NULLVP;
+ } else if (error == ENOENT) {
+ /* The cache reported ENOENT for this name. */
+ if (VN_IS_DOOMED(dvp))
+ goto out;
+ if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
+ error = ENOENT;
+ goto out;
+ }
+ cache_purge_negative(dvp);
+ }
+ /* Reset values */
+ error = 0;
+ vp = NULLVP;
+
+ /* Terminate the component name again for the vget calls below. */
+ tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+ cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+ /*
+ * Looks like we have found an entry. Now take care of all other cases.
+ */
+ if (flags & ISDOTDOT) {
+ /* ".." goes through vn_vget_ino_gen() with our alloc callback. */
+ struct p9fs_lookup_alloc_arg p9aa;
+ p9aa.cnp = cnp;
+ p9aa.dnp = dnp;
+ p9aa.newfid = newfid;
+ error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
+ if (error)
+ goto out;
+ *vpp = vp;
+ } else {
+ /*
+ * client_walk is equivalent to searching a component name in a
+ * directory(fid) here. If new fid is returned, we have found an
+ * entry for this component name so, go and create the rest of
+ * the vnode infra(vget_common) for the returned newfid.
+ */
+ if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+ && (flags & ISLASTCN)) {
+ /* Deleting or renaming needs write access to the directory. */
+ error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+ curthread);
+ if (error)
+ goto out;
+
+ error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+ dnp, newfid, &vp, cnp->cn_nameptr);
+ if (error)
+ goto out;
+
+ *vpp = vp;
+ np = P9FS_VTON(vp);
+ /*
+ * Sticky directory: only uid 0, the directory owner or the
+ * file owner may proceed. This path returns without
+ * clunking newfid.
+ */
+ if ((dnp->inode.i_mode & ISVTX) &&
+ cnp->cn_cred->cr_uid != 0 &&
+ cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
+ cnp->cn_cred->cr_uid != np->inode.n_uid) {
+ vput(*vpp);
+ *vpp = NULL;
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ return (EPERM);
+ }
+ } else {
+ error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+ dnp, newfid, &vp, cnp->cn_nameptr);
+ if (error)
+ goto out;
+ *vpp = vp;
+ }
+ }
+
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+ /* Store the result in the cache if MAKEENTRY is specified in flags */
+ if ((cnp->cn_flags & MAKEENTRY) != 0)
+ cache_enter(dvp, *vpp, cnp);
+ return (error);
+out:
+ /* Common exit: restore the name buffer and clunk the walked fid. */
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ p9_client_clunk(newfid);
+ return (error);
+}
+
+/*
+ * Common creation function for file/directory with respective flags. We first
+ * open the parent directory in order to create the file under it. For this,
+ * as 9P protocol suggests, we need to call client_walk to create the open fid.
+ * Once we have the open fid, the file_create function creates the direntry with
+ * the name and perm specified under the parent dir. If this succeeds (an entry
+ * is created for the new file on the server), we create our metadata for this
+ * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
+ * fid of the parent directory.
+ */
+static int
+create_common(struct p9fs_node *dnp, struct componentname *cnp,
+ char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
+{
+ char tmpchr;
+ struct p9_fid *dvfid, *ofid, *newfid;
+ struct p9fs_session *vses;
+ struct mount *mp;
+ int error;
+
+ P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
+
+ vses = dnp->p9fs_ses;
+ mp = vses->p9fs_mount;
+ newfid = NULL;
+ error = 0;
+
+ /* Fid of the parent directory for the caller's credentials. */
+ dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+ if (error != 0)
+ return (error);
+
+ /* Clone the directory fid to create the new file */
+ ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Save the character present at namelen in nameptr string and
+ * null terminate the character to get the search name for p9_dir_walk
+ */
+ tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+ cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+ /* Create the entry on the server through the cloned fid. */
+ error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
+ extension);
+ if (error != 0) {
+ P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
+ goto out;
+ }
+
+ /* If its not hardlink only then do the walk, else we are done. */
+ if (!(perm & P9PROTO_DMLINK)) {
+ /*
+ * Do the lookup part and add the vnode, p9fs node. Note that vpp
+ * is filled in here.
+ */
+ newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+ if (newfid != NULL) {
+ error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+ dnp, newfid, vpp, cnp->cn_nameptr);
+ if (error != 0)
+ goto out;
+ } else {
+ /* Not found; return the error reported by the walk. */
+ goto out;
+ }
+
+ if ((cnp->cn_flags & MAKEENTRY) != 0)
+ cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
+ }
+ /*
+ * NOTE(review): on the hardlink path *vpp is not written by this
+ * function but is still passed to the debug print below.
+ */
+ P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
+ __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
+ /* Clunk the open ofid. */
+ if (ofid != NULL)
+ (void)p9_client_clunk(ofid);
+
+ /* Restore the caller's name buffer. */
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ return (0);
+out:
+ /* Failure: clunk the cloned fid and, if any, the walked fid. */
+ if (ofid != NULL)
+ (void)p9_client_clunk(ofid);
+
+ if (newfid != NULL)
+ (void)p9_client_clunk(newfid);
+
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ return (error);
+}
+
+/*
+ * This is the main file creation VOP. Make the permissions of the new
+ * file and call the create_common common code to complete the create.
+ */
+static int
+p9fs_create(struct vop_create_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vattr *vap = ap->a_vap;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t mode, perm;
+	int error;
+
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+
+	/* Build the inode mode from the vattr and translate it to 9P. */
+	mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	error = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
+	    ap->a_vpp);
+	/* A successful create bumps the directory's link count. */
+	if (error == 0)
+		P9FS_INCR_LINKS(dinode);
+
+	return (error);
+}
+
+/*
+ * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
+ * and call the create_common common code to complete the create.
+ */
+static int
+p9fs_mkdir(struct vop_mkdir_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vattr *vap = ap->a_vap;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t mode, perm;
+	int error;
+
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+
+	/* Same as a file create, with S_IFDIR forced into the mode. */
+	mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	error = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_ORDWR,
+	    ap->a_vpp);
+	/* A successful mkdir bumps the parent directory's link count. */
+	if (error == 0)
+		P9FS_INCR_LINKS(dinode);
+
+	return (error);
+}
+
+/*
+ * p9fs_mknod is the main node creation vop. Make the permissions of the new node
+ * and call the create_common common code to complete the create.
+ */
+static int
+p9fs_mknod(struct vop_mknod_args *ap)
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vattr *vap = ap->a_vap;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t mode, perm;
+	int error;
+
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+
+	/* Build the inode mode from the vattr and translate it to 9P. */
+	mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	/* Unlike create and mkdir, the node is created with P9PROTO_OREAD. */
+	error = create_common(dnp, ap->a_cnp, NULL, perm, P9PROTO_OREAD,
+	    ap->a_vpp);
+	if (error == 0)
+		P9FS_INCR_LINKS(dinode);
+
+	return (error);
+}
+
+/*
+ * Convert kernel file flags to a 9P open mode.
+ *
+ * uflags   - kernel fflags; converted to O_* form with OFLAGS() first.
+ * extended - when non-zero, O_EXCL and O_APPEND are also translated to
+ *            their P9PROTO_* counterparts.
+ *
+ * Returns one of P9PROTO_OREAD / P9PROTO_OWRITE / P9PROTO_ORDWR, optionally
+ * OR-ed with P9PROTO_OEXCL / P9PROTO_OAPPEND.
+ */
+static int
+p9fs_uflags_mode(int uflags, int extended)
+{
+	uint32_t ret;
+
+	/* Convert first to O flags.*/
+	uflags = OFLAGS(uflags);
+
+	switch (uflags & O_ACCMODE) {
+	case O_RDONLY:
+		ret = P9PROTO_OREAD;
+		break;
+	case O_WRONLY:
+		ret = P9PROTO_OWRITE;
+		break;
+	case O_RDWR:
+		ret = P9PROTO_ORDWR;
+		break;
+	default:
+		/*
+		 * The remaining access-mode encoding matches none of the
+		 * cases above.  Fall back to the least-privileged mode so
+		 * that ret is never returned uninitialized.
+		 */
+		ret = P9PROTO_OREAD;
+		break;
+	}
+
+	if (extended) {
+		if (uflags & O_EXCL)
+			ret |= P9PROTO_OEXCL;
+
+		if (uflags & O_APPEND)
+			ret |= P9PROTO_OAPPEND;
+	}
+
+	return (ret);
+}
+
+/*
+ * This is the main open VOP for every file open. If the file is already
+ * open, then increment and return. If there is no open fid for this file,
+ * there needs to be a client_walk which creates a new open fid for this file.
+ * Once we have a open fid, call the open on this file with the mode creating
+ * the vobject.
+ *
+ * Only VREG, VDIR and VLNK vnodes are accepted; anything else yields
+ * EOPNOTSUPP. Returns 0 on success or the first error reported by the
+ * stat reload, cache invalidation, fid lookup, walk or open step.
+ */
+static int
+p9fs_open(struct vop_open_args *ap)
+{
+ int error;
+ struct vnode *vp;
+ struct p9fs_node *np;
+ struct p9fs_session *vses;
+ struct p9_fid *vofid, *vfid;
+ size_t filesize;
+ uint32_t mode;
+
+ error = 0;
+ vp = ap->a_vp;
+ np = P9FS_VTON(vp);
+ vses = np->p9fs_ses;
+
+ P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+ return (EOPNOTSUPP);
+
+ /* Refresh the cached inode; this may also set P9FS_NODE_MODIFIED. */
+ error = p9fs_reload_stats_dotl(vp, ap->a_cred);
+ if (error != 0)
+ return (error);
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ /*
+ * Invalidate the pages of the vm_object cache if the file is modified
+ * based on the flag set in reload stats
+ */
+ if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
+ error = vinvalbuf(vp, 0, 0, 0);
+ if (error != 0)
+ return (error);
+ /* The flag is cleared only after a successful invalidation. */
+ np->flags &= ~P9FS_NODE_MODIFIED;
+ }
+
+ /* The (unopened) fid for this node; used as the source of the walk below. */
+ vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Translate kernel fflags to 9p mode
+ */
+ mode = p9fs_uflags_mode(ap->a_mode, 1);
+
+ /*
+ * Search the fid in vofid_list for current user. If found increase the open
+ * count and return. If not found clone a new fid and open the file using
+ * that cloned fid.
+ */
+ vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
+ if (vofid != NULL) {
+ vofid->v_opens++;
+ return (0);
+ } else {
+ /*vofid is the open fid for this file.*/
+ vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
+ if (error != 0)
+ return (error);
+ }
+
+ /* On open failure the freshly walked fid is clunked and the error returned. */
+ error = p9_client_open(vofid, mode);
+ if (error != 0)
+ p9_client_clunk(vofid);
+ else {
+ /* First opener: create the VM object and record the fid on the node. */
+ vofid->v_opens = 1;
+ filesize = np->inode.i_size;
+ vnode_create_vobject(vp, filesize, ap->a_td);
+ p9fs_fid_add(np, vofid, VOFID);
+ }
+
+ return (error);
+}
+
+/*
+ * VOP_CLOSE handler.  Drops one open reference from the open fid that
+ * matches the caller's credentials and mode; the fid itself is not clunked
+ * here (that is left to p9fs_reclaim).  Always returns 0.
+ */
+static int
+p9fs_close(struct vop_close_args *ap)
+{
+	struct p9fs_node *np;
+	struct p9_fid *open_fid;
+	int p9_mode, error;
+
+	np = P9FS_VTON(ap->a_vp);
+	if (np == NULL)
+		return (0);
+
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
+
+	/* Translate kernel fflags to 9p mode to find the matching open fid. */
+	p9_mode = p9fs_uflags_mode(ap->a_fflag, 1);
+	open_fid = p9fs_get_fid(np->p9fs_ses->clnt, np, ap->a_cred, VOFID,
+	    p9_mode, &error);
+	if (open_fid != NULL)
+		open_fid->v_opens--;
+
+	return (0);
+}
+
+/*
+ * Decide whether the requested access can be attempted at all on this
+ * vnode, independent of permission bits.
+ *
+ * Returns EROFS for a modifying access to a directory, symlink or regular
+ * file on a read-only mount, EINVAL for an unrecognised vnode type, and 0
+ * otherwise.
+ */
+static int
+p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
+{
+
+	switch (vap->va_type) {
+	case VBLK:
+	case VCHR:
+	case VSOCK:
+	case VFIFO:
+		/*
+		 * Special nodes may be written even on a read-only mount,
+		 * provided the permission check passes later.
+		 */
+		return (0);
+	case VDIR:
+	case VLNK:
+	case VREG:
+		/* Normal nodes: subject to the read-only check below. */
+		break;
+	default:
+		/* No idea what this is */
+		return (EINVAL);
+	}
+
+	if ((mode & VMODIFY_PERMS) == 0)
+		return (0);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
+		return (EROFS);
+
+	return (0);
+}
+
+/*
+ * VOP_ACCESS handler.  Fetches the current attributes, rejects accesses
+ * that are impossible for the vnode type or mount, and then defers to the
+ * generic vaccess() permission check.
+ */
+static int
+p9fs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct ucred *cred = ap->a_cred;
+	accmode_t wanted = ap->a_accmode;
+	struct vattr attrs;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	/* The checks below rely on up-to-date attributes. */
+	error = VOP_GETATTR(vp, &attrs, cred);
+	if (error != 0)
+		return (error);
+
+	error = p9fs_check_possible(vp, &attrs, wanted);
+	if (error != 0)
+		return (error);
+
+	/* Generic owner/group/other permission evaluation. */
+	return (vaccess(vp->v_type, attrs.va_mode, attrs.va_uid, attrs.va_gid,
+	    wanted, cred));
+}
+
+/*
+ * Fetch fresh attributes for vp from the server and fold them into the
+ * p9fs node's cached inode.  An open fid (read mode) is preferred; if none
+ * exists the node's plain fid is used instead.
+ *
+ * Returns 0 on success, or the error from the fid lookup or the getattr
+ * request.
+ */
+int
+p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
+{
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9_client *clnt = np->p9fs_ses->clnt;
+	struct p9_stat_dotl *attrs;
+	struct p9_fid *fid;
+	int error = 0;
+
+	fid = p9fs_get_fid(clnt, np, cred, VOFID, P9PROTO_OREAD, &error);
+	if (fid == NULL) {
+		fid = p9fs_get_fid(clnt, np, cred, VFID, -1, &error);
+		if (error)
+			return (error);
+	}
+
+	attrs = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
+
+	error = p9_client_getattr(fid, attrs, P9PROTO_STATS_ALL);
+	if (error == 0) {
+		/* Init the vnode with the disk info */
+		p9fs_stat_vnode_dotl(attrs, vp);
+	} else {
+		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
+	}
+
+	if (attrs != NULL)
+		uma_zfree(p9fs_getattr_zone, attrs);
+
+	return (error);
+}
+
+/*
+ * VOP_GETATTR handler (9P2000.L).  Refreshes the cached inode from the
+ * server, then copies the cached values into the caller's vattr.
+ *
+ * Returns ENOENT when the vnode has no p9fs node, the reload error if the
+ * refresh fails, and 0 otherwise.
+ */
+static int
+p9fs_getattr_dotl(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *out = ap->a_vap;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_inode *ip;
+	int error;
+
+	if (np == NULL)
+		return (ENOENT);
+	ip = &np->inode;
+
+	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, ip->i_mode, IFTOVT(ip->i_mode));
+
+	/* Pull current values from the server before reporting them. */
+	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
+		return (error);
+	}
+
+	VATTR_NULL(out);
+
+	/* Identity and type */
+	out->va_type = IFTOVT(ip->i_mode);
+	out->va_mode = ip->i_mode;
+	out->va_uid = ip->n_uid;
+	out->va_gid = ip->n_gid;
+	out->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	out->va_fileid = ip->i_qid_path;
+	out->va_nlink = ip->i_links_count;
+	out->va_flags = ip->i_flags;
+	out->va_gen = ip->gen;
+	out->va_filerev = ip->data_version;
+	out->va_vaflags = 0;
+
+	/* Timestamps */
+	out->va_atime.tv_sec = ip->i_atime;
+	out->va_atime.tv_nsec = ip->i_atime_nsec;
+	out->va_mtime.tv_sec = ip->i_mtime;
+	out->va_mtime.tv_nsec = ip->i_mtime_nsec;
+	out->va_ctime.tv_sec = ip->i_ctime;
+	out->va_ctime.tv_nsec = ip->i_ctime_nsec;
+
+	/* Sizes */
+	out->va_size = ip->i_size;
+	out->va_blocksize = ip->blksize;
+	out->va_bytes = ip->blocks * P9PROTO_TGETATTR_BLK;
+
+	return (0);
+}
+
+/*
+ * Translate a Unix mode word into 9P permission bits: the low nine
+ * permission bits are kept as-is, and the file type plus the
+ * setuid/setgid/sticky bits are mapped onto their P9PROTO_DM* flags.
+ */
+static uint32_t
+p9fs_unix2p9_mode(uint32_t mode)
+{
+	uint32_t p9mode = mode & 0777;
+
+	/* File types are mutually exclusive, so at most one case applies. */
+	switch (mode & S_IFMT) {
+	case S_IFDIR:
+		p9mode |= P9PROTO_DMDIR;
+		break;
+	case S_IFSOCK:
+		p9mode |= P9PROTO_DMSOCKET;
+		break;
+	case S_IFLNK:
+		p9mode |= P9PROTO_DMSYMLINK;
+		break;
+	case S_IFIFO:
+		p9mode |= P9PROTO_DMNAMEDPIPE;
+		break;
+	default:
+		break;
+	}
+
+	if ((mode & S_ISUID) != 0)
+		p9mode |= P9PROTO_DMSETUID;
+	if ((mode & S_ISGID) != 0)
+		p9mode |= P9PROTO_DMSETGID;
+	if ((mode & S_ISVTX) != 0)
+		p9mode |= P9PROTO_DMSETVTX;
+
+	return (p9mode);
+}
+
+/*
+ * Update inode with the stats read from server.(9P2000.L version)
+ *
+ * Copies the fields of 'stat' into the cached inode of vp's p9fs node,
+ * resizes the VM pager when a regular file's size changed, refreshes
+ * vp->v_type from the new mode and marks the node P9FS_NODE_MODIFIED when
+ * the qid version differs from the cached one. Always returns 0.
+ */
+int
+p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
+{
+ struct p9fs_node *np;
+ struct p9fs_inode *inode;
+
+ np = P9FS_VTON(vp);
+ inode = &np->inode;
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ /* Update the pager size if file size changes on host */
+ if (inode->i_size != stat->st_size) {
+ inode->i_size = stat->st_size;
+ /* Note: v_type here is still the value from before this update. */
+ if (vp->v_type == VREG)
+ vnode_pager_setsize(vp, inode->i_size);
+ }
+
+ inode->i_mtime = stat->st_mtime_sec;
+ inode->i_atime = stat->st_atime_sec;
+ inode->i_ctime = stat->st_ctime_sec;
+ inode->i_mtime_nsec = stat->st_mtime_nsec;
+ inode->i_atime_nsec = stat->st_atime_nsec;
+ inode->i_ctime_nsec = stat->st_ctime_nsec;
+ inode->n_uid = stat->st_uid;
+ inode->n_gid = stat->st_gid;
+ inode->i_mode = stat->st_mode;
+ /* The vnode type follows whatever the server reports for the mode. */
+ vp->v_type = IFTOVT(inode->i_mode);
+ inode->i_links_count = stat->st_nlink;
+ inode->blksize = stat->st_blksize;
+ inode->blocks = stat->st_blocks;
+ inode->gen = stat->st_gen;
+ inode->data_version = stat->st_data_version;
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ /* Setting a flag if file changes based on qid version */
+ if (np->vqid.qid_version != stat->qid.version)
+ np->flags |= P9FS_NODE_MODIFIED;
+ /* Cache the new qid so the next comparison is against this version. */
+ memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
+
+ return (0);
+}
+
+/*
+ * Fill a 9P2000.L setattr request from the in-memory inode: size, mode,
+ * ownership and access/modification times.  The 'valid' mask is left for
+ * the caller to set.  Always returns 0.
+ */
+static int
+p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
+{
+	/* Ownership and mode */
+	p9attr->mode = inode->i_mode;
+	p9attr->uid = inode->n_uid;
+	p9attr->gid = inode->n_gid;
+
+	/* Size */
+	p9attr->size = inode->i_size;
+
+	/* Timestamps */
+	p9attr->atime_sec = inode->i_atime;
+	p9attr->atime_nsec = inode->i_atime_nsec;
+	p9attr->mtime_sec = inode->i_mtime;
+	p9attr->mtime_nsec = inode->i_mtime_nsec;
+
+	return (0);
+}
+
+/*
+ * Apply an ownership change to the in-memory inode.  A uid or gid equal to
+ * VNOVAL means "leave unchanged".  The caller is expected to push the
+ * result to the server afterwards.
+ *
+ * Returns 0 on success or the error from the access / privilege checks.
+ */
+static int
+p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *td)
+{
+	struct p9fs_inode *ip = &P9FS_VTON(vp)->inode;
+	uid_t prev_uid;
+	gid_t prev_gid;
+	int error, needs_priv, owner_changed;
+
+	if (uid == (uid_t)VNOVAL)
+		uid = ip->n_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = ip->n_gid;
+
+	/* The caller must hold VWRITE_OWNER on the file. */
+	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Giving the file to another user, or to a group the caller does
+	 * not belong to, additionally requires PRIV_VFS_CHOWN.
+	 */
+	needs_priv = (uid != ip->n_uid && uid != cred->cr_uid) ||
+	    (gid != ip->n_gid && !groupmember(gid, cred));
+	if (needs_priv) {
+		error = priv_check_cred(cred, PRIV_VFS_CHOWN);
+		if (error != 0)
+			return (error);
+	}
+
+	prev_uid = ip->n_uid;
+	prev_gid = ip->n_gid;
+	ip->n_uid = uid;
+	ip->n_gid = gid;
+
+	/*
+	 * When ownership really changed, setuid/setgid bits are stripped
+	 * unless the caller holds PRIV_VFS_RETAINSUGID.
+	 */
+	owner_changed = (prev_uid != uid || prev_gid != gid);
+	if ((ip->i_mode & (ISUID | ISGID)) != 0 && owner_changed &&
+	    priv_check_cred(cred, PRIV_VFS_RETAINSUGID) != 0)
+		ip->i_mode &= ~(ISUID | ISGID);
+
+	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
+
+	return (0);
+}
+
+/*
+ * Apply a chmod to the in-memory inode after the usual privilege checks.
+ * Only the ALLPERMS bits of the inode mode are replaced; the caller is
+ * expected to push the result to the server afterwards.
+ *
+ * Returns 0 on success, EFTYPE when an unprivileged caller tries to set
+ * the sticky bit on a non-directory, or the error from the access /
+ * privilege checks.
+ */
+static int
+p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td)
+{
+	struct p9fs_inode *ip = &P9FS_VTON(vp)->inode;
+	uint32_t new_mode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td);
+
+	/* Changing permissions requires VADMIN on the file. */
+	error = VOP_ACCESS(vp, VADMIN, cred, td);
+	if (error != 0)
+		return (error);
+
+	/* Sticky bit on a non-directory needs PRIV_VFS_STICKYFILE. */
+	if (vp->v_type != VDIR && (mode & S_ISTXT) != 0 &&
+	    priv_check_cred(cred, PRIV_VFS_STICKYFILE) != 0)
+		return (EFTYPE);
+
+	/* Setgid for a group the caller is not in needs PRIV_VFS_SETGID. */
+	if (!groupmember(ip->n_gid, cred) && (mode & ISGID) != 0) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Setuid on a file the caller does not own needs PRIV_VFS_ADMIN. */
+	if ((mode & ISUID) != 0 && ip->n_uid != cred->cr_uid) {
+		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
+		if (error != 0)
+			return (error);
+	}
+
+	new_mode = (ip->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
+	ip->i_mode = new_mode;
+
+	P9_DEBUG(VOPS, "%s: to mode %x %d \n ", __func__, new_mode, error);
+
+	return (error);
+}
+
+/*
+ * Set the attributes of a file referenced by fid. A valid bitmask is sent
+ * in request selecting which fields to set
+ *
+ * The requested changes are first applied to the in-memory inode (via
+ * p9fs_chown/p9fs_chmod/size/time handling), the whole inode is then
+ * serialized into a p9_iattr_dotl and sent with p9_client_setattr().
+ *
+ * Returns EINVAL for attributes that cannot be set, EROFS on a read-only
+ * mount, EOPNOTSUPP for va_flags, EISDIR for a size change on a directory,
+ * or the error from the permission checks, cache invalidation, fid lookup
+ * or the setattr request.
+ */
+static int
+p9fs_setattr_dotl(struct vop_setattr_args *ap)
+{
+ struct vnode *vp;
+ struct vattr *vap;
+ struct p9fs_node *node;
+ struct p9fs_inode *inode;
+ struct ucred *cred;
+ struct thread *td;
+ struct p9_iattr_dotl *p9attr;
+ struct p9fs_session *vses;
+ struct p9_fid *vfid;
+ uint64_t oldfilesize;
+ int error;
+
+ vp = ap->a_vp;
+ vap = ap->a_vap;
+ node = P9FS_VTON(vp);
+ inode = &node->inode;
+ cred = ap->a_cred;
+ td = curthread;
+ vses = node->p9fs_ses;
+ error = 0;
+
+ /* Reject any attempt to set an attribute that is not settable. */
+ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+ (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+ (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+ (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+ P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
+ return (EINVAL);
+ }
+ /* Disallow write attempts on read only filesystem */
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+
+ /* Setting of flags is not supported */
+ if (vap->va_flags != VNOVAL)
+ return (EOPNOTSUPP);
+
+ /* Allocate p9attr struct */
+ p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
+ if (p9attr == NULL)
+ return (ENOMEM);
+
+ /* Check if we need to change the ownership of the file*/
+ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+ P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
+ vp, td, vap->va_uid, vap->va_gid);
+
+ error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
+ /* MODE is included because p9fs_chown may clear setuid/setgid bits. */
+ p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
+ P9PROTO_SETATTR_MODE;
+ if (error)
+ goto out;
+ }
+
+ /* Check for mode changes */
+ if (vap->va_mode != (mode_t)VNOVAL) {
+ P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
+ vap->va_mode);
+
+ error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
+ p9attr->valid |= P9PROTO_SETATTR_MODE;
+ if (error)
+ goto out;
+ }
+
+ /* Update the size of the file and update mtime */
+ if (vap->va_size != (uint64_t)VNOVAL) {
+ P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
+ vp, td, (uintmax_t)vap->va_size);
+ switch (vp->v_type) {
+ case VDIR:
+ error = EISDIR;
+ goto out;
+ case VLNK:
+ case VREG:
+ /* Invalidate cached pages of vp */
+ error = vinvalbuf(vp, 0, 0, 0);
+ if (error)
+ goto out;
+ /* Remember the old size so it can be restored if the request fails. */
+ oldfilesize = inode->i_size;
+ inode->i_size = vap->va_size;
+ /* Update the p9fs_inode time */
+ p9fs_itimes(vp);
+ p9attr->valid |= P9PROTO_SETATTR_SIZE |
+ P9PROTO_SETATTR_ATIME |
+ P9PROTO_SETATTR_MTIME |
+ P9PROTO_SETATTR_ATIME_SET |
+ P9PROTO_SETATTR_MTIME_SET ;
+ break;
+ default:
+ /*
+ * Other vnode types: nothing is sent to the server and the
+ * function returns with error still 0.
+ */
+ goto out;
+ }
+ } else if (vap->va_atime.tv_sec != VNOVAL ||
+ vap->va_mtime.tv_sec != VNOVAL) {
+ P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
+ __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
+ (uintmax_t)vap->va_mtime.tv_sec);
+ /* Update the p9fs_inode times */
+ p9fs_itimes(vp);
+ p9attr->valid |= P9PROTO_SETATTR_ATIME |
+ P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
+ P9PROTO_SETATTR_MTIME_SET;
+ }
+
+ /* Prefer a fid already open for writing; otherwise use the node's plain fid. */
+ vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
+ if (vfid == NULL) {
+ vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
+ if (error)
+ goto out;
+ }
+
+ /* Write the inode structure values into p9attr */
+ p9fs_inode_to_iattr(inode, p9attr);
+ error = p9_client_setattr(vfid, p9attr);
+ /*
+ * Only the size is rolled back on failure; uid/gid/mode changes already
+ * made to the in-memory inode above are left in place.
+ */
+ if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
+ if (error)
+ inode->i_size = oldfilesize;
+ else
+ vnode_pager_setsize(vp, inode->i_size);
+ }
+out:
+ if (p9attr) {
+ uma_zfree(p9fs_setattr_zone, p9attr);
+ }
+ P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
+ return (error);
+}
+
+/*
+ * Bookkeeping shared between p9fs_get_open_fid() and
+ * p9fs_release_open_fid().
+ */
+struct open_fid_state {
+ struct p9_fid *vofid; /* open fid to use for the I/O */
+ int fflags; /* flags passed to VOP_OPEN; reused for the matching VOP_CLOSE */
+ int opened; /* TRUE when p9fs_get_open_fid() had to call VOP_OPEN itself */
+};
+
+/*
+ * Obtain an open fid suitable for I/O with the given fflags.  If the
+ * credential already has a matching open fid it is used as-is; otherwise
+ * the vnode is opened through VOP_OPEN and the open is recorded in *statep
+ * so that p9fs_release_open_fid() can undo it.
+ *
+ * Returns 0 on success, the VOP_OPEN error, or EBADF if no open fid can be
+ * found even after a successful open.
+ *
+ * TODO: change this to take P9PROTO_* mode and avoid routing through
+ * VOP_OPEN, factoring out implementation of p9fs_open.
+ */
+static int
+p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
+{
+	struct p9fs_node *np;
+	struct p9_client *clnt;
+	struct p9_fid *fid;
+	int p9_mode, error;
+
+	p9_mode = p9fs_uflags_mode(fflags, TRUE);
+	error = 0;
+	statep->opened = FALSE;
+
+	np = P9FS_VTON(vp);
+	clnt = np->p9fs_ses->clnt;
+
+	/* Fast path: an open fid already exists. */
+	fid = p9fs_get_fid(clnt, np, cr, VOFID, p9_mode, &error);
+	if (fid != NULL) {
+		statep->vofid = fid;
+		return (0);
+	}
+
+	/* Slow path: open the vnode ourselves, then look the fid up again. */
+	error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
+	if (error)
+		return (error);
+
+	fid = p9fs_get_fid(clnt, np, cr, VOFID, p9_mode, &error);
+	if (fid == NULL)
+		return (EBADF);
+
+	statep->fflags = fflags;
+	statep->opened = TRUE;
+	statep->vofid = fid;
+	return (0);
+}
+
+/*
+ * Counterpart of p9fs_get_open_fid(): closes the vnode again, but only if
+ * p9fs_get_open_fid() was the one that opened it.  The VOP_CLOSE result is
+ * deliberately ignored.
+ */
+static void
+p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
+{
+	if (!statep->opened)
+		return;
+
+	(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
+}
+
+/*
+ * VOP_READ handler for regular files.
+ *
+ * An I/O buffer from p9fs_io_buffer_zone is used as a bounce buffer: as
+ * long as the uio still has room and the offset is below the cached file
+ * size, p9_client_read() fills the buffer from the open fid and uiomove()
+ * copies it out to the caller.
+ *
+ * Returns EOPNOTSUPP for character/block devices, EISDIR for any other
+ * non-regular vnode, EINVAL for a negative offset, the error from obtaining
+ * an open fid, the I/O error reported by p9_client_read(), or the uiomove()
+ * error.  A read at or past end of file returns 0 without moving data.
+ */
+static int
+p9fs_read(struct vop_read_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *np;
+	uint64_t offset;
+	int64_t ret;
+	uint64_t resid;
+	uint32_t count;
+	int error;
+	char *io_buffer = NULL;
+	uint64_t filesize;
+	struct open_fid_state ostate;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	np = P9FS_VTON(vp);
+	error = 0;
+
+	if (vp->v_type == VCHR || vp->v_type == VBLK)
+		return (EOPNOTSUPP);
+	if (vp->v_type != VREG)
+		return (EISDIR);
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
+	if (error)
+		return (error);
+
+	/* where in the file are we to start reading */
+	offset = uio->uio_offset;
+	filesize = np->inode.i_size;
+	if (uio->uio_offset >= filesize)
+		goto out;
+
+	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
+	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+	/* Work with a local buffer from the pool for this vop */
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+	while ((resid = uio->uio_resid) > 0) {
+		if (offset >= filesize)
+			break;
+		count = MIN(filesize - uio->uio_offset, resid);
+		if (count == 0)
+			break;
+
+		/* Copy count bytes into the uio */
+		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
+		/*
+		 * This is the only place in the entire p9fs where we check the
+		 * error for < 0 as p9_client_read/write return the number of
+		 * bytes instead of an error code. In this case if ret is < 0,
+		 * it means there is an IO error.
+		 */
+		if (ret < 0) {
+			error = -ret;
+			goto out;
+		}
+		/*
+		 * A zero-length read means no more data could be obtained even
+		 * though the cached size says otherwise.  Neither offset nor
+		 * uio_resid would advance, so stop here instead of looping
+		 * forever.
+		 */
+		if (ret == 0)
+			break;
+
+		error = uiomove(io_buffer, ret, uio);
+		if (error != 0)
+			goto out;
+
+		offset += ret;
+	}
+	uio->uio_offset = offset;
+out:
+	uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
+
+	return (error);
+}
+
+/*
+ * VOP_WRITE handler.
+ *
+ * The user data is copied from the uio into an I/O buffer in chunks of at
+ * most P9FS_IOUNIT bytes; each chunk is pushed to the server with
+ * p9_client_write() on the open fid, repeating until the whole chunk has
+ * been accepted.  With IO_APPEND on a regular file the write starts at the
+ * cached end of file.  Afterwards the cached size and the VM pager are
+ * grown if the write extended the file.
+ *
+ * Returns the error from obtaining an open fid, EINVAL for a negative
+ * offset, EISDIR for a directory, the uiomove() error, or the I/O error
+ * from p9_client_write() when nothing had been written yet.  Every exit
+ * after the open fid has been obtained goes through 'out' so that the fid
+ * reference is released.
+ */
+static int
+p9fs_write(struct vop_write_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *np;
+	uint64_t off, offset;
+	int64_t ret;
+	uint64_t resid, bytes_written;
+	uint32_t count;
+	int error, ioflag;
+	uint64_t file_size;
+	char *io_buffer = NULL;
+	struct open_fid_state ostate;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	np = P9FS_VTON(vp);
+	error = 0;
+	ioflag = ap->a_ioflag;
+
+	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
+	if (error)
+		return (error);
+
+	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
+	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+	if (uio->uio_offset < 0) {
+		error = EINVAL;
+		goto out;
+	}
+	if (uio->uio_resid == 0)
+		goto out;
+
+	file_size = np->inode.i_size;
+
+	switch (vp->v_type) {
+	case VREG:
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = file_size;
+		break;
+	case VDIR:
+		/*
+		 * Leave through 'out' rather than returning directly so the
+		 * open fid taken above is released.
+		 */
+		error = EISDIR;
+		goto out;
+	case VLNK:
+		break;
+	default:
+		panic("%s: bad file type vp: %p", __func__, vp);
+	}
+
+	offset = uio->uio_offset;
+	bytes_written = 0;
+
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+	while ((resid = uio->uio_resid) > 0) {
+		off = 0;
+		count = MIN(resid, P9FS_IOUNIT);
+		error = uiomove(io_buffer, count, uio);
+
+		if (error != 0) {
+			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
+			goto out;
+		}
+
+		/* While count still exists, keep writing.*/
+		while (count > 0) {
+			/* Copy count bytes from the uio */
+			ret = p9_client_write(ostate.vofid, offset, count,
+			    io_buffer + off);
+			if (ret < 0) {
+				/*
+				 * Report the error only if nothing was
+				 * written at all; otherwise abandon this
+				 * chunk.
+				 */
+				if (bytes_written == 0) {
+					error = -ret;
+					goto out;
+				} else {
+					break;
+				}
+			}
+			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
+			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
+
+			off += ret;
+			offset += ret;
+			bytes_written += ret;
+			count -= ret;
+		}
+	}
+	/* Update the fields in the node to reflect the change*/
+	if (file_size < uio->uio_offset + uio->uio_resid) {
+		np->inode.i_size = uio->uio_offset + uio->uio_resid;
+		vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
+	}
+out:
+	if (io_buffer)
+		uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
+
+	return (error);
+}
+
+/*
+ * Shared implementation behind the removal VOPs (remove and rmdir).  Asks
+ * the server to unlink 'name' from the directory dnp, then tears down the
+ * client-side state for np: fids (when this was the last link), name cache
+ * entries and the vfs hash entry, and finally marks the node deleted.
+ *
+ * Returns 0 on success or the error from the fid lookup / unlink request;
+ * on failure no client-side state is touched.
+ */
+static int
+remove_common(struct p9fs_node *dnp, struct p9fs_node *np, const char *name,
+    struct ucred *cred)
+{
+	struct vnode *victim_vp = P9FS_NTOV(np);
+	struct p9_client *clnt = np->p9fs_ses->clnt;
+	struct p9_fid *dir_fid;
+	int unlink_flags;
+	int error = 0;
+
+	dir_fid = p9fs_get_fid(clnt, dnp, cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/* Directories need the REMOVEDIR flag on the unlink request. */
+	if (np->v_node->v_type == VDIR)
+		unlink_flags = P9PROTO_UNLINKAT_REMOVEDIR;
+	else
+		unlink_flags = 0;
+
+	error = p9_client_unlink(dir_fid, name, unlink_flags);
+	if (error != 0)
+		return (error);
+
+	/* Remove all non-open fids associated with the vp */
+	if (np->inode.i_links_count == 1)
+		p9fs_fid_remove_all(np, TRUE);
+
+	/* Invalidate all entries of vnode from name cache and hash list. */
+	cache_purge(victim_vp);
+	vfs_hash_remove(victim_vp);
+
+	np->flags |= P9FS_NODE_DELETED;
+
+	return (0);
+}
+
+/*
+ * VOP_REMOVE handler.  Refuses directories with EISDIR, otherwise defers to
+ * remove_common() and drops one link from the parent directory on success.
+ */
+static int
+p9fs_remove(struct vop_remove_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_node *parent = P9FS_VTON(ap->a_dvp);
+	struct p9fs_inode *parent_inode = &parent->inode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
+
+	if (vp->v_type == VDIR)
+		return (EISDIR);
+
+	error = remove_common(parent, np, cnp->cn_nameptr, cnp->cn_cred);
+	if (error != 0)
+		return (error);
+
+	P9FS_DECR_LINKS(parent_inode);
+	return (0);
+}
+
+/*
+ * VOP_RMDIR handler.  Defers to remove_common() and drops one link from
+ * the parent directory on success.
+ */
+static int
+p9fs_rmdir(struct vop_rmdir_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *np = P9FS_VTON(vp);
+	struct p9fs_node *parent = P9FS_VTON(ap->a_dvp);
+	struct p9fs_inode *parent_inode = &parent->inode;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
+
+	error = remove_common(parent, np, cnp->cn_nameptr, cnp->cn_cred);
+	if (error != 0)
+		return (error);
+
+	P9FS_DECR_LINKS(parent_inode);
+	return (0);
+}
+
+/*
+ * Create symlinks. Make the permissions and call create_common code
+ * for Soft links.
+ *
+ * The link is created on the server with p9_create_symlink(), then the
+ * new name is walked from the directory fid to obtain a fid for it and a
+ * vnode is instantiated through p9fs_vget_common(). The component name is
+ * temporarily NUL-terminated in place and restored on every exit path.
+ *
+ * Returns 0 on success or the error from the fid lookup, symlink creation,
+ * walk or vnode instantiation.
+ */
+static int
+p9fs_symlink(struct vop_symlink_args *ap)
+{
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct vattr *vap;
+ struct componentname *cnp;
+ char *symtgt;
+ struct p9fs_node *dnp;
+ struct p9fs_session *vses;
+ struct mount *mp;
+ struct p9_fid *dvfid, *newfid;
+ int error;
+ char tmpchr;
+ gid_t gid;
+
+ dvp = ap->a_dvp;
+ vpp = ap->a_vpp;
+ vap = ap->a_vap;
+ cnp = ap->a_cnp;
+ symtgt = (char*)(uintptr_t) ap->a_target;
+ dnp = P9FS_VTON(dvp);
+ vses = dnp->p9fs_ses;
+ mp = vses->p9fs_mount;
+ newfid = NULL;
+ error = 0;
+ gid = vap->va_gid;
+
+ P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+ /*
+ * Save the character present at namelen in nameptr string and
+ * null terminate the character to get the search name for p9_dir_walk
+ */
+ tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+ cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+ dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+ if (error != 0)
+ goto out;
+
+ error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
+ if (error != 0)
+ goto out;
+
+ /*create vnode for symtgt */
+ newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+ if (newfid != NULL) {
+ error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+ dnp, newfid, vpp, cnp->cn_nameptr);
+ if (error != 0)
+ goto out;
+ } else
+ /* NOTE(review): relies on p9_client_walk having set error when it returns NULL — confirm. */
+ goto out;
+
+ if ((cnp->cn_flags & MAKEENTRY) != 0) {
+ cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
+ }
+ P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
+ __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
+
+ /* Success: restore the byte that was overwritten with the terminator. */
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ return (error);
+
+out:
+ /*
+ * Failure: release the walked fid, if any, and restore the name.
+ * NOTE(review): when p9fs_vget_common fails, newfid is clunked here;
+ * verify that p9fs_vget_common does not already release it on error.
+ */
+ if (newfid != NULL)
+ p9_client_clunk(newfid);
+ cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+ return (error);
+}
+
+/*
+ * VOP_LINK handler.  Looks up the fids of the target directory and of the
+ * existing file, asks the server to create the hard link and, on success,
+ * bumps the link count of the linked file's inode.
+ *
+ * Returns 0 on success or the error from the fid lookups / link request.
+ */
+static int
+p9fs_link(struct vop_link_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct p9fs_node *dir_np = P9FS_VTON(tdvp);
+	struct p9fs_node *file_np = P9FS_VTON(vp);
+	struct p9fs_inode *file_inode = &file_np->inode;
+	struct p9_client *clnt = file_np->p9fs_ses->clnt;
+	struct p9_fid *dir_fid, *file_fid;
+	int error = 0;
+
+	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
+
+	dir_fid = p9fs_get_fid(clnt, dir_np, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	file_fid = p9fs_get_fid(clnt, file_np, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	error = p9_create_hardlink(dir_fid, file_fid, cnp->cn_nameptr);
+	if (error != 0)
+		return (error);
+
+	/* Increment ref count on the inode */
+	P9FS_INCR_LINKS(file_inode);
+
+	return (0);
+}
+
+/*
+ * VOP_READLINK handler.  Fetches the link target from the server with
+ * p9_readlink() and copies it (without the terminating NUL) into the
+ * caller's uio.
+ *
+ * Returns 0 on success, or the error from the fid lookup, the readlink
+ * request or the copy-out.
+ */
+static int
+p9fs_readlink(struct vop_readlink_args *ap)
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct p9fs_node *dnp;
+	struct p9fs_session *vses;
+	struct p9_fid *dvfid;
+	int error, len;
+	char *target;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	dnp = P9FS_VTON(vp);
+	vses = dnp->p9fs_ses;
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	error = p9_readlink(dvfid, &target);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * NOTE(review): 'target' is handed back by p9_readlink and never
+	 * released here; confirm who owns that buffer.
+	 */
+	len = strlen(target);
+	error = uiomove(target, len, uio);
+
+	/* Propagate a failed copy-out instead of reporting success. */
+	return (error);
+}
+
+/*
+ * Iterate through a directory. An entire 8k data is read into the I/O buffer.
+ * This buffer is parsed to make dir entries and fed to the user buffer to
+ * complete it to the VFS.
+ *
+ * Requires that the directory already has an open fid (read mode) for the
+ * caller's credentials; otherwise EBADF is returned. Other errors: EINVAL
+ * for an empty first iovec, ENOTDIR for a non-directory, EIO for a failed
+ * server read, a malformed entry or a failed copy-out. *a_eofflag (when
+ * supplied) is set to 1 once the server returns no more entries, and
+ * uio_offset is left at the d_off of the last entry transferred.
+ */
+static int
+p9fs_readdir(struct vop_readdir_args *ap)
+{
+ struct uio *uio;
+ struct vnode *vp;
+ struct dirent cde;
+ int64_t offset;
+ uint64_t diroffset;
+ struct p9fs_node *np;
+ int error;
+ int32_t count;
+ struct p9_client *clnt;
+ struct p9_dirent dent;
+ char *io_buffer;
+ struct p9_fid *vofid;
+
+ uio = ap->a_uio;
+ vp = ap->a_vp;
+ np = P9FS_VTON(ap->a_vp);
+ offset = 0;
+ diroffset = 0;
+ error = 0;
+ count = 0;
+ clnt = np->p9fs_ses->clnt;
+
+ P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);
+
+ if (ap->a_uio->uio_iov->iov_len <= 0)
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
+ if (vofid == NULL) {
+ P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
+ return (EBADF);
+ }
+
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 0;
+
+ io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);
+
+ /* We haven't reached the end yet. read more. */
+ diroffset = uio->uio_offset;
+ while (uio->uio_resid >= sizeof(struct dirent)) {
+ /*
+ * We need to read more data as what is indicated by filesize because
+ * filesize is based on data stored in struct dirent structure but
+ * we read data in struct p9_dirent format which has different size.
+ * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
+ * it into struct dirent structure and send it back.
+ */
+ count = P9FS_IOUNIT;
+ /*
+ * NOTE(review): the buffer is cleared for P9FS_MTU bytes while the
+ * request asks for P9FS_IOUNIT — assumes zone items are at least
+ * P9FS_MTU bytes; verify against the zone definition.
+ */
+ bzero(io_buffer, P9FS_MTU);
+ count = p9_client_readdir(vofid, (char *)io_buffer,
+ diroffset, count);
+
+ /* Zero bytes back from the server means end of directory. */
+ if (count == 0) {
+ if (ap->a_eofflag != NULL)
+ *ap->a_eofflag = 1;
+ break;
+ }
+
+ /* Any negative return is reported to the caller as EIO. */
+ if (count < 0) {
+ error = EIO;
+ goto out;
+ }
+
+ offset = 0;
+ while (offset + QEMU_DIRENTRY_SZ <= count) {
+
+ /*
+ * Read and make sense out of the buffer in one dirent
+ * This is part of 9p protocol read. This reads one p9_dirent,
+ * appends it to dirent(FREEBSD specifc) and continues to parse the buffer.
+ */
+ bzero(&dent, sizeof(dent));
+ offset = p9_dirent_read(clnt, io_buffer, offset, count,
+ &dent);
+ if (offset < 0 || offset > count) {
+ error = EIO;
+ goto out;
+ }
+
+ bzero(&cde, sizeof(cde));
+ /*
+ * NOTE(review): the copy is bounded by dent.len, not by
+ * sizeof(cde.d_name) — confirm p9_dirent_read guarantees
+ * dent.len fits in d_name.
+ */
+ strncpy(cde.d_name, dent.d_name, dent.len);
+ cde.d_fileno = dent.qid.path;
+ cde.d_type = dent.d_type;
+ cde.d_namlen = dent.len;
+ cde.d_reclen = GENERIC_DIRSIZ(&cde);
+
+ /*
+ * If there isn't enough space in the uio to return a
+ * whole dirent, break off read
+ */
+ if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
+ break;
+
+ /* Transfer */
+ error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
+ if (error != 0) {
+ error = EIO;
+ goto out;
+ }
+ /* Resume point for the next server read / next call. */
+ diroffset = dent.d_off;
+ }
+ }
+ /* Pass on last transferred offset */
+ uio->uio_offset = diroffset;
+
+out:
+ uma_zfree(p9fs_io_buffer_zone, io_buffer);
+
+ return (error);
+}
+
+/*
+ * Carry out the transfer described by a buf against the 9P server.
+ *
+ *   vp    - vnode the buffer belongs to
+ *   bp    - buffer describing the transfer; it is completed with bufdone()
+ *           before this function returns
+ *   vofid - open fid handed to p9_client_read()/p9_client_write()
+ *   cr    - credentials of the request (not used in this function)
+ *
+ * For BIO_READ the buffer is filled from the file; for any other command the
+ * dirty range [b_dirtyoff, b_dirtyend) of the buffer is written out.  A
+ * failure is reported through bp->b_error and BIO_ERROR, and bp->b_resid is
+ * set to the number of bytes the uio still had outstanding.
+ */
+static void
+p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
+{
+	struct uio *uiov;
+	struct iovec io;
+	int error;
+	uint64_t off, offset;
+	uint64_t filesize;
+	uint64_t resid;
+	uint32_t count;
+	int64_t ret;
+	struct p9fs_node *np;
+	char *io_buffer;
+
+	error = 0;
+	np = P9FS_VTON(vp);
+
+	filesize = np->inode.i_size;
+	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
+	uiov->uio_iov = &io;
+	uiov->uio_iovcnt = 1;
+	uiov->uio_segflg = UIO_SYSSPACE;
+	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
+
+	if (bp->b_iocmd == BIO_READ) {
+		io.iov_len = uiov->uio_resid = bp->b_bcount;
+		io.iov_base = bp->b_data;
+		uiov->uio_rw = UIO_READ;
+
+		switch (vp->v_type) {
+
+		case VREG:
+		{
+			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
+
+			/*
+			 * Nothing has been transferred yet, so nread is 0 and
+			 * the whole buffer is zero-filled.  Whatever a short
+			 * read leaves untouched therefore reads back as zeros.
+			 */
+			if (uiov->uio_resid) {
+				int left = uiov->uio_resid;
+				int nread = bp->b_bcount - left;
+
+				if (left > 0)
+					bzero((char *)bp->b_data + nread, left);
+			}
+			/* where in the file are we to start reading */
+			offset = uiov->uio_offset;
+			if (uiov->uio_offset >= filesize)
+				goto out;
+
+			while ((resid = uiov->uio_resid) > 0) {
+				if (offset >= filesize)
+					break;
+				/*
+				 * MIN() compares the two 64-bit operands at
+				 * full width, as the write path below does.
+				 */
+				count = MIN(filesize - uiov->uio_offset, resid);
+				if (count == 0)
+					break;
+
+				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
+				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
+
+				/* Copy count bytes into the uio */
+				ret = p9_client_read(vofid, offset, count, io_buffer);
+				if (ret < 0) {
+					/*
+					 * A negative length must never reach
+					 * uiomove().
+					 * NOTE(review): if the client returns a
+					 * negated errno, -ret could be reported
+					 * instead of EIO -- confirm.
+					 */
+					error = EIO;
+					goto out;
+				}
+				if (ret == 0) {
+					/*
+					 * No progress: stop rather than spin.
+					 * The remainder of the buffer stays
+					 * zero-filled.
+					 */
+					break;
+				}
+				error = uiomove(io_buffer, ret, uiov);
+
+				if (error != 0)
+					goto out;
+				offset += ret;
+			}
+			break;
+		}
+		default:
+			printf("vfs: type %x unexpected\n", vp->v_type);
+			break;
+		}
+	} else {
+		if (bp->b_dirtyend > bp->b_dirtyoff) {
+			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
+			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
+			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+			uiov->uio_rw = UIO_WRITE;
+
+			if (uiov->uio_offset < 0) {
+				error = EINVAL;
+				goto out;
+			}
+
+			if (uiov->uio_resid == 0)
+				goto out;
+
+			offset = uiov->uio_offset;
+
+			while ((resid = uiov->uio_resid) > 0) {
+				/* Stage at most one I/O unit in the bounce buffer. */
+				off = 0;
+				count = MIN(resid, P9FS_IOUNIT);
+				error = uiomove(io_buffer, count, uiov);
+				if (error != 0) {
+					goto out;
+				}
+
+				/* The server may accept less than asked; resend the rest. */
+				while (count > 0) {
+					/* Copy count bytes from the uio */
+					ret = p9_client_write(vofid, offset, count,
+					    io_buffer + off);
+					if (ret <= 0) {
+						/*
+						 * Report the failure instead of
+						 * completing the buf as if the
+						 * data had been written; a zero
+						 * return would otherwise loop
+						 * forever.
+						 * NOTE(review): -ret may carry a
+						 * more specific errno -- confirm.
+						 */
+						error = EIO;
+						goto out;
+					}
+
+					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
+					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
+					off += ret;
+					offset += ret;
+					count -= ret;
+				}
+			}
+
+			/* Update the fields in the node to reflect the change */
+			if (filesize < uiov->uio_offset + uiov->uio_resid) {
+				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
+				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
+				/* update the modified timers. */
+				p9fs_itimes(vp);
+			}
+		} else {
+			/* No dirty range: nothing to write. */
+			bp->b_resid = 0;
+			goto out1;
+		}
+	}
+out:
+	/* Set the error */
+	if (error != 0) {
+		bp->b_error = error;
+		bp->b_ioflags |= BIO_ERROR;
+	}
+	bp->b_resid = uiov->uio_resid;
+out1:
+	bufdone(bp);
+	uma_zfree(p9fs_io_buffer_zone, io_buffer);
+	free(uiov, M_P9UIOV);
+}
+
+/*
+ * Strategy entry point.  An open fid suitable for the direction of the
+ * request is obtained, the buffer is handed to p9fs_doio() (which maps it to
+ * a uio and issues client reads/writes the same way as p9fs_read and
+ * p9fs_write), and the fid is released afterwards.  Failures are recorded in
+ * the buf; the function itself always returns 0.
+ */
+static int
+p9fs_strategy(struct vop_strategy_args *ap)
+{
+	struct open_fid_state ostate;
+	struct vnode *vp = ap->a_vp;
+	struct buf *bp = ap->a_bp;
+	struct ucred *cr;
+	int error, fflag;
+
+	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
+
+	/* Pick the credential and open mode matching the transfer direction. */
+	if (bp->b_iocmd == BIO_READ) {
+		cr = bp->b_rcred;
+		fflag = FREAD;
+	} else {
+		cr = bp->b_wcred;
+		fflag = FWRITE;
+	}
+
+	error = p9fs_get_open_fid(vp, fflag, cr, &ostate);
+	if (error == 0) {
+		p9fs_doio(vp, bp, ostate.vofid, cr);
+		p9fs_release_open_fid(vp, cr, &ostate);
+		return (0);
+	}
+
+	/* No usable fid: fail the buffer and complete it here. */
+	P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
+	bp->b_error = error;
+	bp->b_ioflags |= BIO_ERROR;
+	bufdone(bp);
+	return (0);
+}
+
+/*
+ * Rename a file.
+ *
+ * Asks the server to rename fcnp's name in the "from" directory (a_fdvp) to
+ * tcnp's name in the "to" directory (a_tdvp) via p9_client_renameat(), then
+ * adjusts the locally cached link counts.  a_tvp is the existing target, if
+ * any, and may be NULL.
+ *
+ * Returns 0 on success, EXDEV when the vnodes are not all on the same mount,
+ * or the error from the fid lookup, the rename request or vn_lock().  On
+ * every path the references on all four vnodes are dropped at "out".
+ */
+static int
+p9fs_rename(struct vop_rename_args *ap)
+{
+ struct vnode *tvp;
+ struct vnode *tdvp;
+ struct vnode *fvp;
+ struct vnode *fdvp;
+ struct componentname *tcnp;
+ struct componentname *fcnp;
+ struct p9fs_node *tdnode;
+ struct p9fs_node *fdnode;
+ struct p9fs_inode *fdinode;
+ struct p9fs_node *fnode;
+ struct p9fs_inode *finode;
+ struct p9fs_session *vses;
+ struct p9fs_node *tnode;
+ struct p9fs_inode *tinode;
+ struct p9_fid *olddirvfid, *newdirvfid ;
+ int error;
+
+ tvp = ap->a_tvp;
+ tdvp = ap->a_tdvp;
+ fvp = ap->a_fvp;
+ fdvp = ap->a_fdvp;
+ tcnp = ap->a_tcnp;
+ fcnp = ap->a_fcnp;
+ tdnode = P9FS_VTON(tdvp);
+ fdnode = P9FS_VTON(fdvp);
+ fdinode = &fdnode->inode;
+ fnode = P9FS_VTON(fvp);
+ finode = &fnode->inode;
+ vses = fnode->p9fs_ses;
+ error = 0;
+
+ P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);
+
+ /* Check for cross mount operation */
+ if (fvp->v_mount != tdvp->v_mount ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ goto out;
+ }
+
+ /* warning if you are renaming to the same name */
+ /*
+ * NOTE(review): this is a no-op, error is already 0 at this point; no
+ * special handling is done when source and target are the same vnode.
+ */
+ if (fvp == tvp)
+ error = 0;
+
+ /* Look up the fids of the source and the target directory. */
+ olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
+ if (error != 0)
+ goto out;
+ newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
+ if (error != 0)
+ goto out;
+
+ /* Issue the rename request itself. */
+ error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
+ if (error != 0)
+ goto out;
+
+ /*
+ * decrement the link count on the "from" file whose name is going
+ * to be changed if its a directory
+ */
+ /*
+ * The count decremented in this branch is the one of the source
+ * directory (fdinode); the name cache of the affected directories is
+ * purged as well.
+ */
+ if (fvp->v_type == VDIR) {
+ if (tvp && tvp->v_type == VDIR)
+ cache_purge(tdvp);
+ P9FS_DECR_LINKS(fdinode);
+ cache_purge(fdvp);
+ }
+
+ /* Taking exclusive lock on the from node before decrementing the link count */
+ /*
+ * NOTE(review): a vn_lock() failure is returned to the caller although
+ * the rename request above has already succeeded.
+ */
+ if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
+ goto out;
+ P9FS_DECR_LINKS(finode);
+ VOP_UNLOCK(fvp);
+
+ /* An existing target was replaced: drop one of its links too. */
+ if (tvp) {
+ tnode = P9FS_VTON(tvp);
+ tinode = &tnode->inode;
+ P9FS_DECR_LINKS(tinode);
+ }
+
+out:
+ /*
+ * Drop the vnodes handed in by the caller.  When tdvp and tvp are the
+ * same vnode it is vrele()d once here and vput() once below.
+ */
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ vrele(fdvp);
+ vrele(fvp);
+ return (error);
+}
+
+/*
+ * Write out VM pages synchronously.
+ * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
+ *
+ * The pages in ap->a_m are mapped into the KVA of a pbuf and pushed through
+ * VOP_WRITE() as one UIO_SYSSPACE write.  Every entry of ap->a_rtvals starts
+ * out as VM_PAGER_ERROR and is only updated (by
+ * vnode_pager_undirty_pages()) when the write succeeds.  The status of the
+ * first page is returned.
+ */
+static int
+p9fs_putpages(struct vop_putpages_args *ap)
+{
+	struct uio wuio;
+	struct iovec wiov;
+	struct vnode *vp;
+	struct p9fs_node *np;
+	struct buf *bp;
+	vm_page_t *pages;
+	vm_offset_t kva;
+	off_t offset;
+	int *rtvals;
+	int error, idx, nbytes, npages;
+
+	vp = ap->a_vp;
+	np = P9FS_VTON(vp);
+	pages = ap->a_m;
+	rtvals = ap->a_rtvals;
+	nbytes = ap->a_count;
+	npages = btoc(nbytes);
+	offset = IDX_TO_OFF(pages[0]->pindex);
+
+	/*
+	 * When putting pages, do not extend file past EOF.
+	 */
+	if (offset + nbytes > np->inode.i_size) {
+		nbytes = np->inode.i_size - offset;
+		if (nbytes < 0)
+			nbytes = 0;
+	}
+
+	/* Assume failure for every page until the write has gone through. */
+	idx = 0;
+	while (idx < npages)
+		rtvals[idx++] = VM_PAGER_ERROR;
+
+	/* Map the pages contiguously so they can be written with one uio. */
+	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
+	kva = (vm_offset_t) bp->b_data;
+	pmap_qenter(kva, pages, npages);
+
+	VM_CNT_INC(v_vnodeout);
+	VM_CNT_ADD(v_vnodepgsout, nbytes);
+
+	wiov.iov_base = (caddr_t) kva;
+	wiov.iov_len = nbytes;
+	wuio.uio_iov = &wiov;
+	wuio.uio_iovcnt = 1;
+	wuio.uio_segflg = UIO_SYSSPACE;
+	wuio.uio_rw = UIO_WRITE;
+	wuio.uio_td = curthread;
+	wuio.uio_offset = offset;
+	wuio.uio_resid = nbytes;
+
+	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)wuio.uio_offset, wuio.uio_resid);
+
+	error = VOP_WRITE(vp, &wuio, vnode_pager_putpages_ioflags(ap->a_sync),
+	    curthread->td_ucred);
+
+	/* The temporary mapping is no longer needed, whatever the outcome. */
+	pmap_qremove(kva, npages);
+	uma_zfree(p9fs_pbuf_zone, bp);
+
+	if (error == 0) {
+		vnode_pager_undirty_pages(pages, rtvals, nbytes - wuio.uio_resid,
+		    np->inode.i_size - offset, npages * PAGE_SIZE);
+	}
+
+	return (rtvals[0]);
+}
+
+/*
+ * Vnode operations vector for p9fs.  Operations not listed here fall back to
+ * default_vnodeops through vop_default.
+ */
+struct vop_vector p9fs_vnops = {
+ .vop_default = &default_vnodeops,
+ .vop_lookup = p9fs_lookup,
+ .vop_open = p9fs_open,
+ .vop_close = p9fs_close,
+ .vop_access = p9fs_access,
+ .vop_getattr = p9fs_getattr_dotl,
+ .vop_setattr = p9fs_setattr_dotl,
+ .vop_reclaim = p9fs_reclaim,
+ .vop_inactive = p9fs_inactive,
+ .vop_readdir = p9fs_readdir,
+ .vop_create = p9fs_create,
+ .vop_mknod = p9fs_mknod,
+ .vop_read = p9fs_read,
+ .vop_write = p9fs_write,
+ .vop_remove = p9fs_remove,
+ .vop_mkdir = p9fs_mkdir,
+ .vop_rmdir = p9fs_rmdir,
+ .vop_strategy = p9fs_strategy,
+ .vop_symlink = p9fs_symlink,
+ .vop_rename = p9fs_rename,
+ .vop_link = p9fs_link,
+ .vop_readlink = p9fs_readlink,
+ .vop_putpages = p9fs_putpages,
+};
+VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
index 6ef725ee0ee7..0020b8f8a8d8 100644
--- a/sys/fs/procfs/procfs_mem.c
+++ b/sys/fs/procfs/procfs_mem.c
@@ -41,6 +41,7 @@
#include <sys/ptrace.h>
#include <sys/systm.h>
#include <sys/uio.h>
+#include <sys/priv.h>
#include <fs/pseudofs/pseudofs.h>
#include <fs/procfs/procfs.h>
diff --git a/sys/fs/procfs/procfs_osrel.c b/sys/fs/procfs/procfs_osrel.c
index fd6a4d7e0eea..0102090de4da 100644
--- a/sys/fs/procfs/procfs_osrel.c
+++ b/sys/fs/procfs/procfs_osrel.c
@@ -45,9 +45,11 @@ procfs_doosrel(PFS_FILL_ARGS)
if (uio == NULL)
return (EOPNOTSUPP);
- if (uio->uio_rw == UIO_READ) {
+ switch (uio->uio_rw) {
+ case UIO_READ:
sbuf_printf(sb, "%d\n", p->p_osrel);
- } else {
+ break;
+ case UIO_WRITE:
sbuf_trim(sb);
sbuf_finish(sb);
pp = sbuf_data(sb);
@@ -62,6 +64,7 @@ procfs_doosrel(PFS_FILL_ARGS)
osrel = ov;
}
p->p_osrel = osrel;
+ break;
}
return (0);
}
diff --git a/sys/fs/procfs/procfs_rlimit.c b/sys/fs/procfs/procfs_rlimit.c
index 83e11f44b3f8..6be933ac6e44 100644
--- a/sys/fs/procfs/procfs_rlimit.c
+++ b/sys/fs/procfs/procfs_rlimit.c
@@ -57,6 +57,9 @@
#include <fs/pseudofs/pseudofs.h>
#include <fs/procfs/procfs.h>
+_Static_assert(nitems(rlimit_ident) == RLIM_NLIMITS,
+ "resource.h RLIMIT_IDENT needs update");
+
int
procfs_doprocrlimit(PFS_FILL_ARGS)
{
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
index 9c2f42a45102..38070e0946bb 100644
--- a/sys/fs/procfs/procfs_status.c
+++ b/sys/fs/procfs/procfs_status.c
@@ -61,6 +61,7 @@
int
procfs_doprocstatus(PFS_FILL_ARGS)
{
+ struct timeval start, ut, st;
struct session *sess;
struct thread *tdfirst;
struct tty *tp;
@@ -121,21 +122,16 @@ procfs_doprocstatus(PFS_FILL_ARGS)
wmesg = "nochan";
thread_unlock(tdfirst);
- if (p->p_flag & P_INMEM) {
- struct timeval start, ut, st;
-
- PROC_STATLOCK(p);
- calcru(p, &ut, &st);
- PROC_STATUNLOCK(p);
- start = p->p_stats->p_start;
- getboottime(&boottime);
- timevaladd(&start, &boottime);
- sbuf_printf(sb, " %jd,%ld %jd,%ld %jd,%ld",
- (intmax_t)start.tv_sec, start.tv_usec,
- (intmax_t)ut.tv_sec, ut.tv_usec,
- (intmax_t)st.tv_sec, st.tv_usec);
- } else
- sbuf_printf(sb, " -1,-1 -1,-1 -1,-1");
+ PROC_STATLOCK(p);
+ calcru(p, &ut, &st);
+ PROC_STATUNLOCK(p);
+ start = p->p_stats->p_start;
+ getboottime(&boottime);
+ timevaladd(&start, &boottime);
+ sbuf_printf(sb, " %jd,%ld %jd,%ld %jd,%ld",
+ (intmax_t)start.tv_sec, start.tv_usec,
+ (intmax_t)ut.tv_sec, ut.tv_usec,
+ (intmax_t)st.tv_sec, st.tv_usec);
sbuf_printf(sb, " %s", wmesg);
diff --git a/sys/fs/pseudofs/pseudofs.c b/sys/fs/pseudofs/pseudofs.c
index eb4ca8a82456..ef45f96a6192 100644
--- a/sys/fs/pseudofs/pseudofs.c
+++ b/sys/fs/pseudofs/pseudofs.c
@@ -98,12 +98,10 @@ pfs_alloc_node(struct pfs_info *pi, const char *name, pfs_type_t type)
/*
* Add a node to a directory
*/
-static void
+static int
pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
{
-#ifdef INVARIANTS
struct pfs_node *iter;
-#endif
KASSERT(parent != NULL,
("%s(): parent is NULL", __func__));
@@ -123,8 +121,6 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
KASSERT(iter->pn_type != pfstype_procdir,
("%s(): nested process directories", __func__));
for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
- KASSERT(strcmp(pn->pn_name, iter->pn_name) != 0,
- ("%s(): homonymous siblings", __func__));
if (pn->pn_type == pfstype_procdir)
KASSERT(iter->pn_type != pfstype_procdir,
("%s(): sibling process directories", __func__));
@@ -133,8 +129,19 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
pn->pn_parent = parent;
pfs_fileno_alloc(pn);
-
pfs_lock(parent);
+ for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
+ if (strcmp(pn->pn_name, iter->pn_name) != 0)
+ continue;
+ printf("pfs_add_node: homonymous siblings: '%s/%s' type %d",
+ parent->pn_name, pn->pn_name, pn->pn_type);
+ /* Do not detach, because we are not yet attached. */
+ pn->pn_parent = NULL;
+ pfs_unlock(parent);
+ return (EEXIST);
+ }
+
+
if ((parent->pn_flags & PFS_PROCDEP) != 0)
pn->pn_flags |= PFS_PROCDEP;
if (parent->pn_nodes == NULL) {
@@ -151,10 +158,11 @@ pfs_add_node(struct pfs_node *parent, struct pfs_node *pn)
parent->pn_last_node = pn;
}
pfs_unlock(parent);
+ return (0);
}
/*
- * Detach a node from its aprent
+ * Detach a node from its parent
*/
static void
pfs_detach_node(struct pfs_node *pn)
@@ -196,6 +204,7 @@ static int
pfs_fixup_dir_flags(struct pfs_node *parent, int flags)
{
struct pfs_node *dot, *dotdot;
+ int rc;
dot = pfs_alloc_node_flags(parent->pn_info, ".", pfstype_this, flags);
if (dot == NULL)
@@ -205,9 +214,14 @@ pfs_fixup_dir_flags(struct pfs_node *parent, int flags)
pfs_destroy(dot);
return (ENOMEM);
}
- pfs_add_node(parent, dot);
- pfs_add_node(parent, dotdot);
- return (0);
+ rc = pfs_add_node(parent, dot);
+ if (rc == 0)
+ rc = pfs_add_node(parent, dotdot);
+ if (rc != 0) {
+ pfs_destroy(dot);
+ pfs_destroy(dotdot);
+ }
+ return (rc);
}
static void
@@ -236,11 +250,12 @@ pfs_create_dir(struct pfs_node *parent, const char *name,
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- pfs_add_node(parent, pn);
- rc = pfs_fixup_dir_flags(pn, flags);
- if (rc) {
+ rc = pfs_add_node(parent, pn);
+ if (rc == 0)
+ rc = pfs_fixup_dir_flags(pn, flags);
+ if (rc != 0) {
pfs_destroy(pn);
- return (NULL);
+ pn = NULL;
}
return (pn);
}
@@ -263,8 +278,10 @@ pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- pfs_add_node(parent, pn);
-
+ if (pfs_add_node(parent, pn) != 0) {
+ pfs_destroy(pn);
+ pn = NULL;
+ }
return (pn);
}
@@ -286,7 +303,10 @@ pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
pn->pn_vis = vis;
pn->pn_destroy = destroy;
pn->pn_flags = flags;
- pfs_add_node(parent, pn);
+ if (pfs_add_node(parent, pn) != 0) {
+ pfs_destroy(pn);
+ pn = NULL;
+ }
return (pn);
}
diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c
index 324f38abd10e..35454998fc8e 100644
--- a/sys/fs/smbfs/smbfs_io.c
+++ b/sys/fs/smbfs/smbfs_io.c
@@ -629,7 +629,7 @@ smbfs_vinvalbuf(struct vnode *vp, struct thread *td)
while (np->n_flag & NFLUSHINPROG) {
np->n_flag |= NFLUSHWANT;
- error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", 2 * hz);
+ error = tsleep(&np->n_flag, PRIBIO, "smfsvinv", 2 * hz);
error = smb_td_intr(td);
if (error == EINTR)
return EINTR;
diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c
index 1e7dcafb1121..5d412cabadb8 100644
--- a/sys/fs/smbfs/smbfs_vnops.c
+++ b/sys/fs/smbfs/smbfs_vnops.c
@@ -810,6 +810,9 @@ smbfs_pathconf(struct vop_pathconf_args *ap)
case _PC_NO_TRUNC:
*retval = 1;
break;
+ case _PC_HAS_HIDDENSYSTEM:
+ *retval = 1;
+ break;
default:
error = vop_stdpathconf(ap);
}
@@ -1051,7 +1054,7 @@ smbfs_lookup(struct vop_lookup_args *ap)
struct smbfattr fattr, *fap;
struct smb_cred *scred;
char *name = cnp->cn_nameptr;
- int flags = cnp->cn_flags;
+ uint64_t flags = cnp->cn_flags;
int nameiop = cnp->cn_nameiop;
int nmlen = cnp->cn_namelen;
int error, islastcn, isdot;
diff --git a/sys/fs/tarfs/tarfs.h b/sys/fs/tarfs/tarfs.h
index ff1985e488cd..46fa8b55b3ad 100644
--- a/sys/fs/tarfs/tarfs.h
+++ b/sys/fs/tarfs/tarfs.h
@@ -74,7 +74,7 @@ struct tarfs_node {
struct timespec mtime;
struct timespec ctime;
struct timespec birthtime;
- unsigned long gen;
+ uint32_t gen;
/* Block map */
size_t nblk;
@@ -161,10 +161,9 @@ struct tarfs_zio {
};
struct tarfs_fid {
- u_short len; /* length of data in bytes */
- u_short data0; /* force alignment */
- ino_t ino;
- unsigned long gen;
+ u_short len; /* length of data in bytes */
+ uint32_t gen;
+ ino_t ino;
};
#define TARFS_NODE_LOCK(tnp) \
diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c
index 8c97fab185fc..afb8e05f5929 100644
--- a/sys/fs/tarfs/tarfs_vnops.c
+++ b/sys/fs/tarfs/tarfs_vnops.c
@@ -668,6 +668,8 @@ tarfs_vptofh(struct vop_vptofh_args *ap)
{
struct tarfs_fid *tfp;
struct tarfs_node *tnp;
+ _Static_assert(sizeof(struct tarfs_fid) <= sizeof(struct fid),
+ "struct tarfs_fid cannot be larger than struct fid");
tfp = (struct tarfs_fid *)ap->a_fhp;
tnp = VP_TO_TARFS_NODE(ap->a_vp);
diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index c28f3a02a7bf..52307cc7c7b2 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -292,6 +292,15 @@ struct tmpfs_node {
*/
off_t tn_readdir_lastn;
struct tmpfs_dirent * tn_readdir_lastp;
+
+ /*
+ * Total size of whiteout directory entries. This
+ * must be a multiple of sizeof(struct tmpfs_dirent)
+ * and is used to determine whether a directory is
+ * empty (excluding whiteout entries) during rename/
+ * rmdir operations.
+ */
+ off_t tn_wht_size; /* (v) */
} tn_dir;
/* Valid when tn_type == VLNK. */
@@ -439,11 +448,10 @@ struct tmpfs_mount {
* NFS code.
*/
struct tmpfs_fid_data {
+ unsigned short tfd_len;
ino_t tfd_id;
unsigned long tfd_gen;
-};
-_Static_assert(sizeof(struct tmpfs_fid_data) <= MAXFIDSZ,
- "(struct tmpfs_fid_data) is larger than (struct fid).fid_data");
+} __packed;
struct tmpfs_dir_cursor {
struct tmpfs_dirent *tdc_current;
@@ -484,6 +492,7 @@ int tmpfs_dir_getdents(struct tmpfs_mount *, struct tmpfs_node *,
struct uio *, int, uint64_t *, int *);
int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
+void tmpfs_dir_clear_whiteouts(struct vnode *);
int tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
int tmpfs_reg_punch_hole(struct vnode *vp, off_t *, off_t *);
int tmpfs_chflags(struct vnode *, u_long, struct ucred *, struct thread *);
@@ -533,6 +542,8 @@ tmpfs_update(struct vnode *vp)
#define TMPFS_VALIDATE_DIR(node) do { \
MPASS((node)->tn_type == VDIR); \
MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+ MPASS((node)->tn_dir.tn_wht_size % sizeof(struct tmpfs_dirent) == 0); \
+ MPASS((node)->tn_dir.tn_wht_size <= (node)->tn_size); \
} while (0)
/*
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 9bdcc4575511..1237f6b92cdb 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -120,7 +120,7 @@ tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
/*
* Forced unmount?
*/
- if (vp == NULL) {
+ if (vp == NULL || vp->v_object == NULL) {
KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
("object %p with OBJ_TMPFS_VREF but without vnode",
object));
@@ -183,6 +183,9 @@ tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
KASSERT((object->flags & OBJ_ANON) == 0,
("%s: object %p with OBJ_ANON", __func__, object));
old = object->un_pager.swp.writemappings;
+ KASSERT(old >= (vm_ooffset_t)end - start,
+ ("tmpfs obj %p writecount %jx dec %jx", object, (uintmax_t)old,
+ (uintmax_t)((vm_ooffset_t)end - start)));
object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
new = object->un_pager.swp.writemappings;
tmpfs_pager_writecount_recalc(object, old, new);
@@ -346,7 +349,7 @@ tmpfs_node_init(void *mem, int size, int flags)
node = mem;
node->tn_id = 0;
- mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF);
+ mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF | MTX_NEW);
node->tn_gen = arc4random();
return (0);
}
@@ -425,7 +428,7 @@ sysctl_mem_percent(SYSCTL_HANDLER_ARGS)
if ((unsigned) percent > 100)
return (EINVAL);
- *(long *)arg1 = percent;
+ *(int *)arg1 = percent;
tmpfs_set_reserve_from_percent();
return (0);
}
@@ -440,7 +443,7 @@ tmpfs_set_reserve_from_percent(void)
}
SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_percent,
- CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_mem_percent, 0,
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, &tmpfs_mem_percent, 0,
sysctl_mem_percent, "I",
"Percent of available memory that can be used if no size limit");
@@ -490,50 +493,11 @@ static int
tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
int end, boolean_t ignerr)
{
- vm_page_t m;
- int rv, error;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT(base >= 0, ("%s: base %d", __func__, base));
- KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
- end));
- error = 0;
-
-retry:
- m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
- if (m != NULL) {
- MPASS(vm_page_all_valid(m));
- } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
- m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL |
- VM_ALLOC_WAITFAIL);
- if (m == NULL)
- goto retry;
- vm_object_pip_add(object, 1);
- VM_OBJECT_WUNLOCK(object);
- rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
- VM_OBJECT_WLOCK(object);
- vm_object_pip_wakeup(object);
- if (rv == VM_PAGER_OK) {
- /*
- * Since the page was not resident, and therefore not
- * recently accessed, immediately enqueue it for
- * asynchronous laundering. The current operation is
- * not regarded as an access.
- */
- vm_page_launder(m);
- } else {
- vm_page_free(m);
- m = NULL;
- if (!ignerr)
- error = EIO;
- }
- }
- if (m != NULL) {
- pmap_zero_page_area(m, base, end - base);
- vm_page_set_dirty(m);
- vm_page_xunbusy(m);
- }
+ int error;
+ error = vm_page_grab_zero_partial(object, idx, base, end);
+ if (ignerr)
+ error = 0;
return (error);
}
@@ -643,6 +607,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype)
nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
nnode->tn_dir.tn_readdir_lastn = 0;
nnode->tn_dir.tn_readdir_lastp = NULL;
+ nnode->tn_dir.tn_wht_size = 0;
nnode->tn_links++;
TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
nnode->tn_dir.tn_parent->tn_links++;
@@ -954,6 +919,8 @@ tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
VM_OBJECT_WLOCK(obj);
VI_LOCK(vp);
+ vp->v_object = NULL;
+
/*
* May be going through forced unmount.
*/
@@ -1094,15 +1061,19 @@ loop:
KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
("%s: object %p with OBJ_TMPFS_VREF but without vnode",
__func__, object));
- KASSERT(object->un_pager.swp.writemappings == 0,
- ("%s: object %p has writemappings",
- __func__, object));
VI_LOCK(vp);
KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
vp->v_object = object;
vn_irflag_set_locked(vp, (tm->tm_pgread ? VIRF_PGREAD : 0) |
VIRF_TEXT_REF);
VI_UNLOCK(vp);
+ VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
+ ("leaked OBJ_TMPFS_VREF"));
+ if (object->un_pager.swp.writemappings > 0) {
+ vrefact(vp);
+ vlazy(vp);
+ vm_object_set_flag(object, OBJ_TMPFS_VREF);
+ }
VM_OBJECT_WUNLOCK(object);
break;
case VDIR:
@@ -1822,13 +1793,16 @@ int
tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
{
struct tmpfs_dirent *de;
+ struct tmpfs_node *dnode;
int error;
error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
cnp->cn_nameptr, cnp->cn_namelen, &de);
if (error != 0)
return (error);
+ dnode = VP_TO_TMPFS_DIR(dvp);
tmpfs_dir_attach(dvp, de);
+ dnode->tn_dir.tn_wht_size += sizeof(*de);
return (0);
}
@@ -1836,14 +1810,44 @@ void
tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
{
struct tmpfs_dirent *de;
+ struct tmpfs_node *dnode;
- de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
+ dnode = VP_TO_TMPFS_DIR(dvp);
+ de = tmpfs_dir_lookup(dnode, NULL, cnp);
MPASS(de != NULL && de->td_node == NULL);
+ MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de));
+ dnode->tn_dir.tn_wht_size -= sizeof(*de);
tmpfs_dir_detach(dvp, de);
tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
}
/*
+ * Frees any dirents still associated with the directory represented
+ * by dvp in preparation for the removal of the directory. This is
+ * required when removing a directory which contains only whiteout
+ * entries.
+ */
+void
+tmpfs_dir_clear_whiteouts(struct vnode *dvp)
+{
+ struct tmpfs_dir_cursor dc;
+ struct tmpfs_dirent *de;
+ struct tmpfs_node *dnode;
+
+ dnode = VP_TO_TMPFS_DIR(dvp);
+
+ /*
+ * Restart from the first entry on every pass: the entry returned by the
+ * previous pass has been detached and freed by then.
+ */
+ while ((de = tmpfs_dir_first(dnode, &dc)) != NULL) {
+ /* Only whiteouts (entries without a node) may be left at this point. */
+ KASSERT(de->td_node == NULL, ("%s: non-whiteout dirent %p",
+ __func__, de));
+ /* Keep the whiteout byte count in step with the entry being dropped. */
+ dnode->tn_dir.tn_wht_size -= sizeof(*de);
+ tmpfs_dir_detach(dvp, de);
+ tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
+ }
+ /* The directory must now be completely empty. */
+ MPASS(dnode->tn_size == 0);
+ MPASS(dnode->tn_dir.tn_wht_size == 0);
+}
+
+/*
* Resizes the aobj associated with the regular file pointed to by 'vp' to the
* size 'newsize'. 'vp' must point to a vnode that represents a regular file.
* 'newsize' must be positive.
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index 32eb9c958df1..431893b77bb9 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -208,7 +208,7 @@ again:
continue;
}
vm = vmspace_acquire_ref(p);
- _PHOLD_LITE(p);
+ _PHOLD(p);
PROC_UNLOCK(p);
if (vm == NULL) {
PRELE(p);
@@ -585,29 +585,25 @@ static int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
struct vnode **vpp)
{
- struct tmpfs_fid_data tfd;
+ struct tmpfs_fid_data *tfd;
struct tmpfs_mount *tmp;
struct tmpfs_node *node;
int error;
- if (fhp->fid_len != sizeof(tfd))
+ if (fhp->fid_len != sizeof(*tfd))
return (EINVAL);
- /*
- * Copy from fid_data onto the stack to avoid unaligned pointer use.
- * See the comment in sys/mount.h on struct fid for details.
- */
- memcpy(&tfd, fhp->fid_data, fhp->fid_len);
+ tfd = (struct tmpfs_fid_data *)fhp;
tmp = VFS_TO_TMPFS(mp);
- if (tfd.tfd_id >= tmp->tm_nodes_max)
+ if (tfd->tfd_id >= tmp->tm_nodes_max)
return (EINVAL);
TMPFS_LOCK(tmp);
LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
- if (node->tn_id == tfd.tfd_id &&
- node->tn_gen == tfd.tfd_gen) {
+ if (node->tn_id == tfd->tfd_id &&
+ node->tn_gen == tfd->tfd_gen) {
tmpfs_ref_node(node);
break;
}
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 718cfef6bfa3..9d2a587b177a 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -476,6 +476,7 @@ tmpfs_stat(struct vop_stat_args *v)
sb->st_blksize = PAGE_SIZE;
sb->st_flags = node->tn_flags;
sb->st_gen = node->tn_gen;
+ sb->st_filerev = 0;
if (vp->v_type == VREG) {
#ifdef __ILP32__
vm_object_t obj = node->tn_reg.tn_aobj;
@@ -1078,7 +1079,9 @@ tmpfs_rename(struct vop_rename_args *v)
}
if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
- if (tnode->tn_size > 0) {
+ if (tnode->tn_size != 0 &&
+ ((tcnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+ tnode->tn_size > tnode->tn_dir.tn_wht_size)) {
error = ENOTEMPTY;
goto out_locked;
}
@@ -1239,6 +1242,16 @@ tmpfs_rename(struct vop_rename_args *v)
tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
tmpfs_dir_detach(tdvp, tde);
+ /*
+ * If we are overwriting a directory, per the ENOTEMPTY check
+ * above it must either be empty or contain only whiteout
+ * entries. In the latter case (which can only happen if
+ * IGNOREWHITEOUT was passed in tcnp->cn_flags), clear the
+ * whiteout entries to avoid leaking memory.
+ */
+ if (tnode->tn_type == VDIR && tnode->tn_size > 0)
+ tmpfs_dir_clear_whiteouts(tvp);
+
/* Update node's ctime because of possible hardlinks. */
tnode->tn_status |= TMPFS_NODE_CHANGED;
tmpfs_update(tvp);
@@ -1309,6 +1322,7 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
{
struct vnode *dvp = v->a_dvp;
struct vnode *vp = v->a_vp;
+ struct componentname *cnp = v->a_cnp;
int error;
struct tmpfs_dirent *de;
@@ -1320,13 +1334,18 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
dnode = VP_TO_TMPFS_DIR(dvp);
node = VP_TO_TMPFS_DIR(vp);
- /* Directories with more than two entries ('.' and '..') cannot be
- * removed. */
- if (node->tn_size > 0) {
- error = ENOTEMPTY;
- goto out;
- }
+ /*
+ * Directories with more than two non-whiteout entries ('.' and '..')
+ * cannot be removed.
+ */
+ if (node->tn_size != 0 &&
+ ((cnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+ node->tn_size > node->tn_dir.tn_wht_size)) {
+ error = ENOTEMPTY;
+ goto out;
+ }
+ /* Check flags to see if we are allowed to remove the directory. */
if ((dnode->tn_flags & APPEND)
|| (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
error = EPERM;
@@ -1334,27 +1353,31 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
}
/* This invariant holds only if we are not trying to remove "..".
- * We checked for that above so this is safe now. */
+ * We checked for that above so this is safe now. */
MPASS(node->tn_dir.tn_parent == dnode);
/* Get the directory entry associated with node (vp). This was
* filled by tmpfs_lookup while looking up the entry. */
- de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
+ de = tmpfs_dir_lookup(dnode, node, cnp);
MPASS(TMPFS_DIRENT_MATCHES(de,
- v->a_cnp->cn_nameptr,
- v->a_cnp->cn_namelen));
-
- /* Check flags to see if we are allowed to remove the directory. */
- if ((dnode->tn_flags & APPEND) != 0 ||
- (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
- error = EPERM;
- goto out;
- }
+ cnp->cn_nameptr,
+ cnp->cn_namelen));
/* Detach the directory entry from the directory (dnode). */
tmpfs_dir_detach(dvp, de);
- if (v->a_cnp->cn_flags & DOWHITEOUT)
- tmpfs_dir_whiteout_add(dvp, v->a_cnp);
+
+ /*
+ * If we are removing a directory, per the ENOTEMPTY check above it
+ * must either be empty or contain only whiteout entries. In the
+ * latter case (which can only happen if IGNOREWHITEOUT was passed
+ * in cnp->cn_flags), clear the whiteout entries to avoid leaking
+ * memory.
+ */
+ if (node->tn_size > 0)
+ tmpfs_dir_clear_whiteouts(vp);
+
+ if (cnp->cn_flags & DOWHITEOUT)
+ tmpfs_dir_whiteout_add(dvp, cnp);
/* No vnode should be allocated for this entry from this point */
TMPFS_NODE_LOCK(node);
@@ -1668,6 +1691,10 @@ tmpfs_pathconf(struct vop_pathconf_args *v)
*retval = PAGE_SIZE;
break;
+ case _PC_HAS_HIDDENSYSTEM:
+ *retval = 1;
+ break;
+
default:
error = vop_stdpathconf(v);
}
@@ -1684,21 +1711,15 @@ vop_vptofh {
};
*/
{
- struct tmpfs_fid_data tfd;
+ struct tmpfs_fid_data *const tfd = (struct tmpfs_fid_data *)ap->a_fhp;
struct tmpfs_node *node;
- struct fid *fhp;
+ _Static_assert(sizeof(struct tmpfs_fid_data) <= sizeof(struct fid),
+ "struct tmpfs_fid_data cannot be larger than struct fid");
node = VP_TO_TMPFS_NODE(ap->a_vp);
- fhp = ap->a_fhp;
- fhp->fid_len = sizeof(tfd);
-
- /*
- * Copy into fid_data from the stack to avoid unaligned pointer use.
- * See the comment in sys/mount.h on struct fid for details.
- */
- tfd.tfd_id = node->tn_id;
- tfd.tfd_gen = node->tn_gen;
- memcpy(fhp->fid_data, &tfd, fhp->fid_len);
+ tfd->tfd_len = sizeof(*tfd);
+ tfd->tfd_gen = node->tn_gen;
+ tfd->tfd_id = node->tn_id;
return (0);
}
@@ -2070,31 +2091,10 @@ tmpfs_setextattr(struct vop_setextattr_args *ap)
static off_t
tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
{
- vm_page_t m;
- vm_pindex_t p, p_m, p_swp;
-
- p = OFF_TO_IDX(noff);
- m = vm_page_find_least(obj, p);
-
- /*
- * Microoptimize the most common case for SEEK_DATA, where
- * there is no hole and the page is resident.
- */
- if (m != NULL && vm_page_any_valid(m) && m->pindex == p)
- return (noff);
-
- p_swp = swap_pager_find_least(obj, p);
- if (p_swp == p)
- return (noff);
-
- p_m = m == NULL ? obj->size : m->pindex;
- return (IDX_TO_OFF(MIN(p_m, p_swp)));
-}
+ vm_pindex_t p;
-static off_t
-tmpfs_seek_next(off_t noff)
-{
- return (noff + PAGE_SIZE - (noff & PAGE_MASK));
+ p = swap_pager_seek_data(obj, OFF_TO_IDX(noff));
+ return (p == OFF_TO_IDX(noff) ? noff : IDX_TO_OFF(p));
}
static int
@@ -2111,30 +2111,8 @@ tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
static off_t
tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
{
- vm_page_t m;
- vm_pindex_t p, p_swp;
-
- for (;; noff = tmpfs_seek_next(noff)) {
- /*
- * Walk over the largest sequential run of the valid pages.
- */
- for (m = vm_page_lookup(obj, OFF_TO_IDX(noff));
- m != NULL && vm_page_any_valid(m);
- m = vm_page_next(m), noff = tmpfs_seek_next(noff))
- ;
- /*
- * Found a hole in the object's page queue. Check if
- * there is a hole in the swap at the same place.
- */
- p = OFF_TO_IDX(noff);
- p_swp = swap_pager_find_least(obj, p);
- if (p_swp != p) {
- noff = IDX_TO_OFF(p);
- break;
- }
- }
- return (noff);
+ return (IDX_TO_OFF(swap_pager_seek_hole(obj, OFF_TO_IDX(noff))));
}
static int
diff --git a/sys/fs/udf/ecma167-udf.h b/sys/fs/udf/ecma167-udf.h
index 839bbec08254..19e114763cac 100644
--- a/sys/fs/udf/ecma167-udf.h
+++ b/sys/fs/udf/ecma167-udf.h
@@ -243,7 +243,7 @@ struct part_map_spare {
uint8_t n_st; /* Number of Sparing Tables */
uint8_t reserved1;
uint32_t st_size;
- uint32_t st_loc[1];
+ uint32_t st_loc[];
} __packed;
union udf_pmap {
@@ -266,7 +266,7 @@ struct udf_sparing_table {
uint16_t rt_l; /* Relocation Table len */
uint8_t reserved[2];
uint32_t seq_num;
- struct spare_map_entry entries[1];
+ struct spare_map_entry entries[];
} __packed;
/* Partition Descriptor [3/10.5] */
diff --git a/sys/fs/udf/udf_vfsops.c b/sys/fs/udf/udf_vfsops.c
index 866d0172f745..c5ef1f686093 100644
--- a/sys/fs/udf/udf_vfsops.c
+++ b/sys/fs/udf/udf_vfsops.c
@@ -32,7 +32,7 @@
/*
* Ok, here's how it goes. The UDF specs are pretty clear on how each data
* structure is made up, but not very clear on how they relate to each other.
- * Here is the skinny... This demostrates a filesystem with one file in the
+ * Here is the skinny... This demonstrates a filesystem with one file in the
* root directory. Subdirectories are treated just as normal files, but they
* have File Id Descriptors of their children as their file data. As for the
* Anchor Volume Descriptor Pointer, it can exist in two of the following three
@@ -81,6 +81,7 @@
#include <sys/fcntl.h>
#include <sys/iconv.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
@@ -729,7 +730,7 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
struct ifid *ifhp;
struct vnode *nvp;
struct udf_node *np;
- off_t fsize;
+ uint64_t fsize;
int error;
ifhp = (struct ifid *)fhp;
@@ -741,6 +742,10 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
np = VTON(nvp);
fsize = le64toh(np->fentry->inf_len);
+ if (fsize > OFF_MAX) {
+ *vpp = NULLVP;
+ return (EIO);
+ }
*vpp = nvp;
vnode_create_vobject(*vpp, fsize, curthread);
diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c
index f230ca0c72fa..37889241e8c3 100644
--- a/sys/fs/udf/udf_vnops.c
+++ b/sys/fs/udf/udf_vnops.c
@@ -39,6 +39,7 @@
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/iconv.h>
+#include <sys/limits.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
@@ -182,11 +183,14 @@ udf_access(struct vop_access_args *a)
}
static int
-udf_open(struct vop_open_args *ap) {
+udf_open(struct vop_open_args *ap)
+{
struct udf_node *np = VTON(ap->a_vp);
- off_t fsize;
+ uint64_t fsize;
fsize = le64toh(np->fentry->inf_len);
+ if (fsize > OFF_MAX)
+ return (EIO);
vnode_create_vobject(ap->a_vp, fsize, ap->a_td);
return 0;
}
@@ -314,12 +318,13 @@ udf_getattr(struct vop_getattr_args *a)
* that directories consume at least one logical block,
* make it appear so.
*/
- if (fentry->logblks_rec != 0) {
- vap->va_size =
- le64toh(fentry->logblks_rec) * node->udfmp->bsize;
- } else {
+ vap->va_size = le64toh(fentry->logblks_rec);
+ if (vap->va_size == 0)
vap->va_size = node->udfmp->bsize;
- }
+ else if (vap->va_size > UINT64_MAX / node->udfmp->bsize)
+ vap->va_size = UINT64_MAX;
+ else
+ vap->va_size *= node->udfmp->bsize;
} else {
vap->va_size = le64toh(fentry->inf_len);
}
@@ -446,6 +451,7 @@ udf_read(struct vop_read_args *ap)
struct buf *bp;
uint8_t *data;
daddr_t lbn, rablock;
+ uint64_t len;
off_t diff, fsize;
ssize_t n;
int error = 0;
@@ -471,7 +477,12 @@ udf_read(struct vop_read_args *ap)
return (error);
}
- fsize = le64toh(node->fentry->inf_len);
+ len = le64toh(node->fentry->inf_len);
+ if (len > OFF_MAX) {
+ /* too big, just cap to the requested length */
+ len = uio->uio_resid;
+ }
+ fsize = len;
udfmp = node->udfmp;
do {
lbn = lblkno(udfmp, uio->uio_offset);
@@ -783,6 +794,7 @@ udf_readdir(struct vop_readdir_args *a)
struct udf_uiodir uiodir;
struct udf_dirstream *ds;
uint64_t *cookies = NULL;
+ uint64_t len;
int ncookies;
int error = 0;
@@ -800,8 +812,6 @@ udf_readdir(struct vop_readdir_args *a)
*/
ncookies = uio->uio_resid / 8;
cookies = malloc(sizeof(*cookies) * ncookies, M_TEMP, M_WAITOK);
- if (cookies == NULL)
- return (ENOMEM);
uiodir.ncookies = ncookies;
uiodir.cookies = cookies;
uiodir.acookies = 0;
@@ -813,8 +823,12 @@ udf_readdir(struct vop_readdir_args *a)
* Iterate through the file id descriptors. Give the parent dir
* entry special attention.
*/
- ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len),
- node->udfmp);
+ len = le64toh(node->fentry->inf_len);
+ if (len > INT_MAX) {
+ /* too big, just cap to INT_MAX */
+ len = INT_MAX;
+ }
+ ds = udf_opendir(node, uio->uio_offset, len, node->udfmp);
while ((fid = udf_getfid(ds)) != NULL) {
/* XXX Should we return an error on a bad fid? */
@@ -906,7 +920,8 @@ udf_readlink(struct vop_readlink_args *ap)
struct udf_node *node;
void *buf;
char *cp;
- int error, len, root;
+ uint64_t len;
+ int error, root;
/*
* A symbolic link in UDF is a list of variable-length path
@@ -916,6 +931,8 @@ udf_readlink(struct vop_readlink_args *ap)
vp = ap->a_vp;
node = VTON(vp);
len = le64toh(node->fentry->inf_len);
+ if (len > MAXPATHLEN)
+ return (EIO);
buf = malloc(len, M_DEVBUF, M_WAITOK);
iov[0].iov_len = len;
iov[0].iov_base = buf;
@@ -1118,13 +1135,14 @@ udf_lookup(struct vop_cachedlookup_args *a)
struct udf_mnt *udfmp;
struct fileid_desc *fid = NULL;
struct udf_dirstream *ds;
+ uint64_t fsize;
u_long nameiop;
u_long flags;
char *nameptr;
long namelen;
ino_t id = 0;
int offset, error = 0;
- int fsize, lkflags, ltype, numdirpasses;
+ int lkflags, ltype, numdirpasses;
dvp = a->a_dvp;
node = VTON(dvp);
@@ -1135,6 +1153,10 @@ udf_lookup(struct vop_cachedlookup_args *a)
nameptr = a->a_cnp->cn_nameptr;
namelen = a->a_cnp->cn_namelen;
fsize = le64toh(node->fentry->inf_len);
+ if (fsize > INT_MAX) {
+ /* too big, just cap to INT_MAX */
+ fsize = INT_MAX;
+ }
/*
* If this is a LOOKUP and we've already partially searched through
@@ -1276,6 +1298,8 @@ udf_vptofh(struct vop_vptofh_args *a)
{
struct udf_node *node;
struct ifid *ifhp;
+ _Static_assert(sizeof(struct ifid) <= sizeof(struct fid),
+ "struct ifid cannot be larger than struct fid");
node = VTON(a->a_vp);
ifhp = (struct ifid *)a->a_fhp;
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
index 467db3b29ff8..0bd1894a2195 100644
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -97,15 +97,17 @@ struct unionfs_node {
char *un_path; /* path */
int un_pathlen; /* strlen of path */
- int un_flag; /* unionfs node flag */
-};
-/*
- * unionfs node flags
- * It needs the vnode with exclusive lock, when changing the un_flag variable.
- */
-#define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */
-#define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */
+ /*
+ * unionfs node flags
+ * Changing these flags requires the vnode to be locked exclusive.
+ */
+ #define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */
+ #define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */
+ #define UNIONFS_COPY_IN_PROGRESS 0x04 /* copy/dir shadow in progress */
+ #define UNIONFS_LOOKUP_IN_PROGRESS 0x08
+ unsigned int un_flag; /* unionfs node flag */
+};
extern struct vop_vector unionfs_vnodeops;
@@ -131,34 +133,32 @@ int unionfs_uninit(struct vfsconf *);
int unionfs_nodeget(struct mount *, struct vnode *, struct vnode *,
struct vnode *, struct vnode **, struct componentname *);
void unionfs_noderem(struct vnode *);
+struct unionfs_node_status * unionfs_find_node_status(struct unionfs_node *,
+ struct thread *td);
void unionfs_get_node_status(struct unionfs_node *, struct thread *,
struct unionfs_node_status **);
void unionfs_tryrem_node_status(struct unionfs_node *,
struct unionfs_node_status *);
int unionfs_check_rmdir(struct vnode *, struct ucred *, struct thread *td);
-int unionfs_copyfile(struct unionfs_node *, int, struct ucred *,
+int unionfs_copyfile(struct vnode *, int, struct ucred *,
struct thread *);
void unionfs_create_uppervattr_core(struct unionfs_mount *, struct vattr *,
struct vattr *, struct thread *);
int unionfs_create_uppervattr(struct unionfs_mount *, struct vnode *,
struct vattr *, struct ucred *, struct thread *);
-int unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *,
- struct unionfs_node *, struct componentname *, struct thread *);
+int unionfs_mkshadowdir(struct vnode *, struct vnode *,
+ struct componentname *, struct thread *);
int unionfs_mkwhiteout(struct vnode *, struct vnode *,
struct componentname *, struct thread *, char *, int);
int unionfs_relookup(struct vnode *, struct vnode **,
struct componentname *, struct componentname *, struct thread *,
char *, int, u_long);
-int unionfs_relookup_for_create(struct vnode *, struct componentname *,
- struct thread *);
-int unionfs_relookup_for_delete(struct vnode *, struct componentname *,
- struct thread *);
-int unionfs_relookup_for_rename(struct vnode *, struct componentname *,
- struct thread *);
void unionfs_forward_vop_start_pair(struct vnode *, int *,
struct vnode *, int *);
bool unionfs_forward_vop_finish_pair(struct vnode *, struct vnode *, int,
struct vnode *, struct vnode *, int);
+int unionfs_set_in_progress_flag(struct vnode *, unsigned int);
+void unionfs_clear_in_progress_flag(struct vnode *, unsigned int);
static inline void
unionfs_forward_vop_start(struct vnode *basevp, int *lkflags)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index bb57f3d56ade..edcc6716b674 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -203,19 +203,19 @@ unionfs_ins_cached_vnode(struct unionfs_node *uncp,
struct unionfs_node_hashhead *hd;
struct vnode *vp;
- ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
- ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
- KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR,
- ("%s: v_type != VDIR", __func__));
- KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR,
- ("%s: v_type != VDIR", __func__));
-
vp = NULLVP;
VI_LOCK(dvp);
- if (uncp->un_uppervp != NULL)
+ if (uncp->un_uppervp != NULLVP) {
+ ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
+ KASSERT(uncp->un_uppervp->v_type == VDIR,
+ ("%s: v_type != VDIR", __func__));
vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp);
- else if (uncp->un_lowervp != NULL)
+ } else if (uncp->un_lowervp != NULLVP) {
+ ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
+ KASSERT(uncp->un_lowervp->v_type == VDIR,
+ ("%s: v_type != VDIR", __func__));
vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp);
+ }
if (vp == NULLVP) {
hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ?
uncp->un_uppervp : uncp->un_lowervp));
@@ -276,9 +276,11 @@ unionfs_nodeget_cleanup(struct vnode *vp, struct unionfs_node *unp)
if (unp->un_dvp != NULLVP)
vrele(unp->un_dvp);
- if (unp->un_uppervp != NULLVP)
+ if (unp->un_uppervp != NULLVP) {
vput(unp->un_uppervp);
- if (unp->un_lowervp != NULLVP)
+ if (unp->un_lowervp != NULLVP)
+ vrele(unp->un_lowervp);
+ } else if (unp->un_lowervp != NULLVP)
vput(unp->un_lowervp);
if (unp->un_hashtbl != NULL)
hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
@@ -314,7 +316,7 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
*vpp = NULLVP;
if (uppervp == NULLVP && lowervp == NULLVP)
- panic("%s: upper and lower is null", __func__);
+ panic("%s: upper and lower are both null", __func__);
vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
@@ -327,7 +329,9 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp);
if (vp != NULLVP) {
*vpp = vp;
- goto unionfs_nodeget_out;
+ if (lkflags != 0)
+ vn_lock(*vpp, lkflags | LK_RETRY);
+ return (0);
}
}
@@ -385,27 +389,47 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
("%s: NULL dvp for non-root vp %p", __func__, vp));
- vn_lock_pair(lowervp, false, LK_EXCLUSIVE, uppervp, false,
- LK_EXCLUSIVE);
+
+ /*
+ * NOTE: There is still a possibility for cross-filesystem locking here.
+ * If dvp has an upper FS component and is locked, while the new vnode
+ * created here only has a lower-layer FS component, then we will end
+ * up taking a lower-FS lock while holding an upper-FS lock.
+ * That situation could be dealt with here using vn_lock_pair().
+ * However, that would only address one instance out of many in which
+ * a child vnode lock is taken while holding a lock on its parent
+ * directory. This is done in many places in common VFS code, as well as
+ * a few places within unionfs (which could lead to the same cross-FS
+ * locking issue if, for example, the upper FS is another nested unionfs
+ * instance). Additionally, it is unclear under what circumstances this
+ * specific lock sequence (a directory on one FS followed by a child of
+ * its 'peer' directory on another FS) would present the practical
+ * possibility of deadlock due to some other agent on the system
+ * attempting to lock those two specific vnodes in the opposite order.
+ */
+ if (uppervp != NULLVP)
+ vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY);
+ else
+ vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
error = insmntque1(vp, mp);
if (error != 0) {
unionfs_nodeget_cleanup(vp, unp);
return (error);
}
- if (lowervp != NULL && VN_IS_DOOMED(lowervp)) {
- vput(lowervp);
- unp->un_lowervp = lowervp = NULL;
- }
- if (uppervp != NULL && VN_IS_DOOMED(uppervp)) {
- vput(uppervp);
- unp->un_uppervp = uppervp = NULL;
- if (lowervp != NULLVP)
- vp->v_vnlock = lowervp->v_vnlock;
- }
- if (lowervp == NULL && uppervp == NULL) {
- unionfs_nodeget_cleanup(vp, unp);
- return (ENOENT);
- }
+ /*
+ * lowervp and uppervp should only be doomed by a forced unmount of
+ * their respective filesystems, but that can only happen if the
+ * unionfs instance is first unmounted. We also effectively hold the
+ * lock on the new unionfs vnode at this point. Therefore, if a
+ * unionfs umount has not yet reached the point at which the above
+ * insmntque1() would fail, then its vflush() call will end up
+ * blocked on our vnode lock, effectively also preventing unmount
+ * of the underlying filesystems.
+ */
+ VNASSERT(lowervp == NULLVP || !VN_IS_DOOMED(lowervp), vp,
+ ("%s: doomed lowervp %p", __func__, lowervp));
+ VNASSERT(uppervp == NULLVP || !VN_IS_DOOMED(uppervp), vp,
+ ("%s: doomed uppervp %p", __func__, uppervp));
vn_set_state(vp, VSTATE_CONSTRUCTED);
@@ -413,18 +437,16 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
*vpp = unionfs_ins_cached_vnode(unp, dvp);
if (*vpp != NULLVP) {
unionfs_nodeget_cleanup(vp, unp);
- vp = *vpp;
- } else {
- if (uppervp != NULL)
- VOP_UNLOCK(uppervp);
- if (lowervp != NULL)
- VOP_UNLOCK(lowervp);
+ if (lkflags != 0)
+ vn_lock(*vpp, lkflags | LK_RETRY);
+ return (0);
+ } else
*vpp = vp;
- }
-unionfs_nodeget_out:
- if (lkflags & LK_TYPE_MASK)
- vn_lock(vp, lkflags | LK_RETRY);
+ if ((lkflags & LK_SHARED) != 0)
+ vn_lock(vp, LK_DOWNGRADE);
+ else if ((lkflags & LK_EXCLUSIVE) == 0)
+ VOP_UNLOCK(vp);
return (0);
}
@@ -443,6 +465,7 @@ unionfs_noderem(struct vnode *vp)
struct vnode *dvp;
int count;
int writerefs;
+ bool unlock_lvp;
/*
* The root vnode lock may be recursed during unmount, because
@@ -455,18 +478,36 @@ unionfs_noderem(struct vnode *vp)
*/
KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0,
("%s: vnode %p locked recursively", __func__, vp));
+
+ unp = VTOUNIONFS(vp);
+ VNASSERT(unp != NULL, vp, ("%s: already reclaimed", __func__));
+ lvp = unp->un_lowervp;
+ uvp = unp->un_uppervp;
+ dvp = unp->un_dvp;
+ unlock_lvp = (uvp == NULLVP);
+
+ /*
+ * Lock the lower vnode in addition to the upper vnode lock in order
+ * to synchronize against any unionfs_lock() operation which may still
+ * hold the lower vnode lock. We do not need to do this for the root
+ * vnode, as the root vnode should always have both upper and lower
+ * base vnodes for its entire lifecycle, so unionfs_lock() should
+ * never attempt to lock its lower vnode in the first place.
+ * Moreover, during unmount of a non-"below" unionfs mount, the lower
+ * root vnode will already be locked as it is the covered vnode.
+ */
+ if (uvp != NULLVP && lvp != NULLVP && (vp->v_vflag & VV_ROOT) == 0) {
+ vn_lock_pair(uvp, true, LK_EXCLUSIVE, lvp, false, LK_EXCLUSIVE);
+ unlock_lvp = true;
+ }
+
if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
panic("%s: failed to acquire lock for vnode lock", __func__);
-
/*
* Use the interlock to protect the clearing of v_data to
* prevent faults in unionfs_lock().
*/
VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
- dvp = unp->un_dvp;
unp->un_lowervp = unp->un_uppervp = NULLVP;
vp->v_vnlock = &(vp->v_lock);
vp->v_data = NULL;
@@ -502,18 +543,16 @@ unionfs_noderem(struct vnode *vp)
("%s: write reference without upper vnode", __func__));
VOP_ADD_WRITECOUNT(uvp, -writerefs);
}
- if (lvp != NULLVP)
- VOP_UNLOCK(lvp);
if (uvp != NULLVP)
- VOP_UNLOCK(uvp);
+ vput(uvp);
+ if (unlock_lvp)
+ vput(lvp);
+ else if (lvp != NULLVP)
+ vrele(lvp);
if (dvp != NULLVP)
unionfs_rem_cached_vnode(unp, dvp);
- if (lvp != NULLVP)
- vrele(lvp);
- if (uvp != NULLVP)
- vrele(uvp);
if (unp->un_path != NULL) {
free(unp->un_path, M_UNIONFSPATH);
unp->un_path = NULL;
@@ -539,35 +578,52 @@ unionfs_noderem(struct vnode *vp)
}
/*
- * Get the unionfs node status object for the vnode corresponding to unp,
- * for the process that owns td. Allocate a new status object if one
- * does not already exist.
+ * Find the unionfs node status object for the vnode corresponding to unp,
+ * for the process that owns td. Return NULL if no such object exists.
*/
-void
-unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
- struct unionfs_node_status **unspp)
+struct unionfs_node_status *
+unionfs_find_node_status(struct unionfs_node *unp, struct thread *td)
{
struct unionfs_node_status *unsp;
pid_t pid;
pid = td->td_proc->p_pid;
- KASSERT(NULL != unspp, ("%s: NULL status", __func__));
ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
if (unsp->uns_pid == pid) {
- *unspp = unsp;
- return;
+ return (unsp);
}
}
- /* create a new unionfs node status */
- unsp = malloc(sizeof(struct unionfs_node_status),
- M_TEMP, M_WAITOK | M_ZERO);
+ return (NULL);
+}
+
+/*
+ * Get the unionfs node status object for the vnode corresponding to unp,
+ * for the process that owns td. Allocate a new status object if one
+ * does not already exist.
+ */
+void
+unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
+ struct unionfs_node_status **unspp)
+{
+ struct unionfs_node_status *unsp;
+ pid_t pid;
+
+ pid = td->td_proc->p_pid;
- unsp->uns_pid = pid;
- LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
+ KASSERT(NULL != unspp, ("%s: NULL status", __func__));
+ unsp = unionfs_find_node_status(unp, td);
+ if (unsp == NULL) {
+ /* create a new unionfs node status */
+ unsp = malloc(sizeof(struct unionfs_node_status),
+ M_TEMP, M_WAITOK | M_ZERO);
+
+ unsp->uns_pid = pid;
+ LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
+ }
*unspp = unsp;
}
@@ -697,110 +753,6 @@ unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
}
/*
- * relookup for CREATE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- *
- * If it called 'unionfs_copyfile' function by unionfs_link etc,
- * VOP_LOOKUP information is broken.
- * So it need relookup in order to create link etc.
- */
-int
-unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, CREATE);
- if (error)
- return (error);
-
- if (vp != NULLVP) {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
-
- error = EEXIST;
- }
-
- return (error);
-}
-
-/*
- * relookup for DELETE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, DELETE);
- if (error)
- return (error);
-
- if (vp == NULLVP)
- error = ENOENT;
- else {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
- }
-
- return (error);
-}
-
-/*
- * relookup for RENAME namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, RENAME);
- if (error)
- return (error);
-
- if (vp != NULLVP) {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
- }
-
- return (error);
-}
-
-/*
* Update the unionfs_node.
*
* uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
@@ -836,6 +788,8 @@ unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
vp->v_vnlock = uvp->v_vnlock;
VI_UNLOCK(vp);
+ for (count = 0; count < lockrec + 1; count++)
+ VOP_UNLOCK(lvp);
/*
* Re-cache the unionfs vnode against the upper vnode
*/
@@ -851,18 +805,87 @@ unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
}
/*
+ * Mark a unionfs operation as being in progress, sleeping if the
+ * same operation is already in progress.
+ * This is useful, for example, during copy-up operations in which
+ * we may drop the target vnode lock, but we want to avoid the
+ * possibility of a concurrent copy-up on the same vnode triggering
+ * a spurious failure.
+ */
+int
+unionfs_set_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+ struct unionfs_node *unp;
+ int error;
+
+ error = 0;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ VI_LOCK(vp);
+ unp = VTOUNIONFS(vp);
+ while (error == 0 && (unp->un_flag & flag) != 0) {
+ VOP_UNLOCK(vp);
+ error = msleep(vp, VI_MTX(vp), PCATCH | PDROP, "unioncp", 0);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ VI_LOCK(vp);
+ if (error == 0) {
+ /*
+ * If we waited on a concurrent copy-up and that
+ * copy-up was successful, return a non-fatal
+ * indication that the desired operation is already
+ * complete. If we waited on a concurrent lookup,
+ * return ERELOOKUP to indicate the VFS cache should
+ * be re-queried to avoid creating a duplicate unionfs
+ * vnode.
+ */
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL)
+ error = ENOENT;
+ else if (flag == UNIONFS_COPY_IN_PROGRESS &&
+ unp->un_uppervp != NULLVP)
+ error = EJUSTRETURN;
+ else if (flag == UNIONFS_LOOKUP_IN_PROGRESS)
+ error = ERELOOKUP;
+ }
+ }
+ if (error == 0)
+ unp->un_flag |= flag;
+ VI_UNLOCK(vp);
+
+ return (error);
+}
+
+void
+unionfs_clear_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+ struct unionfs_node *unp;
+
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
+ VI_LOCK(vp);
+ if (unp != NULL) {
+ VNASSERT((unp->un_flag & flag) != 0, vp,
+ ("%s: copy not in progress", __func__));
+ unp->un_flag &= ~flag;
+ }
+ wakeup(vp);
+ VI_UNLOCK(vp);
+}
+
+/*
* Create a new shadow dir.
*
- * udvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
*
* If no error returned, unp will be updated.
*/
int
-unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
- struct unionfs_node *unp, struct componentname *cnp, struct thread *td)
+unionfs_mkshadowdir(struct vnode *dvp, struct vnode *vp,
+ struct componentname *cnp, struct thread *td)
{
struct vnode *lvp;
struct vnode *uvp;
+ struct vnode *udvp;
struct vattr va;
struct vattr lva;
struct nameidata nd;
@@ -870,10 +893,25 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
struct ucred *cred;
struct ucred *credbk;
struct uidinfo *rootinfo;
+ struct unionfs_mount *ump;
+ struct unionfs_node *dunp;
+ struct unionfs_node *unp;
int error;
+ ASSERT_VOP_ELOCKED(dvp, __func__);
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ ump = MOUNTTOUNIONFSMOUNT(vp->v_mount);
+ unp = VTOUNIONFS(vp);
if (unp->un_uppervp != NULLVP)
return (EEXIST);
+ dunp = VTOUNIONFS(dvp);
+ udvp = dunp->un_uppervp;
+
+ error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+ if (error == EJUSTRETURN)
+ return (0);
+ else if (error != 0)
+ return (error);
lvp = unp->un_lowervp;
uvp = NULLVP;
@@ -882,11 +920,6 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
/* Authority change to root */
rootinfo = uifind((uid_t)0);
cred = crdup(cnp->cn_cred);
- /*
- * The calls to chgproccnt() are needed to compensate for change_ruid()
- * calling chgproccnt().
- */
- chgproccnt(cred->cr_ruidinfo, 1, 0);
change_euid(cred, rootinfo);
change_ruid(cred, rootinfo);
change_svuid(cred, (uid_t)0);
@@ -897,11 +930,29 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
NDPREINIT(&nd);
if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
- goto unionfs_mkshadowdir_abort;
+ goto unionfs_mkshadowdir_finish;
+ vref(udvp);
+ VOP_UNLOCK(vp);
if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td,
- cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
- goto unionfs_mkshadowdir_abort;
+ cnp->cn_nameptr, cnp->cn_namelen, CREATE))) {
+ /*
+ * When handling error cases here, we drop udvp's lock and
+ * then jump to exit code that relocks dvp, which in most
+ * cases will effectively relock udvp. However, this is
+ * not guaranteed to be the case, as various calls made
+ * here (such as unionfs_relookup() above and VOP_MKDIR()
+ * below) may unlock and then relock udvp, allowing dvp to
+ * be reclaimed in the meantime. In such a situation dvp
+ * will no longer share its lock with udvp. Since
+ * performance isn't a concern for these error cases, it
+ * makes more sense to reuse the common code that locks
+ * dvp on exit than to explicitly check for reclamation
+ * of dvp.
+ */
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
+ }
if (uvp != NULLVP) {
if (udvp == uvp)
vrele(uvp);
@@ -909,11 +960,14 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
vput(uvp);
error = EEXIST;
- goto unionfs_mkshadowdir_abort;
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
}
- if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
- goto unionfs_mkshadowdir_abort;
+ if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) {
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
+ }
unionfs_create_uppervattr_core(ump, &lva, &va, td);
/*
@@ -924,7 +978,7 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
* component. This *should* be fine, as cn_namelen will still
* correctly indicate the length of only the current component,
* but ZFS in particular does not respect cn_namelen in its VOP_MKDIR
- * implementation
+ * implementation.
* Note that this assumes nd.ni_cnd.cn_pnbuf was allocated by
* something like a local namei() operation and the temporary
* NUL-termination will not have an effect on other threads.
@@ -934,29 +988,59 @@ unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
*pathend = '\0';
error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va);
*pathend = pathterm;
-
- if (!error) {
- /*
- * XXX The bug which cannot set uid/gid was corrected.
- * Ignore errors.
- */
- va.va_type = VNON;
- VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
-
+ if (error != 0) {
/*
- * VOP_SETATTR() may transiently drop uvp's lock, so it's
- * important to call it before unionfs_node_update() transfers
- * the unionfs vnode's lock from lvp to uvp; otherwise the
- * unionfs vnode itself would be transiently unlocked and
- * potentially doomed.
+ * See the comment after unionfs_relookup() above for an
+ * explanation of why we unlock udvp here only to relock
+ * dvp on exit.
*/
- unionfs_node_update(unp, uvp, td);
+ vput(udvp);
+ vn_finished_write(mp);
+ goto unionfs_mkshadowdir_relock;
}
+
+ /*
+ * XXX The bug which cannot set uid/gid was corrected.
+ * Ignore errors.
+ */
+ va.va_type = VNON;
+ /*
+ * VOP_SETATTR() may transiently drop uvp's lock, so it's
+ * important to call it before unionfs_node_update() transfers
+ * the unionfs vnode's lock from lvp to uvp; otherwise the
+ * unionfs vnode itself would be transiently unlocked and
+ * potentially doomed.
+ */
+ VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
+
+ /*
+ * uvp may become doomed during VOP_VPUT_PAIR() if the implementation
+ * must temporarily drop uvp's lock. However, since we hold a
+ * reference to uvp from the VOP_MKDIR() call above, this would require
+ * a forcible unmount of uvp's filesystem, which in turn can only
+ * happen if our unionfs instance is first forcibly unmounted. We'll
+ * therefore catch this case in the NULL check of unp below.
+ */
+ VOP_VPUT_PAIR(udvp, &uvp, false);
vn_finished_write(mp);
+ vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL) {
+ vput(uvp);
+ error = ENOENT;
+ } else
+ unionfs_node_update(unp, uvp, td);
+ VOP_UNLOCK(vp);
+
+unionfs_mkshadowdir_relock:
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (error == 0 && (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)))
+ error = ENOENT;
-unionfs_mkshadowdir_abort:
+unionfs_mkshadowdir_finish:
+ unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
cnp->cn_cred = credbk;
- chgproccnt(cred->cr_ruidinfo, -1, 0);
crfree(cred);
return (error);
@@ -1116,23 +1200,31 @@ unionfs_forward_vop_finish_pair(
/*
* Create a new whiteout.
*
- * udvp and dvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
*/
int
-unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
+unionfs_mkwhiteout(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp, struct thread *td, char *path, int pathlen)
{
+ struct vnode *udvp;
struct vnode *wvp;
struct nameidata nd;
struct mount *mp;
int error;
- int lkflags;
+ bool dvp_locked;
+
+ ASSERT_VOP_ELOCKED(dvp, __func__);
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ udvp = VTOUNIONFS(dvp)->un_uppervp;
wvp = NULLVP;
NDPREINIT(&nd);
+ vref(udvp);
+ VOP_UNLOCK(vp);
if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path,
pathlen, CREATE))) {
- return (error);
+ goto unionfs_mkwhiteout_cleanup;
}
if (wvp != NULLVP) {
if (udvp == wvp)
@@ -1140,18 +1232,27 @@ unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
else
vput(wvp);
- return (EEXIST);
+ if (nd.ni_cnd.cn_flags & ISWHITEOUT)
+ error = 0;
+ else
+ error = EEXIST;
+ goto unionfs_mkwhiteout_cleanup;
}
if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
- goto unionfs_mkwhiteout_free_out;
- unionfs_forward_vop_start(udvp, &lkflags);
+ goto unionfs_mkwhiteout_cleanup;
error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE);
- unionfs_forward_vop_finish(dvp, udvp, lkflags);
-
vn_finished_write(mp);
-unionfs_mkwhiteout_free_out:
+unionfs_mkwhiteout_cleanup:
+ if (VTOUNIONFS(dvp) == NULL) {
+ vput(udvp);
+ dvp_locked = false;
+ } else {
+ vrele(udvp);
+ dvp_locked = true;
+ }
+ vn_lock_pair(dvp, dvp_locked, LK_EXCLUSIVE, vp, false, LK_EXCLUSIVE);
return (error);
}
@@ -1165,10 +1266,11 @@ unionfs_mkwhiteout_free_out:
*/
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
- struct unionfs_node *unp, struct vattr *uvap, struct thread *td)
+ struct vnode *vp, struct vattr *uvap, struct thread *td)
{
struct unionfs_mount *ump;
- struct vnode *vp;
+ struct unionfs_node *unp;
+ struct vnode *uvp;
struct vnode *lvp;
struct ucred *cred;
struct vattr lva;
@@ -1176,8 +1278,10 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
int fmode;
int error;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
- vp = NULLVP;
+ uvp = NULLVP;
lvp = unp->un_lowervp;
cred = td->td_ucred;
fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
@@ -1200,42 +1304,39 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
NDPREINIT(&nd);
vref(udvp);
- if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd, false)) != 0)
- goto unionfs_vn_create_on_upper_free_out2;
- vrele(udvp);
+ VOP_UNLOCK(vp);
+ if ((error = vfs_relookup(udvp, &uvp, &nd.ni_cnd, false)) != 0) {
+ vrele(udvp);
+ return (error);
+ }
- if (vp != NULLVP) {
- if (vp == udvp)
- vrele(vp);
+ if (uvp != NULLVP) {
+ if (uvp == udvp)
+ vrele(uvp);
else
- vput(vp);
+ vput(uvp);
error = EEXIST;
- goto unionfs_vn_create_on_upper_free_out1;
+ goto unionfs_vn_create_on_upper_cleanup;
}
- if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0)
- goto unionfs_vn_create_on_upper_free_out1;
+ if ((error = VOP_CREATE(udvp, &uvp, &nd.ni_cnd, uvap)) != 0)
+ goto unionfs_vn_create_on_upper_cleanup;
- if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
- vput(vp);
- goto unionfs_vn_create_on_upper_free_out1;
+ if ((error = VOP_OPEN(uvp, fmode, cred, td, NULL)) != 0) {
+ vput(uvp);
+ goto unionfs_vn_create_on_upper_cleanup;
}
- error = VOP_ADD_WRITECOUNT(vp, 1);
+ error = VOP_ADD_WRITECOUNT(uvp, 1);
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
- __func__, vp, vp->v_writecount);
+ __func__, uvp, uvp->v_writecount);
if (error == 0) {
- *vpp = vp;
+ *vpp = uvp;
} else {
- VOP_CLOSE(vp, fmode, cred, td);
+ VOP_CLOSE(uvp, fmode, cred, td);
}
-unionfs_vn_create_on_upper_free_out1:
- VOP_UNLOCK(udvp);
-
-unionfs_vn_create_on_upper_free_out2:
- KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path,
- ("%s: cn_pnbuf changed", __func__));
-
+unionfs_vn_create_on_upper_cleanup:
+ vput(udvp);
return (error);
}
@@ -1310,13 +1411,18 @@ unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
*
* If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
* docopy.
+ *
+ * vp is a unionfs vnode that should be locked on entry and will be
+ * locked on return.
*
* If no error returned, unp will be updated.
*/
int
-unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
+unionfs_copyfile(struct vnode *vp, int docopy, struct ucred *cred,
struct thread *td)
{
+ struct unionfs_node *unp;
+ struct unionfs_node *dunp;
struct mount *mp;
struct vnode *udvp;
struct vnode *lvp;
@@ -1324,6 +1430,8 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
struct vattr uva;
int error;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
lvp = unp->un_lowervp;
uvp = NULLVP;
@@ -1333,22 +1441,51 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
return (EINVAL);
if (unp->un_uppervp != NULLVP)
return (EEXIST);
- udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
+
+ udvp = NULLVP;
+ VI_LOCK(unp->un_dvp);
+ dunp = VTOUNIONFS(unp->un_dvp);
+ if (dunp != NULL)
+ udvp = dunp->un_uppervp;
+ VI_UNLOCK(unp->un_dvp);
+
if (udvp == NULLVP)
return (EROFS);
if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
return (EROFS);
+ ASSERT_VOP_UNLOCKED(udvp, __func__);
+
+ error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+ if (error == EJUSTRETURN)
+ return (0);
+ else if (error != 0)
+ return (error);
error = VOP_ACCESS(lvp, VREAD, cred, td);
if (error != 0)
- return (error);
+ goto unionfs_copyfile_cleanup;
if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)) != 0)
- return (error);
- error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
+ goto unionfs_copyfile_cleanup;
+ error = unionfs_vn_create_on_upper(&uvp, udvp, vp, &uva, td);
if (error != 0) {
vn_finished_write(mp);
- return (error);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ goto unionfs_copyfile_cleanup;
+ }
+
+ /*
+ * Note that it's still possible for e.g. VOP_WRITE to relock
+ * uvp below while holding vp[=lvp] locked. Replacing
+ * unionfs_copyfile_core with vn_generic_copy_file_range() will
+ * allow us to avoid the problem by moving this vn_lock_pair()
+ * call much later.
+ */
+ vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL) {
+ error = ENOENT;
+ goto unionfs_copyfile_cleanup;
}
if (docopy != 0) {
@@ -1369,18 +1506,30 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
/* Reset the attributes. Ignore errors. */
uva.va_type = VNON;
VOP_SETATTR(uvp, &uva, cred);
+ unionfs_node_update(unp, uvp, td);
}
- unionfs_node_update(unp, uvp, td);
-
+unionfs_copyfile_cleanup:
+ unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
return (error);
}
/*
- * It checks whether vp can rmdir. (check empty)
+ * Determine if the unionfs view of a directory is empty such that
+ * an rmdir operation can be permitted.
+ *
+ * We assume the VOP_RMDIR() against the upper layer vnode will take
+ * care of this check for us where the upper FS is concerned, so here
+ * we concentrate on the lower FS. We need to check for the presence
+ * of files other than "." and ".." in the lower FS directory and
+ * then cross-check any files we find against the upper FS to see if
+ * a whiteout is present (in which case we treat the lower file as
+ * non-present).
+ *
+ * The logic here is based heavily on vn_dir_check_empty().
*
- * vp is unionfs vnode.
- * vp should be locked.
+ * vp should be a locked unionfs node, and vp's lowervp should also be
+ * locked.
*/
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
@@ -1388,115 +1537,127 @@ unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
struct vnode *uvp;
struct vnode *lvp;
struct vnode *tvp;
+ char *dirbuf;
+ size_t dirbuflen, len;
+ off_t off;
struct dirent *dp;
- struct dirent *edp;
struct componentname cn;
- struct iovec iov;
- struct uio uio;
struct vattr va;
int error;
int eofflag;
- int lookuperr;
-
- /*
- * The size of buf needs to be larger than DIRBLKSIZ.
- */
- char buf[256 * 6];
-
- ASSERT_VOP_ELOCKED(vp, __func__);
eofflag = 0;
- uvp = UNIONFSVPTOUPPERVP(vp);
lvp = UNIONFSVPTOLOWERVP(vp);
+ uvp = UNIONFSVPTOUPPERVP(vp);
+
+ /*
+ * Note that the locking here still isn't ideal: We expect the caller
+ * to hold both the upper and lower layer locks as well as the upper
+ * parent directory lock, which it can do in a manner that avoids
+ * deadlock. However, if the cross-check logic below needs to call
+ * VOP_LOOKUP(), that may relock the upper vnode and lock any found
+ * child vnode in a way that doesn't protect against deadlock given
+ * the other held locks. Beyond that, the various other VOPs we issue
+ * below, such as VOP_OPEN() and VOP_READDIR(), may also re-lock the
+ * lower vnode.
+ * We might instead just hand off between the upper vnode lock
+ * (and its parent directory lock) and the lower vnode lock as needed,
+ * so that the lower lock is never held at the same time as the upper
+ * locks, but that opens up a wider window in which the upper
+ * directory (and also the lower directory if it isn't truly
+ * read-only) may change while the relevant lock is dropped. But
+ * since re-locking may happen here and open up such a window anyway,
+ * perhaps that is a worthwhile tradeoff? Or perhaps we can ultimately
+ * do sufficient tracking of empty state within the unionfs vnode
+ * (in conjunction with upcalls from the lower FSes to notify us
+ * of out-of-band state changes) that we can avoid these costly checks
+ * altogether.
+ */
+ ASSERT_VOP_LOCKED(lvp, __func__);
+ ASSERT_VOP_ELOCKED(uvp, __func__);
- /* check opaque */
if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
return (error);
if (va.va_flags & OPAQUE)
return (0);
- /* open vnode */
#ifdef MAC
- if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
+ if ((error = mac_vnode_check_open(cred, lvp, VEXEC | VREAD)) != 0)
return (error);
#endif
- if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
+ if ((error = VOP_ACCESS(lvp, VEXEC | VREAD, cred, td)) != 0)
+ return (error);
+ if ((error = VOP_OPEN(lvp, FREAD, cred, td, NULL)) != 0)
return (error);
- if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
+ if ((error = VOP_GETATTR(lvp, &va, cred)) != 0)
return (error);
- uio.uio_rw = UIO_READ;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_td = td;
- uio.uio_offset = 0;
+ dirbuflen = max(DEV_BSIZE, GENERIC_MAXDIRSIZ);
+ if (dirbuflen < va.va_blocksize)
+ dirbuflen = va.va_blocksize;
+ dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
-#ifdef MAC
- error = mac_vnode_check_readdir(td->td_ucred, lvp);
-#endif
- while (!error && !eofflag) {
- iov.iov_base = buf;
- iov.iov_len = sizeof(buf);
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_resid = iov.iov_len;
+ len = 0;
+ off = 0;
+ eofflag = 0;
- error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
+ for (;;) {
+ error = vn_dir_next_dirent(lvp, td, dirbuf, dirbuflen,
+ &dp, &len, &off, &eofflag);
if (error != 0)
break;
- KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf),
- ("%s: empty read from lower FS", __func__));
-
- edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
- for (dp = (struct dirent*)buf; !error && dp < edp;
- dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
- if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
- (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
- (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
- continue;
-
- cn.cn_namelen = dp->d_namlen;
- cn.cn_pnbuf = NULL;
- cn.cn_nameptr = dp->d_name;
- cn.cn_nameiop = LOOKUP;
- cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
- cn.cn_lkflags = LK_EXCLUSIVE;
- cn.cn_cred = cred;
-
- /*
- * check entry in lower.
- * Sometimes, readdir function returns
- * wrong entry.
- */
- lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
- if (!lookuperr)
- vput(tvp);
- else
- continue; /* skip entry */
-
- /*
- * check entry
- * If it has no exist/whiteout entry in upper,
- * directory is not empty.
- */
- cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
- lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
+ if (len == 0) {
+ /* EOF */
+ error = 0;
+ break;
+ }
- if (!lookuperr)
- vput(tvp);
+ if (dp->d_type == DT_WHT)
+ continue;
- /* ignore exist or whiteout entry */
- if (!lookuperr ||
- (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
- continue;
+ /*
+ * Any file in the directory which is not '.' or '..' indicates
+ * the directory is not empty.
+ */
+ switch (dp->d_namlen) {
+ case 2:
+ if (dp->d_name[1] != '.') {
+ /* Can't be '..' (nor '.') */
+ break;
+ }
+ /* FALLTHROUGH */
+ case 1:
+ if (dp->d_name[0] != '.') {
+ /* Can't be '..' nor '.' */
+ break;
+ }
+ continue;
+ default:
+ break;
+ }
+ cn.cn_namelen = dp->d_namlen;
+ cn.cn_pnbuf = NULL;
+ cn.cn_nameptr = dp->d_name;
+ cn.cn_nameiop = LOOKUP;
+ cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
+ cn.cn_lkflags = LK_EXCLUSIVE;
+ cn.cn_cred = cred;
+
+ error = VOP_LOOKUP(uvp, &tvp, &cn);
+ if (tvp != NULLVP)
+ vput(tvp);
+ if (error != 0 && error != ENOENT && error != EJUSTRETURN)
+ break;
+ else if ((cn.cn_flags & ISWHITEOUT) == 0) {
error = ENOTEMPTY;
- }
+ break;
+ } else
+ error = 0;
}
- /* close vnode */
- VOP_CLOSE(vp, FREAD, cred, td);
-
+ VOP_CLOSE(lvp, FREAD, cred, td);
+ free(dirbuf, M_TEMP);
return (error);
}
-
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
index cb55c2dd6474..9342317ad08e 100644
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -327,18 +327,15 @@ unionfs_domount(struct mount *mp)
* unionfs_lock()) and the mountpoint's busy count. Without this,
* unmount will lock the covered vnode lock (directly through the
* covered vnode) and wait for the busy count to drain, while a
- * concurrent lookup will increment the busy count and then lock
+ * concurrent lookup will increment the busy count and then may lock
* the covered vnode lock (indirectly through unionfs_lock()).
*
- * Note that we can't yet use this facility for the 'below' case
- * in which the upper vnode is the covered vnode, because that would
- * introduce a different LOR in which the cross-mount lookup would
- * effectively hold the upper vnode lock before acquiring the lower
- * vnode lock, while an unrelated lock operation would still acquire
- * the lower vnode lock before the upper vnode lock, which is the
- * order unionfs currently requires.
+ * Note that this is only needed for the 'below' case in which the
+ * upper vnode is also the covered vnode, because unionfs_lock()
+ * only locks the upper vnode as long as both lower and upper vnodes
+ * are present (which they will always be for the unionfs mount root).
*/
- if (!below) {
+ if (below) {
vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
VOP_UNLOCK(mp->mnt_vnodecovered);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index aa2a7273825a..03130f0ca949 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -76,6 +76,21 @@
VNASSERT(((vp)->v_op == &unionfs_vnodeops), vp, \
("%s: non-unionfs vnode", __func__))
+static bool
+unionfs_lookup_isroot(struct componentname *cnp, struct vnode *dvp)
+{
+ struct nameidata *ndp;
+
+ if (dvp == NULL)
+ return (false);
+ if ((dvp->v_vflag & VV_ROOT) != 0)
+ return (true);
+ ndp = vfs_lookup_nameidata(cnp);
+ if (ndp == NULL)
+ return (false);
+ return (vfs_lookup_isroot(ndp, dvp));
+}
+
static int
unionfs_lookup(struct vop_cachedlookup_args *ap)
{
@@ -84,13 +99,12 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
struct vattr va;
struct componentname *cnp;
struct thread *td;
+ uint64_t cnflags;
u_long nameiop;
- u_long cnflags, cnflagsbk;
- int iswhiteout;
int lockflag;
- int error , uerror, lerror;
+ int lkflags;
+ int error, uerror, lerror;
- iswhiteout = 0;
lockflag = 0;
error = uerror = lerror = ENOENT;
cnp = ap->a_cnp;
@@ -120,87 +134,185 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
return (EROFS);
/*
+ * Note that a lookup is in-flight, and block if another lookup
+ * is already in-flight against dvp. This is done because we may
+ * end up dropping dvp's lock to look up a lower vnode or to create
+ * a shadow directory, opening up the possibility of parallel lookups
+ * against the same directory creating duplicate unionfs vnodes for
+ * the same file(s). Note that if this function encounters an
+ * in-progress lookup for the directory, it will block until the
+ * lookup is complete and then return ERELOOKUP to allow any
+ * existing unionfs vnode to be loaded from the VFS cache.
+ * This is really a hack; filesystems that support MNTK_LOOKUP_SHARED
+ * (which unionfs currently doesn't) seem to deal with this by using
+ * the vfs_hash_* functions to manage a per-mount vnode cache keyed
+ * by the inode number (or some roughly equivalent unique ID
+ * usually associated with the storage medium). It may make sense
+ * for unionfs to adopt something similar as a replacement for its
+ * current half-baked directory-only cache implementation, particularly
+ * if we want to support MNTK_LOOKUP_SHARED here.
+ */
+ error = unionfs_set_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
+ if (error != 0)
+ return (error);
+ /*
* lookup dotdot
*/
if (cnflags & ISDOTDOT) {
- if (LOOKUP != nameiop && udvp == NULLVP)
- return (EROFS);
+ if (LOOKUP != nameiop && udvp == NULLVP) {
+ error = EROFS;
+ goto unionfs_lookup_return;
+ }
- if (udvp != NULLVP) {
- dtmpvp = udvp;
- if (ldvp != NULLVP)
- VOP_UNLOCK(ldvp);
+ if (unionfs_lookup_isroot(cnp, udvp) ||
+ unionfs_lookup_isroot(cnp, ldvp)) {
+ error = ENOENT;
+ goto unionfs_lookup_return;
}
+
+ if (udvp != NULLVP)
+ dtmpvp = udvp;
else
dtmpvp = ldvp;
+ unionfs_forward_vop_start(dtmpvp, &lkflags);
error = VOP_LOOKUP(dtmpvp, &vp, cnp);
+ unionfs_forward_vop_finish(dvp, dtmpvp, lkflags);
- if (dtmpvp == udvp && ldvp != NULLVP) {
- VOP_UNLOCK(udvp);
- vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
- dunp = VTOUNIONFS(dvp);
- if (error == 0 && dunp == NULL)
- error = ENOENT;
- }
+ /*
+ * Drop the lock and reference on vp. If the lookup was
+ * successful, we'll either need to exchange vp's lock and
+ * reference for the unionfs parent vnode's lock and
+ * reference, or (if dvp was reclaimed) we'll need to drop
+ * vp's lock and reference to return early.
+ */
+ if (vp != NULLVP)
+ vput(vp);
+ dunp = VTOUNIONFS(dvp);
+ if (error == 0 && dunp == NULL)
+ error = ENOENT;
if (error == 0) {
- /*
- * Exchange lock and reference from vp to
- * dunp->un_dvp. vp is upper/lower vnode, but it
- * will need to return the unionfs vnode.
- */
- if (nameiop == DELETE || nameiop == RENAME ||
- (cnp->cn_lkflags & LK_TYPE_MASK))
- VOP_UNLOCK(vp);
- vrele(vp);
-
dtmpvp = dunp->un_dvp;
vref(dtmpvp);
VOP_UNLOCK(dvp);
*(ap->a_vpp) = dtmpvp;
- if (nameiop == DELETE || nameiop == RENAME)
- vn_lock(dtmpvp, LK_EXCLUSIVE | LK_RETRY);
- else if (cnp->cn_lkflags & LK_TYPE_MASK)
- vn_lock(dtmpvp, cnp->cn_lkflags |
- LK_RETRY);
+ vn_lock(dtmpvp, cnp->cn_lkflags | LK_RETRY);
+ if (VN_IS_DOOMED(dtmpvp)) {
+ vput(dtmpvp);
+ *(ap->a_vpp) = NULLVP;
+ error = ENOENT;
+ }
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
- } else if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
- cache_enter(dvp, NULLVP, cnp);
+ }
- goto unionfs_lookup_return;
+ goto unionfs_lookup_cleanup;
}
/*
+ * Lookup lower layer. We do this before looking up the upper
+ * layer, as we may drop the upper parent directory's lock, and we
+ * want to ensure the upper parent remains locked from the point of
+ * lookup through any ensuing VOP that may require it to be locked.
+ * The cost of this is that we may end up performing an unnecessary
+ * lower layer lookup if a whiteout is present in the upper layer.
+ */
+ if (ldvp != NULLVP && !(cnflags & DOWHITEOUT)) {
+ struct componentname lcn;
+ bool is_dot;
+
+ if (udvp != NULLVP) {
+ vref(ldvp);
+ VOP_UNLOCK(dvp);
+ vn_lock(ldvp, LK_EXCLUSIVE | LK_RETRY);
+ }
+
+ lcn = *cnp;
+ /* the op is always LOOKUP */
+ lcn.cn_nameiop = LOOKUP;
+ lcn.cn_flags = cnflags;
+ is_dot = false;
+
+ if (udvp == NULLVP)
+ unionfs_forward_vop_start(ldvp, &lkflags);
+ lerror = VOP_LOOKUP(ldvp, &lvp, &lcn);
+ if (udvp == NULLVP &&
+ unionfs_forward_vop_finish(dvp, ldvp, lkflags)) {
+ if (lvp != NULLVP)
+ VOP_UNLOCK(lvp);
+ error = ENOENT;
+ goto unionfs_lookup_cleanup;
+ }
+
+ if (udvp == NULLVP)
+ cnp->cn_flags = lcn.cn_flags;
+
+ if (lerror == 0) {
+ if (ldvp == lvp) { /* is dot */
+ vrele(lvp);
+ *(ap->a_vpp) = dvp;
+ vref(dvp);
+ is_dot = true;
+ error = lerror;
+ } else if (lvp != NULLVP)
+ VOP_UNLOCK(lvp);
+ }
+
+ if (udvp != NULLVP) {
+ vput(ldvp);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ if (VN_IS_DOOMED(dvp))
+ error = ENOENT;
+ }
+ if (is_dot)
+ goto unionfs_lookup_return;
+ else if (error != 0)
+ goto unionfs_lookup_cleanup;
+ }
+ /*
* lookup upper layer
*/
if (udvp != NULLVP) {
+ bool iswhiteout = false;
+
+ unionfs_forward_vop_start(udvp, &lkflags);
uerror = VOP_LOOKUP(udvp, &uvp, cnp);
+ if (unionfs_forward_vop_finish(dvp, udvp, lkflags)) {
+ if (uvp != NULLVP)
+ VOP_UNLOCK(uvp);
+ error = ENOENT;
+ goto unionfs_lookup_cleanup;
+ }
if (uerror == 0) {
if (udvp == uvp) { /* is dot */
+ if (lvp != NULLVP)
+ vrele(lvp);
vrele(uvp);
*(ap->a_vpp) = dvp;
vref(dvp);
error = uerror;
goto unionfs_lookup_return;
- }
- if (nameiop == DELETE || nameiop == RENAME ||
- (cnp->cn_lkflags & LK_TYPE_MASK))
+ } else if (uvp != NULLVP)
VOP_UNLOCK(uvp);
}
/* check whiteout */
- if (uerror == ENOENT || uerror == EJUSTRETURN)
- if (cnp->cn_flags & ISWHITEOUT)
- iswhiteout = 1; /* don't lookup lower */
- if (iswhiteout == 0 && ldvp != NULLVP)
- if (!VOP_GETATTR(udvp, &va, cnp->cn_cred) &&
- (va.va_flags & OPAQUE))
- iswhiteout = 1; /* don't lookup lower */
+ if ((uerror == ENOENT || uerror == EJUSTRETURN) &&
+ (cnp->cn_flags & ISWHITEOUT))
+ iswhiteout = true;
+ else if (VOP_GETATTR(udvp, &va, cnp->cn_cred) == 0 &&
+ (va.va_flags & OPAQUE))
+ iswhiteout = true;
+
+ if (iswhiteout && lvp != NULLVP) {
+ vrele(lvp);
+ lvp = NULLVP;
+ }
+
#if 0
UNIONFS_INTERNAL_DEBUG(
"unionfs_lookup: debug: whiteout=%d, path=%s\n",
@@ -209,39 +321,6 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
}
/*
- * lookup lower layer
- */
- if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) {
- /* always op is LOOKUP */
- cnp->cn_nameiop = LOOKUP;
- cnflagsbk = cnp->cn_flags;
- cnp->cn_flags = cnflags;
-
- lerror = VOP_LOOKUP(ldvp, &lvp, cnp);
-
- cnp->cn_nameiop = nameiop;
- if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN))
- cnp->cn_flags = cnflagsbk;
-
- if (lerror == 0) {
- if (ldvp == lvp) { /* is dot */
- if (uvp != NULLVP)
- vrele(uvp); /* no need? */
- vrele(lvp);
- *(ap->a_vpp) = dvp;
- vref(dvp);
-
- UNIONFS_INTERNAL_DEBUG(
- "unionfs_lookup: leave (%d)\n", lerror);
-
- return (lerror);
- }
- if (cnp->cn_lkflags & LK_TYPE_MASK)
- VOP_UNLOCK(lvp);
- }
- }
-
- /*
* check lookup result
*/
if (uvp == NULLVP && lvp == NULLVP) {
@@ -280,8 +359,7 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
if (unp == NULL)
error = ENOENT;
else
- error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
- udvp, unp, cnp, td);
+ error = unionfs_mkshadowdir(dvp, vp, cnp, td);
if (lockflag != 0)
VOP_UNLOCK(vp);
if (error != 0) {
@@ -293,6 +371,10 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
vrele(vp);
goto unionfs_lookup_cleanup;
}
+ /*
+ * TODO: Since unionfs_mkshadowdir() relocks udvp after
+ * creating the new directory, return ERELOOKUP here?
+ */
if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED)
vn_lock(vp, LK_SHARED | LK_RETRY);
}
@@ -313,9 +395,12 @@ unionfs_lookup(struct vop_cachedlookup_args *ap)
"unionfs_lookup: Unable to create unionfs vnode.");
goto unionfs_lookup_cleanup;
}
- if ((nameiop == DELETE || nameiop == RENAME) &&
- (cnp->cn_lkflags & LK_TYPE_MASK) == 0)
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ }
+
+ if (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)) {
+ error = ENOENT;
+ vput(vp);
+ goto unionfs_lookup_cleanup;
}
*(ap->a_vpp) = vp;
@@ -329,10 +414,12 @@ unionfs_lookup_cleanup:
if (lvp != NULLVP)
vrele(lvp);
- if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
+ if (error == ENOENT && (cnflags & MAKEENTRY) != 0 &&
+ !VN_IS_DOOMED(dvp))
cache_enter(dvp, NULLVP, cnp);
unionfs_lookup_return:
+ unionfs_clear_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
@@ -492,6 +579,61 @@ unionfs_downgrade_lock(struct vnode *vp, enum unionfs_lkupgrade status)
vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
}
+/*
+ * Exchange the default (upper vnode) lock on a unionfs vnode for the lower
+ * vnode lock, in support of operations that require access to the lower vnode
+ * even when an upper vnode is present. We don't use vn_lock_pair() to hold
+ * both vnodes at the same time, primarily because the caller may proceed
+ * to issue VOPs to the lower layer which re-lock or perform other operations
+ * which may not be safe in the presence of a locked vnode from another FS.
+ * Moreover, vn_lock_pair()'s deadlock resolution approach can introduce
+ * additional overhead that isn't necessary on these paths.
+ *
+ * vp must be a locked unionfs vnode; the lock state of this vnode is
+ * returned through *lkflags for later use in unionfs_unlock_lvp().
+ *
+ * Returns the locked lower vnode, or NULL if the lower vnode (and therefore
+ * also the unionfs vnode above it) has been doomed.
+ */
+static struct vnode *
+unionfs_lock_lvp(struct vnode *vp, int *lkflags)
+{
+ struct unionfs_node *unp;
+ struct vnode *lvp;
+
+ unp = VTOUNIONFS(vp);
+ lvp = unp->un_lowervp;
+ ASSERT_VOP_LOCKED(vp, __func__);
+ ASSERT_VOP_UNLOCKED(lvp, __func__);
+ *lkflags = VOP_ISLOCKED(vp);
+ vref(lvp);
+ VOP_UNLOCK(vp);
+ vn_lock(lvp, *lkflags | LK_RETRY);
+ if (VN_IS_DOOMED(lvp)) {
+ vput(lvp);
+ lvp = NULLVP;
+ vn_lock(vp, *lkflags | LK_RETRY);
+ }
+ return (lvp);
+}
+
+/*
+ * Undo a previous call to unionfs_lock_lvp(), restoring the default lock
+ * on the unionfs vnode. This function reloads and returns the vnode
+ * private data for the unionfs vnode, which will be NULL if the unionfs
+ * vnode became doomed while its lock was dropped. The caller must check
+ * for this case.
+ */
+static struct unionfs_node *
+unionfs_unlock_lvp(struct vnode *vp, struct vnode *lvp, int lkflags)
+{
+ ASSERT_VOP_LOCKED(lvp, __func__);
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+ vput(lvp);
+ vn_lock(vp, lkflags | LK_RETRY);
+ return (VTOUNIONFS(vp));
+}
+
static int
unionfs_open(struct vop_open_args *ap)
{
@@ -504,7 +646,9 @@ unionfs_open(struct vop_open_args *ap)
struct ucred *cred;
struct thread *td;
int error;
+ int lkflags;
enum unionfs_lkupgrade lkstatus;
+ bool lock_lvp, open_lvp;
UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n");
@@ -515,6 +659,7 @@ unionfs_open(struct vop_open_args *ap)
targetvp = NULLVP;
cred = ap->a_cred;
td = ap->a_td;
+ open_lvp = lock_lvp = false;
/*
* The executable loader path may call this function with vp locked
@@ -546,10 +691,12 @@ unionfs_open(struct vop_open_args *ap)
if (targetvp == NULLVP) {
if (uvp == NULLVP) {
if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) {
- error = unionfs_copyfile(unp,
+ error = unionfs_copyfile(vp,
!(ap->a_mode & O_TRUNC), cred, td);
- if (error != 0)
+ if (error != 0) {
+ unp = VTOUNIONFS(vp);
goto unionfs_open_abort;
+ }
targetvp = uvp = unp->un_uppervp;
} else
targetvp = lvp;
@@ -557,30 +704,69 @@ unionfs_open(struct vop_open_args *ap)
targetvp = uvp;
}
+ if (targetvp == uvp && uvp->v_type == VDIR && lvp != NULLVP &&
+ unsp->uns_lower_opencnt <= 0)
+ open_lvp = true;
+ else if (targetvp == lvp && uvp != NULLVP)
+ lock_lvp = true;
+
+ if (lock_lvp) {
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_open_abort;
+ }
+ } else
+ unionfs_forward_vop_start(targetvp, &lkflags);
+
error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp);
- if (error == 0) {
- if (targetvp == uvp) {
- if (uvp->v_type == VDIR && lvp != NULLVP &&
- unsp->uns_lower_opencnt <= 0) {
- /* open lower for readdir */
- error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
- if (error != 0) {
- VOP_CLOSE(uvp, ap->a_mode, cred, td);
- goto unionfs_open_abort;
- }
- unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
- unsp->uns_lower_opencnt++;
+
+ if (lock_lvp) {
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+ } else if (unionfs_forward_vop_finish(vp, targetvp, lkflags))
+ error = error ? error : ENOENT;
+
+ if (error != 0)
+ goto unionfs_open_abort;
+
+ if (targetvp == uvp) {
+ if (open_lvp) {
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_open_abort;
}
- unsp->uns_upper_opencnt++;
- } else {
+ /* open lower for readdir */
+ error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL) {
+ error = error ? error : ENOENT;
+ goto unionfs_open_abort;
+ }
+ if (error != 0) {
+ unionfs_forward_vop_start(uvp, &lkflags);
+ VOP_CLOSE(uvp, ap->a_mode, cred, td);
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+ unp = NULL;
+ goto unionfs_open_abort;
+ }
+ unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
unsp->uns_lower_opencnt++;
- unsp->uns_lower_openmode = ap->a_mode;
}
- vp->v_object = targetvp->v_object;
+ unsp->uns_upper_opencnt++;
+ } else {
+ unsp->uns_lower_opencnt++;
+ unsp->uns_lower_openmode = ap->a_mode;
}
+ vp->v_object = targetvp->v_object;
unionfs_open_abort:
- if (error != 0)
+
+ if (error != 0 && unp != NULL)
unionfs_tryrem_node_status(unp, unsp);
unionfs_open_cleanup:
@@ -599,9 +785,13 @@ unionfs_close(struct vop_close_args *ap)
struct ucred *cred;
struct thread *td;
struct vnode *vp;
+ struct vnode *uvp;
+ struct vnode *lvp;
struct vnode *ovp;
int error;
+ int lkflags;
enum unionfs_lkupgrade lkstatus;
+ bool lock_lvp;
UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
@@ -611,6 +801,7 @@ unionfs_close(struct vop_close_args *ap)
cred = ap->a_cred;
td = ap->a_td;
error = 0;
+ lock_lvp = false;
/*
* If the vnode is reclaimed while upgrading, we can't safely use unp
@@ -621,44 +812,77 @@ unionfs_close(struct vop_close_args *ap)
goto unionfs_close_cleanup;
unp = VTOUNIONFS(vp);
- unionfs_get_node_status(unp, td, &unsp);
+ lvp = unp->un_lowervp;
+ uvp = unp->un_uppervp;
+ unsp = unionfs_find_node_status(unp, td);
- if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) {
+ if (unsp == NULL ||
+ (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0)) {
#ifdef DIAGNOSTIC
- printf("unionfs_close: warning: open count is 0\n");
+ if (unsp != NULL)
+ printf("unionfs_close: warning: open count is 0\n");
#endif
- if (unp->un_uppervp != NULLVP)
- ovp = unp->un_uppervp;
+ if (uvp != NULLVP)
+ ovp = uvp;
else
- ovp = unp->un_lowervp;
+ ovp = lvp;
} else if (unsp->uns_upper_opencnt > 0)
- ovp = unp->un_uppervp;
+ ovp = uvp;
else
- ovp = unp->un_lowervp;
+ ovp = lvp;
+
+ if (ovp == lvp && uvp != NULLVP) {
+ lock_lvp = true;
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
+ } else
+ unionfs_forward_vop_start(ovp, &lkflags);
error = VOP_CLOSE(ovp, ap->a_fflag, cred, td);
+ if (lock_lvp) {
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+ } else if (unionfs_forward_vop_finish(vp, ovp, lkflags))
+ error = error ? error : ENOENT;
+
if (error != 0)
goto unionfs_close_abort;
vp->v_object = ovp->v_object;
- if (ovp == unp->un_uppervp) {
- unsp->uns_upper_opencnt--;
- if (unsp->uns_upper_opencnt == 0) {
+ if (ovp == uvp) {
+ if (unsp != NULL && ((--unsp->uns_upper_opencnt) == 0)) {
if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) {
- VOP_CLOSE(unp->un_lowervp, FREAD, cred, td);
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
+ VOP_CLOSE(lvp, FREAD, cred, td);
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR;
unsp->uns_lower_opencnt--;
}
if (unsp->uns_lower_opencnt > 0)
- vp->v_object = unp->un_lowervp->v_object;
+ vp->v_object = lvp->v_object;
}
- } else
+ } else if (unsp != NULL)
unsp->uns_lower_opencnt--;
unionfs_close_abort:
- unionfs_tryrem_node_status(unp, unsp);
+ if (unp != NULL && unsp != NULL)
+ unionfs_tryrem_node_status(unp, unsp);
unionfs_close_cleanup:
unionfs_downgrade_lock(vp, lkstatus);
@@ -883,7 +1107,7 @@ unionfs_setattr(struct vop_setattr_args *ap)
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- error = unionfs_copyfile(unp, (vap->va_size != 0),
+ error = unionfs_copyfile(ap->a_vp, (vap->va_size != 0),
ap->a_cred, td);
if (error != 0)
return (error);
@@ -1078,8 +1302,10 @@ unionfs_remove(struct vop_remove_args *ap)
error = VOP_REMOVE(udvp, uvp, cnp);
unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
ap->a_vp, uvp, uvp_lkflags);
- } else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen);
+ } else if (lvp != NULLVP) {
+ error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
+ path, pathlen);
+ }
UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
@@ -1096,7 +1322,6 @@ unionfs_link(struct vop_link_args *ap)
struct componentname *cnp;
struct thread *td;
int error;
- int needrelookup;
UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n");
@@ -1104,7 +1329,6 @@ unionfs_link(struct vop_link_args *ap)
KASSERT_UNIONFS_VNODE(ap->a_vp);
error = 0;
- needrelookup = 0;
dunp = VTOUNIONFS(ap->a_tdvp);
unp = NULL;
udvp = dunp->un_uppervp;
@@ -1121,16 +1345,15 @@ unionfs_link(struct vop_link_args *ap)
if (ap->a_vp->v_type != VREG)
return (EOPNOTSUPP);
- error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
- if (error != 0)
- return (error);
- needrelookup = 1;
+ VOP_UNLOCK(ap->a_tdvp);
+ error = unionfs_copyfile(ap->a_vp, 1, cnp->cn_cred, td);
+ vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY);
+ if (error == 0)
+ error = ERELOOKUP;
+ return (error);
}
uvp = unp->un_uppervp;
- if (needrelookup != 0)
- error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
-
if (error == 0) {
int udvp_lkflags, uvp_lkflags;
unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
@@ -1154,8 +1377,6 @@ unionfs_rename(struct vop_rename_args *ap)
struct vnode *tdvp;
struct vnode *tvp;
struct componentname *tcnp;
- struct vnode *ltdvp;
- struct vnode *ltvp;
struct thread *td;
/* rename target vnodes */
@@ -1164,7 +1385,6 @@ unionfs_rename(struct vop_rename_args *ap)
struct vnode *rtdvp;
struct vnode *rtvp;
- struct unionfs_mount *ump;
struct unionfs_node *unp;
int error;
@@ -1177,8 +1397,6 @@ unionfs_rename(struct vop_rename_args *ap)
tdvp = ap->a_tdvp;
tvp = ap->a_tvp;
tcnp = ap->a_tcnp;
- ltdvp = NULLVP;
- ltvp = NULLVP;
td = curthread;
rfdvp = fdvp;
rfvp = fvp;
@@ -1238,7 +1456,6 @@ unionfs_rename(struct vop_rename_args *ap)
UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n",
fvp, unp->un_uppervp, unp->un_lowervp);
#endif
- ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
/*
* If we only have a lower vnode, copy the source file to the upper
* FS so that the rename operation can be issued against the upper FS.
@@ -1282,10 +1499,10 @@ unionfs_rename(struct vop_rename_args *ap)
else if (unp->un_uppervp == NULLVP) {
switch (fvp->v_type) {
case VREG:
- error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
+ error = unionfs_copyfile(fvp, 1, fcnp->cn_cred, td);
break;
case VDIR:
- error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
+ error = unionfs_mkshadowdir(fdvp, fvp, fcnp, td);
break;
default:
error = ENODEV;
@@ -1327,7 +1544,6 @@ unionfs_rename(struct vop_rename_args *ap)
goto unionfs_rename_abort;
}
rtdvp = unp->un_uppervp;
- ltdvp = unp->un_lowervp;
vref(rtdvp);
if (tvp != NULLVP) {
@@ -1348,7 +1564,6 @@ unionfs_rename(struct vop_rename_args *ap)
goto unionfs_rename_abort;
}
rtvp = unp->un_uppervp;
- ltvp = unp->un_lowervp;
vref(rtvp);
}
}
@@ -1365,12 +1580,8 @@ unionfs_rename(struct vop_rename_args *ap)
cache_purge(fdvp);
}
- if (ltdvp != NULLVP)
- VOP_UNLOCK(ltdvp);
if (tdvp != rtdvp)
vrele(tdvp);
- if (ltvp != NULLVP)
- VOP_UNLOCK(ltvp);
if (tvp != rtvp && tvp != NULLVP) {
if (rtvp == NULLVP)
vput(tvp);
@@ -1504,43 +1715,55 @@ unionfs_rmdir(struct vop_rmdir_args *ap)
if (uvp != NULLVP) {
if (lvp != NULLVP) {
+ /*
+ * We need to keep dvp and vp's upper vnodes locked
+ * going into the VOP_RMDIR() call, but the empty
+ * directory check also requires the lower vnode lock.
+ * For this third, cross-filesystem lock we use an
+ * approach similar to that taken by various FS' VOP_RENAME
+ * implementations (which require 2-4 vnode locks).
+ * First we attempt a NOWAIT acquisition, then if
+ * that fails we drop the other two vnode locks,
+ * acquire lvp's lock in the normal fashion to reduce
+ * the likelihood of spinning on it in the future,
+ * then drop it, reacquire the other locks, and return
+ * ERELOOKUP to re-drive the lookup in case the dvp->
+ * vp relationship has changed.
+ */
+ if (vn_lock(lvp, LK_SHARED | LK_NOWAIT) != 0) {
+ VOP_UNLOCK(ap->a_vp);
+ VOP_UNLOCK(ap->a_dvp);
+ vn_lock(lvp, LK_SHARED | LK_RETRY);
+ VOP_UNLOCK(lvp);
+ vn_lock(ap->a_dvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+ return (ERELOOKUP);
+ }
error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td);
+ /*
+ * It's possible for a direct operation on the lower FS
+ * to make the lower directory non-empty after we drop
+ * the lock, but it's also possible for the upper-layer
+ * VOP_RMDIR to relock udvp/uvp which would lead to
+ * LOR if we kept lvp locked across that call.
+ */
+ VOP_UNLOCK(lvp);
if (error != 0)
return (error);
}
ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
- cnp->cn_flags |= DOWHITEOUT;
- /*
- * The relookup path will need to relock the parent dvp and
- * possibly the vp as well. Locking is expected to be done
- * in parent->child order; drop the lock on vp to avoid LOR
- * and potential recursion on vp's lock.
- * vp is expected to remain referenced during VOP_RMDIR(),
- * so vref/vrele should not be necessary here.
- */
- VOP_UNLOCK(ap->a_vp);
- VNPASS(vrefcnt(ap->a_vp) > 0, ap->a_vp);
- error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td);
- vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
- /*
- * VOP_RMDIR is dispatched against udvp, so if uvp became
- * doomed while the lock was dropped above the target
- * filesystem may not be able to cope.
- */
- if (error == 0 && VN_IS_DOOMED(uvp))
- error = ENOENT;
- if (error == 0) {
- int udvp_lkflags, uvp_lkflags;
- unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
- uvp, &uvp_lkflags);
- error = VOP_RMDIR(udvp, uvp, cnp);
- unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
- ap->a_vp, uvp, uvp_lkflags);
- }
- } else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td,
+ cnp->cn_flags |= (DOWHITEOUT | IGNOREWHITEOUT);
+ int udvp_lkflags, uvp_lkflags;
+ unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
+ uvp, &uvp_lkflags);
+ error = VOP_RMDIR(udvp, uvp, cnp);
+ unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
+ ap->a_vp, uvp, uvp_lkflags);
+ } else if (lvp != NULLVP) {
+ error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
unp->un_path, unp->un_pathlen);
+ }
if (error == 0) {
cache_purge(ap->a_dvp);
@@ -1613,6 +1836,7 @@ unionfs_readdir(struct vop_readdir_args *ap)
uint64_t *cookies_bk;
int error;
int eofflag;
+ int lkflags;
int ncookies_bk;
int uio_offset_bk;
enum unionfs_lkupgrade lkstatus;
@@ -1668,18 +1892,26 @@ unionfs_readdir(struct vop_readdir_args *ap)
/* upper only */
if (uvp != NULLVP && lvp == NULLVP) {
+ unionfs_forward_vop_start(uvp, &lkflags);
error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
- unsp->uns_readdir_status = 0;
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+ error = error ? error : ENOENT;
+ else
+ unsp->uns_readdir_status = 0;
goto unionfs_readdir_exit;
}
/* lower only */
if (uvp == NULLVP && lvp != NULLVP) {
+ unionfs_forward_vop_start(lvp, &lkflags);
error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
- unsp->uns_readdir_status = 2;
+ if (unionfs_forward_vop_finish(vp, lvp, lkflags))
+ error = error ? error : ENOENT;
+ else
+ unsp->uns_readdir_status = 2;
goto unionfs_readdir_exit;
}
@@ -1689,14 +1921,17 @@ unionfs_readdir(struct vop_readdir_args *ap)
*/
KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp"));
KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp"));
+
if (uio->uio_offset == 0)
unsp->uns_readdir_status = 0;
if (unsp->uns_readdir_status == 0) {
/* read upper */
+ unionfs_forward_vop_start(uvp, &lkflags);
error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag,
ap->a_ncookies, ap->a_cookies);
-
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags) && error == 0)
+ error = ENOENT;
if (error != 0 || eofflag == 0)
goto unionfs_readdir_exit;
unsp->uns_readdir_status = 1;
@@ -1735,14 +1970,22 @@ unionfs_readdir(struct vop_readdir_args *ap)
uio->uio_offset = 0;
}
- if (lvp == NULLVP) {
- error = EBADF;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULL) {
+ error = ENOENT;
goto unionfs_readdir_exit;
}
+
/* read lower */
error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
+
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+
+
/*
* We can't return an uio_offset of 0: this would trigger an
* infinite loop, because the next call to unionfs_readdir would
@@ -1907,96 +2150,49 @@ unionfs_print(struct vop_print_args *ap)
}
static int
-unionfs_get_llt_revlock(struct vnode *vp, int flags)
-{
- int revlock;
-
- revlock = 0;
-
- switch (flags & LK_TYPE_MASK) {
- case LK_SHARED:
- if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
- revlock = LK_UPGRADE;
- else
- revlock = LK_RELEASE;
- break;
- case LK_EXCLUSIVE:
- case LK_UPGRADE:
- revlock = LK_RELEASE;
- break;
- case LK_DOWNGRADE:
- revlock = LK_UPGRADE;
- break;
- default:
- break;
- }
-
- return (revlock);
-}
-
-/*
- * The state of an acquired lock is adjusted similarly to
- * the time of error generating.
- * flags: LK_RELEASE or LK_UPGRADE
- */
-static void
-unionfs_revlock(struct vnode *vp, int flags)
-{
- if (flags & LK_RELEASE)
- VOP_UNLOCK_FLAGS(vp, flags);
- else {
- /* UPGRADE */
- if (vn_lock(vp, flags) != 0)
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- }
-}
-
-static int
unionfs_lock(struct vop_lock1_args *ap)
{
struct unionfs_node *unp;
struct vnode *vp;
- struct vnode *uvp;
- struct vnode *lvp;
+ struct vnode *tvp;
int error;
int flags;
- int revlock;
- int interlock;
- int uhold;
+ bool lvp_locked;
- /*
- * TODO: rework the unionfs locking scheme.
- * It's not guaranteed to be safe to blindly lock two vnodes on
- * different mounts as is done here. Further, the entanglement
- * of locking both vnodes with the various options that can be
- * passed to VOP_LOCK() makes this code hard to reason about.
- * Instead, consider locking only the upper vnode, or the lower
- * vnode is the upper is not present, and taking separate measures
- * to lock both vnodes in the few cases when that is needed.
- */
error = 0;
- interlock = 1;
- uhold = 0;
flags = ap->a_flags;
vp = ap->a_vp;
if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
return (VOP_UNLOCK_FLAGS(vp, flags | LK_RELEASE));
+unionfs_lock_restart:
+ /*
+ * We currently need the interlock here to ensure we can safely
+ * access the unionfs vnode's private data. We may be able to
+ * eliminate this extra locking by instead using vfs_smr_enter()
+ * and vn_load_v_data_smr() here in conjunction with an SMR UMA
+ * zone for unionfs nodes.
+ */
if ((flags & LK_INTERLOCK) == 0)
VI_LOCK(vp);
+ else
+ flags &= ~LK_INTERLOCK;
unp = VTOUNIONFS(vp);
- if (unp == NULL)
- goto unionfs_lock_null_vnode;
-
- KASSERT_UNIONFS_VNODE(ap->a_vp);
-
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
+ if (unp == NULL) {
+ VI_UNLOCK(vp);
+ ap->a_flags = flags;
+ return (vop_stdlock(ap));
+ }
- if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0)
- panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+ if (unp->un_uppervp != NULL) {
+ tvp = unp->un_uppervp;
+ lvp_locked = false;
+ } else {
+ tvp = unp->un_lowervp;
+ lvp_locked = true;
+ }
/*
* During unmount, the root vnode lock may be taken recursively,
@@ -2009,150 +2205,77 @@ unionfs_lock(struct vop_lock1_args *ap)
(vp->v_vflag & VV_ROOT) != 0)
flags |= LK_CANRECURSE;
- if (lvp != NULLVP) {
- if (uvp != NULLVP && flags & LK_UPGRADE) {
+ vholdnz(tvp);
+ VI_UNLOCK(vp);
+ error = VOP_LOCK(tvp, flags);
+ vdrop(tvp);
+ if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) {
+ /*
+ * After dropping the interlock above, there exists a window
+ * in which another thread may acquire the lower vnode lock
+ * and then either doom the unionfs vnode or create an upper
+ * vnode. In either case, we will effectively be holding the
+ * wrong lock, so we must drop the lower vnode lock and
+ * restart the lock operation.
+ *
+ * If unp is not already NULL, we assume that we can safely
+ * access it because we currently hold lvp's lock.
+ * unionfs_noderem() acquires lvp's lock before freeing
+ * the vnode private data, ensuring it can't be concurrently
+ * freed while we are using it here. Likewise,
+ * unionfs_node_update() acquires lvp's lock before installing
+ * an upper vnode. Without those guarantees, we would need to
+ * reacquire the vnode interlock here.
+ * Note that unionfs_noderem() doesn't acquire lvp's lock if
+ * this is the root vnode, but the root vnode should always
+ * have an upper vnode and therefore we should never use its
+ * lower vnode lock here.
+ */
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL || unp->un_uppervp != NULLVP) {
+ VOP_UNLOCK(tvp);
/*
- * Share Lock is once released and a deadlock is
- * avoided.
+ * If we previously held the lock, the upgrade may
+ * have temporarily dropped the lock, in which case
+ * concurrent dooming or copy-up will necessitate
+ * acquiring a different lock. Since we never held
+ * the new lock, LK_UPGRADE must be cleared here to
+ * avoid triggering a lockmgr panic.
*/
- vholdnz(uvp);
- uhold = 1;
- VOP_UNLOCK(uvp);
- }
- VI_LOCK_FLAGS(lvp, MTX_DUPOK);
- flags |= LK_INTERLOCK;
- vholdl(lvp);
-
- VI_UNLOCK(vp);
- ap->a_flags &= ~LK_INTERLOCK;
-
- error = VOP_LOCK(lvp, flags);
-
- VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- if (unp == NULL) {
- /* vnode is released. */
- VI_UNLOCK(vp);
- if (error == 0)
- VOP_UNLOCK(lvp);
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
- goto unionfs_lock_fallback;
+ if (flags & LK_UPGRADE)
+ flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
+ VNASSERT((flags & LK_DOWNGRADE) == 0, vp,
+ ("%s: vnode doomed during downgrade", __func__));
+ goto unionfs_lock_restart;
}
}
- if (error == 0 && uvp != NULLVP) {
- if (uhold && flags & LK_UPGRADE) {
- flags &= ~LK_TYPE_MASK;
- flags |= LK_EXCLUSIVE;
- }
- VI_LOCK_FLAGS(uvp, MTX_DUPOK);
- flags |= LK_INTERLOCK;
- if (uhold == 0) {
- vholdl(uvp);
- uhold = 1;
- }
-
- VI_UNLOCK(vp);
- ap->a_flags &= ~LK_INTERLOCK;
-
- error = VOP_LOCK(uvp, flags);
-
- VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- if (unp == NULL) {
- /* vnode is released. */
- VI_UNLOCK(vp);
- if (error == 0)
- VOP_UNLOCK(uvp);
- vdrop(uvp);
- if (lvp != NULLVP) {
- VOP_UNLOCK(lvp);
- vdrop(lvp);
- }
- goto unionfs_lock_fallback;
- }
- if (error != 0 && lvp != NULLVP) {
- /* rollback */
- VI_UNLOCK(vp);
- unionfs_revlock(lvp, revlock);
- interlock = 0;
- }
- }
-
- if (interlock)
- VI_UNLOCK(vp);
- if (lvp != NULLVP)
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
-
return (error);
-
-unionfs_lock_null_vnode:
- ap->a_flags |= LK_INTERLOCK;
- return (vop_stdlock(ap));
-
-unionfs_lock_fallback:
- /*
- * If we reach this point, we've discovered the unionfs vnode
- * has been reclaimed while the upper/lower vnode locks were
- * temporarily dropped. Such temporary droppage may happen
- * during the course of an LK_UPGRADE operation itself, and in
- * that case LK_UPGRADE must be cleared as the unionfs vnode's
- * lock has been reset to point to the standard v_lock field,
- * which has not previously been held.
- */
- if (flags & LK_UPGRADE) {
- ap->a_flags &= ~LK_TYPE_MASK;
- ap->a_flags |= LK_EXCLUSIVE;
- }
- return (vop_stdlock(ap));
}
static int
unionfs_unlock(struct vop_unlock_args *ap)
{
struct vnode *vp;
- struct vnode *lvp;
- struct vnode *uvp;
+ struct vnode *tvp;
struct unionfs_node *unp;
int error;
- int uhold;
KASSERT_UNIONFS_VNODE(ap->a_vp);
- error = 0;
- uhold = 0;
vp = ap->a_vp;
unp = VTOUNIONFS(vp);
if (unp == NULL)
- goto unionfs_unlock_null_vnode;
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
+ return (vop_stdunlock(ap));
- if (lvp != NULLVP) {
- vholdnz(lvp);
- error = VOP_UNLOCK(lvp);
- }
+ tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp);
- if (error == 0 && uvp != NULLVP) {
- vholdnz(uvp);
- uhold = 1;
- error = VOP_UNLOCK(uvp);
- }
+ vholdnz(tvp);
+ error = VOP_UNLOCK(tvp);
+ vdrop(tvp);
- if (lvp != NULLVP)
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
-
- return error;
-
-unionfs_unlock_null_vnode:
- return (vop_stdunlock(ap));
+ return (error);
}
static int
@@ -2192,7 +2315,7 @@ unionfs_advlock(struct vop_advlock_args *ap)
uvp = unp->un_uppervp;
if (uvp == NULLVP) {
- error = unionfs_copyfile(unp, 1, td->td_ucred, td);
+ error = unionfs_copyfile(ap->a_vp, 1, td->td_ucred, td);
if (error != 0)
goto unionfs_advlock_abort;
uvp = unp->un_uppervp;
@@ -2294,7 +2417,7 @@ unionfs_setacl(struct vop_setacl_args *ap)
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+ if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
return (error);
uvp = unp->un_uppervp;
}
@@ -2467,9 +2590,10 @@ unionfs_setextattr(struct vop_setextattr_args *ap)
if (ovp == lvp && lvp->v_type == VREG) {
VOP_CLOSEEXTATTR(lvp, 0, cred, td);
if (uvp == NULLVP &&
- (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+ (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
unionfs_setextattr_reopen:
- if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+ unp = VTOUNIONFS(ap->a_vp);
+ if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
VOP_OPENEXTATTR(lvp, cred, td)) {
#ifdef DIAGNOSTIC
panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2561,9 +2685,10 @@ unionfs_deleteextattr(struct vop_deleteextattr_args *ap)
if (ovp == lvp && lvp->v_type == VREG) {
VOP_CLOSEEXTATTR(lvp, 0, cred, td);
if (uvp == NULLVP &&
- (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+ (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
unionfs_deleteextattr_reopen:
- if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+ unp = VTOUNIONFS(ap->a_vp);
+ if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
VOP_OPENEXTATTR(lvp, cred, td)) {
#ifdef DIAGNOSTIC
panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2613,7 +2738,7 @@ unionfs_setlabel(struct vop_setlabel_args *ap)
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+ if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
return (error);
uvp = unp->un_uppervp;
}
@@ -2665,7 +2790,7 @@ static int
unionfs_vput_pair(struct vop_vput_pair_args *ap)
{
struct mount *mp;
- struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp;
+ struct vnode *dvp, *vp, **vpp, *lvp, *uvp, *tvp, *tdvp, *tempvp;
struct unionfs_node *dunp, *unp;
int error, res;
@@ -2674,11 +2799,14 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
vp = NULLVP;
lvp = NULLVP;
uvp = NULLVP;
+ tvp = NULLVP;
unp = NULL;
dunp = VTOUNIONFS(dvp);
- udvp = dunp->un_uppervp;
- ldvp = dunp->un_lowervp;
+ if (dunp->un_uppervp != NULL)
+ tdvp = dunp->un_uppervp;
+ else
+ tdvp = dunp->un_lowervp;
/*
* Underlying vnodes should be locked because the encompassing unionfs
@@ -2686,10 +2814,7 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
* only be on the unionfs node. Reference them now so that the vput()s
* performed by VOP_VPUT_PAIR() will have a reference to drop.
*/
- if (udvp != NULLVP)
- vref(udvp);
- if (ldvp != NULLVP)
- vref(ldvp);
+ vref(tdvp);
if (vpp != NULL)
vp = *vpp;
@@ -2699,9 +2824,10 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
uvp = unp->un_uppervp;
lvp = unp->un_lowervp;
if (uvp != NULLVP)
- vref(uvp);
- if (lvp != NULLVP)
- vref(lvp);
+ tvp = uvp;
+ else
+ tvp = lvp;
+ vref(tvp);
/*
* If we're being asked to return a locked child vnode, then
@@ -2721,31 +2847,19 @@ unionfs_vput_pair(struct vop_vput_pair_args *ap)
}
}
- /*
- * TODO: Because unionfs_lock() locks both the lower and upper vnodes
- * (if available), we must also call VOP_VPUT_PAIR() on both the lower
- * and upper parent/child pairs. If unionfs_lock() is reworked to lock
- * only a single vnode, this code will need to change to also only
- * operate on one vnode pair.
- */
- ASSERT_VOP_LOCKED(ldvp, __func__);
- ASSERT_VOP_LOCKED(udvp, __func__);
- ASSERT_VOP_LOCKED(lvp, __func__);
- ASSERT_VOP_LOCKED(uvp, __func__);
-
- KASSERT(lvp == NULLVP || ldvp != NULLVP,
- ("%s: NULL ldvp with non-NULL lvp", __func__));
- if (ldvp != NULLVP)
- res = VOP_VPUT_PAIR(ldvp, lvp != NULLVP ? &lvp : NULL, true);
- KASSERT(uvp == NULLVP || udvp != NULLVP,
- ("%s: NULL udvp with non-NULL uvp", __func__));
- if (udvp != NULLVP)
- res = VOP_VPUT_PAIR(udvp, uvp != NULLVP ? &uvp : NULL, true);
-
- ASSERT_VOP_UNLOCKED(ldvp, __func__);
- ASSERT_VOP_UNLOCKED(udvp, __func__);
- ASSERT_VOP_UNLOCKED(lvp, __func__);
- ASSERT_VOP_UNLOCKED(uvp, __func__);
+ ASSERT_VOP_LOCKED(tdvp, __func__);
+ ASSERT_VOP_LOCKED(tvp, __func__);
+
+ if (tdvp == dunp->un_uppervp && tvp != NULLVP && tvp == lvp) {
+ vput(tvp);
+ vput(tdvp);
+ res = 0;
+ } else {
+ res = VOP_VPUT_PAIR(tdvp, tvp != NULLVP ? &tvp : NULL, true);
+ }
+
+ ASSERT_VOP_UNLOCKED(tdvp, __func__);
+ ASSERT_VOP_UNLOCKED(tvp, __func__);
/*
* VOP_VPUT_PAIR() dropped the references we added to the underlying