From 6d9daf7da2883bacd006132f8a1b6a7c388e191c Mon Sep 17 00:00:00 2001
From: Jakub Wojciech Klama <jceel@FreeBSD.org>
Date: Wed, 29 Apr 2020 16:24:32 +0000
Subject: Import lib9p 7ddb1164407da19b9b1afb83df83ae65a71a9a66.

Approved by:	trasz (mentor)
MFC after:	1 month
Sponsored by:	Conclusive Engineering
---
 .gitignore                 |   37 +
 COPYRIGHT                  |   47 +
 GNUmakefile                |   76 ++
 Makefile                   |   27 +
 README.md                  |   20 +
 apple_endian.h             |   27 +
 backend/backend.h          |   69 +
 backend/fs.c               | 3061 ++++++++++++++++++++++++++++++++++++++++++++
 backend/fs.h               |   37 +
 connection.c               |  215 ++++
 example/Makefile           |   10 +
 example/server.c           |   89 ++
 fcall.h                    |  624 +++++++++
 fid.h                      |  160 +++
 genacl.c                   |  720 +++++++++++
 genacl.h                   |  307 +++++
 hashtable.c                |  267 ++++
 hashtable.h                |  107 ++
 lib9p.h                    |  249 ++++
 lib9p_impl.h               |   78 ++
 linux_errno.h              |  247 ++++
 log.c                      |   67 +
 log.h                      |   46 +
 pack.c                     |  993 ++++++++++++++
 pytest/.gitignore          |    3 +
 pytest/Makefile            |    9 +
 pytest/README              |   32 +
 pytest/client.py           |  643 ++++++++++
 pytest/lerrno.py           |  291 +++++
 pytest/numalloc.py         |  379 ++++++
 pytest/p9conn.py           | 1788 ++++++++++++++++++++++++++
 pytest/p9err.py            |  146 +++
 pytest/pfod.py             |  204 +++
 pytest/protocol.py         | 1998 +++++++++++++++++++++++++++++
 pytest/sequencer.py        |  653 ++++++++++
 pytest/testconf.ini.sample |   16 +
 request.c                  | 1440 +++++++++++++++++++++
 rfuncs.c                   |  320 +++++
 rfuncs.h                   |   79 ++
 sbuf/sbuf.c                |  127 ++
 sbuf/sbuf.h                |   55 +
 threadpool.c               |  422 ++++++
 threadpool.h               |  118 ++
 transport/socket.c         |  363 ++++++
 transport/socket.h         |   40 +
 utils.c                    | 1268 ++++++++++++++++++
 46 files changed, 17974 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 COPYRIGHT
 create mode 100644 GNUmakefile
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 apple_endian.h
 create mode 100644 backend/backend.h
 create mode 100644 backend/fs.c
 create mode 100644 backend/fs.h
 create mode 100644 connection.c
 create mode 100644 example/Makefile
 create mode 100644 example/server.c
 create mode 100644 fcall.h
 create mode 100644 fid.h
 create mode 100644 genacl.c
 create mode 100644 genacl.h
 create mode 100644 hashtable.c
 create mode 100644 hashtable.h
 create mode 100644 lib9p.h
 create mode 100644 lib9p_impl.h
 create mode 100644 linux_errno.h
 create mode 100644 log.c
 create mode 100644 log.h
 create mode 100644 pack.c
 create mode 100644 pytest/.gitignore
 create mode 100644 pytest/Makefile
 create mode 100644 pytest/README
 create mode 100755 pytest/client.py
 create mode 100644 pytest/lerrno.py
 create mode 100644 pytest/numalloc.py
 create mode 100644 pytest/p9conn.py
 create mode 100644 pytest/p9err.py
 create mode 100644 pytest/pfod.py
 create mode 100644 pytest/protocol.py
 create mode 100644 pytest/sequencer.py
 create mode 100644 pytest/testconf.ini.sample
 create mode 100644 request.c
 create mode 100644 rfuncs.c
 create mode 100644 rfuncs.h
 create mode 100644 sbuf/sbuf.c
 create mode 100644 sbuf/sbuf.h
 create mode 100644 threadpool.c
 create mode 100644 threadpool.h
 create mode 100644 transport/socket.c
 create mode 100644 transport/socket.h
 create mode 100644 utils.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000000..beccfc6b48ec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,37 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+/build/
+
+*.po
+*.pico
+*.depend
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 000000000000..b02f09aabdd9
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,47 @@
+Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted providing that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Some parts of the code are based on libixp (http://libs.suckless.org/libixp)
+library code released under following license:
+
+© 2005-2006 Anselm R. Garbe <garbeam@gmail.com>
+© 2006-2010 Kris Maglione <maglione.k at Gmail>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644
index 000000000000..79108f2ceb61
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,76 @@
+CC_VERSION := $(shell $(CC) --version | \
+    sed -n -e '/clang-/s/.*clang-\([0-9][0-9]*\).*/\1/p')
+ifeq ($(CC_VERSION),)
+# no "clang-<number>" in the version output: probably not clang
+CC_VERSION := 0
+endif
+
+WFLAGS :=
+
+# Warnings are version-dependent, unfortunately,
+# so test for version before adding a -W flag.
+# Note: gnu make requires $(shell test ...) for "a > b" type tests.
+ifeq ($(shell test $(CC_VERSION) -gt 0; echo $$?),0)
+WFLAGS += -Weverything
+WFLAGS += -Wno-padded
+WFLAGS += -Wno-gnu-zero-variadic-macro-arguments
+WFLAGS += -Wno-format-nonliteral
+WFLAGS += -Wno-unused-macros
+WFLAGS += -Wno-disabled-macro-expansion
+WFLAGS += -Werror
+endif
+
+ifeq ($(shell test $(CC_VERSION) -gt 600; echo $$?),0)
+WFLAGS += -Wno-reserved-id-macro
+endif
+
+CFLAGS := $(WFLAGS) \
+	-g \
+	-O0 \
+	-DL9P_DEBUG=L9P_DEBUG
+# Note: to turn on debug, use -DL9P_DEBUG=L9P_DEBUG (as above),
+# and set env variable LIB9P_LOGGING to stderr or to the name
+# (preferably a full path name) of the debug log file.
+
+LIB_SRCS := \
+	pack.c \
+	connection.c \
+	request.c \
+	genacl.c \
+	log.c \
+	hashtable.c \
+	utils.c \
+	rfuncs.c \
+	threadpool.c \
+	sbuf/sbuf.c \
+	transport/socket.c \
+	backend/fs.c
+
+SERVER_SRCS := \
+	example/server.c
+
+BUILD_DIR := build
+LIB_OBJS := $(addprefix build/,$(LIB_SRCS:.c=.o))
+SERVER_OBJS := $(SERVER_SRCS:.c=.o)
+LIB := lib9p.dylib
+SERVER := server
+
+.PHONY: all clean
+all: build $(LIB) $(SERVER)
+
+$(LIB): $(LIB_OBJS)
+	$(CC) -dynamiclib $^ -o build/$@
+
+$(SERVER): $(SERVER_OBJS) $(LIB)
+	$(CC) $< -o build/$(SERVER) -Lbuild/ -l9p
+
+clean:
+	rm -rf build
+	rm -f $(SERVER_OBJS)
+# Object directories; -p makes this safe to re-run on a partial tree.
+build:
+	mkdir -p build/sbuf build/transport build/backend
+
+# Order-only prerequisite: the directories must exist before compiling.
+build/%.o: %.c | build
+	$(CC) $(CFLAGS) -c $< -o $@
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000000..cb9f364329e5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Note: to turn on debug, use -DL9P_DEBUG=L9P_DEBUG (as in CFLAGS below),
+# and set env variable LIB9P_LOGGING to stderr or to the name
+# (preferably a full path name) of the debug log file.
+
+LIB=		9p
+SHLIB_MAJOR=	1
+SRCS=		pack.c \
+		connection.c \
+		request.c log.c \
+		hashtable.c \
+		genacl.c \
+		utils.c \
+		rfuncs.c \
+		threadpool.c \
+		transport/socket.c \
+		backend/fs.c
+
+INCS=		lib9p.h
+CC=		clang
+CFLAGS=		-g -O0 -DL9P_DEBUG=L9P_DEBUG -DWITH_CASPER
+LIBADD=		sbuf libcasper libcap_pwd libcap_grp
+SUBDIR=		example
+
+cscope: .PHONY
+	cd ${.CURDIR}; cscope -buq $$(find . -name '*.[ch]' -print)
+
+.include <bsd.lib.mk>
diff --git a/README.md b/README.md
new file mode 100644
index 000000000000..f74a978fb52b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# lib9p
+
+lib9p is a server library implementing the 9p2000, 9p2000.u and 9p2000.L
+revisions of the 9P protocol. It is being developed primarily as a backend
+for virtio-9p in bhyve, the FreeBSD hypervisor.
+
+# Features
+
+* 9p2000, 9p2000.u and 9p2000.L protocol support
+* Built-in TCP transport
+
+# Supported operating systems
+
+* FreeBSD (>=10)
+* macOS (>=10.9)
+
+# Authors
+
+* Jakub Klama [jceel](https://github.com/jceel)
+* Chris Torek [chris3torek](https://github.com/chris3torek)
diff --git a/apple_endian.h b/apple_endian.h
new file mode 100644
index 000000000000..d061b643b8f2
--- /dev/null
+++ b/apple_endian.h
@@ -0,0 +1,27 @@
+#ifndef _APPLE_ENDIAN_H
+#define _APPLE_ENDIAN_H
+
+/*
+ * Shims exposing Apple's <libkern/OSByteOrder.h> byte-order helpers under
+ * <sys/endian.h>-style names; only the 32-bit conversions are provided.
+ */
+
+# include <libkern/OSByteOrder.h>
+
+# define _LITTLE_ENDIAN 0x12345678
+# define _BIG_ENDIAN 0x87654321
+
+# ifdef __LITTLE_ENDIAN__
+#  define _BYTE_ORDER _LITTLE_ENDIAN
+# endif
+# ifdef __BIG_ENDIAN__
+#  define _BYTE_ORDER _BIG_ENDIAN
+# endif
+
+# define htole32(x)	OSSwapHostToLittleInt32(x)
+# define le32toh(x)	OSSwapLittleToHostInt32(x)
+
+# define htobe32(x)	OSSwapHostToBigInt32(x)
+# define be32toh(x)	OSSwapBigToHostInt32(x)
+
+#endif /* _APPLE_ENDIAN_H */
diff --git a/backend/backend.h b/backend/backend.h
new file mode 100644
index 000000000000..2b4bf2d8e480
--- /dev/null
+++ b/backend/backend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_BACKEND_H
+#define LIB9P_BACKEND_H
+
+struct l9p_backend {
+	void *softc;		/* backend-private state (untyped here) */
+	void (*freefid)(void *, struct l9p_fid *);	/* takes a fid */
+	int (*attach)(void *, struct l9p_request *);
+	int (*clunk)(void *, struct l9p_fid *);		/* takes a fid */
+	int (*create)(void *, struct l9p_request *);
+	int (*open)(void *, struct l9p_request *);
+	int (*read)(void *, struct l9p_request *);
+	int (*remove)(void *, struct l9p_fid *);	/* takes a fid */
+	int (*stat)(void *, struct l9p_request *);
+	int (*walk)(void *, struct l9p_request *);
+	int (*write)(void *, struct l9p_request *);
+	int (*wstat)(void *, struct l9p_request *);
+	int (*statfs)(void *, struct l9p_request *);
+	int (*lopen)(void *, struct l9p_request *);
+	int (*lcreate)(void *, struct l9p_request *);
+	int (*symlink)(void *, struct l9p_request *);
+	int (*mknod)(void *, struct l9p_request *);
+	int (*rename)(void *, struct l9p_request *);
+	int (*readlink)(void *, struct l9p_request *);
+	int (*getattr)(void *, struct l9p_request *);
+	int (*setattr)(void *, struct l9p_request *);
+	int (*xattrwalk)(void *, struct l9p_request *);
+	int (*xattrcreate)(void *, struct l9p_request *);
+	int (*xattrread)(void *, struct l9p_request *);
+	int (*xattrwrite)(void *, struct l9p_request *);
+	int (*xattrclunk)(void *, struct l9p_fid *);	/* takes a fid */
+	int (*readdir)(void *, struct l9p_request *);
+	int (*fsync)(void *, struct l9p_request *);
+	int (*lock)(void *, struct l9p_request *);
+	int (*getlock)(void *, struct l9p_request *);
+	int (*link)(void *, struct l9p_request *);
+	int (*mkdir)(void *, struct l9p_request *);
+	int (*renameat)(void *, struct l9p_request *);
+	int (*unlinkat)(void *, struct l9p_request *);
+};
+
+#endif  /* LIB9P_BACKEND_H */
diff --git a/backend/fs.c b/backend/fs.c
new file mode 100644
index 000000000000..474de756734e
--- /dev/null
+++ b/backend/fs.c
@@ -0,0 +1,3061 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dirent.h>
+#include <pwd.h>
+#include <grp.h>
+#include <libgen.h>
+#include <pthread.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../fid.h"
+#include "../log.h"
+#include "../rfuncs.h"
+#include "../genacl.h"
+#include "backend.h"
+#include "fs.h"
+
+#if defined(WITH_CASPER)
+  #include <libcasper.h>
+  #include <casper/cap_pwd.h>
+  #include <casper/cap_grp.h>
+#endif
+
+#if defined(__FreeBSD__)
+  #include <sys/param.h>
+  #if __FreeBSD_version >= 1000000
+    #define	HAVE_BINDAT
+  #endif
+#endif
+
+#if defined(__FreeBSD__)
+  #define	HAVE_BIRTHTIME
+#endif
+
+#if defined(__APPLE__)
+  #include <sys/syscall.h>
+  #include "Availability.h"
+  #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
+#endif
+
+struct fs_softc {
+	int 	fs_rootfd;	/* presumably fd of the exported root -- confirm */
+	bool	fs_readonly;	/* if set, fs_nde() fails with EROFS */
+#if defined(WITH_CASPER)
+	cap_channel_t *fs_cappwd; /* channel for r_cap_getpwuid() lookups */
+	cap_channel_t *fs_capgrp; /* channel for r_cap_getgrgid() lookups */
+#endif
+};
+
+struct fs_fid {
+	DIR	*ff_dir;
+	int	ff_dirfd;	/* directory fd for *at() calls on ff_name */
+	int	ff_fd;		/* initialized to -1 by open_fid() */
+	int	ff_flags;	/* FF_* bits, defined below */
+	char	*ff_name;	/* strdup()ed path, resolved via ff_dirfd */
+	struct fs_authinfo *ff_ai; /* refcounted, shared authinfo */
+	pthread_mutex_t ff_mtx;
+	struct l9p_acl *ff_acl; /* cached ACL if any */
+};
+
+#define	FF_NO_NFSV4_ACL	0x01	/* don't go looking for NFSv4 ACLs */
+/*	FF_NO_POSIX_ACL	0x02	-- not yet */
+
+/*
+ * Our authinfo consists of:
+ *
+ *  - a reference count
+ *  - a uid
+ *  - a gid-set
+ *
+ * The "default" gid is the first gid in the gid-set, provided the
+ * set size is at least 1.  The set-size may be zero, though.
+ *
+ * Adjustments to the ref-count must be atomic, once it's shared.
+ * It would be nice to use C11 atomics here but they are not common
+ * enough to all systems just yet; for now, we use a mutex.
+ *
+ * Note that some ops (Linux style ones) pass an effective gid for
+ * the op, in which case, that gid may override.  To achieve this
+ * effect, permissions testing functions also take an extra gid.
+ * If this gid is (gid_t)-1 it is not used and only the remaining
+ * gids take part.
+ *
+ * The uid may also be (uid_t)-1, meaning "no uid was available
+ * at all at attach time".  In this case, new files inherit parent
+ * directory uids.
+ *
+ * The refcount is simply the number of "openfile"s using this
+ * authinfo (so that when the last ref goes away, we can free it).
+ *
+ * There are also master ACL flags (same as in ff_flags).
+ */
+struct fs_authinfo {
+	pthread_mutex_t ai_mtx;	/* lock for refcnt */
+	uint32_t ai_refcnt;	/* bumped by open_fid() under ai_mtx */
+	int	ai_flags;	/* master ACL flags, same bits as ff_flags */
+	uid_t	ai_uid;		/* (uid_t)-1: no uid available at attach */
+	int	ai_ngids;	/* entries in ai_gids[]; may be zero */
+	gid_t	ai_gids[];	/* NB: flexible array member */
+};
+
+/*
+ * We have a global-static mutex for single-threading Tattach
+ * requests, which use getpwnam (and indirectly, getgr* functions)
+ * which are not reentrant.
+ */
+static bool fs_attach_mutex_inited;
+static pthread_mutex_t fs_attach_mutex;
+
+/*
+ * Internal functions (except inline functions).
+ */
+static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
+static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
+static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
+static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
+    struct stat *st);
+static int fs_dpf(char *, char *, size_t);
+static int fs_oflags_dotu(int, int *);
+static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
+static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
+    struct stat *, uid_t *, gid_t *);
+static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
+static void dostat(struct fs_softc *, struct l9p_stat *, char *,
+    struct stat *, bool dotu);
+static void dostatfs(struct l9p_statfs *, struct statfs *, long);
+static void fillacl(struct fs_fid *ff);
+static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
+static void dropacl(struct fs_fid *ff);
+static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
+    const char *path);
+static int check_access(int32_t,
+    struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
+    struct fs_authinfo *, gid_t);
+static void generate_qid(struct stat *, struct l9p_qid *);
+
+static int fs_icreate(void *, struct l9p_fid *, char *, int,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
+    gid_t, struct stat *);
+static int fs_imkdir(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imkfifo(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imknod(void *, struct l9p_fid *, char *,
+    bool, mode_t, dev_t, gid_t, struct stat *);
+static int fs_imksocket(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
+    gid_t, struct stat *);
+
+/*
+ * Internal functions implementing backend.
+ */
+static int fs_attach(void *, struct l9p_request *);
+static int fs_clunk(void *, struct l9p_fid *);
+static int fs_create(void *, struct l9p_request *);
+static int fs_open(void *, struct l9p_request *);
+static int fs_read(void *, struct l9p_request *);
+static int fs_remove(void *, struct l9p_fid *);
+static int fs_stat(void *, struct l9p_request *);
+static int fs_walk(void *, struct l9p_request *);
+static int fs_write(void *, struct l9p_request *);
+static int fs_wstat(void *, struct l9p_request *);
+static int fs_statfs(void *, struct l9p_request *);
+static int fs_lopen(void *, struct l9p_request *);
+static int fs_lcreate(void *, struct l9p_request *);
+static int fs_symlink(void *, struct l9p_request *);
+static int fs_mknod(void *, struct l9p_request *);
+static int fs_rename(void *, struct l9p_request *);
+static int fs_readlink(void *, struct l9p_request *);
+static int fs_getattr(void *, struct l9p_request *);
+static int fs_setattr(void *, struct l9p_request *);
+static int fs_xattrwalk(void *, struct l9p_request *);
+static int fs_xattrcreate(void *, struct l9p_request *);
+static int fs_readdir(void *, struct l9p_request *);
+static int fs_fsync(void *, struct l9p_request *);
+static int fs_lock(void *, struct l9p_request *);
+static int fs_getlock(void *, struct l9p_request *);
+static int fs_link(void *, struct l9p_request *);
+static int fs_renameat(void *, struct l9p_request *);
+static int fs_unlinkat(void *, struct l9p_request *);
+static void fs_freefid(void *, struct l9p_fid *);
+
+/*
+ * Convert from 9p2000 open/create mode to Unix-style O_* flags.
+ * This includes 9p2000.u extensions, but not 9p2000.L protocol,
+ * which has entirely different open, create, etc., flag bits.
+ *
+ * The <mode> given here is the one-byte (uint8_t) "mode"
+ * argument to Tcreate or Topen, so it can have at most 8 bits.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * both say:
+ *
+ *   The [low two bits of the] mode field determines the
+ *   type of I/O ... [I]f mode has the OTRUNC (0x10) bit
+ *   set, the file is to be truncated, which requires write
+ *   permission ...; if the mode has the ORCLOSE (0x40) bit
+ *   set, the file is to be removed when the fid is clunked,
+ *   which requires permission to remove the file from its
+ *   directory.  All other bits in mode should be zero.  It
+ *   is illegal to write a directory, truncate it, or
+ *   attempt to remove it on close.
+ *
+ * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
+ * The fcall.h header defines OCEXEC (0x20) as well, but it makes
+ * no sense to send this to a server.  There seem to be no bits
+ * 0x04 and 0x08.
+ *
+ * We always turn on O_NOCTTY since as a server, we never want
+ * to gain a controlling terminal.  We always turn on O_NOFOLLOW
+ * for reasons described elsewhere.
+ */
+static int
+fs_oflags_dotu(int mode, int *aflags)
+{
+	int oflags;
+
+	/*
+	 * <mode> is consumed as we go: handled bits are cleared,
+	 * so anything left over at the end is an unknown bit.
+	 */
+	switch (mode & L9P_OACCMODE) {
+
+	case L9P_OWRITE:
+		oflags = O_WRONLY;
+		break;
+
+	case L9P_ORDWR:
+		oflags = O_RDWR;
+		break;
+
+	case L9P_OEXEC:
+		/* Exec opens read-only, so truncation is refused. */
+		if ((mode & L9P_OTRUNC) != 0)
+			return (EINVAL);
+		oflags = O_RDONLY;
+		break;
+
+	case L9P_OREAD:
+	default:
+		oflags = O_RDONLY;
+		break;
+	}
+
+	/* Unconditional server-side flags. */
+	oflags |= O_NOCTTY | O_NOFOLLOW;
+
+	/* OTRUNC is the only remaining bit with an O_* counterpart. */
+	if ((mode & L9P_OTRUNC) != 0) {
+		mode &= ~L9P_OTRUNC;
+		oflags |= O_TRUNC;
+	}
+
+	/*
+	 * Strip the bits that need no translation here:
+	 *   - the access mode, handled by the switch above
+	 *   - ORCLOSE, which is the caller's business
+	 *   - OCEXEC, meaningless inside a server
+	 *   - ODIRECT, not applicable here
+	 * Whatever is still set could not be translated; reject
+	 * it with EINVAL (and log it) so problems get noticed.
+	 */
+	mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
+	if (mode == 0) {
+		*aflags = oflags;
+		return (0);
+	}
+	L9P_LOG(L9P_INFO,
+	    "fs_oflags_dotu: untranslated bits: %#x",
+	    (unsigned)mode);
+	return (EINVAL);
+}
+
+/*
+ * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
+ * See fs_oflags_dotu above.
+ *
+ * Linux currently does not have open-for-exec, but there is a
+ * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
+ *
+ * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
+ */
+static int
+fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
+{
+	int flags;
+	enum l9p_omode p9;
+#define	CLEAR(theirs)	l_mode &= ~(uint32_t)(theirs)
+#define	CONVERT(theirs, ours) \
+	do { \
+		if (l_mode & (theirs)) { \
+			CLEAR(theirs); \
+			flags |= ours; \
+		} \
+	} while (0)
+
+	/*
+	 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS; 3 is invalid.
+	 */
+	flags = l_mode & O_ACCMODE;
+	if (flags == 3)
+		return (EINVAL);
+	CLEAR(O_ACCMODE);
+
+	if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) ==
+		    (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) {
+		CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
+		p9 = L9P_OEXEC;
+	} else {
+		/*
+		 * Slightly dirty, but same dirt, really, as
+		 * setting flags from l_mode & O_ACCMODE.
+		 */
+		p9 = (enum l9p_omode)flags;	/* access mode reused as 9P mode */
+	}
+
+	/* TODO: turn L_O_TMPFILE into L9P_ORCLOSE in p9? */
+	if (l_mode & L9P_L_O_TRUNC)
+		p9 |= L9P_OTRUNC;	/* bit is CLEARed by CONVERT below */
+
+	flags |= O_NOCTTY | O_NOFOLLOW;
+
+	/*
+	 * L_O_CREAT seems to be noise, since we get separate open
+	 * and create.  But it is actually set sometimes.  We just
+	 * throw it out here; create ops must set it themselves and
+	 * open ops have no permissions bits and hence cannot create.
+	 *
+	 * L_O_EXCL does make sense on create ops, i.e., we can
+	 * take a create op with or without L_O_EXCL.  We pass that
+	 * through.
+	 */
+	CLEAR(L9P_L_O_CREAT);
+	CONVERT(L9P_L_O_EXCL, O_EXCL);
+	CONVERT(L9P_L_O_TRUNC, O_TRUNC);
+	CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY);
+	CONVERT(L9P_L_O_APPEND, O_APPEND);
+	CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK);
+
+	/*
+	 * Discard these as useless noise at our (server) end.
+	 * (NOATIME might be useful but we can only set it on a
+	 * per-mount basis.)
+	 */
+	CLEAR(L9P_L_O_CLOEXEC);
+	CLEAR(L9P_L_O_DIRECT);
+	CLEAR(L9P_L_O_DSYNC);
+	CLEAR(L9P_L_O_FASYNC);
+	CLEAR(L9P_L_O_LARGEFILE);
+	CLEAR(L9P_L_O_NOATIME);
+	CLEAR(L9P_L_O_NOCTTY);
+	CLEAR(L9P_L_O_NOFOLLOW);	/* O_NOFOLLOW is always set above */
+	CLEAR(L9P_L_O_SYNC);
+
+	if (l_mode != 0) {
+		L9P_LOG(L9P_INFO,
+		    "fs_oflags_dotl: untranslated bits: %#x",
+		    (unsigned)l_mode);
+		return (EINVAL);
+	}
+
+	*aflags = flags;
+	*ap9 = p9;
+	return (0);
+#undef CLEAR
+#undef CONVERT
+}
+
+static struct passwd *
+fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
+{
+#if !defined(WITH_CASPER)
+	(void)sc;	/* only the Casper variant needs the softc */
+	return (r_getpwuid(uid, pg));
+#else	/* WITH_CASPER: look up through the cap_pwd channel */
+	return (r_cap_getpwuid(sc->fs_cappwd, uid, pg));
+#endif
+}
+
+static struct group *
+fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
+{
+#if !defined(WITH_CASPER)
+	(void)sc;	/* only the Casper variant needs the softc */
+	return (r_getgrgid(gid, pg));
+#else	/* WITH_CASPER: look up through the cap_grp channel */
+	return (r_cap_getgrgid(sc->fs_capgrp, gid, pg));
+#endif
+}
+
+/*
+ * Build full name of file by appending given name to directory name.
+ */
+static int
+fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	size_t dirlen, namesz;	/* namesz counts the trailing NUL */
+
+	assert(parent != NULL);
+	dirlen = strlen(parent->ff_name);
+	namesz = strlen(name) + 1;
+	if (size < dirlen + 1 + namesz)
+		return (ENAMETOOLONG);
+	memcpy(buf, parent->ff_name, dirlen);	/* "<dir>" */
+	buf[dirlen] = '/';
+	memcpy(&buf[dirlen + 1], name, namesz);	/* "<name>\0" */
+	return (0);
+}
+
+/*
+ * Build parent name of file by splitting it off.  Return an error
+ * if the given fid represents the root, so that there is no such
+ * parent, or if the discovered parent is not a directory.
+ */
+static int
+fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
+    size_t size, struct stat *st)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	char *parent;
+
+	assert(ff != NULL);
+	/* A NULL result from r_dirname() is reported as ENAMETOOLONG. */
+	parent = r_dirname(ff->ff_name, buf, size);
+	if (parent == NULL)
+		return (ENAMETOOLONG);
+	/* Stat the parent entry itself; do not follow a symlink there. */
+	if (fstatat(ff->ff_dirfd, parent, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISDIR(st->st_mode))
+		return (0);
+	return (ENOTDIR);
+}
+
+/*
+ * Like fs_buildname() but for adding a file name to a buffer
+ * already holding a directory name.  Essentially does
+ *     strcat(dbuf, "/");
+ *     strcat(dbuf, fname);
+ * but with size checking and an ENAMETOOLONG error as needed.
+ *
+ * (Think of the function name as "directory plus-equals file".)
+ */
+static int
+fs_dpf(char *dbuf, char *fname, size_t size)
+{
+	size_t used, need;
+
+	used = strlen(dbuf);
+	need = strlen(fname) + 1;	/* file name plus its NUL */
+	if (size < used + 1 + need)
+		return (ENAMETOOLONG);
+	dbuf[used] = '/';
+	memcpy(&dbuf[used + 1], fname, need);
+	return (0);
+}
+
+/*
+ * Prepare to create a new directory entry (open with O_CREAT,
+ * mkdir, etc -- any operation that creates a new inode),
+ * operating in parent data <dir>, based on authinfo <ai> and
+ * effective gid <egid>.
+ *
+ * The new entity should be owned by user/group <*nuid, *ngid>,
+ * if it's really a new entity.  It will be a directory if isdir.
+ *
+ * Returns an error number if the entry should not be created
+ * (e.g., read-only file system or no permission to write in
+ * parent directory).  Always sets *nuid and *ngid on success:
+ * in the worst case, when there is no available ID, this will
+ * use the parent directory's IDs.  Fills in <*st> on success.
+ */
+static int
+fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
+    struct stat *st, uid_t *nuid, gid_t *ngid)
+{
+	struct fs_fid *parent;
+	struct fs_authinfo *auth;
+	int32_t aceop;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+	parent = dir->lo_aux;
+	assert(parent != NULL);
+	if (fstatat(parent->ff_dirfd, parent->ff_name, st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(st->st_mode))
+		return (ENOTDIR);
+	/* Need add-file / add-subdirectory permission in the parent. */
+	auth = parent->ff_ai;
+	fillacl(parent);
+	aceop = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
+	if (check_access(aceop, parent->ff_acl, st, NULL, NULL, auth, egid))
+		return (EPERM);
+	/* Prefer caller-supplied IDs; fall back to the parent directory's. */
+	*nuid = auth->ai_uid != (uid_t)-1 ? auth->ai_uid : st->st_uid;
+	if (egid != (gid_t)-1)
+		*ngid = egid;
+	else
+		*ngid = auth->ai_ngids > 0 ? auth->ai_gids[0] : st->st_gid;
+	return (0);
+}
+
+/*
+ * Allocate new open-file data structure to attach to a fid.
+ *
+ * The new file's authinfo is the same as the old one's, and
+ * we gain a reference.
+ */
+static struct fs_fid *
+open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
+{
+	struct fs_fid *ff;
+	uint32_t refs;
+
+	ff = l9p_calloc(1, sizeof(*ff));
+	if (pthread_mutex_init(&ff->ff_mtx, NULL) != 0)
+		goto fail_free;
+	ff->ff_fd = -1;
+	ff->ff_dirfd = dirfd;
+	if ((ff->ff_name = strdup(path)) == NULL)
+		goto fail_mutex;
+
+	/* Take our reference on the shared authinfo. */
+	pthread_mutex_lock(&ai->ai_mtx);
+	refs = ++ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+	/*
+	 * Only the creator of an authinfo may legitimately hold
+	 * the very first reference; anyone else seeing a count
+	 * of 1 is (re)using an authinfo that was already released.
+	 * This is a best-effort sanity check: it cannot catch
+	 * every such misuse, but it may catch a few.
+	 */
+	assert(refs > 1 || creating);
+	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+	    (void *)ai, (u_long)refs);
+	ff->ff_ai = ai;
+	return (ff);
+
+fail_mutex:
+	pthread_mutex_destroy(&ff->ff_mtx);
+fail_free:
+	free(ff);
+	return (NULL);
+}
+
+/*
+ * Convert host stat data into a 9P (or 9P2000.u) stat structure.
+ *
+ * <s> is zeroed and then filled in from <buf>, which the caller has
+ * already obtained for the entry called <name>.  The mode carries the
+ * low nine permission bits plus the L9P_DM* flag matching the file
+ * type; atime and mtime are truncated to 32 bits.
+ *
+ * Without <dotu>, owner and group are reported as strdup()ed names
+ * looked up via fs_getpwuid()/fs_getgrgid(); a failed lookup (or a
+ * failed strdup) leaves the corresponding field NULL.  With <dotu>,
+ * the numeric IDs are reported instead and s->extension describes a
+ * symlink target or a device's type and major/minor numbers.
+ * s->extension is NULL whenever it could not be produced.
+ *
+ * NOTE(review): readlink() below resolves <name> against the process
+ * working directory, while callers in this file hold names relative
+ * to a directory descriptor -- confirm symlink targets resolve.
+ */
+static void
+dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
+    struct stat *buf, bool dotu)
+{
+	struct passwd *user;
+	struct group *group;
+
+	memset(s, 0, sizeof(struct l9p_stat));
+
+	generate_qid(buf, &s->qid);
+
+	s->type = 0;
+	s->dev = 0;
+	s->mode = buf->st_mode & 0777;
+
+	if (S_ISDIR(buf->st_mode))
+		s->mode |= L9P_DMDIR;
+
+	if (S_ISLNK(buf->st_mode) && dotu)
+		s->mode |= L9P_DMSYMLINK;
+
+	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
+		s->mode |= L9P_DMDEVICE;
+
+	if (S_ISSOCK(buf->st_mode))
+		s->mode |= L9P_DMSOCKET;
+
+	if (S_ISFIFO(buf->st_mode))
+		s->mode |= L9P_DMNAMEDPIPE;
+
+	s->atime = (uint32_t)buf->st_atime;
+	s->mtime = (uint32_t)buf->st_mtime;
+	s->length = (uint64_t)buf->st_size;
+
+	s->name = r_basename(name, NULL, 0);
+
+	if (!dotu) {
+		struct r_pgdata udata, gdata;
+
+		user = fs_getpwuid(sc, buf->st_uid, &udata);
+		group = fs_getgrgid(sc, buf->st_gid, &gdata);
+		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
+		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
+		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
+		r_pgfree(&udata);
+		r_pgfree(&gdata);
+	} else {
+		/*
+		 * When using 9P2000.u, we don't need to bother about
+		 * providing user and group names in textual form.
+		 */
+		s->n_uid = buf->st_uid;
+		s->n_gid = buf->st_gid;
+		s->n_muid = buf->st_uid;
+
+		if (S_ISLNK(buf->st_mode)) {
+			char target[MAXPATHLEN];
+			ssize_t ret = readlink(name, target, MAXPATHLEN);
+
+			if (ret < 0) {
+				s->extension = NULL;
+				return;
+			}
+
+			s->extension = strndup(target, (size_t)ret);
+		}
+
+		/*
+		 * The contents of the pointer after a failed asprintf()
+		 * are not portable, so force it back to NULL ourselves:
+		 * consumers treat a NULL extension as "none".
+		 */
+		if (S_ISBLK(buf->st_mode)) {
+			if (asprintf(&s->extension, "b %d %d",
+			    (int)major(buf->st_rdev),
+			    (int)minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+
+		if (S_ISCHR(buf->st_mode)) {
+			if (asprintf(&s->extension, "c %d %d",
+			    (int)major(buf->st_rdev),
+			    (int)minor(buf->st_rdev)) < 0)
+				s->extension = NULL;
+		}
+	}
+}
+
+/*
+ * Copy the host statfs result <in> into the 9P statfs reply <out>.
+ * <namelen> is supplied by the caller and stored as out->namelen; the
+ * two fsid words are combined into one 64-bit value with val[0]
+ * shifted into the upper half.
+ */
+static void
+dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
+{
+	uint64_t fsid_hi, fsid_lo;
+
+	fsid_hi = (uint64_t)in->f_fsid.val[0] << 32;
+	fsid_lo = (uint64_t)in->f_fsid.val[1];
+
+	out->type = L9P_FSTYPE;
+	out->namelen = (uint32_t)namelen;
+	out->fsid = fsid_hi | fsid_lo;
+
+	/* Block accounting. */
+	out->bsize = in->f_bsize;
+	out->blocks = in->f_blocks;
+	out->bfree = in->f_bfree;
+	out->bavail = in->f_bavail;
+
+	/* Inode accounting. */
+	out->files = in->f_files;
+	out->ffree = in->f_ffree;
+}
+
+/*
+ * Fill in <qid> from host stat data <buf>: the path is the inode
+ * number, the version is always 0, and the type reflects whether the
+ * object is a regular file, directory, or symlink.
+ *
+ * The type is built up from zero so that the result never depends on
+ * whatever the caller's <qid> happened to contain beforehand.
+ */
+static void
+generate_qid(struct stat *buf, struct l9p_qid *qid)
+{
+	qid->path = buf->st_ino;
+	qid->version = 0;
+	qid->type = 0;
+
+	if (S_ISREG(buf->st_mode))
+		qid->type |= L9P_QTFILE;
+
+	if (S_ISDIR(buf->st_mode))
+		qid->type |= L9P_QTDIR;
+
+	if (S_ISLNK(buf->st_mode))
+		qid->type |= L9P_QTSYMLINK;
+}
+
+/*
+ * Make sure ff->ff_acl is populated, if it can be.
+ *
+ * Nothing happens when an ACL is already cached or when the
+ * FF_NO_NFSV4_ACL flag is set.  A failed lookup sets that flag so
+ * later calls return immediately instead of retrying.
+ */
+static void
+fillacl(struct fs_fid *ff)
+{
+
+	if (ff->ff_acl != NULL)
+		return;
+	if ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0)
+		return;
+
+	ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
+	if (ff->ff_acl == NULL)
+		ff->ff_flags |= FF_NO_NFSV4_ACL;
+}
+
+/*
+ * Fetch an ACL for an arbitrary <fd> and/or <path> (for instance a
+ * parent directory) on behalf of <ff>.
+ *
+ * Honors ff's FF_NO_NFSV4_ACL flag by returning NULL straight away,
+ * but unlike fillacl() it neither caches the result in <ff> nor sets
+ * the flag when the lookup fails.
+ */
+static struct l9p_acl *
+getacl(struct fs_fid *ff, int fd, const char *path)
+{
+	bool skip;
+
+	skip = (ff->ff_flags & FF_NO_NFSV4_ACL) != 0;
+	return (skip ? NULL : look_for_nfsv4_acl(ff, fd, path));
+}
+
+/*
+ * Forget the ACL cached in <ff> (for example after a move between
+ * directories, where inherited ACLs may differ) and restore ff_flags
+ * from the owning authinfo's ai_flags.
+ */
+static void
+dropacl(struct fs_fid *ff)
+{
+	struct l9p_acl *stale;
+
+	stale = ff->ff_acl;
+	ff->ff_acl = NULL;
+	ff->ff_flags = ff->ff_ai->ai_flags;
+	l9p_acl_free(stale);
+}
+
+/*
+ * Check to see if we can find NFSv4 ACLs for the given file.
+ * If we have an open fd (fd >= 0), we use that; otherwise <path> is
+ * opened relative to ff->ff_dirfd just long enough to query it.
+ *
+ * Returns the converted ACL, or NULL when the file cannot be opened,
+ * when no ACL could be retrieved, or when no conversion routine is
+ * compiled in.  Failures other than "no NFSv4 ACL applies" (EINVAL)
+ * are logged.  A descriptor opened here is always closed again; a
+ * caller-supplied one is left open.
+ */
+static struct l9p_acl *
+look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
+{
+	struct l9p_acl *acl;
+	acl_t sysacl;
+	bool doclose = false;
+
+	if (fd < 0) {
+		fd = openat(ff->ff_dirfd, path, O_RDONLY);
+		if (fd < 0) {
+			/*
+			 * Report the real reason rather than letting
+			 * acl_get_fd_np() fail on a bad descriptor.
+			 */
+			L9P_LOG(L9P_ERROR,
+			    "error opening %s to retrieve NFSv4 ACL: %s",
+			    path, strerror(errno));
+			return (NULL);
+		}
+		doclose = true;
+	}
+
+	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
+	if (sysacl == NULL) {
+		/*
+		 * EINVAL means no NFSv4 ACLs apply for this file.
+		 * Other error numbers indicate some kind of problem.
+		 */
+		if (errno != EINVAL) {
+			L9P_LOG(L9P_ERROR,
+			    "error retrieving NFSv4 ACL from "
+			    "fdesc %d (%s): %s", fd,
+			    path, strerror(errno));
+		}
+
+		if (doclose)
+			close(fd);
+
+		return (NULL);
+	}
+#if defined(HAVE_FREEBSD_ACLS)
+	acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
+#else
+	acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
+#endif
+	acl_free(sysacl);
+
+	if (doclose)
+		close(fd);
+
+	return (acl);
+}
+
+/*
+ * Decide whether the user described by <ai>, acting with effective
+ * group <egid> ((gid_t)-1 when none was supplied), may perform the
+ * NFSv4-style operation(s) in <opmask>.
+ *
+ * The caller passes ACL and stat data for the parent (<pacl>, <pst>)
+ * and/or the child (<cacl>, <cst>).  At most one of the two pairs may
+ * be NULL, except when ACLs are unsupported: then both ACLs may be
+ * NULL, but whichever of <pst>/<cst> the operation needs must still
+ * be supplied.
+ *
+ * Returns the result of l9p_acl_check_access().
+ */
+static int
+check_access(int32_t opmask,
+    struct l9p_acl *pacl, struct stat *pst,
+    struct l9p_acl *cacl, struct stat *cst,
+    struct fs_authinfo *ai, gid_t egid)
+{
+	struct l9p_acl_check_args args;
+	bool have_acl;
+
+	/* Who is asking. */
+	args.aca_uid = ai->ai_uid;
+	args.aca_gid = egid;
+	args.aca_groups = ai->ai_gids;
+	args.aca_ngroups = (size_t)ai->ai_ngids;
+	args.aca_superuser = true;
+
+	/* What they are asking about. */
+	args.aca_parent = pacl;
+	args.aca_pstat = pst;
+	args.aca_child = cacl;
+	args.aca_cstat = cst;
+
+	/*
+	 * With at least one ACL in hand, decide on ACLs alone and
+	 * ignore the Unix permission bits; with none, fall back to
+	 * the st_mode bits.
+	 */
+	have_acl = pacl != NULL || cacl != NULL;
+	if (have_acl)
+		args.aca_aclmode = L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
+	else
+		args.aca_aclmode = L9P_ACM_STAT_MODE;
+
+	return (l9p_acl_check_access(opmask, &args));
+}
+
+/*
+ * Handle Tattach: work out who the client is, verify that the export
+ * root is a directory, and hang a new fs_fid for "." (relative to the
+ * root descriptor) on the request's fid.
+ *
+ * The user comes from the numeric n_uname when one is given, else
+ * from a lookup of the textual uname.  The resulting uid and group
+ * list are stored in a freshly allocated authinfo, whose first
+ * reference is taken by the new fs_fid.
+ *
+ * Returns 0 on success with the root qid in the reply.  Failures to
+ * identify the user or to validate the root are reported as EPERM,
+ * except ENOENT and ENOTDIR which pass through; allocation and mutex
+ * failures return ENOMEM or the pthread error.
+ */
+static int
+fs_attach(void *softc, struct l9p_request *req)
+{
+	struct fs_authinfo *ai;
+	struct fs_softc *sc = (struct fs_softc *)softc;
+	struct fs_fid *file;
+	struct passwd *pwd;
+	struct stat st;
+	struct r_pgdata udata;
+	uint32_t n_uname;
+	gid_t *gids;
+	uid_t uid;
+	bool have_udata = false;	/* udata was handed to fs_getpwuid */
+	int error;
+	int ngroups;
+
+	assert(req->lr_fid != NULL);
+
+	/*
+	 * Single-thread pwd/group related items.  We have a reentrant
+	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
+	 * may use non-reentrant C library getgr* routines.
+	 */
+	pthread_mutex_lock(&fs_attach_mutex);
+
+	n_uname = req->lr_req.tattach.n_uname;
+	if (n_uname != L9P_NONUNAME) {
+		uid = (uid_t)n_uname;
+		pwd = fs_getpwuid(sc, uid, &udata);
+		have_udata = true;
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: uid %ld: no such user", (long)uid);
+	} else {
+		uid = (uid_t)-1;
+#if defined(WITH_CASPER)
+		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
+#else
+		pwd = getpwnam(req->lr_req.tattach.uname);
+#endif
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: %s: no such user",
+			    req->lr_req.tattach.uname);
+	}
+
+	/*
+	 * If caller didn't give a numeric UID, pick it up from pwd
+	 * if possible.  If that doesn't work we can't continue.
+	 *
+	 * Note that pwd also supplies the group set.  This assumes
+	 * the server has the right mapping; this needs improvement.
+	 * We do at least support ai->ai_ngids==0 properly now though.
+	 */
+	if (uid == (uid_t)-1 && pwd != NULL)
+		uid = pwd->pw_uid;
+	if (uid == (uid_t)-1)
+		error = EPERM;
+	else {
+		error = 0;
+		if (fstat(sc->fs_rootfd, &st) != 0)
+			error = errno;
+		else if (!S_ISDIR(st.st_mode))
+			error = ENOTDIR;
+	}
+	if (error) {
+		if (have_udata)
+			r_pgfree(&udata);
+		pthread_mutex_unlock(&fs_attach_mutex);
+		L9P_LOG(L9P_DEBUG,
+		    "Tattach: denying uid=%ld access to rootdir: %s",
+		    (long)uid, strerror(error));
+		/*
+		 * Pass ENOENT and ENOTDIR through for diagnosis;
+		 * others become EPERM.  This should not leak too
+		 * much security.
+		 */
+		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
+	}
+
+	if (pwd != NULL) {
+		/*
+		 * This either succeeds and fills in ngroups and
+		 * returns non-NULL, or fails and sets ngroups to 0
+		 * and returns NULL.  Either way ngroups is correct.
+		 */
+		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
+	} else {
+		gids = NULL;
+		ngroups = 0;
+	}
+
+	/*
+	 * pwd is not consulted past this point, so the lookup buffer
+	 * behind it can be released (same pairing as in dostat()).
+	 */
+	if (have_udata)
+		r_pgfree(&udata);
+
+	/*
+	 * Done with pwd and group related items that may use
+	 * non-reentrant C library routines; allow other threads in.
+	 */
+	pthread_mutex_unlock(&fs_attach_mutex);
+
+	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
+	if (ai == NULL) {
+		free(gids);
+		return (ENOMEM);
+	}
+	error = pthread_mutex_init(&ai->ai_mtx, NULL);
+	if (error) {
+		free(gids);
+		free(ai);
+		return (error);
+	}
+	ai->ai_refcnt = 0;
+	ai->ai_uid = uid;
+	ai->ai_flags = 0;	/* XXX for now */
+	ai->ai_ngids = ngroups;
+	/* gids is NULL when there are no groups; memcpy must not see it. */
+	if (ngroups > 0)
+		memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
+	free(gids);
+
+	file = open_fid(sc->fs_rootfd, ".", ai, true);
+	if (file == NULL) {
+		pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+		return (ENOMEM);
+	}
+
+	req->lr_fid->lo_aux = file;
+	generate_qid(&st, &req->lr_resp.rattach.qid);
+	return (0);
+}
+
+/*
+ * Release whatever the fid currently has open: the directory stream
+ * if there is one, otherwise the plain file descriptor.  The fs_fid
+ * itself is left attached to the fid.  Always returns 0.
+ */
+static int
+fs_clunk(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+
+	assert(ff != NULL);
+
+	if (ff->ff_dir != NULL) {
+		closedir(ff->ff_dir);
+		ff->ff_dir = NULL;
+		return (0);
+	}
+	if (ff->ff_fd != -1) {
+		close(ff->ff_fd);
+		ff->ff_fd = -1;
+	}
+	return (0);
+}
+
+/*
+ * Create ops.
+ *
+ * We are to create a new file under some existing path,
+ * where the new file's name is in the Tcreate request and the
+ * existing path is due to a fid-based file (req->lr_fid).
+ *
+ * The L9P_DM* bits in the requested permissions select what kind of
+ * object is made: directory, symlink, named pipe, socket, device, or
+ * (when none is set) a regular file.  Symlinks and devices take their
+ * target or "type major [minor]" description from the request's
+ * extension string and fail with EINVAL when it is missing or
+ * malformed.
+ *
+ * One op (create regular file) sets file->fd, the rest do not.
+ *
+ * Returns 0 with the new object's qid in the reply, or an errno.
+ */
+static int
+fs_create(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t dmperm;
+	mode_t perm;
+	char *name;
+	char *ext;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tcreate.name;
+	ext = req->lr_req.tcreate.extension;
+	dmperm = req->lr_req.tcreate.perm;
+	perm = (mode_t)(dmperm & 0777);
+
+	if (dmperm & L9P_DMDIR)
+		error = fs_imkdir(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSYMLINK) {
+		/*
+		 * NOTE(review): the extension is presumably absent
+		 * (NULL) when the client did not send one; never hand
+		 * a NULL target down to symlinkat().
+		 */
+		if (ext == NULL)
+			return (EINVAL);
+		error = fs_isymlink(softc, dir, name, ext, (gid_t)-1, &st);
+	} else if (dmperm & L9P_DMNAMEDPIPE)
+		error = fs_imkfifo(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSOCKET)
+		error = fs_imksocket(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMDEVICE) {
+		unsigned int major, minor;
+		char type;
+		dev_t dev;
+
+		/*
+		 * ??? Should this be testing < 3?  For now, allow a single
+		 * integer mode with minor==0 implied.
+		 *
+		 * A missing extension is rejected the same way as one
+		 * that does not parse.
+		 */
+		minor = 0;
+		if (ext == NULL ||
+		    sscanf(ext, "%c %u %u", &type, &major, &minor) < 2) {
+			return (EINVAL);
+		}
+
+		switch (type) {
+		case 'b':
+			perm |= S_IFBLK;
+			break;
+		case 'c':
+			perm |= S_IFCHR;
+			break;
+		default:
+			return (EINVAL);
+		}
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, true, perm, dev,
+		    (gid_t)-1, &st);
+	} else {
+		enum l9p_omode p9;
+		int flags;
+
+		p9 = req->lr_req.tcreate.mode;
+		error = fs_oflags_dotu(p9, &flags);
+		if (error)
+			return (error);
+		error = fs_icreate(softc, dir, name, flags,
+		    true, perm, (gid_t)-1, &st);
+		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
+	}
+
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rcreate.qid);
+
+	return (error);
+}
+
+/*
+ * Apply the Plan 9 rule that a parent directory can only take
+ * permissions away from a newly created entry.  According to
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * the effective permissions are
+ *     perm & (~0666 | (dir.perm & 0666))
+ * for files, and
+ *     perm & (~0777 | (dir.perm & 0777))
+ * for directories.
+ *
+ * Bits outside the 0666/0777 window pass through untouched.
+ *
+ * This seems a bit restrictive; probably
+ * there should be a control knob for this.
+ */
+static inline mode_t
+fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
+{
+	const mode_t window = isdir ? 0777 : 0666;
+
+	return (perm & (~window | (dir_perm & window)));
+}
+
+/*
+ * Internal form of create, for plain files only; the caller has
+ * already peeled off directories, devices and the other special
+ * kinds.
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> in which
+ * to create; the new entry's <name>; the open(2) <flags>; <isp9>,
+ * selecting Plan 9 style permission masking over Linux style; the
+ * requested permissions <perm>; an effective group id <egid>; and
+ * <st>, which receives the new file's stat data on success.
+ *
+ * On success the fid stops referring to the directory: its name
+ * becomes the new file's path and it holds the open descriptor.
+ * The directory fid is never open at this point, so there is nothing
+ * to close first.  On failure the fid is left unchanged.
+ */
+static int
+fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff = dir->lo_aux;
+	char pathbuf[MAXPATHLEN];
+	char *newpath;
+	uid_t owner;
+	gid_t group;
+	int error, fd;
+
+	/*
+	 * Directory + file name gives the path of the new file.
+	 * Permission is checked on the parent first, and then we race
+	 * to create the file before anything (such as a symlink)
+	 * appears in its place.
+	 *
+	 * (To close this race we need to use openat(), which is
+	 * left for a later version of this code.)
+	 */
+	error = fs_buildname(dir, name, pathbuf, sizeof(pathbuf));
+	if (error)
+		return (error);
+
+	/* Heap copy that becomes ff->ff_name if all goes well. */
+	newpath = strdup(pathbuf);
+	if (newpath == NULL)
+		return (ENOMEM);
+
+	/* May we create here, and who will own the result? */
+	error = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (error)
+		goto fail;
+
+	/* Plan 9 semantics: the parent may mask permissions away. */
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* O_EXCL makes the create exclusive, so O_TRUNC cannot matter. */
+	fd = openat(ff->ff_dirfd, pathbuf, flags | O_CREAT | O_EXCL, perm);
+	if (fd < 0) {
+		error = errno;
+		goto fail;
+	}
+
+	/* Set the exact mode and ownership, then report the result. */
+	if (fchmod(fd, perm) != 0 ||
+	    fchown(fd, owner, group) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		(void) close(fd);
+		/* unlink(newname); ? */
+		goto fail;
+	}
+
+	/* The fid used to name a directory; now it is an open file. */
+	free(ff->ff_name);
+	ff->ff_name = newpath;
+	ff->ff_fd = fd;
+	return (0);
+
+fail:
+	free(newpath);
+	return (error);
+}
+
+/*
+ * Internal form of open: stat file and verify permissions (from p9
+ * argument), then open the file-or-directory, leaving the internal
+ * fs_fid fields set up.  If we cannot open the file, return a
+ * suitable error number, and leave everything unchanged.
+ *
+ * To mitigate the race between permissions testing and the actual
+ * open, we can stat the file twice (once with lstat() before open,
+ * then with fstat() after).  We assume O_NOFOLLOW is set in flags,
+ * so if some other race-winner substitutes in a symlink we won't
+ * open it here.  (However, embedded symlinks, if they occur, are
+ * still an issue.  Ideally we would like to have an O_NEVERFOLLOW
+ * that fails on embedded symlinks, and a way to pass this to
+ * lstat() as well.)
+ *
+ * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
+ * on substitution-detection via fstat().  To simplify the code we
+ * just always re-check.
+ *
+ * (For a proper fix in the future, we can require openat(), keep
+ * each parent directory open during walk etc, and allow only final
+ * name components with O_NOFOLLOW.)
+ *
+ * On successful return, st has been filled in, and either
+ * file->ff_dir (directories) or file->ff_fd (everything else) holds
+ * the open object.  Every descriptor opened here is closed again on
+ * each failure path.
+ */
+static int
+fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
+    gid_t egid __unused, struct stat *st)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file;
+	struct stat first;
+	int32_t op;
+	char *name;
+	int error;
+	int fd;
+	DIR *dirp;
+
+	/* Forbid write ops on read-only file system. */
+	if (sc->fs_readonly) {
+		if ((flags & O_TRUNC) != 0)
+			return (EROFS);
+		if ((flags & O_ACCMODE) != O_RDONLY)
+			return (EROFS);
+		if (p9 & L9P_ORCLOSE)
+			return (EROFS);
+	}
+
+	file = fid->lo_aux;
+	assert(file != NULL);
+	name = file->ff_name;
+
+	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISLNK(first.st_mode))
+		return (EPERM);
+
+	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		op = L9P_ACE_READ_DATA;
+		break;
+	case O_WRONLY:
+		op = L9P_ACE_WRITE_DATA;
+		break;
+	case O_RDWR:
+		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
+		break;
+	default:
+		return (EINVAL);
+	}
+	fillacl(file);
+	error = check_access(op, NULL, NULL, file->ff_acl, &first,
+	    file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (S_ISDIR(first.st_mode)) {
+		/* Forbid write or truncate on directory. */
+		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
+			return (EPERM);
+		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
+		if (fd < 0)
+			return (EPERM);
+		dirp = fdopendir(fd);
+		if (dirp == NULL) {
+			/* fdopendir() did not take ownership of fd. */
+			(void) close(fd);
+			return (EPERM);
+		}
+		fd = dirfd(dirp);
+	} else {
+		dirp = NULL;
+		fd = openat(file->ff_dirfd, name, flags);
+		if (fd < 0)
+			return (EPERM);
+	}
+
+	/*
+	 * We have a valid fd, and maybe non-null dirp.  Re-check
+	 * the file, and fail if st_dev or st_ino changed.
+	 */
+	if (fstat(fd, st) != 0 ||
+	    first.st_dev != st->st_dev ||
+	    first.st_ino != st->st_ino) {
+		if (dirp != NULL)
+			(void) closedir(dirp);
+		else
+			(void) close(fd);
+		return (EPERM);
+	}
+	if (dirp != NULL)
+		file->ff_dir = dirp;
+	else
+		file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of mkdir, shared by every way of asking for one.
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> under
+ * which to create; the new entry's <name>; <isp9>, selecting Plan 9
+ * style permission masking over Linux style; the requested
+ * permissions <perm>; an effective group id <egid>; and <st>, which
+ * receives the new directory's stat data on success.
+ *
+ * Unlike fs_icreate(), the fid keeps referring to the parent.
+ */
+static int
+fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char pathbuf[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int error, fd;
+
+	error = fs_buildname(dir, name, pathbuf, sizeof(pathbuf));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, true, egid, st, &owner, &group);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, true);
+
+	if (mkdirat(parent->ff_dirfd, pathbuf, perm) != 0)
+		return (errno);
+
+	/*
+	 * Open what we just made (refusing a symlink swapped in
+	 * meanwhile) so ownership and mode land on that very object.
+	 */
+	fd = openat(parent->ff_dirfd, pathbuf,
+	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	if (fd < 0)
+		return (errno);
+
+	if (fchown(fd, owner, group) != 0 ||
+	    fchmod(fd, perm) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		/* rmdir(newname) ? */
+	}
+	(void) close(fd);
+
+	return (error);
+}
+
+#ifdef __APPLE__
+/*
+ * This is an undocumented OS X syscall. It would be best to avoid it,
+ * but there doesn't seem to be another safe way to implement mknodat.
+ * Dear Apple, please implement mknodat before you remove this syscall.
+ *
+ * Invokes SYS___pthread_fchdir on <fd> and returns the syscall's
+ * result unchanged.  Presumably this gives the calling thread its own
+ * working directory rooted at <fd>; the caller in this file passes -1
+ * afterwards to stop using the thread-local cwd.
+ */
+static int fs_ifchdir_thread_local(int fd)
+{
+/* syscall() is flagged as deprecated; silence that for this one use. */
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+	return syscall(SYS___pthread_fchdir, fd);
+#pragma clang diagnostic pop
+}
+#endif
+
+/*
+ * Internal form of mknod (special device).
+ *
+ * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter.
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> under
+ * which to create; the new entry's <name>; <isp9>, selecting Plan 9
+ * style permission masking; <mode> (type plus permission bits); the
+ * device number <dev>; an effective group id <egid>; and <st>, which
+ * receives the new node's stat data on success.
+ *
+ * Returns 0 or an errno value; EPERM is also returned if the node
+ * found afterwards does not have the requested type.
+ */
+static int
+fs_imknod(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	mode_t perm;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9) {
+		perm = fs_p9perm(mode & 0777, st->st_mode, false);
+		mode = (mode & ~0777) | perm;
+	} else {
+		perm = mode & 0777;
+	}
+
+#ifdef __APPLE__
+	/*
+	 * No mknodat() here: temporarily switch the thread's working
+	 * directory instead.  Failures are reported as errno values,
+	 * like every other path out of this function.
+	 */
+	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) {
+		return (errno);
+	}
+	error = mknod(newname, mode, dev);
+	int preserved_errno = errno;
+	/* Stop using the thread-local cwd */
+	fs_ifchdir_thread_local(-1);
+	if (error < 0) {
+		errno = preserved_errno;
+		return errno;
+	}
+#else
+	if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0)
+		return (errno);
+#endif
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
+		error = EPERM;		/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mkfifo.
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> under
+ * which to create; the new entry's <name>; <isp9>, selecting Plan 9
+ * style permission masking; the requested permissions <perm>; an
+ * effective group id <egid>; and <st>, which receives the new FIFO's
+ * stat data on success.
+ *
+ * The FIFO is created relative to ff->ff_dirfd, the same base that
+ * the ownership, mode and stat calls below use, so all of them refer
+ * to the same name.
+ *
+ * Returns 0 or an errno value; EPERM is also returned if what is
+ * found afterwards is not a FIFO.
+ */
+static int
+fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+#ifdef __APPLE__
+	/*
+	 * Same approach as fs_imknod(): make the relative name
+	 * resolve against the fid's base directory by switching the
+	 * thread's working directory for the duration of the call.
+	 */
+	if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0)
+		return (errno);
+	error = mkfifo(newname, perm) != 0 ? errno : 0;
+	/* Stop using the thread-local cwd */
+	fs_ifchdir_thread_local(-1);
+	if (error)
+		return (error);
+#else
+	if (mkfifoat(ff->ff_dirfd, newname, perm) != 0)
+		return (errno);
+#endif
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if (!S_ISFIFO(st->st_mode))
+		error = EPERM;		/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mksocket.
+ *
+ * This is a bit different because of the horrible socket naming
+ * system (bind() with sockaddr_un sun_path).
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> under
+ * which to create; the new entry's <name>; <isp9>, selecting Plan 9
+ * style permission masking; the requested permissions <perm>; an
+ * effective group id <egid>; and <st>, which receives the new
+ * socket inode's stat data on success.
+ *
+ * Returns 0 or an errno value: ENAMETOOLONG when the path cannot fit
+ * in sun_path, EPERM when what is found afterwards is not a socket.
+ * Both the temporary socket and any directory descriptor opened for
+ * bindat() are closed on every path past socket creation.
+ *
+ * NOTE(review): plain bind() resolves sun_path against the process
+ * working directory, whereas the fchownat()/fchmodat()/fstatat()
+ * calls below resolve the same name against ff->ff_dirfd -- confirm
+ * these agree in the server's environment.
+ */
+static int
+fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	struct sockaddr_un sun;
+	char *path;
+	char newname[MAXPATHLEN];
+	gid_t gid;
+	uid_t uid;
+	int error = 0, s, fd;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* The socket exists only to be bound; it is closed before return. */
+	s = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (s < 0)
+		return (errno);
+
+	/* Default: bind by the full name; fd stays -1 unless bindat is used. */
+	path = newname;
+	fd = -1;
+#ifdef HAVE_BINDAT
+	/* Try bindat() if needed. */
+	if (strlen(path) >= sizeof(sun.sun_path)) {
+		/*
+		 * Full name is too long: open the parent directory and
+		 * bind by the short component name relative to it.
+		 */
+		fd = openat(ff->ff_dirfd, ff->ff_name,
+		    O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
+		if (fd >= 0)
+			path = name;
+	}
+#endif
+
+	/*
+	 * Can only create the socket if the path will fit.
+	 * Even if we are using bindat() there are limits
+	 * (the API for AF_UNIX sockets is ... not good).
+	 *
+	 * Note: in theory we can fill sun_path to the end
+	 * (omitting a terminating '\0') but in at least one
+	 * Unix-like system, this was known to behave oddly,
+	 * so we test for ">=" rather than just ">".
+	 */
+	if (strlen(path) >= sizeof(sun.sun_path)) {
+		error = ENAMETOOLONG;
+		goto out;
+	}
+	sun.sun_family = AF_UNIX;
+	sun.sun_len = sizeof(struct sockaddr_un);
+	/* Length was checked above, so the copy is always NUL-terminated. */
+	strncpy(sun.sun_path, path, sizeof(sun.sun_path));
+
+#ifdef HAVE_BINDAT
+	if (fd >= 0) {
+		if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+			error = errno;
+		goto out;	/* done now, for good or ill */
+	}
+#endif
+
+	if (bind(s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+		error = errno;
+out:
+
+	if (error == 0) {
+		/*
+		 * We believe we created the socket-inode.  Fix
+		 * permissions etc.  Note that we cannot use
+		 * fstat() on the socket descriptor: it succeeds,
+		 * but we get bogus data!
+		 */
+		if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+		    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+		    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+			error = errno;
+		else if (!S_ISSOCK(st->st_mode))
+			error = EPERM;		/* ??? lost a race anyway */
+
+		/* if (error) unlink(newname) ? */
+	}
+
+	/*
+	 * It's not clear which error should override, although
+	 * ideally we should never see either close() call fail.
+	 * In any case we do want to try to close both fd and s,
+	 * always.  Let's set error only if it is not already set,
+	 * so that all exit paths can use the same code.
+	 */
+	if (fd >= 0 && close(fd) != 0)
+		if (error == 0)
+			error = errno;
+	if (close(s) != 0)
+		if (error == 0)
+			error = errno;
+
+	return (error);
+}
+
+/*
+ * Internal form of symlink.
+ *
+ * Creates a symlink called <name>, pointing at <symtgt>, under the
+ * directory fid <dir>, and fills in <st> with the link's own stat
+ * data.  <egid> is the effective group id used for the access check
+ * and ownership.
+ *
+ * Symlinks are presumed to carry no permission bits, so none are
+ * set; they do have owners, however (who may be charged for quotas).
+ *
+ * Returns 0 or an errno value; EPERM is also returned if what is
+ * found afterwards is not a symlink.
+ */
+static int
+fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
+    char *symtgt, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char pathbuf[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int error;
+
+	error = fs_buildname(dir, name, pathbuf, sizeof(pathbuf));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (error)
+		return (error);
+
+	if (symlinkat(symtgt, parent->ff_dirfd, pathbuf) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(parent->ff_dirfd, pathbuf, owner, group,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (fstatat(parent->ff_dirfd, pathbuf, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* if (error) unlink(newname) ? */
+
+	if (!S_ISLNK(st->st_mode))
+		return (EPERM);		/* ??? lost a race anyway */
+	return (0);
+}
+
+/*
+ * Handle Topen: translate the 9P open mode into open(2) flags, open
+ * the fid's file via fs_iopen(), and report its qid and the
+ * connection's maximum I/O size in the reply.
+ */
+static int
+fs_open(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode omode = req->lr_req.topen.mode;
+	struct stat sb;
+	int oflags;
+	int rc;
+
+	rc = fs_oflags_dotu(omode, &oflags);
+	if (rc != 0)
+		return (rc);
+
+	rc = fs_iopen(softc, req->lr_fid, oflags, omode, (gid_t)-1, &sb);
+	if (rc != 0)
+		return (rc);
+
+	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
+	generate_qid(&sb, &req->lr_resp.ropen.qid);
+	return (0);
+}
+
+/*
+ * Helper for directory read: stat the entry <name> inside the
+ * directory that <file> has open, without following a final symlink.
+ * Returns fstatat()'s result.
+ */
+static inline int
+fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
+{
+	int dfd;
+
+	dfd = dirfd(file->ff_dir);
+	return (fstatat(dfd, name, st, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * Handle Tread on an open fid.
+ *
+ * For a directory, entries are read one at a time, converted with
+ * dostat() and packed into the reply until the directory ends or an
+ * entry no longer fits; in the latter case the directory position is
+ * rewound so that entry is returned by the next read.  Entries that
+ * cannot be stat'ed are skipped.
+ *
+ * For anything else, up to io.count bytes are read at io.offset
+ * into the request's data iovecs and the byte count is stored in the
+ * reply.
+ *
+ * Returns 0, or errno if the seek or read fails.
+ *
+ * NOTE(review): dostat() allocates several string fields of l9stat
+ * and nothing in the directory loop releases them -- confirm whether
+ * ownership passes elsewhere or whether they leak.
+ */
+static int
+fs_read(void *softc, struct l9p_request *req)
+{
+	struct l9p_stat l9stat;
+	struct fs_softc *sc;
+	struct fs_fid *file;
+	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
+	ssize_t ret;
+
+	sc = softc;
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (file->ff_dir != NULL) {
+		struct dirent *d;
+		struct stat st;
+		struct l9p_message msg;
+		long o;
+
+		pthread_mutex_lock(&file->ff_mtx);
+
+		/*
+		 * Must use telldir before readdir since seekdir
+		 * takes cookie values.  Unfortunately this wastes
+		 * a lot of time (and memory) building unneeded
+		 * cookies that can only be flushed by closing
+		 * the directory.
+		 *
+		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
+		 * so in fact, we can discard the cookies by
+		 * calling seekdir on them.  This clears up wasted
+		 * memory at the cost of even more wasted time...
+		 *
+		 * XXX: readdir/telldir/seekdir not thread safe
+		 */
+		l9p_init_msg(&msg, req, L9P_PACK);
+		for (;;) {
+			o = telldir(file->ff_dir);
+			d = readdir(file->ff_dir);
+			if (d == NULL)
+				break;
+			if (fs_lstatat(file, d->d_name, &st))
+				continue;
+			dostat(sc, &l9stat, d->d_name, &st, dotu);
+			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
+				/* Did not fit: deliver it next time. */
+				seekdir(file->ff_dir, o);
+				break;
+			}
+#if defined(__FreeBSD__)
+			seekdir(file->ff_dir, o);
+			(void) readdir(file->ff_dir);
+#endif
+		}
+
+		pthread_mutex_unlock(&file->ff_mtx);
+	} else {
+		size_t niov = l9p_truncate_iov(req->lr_data_iov,
+		    req->lr_data_niov, req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+		ret = preadv(file->ff_fd, req->lr_data_iov, (int)niov,
+		    (off_t)req->lr_req.io.offset);
+#else
+		/* XXX: not thread safe, should really use aio_listio. */
+		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+			return (errno);
+
+		/*
+		 * Keep the result signed: narrowing it to uint32_t
+		 * here would turn -1 into a large positive count and
+		 * hide the failure from the check below.
+		 */
+		ret = readv(file->ff_fd, req->lr_data_iov, (int)niov);
+#endif
+
+		if (ret < 0)
+			return (errno);
+
+		req->lr_resp.io.count = (uint32_t)ret;
+	}
+
+	return (0);
+}
+
+/*
+ * Remove the file or directory that <fid> refers to.
+ *
+ * Refused with EROFS on a read-only export.  Otherwise the parent
+ * directory and the object itself are stat'ed, the unlink permission
+ * is checked against both (using their ACLs where available), and
+ * the entry is removed with unlinkat(), as a directory when the
+ * object is one.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+fs_remove(void *softc, struct l9p_fid *fid)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_acl *parent_acl;
+	struct fs_fid *file;
+	struct stat pst, cst;
+	char dirname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
+	if (error)
+		return (error);
+
+	file = fid->lo_aux;
+	/*
+	 * Report the stat failure itself; `error' is still 0 here and
+	 * returning it would claim the remove had succeeded.
+	 */
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* The parent's ACL is fetched fresh; the child's is cached. */
+	parent_acl = getacl(file, -1, dirname);
+	fillacl(file);
+
+	error = check_access(L9P_ACOP_UNLINK,
+	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
+	l9p_acl_free(parent_acl);
+	if (error)
+		return (error);
+
+	if (unlinkat(file->ff_dirfd, file->ff_name,
+	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Handle Tstat: stat the fid's file (without following a final
+ * symlink) and convert the result into the reply, in 9P2000.u form
+ * when the connection's version is at least L9P_2000U.
+ */
+static int
+fs_stat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	struct stat sb;
+	bool dotu;
+
+	assert(ff);
+
+	if (fstatat(ff->ff_dirfd, ff->ff_name, &sb,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	dotu = req->lr_conn->lc_version >= L9P_2000U;
+	dostat(sc, &req->lr_resp.rstat.stat, ff->ff_name, &sb, dotu);
+	return (0);
+}
+
+/*
+ * Twalk handler: starting from the path of lr_fid, step through the
+ * twalk.wname[] components one at a time, lstat-ing each intermediate
+ * path relative to the fid's ff_dirfd and recording a qid for every
+ * component that resolves.  On success a new fs_fid for the last
+ * resolved path is stored in lr_newfid->lo_aux and rwalk.nwqid is set
+ * to the number of qids produced.
+ *
+ * Two MAXPATHLEN buffers are used in ping-pong fashion: "succ" always
+ * holds the last path that resolved, "next" is scratch space for the
+ * candidate path currently being tried.
+ *
+ * Returns 0 on (possibly partial) success, otherwise an errno value.
+ */
+static int
+fs_walk(void *softc, struct l9p_request *req)
+{
+	struct l9p_acl *acl;
+	struct fs_authinfo *ai;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct fs_fid *newfile;
+	struct stat st;
+	size_t clen, namelen, need;
+	char *comp, *succ, *next, *swtmp;
+	bool atroot;
+	bool dotdot;
+	int i, nwname;
+	int error = 0;
+	char namebufs[2][MAXPATHLEN];
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/walk.html:
+	 *
+	 *    It is legal for nwname to be zero, in which case newfid
+	 *    will represent the same file as fid and the walk will
+	 *    usually succeed; this is equivalent to walking to dot.
+	 * [Aside: it's not clear if we should test S_ISDIR here.]
+	 *    ...
+	 *    The name ".." ... represents the parent directory.
+	 *    The name "." ... is not used in the protocol.
+	 *    ... A walk of the name ".." in the root directory
+	 *    of the server is equivalent to a walk with no name
+	 *    elements.
+	 *
+	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
+	 * so it is safe to convert to plain int.
+	 *
+	 * We are to return an error only if the first walk fails,
+	 * else stop at the end of the names or on the first error.
+	 * The final fid is based on the last name successfully
+	 * walked.
+	 *
+	 * Note that we *do* get Twalk requests with nwname==0 on files.
+	 *
+	 * Set up "successful name" buffer pointer with base fid name,
+	 * initially.  We'll swap each new success into it as we go.
+	 *
+	 * Invariant: atroot and stat data correspond to current
+	 * (succ) path.
+	 */
+	succ = namebufs[0];
+	next = namebufs[1];
+	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
+	if (namelen >= MAXPATHLEN)
+		return (ENAMETOOLONG);
+	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
+		return (errno);
+	ai = file->ff_ai;
+	atroot = strlen(succ) == 0; /* XXX? */
+	fillacl(file);
+	acl = file->ff_acl;
+
+	nwname = (int)req->lr_req.twalk.nwname;
+
+	for (i = 0; i < nwname; i++) {
+		/*
+		 * Must have execute permission to search a directory.
+		 * Then, look up each component in its directory-so-far.
+		 * Check for ".." along the way, handling specially
+		 * as needed.  Forbid "/" in name components.
+		 *
+		 * NOTE(review): the ENOTDIR and access-denied cases below
+		 * use "goto out", which returns the error even when i > 0,
+		 * whereas the other failures use "break" and are turned
+		 * into a partial success -- confirm this is intended.
+		 */
+		if (!S_ISDIR(st.st_mode)) {
+			error = ENOTDIR;
+			goto out;
+		}
+		error = check_access(L9P_ACE_EXECUTE,
+		     NULL, NULL, acl, &st, ai, (gid_t)-1);
+		if (error) {
+			L9P_LOG(L9P_DEBUG,
+			    "Twalk: denying dir-walk on \"%s\" for uid %u",
+			    succ, (unsigned)ai->ai_uid);
+			error = EPERM;
+			goto out;
+		}
+		comp = req->lr_req.twalk.wname[i];
+		if (strchr(comp, '/') != NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		clen = strlen(comp);
+		dotdot = false;
+
+		/*
+		 * Build next pathname (into "next").  If "..",
+		 * just strip one name component off the success
+		 * name so far.  Since we know this name fits, the
+		 * stripped down version also fits.  Otherwise,
+		 * the name is the base name plus '/' plus the
+		 * component name plus terminating '\0'; this may
+		 * or may not fit.
+		 */
+		if (comp[0] == '.') {
+			/* A bare "." is rejected; only ".." is special. */
+			if (clen == 1) {
+				error = EINVAL;
+				break;
+			}
+			if (comp[1] == '.' && clen == 2)
+				dotdot = true;
+		}
+		if (dotdot) {
+			/*
+			 * It's not clear how ".." at root should
+			 * be handled when i > 0.  Obeying the man
+			 * page exactly, we reset i to 0 and stop,
+			 * declaring terminal success.
+			 *
+			 * Otherwise, we just climbed up one level
+			 * so adjust "atroot".
+			 */
+			if (atroot) {
+				i = 0;
+				break;
+			}
+			(void) r_dirname(succ, next, MAXPATHLEN);
+			namelen = strlen(next);
+			atroot = strlen(next) == 0; /* XXX? */
+		} else {
+			need = namelen + 1 + clen + 1;
+			if (need > MAXPATHLEN) {
+				error = ENAMETOOLONG;
+				break;
+			}
+			memcpy(next, succ, namelen);
+			next[namelen++] = '/';
+			memcpy(&next[namelen], comp, clen + 1);
+			namelen += clen;
+			/*
+			 * Since name is never ".", we are necessarily
+			 * descending below the root now.
+			 */
+			atroot = false;
+		}
+
+		/* Any lstat failure on the candidate is reported as ENOENT. */
+		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			error = ENOENT;
+			break;
+		}
+
+		/*
+		 * Success: generate qid and swap this
+		 * successful name into place.  Update acl.
+		 *
+		 * NOTE(review): after the swap, "next" holds the
+		 * *previous* path and "succ" the one just resolved, so
+		 * the getacl() call below is handed the old path --
+		 * confirm whether "succ" was meant.
+		 */
+		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
+		swtmp = succ;
+		succ = next;
+		next = swtmp;
+		if (acl != NULL && acl != file->ff_acl)
+			l9p_acl_free(acl);
+		acl = getacl(file, -1, next);
+	}
+
+	/*
+	 * Fail only if we failed on the first name.
+	 * Otherwise we succeeded on something, and "succ"
+	 * points to the last successful name in namebufs[].
+	 */
+	if (error) {
+		if (i == 0)
+			goto out;
+		error = 0;
+	}
+
+	newfile = open_fid(file->ff_dirfd, succ, ai, false);
+	if (newfile == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	if (req->lr_newfid == req->lr_fid) {
+		/*
+		 * Before overwriting fid->lo_aux, free the old value.
+		 * Note that this doesn't free the l9p_fid data,
+		 * just the fs_fid data.  (But it does ditch ff_acl.)
+		 *
+		 * NOTE(review): "file" becomes NULL here, so an acl
+		 * obtained from getacl() in the loop is neither attached
+		 * to newfile below nor released at "out" -- this looks
+		 * like a leak; confirm.
+		 */
+		if (acl == file->ff_acl)
+			acl = NULL;
+		fs_freefid(softc, req->lr_fid);
+		file = NULL;
+	}
+	req->lr_newfid->lo_aux = newfile;
+	/* Hand a loop-obtained acl over to the new fid instead of freeing it. */
+	if (file != NULL && acl != file->ff_acl) {
+		newfile->ff_acl = acl;
+		acl = NULL;
+	}
+	req->lr_resp.rwalk.nwqid = (uint16_t)i;
+out:
+	/* Release acl only when it is not the one still owned by "file". */
+	if (file != NULL && acl != file->ff_acl)
+		l9p_acl_free(acl);
+	return (error);
+}
+
+/*
+ * Twrite handler: write the request's data iovec to the fid's open
+ * descriptor at the requested offset and report the number of bytes
+ * written in the reply.  The iovec is first trimmed with
+ * l9p_truncate_iov() using the request's byte count.
+ *
+ * Returns EROFS on a read-only backend, the errno of a failed
+ * lseek()/writev()/pwritev(), or 0 on success.
+ */
+static int
+fs_write(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	size_t iovcnt;
+	ssize_t nwritten;
+
+	assert(ff != NULL);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	iovcnt = l9p_truncate_iov(req->lr_data_iov, req->lr_data_niov,
+	    req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+	nwritten = pwritev(ff->ff_fd, req->lr_data_iov, iovcnt,
+	    req->lr_req.io.offset);
+#else
+	/* XXX: not thread safe, should really use aio_listio. */
+	if (lseek(ff->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+		return (errno);
+
+	nwritten = writev(ff->ff_fd, req->lr_data_iov, (int)iovcnt);
+#endif
+	if (nwritten < 0)
+		return (errno);
+
+	req->lr_resp.io.count = (uint32_t)nwritten;
+	return (0);
+}
+
+/*
+ * Twstat handler: apply the changes described by twstat.stat to the
+ * file behind lr_fid.  A field set to all-ones means "leave alone";
+ * an empty name means "do not rename".
+ *
+ * Handled, in this order: length (truncate), owner/group (9P2000.u
+ * and later only), mode (low nine permission bits), name (rename
+ * within the parent directory).  atime/mtime are accepted but ignored
+ * and any attempt to change dev is refused with EPERM.
+ *
+ * Returns 0 on success or an errno value.  The steps are not atomic:
+ * a failure part-way through leaves the earlier changes in place.
+ */
+static int
+fs_wstat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
+	struct l9p_fid *fid;
+	struct fs_fid *file;
+	int error = 0;
+
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+
+	/*
+	 * XXX:
+	 *
+	 * stat(9P) sez:
+	 *
+	 * Either all the changes in wstat request happen, or none of them
+	 * does: if the request succeeds, all changes were made; if it fails,
+	 * none were.
+	 *
+	 * Atomicity is clearly missing in current implementation.
+	 */
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	if (l9stat->atime != (uint32_t)~0) {
+		/* XXX: not implemented, ignore */
+	}
+
+	if (l9stat->mtime != (uint32_t)~0) {
+		/* XXX: not implemented, ignore */
+	}
+
+	if (l9stat->dev != (uint32_t)~0) {
+		error = EPERM;
+		goto out;
+	}
+
+	if (l9stat->length != (uint64_t)~0) {
+		int fd;
+
+		/* Directories cannot be resized. */
+		if (file->ff_dir != NULL) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * ff_name is resolved against ff_dirfd everywhere else in
+		 * this function, so it must not be handed to truncate(2),
+		 * which would resolve it against the process's working
+		 * directory.  Open it relative to ff_dirfd and truncate
+		 * through the descriptor instead.
+		 */
+		fd = openat(file->ff_dirfd, file->ff_name, O_WRONLY);
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)l9stat->length) != 0) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (req->lr_conn->lc_version >= L9P_2000U) {
+		/* n_uid / n_gid are passed through to fchownat() as-is. */
+		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
+		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (l9stat->mode != (uint32_t)~0) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    l9stat->mode & 0777, 0) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (strlen(l9stat->name) > 0) {
+		struct l9p_acl *parent_acl;
+		struct stat st;
+		char *tmp;
+		char newname[MAXPATHLEN];
+
+		/*
+		 * Rename-within-directory: it's not deleting anything,
+		 * but we need write permission on the directory.  This
+		 * should suffice.
+		 */
+		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
+		if (error)
+			goto out;
+		parent_acl = getacl(file, -1, newname);
+		error = check_access(L9P_ACE_ADD_FILE,
+		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
+		l9p_acl_free(parent_acl);
+		if (error)
+			goto out;
+		/* newname holds the parent path; append the new leaf name. */
+		error = fs_dpf(newname, l9stat->name, sizeof(newname));
+		if (error)
+			goto out;
+		/* Heap copy: on success it replaces file->ff_name. */
+		tmp = strdup(newname);
+		if (tmp == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
+		    tmp) != 0) {
+			error = errno;
+			free(tmp);
+			goto out;
+		}
+		/* Successful rename, update file->ff_name.  ACL can stay. */
+		free(file->ff_name);
+		file->ff_name = tmp;
+	}
+out:
+	return (error);
+}
+
+/*
+ * Tstatfs handler: report file system statistics for the file system
+ * holding the fid's path.  The caller must pass a "read data" access
+ * check on the file.  The path is opened relative to ff_dirfd so that
+ * fstatfs() and fpathconf(_PC_NAME_MAX) can be applied to it; the
+ * results are converted by dostatfs() into the reply.
+ *
+ * Returns 0 on success or an errno value.  The temporary descriptor
+ * is closed on every path.
+ */
+static int
+fs_statfs(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file;
+	struct stat st;
+	struct statfs f;
+	long name_max;
+	int error;
+	int fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Not entirely clear what access to require; we'll go
+	 * for "read data".
+	 */
+	fillacl(file);
+	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
+	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	fd = openat(file->ff_dirfd, file->ff_name, 0);
+	if (fd < 0)
+		return (errno);
+
+	if (fstatfs(fd, &f) != 0) {
+		/* Save errno first: close() may overwrite it. */
+		error = errno;
+		(void) close(fd);
+		return (error);
+	}
+
+	/* Capture errno before close() so a failure code is not lost. */
+	name_max = fpathconf(fd, _PC_NAME_MAX);
+	error = errno;
+	(void) close(fd);
+
+	/*
+	 * NOTE(review): fpathconf() also returns -1 with errno untouched
+	 * when there is no limit; in that case "error" is whatever errno
+	 * happened to hold -- confirm whether that case needs handling.
+	 */
+	if (name_max == -1)
+		return (error);
+
+	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
+
+	return (0);
+}
+
+/*
+ * Tlopen handler (9P2000.L): translate the request's open flags with
+ * fs_oflags_dotl(), open the fid through fs_iopen(), and fill in the
+ * qid and iounit of the reply.
+ *
+ * Returns 0 on success or the error from either helper.
+ */
+static int
+fs_lopen(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	enum l9p_omode omode;
+	int oflags, rv;
+
+	rv = fs_oflags_dotl(req->lr_req.tlopen.flags, &oflags, &omode);
+	if (rv != 0)
+		return (rv);
+
+	rv = fs_iopen(softc, req->lr_fid, oflags, omode,
+	    req->lr_req.tlopen.gid, &sb);
+	if (rv != 0)
+		return (rv);
+
+	/* Success: sb was filled in by fs_iopen(). */
+	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
+	generate_qid(&sb, &req->lr_resp.rlopen.qid);
+	return (0);
+}
+
+/*
+ * Tlcreate handler (9P2000.L): create a file named tlcreate.name in
+ * the directory fid via fs_icreate(), using the translated open flags
+ * and the low nine permission bits of the requested mode.  On success
+ * the reply's qid is generated from the new file's stat data.
+ *
+ * The reply's iounit is filled in whether or not creation succeeded.
+ * Returns 0 on success or the error from either helper.
+ */
+static int
+fs_lcreate(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	enum l9p_omode omode;
+	mode_t perm;
+	int oflags, rv;
+
+	rv = fs_oflags_dotl(req->lr_req.tlcreate.flags, &oflags, &omode);
+	if (rv != 0)
+		return (rv);
+
+	/* ? set-id bits? */
+	perm = (mode_t)req->lr_req.tlcreate.mode & 0777;
+
+	rv = fs_icreate(softc, req->lr_fid, req->lr_req.tlcreate.name,
+	    oflags, false, perm, req->lr_req.tlcreate.gid, &sb);
+	if (rv == 0)
+		generate_qid(&sb, &req->lr_resp.rlcreate.qid);
+
+	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
+	return (rv);
+}
+
+/*
+ * Tsymlink handler (9P2000.L): create symlink tsymlink.name, pointing
+ * at tsymlink.symtgt, inside the directory fid by calling
+ * fs_isymlink().  On success the reply's qid is generated from the
+ * stat data that helper returns.
+ *
+ * Returns whatever fs_isymlink() returns.
+ */
+static int
+fs_symlink(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	int rv;
+
+	rv = fs_isymlink(softc, req->lr_fid, req->lr_req.tsymlink.name,
+	    req->lr_req.tsymlink.symtgt, req->lr_req.tsymlink.gid, &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.rsymlink.qid);
+	return (0);
+}
+
+/*
+ * Tmknod handler (9P2000.L): create a special file named tmknod.name
+ * in the directory fid.  The file type bits of tmknod.mode select the
+ * helper:
+ *
+ *   S_IFBLK, S_IFCHR  -> fs_imknod() with a dev_t built from the
+ *                        request's major and minor numbers
+ *   S_IFIFO           -> fs_imkfifo()
+ *   S_IFSOCK          -> fs_imksocket()
+ *
+ * Any other type is rejected with EINVAL.  Only the low nine
+ * permission bits of the mode are honoured.  On success the reply's
+ * qid is generated from the new node's stat data.
+ */
+static int
+fs_mknod(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t mode, devmajor, devminor;
+	dev_t dev;
+	gid_t gid;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tmknod.name;
+	mode = req->lr_req.tmknod.mode;
+	gid = req->lr_req.tmknod.gid;
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		mode = (mode & S_IFMT) | (mode & 0777);	/* ??? */
+		/*
+		 * The minor number must come from the request's minor
+		 * field; reading "major" twice would give every device
+		 * node a minor equal to its major.
+		 */
+		devmajor = req->lr_req.tmknod.major;
+		devminor = req->lr_req.tmknod.minor;
+		dev = makedev(devmajor, devminor);
+		error = fs_imknod(softc, dir, name, false,
+		    (mode_t)mode, dev, gid, &st);
+		break;
+
+	case S_IFIFO:
+		error = fs_imkfifo(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	case S_IFSOCK:
+		error = fs_imksocket(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rmknod.qid);
+	return (error);
+}
+
+/*
+ * Trename handler (9P2000.L): move the file behind lr_fid into the
+ * directory behind lr_fid2, giving it the name trename.name.
+ *
+ * Access is checked in two steps.  If the destination directory's
+ * path differs from the file's current parent path, the caller must
+ * be allowed to unlink the file from the old parent.  In every case
+ * the caller must be allowed to add a file (or a subdirectory, when
+ * the source is a directory) to the destination directory.
+ *
+ * On success the fid's stored name is replaced by the new path and
+ * its ACL is dropped via dropacl().
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+fs_rename(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *srcfid, *dstdirfid;
+	struct fs_fid *src, *dstdir;
+	struct fs_authinfo *ai;
+	struct l9p_acl *srcdir_acl;
+	struct stat src_st, srcdir_st, dstdir_st;
+	char srcdir_path[MAXPATHLEN], dst_path[MAXPATHLEN];
+	char *newname;
+	bool crossdir;
+	int32_t addop;
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* The fid being renamed, and the identity it was opened under. */
+	srcfid = req->lr_fid;
+	src = srcfid->lo_aux;
+	assert(src != NULL);
+	ai = src->ff_ai;
+
+	/* Path and stat data of the file's current parent directory. */
+	error = fs_pdir(sc, srcfid, srcdir_path, sizeof(srcdir_path),
+	    &srcdir_st);
+	if (error != 0)
+		return (error);
+
+	dstdirfid = req->lr_fid2;
+	dstdir = dstdirfid->lo_aux;
+	assert(dstdir != NULL);
+
+	/* Same parent path means a plain rename-in-place. */
+	crossdir = strcmp(srcdir_path, dstdir->ff_name) != 0;
+
+	fillacl(src);
+	fillacl(dstdir);
+
+	if (fstatat(src->ff_dirfd, src->ff_name, &src_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* Leaving the old directory counts as an unlink from it. */
+	if (crossdir) {
+		srcdir_acl = getacl(src, -1, srcdir_path);
+		error = check_access(L9P_ACOP_UNLINK, srcdir_acl, &srcdir_st,
+		    src->ff_acl, &src_st, ai, (gid_t)-1);
+		l9p_acl_free(srcdir_acl);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Arriving in the destination counts as creating an entry there. */
+	if (fstatat(dstdir->ff_dirfd, dstdir->ff_name, &dstdir_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	if (S_ISDIR(src_st.st_mode))
+		addop = L9P_ACE_ADD_SUBDIRECTORY;
+	else
+		addop = L9P_ACE_ADD_FILE;
+	error = check_access(addop, dstdir->ff_acl, &dstdir_st, NULL, NULL,
+	    ai, (gid_t)-1);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Build "<destination dir path>/<new name>".  The directory path
+	 * should already fit, but the copy is length-checked anyway.
+	 */
+	if (strlcpy(dst_path, dstdir->ff_name, sizeof(dst_path)) >=
+	    sizeof(dst_path))
+		return (ENAMETOOLONG);
+	error = fs_dpf(dst_path, req->lr_req.trename.name, sizeof(dst_path));
+	if (error != 0)
+		return (error);
+
+	/* Heap copy that will become the fid's new name on success. */
+	newname = strdup(dst_path);
+	if (newname == NULL)
+		return (ENOMEM);
+
+	if (renameat(src->ff_dirfd, src->ff_name, src->ff_dirfd,
+	    newname) != 0) {
+		error = errno;
+		free(newname);
+		return (error);
+	}
+
+	/* The fid stays open across the rename; point it at the new path. */
+	free(src->ff_name);
+	src->ff_name = newname;
+
+	dropacl(src);
+	return (0);
+}
+
+/*
+ * Treadlink handler (9P2000.L): read the target of the symlink behind
+ * lr_fid into a MAXPATHLEN stack buffer and store a heap copy of it in
+ * the reply.
+ *
+ * Returns the errno from readlinkat() on failure, ENOMEM when the
+ * target fills the whole buffer (and so may have been truncated) or
+ * when the copy cannot be allocated, and 0 on success.
+ */
+static int
+fs_readlink(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	char target[MAXPATHLEN];
+	ssize_t len;
+
+	assert(ff);
+
+	len = readlinkat(ff->ff_dirfd, ff->ff_name, target, sizeof(target));
+	if (len < 0)
+		return (errno);
+
+	/* todo: allocate dynamically */
+	if ((size_t)len >= sizeof(target))
+		return (ENOMEM);
+
+	/* readlinkat() does not NUL-terminate; strndup() copies len bytes. */
+	req->lr_resp.rreadlink.target = strndup(target, (size_t)len);
+	if (req->lr_resp.rreadlink.target == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Tgetattr handler (9P2000.L): lstat the fid's path and copy into the
+ * reply each attribute whose bit is set in tgetattr.request_mask.
+ * The reply's "valid" mask records which attributes were supplied;
+ * it is zero if the lstat fails.  The qid is always generated on
+ * success, independent of the mask.
+ *
+ * Returns 0 on success or the errno from fstatat().
+ */
+static int
+fs_getattr(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	struct stat sb;
+	uint64_t want, have = 0;
+	int error;
+
+	assert(ff);
+
+	if (fstatat(ff->ff_dirfd, ff->ff_name, &sb, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		req->lr_resp.rgetattr.valid = have;
+		return (error);
+	}
+
+	/* ?? Can we provide items not-requested? If so, can skip tests. */
+	want = req->lr_req.tgetattr.request_mask;
+
+	/* Identity and type. */
+	if (want & L9PL_GETATTR_MODE) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.mode = sb.st_mode;
+		have |= L9PL_GETATTR_MODE;
+	}
+	if (want & L9PL_GETATTR_NLINK) {
+		req->lr_resp.rgetattr.nlink = sb.st_nlink;
+		have |= L9PL_GETATTR_NLINK;
+	}
+	if (want & L9PL_GETATTR_UID) {
+		/* provide st_uid, or file->ff_uid? */
+		req->lr_resp.rgetattr.uid = sb.st_uid;
+		have |= L9PL_GETATTR_UID;
+	}
+	if (want & L9PL_GETATTR_GID) {
+		/* provide st_gid, or file->ff_gid? */
+		req->lr_resp.rgetattr.gid = sb.st_gid;
+		have |= L9PL_GETATTR_GID;
+	}
+	if (want & L9PL_GETATTR_RDEV) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.rdev = (uint64_t)sb.st_rdev;
+		have |= L9PL_GETATTR_RDEV;
+	}
+	/* No field is stored for INO here; the bit is just acknowledged. */
+	if (want & L9PL_GETATTR_INO)
+		have |= L9PL_GETATTR_INO;
+	if (want & L9PL_GETATTR_GEN) {
+		req->lr_resp.rgetattr.gen = sb.st_gen;
+		have |= L9PL_GETATTR_GEN;
+	}
+
+	/* Size information. */
+	if (want & L9PL_GETATTR_SIZE) {
+		req->lr_resp.rgetattr.size = (uint64_t)sb.st_size;
+		have |= L9PL_GETATTR_SIZE;
+	}
+	if (want & L9PL_GETATTR_BLOCKS) {
+		req->lr_resp.rgetattr.blksize = (uint64_t)sb.st_blksize;
+		req->lr_resp.rgetattr.blocks = (uint64_t)sb.st_blocks;
+		have |= L9PL_GETATTR_BLOCKS;
+	}
+
+	/* Timestamps. */
+	if (want & L9PL_GETATTR_ATIME) {
+		req->lr_resp.rgetattr.atime_sec =
+		    (uint64_t)sb.st_atimespec.tv_sec;
+		req->lr_resp.rgetattr.atime_nsec =
+		    (uint64_t)sb.st_atimespec.tv_nsec;
+		have |= L9PL_GETATTR_ATIME;
+	}
+	if (want & L9PL_GETATTR_MTIME) {
+		req->lr_resp.rgetattr.mtime_sec =
+		    (uint64_t)sb.st_mtimespec.tv_sec;
+		req->lr_resp.rgetattr.mtime_nsec =
+		    (uint64_t)sb.st_mtimespec.tv_nsec;
+		have |= L9PL_GETATTR_MTIME;
+	}
+	if (want & L9PL_GETATTR_CTIME) {
+		req->lr_resp.rgetattr.ctime_sec =
+		    (uint64_t)sb.st_ctimespec.tv_sec;
+		req->lr_resp.rgetattr.ctime_nsec =
+		    (uint64_t)sb.st_ctimespec.tv_nsec;
+		have |= L9PL_GETATTR_CTIME;
+	}
+	if (want & L9PL_GETATTR_BTIME) {
+		/* Without HAVE_BIRTHTIME the birth time is reported as 0. */
+#if defined(HAVE_BIRTHTIME)
+		req->lr_resp.rgetattr.btime_sec =
+		    (uint64_t)sb.st_birthtim.tv_sec;
+		req->lr_resp.rgetattr.btime_nsec =
+		    (uint64_t)sb.st_birthtim.tv_nsec;
+#else
+		req->lr_resp.rgetattr.btime_sec = 0;
+		req->lr_resp.rgetattr.btime_nsec = 0;
+#endif
+		have |= L9PL_GETATTR_BTIME;
+	}
+	/* don't know what to do with data version yet */
+
+	generate_qid(&sb, &req->lr_resp.rgetattr.qid);
+	req->lr_resp.rgetattr.valid = have;
+	return (0);
+}
+
+/*
+ * Tsetattr handler (9P2000.L): apply to the fid's path each attribute
+ * whose bit is set in tsetattr.valid, in this order: mode (low nine
+ * permission bits), owner/group, size, access/modification times.
+ * For the times, the *_SET bit selects the client-supplied value;
+ * otherwise the current wall-clock time is used.  A time that is not
+ * being changed keeps the value read by the initial lstat.
+ *
+ * Returns EROFS on a read-only backend, EISDIR when asked to resize a
+ * directory, the errno of the first failing system call, or 0.  The
+ * steps are not atomic: earlier changes stay in place if a later one
+ * fails.
+ *
+ * Should combine some of this with wstat code.
+ */
+static int
+fs_setattr(void *softc, struct l9p_request *req)
+{
+	uint64_t mask;
+	struct fs_softc *sc = softc;
+	struct timespec ts[2];
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+	uid_t uid;
+	gid_t gid;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * As with WSTAT we have atomicity issues.
+	 */
+	mask = req->lr_req.tsetattr.valid;
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+
+	/* Refuse up front so no other attribute is changed first. */
+	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (mask & L9PL_SETATTR_MODE) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    req->lr_req.tsetattr.mode & 0777,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
+		/* -1 tells fchownat() to leave that id unchanged. */
+		uid = mask & L9PL_SETATTR_UID
+		    ? req->lr_req.tsetattr.uid
+		    : (uid_t)-1;
+
+		gid = mask & L9PL_SETATTR_GID
+		    ? req->lr_req.tsetattr.gid
+		    : (gid_t)-1;
+
+		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & L9PL_SETATTR_SIZE) {
+		/* Truncate follows symlinks, is this OK? */
+		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
+
+		/*
+		 * Report the open failure itself; passing -1 on to
+		 * ftruncate() would replace the real cause with EBADF.
+		 */
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
+		/* Start from the current times so an unselected one is kept. */
+		ts[0].tv_sec = st.st_atimespec.tv_sec;
+		ts[0].tv_nsec = st.st_atimespec.tv_nsec;
+		ts[1].tv_sec = st.st_mtimespec.tv_sec;
+		ts[1].tv_nsec = st.st_mtimespec.tv_nsec;
+
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET) {
+				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
+				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET) {
+				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
+				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (utimensat(file->ff_dirfd, file->ff_name, ts,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+out:
+	return (error);
+}
+
+/*
+ * Txattrwalk handler: extended attributes are not implemented by this
+ * backend, so every request fails with EOPNOTSUPP.
+ */
+static int
+fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
+{
+	const int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Txattrcreate handler: extended attributes are not implemented by
+ * this backend, so every request fails with EOPNOTSUPP.
+ */
+static int
+fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
+{
+	const int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Treaddir handler (9P2000.L): pack directory entries from the fid's
+ * open directory stream into the reply until the stream is exhausted
+ * or an entry no longer packs.  An offset of zero rewinds the stream;
+ * any other offset is handed to seekdir().  Entries that cannot be
+ * lstat-ed are skipped.  The reply's byte count covers only entries
+ * that were packed completely.
+ *
+ * The stream is used with the fid's ff_mtx held.
+ *
+ * Returns ENOTDIR when the fid has no directory stream, otherwise 0.
+ */
+static int
+fs_readdir(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	struct l9p_message msg;
+	struct l9p_dirent ent;
+	struct dirent *dp;
+	struct stat sb;
+	uint32_t packed;
+
+	assert(ff);
+
+	if (ff->ff_dir == NULL)
+		return (ENOTDIR);
+
+	pthread_mutex_lock(&ff->ff_mtx);
+
+	/*
+	 * It's not clear whether we can use the same trick for
+	 * discarding offsets here as we do in fs_read.  It
+	 * probably should work, we'll have to see if some
+	 * client(s) use the zero-offset thing to rescan without
+	 * clunking the directory first.
+	 *
+	 * Probably the thing to do is switch to calling
+	 * getdirentries() / getdents() directly, instead of
+	 * going through libc.
+	 */
+	if (req->lr_req.io.offset != 0)
+		seekdir(ff->ff_dir, (long)req->lr_req.io.offset);
+	else
+		rewinddir(ff->ff_dir);
+
+	l9p_init_msg(&msg, req, L9P_PACK);
+	/* Size of the message so far, in case we get no entries. */
+	packed = (uint32_t)msg.lm_size;
+
+	for (;;) {
+		dp = readdir(ff->ff_dir);
+		if (dp == NULL)
+			break;
+
+		/*
+		 * Although "." is forbidden in naming and ".." is
+		 * special cased, testing shows that we must transmit
+		 * them through readdir.  (For ".." at root, we
+		 * should perhaps alter the inode number, but not
+		 * yet.)
+		 *
+		 * TODO: we do a full lstat here; could use dp->d_*
+		 * to construct the qid more efficiently, as long
+		 * as dp->d_type != DT_UNKNOWN.
+		 */
+		if (fs_lstatat(ff, dp->d_name, &sb))
+			continue;
+
+		ent.qid.type = 0;
+		generate_qid(&sb, &ent.qid);
+		/* Offset recorded after the entry was read from the stream. */
+		ent.offset = (uint64_t)telldir(ff->ff_dir);
+		ent.type = dp->d_type;
+		ent.name = dp->d_name;
+
+		/* Update count only if we completely pack the dirent. */
+		if (l9p_pudirent(&msg, &ent) < 0)
+			break;
+		packed = (uint32_t)msg.lm_size;
+	}
+
+	pthread_mutex_unlock(&ff->ff_mtx);
+	req->lr_resp.io.count = packed;
+	return (0);
+}
+
+/*
+ * Tfsync handler (9P2000.L): fsync the fid's descriptor.  When the fid
+ * has a directory stream, the stream's descriptor is synced;
+ * otherwise ff_fd is.
+ *
+ * Returns 0 on success or the errno from fsync().
+ */
+static int
+fs_fsync(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	int fd;
+
+	assert(ff);
+
+	if (ff->ff_dir != NULL)
+		fd = dirfd(ff->ff_dir);
+	else
+		fd = ff->ff_fd;
+
+	if (fsync(fd))
+		return (errno);
+	return (0);
+}
+
+/*
+ * Tlock handler (9P2000.L): no locking is actually performed.  A
+ * request whose type is read-lock, write-lock or unlock is answered
+ * with L9PL_LOCK_SUCCESS; any other type is rejected with EINVAL.
+ */
+static int
+fs_lock(void *softc __unused, struct l9p_request *req)
+{
+
+	switch (req->lr_req.tlock.type) {
+	case L9PL_LOCK_TYPE_RDLOCK:
+	case L9PL_LOCK_TYPE_WRLOCK:
+	case L9PL_LOCK_TYPE_UNLOCK:
+		/* Recognised type: always report success. */
+		req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Tgetlock handler (9P2000.L): answer "would this lock block?".  The
+ * reply echoes the request with the lock type forced to "unlock"
+ * (i.e. nothing is in the way) and an empty, freshly allocated
+ * client_id string.
+ *
+ * Returns EINVAL for an unrecognised lock type, ENOMEM if the
+ * client_id string cannot be allocated, and 0 otherwise.
+ */
+static int
+fs_getlock(void *softc __unused, struct l9p_request *req)
+{
+	char *client_id;
+
+	/*
+	 * Client wants to see if a request to lock a region would
+	 * block.  This is, of course, not atomic anyway, so the
+	 * op is useless.  QEMU simply says "unlocked!", so we do
+	 * too.
+	 */
+	switch (req->lr_req.getlock.type) {
+	case L9PL_LOCK_TYPE_RDLOCK:
+	case L9PL_LOCK_TYPE_WRLOCK:
+	case L9PL_LOCK_TYPE_UNLOCK:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/*
+	 * Allocate before touching the reply, so that on failure the
+	 * reply never holds a copy of the request's client_id pointer
+	 * or a NULL string.
+	 */
+	client_id = strdup("");  /* XXX what should go here? */
+	if (client_id == NULL)
+		return (ENOMEM);
+
+	req->lr_resp.getlock = req->lr_req.getlock;
+	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
+	req->lr_resp.getlock.client_id = client_id;
+	return (0);
+}
+
+/*
+ * Tlink handler (9P2000.L): create a hard link named tlink.name, in
+ * the directory behind lr_fid2, to the file behind lr_fid.
+ *
+ * Directories cannot be linked (EISDIR).  The identity the source fid
+ * was opened under must be allowed to add a file to the target
+ * directory.  On success the source fid's ACL is dropped via
+ * dropacl().
+ *
+ * Returns EROFS on a read-only backend, otherwise 0 or an errno
+ * value.
+ */
+static int
+fs_link(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dir;
+	struct fs_fid *file;
+	struct fs_fid *dirf;
+	struct stat fst, tdst;
+	char *name;
+	char newname[MAXPATHLEN];
+	int error;
+
+	/* Creating a link modifies the tree: refuse on a read-only export. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
+	dir = req->lr_fid2;
+	dirf = dir->lo_aux;
+	assert(dirf != NULL);
+
+	name = req->lr_req.tlink.name;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISDIR(fst.st_mode))
+		return (EISDIR);
+	fillacl(dirf);
+	/* The source is known not to be a directory, so this adds a file. */
+	error = check_access(L9P_ACE_ADD_FILE,
+	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	/*
+	 * newname was built from the target directory fid, so resolve it
+	 * against that fid's dirfd rather than the source file's.
+	 */
+	if (linkat(file->ff_dirfd, file->ff_name, dirf->ff_dirfd,
+	    newname, 0) != 0)
+		error = errno;
+	else
+		dropacl(file);
+
+	return (error);
+}
+
+/*
+ * Tmkdir handler (9P2000.L): create directory tmkdir.name inside the
+ * directory fid by calling fs_imkdir() with the requested mode and
+ * gid.  On success the reply's qid is generated from the stat data
+ * that helper returns.
+ *
+ * Returns whatever fs_imkdir() returns.
+ */
+static int
+fs_mkdir(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	mode_t perm = (mode_t)req->lr_req.tmkdir.mode;
+	int rv;
+
+	rv = fs_imkdir(softc, req->lr_fid, req->lr_req.tmkdir.name, false,
+	    perm, req->lr_req.tmkdir.gid, &sb);
+	if (rv != 0)
+		return (rv);
+
+	generate_qid(&sb, &req->lr_resp.rmkdir.qid);
+	return (0);
+}
+
+/*
+ * Trenameat handler (9P2000.L): rename trenameat.oldname in the
+ * directory behind lr_fid to trenameat.newname in the directory
+ * behind lr_fid2.
+ *
+ * The old directory fid must name a directory.  When the two fids
+ * differ and their paths differ, the new directory must also be a
+ * directory, and two access checks are made: unlink of the entry from
+ * the old directory, and add-file (or add-subdirectory, when the
+ * entry is a directory) on the new one.
+ *
+ * Returns EROFS on a read-only backend, otherwise 0 or an errno
+ * value.
+ */
+static int
+fs_renameat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *srcdir, *dstdir;
+	struct fs_fid *srcff, *dstff;
+	struct l9p_acl *victim_acl;
+	struct stat srcdir_st, dstdir_st, victim_st;
+	char srcpath[MAXPATHLEN], dstpath[MAXPATHLEN];
+	bool crossdir;
+	int32_t addop;
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	srcdir = req->lr_fid;
+	dstdir = req->lr_fid2;
+	assert(srcdir != NULL && dstdir != NULL);
+	srcff = srcdir->lo_aux;
+	dstff = dstdir->lo_aux;
+	assert(srcff != NULL && dstff != NULL);
+
+	/* Full paths of the entry being moved and of its new name. */
+	error = fs_buildname(srcdir, req->lr_req.trenameat.oldname,
+	    srcpath, sizeof(srcpath));
+	if (error != 0)
+		return (error);
+	error = fs_buildname(dstdir, req->lr_req.trenameat.newname,
+	    dstpath, sizeof(dstpath));
+	if (error != 0)
+		return (error);
+
+	if (fstatat(srcff->ff_dirfd, srcpath, &victim_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* Same fid, or same path under two fids: not a cross-directory move. */
+	crossdir = srcdir != dstdir &&
+	    strcmp(srcff->ff_name, dstff->ff_name) != 0;
+
+	if (fstatat(srcff->ff_dirfd, srcff->ff_name, &srcdir_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(srcdir_st.st_mode))
+		return (ENOTDIR);
+	fillacl(srcff);
+
+	if (crossdir) {
+		if (fstatat(dstff->ff_dirfd, dstff->ff_name, &dstdir_st,
+		    AT_SYMLINK_NOFOLLOW) != 0)
+			return (errno);
+		if (!S_ISDIR(dstdir_st.st_mode))
+			return (ENOTDIR);
+		victim_acl = getacl(srcff, -1, srcpath);
+		fillacl(dstff);
+
+		/* May the entry be removed from the old directory? */
+		error = check_access(L9P_ACOP_UNLINK, srcff->ff_acl,
+		    &srcdir_st, victim_acl, &victim_st, srcff->ff_ai,
+		    (gid_t)-1);
+		l9p_acl_free(victim_acl);
+		if (error != 0)
+			return (error);
+
+		/* May an entry of this kind be added to the new one? */
+		if (S_ISDIR(victim_st.st_mode))
+			addop = L9P_ACE_ADD_SUBDIRECTORY;
+		else
+			addop = L9P_ACE_ADD_FILE;
+		error = check_access(addop, dstff->ff_acl, &dstdir_st,
+		    NULL, NULL, dstff->ff_ai, (gid_t)-1);
+		if (error != 0)
+			return (error);
+	}
+
+	if (renameat(srcff->ff_dirfd, srcpath, dstff->ff_dirfd, dstpath))
+		return (errno);
+
+	return (0);
+}
+
+/*
+ * Tunlinkat handler (9P2000.L): remove the entry tunlinkat.name from
+ * the directory behind lr_fid.  With L9PL_AT_REMOVEDIR set in the
+ * request flags the entry is removed as a directory, otherwise as a
+ * non-directory.  The caller must pass an unlink access check against
+ * both the directory and the entry.
+ *
+ * Returns EROFS on a read-only backend, otherwise 0 or an errno
+ * value.
+ */
+static int
+fs_unlinkat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dirfid;
+	struct fs_fid *dir;
+	struct l9p_acl *victim_acl;
+	struct stat dir_st, victim_st;
+	char victim[MAXPATHLEN];
+	int atflags, error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	dirfid = req->lr_fid;
+	dir = dirfid->lo_aux;
+	assert(dir != NULL);
+
+	error = fs_buildname(dirfid, req->lr_req.tunlinkat.name, victim,
+	    sizeof(victim));
+	if (error != 0)
+		return (error);
+
+	/* Stat the entry first, then the directory that holds it. */
+	if (fstatat(dir->ff_dirfd, victim, &victim_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (fstatat(dir->ff_dirfd, dir->ff_name, &dir_st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	fillacl(dir);
+	victim_acl = getacl(dir, -1, victim);
+	error = check_access(L9P_ACOP_UNLINK, dir->ff_acl, &dir_st,
+	    victim_acl, &victim_st, dir->ff_ai, (gid_t)-1);
+	l9p_acl_free(victim_acl);
+	if (error != 0)
+		return (error);
+
+	/* Map the 9P flag onto the host's unlinkat() flag. */
+	if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR)
+		atflags = AT_REMOVEDIR;
+	else
+		atflags = 0;
+
+	if (unlinkat(dir->ff_dirfd, victim, atflags) != 0)
+		return (errno);
+	return (0);
+}
+
+/*
+ * Release the backend state (struct fs_fid) hanging off a fid: close
+ * its descriptor and directory stream if present, destroy its mutex,
+ * free its name and ACL, and drop its reference on the shared
+ * authinfo.  The authinfo itself is destroyed when the last reference
+ * goes away.  A fid with no backend state is left untouched.
+ */
+static void
+fs_freefid(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	struct fs_authinfo *ai;
+	uint32_t remaining;
+
+	/* Nothing to do here */
+	if (ff == NULL)
+		return;
+
+	if (ff->ff_fd != -1)
+		close(ff->ff_fd);
+	if (ff->ff_dir)
+		closedir(ff->ff_dir);
+
+	/* Remember the authinfo before the fs_fid is freed. */
+	ai = ff->ff_ai;
+
+	pthread_mutex_destroy(&ff->ff_mtx);
+	free(ff->ff_name);
+	l9p_acl_free(ff->ff_acl);
+	free(ff);
+
+	/* The reference count is only touched with ai_mtx held. */
+	pthread_mutex_lock(&ai->ai_mtx);
+	remaining = --ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+
+	if (remaining != 0) {
+		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+		    (void *)ai, (u_long)remaining);
+		return;
+	}
+
+	/*
+	 * We *were* the last ref, no one can have gained a ref.
+	 */
+	L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
+	    (void *)ai);
+	pthread_mutex_destroy(&ai->ai_mtx);
+	free(ai);
+}
+
+/*
+ * Create a file system backend rooted at the directory descriptor
+ * rootfd.
+ *
+ *   backendp  receives the new backend on success; untouched on failure
+ *   rootfd    stored in the backend's softc as fs_rootfd
+ *   ro        when true, the backend refuses modifying operations
+ *
+ * The first call also initialises the file-wide attach mutex.  With
+ * WITH_CASPER, Casper channels to the "system.pwd" and "system.grp"
+ * services are opened and stored in the softc; otherwise the password
+ * database is kept open with setpassent(1).
+ *
+ * Returns 0 on success and -1 on failure.  On a mutex-initialisation
+ * failure errno holds the pthread error code.  Everything allocated
+ * or opened here is released again before a failure return.
+ */
+int
+l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
+{
+	struct l9p_backend *backend;
+	struct fs_softc *sc;
+	int error;
+#if defined(WITH_CASPER)
+	cap_channel_t *capcas;
+	int serrno;
+#endif
+
+	if (!fs_attach_mutex_inited) {
+		error = pthread_mutex_init(&fs_attach_mutex, NULL);
+		if (error) {
+			errno = error;
+			return (-1);
+		}
+		fs_attach_mutex_inited = true;
+	}
+
+	/* No NULL checks: l9p_malloc() results are used directly here. */
+	backend = l9p_malloc(sizeof(*backend));
+	backend->attach = fs_attach;
+	backend->clunk = fs_clunk;
+	backend->create = fs_create;
+	backend->open = fs_open;
+	backend->read = fs_read;
+	backend->remove = fs_remove;
+	backend->stat = fs_stat;
+	backend->walk = fs_walk;
+	backend->write = fs_write;
+	backend->wstat = fs_wstat;
+	backend->statfs = fs_statfs;
+	backend->lopen = fs_lopen;
+	backend->lcreate = fs_lcreate;
+	backend->symlink = fs_symlink;
+	backend->mknod = fs_mknod;
+	backend->rename = fs_rename;
+	backend->readlink = fs_readlink;
+	backend->getattr = fs_getattr;
+	backend->setattr = fs_setattr;
+	backend->xattrwalk = fs_xattrwalk;
+	backend->xattrcreate = fs_xattrcreate;
+	backend->readdir = fs_readdir;
+	backend->fsync = fs_fsync;
+	backend->lock = fs_lock;
+	backend->getlock = fs_getlock;
+	backend->link = fs_link;
+	backend->mkdir = fs_mkdir;
+	backend->renameat = fs_renameat;
+	backend->unlinkat = fs_unlinkat;
+	backend->freefid = fs_freefid;
+
+	sc = l9p_malloc(sizeof(*sc));
+	sc->fs_rootfd = rootfd;
+	sc->fs_readonly = ro;
+	backend->softc = sc;
+
+#if defined(WITH_CASPER)
+	capcas = cap_init();
+	if (capcas == NULL)
+		goto fail;
+
+	sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
+	if (sc->fs_cappwd == NULL)
+		goto fail_casper;
+
+	sc->fs_capgrp = cap_service_open(capcas, "system.grp");
+	if (sc->fs_capgrp == NULL)
+		goto fail_pwd;
+
+	cap_setpassent(sc->fs_cappwd, 1);
+	cap_setgroupent(sc->fs_capgrp, 1);
+	/* The main Casper channel is only needed to open the services. */
+	cap_close(capcas);
+#else
+	setpassent(1);
+#endif
+
+	*backendp = backend;
+	return (0);
+
+#if defined(WITH_CASPER)
+	/*
+	 * Failure unwinding: close whatever Casper channels were opened
+	 * and free both allocations, keeping the errno of the call that
+	 * failed.
+	 * NOTE(review): assumes l9p_malloc() memory may be released with
+	 * free(), as other allocations in this file are -- confirm.
+	 */
+fail_pwd:
+	serrno = errno;
+	cap_close(sc->fs_cappwd);
+	errno = serrno;
+fail_casper:
+	serrno = errno;
+	cap_close(capcas);
+	errno = serrno;
+fail:
+	serrno = errno;
+	free(sc);
+	free(backend);
+	errno = serrno;
+	return (-1);
+#endif
+}
diff --git a/backend/fs.h b/backend/fs.h
new file mode 100644
index 000000000000..84b37171c271
--- /dev/null
+++ b/backend/fs.h
@@ -0,0 +1,37 @@
+
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_BACKEND_FS_H
+#define LIB9P_BACKEND_FS_H
+
+#include <stdbool.h>
+#include "backend.h"
+
+int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro);
+
+#endif  /* LIB9P_BACKEND_FS_H */
diff --git a/connection.c b/connection.c
new file mode 100644
index 000000000000..20c27796b829
--- /dev/null
+++ b/connection.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "threadpool.h"
+#include "backend/backend.h"
+
+/* Create a server bound to 'backend' and hand it back through *serverp. */
+int
+l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend)
+{
+	struct l9p_server *srv = l9p_calloc(1, sizeof (*srv));
+
+	LIST_INIT(&srv->ls_conns);	/* no connections yet */
+	srv->ls_backend = backend;
+	srv->ls_max_version = L9P_2000L;
+
+	*serverp = srv;
+	return (0);
+}
+
+/* Allocate a connection on 'server': 0 with *conn set, or -1 on failure. */
+int
+l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn)
+{
+	struct l9p_connection *c;
+
+	assert(server != NULL);
+	assert(conn != NULL);
+
+	if ((c = calloc(1, sizeof (*c))) == NULL)
+		return (-1);
+	c->lc_server = server;
+	c->lc_msize = L9P_DEFAULT_MSIZE;
+	if (l9p_threadpool_init(&c->lc_tp, L9P_NUMTHREADS) != 0) {
+		free(c);
+		return (-1);
+	}
+	/* Per-connection fid and tag tables (100: presumably bucket count). */
+	ht_init(&c->lc_files, 100);
+	ht_init(&c->lc_requests, 100);
+	LIST_INSERT_HEAD(&server->ls_conns, c, lc_link);
+	*conn = c;
+	return (0);
+}
+
+void
+l9p_connection_free(struct l9p_connection *conn)
+{
+	/* Unlink conn from the list it joined via lc_link, then free it. */
+	LIST_REMOVE(conn, lc_link);
+	free(conn);	/* only the struct itself is released here */
+}
+
+/*
+ * Unpack one received message (iov/niov) into a new request and hand it
+ * to the connection's thread pool.  Any failure is logged and the request
+ * is dropped without a reply.  'aux' is stored in the request as lr_aux.
+ */
+void
+l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    const size_t niov, void *aux)
+{
+	struct l9p_request *req;
+
+	/* lm_iov is a fixed-size array; never copy more than it can hold. */
+	if (niov > sizeof (req->lr_req_msg.lm_iov) /
+	    sizeof (req->lr_req_msg.lm_iov[0])) {
+		L9P_LOG(L9P_WARNING, "too many iovecs in received message");
+		return;
+	}
+
+	req = l9p_calloc(1, sizeof (struct l9p_request));
+	req->lr_aux = aux;
+	req->lr_conn = conn;
+	req->lr_req_msg.lm_mode = L9P_UNPACK;
+	req->lr_req_msg.lm_niov = niov;
+	memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov);
+	req->lr_resp_msg.lm_mode = L9P_PACK;
+
+	if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) {
+		L9P_LOG(L9P_WARNING, "cannot unpack received message");
+		goto drop;
+	}
+
+	if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) {
+		L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d",
+		    req->lr_req.hdr.tag);
+		goto drop;
+	}
+
+	if (conn->lc_lt.lt_get_response_buffer(req, req->lr_resp_msg.lm_iov,
+	    &req->lr_resp_msg.lm_niov, conn->lc_lt.lt_aux)) {
+		L9P_LOG(L9P_WARNING, "cannot obtain buffers for response");
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+		goto drop;
+	}
+
+	/* NB: the pool decides whether to queue or run now (Tflush must). */
+	l9p_threadpool_run(&conn->lc_tp, req);
+	return;
+drop:
+	l9p_freefcall(&req->lr_req);
+	free(req);
+}
+
+void
+l9p_connection_close(struct l9p_connection *conn)
+{
+	struct ht_iter iter;
+	struct l9p_fid *fid;
+	struct l9p_request *req;
+	/* Stops the pool, drops requests and fids; conn itself is not freed. */
+	L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down");
+	l9p_threadpool_shutdown(&conn->lc_tp);
+
+	/* Drain pending requests (if any) */
+	L9P_LOG(L9P_DEBUG, "draining pending requests");
+	ht_iter(&conn->lc_requests, &iter);
+	while ((req = ht_next(&iter)) != NULL) {
+#ifdef notyet
+		/* XXX would be good to know if there is anyone listening */
+		if (anyone listening) {
+			/* XXX crude - ops like Tclunk should succeed */
+			req->lr_error = EINTR;
+			l9p_respond(req, false, false);
+		} else
+#endif
+		l9p_respond(req, true, false);	/* use no-answer path */
+		ht_remove_at_iter(&iter);	/* unlink the current entry */
+	}
+
+	/* Close opened files (if any); the backend releases its side first */
+	L9P_LOG(L9P_DEBUG, "closing opened files");
+	ht_iter(&conn->lc_files, &iter);
+	while ((fid = ht_next(&iter)) != NULL) {
+		conn->lc_server->ls_backend->freefid(
+		    conn->lc_server->ls_backend->softc, fid);
+		free(fid);
+		ht_remove_at_iter(&iter);
+	}
+
+	ht_destroy(&conn->lc_requests);	/* both loops removed every entry */
+	ht_destroy(&conn->lc_files);
+}
+
+/*
+ * Reserve fid number 'fid' on conn.  Returns the new entry, or NULL
+ * when the number cannot be added to the connection's fid table.
+ */
+struct l9p_fid *
+l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid)
+{
+	struct l9p_fid *ent = l9p_calloc(1, sizeof (*ent));
+
+	ent->lo_fid = fid;
+
+	/*
+	 * The entry goes into the table without being marked valid, which
+	 * is what we want; the insert fails if the fid number is in use.
+	 */
+	if (ht_add(&conn->lc_files, fid, ent) == 0)
+		return (ent);
+
+	free(ent);
+	return (NULL);
+}
+
+/* Release an already-invalidated fid: backend state, table slot, memory. */
+void
+l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid)
+{
+	struct l9p_backend *backend;
+
+	/* callers must have cleared the valid bit before getting here */
+	assert(l9p_fid_isvalid(fid) == false);
+
+	backend = conn->lc_server->ls_backend;
+	backend->freefid(backend->softc, fid);
+	ht_remove(&conn->lc_files, fid->lo_fid);
+	free(fid);
+}
diff --git a/example/Makefile b/example/Makefile
new file mode 100644
index 000000000000..94b218099c34
--- /dev/null
+++ b/example/Makefile
@@ -0,0 +1,10 @@
+PROG=	server
+SRCS=	server.c
+MAN=
+
+CFLAGS=	-pthread -g -O0
+
+LDFLAGS=-L..
+LDADD=	-lsbuf -l9p -lcasper -lcap_pwd -lcap_grp
+
+.include <bsd.prog.mk>
diff --git a/example/server.c b/example/server.c
new file mode 100644
index 000000000000..971c239c8526
--- /dev/null
+++ b/example/server.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <err.h>
+#include <unistd.h>
+#include "../lib9p.h"
+#include "../backend/fs.h"
+#include "../transport/socket.h"
+
+/*
+ * Example 9P server: exports <path> through the fs backend and listens
+ * on host:port (defaults "0.0.0.0" and "564").  -r is handed to the
+ * backend as its 'ro' flag.  Every failure exits with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+	static const char usage[] =
+	    "Usage: server [-h <host>] [-p <port>] [-r] <path>";
+	struct l9p_backend *fs_backend;
+	struct l9p_server *server;
+	char *host = "0.0.0.0";
+	char *port = "564";
+	char *path;
+	bool ro = false;
+	int rootfd;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "h:p:r")) != -1) {
+		if (opt == 'h')
+			host = optarg;
+		else if (opt == 'p')
+			port = optarg;
+		else if (opt == 'r')
+			ro = true;
+		else	/* '?' or anything unexpected */
+			errx(1, "%s", usage);
+	}
+
+	/* the first non-option argument names the directory to export */
+	if (optind >= argc)
+		errx(1, "%s", usage);
+	path = argv[optind];
+
+	rootfd = open(path, O_DIRECTORY);
+	if (rootfd < 0)
+		err(1, "cannot open root directory");
+
+	if (l9p_backend_fs_init(&fs_backend, rootfd, ro) != 0)
+		err(1, "cannot init backend");
+
+	if (l9p_server_init(&server, fs_backend) != 0)
+		err(1, "cannot create server");
+
+	/* explicitly select 9P2000.L as the highest version offered */
+	server->ls_max_version = L9P_2000L;
+	if (l9p_start_server(server, host, port))
+		err(1, "l9p_start_server() failed");
+
+	/* XXX - we never get here, l9p_start_server does not return */
+	exit(0);
+}
diff --git a/fcall.h b/fcall.h
new file mode 100644
index 000000000000..f779ea6ad56f
--- /dev/null
+++ b/fcall.h
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#ifndef LIB9P_FCALL_H
+#define LIB9P_FCALL_H
+
+#include <stdint.h>
+
+#define L9P_MAX_WELEM   256
+
+/*
+ * Function call/reply (Tfoo/Rfoo) numbers.
+ *
+ * These are protocol code numbers, so the exact values
+ * matter.  However, __FIRST and __LAST_PLUS_ONE are for
+ * debug code, and just need to encompass the entire range.
+ *
+ * Note that we rely (in the debug code) on Rfoo == Tfoo+1.
+ */
+enum l9p_ftype {
+	L9P__FIRST = 6,		/* NB: must be <= all legal values */
+	L9P_TLERROR = 6,	/* illegal; exists for parity with Rlerror */
+	L9P_RLERROR,
+	L9P_TSTATFS = 8,
+	L9P_RSTATFS,
+	L9P_TLOPEN = 12,
+	L9P_RLOPEN,
+	L9P_TLCREATE = 14,
+	L9P_RLCREATE,
+	L9P_TSYMLINK = 16,
+	L9P_RSYMLINK,
+	L9P_TMKNOD = 18,
+	L9P_RMKNOD,
+	L9P_TRENAME = 20,
+	L9P_RRENAME,
+	L9P_TREADLINK = 22,
+	L9P_RREADLINK,
+	L9P_TGETATTR = 24,
+	L9P_RGETATTR,
+	L9P_TSETATTR = 26,
+	L9P_RSETATTR,
+	L9P_TXATTRWALK = 30,
+	L9P_RXATTRWALK,
+	L9P_TXATTRCREATE = 32,
+	L9P_RXATTRCREATE,
+	L9P_TREADDIR = 40,
+	L9P_RREADDIR,
+	L9P_TFSYNC = 50,
+	L9P_RFSYNC,
+	L9P_TLOCK = 52,
+	L9P_RLOCK,
+	L9P_TGETLOCK = 54,
+	L9P_RGETLOCK,
+	L9P_TLINK = 70,
+	L9P_RLINK,
+	L9P_TMKDIR = 72,
+	L9P_RMKDIR,
+	L9P_TRENAMEAT = 74,
+	L9P_RRENAMEAT,
+	L9P_TUNLINKAT = 76,
+	L9P_RUNLINKAT,
+	L9P_TVERSION = 100,
+	L9P_RVERSION,
+	L9P_TAUTH = 102,
+	L9P_RAUTH,
+	L9P_TATTACH = 104,
+	L9P_RATTACH,
+	L9P_TERROR = 106, 	/* illegal */
+	L9P_RERROR,
+	L9P_TFLUSH = 108,
+	L9P_RFLUSH,
+	L9P_TWALK = 110,
+	L9P_RWALK,
+	L9P_TOPEN = 112,
+	L9P_ROPEN,
+	L9P_TCREATE = 114,
+	L9P_RCREATE,
+	L9P_TREAD = 116,
+	L9P_RREAD,
+	L9P_TWRITE = 118,
+	L9P_RWRITE,
+	L9P_TCLUNK = 120,
+	L9P_RCLUNK,
+	L9P_TREMOVE = 122,
+	L9P_RREMOVE,
+	L9P_TSTAT = 124,
+	L9P_RSTAT,
+	L9P_TWSTAT = 126,
+	L9P_RWSTAT,
+	L9P__LAST_PLUS_1,	/* NB: must be last */
+};
+
+/*
+ * When a Tfoo request comes over the wire, we decode it
+ * (pack.c) from wire format into a request laid out in
+ * a "union l9p_fcall" object.  This object is not in wire
+ * format, but rather in something more convenient for us
+ * to operate on.
+ *
+ * We then dispatch the request (request.c, backend/fs.c) and
+ * use another "union l9p_fcall" object to build a reply.
+ * The reply is converted to wire format on the way back out
+ * (pack.c again).
+ *
+ * All sub-objects start with a header containing the request
+ * or reply type code and two-byte tag, and whether or not it
+ * is needed, a four-byte fid.
+ *
+ * What this means here is that the data structures within
+ * the union can be shared across various requests and replies.
+ * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and
+ * SYMLINK are all fairly similar (providing a qid and sometimes
+ * an iounit) and hence can all use the l9p_f_ropen structure.
+ * Which structures are used for which operations is somewhat
+ * arbitrary; for programming ease, if an operation shares a
+ * data structure, it still has its own name: there are union
+ * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and
+ * rsymlink, even though all use struct l9p_f_ropen.
+ *
+ * The big exception to the above rule is struct l9p_f_io, which
+ * is used as both request and reply for all of READ, WRITE, and
+ * READDIR.  Moreover, the READDIR reply must be pre-packed into
+ * wire format (it is handled like raw data a la READ).
+ *
+ * Some request messages (e.g., TREADLINK) fit in a header, having
+ * just type code, tag, and fid.  These have no separate data
+ * structure, nor union member name.  Similarly, some reply
+ * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type
+ * code and tag.
+ */
+
+/*
+ * Type code bits in (the first byte of) a qid.
+ */
+enum l9p_qid_type {
+	L9P_QTDIR = 0x80, /* type bit for directories */
+	L9P_QTAPPEND = 0x40, /* type bit for append only files */
+	L9P_QTEXCL = 0x20, /* type bit for exclusive use files */
+	L9P_QTMOUNT = 0x10, /* type bit for mounted channel */
+	L9P_QTAUTH = 0x08, /* type bit for authentication file */
+	L9P_QTTMP = 0x04, /* type bit for non-backed-up file */
+	L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */
+	L9P_QTFILE = 0x00 /* type bits for plain file */
+};
+
+/*
+ * Extra permission bits in create and file modes (stat).
+ */
+#define L9P_DMDIR 0x80000000
+enum {
+	L9P_DMAPPEND = 0x40000000,
+	L9P_DMEXCL = 0x20000000,
+	L9P_DMMOUNT = 0x10000000,
+	L9P_DMAUTH = 0x08000000,
+	L9P_DMTMP = 0x04000000,
+	L9P_DMSYMLINK = 0x02000000,
+	/* 9P2000.u extensions */
+	L9P_DMDEVICE = 0x00800000,
+	L9P_DMNAMEDPIPE = 0x00200000,
+	L9P_DMSOCKET = 0x00100000,
+	L9P_DMSETUID = 0x00080000,
+	L9P_DMSETGID = 0x00040000,
+};
+
+/*
+ * Open/create mode bits in 9P2000 and 9P2000.u operations
+ * (not Linux lopen and lcreate flags, which are different).
+ * Note that the mode field is only one byte wide.
+ */
+enum l9p_omode {
+	L9P_OREAD = 0,	/* open for read */
+	L9P_OWRITE = 1,	/* write */
+	L9P_ORDWR = 2,	/* read and write */
+	L9P_OEXEC = 3,	/* execute, == read but check execute permission */
+	L9P_OACCMODE = 3, /* mask for the above access-mode bits */
+	L9P_OTRUNC = 16,	/* or'ed in (except for exec), truncate file first */
+	L9P_OCEXEC = 32,	/* or'ed in, close on exec */
+	L9P_ORCLOSE = 64,	/* or'ed in, remove on close */
+	L9P_ODIRECT = 128,	/* or'ed in, direct access */
+};
+
+/*
+ * Flag bits in 9P2000.L operations (Tlopen, Tlcreate).  These are
+ * basically just the Linux O_* flags.  The bottom 3 bits are the
+ * same as for l9p_omode, although open-for-exec is not used:
+ * instead, the client does a Tgetattr and checks the mode for
+ * execute bits, then just opens for reading.
+ *
+ * Each L_O_xxx is just value O_xxx has on Linux in <fcntl.h>;
+ * not all are necessarily used.  From observation, we do get
+ * L_O_CREAT and L_O_EXCL when creating with exclusive, and always
+ * get L_O_LARGEFILE.  We do get L_O_APPEND when opening for
+ * append.  We also get both L_O_DIRECT and L_O_DIRECTORY set
+ * when opening directories.
+ *
+ * We probably never get L_O_NOCTTY which makes no sense, and
+ * some of the other options may need to be handled on the client.
+ */
+enum l9p_l_o_flags {
+	L9P_L_O_CREAT =		000000100U,
+	L9P_L_O_EXCL =		000000200U,
+	L9P_L_O_NOCTTY =	000000400U,
+	L9P_L_O_TRUNC =		000001000U,
+	L9P_L_O_APPEND =	000002000U,
+	L9P_L_O_NONBLOCK =	000004000U,
+	L9P_L_O_DSYNC =		000010000U,
+	L9P_L_O_FASYNC =	000020000U,
+	L9P_L_O_DIRECT =	000040000U,
+	L9P_L_O_LARGEFILE =	000100000U,
+	L9P_L_O_DIRECTORY =	000200000U,
+	L9P_L_O_NOFOLLOW =	000400000U,
+	L9P_L_O_NOATIME =	001000000U,
+	L9P_L_O_CLOEXEC =	002000000U,
+	L9P_L_O_SYNC =		004000000U,
+	L9P_L_O_PATH =		010000000U,
+	L9P_L_O_TMPFILE =	020000000U,
+};
+
+struct l9p_hdr {
+	uint8_t type;	/* request or reply type code (enum l9p_ftype) */
+	uint16_t tag;	/* two-byte tag */
+	uint32_t fid;	/* four-byte fid, present whether or not needed */
+};
+
+struct l9p_qid {
+	uint8_t  type;	/* type code bits, see enum l9p_qid_type */
+	uint32_t version;
+	uint64_t path;
+};
+
+struct l9p_stat {
+	uint16_t type;
+	uint32_t dev;
+	struct l9p_qid qid;
+	uint32_t mode;
+	uint32_t atime;
+	uint32_t mtime;
+	uint64_t length;
+	char *name;
+	char *uid;
+	char *gid;
+	char *muid;
+	char *extension;
+	uint32_t n_uid;
+	uint32_t n_gid;
+	uint32_t n_muid;
+};
+
+#define	L9P_FSTYPE	 0x01021997
+
+struct l9p_statfs {
+	uint32_t type;		/* file system type */
+	uint32_t bsize;		/* block size for I/O */
+	uint64_t blocks;	/* file system size (bsize-byte blocks) */
+	uint64_t bfree;		/* free blocks in fs */
+	uint64_t bavail;	/* free blocks avail to non-superuser */
+	uint64_t files;		/* file nodes in file system (# inodes) */
+	uint64_t ffree;		/* free file nodes in fs */
+	uint64_t fsid;		/* file system identifier */
+	uint32_t namelen;	/* maximum length of filenames */
+};
+
+struct l9p_f_version {
+	struct l9p_hdr hdr;
+	uint32_t msize;
+	char *version;
+};
+
+struct l9p_f_tflush {
+	struct l9p_hdr hdr;
+	uint16_t oldtag;
+};
+
+struct l9p_f_error {
+	struct l9p_hdr hdr;
+	char *ename;
+	uint32_t errnum;
+};
+
+struct l9p_f_ropen {
+	struct l9p_hdr hdr;
+	struct l9p_qid qid;
+	uint32_t iounit;
+};
+
+struct l9p_f_rauth {
+	struct l9p_hdr hdr;
+	struct l9p_qid aqid;
+};
+
+struct l9p_f_attach {
+	struct l9p_hdr hdr;
+	uint32_t afid;
+	char *uname;
+	char *aname;
+	uint32_t n_uname;
+};
+#define	L9P_NOFID ((uint32_t)-1)	/* in Tattach, no auth fid */
+#define	L9P_NONUNAME ((uint32_t)-1)	/* in Tattach, no n_uname */
+
+struct l9p_f_tcreate {
+	struct l9p_hdr hdr;
+	uint32_t perm;
+	char *name;
+	uint8_t mode; /* +Topen */
+	char *extension;
+};
+
+struct l9p_f_twalk {
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	uint16_t nwname;
+	char *wname[L9P_MAX_WELEM];
+};
+
+struct l9p_f_rwalk {
+	struct l9p_hdr hdr;
+	uint16_t nwqid;
+	struct l9p_qid wqid[L9P_MAX_WELEM];
+};
+
+struct l9p_f_io {
+	struct l9p_hdr hdr;
+	uint64_t offset; /* Tread, Twrite, Treaddir */
+	uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */
+};
+
+struct l9p_f_rstat {
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_twstat {
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_rstatfs {
+	struct l9p_hdr hdr;
+	struct l9p_statfs statfs;
+};
+
+/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat. */
+struct l9p_f_tlcreate {
+	struct l9p_hdr hdr;
+	char *name;		/* Tlcreate, Tmkdir, Tunlinkat */
+	uint32_t flags;		/* Tlcreate, Tlopen, Tmkdir, Tunlinkat */
+	uint32_t mode;		/* Tlcreate, Tmkdir */
+	uint32_t gid;		/* Tlcreate, Tmkdir */
+};
+
+struct l9p_f_tsymlink {
+	struct l9p_hdr hdr;
+	char *name;
+	char *symtgt;
+	uint32_t gid;
+};
+
+struct l9p_f_tmknod {
+	struct l9p_hdr hdr;
+	char *name;
+	uint32_t mode;
+	uint32_t major;
+	uint32_t minor;
+	uint32_t gid;
+};
+
+struct l9p_f_trename {
+	struct l9p_hdr hdr;
+	uint32_t dfid;
+	char *name;
+};
+
+struct l9p_f_rreadlink {
+	struct l9p_hdr hdr;
+	char *target;
+};
+
+struct l9p_f_tgetattr {
+	struct l9p_hdr hdr;
+	uint64_t request_mask;
+};
+
+struct l9p_f_rgetattr {
+	struct l9p_hdr hdr;
+	uint64_t valid;
+	struct l9p_qid qid;
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t nlink;
+	uint64_t rdev;
+	uint64_t size;
+	uint64_t blksize;
+	uint64_t blocks;
+	uint64_t atime_sec;
+	uint64_t atime_nsec;
+	uint64_t mtime_sec;
+	uint64_t mtime_nsec;
+	uint64_t ctime_sec;
+	uint64_t ctime_nsec;
+	uint64_t btime_sec;
+	uint64_t btime_nsec;
+	uint64_t gen;
+	uint64_t data_version;
+};
+
+/* Fields in req->request_mask and reply->valid for Tgetattr, Rgetattr. */
+enum l9pl_getattr_flags {
+	L9PL_GETATTR_MODE = 0x00000001,
+	L9PL_GETATTR_NLINK = 0x00000002,
+	L9PL_GETATTR_UID = 0x00000004,
+	L9PL_GETATTR_GID = 0x00000008,
+	L9PL_GETATTR_RDEV = 0x00000010,
+	L9PL_GETATTR_ATIME = 0x00000020,
+	L9PL_GETATTR_MTIME = 0x00000040,
+	L9PL_GETATTR_CTIME = 0x00000080,
+	L9PL_GETATTR_INO = 0x00000100,
+	L9PL_GETATTR_SIZE = 0x00000200,
+	L9PL_GETATTR_BLOCKS = 0x00000400,
+	/* everything up to and including BLOCKS is BASIC */
+	L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE |
+		L9PL_GETATTR_NLINK |
+		L9PL_GETATTR_UID |
+		L9PL_GETATTR_GID |
+		L9PL_GETATTR_RDEV |
+		L9PL_GETATTR_ATIME |
+		L9PL_GETATTR_MTIME |
+		L9PL_GETATTR_CTIME |
+		L9PL_GETATTR_INO |
+		L9PL_GETATTR_SIZE |
+		L9PL_GETATTR_BLOCKS,
+	L9PL_GETATTR_BTIME = 0x00000800,
+	L9PL_GETATTR_GEN = 0x00001000,
+	L9PL_GETATTR_DATA_VERSION = 0x00002000,
+	/* BASIC + birthtime + gen + data-version = ALL */
+	L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC |
+		L9PL_GETATTR_BTIME |
+		L9PL_GETATTR_GEN |
+		L9PL_GETATTR_DATA_VERSION,
+};
+
+struct l9p_f_tsetattr {
+	struct l9p_hdr hdr;
+	uint32_t valid;
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t size;
+	uint64_t atime_sec;	/* if valid & L9PL_SETATTR_ATIME_SET */
+	uint64_t atime_nsec;	/* (else use on-server time) */
+	uint64_t mtime_sec;	/* if valid & L9PL_SETATTR_MTIME_SET */
+	uint64_t mtime_nsec;	/* (else use on-server time) */
+};
+
+/* Fields in req->valid for Tsetattr. */
+enum l9pl_setattr_flags {
+	L9PL_SETATTR_MODE = 0x00000001,
+	L9PL_SETATTR_UID = 0x00000002,
+	L9PL_SETATTR_GID = 0x00000004,
+	L9PL_SETATTR_SIZE = 0x00000008,
+	L9PL_SETATTR_ATIME = 0x00000010,
+	L9PL_SETATTR_MTIME = 0x00000020,
+	L9PL_SETATTR_CTIME = 0x00000040,
+	L9PL_SETATTR_ATIME_SET = 0x00000080,
+	L9PL_SETATTR_MTIME_SET = 0x00000100,
+};
+
+struct l9p_f_txattrwalk {
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	char *name;
+};
+
+struct l9p_f_rxattrwalk {
+	struct l9p_hdr hdr;
+	uint64_t size;
+};
+
+struct l9p_f_txattrcreate {
+	struct l9p_hdr hdr;
+	char *name;
+	uint64_t attr_size;
+	uint32_t flags;
+};
+
+struct l9p_f_tlock {
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint32_t flags;		/* from l9pl_lock_flags */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+enum l9pl_lock_type {
+	L9PL_LOCK_TYPE_RDLOCK =	0,
+	L9PL_LOCK_TYPE_WRLOCK =	1,
+	L9PL_LOCK_TYPE_UNLOCK =	2,
+};
+
+enum l9pl_lock_flags {
+	L9PL_LOCK_TYPE_BLOCK = 1,
+	L9PL_LOCK_TYPE_RECLAIM = 2,
+};
+
+struct l9p_f_rlock {
+	struct l9p_hdr hdr;
+	uint8_t status;		/* from l9pl_lock_status */
+};
+
+enum l9pl_lock_status {
+	L9PL_LOCK_SUCCESS = 0,
+	L9PL_LOCK_BLOCKED = 1,
+	L9PL_LOCK_ERROR = 2,
+	L9PL_LOCK_GRACE = 3,
+};
+
+struct l9p_f_getlock {
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+struct l9p_f_tlink {
+	struct l9p_hdr hdr;
+	uint32_t dfid;
+	char *name;
+};
+
+struct l9p_f_trenameat {
+	struct l9p_hdr hdr;
+	char *oldname;
+	uint32_t newdirfid;
+	char *newname;
+};
+
+/*
+ * Flags in Tunlinkat (which re-uses f_tlcreate data structure but
+ * with different meaning).
+ */
+enum l9p_l_unlinkat_flags {
+	/* not sure if any other AT_* flags are passed through */
+	L9PL_AT_REMOVEDIR =	0x0200,
+};
+
+union l9p_fcall {
+	struct l9p_hdr hdr;
+	struct l9p_f_version version;
+	struct l9p_f_tflush tflush;
+	struct l9p_f_ropen ropen;
+	struct l9p_f_ropen rcreate;
+	struct l9p_f_ropen rattach;
+	struct l9p_f_error error;
+	struct l9p_f_rauth rauth;
+	struct l9p_f_attach tattach;
+	struct l9p_f_attach tauth;
+	struct l9p_f_tcreate tcreate;
+	struct l9p_f_tcreate topen;
+	struct l9p_f_twalk twalk;
+	struct l9p_f_rwalk rwalk;
+	struct l9p_f_twstat twstat;
+	struct l9p_f_rstat rstat;
+	struct l9p_f_rstatfs rstatfs;
+	struct l9p_f_tlcreate tlopen;
+	struct l9p_f_ropen rlopen;
+	struct l9p_f_tlcreate tlcreate;
+	struct l9p_f_ropen rlcreate;
+	struct l9p_f_tsymlink tsymlink;
+	struct l9p_f_ropen rsymlink;
+	struct l9p_f_tmknod tmknod;
+	struct l9p_f_ropen rmknod;
+	struct l9p_f_trename trename;
+	struct l9p_f_rreadlink rreadlink;
+	struct l9p_f_tgetattr tgetattr;
+	struct l9p_f_rgetattr rgetattr;
+	struct l9p_f_tsetattr tsetattr;
+	struct l9p_f_txattrwalk txattrwalk;
+	struct l9p_f_rxattrwalk rxattrwalk;
+	struct l9p_f_txattrcreate txattrcreate;
+	struct l9p_f_tlock tlock;
+	struct l9p_f_rlock rlock;
+	struct l9p_f_getlock getlock;
+	struct l9p_f_tlink tlink;
+	struct l9p_f_tlcreate tmkdir;
+	struct l9p_f_ropen rmkdir;
+	struct l9p_f_trenameat trenameat;
+	struct l9p_f_tlcreate tunlinkat;
+	struct l9p_f_io io;
+};
+
+#endif  /* LIB9P_FCALL_H */
diff --git a/fid.h b/fid.h
new file mode 100644
index 000000000000..cdfdd7ec93d0
--- /dev/null
+++ b/fid.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_FID_H
+#define LIB9P_FID_H
+
+#include <stdbool.h>
+
+/*
+ * Data structure for a fid.  All active fids in one session
+ * are stored in a hash table; the hash table provides the
+ * iterator to process them.  (See also l9p_connection in lib9p.h.)
+ *
+ * The back-end code has additional data per fid, found via
+ * lo_aux.  Currently this is allocated with a separate calloc().
+ *
+ * Most fids represent a file or directory, but a few are special
+ * purpose, including the auth fid from Tauth+Tattach, and the
+ * fids used for extended attributes.  We have our own set of
+ * flags here in lo_flags.
+ *
+ * Note that all new fids start as potentially-valid (reserving
+ * their 32-bit fid value), but not actually-valid.  If another
+ * (threaded) op is invoked on a not-yet-valid fid, the fid cannot
+ * be used.  A fid can also be locked against other threads, in
+ * which case they must wait for it: this happens during create
+ * and open, which on success result in the fid changing from a
+ * directory to a file.  (At least, all this applies in principle
+ * -- we're currently single-threaded per connection so the locks
+ * are nop-ed out and the valid bit is mainly just for debug.)
+ *
+ * Fids that are "open" (the underlying file or directory is open)
+ * are marked as well.
+ *
+ * Locking is managed by the front end (request.c); validation
+ * and type-marking can be done by either side as needed.
+ *
+ * Fid types and validity are manipulated by set* and unset*
+ * functions, and tested by is* ops.  Note that we only
+ * distinguish between "directory" and "not directory" at this
+ * level, i.e., symlinks and devices are just "not a directory
+ * fid".  Also, fids cannot be unset as auth or xattr fids,
+ * nor can an open fid become closed, except by being clunked.
+ * While files should not normally become directories, it IS normal
+ * for directory fids to become file fids due to Twalk operations.
+ *
+ * (These accessor functions are just to leave wiggle room for
+ * different future implementations.)
+ */
+struct l9p_fid {
+	void	*lo_aux;	/* back-end's additional per-fid data */
+	uint32_t lo_fid;	/* the 32-bit fid value */
+	uint32_t lo_flags;	/* L9P_LO_*; volatile atomic_t when threaded? */
+};
+
+enum l9p_lo_flags {
+	L9P_LO_ISAUTH = 0x01,	/* auth fid (from Tauth+Tattach) */
+	L9P_LO_ISDIR = 0x02,	/* directory, as opposed to anything else */
+	L9P_LO_ISOPEN = 0x04,	/* underlying file or directory is open */
+	L9P_LO_ISVALID = 0x08,	/* actually valid, not just reserved */
+	L9P_LO_ISXATTR = 0x10,	/* fid used for extended attributes */
+};
+
+/* True when fid is marked as an auth fid (L9P_LO_ISAUTH). */
+static inline bool l9p_fid_isauth(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISAUTH) ? true : false);
+}
+
+/* Mark fid as an auth fid; there is no matching unset operation. */
+static inline void l9p_fid_setauth(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISAUTH;
+}
+
+/* True when fid is marked as a directory fid (L9P_LO_ISDIR). */
+static inline bool l9p_fid_isdir(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISDIR) ? true : false);
+}
+
+/* Mark fid as referring to a directory. */
+static inline void l9p_fid_setdir(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISDIR;
+}
+
+/* Clear the directory mark, e.g. when a directory fid becomes a file fid. */
+static inline void l9p_fid_unsetdir(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags & ~(uint32_t)L9P_LO_ISDIR;
+}
+
+/* True when fid is marked open (L9P_LO_ISOPEN). */
+static inline bool l9p_fid_isopen(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISOPEN) ? true : false);
+}
+
+/* Mark fid as open; an open fid only stops being open by being clunked. */
+static inline void l9p_fid_setopen(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISOPEN;
+}
+
+/* True when fid is actually valid, not merely reserving its number. */
+static inline bool l9p_fid_isvalid(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISVALID) ? true : false);
+}
+
+/* Promote fid from potentially-valid to actually-valid. */
+static inline void l9p_fid_setvalid(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISVALID;
+}
+
+/* Clear the valid mark on fid (L9P_LO_ISVALID). */
+static inline void l9p_fid_unsetvalid(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags & ~(uint32_t)L9P_LO_ISVALID;
+}
+
+/* True when fid is marked as an extended-attribute fid. */
+static inline bool l9p_fid_isxattr(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISXATTR) ? true : false);
+}
+
+/* Mark fid as an extended-attribute fid; there is no matching unset. */
+static inline void l9p_fid_setxattr(struct l9p_fid *fid)
+{
+	fid->lo_flags = fid->lo_flags | L9P_LO_ISXATTR;
+}
+
+#endif  /* LIB9P_FID_H */
diff --git a/genacl.c b/genacl.c
new file mode 100644
index 000000000000..fed3d2ba10ce
--- /dev/null
+++ b/genacl.c
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "genacl.h"
+#include "fid.h"
+#include "log.h"
+
+/*
+ * Per-entry converter: translate one system ACL entry into one
+ * struct l9p_ace.  l9p_sysacl_to_acl() keeps the ACE only when the
+ * converter returns 0.
+ */
+typedef int econvertfn(acl_entry_t, struct l9p_ace *);
+
+/* Forward declarations of the file-local helpers defined below. */
+#ifndef __APPLE__
+static struct l9p_acl *l9p_new_acl(uint32_t acetype, uint32_t aceasize);
+static struct l9p_acl *l9p_growacl(struct l9p_acl *acl, uint32_t aceasize);
+static int l9p_count_aces(acl_t sysacl);
+static struct l9p_acl *l9p_sysacl_to_acl(int, acl_t, econvertfn *);
+#endif
+static bool l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids);
+static int l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids);
+
+/*
+ * Release an ACL.  The header and its trailing ACE array are handed
+ * to free() as a single object.
+ */
+void
+l9p_acl_free(struct l9p_acl *acl)
+{
+	void *mem = acl;
+
+	free(mem);
+}
+
+/*
+ * Is the given group ID tid (test-id) either the primary group gid
+ * or one of the ngids entries of gids[]?  gids is not touched when
+ * ngids is 0.
+ */
+static bool
+l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	size_t left;
+
+	if (gid == tid)
+		return (true);
+	/* Walk the group set front to back, stopping at the first hit. */
+	for (left = ngids; left != 0; left--) {
+		if (*gids++ == tid)
+			return (true);
+	}
+	return (false);
+}
+
+/* #define ACE_DEBUG */
+
+/*
+ * Note that NFSv4 tests are done on a "first match" basis.
+ * That is, we check each ACE sequentially until we run out
+ * of ACEs, or find something explicitly denied (DENIED!),
+ * or have cleared out all our attempt-something bits.  Once
+ * we come across an ALLOW entry for the bits we're trying,
+ * we clear those from the bits we're still looking for, in
+ * the order they appear.
+ *
+ * Audit and alarm ACEs are ignored.  So are ACEs flagged
+ * L9P_ACEF_INHERIT_ONLY_ACE: an inherit-only entry exists to be
+ * copied to new children and says nothing about access to the
+ * object that carries it.
+ *
+ * The result is either "definitely allowed" (we cleared
+ * all the bits), "definitely denied" (we hit a deny with
+ * some or all of the bits), or "unspecified".  We
+ * represent these three states as +1 (positive = yes = allow),
+ * -1 (negative = no = denied), or 0 (no strong answer).
+ *
+ * For our caller's convenience, if we are called with a
+ * mask of 0, we return 0 (no answer).
+ *
+ * mask is the set of L9P_ACE_* bits wanted, acl the ACL to walk
+ * and st the stat data of the object that ACL belongs to; uid,
+ * gid and gids[0..ngids) identify the requester.
+ */
+static int
+l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	uint32_t i;
+	struct l9p_ace *ace;
+#ifdef ACE_DEBUG
+	const char *acetype, *allowdeny;
+	bool show_tid;
+#endif
+	bool match;
+	uid_t tid;
+
+	if (mask == 0)
+		return (0);
+
+	for (i = 0; mask != 0 && i < acl->acl_nace; i++) {
+		ace = &acl->acl_aces[i];
+		switch (ace->ace_type) {
+		case L9P_ACET_ACCESS_ALLOWED:
+		case L9P_ACET_ACCESS_DENIED:
+			break;
+		default:
+			/* audit, alarm - ignore */
+			continue;
+		}
+		/*
+		 * Inherit-only ACEs never apply to the object that
+		 * holds them, only to children created later.
+		 */
+		if (ace->ace_flags & L9P_ACEF_INHERIT_ONLY_ACE)
+			continue;
+#ifdef ACE_DEBUG
+		show_tid = false;
+#endif
+		if (ace->ace_flags & L9P_ACEF_OWNER) {
+#ifdef ACE_DEBUG
+			acetype = "OWNER@";
+#endif
+			match = st->st_uid == uid;
+		} else if (ace->ace_flags & L9P_ACEF_GROUP) {
+#ifdef ACE_DEBUG
+			acetype = "GROUP@";
+#endif
+			match = l9p_ingroup(st->st_gid, gid, gids, ngids);
+		} else if (ace->ace_flags & L9P_ACEF_EVERYONE) {
+#ifdef ACE_DEBUG
+			acetype = "EVERYONE@";
+#endif
+			match = true;
+		} else {
+			/* Explicit ID: only IDs the size of a uid_t are usable. */
+			if (ace->ace_idsize != sizeof(tid))
+				continue;
+#ifdef ACE_DEBUG
+			show_tid = true;
+#endif
+			memcpy(&tid, &ace->ace_idbytes, sizeof(tid));
+			if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) {
+#ifdef ACE_DEBUG
+				acetype = "group";
+#endif
+				match = l9p_ingroup(tid, gid, gids, ngids);
+			} else {
+#ifdef ACE_DEBUG
+				acetype = "user";
+#endif
+				match = tid == uid;
+			}
+		}
+		/*
+		 * If this ACE applies to us, check remaining bits.
+		 * If any of those bits also apply, check the type:
+		 * DENY means "stop now", ALLOW means allow these bits
+		 * and keep checking.
+		 */
+#ifdef ACE_DEBUG
+		allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ?
+		    "deny" : "allow";
+#endif
+		if (match && (ace->ace_mask & (uint32_t)mask) != 0) {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s %d: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (u_int)mask, (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (u_int)mask, (u_int)ace->ace_mask);
+#endif
+			if (ace->ace_type == L9P_ACET_ACCESS_DENIED)
+				return (-1);
+			/* ALLOW: these bits are settled, stop looking for them. */
+			mask = (int32_t)((uint32_t)mask & ~ace->ace_mask);
+#ifdef ACE_DEBUG
+			L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x",
+			    (u_int)ace->ace_mask, (u_int)mask);
+#endif
+		} else {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s %d: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+#endif
+		}
+	}
+
+	/* Return 1 if access definitely granted. */
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s",
+	    (u_int)mask, mask ? "no-definitive-answer" : "ALLOW");
+#endif
+	return (mask == 0 ? 1 : 0);
+}
+
+/*
+ * Test against ACLs.
+ *
+ * The return value is normally 0 (access allowed) or EPERM
+ * (access denied), so it could just be a boolean....
+ *
+ * For "make new dir in dir" and "remove dir in dir", you must
+ * set the mask to test the directory permissions (not ADD_FILE but
+ * ADD_SUBDIRECTORY, and DELETE_CHILD).  For "make new file in dir"
+ * you must set the opmask to test file ADD_FILE.
+ *
+ * The L9P_ACE_DELETE flag means "can delete this thing"; it's not
+ * clear whether it should override the parent directory's ACL if
+ * any.  In our case it does not, but a caller may try
+ * L9P_ACE_DELETE_CHILD (separately, on its own) and then a
+ * (second, separate) L9P_ACE_DELETE, to make the permissions work
+ * as "or" instead of "and".
+ *
+ * Pass a NULL parent/pstat if they are not applicable, e.g.,
+ * for doing operations on an existing file, such as reading or
+ * writing data or attributes.  Pass in a null child/cstat if
+ * that's not applicable, such as creating a new file/dir.
+ *
+ * NB: it's probably wise to allow the owner of any file to update
+ * the ACLs of that file, but we leave that test to the caller.
+ */
+int
+l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args)
+{
+	struct l9p_acl *parent, *child;
+	struct stat *pstat, *cstat;
+	int32_t pop, cop;
+	size_t ngids;
+	uid_t uid;
+	gid_t gid, *gids;
+	int panswer, canswer;
+
+	assert(opmask != 0);
+	parent = args->aca_parent;
+	pstat = args->aca_pstat;
+	child = args->aca_child;
+	cstat = args->aca_cstat;
+	uid = args->aca_uid;
+	gid = args->aca_gid;
+	gids = args->aca_groups;
+	ngids = args->aca_ngroups;
+
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG,
+	    "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zu",
+	    (u_int)opmask, (long)uid, (long)gid, ngids);
+#endif
+	/*
+	 * If caller said "superuser semantics", check that first.
+	 * Note that we apply them regardless of ACLs.
+	 */
+	if (uid == 0 && args->aca_superuser)
+		return (0);
+
+	/*
+	 * If told to ignore ACLs and use only stat-based permissions,
+	 * discard any non-NULL ACL pointers.
+	 *
+	 * This will need some fancying up when we support POSIX ACLs.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0)
+		parent = child = NULL;
+
+	assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(parent == NULL || pstat != NULL);
+	assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(child == NULL || cstat != NULL);
+	assert(pstat != NULL || cstat != NULL);
+
+	/*
+	 * If the operation is UNLINK we should have either both ACLs
+	 * or no ACLs, but we won't require that here.
+	 *
+	 * If a parent ACL is supplied, it's a directory by definition.
+	 * Make sure we're allowed to do this there, whatever this is.
+	 * If a child ACL is supplied, check it too.  Note that the
+	 * DELETE permission only applies in the child though, not
+	 * in the parent, and the DELETE_CHILD only applies in the
+	 * parent.
+	 */
+	pop = cop = opmask;
+	if (parent != NULL || pstat != NULL) {
+		/*
+		 * Remove child-only bits from parent op and
+		 * parent-only bits from child op.
+		 *
+		 * L9P_ACE_DELETE is child-only.
+		 *
+		 * L9P_ACE_DELETE_CHILD is parent-only, and three data
+		 * access bits overlap with three directory access bits.
+		 * We should have child==NULL && cstat==NULL, so the
+		 * three data bits should be redundant, but it's
+		 * both trivial and safest to remove them anyway.
+		 */
+		pop &= ~L9P_ACE_DELETE;
+		cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY |
+		    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY);
+	} else {
+		/*
+		 * Remove child-only bits from parent op.  We need
+		 * not bother since we just found we have no parent
+		 * and no pstat, and hence won't actually *use* pop.
+		 *
+		 * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA |
+		 *     L9P_ACE_APPEND_DATA);
+		 */
+	}
+	panswer = 0;
+	canswer = 0;
+	if (parent != NULL)
+		panswer = l9p_check_aces(pop, parent, pstat,
+		    uid, gid, gids, ngids);
+	if (child != NULL)
+		canswer = l9p_check_aces(cop, child, cstat,
+		    uid, gid, gids, ngids);
+
+	if (panswer || canswer) {
+		/*
+		 * Got a definitive answer from parent and/or
+		 * child ACLs.  We're not quite done yet though.
+		 */
+		if (opmask == L9P_ACOP_UNLINK) {
+			/*
+			 * For UNLINK, we can get an allow from child
+			 * and deny from parent, or vice versa.  It's
+			 * not 100% clear how to handle the two-answer
+			 * case.  ZFS says that if either says "allow",
+			 * we allow, and if both definitely say "deny",
+			 * we deny.  This makes sense, so we do that
+			 * here for all cases, even "strict".
+			 */
+			if (panswer > 0 || canswer > 0)
+				return (0);
+			if (panswer < 0 && canswer < 0)
+				return (EPERM);
+			/* non-definitive answer from one! move on */
+		} else {
+			/*
+			 * Have at least one definitive answer, and
+			 * should have only one; obey whichever
+			 * one it is.
+			 */
+			if (panswer)
+				return (panswer < 0 ? EPERM : 0);
+			return (canswer < 0 ? EPERM : 0);
+		}
+	}
+
+	/*
+	 * No definitive answer from ACLs alone.  Check for ZFS style
+	 * permissions checking and an "UNLINK" operation under ACLs.
+	 * If so, find write-and-execute permission on parent.
+	 * Note that WRITE overlaps with ADD_FILE -- that's ZFS's
+	 * way of saying "allow write to dir" -- but EXECUTE is
+	 * separate from LIST_DIRECTORY, so that's at least a little
+	 * bit cleaner.
+	 *
+	 * Note also that only a definitive yes (both bits are
+	 * explicitly allowed) results in granting unlink, and
+	 * a definitive no (at least one bit explicitly denied)
+	 * results in EPERM.  Only "no answer" moves on.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) &&
+	    opmask == L9P_ACOP_UNLINK && parent != NULL) {
+		panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE,
+		    parent, pstat, uid, gid, gids, ngids);
+		if (panswer)
+			return (panswer < 0 ? EPERM : 0);
+	}
+
+	/*
+	 * No definitive answer from ACLs.
+	 *
+	 * Try POSIX style rwx permissions if allowed.  This should
+	 * be rare, occurring mainly when caller supplied no ACLs
+	 * or set the mode to suppress them.
+	 *
+	 * The stat to check is the parent's if we don't have a child
+	 * (i.e., this is a dir op), or if the DELETE_CHILD bit is set
+	 * (i.e., this is an unlink or similar).  Otherwise it's the
+	 * child's.
+	 */
+	if (args->aca_aclmode & L9P_ACM_STAT_MODE) {
+		struct stat *st;
+		int rwx, bits;
+
+		rwx = l9p_ace_mask_to_rwx(opmask);
+		st = cstat;
+		if (st == NULL || (opmask & L9P_ACE_DELETE_CHILD) != 0)
+			st = pstat;
+		/*
+		 * The stat we need may be missing (e.g. DELETE_CHILD
+		 * requested without parent stat data).  We cannot
+		 * verify anything then, so fail closed rather than
+		 * dereference a NULL pointer.
+		 */
+		if (st == NULL)
+			return (EPERM);
+		if (uid == st->st_uid)
+			bits = (st->st_mode >> 6) & 7;
+		else if (l9p_ingroup(st->st_gid, gid, gids, ngids))
+			bits = (st->st_mode >> 3) & 7;
+		else
+			bits = st->st_mode & 7;
+		/*
+		 * If all the desired bits are set, we're OK.
+		 */
+		if ((rwx & bits) == rwx)
+			return (0);
+	}
+
+	/* all methods have failed, return EPERM */
+	return (EPERM);
+}
+
+/*
+ * Collapse fancy ACL operation mask down to simple Unix bits.
+ *
+ * Directory operations don't map that well.  However, listing
+ * a directory really does require read permission, and adding
+ * or deleting files really does require write permission, so
+ * this is probably sufficient.
+ */
+int
+l9p_ace_mask_to_rwx(int32_t opmask)
+{
+	/* Any of these asks for Unix "read" (4). */
+	const int32_t wantread =
+	    L9P_ACE_READ_DATA | L9P_ACE_READ_NAMED_ATTRS |
+	    L9P_ACE_READ_ATTRIBUTES | L9P_ACE_READ_ACL;
+	/* Any of these asks for Unix "write" (2). */
+	const int32_t wantwrite =
+	    L9P_ACE_WRITE_DATA | L9P_ACE_APPEND_DATA |
+	    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY |
+	    L9P_ACE_DELETE | L9P_ACE_DELETE_CHILD |
+	    L9P_ACE_WRITE_NAMED_ATTRS | L9P_ACE_WRITE_ATTRIBUTES |
+	    L9P_ACE_WRITE_ACL;
+	int rwx;
+
+	/* Bits not named here or below contribute nothing to the result. */
+	rwx = (opmask & wantread) != 0 ? 4 : 0;
+	if ((opmask & wantwrite) != 0)
+		rwx |= 2;
+	if ((opmask & L9P_ACE_EXECUTE) != 0)
+		rwx |= 1;
+	return (rwx);
+}
+
+#ifndef __APPLE__
+/*
+ * Allocate an ACL header plus room for aceasize ACEs as one malloc()
+ * block.  The new ACL records acetype, holds zero occupied ACEs and
+ * remembers aceasize as its capacity; the ACE slots themselves are
+ * left uninitialized.  Returns NULL if malloc() fails.
+ */
+static struct l9p_acl *
+l9p_new_acl(uint32_t acetype, uint32_t aceasize)
+{
+	struct l9p_acl *acl;
+	size_t nbytes;
+
+	nbytes = sizeof(struct l9p_acl) + aceasize * sizeof(struct l9p_ace);
+	acl = malloc(nbytes);
+	if (acl == NULL)
+		return (NULL);
+
+	acl->acl_acetype = acetype;
+	acl->acl_nace = 0;
+	acl->acl_aceasize = aceasize;
+	return (acl);
+}
+
+/*
+ * Expand ACL to accommodate more entries.
+ *
+ * Currently won't shrink, only grow, so it's a fast no-op until
+ * we hit the allocated size.  After that, it's best to grow in
+ * big chunks, or this will be O(n**2).
+ *
+ * Returns the (possibly moved) ACL, whose acl_aceasize is then at
+ * least aceasize.  If realloc() fails the original ACL is freed and
+ * NULL is returned, so the caller must not touch the old pointer.
+ */
+static struct l9p_acl *
+l9p_growacl(struct l9p_acl *acl, uint32_t aceasize)
+{
+	struct l9p_acl *tmp;
+	size_t asize, size;
+
+	/* Already big enough: nothing to do. */
+	if (acl->acl_aceasize >= aceasize)
+		return (acl);
+
+	asize = aceasize * sizeof(struct l9p_ace);
+	size = sizeof(struct l9p_acl) + asize;
+	tmp = realloc(acl, size);
+	if (tmp == NULL) {
+		free(acl);
+		return (NULL);
+	}
+	/* Record the new capacity so the fast path above stays accurate. */
+	tmp->acl_aceasize = aceasize;
+	return (tmp);
+}
+
+/*
+ * Annoyingly, there's no POSIX-standard way to count the number
+ * of ACEs in a system ACL other than to walk through them all.
+ * This is silly, but at least 2n is still O(n), and the walk is
+ * short.  (If the system ACL mysteriously grows, we'll handle
+ * that OK via growacl(), too.)
+ */
+static int
+l9p_count_aces(acl_t sysacl)
+{
+	acl_entry_t entry;
+	uint32_t count;
+	int which;
+
+	/* Ask for the first entry once, then keep asking for the next. */
+	count = 0;
+	which = ACL_FIRST_ENTRY;
+	while (acl_get_entry(sysacl, which, &entry) == 1) {
+		which = ACL_NEXT_ENTRY;
+		count++;
+	}
+
+	return ((int)count);
+}
+
+/*
+ * Build an l9p_acl of the given acetype from the system ACL sysacl.
+ * Each system entry is passed to convert; entries for which convert
+ * returns nonzero are dropped.  Returns NULL if memory runs out.
+ */
+static struct l9p_acl *
+l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert)
+{
+	struct l9p_acl *acl;
+	acl_entry_t entry;
+	uint32_t nfilled;
+	int which;
+
+	acl = l9p_new_acl((uint32_t)acetype, (uint32_t)l9p_count_aces(sysacl));
+	if (acl == NULL)
+		return (NULL);
+
+	nfilled = 0;
+	which = ACL_FIRST_ENTRY;
+	while (acl_get_entry(sysacl, which, &entry) == 1) {
+		which = ACL_NEXT_ENTRY;
+		/* Make sure slot nfilled exists before converting into it. */
+		acl = l9p_growacl(acl, nfilled + 1);
+		if (acl == NULL)
+			return (NULL);
+		/* A failed conversion leaves the slot to be reused. */
+		if ((*convert)(entry, &acl->acl_aces[nfilled]) == 0)
+			nfilled++;
+	}
+	acl->acl_nace = nfilled;
+	return (acl);
+}
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+/*
+ * Placeholder for a POSIX ACL converter.  The "&& 0" in the guard
+ * above keeps this empty body out of every build.
+ */
+struct l9p_acl *
+l9p_posix_acl_to_acl(acl_t sysacl)
+{
+}
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+/*
+ * Convert one FreeBSD NFSv4 ACL entry (sysace) into our internal
+ * ACE form (*ace).
+ *
+ * Returns 0 on success.  On failure returns an errno value: the one
+ * left by a failing acl_get_*() call, ENOMEM if the qualifier could
+ * not be fetched, or EINVAL for an unknown tag or entry type.  *ace
+ * is not written on failure.
+ */
+static int
+l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace)
+{
+	acl_tag_t tag;			/* e.g., USER_OBJ, GROUP, etc */
+	acl_entry_type_t entry_type;	/* e.g., allow/deny */
+	acl_permset_t absdperm;
+	acl_flagset_t absdflag;
+	acl_perm_t bsdperm;		/* e.g., READ_DATA */
+	acl_flag_t bsdflag;		/* e.g., FILE_INHERIT_ACE */
+	uint32_t flags, mask;
+	int error;
+	uid_t uid, *aid;
+
+	error = acl_get_tag_type(sysace, &tag);
+	if (error == 0)
+		error = acl_get_entry_type_np(sysace, &entry_type);
+	if (error == 0)
+		error = acl_get_flagset_np(sysace, &absdflag);
+	if (error == 0)
+		error = acl_get_permset(sysace, &absdperm);
+	/* The acl_get_*() calls report failure as -1 with errno set. */
+	if (error != 0)
+		return (errno);
+
+	flags = 0;
+	uid = 0;
+	aid = NULL;
+
+	/* move user/group/everyone + id-is-group-id into flags */
+	switch (tag) {
+	case ACL_USER_OBJ:
+		flags |= L9P_ACEF_OWNER;
+		break;
+	case ACL_GROUP_OBJ:
+		flags |= L9P_ACEF_GROUP;
+		break;
+	case ACL_EVERYONE:
+		flags |= L9P_ACEF_EVERYONE;
+		break;
+	case ACL_GROUP:
+		flags |= L9P_ACEF_IDENTIFIER_GROUP;
+		/* FALLTHROUGH */
+	case ACL_USER:
+		aid = acl_get_qualifier(sysace); /* ugh, this allocates */
+		if (aid == NULL)
+			return (ENOMEM);
+		/* Keep a private copy, then release via the ACL API. */
+		uid = *aid;
+		acl_free(aid);
+		aid = &uid;
+		break;
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	switch (entry_type) {
+
+	case ACL_ENTRY_TYPE_ALLOW:
+		ace->ace_type = L9P_ACET_ACCESS_ALLOWED;
+		break;
+
+	case ACL_ENTRY_TYPE_DENY:
+		ace->ace_type = L9P_ACET_ACCESS_DENIED;
+		break;
+
+	case ACL_ENTRY_TYPE_AUDIT:
+		ace->ace_type = L9P_ACET_SYSTEM_AUDIT;
+		break;
+
+	case ACL_ENTRY_TYPE_ALARM:
+		ace->ace_type = L9P_ACET_SYSTEM_ALARM;
+		break;
+
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	/* transform remaining BSD flags to internal NFS-y form */
+	bsdflag = *absdflag;
+	if (bsdflag & ACL_ENTRY_FILE_INHERIT)
+		flags |= L9P_ACEF_FILE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT)
+		flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT)
+		flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_INHERIT_ONLY)
+		flags |= L9P_ACEF_INHERIT_ONLY_ACE;
+	if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS)
+		flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG;
+	if (bsdflag & ACL_ENTRY_FAILED_ACCESS)
+		flags |= L9P_ACEF_FAILED_ACCESS_ACE_FLAG;
+	ace->ace_flags = flags;
+
+	/*
+	 * Transform BSD permissions to ace_mask.  Note that directory
+	 * vs file bits are the same in both sets, so we don't need
+	 * to worry about that, at least.
+	 *
+	 * There seem to be no BSD equivalents for WRITE_RETENTION
+	 * and WRITE_RETENTION_HOLD.
+	 */
+	mask = 0;
+	bsdperm = *absdperm;
+	if (bsdperm & ACL_READ_DATA)
+		mask |= L9P_ACE_READ_DATA;
+	if (bsdperm & ACL_WRITE_DATA)
+		mask |= L9P_ACE_WRITE_DATA;
+	if (bsdperm & ACL_APPEND_DATA)
+		mask |= L9P_ACE_APPEND_DATA;
+	if (bsdperm & ACL_READ_NAMED_ATTRS)
+		mask |= L9P_ACE_READ_NAMED_ATTRS;
+	if (bsdperm & ACL_WRITE_NAMED_ATTRS)
+		mask |= L9P_ACE_WRITE_NAMED_ATTRS;
+	if (bsdperm & ACL_EXECUTE)
+		mask |= L9P_ACE_EXECUTE;
+	if (bsdperm & ACL_DELETE_CHILD)
+		mask |= L9P_ACE_DELETE_CHILD;
+	if (bsdperm & ACL_READ_ATTRIBUTES)
+		mask |= L9P_ACE_READ_ATTRIBUTES;
+	if (bsdperm & ACL_WRITE_ATTRIBUTES)
+		mask |= L9P_ACE_WRITE_ATTRIBUTES;
+	/* L9P_ACE_WRITE_RETENTION */
+	/* L9P_ACE_WRITE_RETENTION_HOLD */
+	/* 0x00800 */
+	if (bsdperm & ACL_DELETE)
+		mask |= L9P_ACE_DELETE;
+	if (bsdperm & ACL_READ_ACL)
+		mask |= L9P_ACE_READ_ACL;
+	if (bsdperm & ACL_WRITE_ACL)
+		mask |= L9P_ACE_WRITE_ACL;
+	if (bsdperm & ACL_WRITE_OWNER)
+		mask |= L9P_ACE_WRITE_OWNER;
+	if (bsdperm & ACL_SYNCHRONIZE)
+		mask |= L9P_ACE_SYNCHRONIZE;
+	ace->ace_mask = mask;
+
+	/* fill in variable-size user or group ID bytes */
+	if (aid == NULL)
+		ace->ace_idsize = 0;
+	else {
+		ace->ace_idsize = sizeof(uid);
+		memcpy(&ace->ace_idbytes[0], aid, sizeof(uid));
+	}
+
+	return (0);
+}
+
+/*
+ * Convert a FreeBSD NFSv4 system ACL to the internal l9p_acl form,
+ * using l9p_frombsdnfs4() on each entry.  Returns NULL if memory
+ * could not be allocated.
+ */
+struct l9p_acl *
+l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl)
+{
+	struct l9p_acl *converted;
+
+	converted = l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl,
+	    l9p_frombsdnfs4);
+	return (converted);
+}
+#endif
+
+#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet */
+/*
+ * Placeholder for a Darwin ACL converter.  The "&& 0" in the guard
+ * above keeps this empty body out of every build.
+ */
+struct l9p_acl *
+l9p_darwin_nfsv4acl_to_acl(acl_t sysacl)
+{
+}
+#endif
diff --git a/genacl.h b/genacl.h
new file mode 100644
index 000000000000..f5feee716177
--- /dev/null
+++ b/genacl.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General ACL support for 9P2000.L.
+ *
+ * We mostly use Linux's xattr name space and nfs4 ACL bits, as
+ * these are the most general forms available.
+ *
+ * Linux requests attributes named
+ *
+ *     "system.posix_acl_default"
+ *     "system.posix_acl_access"
+ *
+ * to get POSIX style ACLs, and:
+ *
+ *     "system.nfs4_acl"
+ *
+ * to get NFSv4 style ACLs.  The v9fs client does not explicitly
+ * ask for the latter, but if you use the Ubuntu nfs4-acl-tools
+ * package, it should be able to read and write these.
+ *
+ * For the record, the Linux kernel source code also shows:
+ *
+ *  - Lustre uses "trusted.*", with "*" matching "lov", "lma",
+ *    "lmv", "dmv", "link", "fid", "version", "som", "hsm", and
+ *    "lfsck_namespace".
+ *
+ *  - ceph has a name tree of the form "ceph.<type>.<name>" with
+ *     <type,name> pairs like <"dir","entries">, <"dir","files">,
+ *     <"file","layout">, and so on.
+ *
+ *  - ext4 uses the POSIX names, plus some special ext4-specific
+ *    goop that might not get externalized.
+ *
+ *  - NFS uses both the POSIX names and the NFSv4 ACLs.  However,
+ *    what it mainly does is have nfsd generate fake NFSv4 ACLs
+ *    from POSIX ACLs.  If you run an NFS client, the client
+ *    relies on the server actually implementing the ACLs, and
+ *    lets nfs4-acl-tools read and write the system.nfs4_acl xattr
+ *    data.  If you run an NFS server off, e.g., an ext4 file system,
+ *    the server looks for the system.nfs4_acl xattr, serves that
+ *    out if found, and otherwise just generates the fakes.
+ *
+ *  - "security.*" and "selinux.*" are reserved.
+ *
+ *  - "security.capability" is the name for capabilities.
+ *
+ *  - sockets use "system.sockprotoname".
+ */
+
+#if defined(__APPLE__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_DARWIN_ACLS
+#endif
+
+#if defined(__FreeBSD__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_FREEBSD_ACLS
+#endif
+
+#include <sys/types.h>
+#include <sys/acl.h>		/* XXX assumes existence of sys/acl.h */
+
+/*
+ * An ACL consists of a number of ACEs that grant some kind of
+ * "allow" or "deny" to some specific entity.
+ *
+ * The number of ACEs is potentially unlimited, although in practice
+ * they tend not to be that long.
+ *
+ * It's the responsibility of the back-end to supply the ACL
+ * for each test.  However, the ACL may be in some sort of
+ * system-specific form.  It's the responsibility of some
+ * (system-specific) code to translate it to *this* form, after
+ * which the backend may use l9p_acl_check_access() to get
+ * access granted or denied (and, eventually, audits and alarms
+ * recorded and raised, although that's yet to be designed).
+ *
+ * The reason for all this faffing-about with formats is so that
+ * we can *report* the ACLs using Linux 9p style xattrs.
+ */
+
+struct l9p_acl;
+struct l9p_fid;
+
+void l9p_acl_free(struct l9p_acl *);
+
+/*
+ * An ACL is made up of ACEs.
+ *
+ * Each ACE has:
+ *
+ *   - a type: allow, deny, audit, alarm
+ *   - a set of flags
+ *   - permissions bits: a "mask"
+ *   - an optional, nominally-variable-length identity
+ *
+ * The last part is especially tricky and currently has limited
+ * support here: it's always a 16 byte field on Darwin, and just
+ * a uint32_t on BSD (should be larger, really).  Linux supports
+ * very large, actually-variable-size values; we'll deal with
+ * this later, maybe.
+ *
+ * We will define the mask first, below, since these are also the bits
+ * passed in for the accmask argument to l9p_acl_check_access().
+ */
+
+/*
+ * ACL entry mask, and accmask argument flags.
+ *
+ * NB: not every bit is implemented, but they are all here because
+ * they are all defined as part of an NFSv4 ACL entry, which is
+ * more or less a superset of a POSIX ACL entry.  This means you
+ * can put a complete NFSv4 ACL in and we can reproduce it.
+ *
+ * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits
+ * apply only to a directory, while the READ_DATA, WRITE_DATA, and
+ * APPEND_DATA bits apply only to a file.  See aca_parent/aca_child
+ * below.
+ */
+/*
+ * For the Unix-mode fallback, l9p_ace_mask_to_rwx() folds these into
+ * rwx bits: READ_DATA/LIST_DIRECTORY, READ_NAMED_ATTRS, READ_ATTRIBUTES
+ * and READ_ACL ask for "r"; WRITE_DATA/ADD_FILE,
+ * APPEND_DATA/ADD_SUBDIRECTORY, WRITE_NAMED_ATTRS, WRITE_ATTRIBUTES,
+ * WRITE_ACL, DELETE and DELETE_CHILD ask for "w"; EXECUTE asks for "x".
+ * The remaining bits map to nothing there.
+ */
+#define	L9P_ACE_READ_DATA		0x00001
+#define	L9P_ACE_LIST_DIRECTORY		0x00001 /* same as READ_DATA */
+#define	L9P_ACE_WRITE_DATA		0x00002
+#define	L9P_ACE_ADD_FILE		0x00002 /* same as WRITE_DATA */
+#define	L9P_ACE_APPEND_DATA		0x00004
+#define	L9P_ACE_ADD_SUBDIRECTORY	0x00004 /* same as APPEND_DATA */
+#define	L9P_ACE_READ_NAMED_ATTRS	0x00008
+#define	L9P_ACE_WRITE_NAMED_ATTRS	0x00010
+#define	L9P_ACE_EXECUTE			0x00020
+#define	L9P_ACE_DELETE_CHILD		0x00040
+#define	L9P_ACE_READ_ATTRIBUTES		0x00080
+#define	L9P_ACE_WRITE_ATTRIBUTES	0x00100
+#define	L9P_ACE_WRITE_RETENTION		0x00200 /* not used here */
+#define	L9P_ACE_WRITE_RETENTION_HOLD	0x00400 /* not used here */
+/*					0x00800 unused? */
+#define	L9P_ACE_DELETE			0x01000
+#define	L9P_ACE_READ_ACL		0x02000
+#define	L9P_ACE_WRITE_ACL		0x04000
+#define	L9P_ACE_WRITE_OWNER		0x08000
+#define	L9P_ACE_SYNCHRONIZE		0x10000 /* not used here */
+
+/*
+ * This is not an ACE bit, but is used with the access checking
+ * below.  It represents a request to unlink (delete child /
+ * delete) an entity, and is equivalent to asking for *either*
+ * (not both) permission: DELETE_CHILD is tested against the parent
+ * and DELETE against the child, and an allow from either one is
+ * enough.
+ */
+#define	L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE)
+
+/*
+ * Access checking takes a lot of arguments, so they are
+ * collected into a "struct" here.
+ *
+ * The aca_parent and aca_pstat fields may/must be NULL if the
+ * operation itself does not involve "directory" permissions.
+ * The aca_child and aca_cstat fields may/must be NULL if the
+ * operation does not involve anything *but* a directory.  This
+ * is how we decide whether you're interested in L9P_ACE_READ_DATA
+ * vs L9P_ACE_LIST_DIRECTORY, for instance.
+ *
+ * Note that it's OK for both parent and child to be directories
+ * (as is the case when we're adding or deleting a subdirectory).
+ */
+struct l9p_acl_check_args {
+	uid_t	aca_uid;		/* the uid that is requesting access */
+	gid_t	aca_gid;		/* the gid that is requesting access */
+	gid_t	*aca_groups;		/* the additional group-set, if any */
+	size_t	aca_ngroups;		/* number of groups in group-set */
+	struct l9p_acl *aca_parent;	/* ACLs associated with parent/dir */
+	struct stat *aca_pstat;		/* stat data for parent/dir */
+	struct l9p_acl *aca_child;	/* ACLs associated with file */
+	struct stat *aca_cstat;		/* stat data for file */
+	int	aca_aclmode;		/* L9P_ACM_* mode checking bits, see below */
+	bool	aca_superuser;		/* always allow uid==0, before any other test */
+};
+
+/*
+ * Access checking mode bits in aca_aclmode.  If you enable
+ * ACLs, they are used first, optionally with ZFS style ACLs.
+ * Note that aca_superuser is tested before any of these: when
+ * it is set, uid 0 is allowed even if an ACL would deny it.
+ *
+ * NFS style ACLs run before POSIX style ACLs (though POSIX
+ * ACLs aren't done yet anyway).
+ *
+ * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating
+ * with a ZFS file system on FreeBSD.
+ */
+#define	L9P_ACM_NFS_ACL		0x0001	/* enable NFS ACL checking */
+#define	L9P_ACM_ZFS_ACL		0x0002	/* use ZFS ACL unlink semantics */
+#define	L9P_ACM_POSIX_ACL	0x0004	/* enable POSIX ACL checking (notyet) */
+#define	L9P_ACM_STAT_MODE	0x0008	/* enable st_mode bits */
+
+/*
+ * Requests to access some file or directory must provide:
+ *
+ *  - An operation.  This should usually be just one bit from the
+ *    L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK.
+ *    For a few file-open operations it may be multiple bits,
+ *    e.g., both read and write data.
+ *  - The identity of the accessor: uid + gid + gid-set.
+ *  - The type of access desired: this may be multiple bits.
+ *  - The parent directory, if applicable.
+ *  - The child file/dir being accessed, if applicable.
+ *  - stat data for parent and/or child, if applicable.
+ *
+ * The ACLs and/or stat data of the parent and/or child get used
+ * here, so the caller must provide them.  We should have a way to
+ * cache these on fids, but not yet.  The parent and child
+ * arguments are a bit tricky; see the code in genacl.c.
+ */
+int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args);
+
+/*
+ * When falling back to POSIX ACL or Unix-style permissions
+ * testing, it's nice to collapse the above detailed permissions
+ * into simple read/write/execute bits (value 0..7).  We provide
+ * a small utility function that does this.
+ */
+int l9p_ace_mask_to_rwx(int32_t);
+
+/*
+ * The rest of the data in an ACE.
+ */
+
+/*
+ * type in ace_type
+ *
+ * Access checking acts on ALLOWED and DENIED entries only; AUDIT
+ * and ALARM entries are carried along but skipped.
+ */
+#define	L9P_ACET_ACCESS_ALLOWED		0
+#define	L9P_ACET_ACCESS_DENIED		1
+#define	L9P_ACET_SYSTEM_AUDIT		2
+#define	L9P_ACET_SYSTEM_ALARM		3
+
+/*
+ * flags in ace_flags
+ *
+ * The first six are copied from the system ACL entry's inheritance
+ * and audit flags.  The last four say who the ACE applies to:
+ * OWNER, GROUP and EVERYONE name the file's owner, the file's group
+ * and everybody; otherwise ace_idbytes holds an explicit ID, which
+ * is a group ID if IDENTIFIER_GROUP is set and a user ID if not.
+ */
+#define	L9P_ACEF_FILE_INHERIT_ACE		0x001
+#define	L9P_ACEF_DIRECTORY_INHERIT_ACE		0x002
+#define	L9P_ACEF_NO_PROPAGATE_INHERIT_ACE	0x004
+#define	L9P_ACEF_INHERIT_ONLY_ACE		0x008
+#define	L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG	0x010
+#define	L9P_ACEF_FAILED_ACCESS_ACE_FLAG		0x020
+#define	L9P_ACEF_IDENTIFIER_GROUP		0x040
+#define	L9P_ACEF_OWNER				0x080
+#define	L9P_ACEF_GROUP				0x100
+#define	L9P_ACEF_EVERYONE			0x200
+
+/* Size in bytes of the fixed ace_idbytes[] array in struct l9p_ace. */
+#if defined(__APPLE__)
+#  define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */
+#else
+#  define L9P_ACE_IDSIZE 4
+#endif
+
+/* One access control entry: who it names and what it allows or denies. */
+struct l9p_ace {
+	uint16_t ace_type;		/* ACL entry type, L9P_ACET_* */
+	uint16_t ace_flags;		/* ACL entry flags, L9P_ACEF_* */
+	uint32_t ace_mask;		/* ACL entry mask, L9P_ACE_* bits */
+	uint32_t ace_idsize;		/* length of ace_idbytes; 0 if no ID */
+	unsigned char ace_idbytes[L9P_ACE_IDSIZE]; /* raw user or group ID */
+};
+
+#define	L9P_ACLTYPE_NFSv4	1	/* currently the only valid type */
+/*
+ * An ACL: a small header followed directly by its ACEs, all in one
+ * allocation.  Release it with l9p_acl_free().
+ */
+struct l9p_acl {
+	uint32_t acl_acetype;		/* L9P_ACLTYPE_*; reserved for future expansion */
+	uint32_t acl_nace;		/* number of occupied ACEs */
+	uint32_t acl_aceasize;		/* allocated capacity of acl_aces[], in ACEs */
+	struct l9p_ace acl_aces[];	/* variable length ACE array */
+};
+
+/*
+ * These are the system-specific converters.
+ *
+ * Right now the backend needs to just find BSD NFSv4 ACLs
+ * and convert them before each operation that needs to be
+ * tested.
+ */
+#if defined(HAVE_DARWIN_ACLS)
+struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl);
+#endif
diff --git a/hashtable.c b/hashtable.c
new file mode 100644
index 000000000000..d6558eb65598
--- /dev/null
+++ b/hashtable.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include "lib9p_impl.h"
+#include "hashtable.h"
+
+static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *);
+
+/* Prepare h as an empty table with size buckets and initialize its rwlock. */
+void
+ht_init(struct ht *h, ssize_t size)
+{
+	ssize_t slot;
+
+	memset(h, 0, sizeof(*h));
+	h->ht_entries = l9p_calloc((size_t)size, sizeof(*h->ht_entries));
+	h->ht_nentries = size;
+	pthread_rwlock_init(&h->ht_rwlock, NULL);
+	for (slot = 0; slot < h->ht_nentries; slot++)
+		TAILQ_INIT(&h->ht_entries[slot].hte_items);
+}
+
+/*
+ * Tear down a table: free every item node (the hti_data values the
+ * nodes refer to are not touched), then the lock and the bucket array.
+ */
+void
+ht_destroy(struct ht *h)
+{
+	struct ht_item *node, *following;
+	ssize_t slot;
+
+	for (slot = 0; slot < h->ht_nentries; slot++)
+		TAILQ_FOREACH_SAFE(node, &h->ht_entries[slot].hte_items,
+		    hti_link, following)
+			free(node);
+	pthread_rwlock_destroy(&h->ht_rwlock);
+	free(h->ht_entries);
+	h->ht_entries = NULL;
+}
+
+/* Locking wrapper: run ht_find_locked() while holding the read lock. */
+void *
+ht_find(struct ht *h, uint32_t hash)
+{
+	void *data;
+	ht_rdlock(h);
+	data = ht_find_locked(h, hash);
+	ht_unlock(h);
+	return (data);
+}
+
+/*
+ * Look up hash without taking the table lock (ht_find() is the variant
+ * that locks).  Returns the stored data pointer, or NULL when absent.
+ */
+void *
+ht_find_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_item *node;
+
+	/* walk the bucket's chain until a matching hash or the end */
+	node = TAILQ_FIRST(&h->ht_entries[hash % h->ht_nentries].hte_items);
+	while (node != NULL && node->hti_hash != hash)
+		node = TAILQ_NEXT(node, hti_link);
+	return (node != NULL ? node->hti_data : NULL);
+}
+
+/* Add value under hash: 0 on success, -1/EEXIST if hash is already present. */
+int
+ht_add(struct ht *h, uint32_t hash, void *value)
+{
+	struct ht_entry *bucket;
+	struct ht_item *node;
+	int rv = 0;
+
+	ht_wrlock(h);
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	TAILQ_FOREACH(node, &bucket->hte_items, hti_link)
+		if (node->hti_hash == hash)
+			break;
+	if (node != NULL) {
+		errno = EEXIST;
+		rv = -1;
+	} else {
+		node = l9p_calloc(1, sizeof(*node));
+		node->hti_hash = hash;
+		node->hti_data = value;
+		TAILQ_INSERT_TAIL(&bucket->hte_items, node, hti_link);
+	}
+	ht_unlock(h);
+	return (rv);
+}
+
+/* Locking wrapper: run ht_remove_locked() while holding the write lock. */
+int
+ht_remove(struct ht *h, uint32_t hash)
+{
+	int rv;
+	ht_wrlock(h);
+	rv = ht_remove_locked(h, hash);
+	ht_unlock(h);
+	return (rv);
+}
+
+/* Unlink and free the node keyed by hash (the lock is not taken here). */
+/* Returns 0 when a node was removed, otherwise -1 with errno = ENOENT. */
+int
+ht_remove_locked(struct ht *h, uint32_t hash)
+{
+	ssize_t slot = hash % h->ht_nentries;
+	struct ht_entry *bucket = &h->ht_entries[slot];
+	struct ht_item *victim;
+
+	TAILQ_FOREACH(victim, &bucket->hte_items, hti_link)
+		if (victim->hti_hash == hash)
+			break;
+	if (victim == NULL) {
+		errno = ENOENT;
+		return (-1);
+	}
+	TAILQ_REMOVE(&bucket->hte_items, victim, hti_link);
+	free(victim);
+	return (0);
+}
+
+/*
+ * Inner workings for advancing the iterator.
+ *
+ * If we have a current item, that tells us how to find the
+ * next item.  If not, we get the first item from the next
+ * slot (well, the next slot with an item); in any case, we
+ * record the new slot and return the next item.
+ *
+ * For bootstrapping, iter->htit_slot can be -1 to start
+ * searching at slot 0.
+ *
+ * Caller must hold a lock on the table.
+ */
+static struct ht_item *
+ht_iter_advance(struct ht_iter *iter, struct ht_item *cur)
+{
+	struct ht_item *next;
+	struct ht *h;
+	ssize_t slot;
+
+	h = iter->htit_parent;
+
+	if (cur == NULL)
+		next = NULL;	/* nothing to follow: scan for a slot */
+	else
+		next = TAILQ_NEXT(cur, hti_link);	/* rest of cur's chain */
+
+	if (next == NULL) {	/* chain exhausted (or no cur): try later slots */
+		slot = iter->htit_slot;
+		while (++slot < h->ht_nentries) {
+			next = TAILQ_FIRST(&h->ht_entries[slot].hte_items);
+			if (next != NULL)
+				break;
+		}
+		iter->htit_slot = slot;	/* >= ht_nentries if nothing found */
+	}
+	return (next);	/* NULL when no further item exists */
+}
+
+/*
+ * Remove the current item - there must be one, or this is an
+ * error.  This (necessarily) pre-locates the next item, so callers
+ * must not use it on an actively-changing table.
+ */
+int
+ht_remove_at_iter(struct ht_iter *iter)
+{
+	struct ht_item *item;
+	struct ht *h;
+	ssize_t slot;
+
+	assert(iter != NULL);
+
+	if ((item = iter->htit_curr) == NULL) {
+		errno = EINVAL;	/* no current item to remove */
+		return (-1);
+	}
+
+	/* remove the item from the table, saving the NEXT one */
+	h = iter->htit_parent;
+	ht_wrlock(h);
+	slot = iter->htit_slot;	/* saved: ht_iter_advance may move htit_slot */
+	iter->htit_next = ht_iter_advance(iter, item);
+	TAILQ_REMOVE(&h->ht_entries[slot].hte_items, item, hti_link);
+	ht_unlock(h);
+
+	/* mark us as no longer on an item, then free it */
+	iter->htit_curr = NULL;
+	free(item);	/* frees the node only, not its hti_data */
+
+	return (0);
+}
+
+/*
+ * Initialize iterator.  Subsequent ht_next calls will find the
+ * first item, then the next, and so on.  Callers should in general
+ * not use this on actively-changing tables, though we do our best
+ * to make it semi-sensible.
+ */
+void
+ht_iter(struct ht *h, struct ht_iter *iter)
+{
+
+	/* slot -1 lets the first advance pre-increment onto slot 0 */
+	iter->htit_slot = -1;
+	iter->htit_curr = iter->htit_next = NULL;
+	iter->htit_parent = h;
+}
+
+/*
+ * Return the next item, which is the first item if we have not
+ * yet been called on this iterator, or the next item if we have.
+ */
+void *
+ht_next(struct ht_iter *iter)
+{
+	struct ht_item *node = iter->htit_next;
+	struct ht *table = iter->htit_parent;
+
+	if (node != NULL)
+		iter->htit_next = NULL;	/* consume the pre-loaded item */
+	else {
+		/* no pre-loaded next; step on from the current item */
+		ht_rdlock(table);
+		node = ht_iter_advance(iter, iter->htit_curr);
+		ht_unlock(table);
+	}
+	iter->htit_curr = node;
+	return (node != NULL ? node->hti_data : NULL);
+}
diff --git a/hashtable.h b/hashtable.h
new file mode 100644
index 000000000000..60b8dfff7ba4
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_HASHTABLE_H
+#define LIB9P_HASHTABLE_H
+
+#include <pthread.h>
+#include <sys/queue.h>
+
+struct ht {
+	struct ht_entry * 	ht_entries;
+	ssize_t 		ht_nentries;
+	pthread_rwlock_t	ht_rwlock;
+};
+
+struct ht_entry {
+	TAILQ_HEAD(, ht_item) hte_items;
+};
+
+struct ht_item {
+	uint32_t		hti_hash;
+	void *			hti_data;
+	TAILQ_ENTRY(ht_item)	hti_link;
+};
+
+struct ht_iter {
+	struct ht *		htit_parent;	/* table being iterated */
+	struct ht_item *	htit_curr;	/* last item from ht_next, or NULL */
+	struct ht_item *	htit_next;	/* set by ht_remove_at_iter */
+	ssize_t			htit_slot;	/* bucket index; -1 at start */
+};
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety-analysis"
+#endif
+
+/*
+ * Obtain read-lock on hash table; returns pthread_rwlock_rdlock()'s result.
+ */
+static inline int
+ht_rdlock(struct ht *h)
+{
+
+	return (pthread_rwlock_rdlock(&h->ht_rwlock));
+}
+
+/*
+ * Obtain write-lock on hash table; returns pthread_rwlock_wrlock()'s result.
+ */
+static inline int
+ht_wrlock(struct ht *h)
+{
+
+	return (pthread_rwlock_wrlock(&h->ht_rwlock));
+}
+
+/*
+ * Release lock on hash table; returns pthread_rwlock_unlock()'s result.
+ */
+static inline int
+ht_unlock(struct ht *h)
+{
+
+	return (pthread_rwlock_unlock(&h->ht_rwlock));
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void ht_init(struct ht *h, ssize_t size);
+void ht_destroy(struct ht *h);
+void *ht_find(struct ht *h, uint32_t hash);
+void *ht_find_locked(struct ht *h, uint32_t hash);
+int ht_add(struct ht *h, uint32_t hash, void *value);
+int ht_remove(struct ht *h, uint32_t hash);
+int ht_remove_locked(struct ht *h, uint32_t hash);
+int ht_remove_at_iter(struct ht_iter *iter);
+void ht_iter(struct ht *h, struct ht_iter *iter);
+void *ht_next(struct ht_iter *iter);
+
+#endif  /* LIB9P_HASHTABLE_H */
diff --git a/lib9p.h b/lib9p.h
new file mode 100644
index 000000000000..79b741c98887
--- /dev/null
+++ b/lib9p.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_LIB9P_H
+#define LIB9P_LIB9P_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+
+#include "fcall.h"
+#include "threadpool.h"
+#include "hashtable.h"
+
+#define L9P_DEFAULT_MSIZE   8192
+#define L9P_MAX_IOV         128
+#define	L9P_NUMTHREADS      8
+
+struct l9p_request;
+struct l9p_backend;
+struct l9p_fid;
+
+/*
+ * Functions to implement underlying transport for lib9p.
+ *
+ * The transport is responsible for:
+ *
+ *   - allocating a response buffer (filling in the iovec and niov)
+ *     (gets req, pointer to base of iov array of size L9P_MAX_IOV,
+ *      pointer to niov, lt_aux)
+ *
+ *   - sending a response, when a request has a reply ready
+ *     (gets req, pointer to iov, niov, actual response length, lt_aux)
+ *
+ *   - dropping the response buffer, when a request has been
+ *     flushed or otherwise dropped without a response
+ *     (gets req, pointer to iov, niov, lt_aux)
+ *
+ * The transport is of course also responsible for feeding in
+ * request-buffers, but that happens by the transport calling
+ * l9p_connection_recv().
+ */
+struct l9p_transport {
+	void *lt_aux;
+	int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *,
+	    size_t *, void *);
+	int (*lt_send_response)(struct l9p_request *, const struct iovec *,
+	    size_t, size_t, void *);
+	void (*lt_drop_response)(struct l9p_request *, const struct iovec *,
+	    size_t, void *);
+};
+
+enum l9p_pack_mode {
+	L9P_PACK,
+	L9P_UNPACK
+};
+
+enum l9p_integer_type {
+	L9P_BYTE = 1,
+	L9P_WORD = 2,
+	L9P_DWORD = 4,
+	L9P_QWORD = 8
+};
+
+enum l9p_version {
+	L9P_INVALID_VERSION = 0,
+	L9P_2000 = 1,
+	L9P_2000U = 2,
+	L9P_2000L = 3
+};
+
+/*
+ * This structure is used for unpacking (decoding) incoming
+ * requests and packing (encoding) outgoing results.  It has its
+ * own copy of the iov array, with its own counters for working
+ * through that array, but it borrows the actual DATA from the
+ * original iov array associated with the original request (see
+ * below).
+ */
+struct l9p_message {
+	enum l9p_pack_mode lm_mode;
+	struct iovec lm_iov[L9P_MAX_IOV];
+	size_t lm_niov;
+	size_t lm_cursor_iov;
+	size_t lm_cursor_offset;
+	size_t lm_size;
+};
+
+/*
+ * Data structure for a request/response pair (Tfoo/Rfoo).
+ *
+ * Note that the response is not formatted out into raw data
+ * (overwriting the request raw data) until we are really
+ * responding, with the exception of read operations Tread
+ * and Treaddir, which overlay their result-data into the
+ * iov array in the process of reading.
+ *
+ * We have room for two incoming fids, in case we are
+ * using 9P2000.L protocol.  Note that nothing that uses two
+ * fids also has an output fid (newfid), so we could have a
+ * union of lr_fid2 and lr_newfid, but keeping them separate
+ * is probably a bit less error-prone.  (If we want to shave
+ * memory requirements there are more places to look.)
+ *
+ * (The fid, fid2, and newfid fields should be removed via
+ * reorganization, as they are only used for smuggling data
+ * between request.c and the backend and should just be
+ * parameters to backend ops.)
+ */
+struct l9p_request {
+	struct l9p_message lr_req_msg;	/* for unpacking the request */
+	struct l9p_message lr_resp_msg;	/* for packing the response */
+	union l9p_fcall lr_req;		/* the request, decoded/unpacked */
+	union l9p_fcall lr_resp;	/* the response, not yet packed */
+
+	struct l9p_fid *lr_fid;
+	struct l9p_fid *lr_fid2;
+	struct l9p_fid *lr_newfid;
+
+	struct l9p_connection *lr_conn;	/* containing connection */
+	void *lr_aux;			/* reserved for transport layer */
+
+	struct iovec lr_data_iov[L9P_MAX_IOV];	/* iovecs for req + resp */
+	size_t lr_data_niov;			/* actual size of data_iov */
+
+	int lr_error;			/* result from l9p_dispatch_request */
+
+	/* protected by threadpool mutex */
+	enum l9p_workstate lr_workstate;	/* threadpool: work state */
+	enum l9p_flushstate lr_flushstate;	/* flush state if flushee */
+	struct l9p_worker *lr_worker;		/* threadpool: worker */
+	STAILQ_ENTRY(l9p_request) lr_worklink;	/* reserved to threadpool */
+
+	/* protected by tag hash table lock */
+	struct l9p_request_queue lr_flushq;	/* q of flushers */
+	STAILQ_ENTRY(l9p_request) lr_flushlink;	/* link w/in flush queue */
+};
+
+/* N.B.: these dirents are variable length and for .L only */
+struct l9p_dirent {
+	struct l9p_qid qid;
+	uint64_t offset;
+	uint8_t type;
+	char *name;
+};
+
+/*
+ * The 9pfs protocol has the notion of a "session", which is
+ * traffic between any two "Tversion" requests.  All fids
+ * (lc_files, below) are specific to one particular session.
+ *
+ * We need a data structure per connection (client/server
+ * pair). This data structure lasts longer than these 9pfs
+ * sessions, but contains the request/response pairs and fids.
+ * Logically, the per-session data should be separate, but
+ * most of the time that would just require an extra
+ * indirection.  Instead, a new session simply clunks all
+ * fids, and otherwise keeps using this same connection.
+ */
+struct l9p_connection {
+	struct l9p_server *lc_server;
+	struct l9p_transport lc_lt;
+	struct l9p_threadpool lc_tp;
+	enum l9p_version lc_version;
+	uint32_t lc_msize;
+	uint32_t lc_max_io_size;
+	struct ht lc_files;
+	struct ht lc_requests;
+	LIST_ENTRY(l9p_connection) lc_link;
+};
+
+struct l9p_server {
+	struct l9p_backend *ls_backend;
+	enum l9p_version ls_max_version;
+	LIST_HEAD(, l9p_connection) ls_conns;
+};
+
+int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version);
+ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s,
+    enum l9p_version version);
+uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version);
+int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *s);
+ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de);
+
+int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend);
+
+int l9p_connection_init(struct l9p_server *server,
+    struct l9p_connection **connp);
+void l9p_connection_free(struct l9p_connection *conn);
+void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    size_t niov, void *aux);
+void l9p_connection_close(struct l9p_connection *conn);
+struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn,
+    uint32_t fid);
+void l9p_connection_remove_fid(struct l9p_connection *conn,
+    struct l9p_fid *fid);
+
+int l9p_dispatch_request(struct l9p_request *req);
+void l9p_respond(struct l9p_request *req, bool drop, bool rmtag);
+
+void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode);
+void l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek);
+size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length);
+void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb);
+void l9p_freefcall(union l9p_fcall *fcall);
+void l9p_freestat(struct l9p_stat *stat);
+
+gid_t *l9p_getgrlist(const char *, gid_t, int *);
+
+#endif  /* LIB9P_LIB9P_H */
diff --git a/lib9p_impl.h b/lib9p_impl.h
new file mode 100644
index 000000000000..41ff07ae189c
--- /dev/null
+++ b/lib9p_impl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LIB9P_IMPL_H
+#define LIB9P_LIB9P_IMPL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef _KERNEL
+/* malloc(3) wrapper that never returns NULL: on failure, report and abort. */
+static inline void *
+l9p_malloc(size_t size)
+{
+	void *r = malloc(size);
+
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    size);	/* size_t takes %zu, not the signed %zd */
+		abort();
+	}
+	return (r);
+}
+
+/* calloc(3) wrapper that never returns NULL: on failure, report and abort. */
+static inline void *
+l9p_calloc(size_t n, size_t size)
+{
+	void *r = calloc(n, size);
+
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    n * size);	/* size_t takes %zu, not the signed %zd */
+		abort();
+	}
+	return (r);
+}
+
+/* realloc(3) wrapper that never returns NULL: on failure, report and abort. */
+static inline void *
+l9p_realloc(void *ptr, size_t newsize)
+{
+	void *r = realloc(ptr, newsize);
+
+	if (r == NULL) {	/* also taken if realloc(ptr, 0) yields NULL */
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    newsize);	/* size_t takes %zu, not the signed %zd */
+		abort();
+	}
+	return (r);
+}
+#endif /* _KERNEL */
+
+#endif /* LIB9P_LIB9P_IMPL_H */
diff --git a/linux_errno.h b/linux_errno.h
new file mode 100644
index 000000000000..72778daa2336
--- /dev/null
+++ b/linux_errno.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LINUX_ERRNO_H
+#define LIB9P_LINUX_ERRNO_H
+
+/*
+ * Linux error numbers that are outside of the original base range
+ * (which ends with ERANGE).
+ *
+ * This is pretty much the same as Linux's errno.h except that the
+ * names are prefixed with "LINUX_", and we add _STR with the
+ * string name.
+ *
+ * The string expansions were obtained with a little program to
+ * print every strerror().
+ *
+ * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs
+ * Linux / Plan9 EAGAIN at 11.  So one value in the ERANGE
+ * range still needs translation too.
+ */
+
+#define	LINUX_EAGAIN		11
+#define	LINUX_EAGAIN_STR	"Resource temporarily unavailable"
+
+#define	LINUX_EDEADLK		35
+#define	LINUX_EDEADLK_STR	"Resource deadlock avoided"
+#define	LINUX_ENAMETOOLONG	36
+#define	LINUX_ENAMETOOLONG_STR	"File name too long"
+#define	LINUX_ENOLCK		37
+#define	LINUX_ENOLCK_STR	"No locks available"
+#define	LINUX_ENOSYS		38
+#define	LINUX_ENOSYS_STR	"Function not implemented"
+#define	LINUX_ENOTEMPTY		39
+#define	LINUX_ENOTEMPTY_STR	"Directory not empty"
+#define	LINUX_ELOOP		40
+#define	LINUX_ELOOP_STR		"Too many levels of symbolic links"
+/*				41 unused */
+#define	LINUX_ENOMSG		42
+#define	LINUX_ENOMSG_STR	"No message of desired type"
+#define	LINUX_EIDRM		43
+#define	LINUX_EIDRM_STR		"Identifier removed"
+#define	LINUX_ECHRNG		44
+#define	LINUX_ECHRNG_STR	"Channel number out of range"
+#define	LINUX_EL2NSYNC		45
+#define	LINUX_EL2NSYNC_STR	"Level 2 not synchronized"
+#define	LINUX_EL3HLT		46
+#define	LINUX_EL3HLT_STR	"Level 3 halted"
+#define	LINUX_EL3RST		47
+#define	LINUX_EL3RST_STR	"Level 3 reset"
+#define	LINUX_ELNRNG		48
+#define	LINUX_ELNRNG_STR	"Link number out of range"
+#define	LINUX_EUNATCH		49
+#define	LINUX_EUNATCH_STR	"Protocol driver not attached"
+#define	LINUX_ENOCSI		50
+#define	LINUX_ENOCSI_STR	"No CSI structure available"
+#define	LINUX_EL2HLT		51
+#define	LINUX_EL2HLT_STR	"Level 2 halted"
+#define	LINUX_EBADE		52
+#define	LINUX_EBADE_STR		"Invalid exchange"
+#define	LINUX_EBADR		53
+#define	LINUX_EBADR_STR		"Invalid request descriptor"
+#define	LINUX_EXFULL		54
+#define	LINUX_EXFULL_STR	"Exchange full"
+#define	LINUX_ENOANO		55
+#define	LINUX_ENOANO_STR	"No anode"
+#define	LINUX_EBADRQC		56
+#define	LINUX_EBADRQC_STR	"Invalid request code"
+#define	LINUX_EBADSLT		57
+#define	LINUX_EBADSLT_STR	"Invalid slot"
+/*				58 unused */
+#define	LINUX_EBFONT		59
+#define	LINUX_EBFONT_STR	"Bad font file format"
+#define	LINUX_ENOSTR		60
+#define	LINUX_ENOSTR_STR	"Device not a stream"
+#define	LINUX_ENODATA		61
+#define	LINUX_ENODATA_STR	"No data available"
+#define	LINUX_ETIME		62
+#define	LINUX_ETIME_STR		"Timer expired"
+#define	LINUX_ENOSR		63
+#define	LINUX_ENOSR_STR		"Out of streams resources"
+#define	LINUX_ENONET		64
+#define	LINUX_ENONET_STR	"Machine is not on the network"
+#define	LINUX_ENOPKG		65
+#define	LINUX_ENOPKG_STR	"Package not installed"
+#define	LINUX_EREMOTE		66
+#define	LINUX_EREMOTE_STR	"Object is remote"
+#define	LINUX_ENOLINK		67
+#define	LINUX_ENOLINK_STR	"Link has been severed"
+#define	LINUX_EADV		68
+#define	LINUX_EADV_STR		"Advertise error"
+#define	LINUX_ESRMNT		69
+#define	LINUX_ESRMNT_STR	"Srmount error"
+#define	LINUX_ECOMM		70
+#define	LINUX_ECOMM_STR		"Communication error on send"
+#define	LINUX_EPROTO		71
+#define	LINUX_EPROTO_STR	"Protocol error"
+#define	LINUX_EMULTIHOP		72
+#define	LINUX_EMULTIHOP_STR	"Multihop attempted"
+#define	LINUX_EDOTDOT		73
+#define	LINUX_EDOTDOT_STR	"RFS specific error"
+#define	LINUX_EBADMSG		74
+#define	LINUX_EBADMSG_STR	"Bad message"
+#define	LINUX_EOVERFLOW		75
+#define	LINUX_EOVERFLOW_STR	"Value too large for defined data type"
+#define	LINUX_ENOTUNIQ		76
+#define	LINUX_ENOTUNIQ_STR	"Name not unique on network"
+#define	LINUX_EBADFD		77
+#define	LINUX_EBADFD_STR	"File descriptor in bad state"
+#define	LINUX_EREMCHG		78
+#define	LINUX_EREMCHG_STR	"Remote address changed"
+#define	LINUX_ELIBACC		79
+#define	LINUX_ELIBACC_STR	"Can not access a needed shared library"
+#define	LINUX_ELIBBAD		80
+#define	LINUX_ELIBBAD_STR	"Accessing a corrupted shared library"
+#define	LINUX_ELIBSCN		81
+#define	LINUX_ELIBSCN_STR	".lib section in a.out corrupted"
+#define	LINUX_ELIBMAX		82
+#define	LINUX_ELIBMAX_STR	"Attempting to link in too many shared libraries"
+#define	LINUX_ELIBEXEC		83
+#define	LINUX_ELIBEXEC_STR	"Cannot exec a shared library directly"
+#define	LINUX_EILSEQ		84
+#define	LINUX_EILSEQ_STR	"Invalid or incomplete multibyte or wide character"
+#define	LINUX_ERESTART		85
+#define	LINUX_ERESTART_STR	"Interrupted system call should be restarted"
+#define	LINUX_ESTRPIPE		86
+#define	LINUX_ESTRPIPE_STR	"Streams pipe error"
+#define	LINUX_EUSERS		87
+#define	LINUX_EUSERS_STR	"Too many users"
+#define	LINUX_ENOTSOCK		88
+#define	LINUX_ENOTSOCK_STR	"Socket operation on non-socket"
+#define	LINUX_EDESTADDRREQ	89
+#define	LINUX_EDESTADDRREQ_STR	"Destination address required"
+#define	LINUX_EMSGSIZE		90
+#define	LINUX_EMSGSIZE_STR	"Message too long"
+#define	LINUX_EPROTOTYPE	91
+#define	LINUX_EPROTOTYPE_STR	"Protocol wrong type for socket"
+#define	LINUX_ENOPROTOOPT	92
+#define	LINUX_ENOPROTOOPT_STR	"Protocol not available"
+#define	LINUX_EPROTONOSUPPORT	93
+#define	LINUX_EPROTONOSUPPORT_STR "Protocol not supported"
+#define	LINUX_ESOCKTNOSUPPORT	94
+#define	LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported"
+#define	LINUX_EOPNOTSUPP	95
+#define	LINUX_EOPNOTSUPP_STR	"Operation not supported"
+#define	LINUX_EPFNOSUPPORT	96
+#define	LINUX_EPFNOSUPPORT_STR	"Protocol family not supported"
+#define	LINUX_EAFNOSUPPORT	97
+#define	LINUX_EAFNOSUPPORT_STR	"Address family not supported by protocol"
+#define	LINUX_EADDRINUSE	98
+#define	LINUX_EADDRINUSE_STR	"Address already in use"
+#define	LINUX_EADDRNOTAVAIL	99
+#define	LINUX_EADDRNOTAVAIL_STR	"Cannot assign requested address"
+#define	LINUX_ENETDOWN		100
+#define	LINUX_ENETDOWN_STR	"Network is down"
+#define	LINUX_ENETUNREACH	101
+#define	LINUX_ENETUNREACH_STR	"Network is unreachable"
+#define	LINUX_ENETRESET		102
+#define	LINUX_ENETRESET_STR	"Network dropped connection on reset"
+#define	LINUX_ECONNABORTED	103
+#define	LINUX_ECONNABORTED_STR	"Software caused connection abort"
+#define	LINUX_ECONNRESET	104
+#define	LINUX_ECONNRESET_STR	"Connection reset by peer"
+#define	LINUX_ENOBUFS		105
+#define	LINUX_ENOBUFS_STR	"No buffer space available"
+#define	LINUX_EISCONN		106
+#define	LINUX_EISCONN_STR	"Transport endpoint is already connected"
+#define	LINUX_ENOTCONN		107
+#define	LINUX_ENOTCONN_STR	"Transport endpoint is not connected"
+#define	LINUX_ESHUTDOWN		108
+#define	LINUX_ESHUTDOWN_STR	"Cannot send after transport endpoint shutdown"
+#define	LINUX_ETOOMANYREFS	109
+#define	LINUX_ETOOMANYREFS_STR	"Too many references: cannot splice"
+#define	LINUX_ETIMEDOUT		110
+#define	LINUX_ETIMEDOUT_STR	"Connection timed out"
+#define	LINUX_ECONNREFUSED	111
+#define	LINUX_ECONNREFUSED_STR	"Connection refused"
+#define	LINUX_EHOSTDOWN		112
+#define	LINUX_EHOSTDOWN_STR	"Host is down"
+#define	LINUX_EHOSTUNREACH	113
+#define	LINUX_EHOSTUNREACH_STR	"No route to host"
+#define	LINUX_EALREADY		114
+#define	LINUX_EALREADY_STR	"Operation already in progress"
+#define	LINUX_EINPROGRESS	115
+#define	LINUX_EINPROGRESS_STR	"Operation now in progress"
+#define	LINUX_ESTALE		116
+#define	LINUX_ESTALE_STR	"Stale file handle"
+#define	LINUX_EUCLEAN		117
+#define	LINUX_EUCLEAN_STR	"Structure needs cleaning"
+#define	LINUX_ENOTNAM		118
+#define	LINUX_ENOTNAM_STR	"Not a XENIX named type file"
+#define	LINUX_ENAVAIL		119
+#define	LINUX_ENAVAIL_STR	"No XENIX semaphores available"
+#define	LINUX_EISNAM		120
+#define	LINUX_EISNAM_STR	"Is a named type file"
+#define	LINUX_EREMOTEIO		121
+#define	LINUX_EREMOTEIO_STR	"Remote I/O error"
+#define	LINUX_EDQUOT		122
+#define	LINUX_EDQUOT_STR	"Quota exceeded"
+#define	LINUX_ENOMEDIUM		123
+#define	LINUX_ENOMEDIUM_STR	"No medium found"
+#define	LINUX_EMEDIUMTYPE	124
+#define	LINUX_EMEDIUMTYPE_STR	"Wrong medium type"
+#define	LINUX_ECANCELED		125
+#define	LINUX_ECANCELED_STR	"Operation canceled"
+#define	LINUX_ENOKEY		126
+#define	LINUX_ENOKEY_STR	"Required key not available"
+#define	LINUX_EKEYEXPIRED	127
+#define	LINUX_EKEYEXPIRED_STR	"Key has expired"
+#define	LINUX_EKEYREVOKED	128
+#define	LINUX_EKEYREVOKED_STR	"Key has been revoked"
+#define	LINUX_EKEYREJECTED	129
+#define	LINUX_EKEYREJECTED_STR	"Key was rejected by service"
+#define	LINUX_EOWNERDEAD	130
+#define	LINUX_EOWNERDEAD_STR	"Owner died"
+#define	LINUX_ENOTRECOVERABLE	131
+#define	LINUX_ENOTRECOVERABLE_STR "State not recoverable"
+#define	LINUX_ERFKILL		132
+#define	LINUX_ERFKILL_STR	"Operation not possible due to RF-kill"
+#define	LINUX_EHWPOISON		133
+#define	LINUX_EHWPOISON_STR	"Memory page has hardware error"
+
+#endif	/* LIB9P_LINUX_ERRNO_H */
diff --git a/log.c b/log.c
new file mode 100644
index 000000000000..fb2596a16f4e
--- /dev/null
+++ b/log.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include "log.h"
+
+static const char *const l9p_log_level_names[] = {
+	[L9P_DEBUG] = "DEBUG",		/* indexed by enum l9p_log_level */
+	[L9P_INFO] = "INFO",
+	[L9P_WARNING] = "WARN",
+	[L9P_ERROR] = "ERROR"
+};
+
+/* Write one "[LEVEL] func: message" line to the LIB9P_LOGGING destination. */
+void
+l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...)
+{
+	static FILE *stream = NULL;	/* log sink: opened once, kept open */
+	const char *dest, *name;
+	va_list ap;
+
+	if (stream == NULL) {
+		/* Logging stays off unless LIB9P_LOGGING names a sink. */
+		dest = getenv("LIB9P_LOGGING");
+		if (dest == NULL)
+			return;
+		stream = strcmp(dest, "stderr") ? fopen(dest, "a") : stderr;
+		if (stream == NULL)
+			return;
+	}
+
+	/* An out-of-range level must not index past the name table. */
+	name = (unsigned)level <= L9P_ERROR ? l9p_log_level_names[level] : "?";
+	va_start(ap, fmt);
+	fprintf(stream, "[%s]\t %s: ", name, func);
+	vfprintf(stream, fmt, ap);
+	fprintf(stream, "\n");
+	fflush(stream);
+	va_end(ap);
+}
diff --git a/log.h b/log.h
new file mode 100644
index 000000000000..b801d4017afa
--- /dev/null
+++ b/log.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LOG_H
+#define	LIB9P_LOG_H
+
+enum l9p_log_level {	/* message severity tags */
+	L9P_DEBUG,
+	L9P_INFO,
+	L9P_WARNING,
+	L9P_ERROR
+};
+/* Emit one formatted message; func is the name reported as its origin. */
+void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...);
+/* NOTE(review): L9P_DEBUG is also an enumerator above; confirm how it is -D'd. */
+#if defined(L9P_DEBUG)
+#define	L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__)
+#else	/* logging compiled out: L9P_LOG() expands to nothing */
+#define L9P_LOG(level, fmt, ...)
+#endif
+
+#endif	/* LIB9P_LOG_H */
diff --git a/pack.c b/pack.c
new file mode 100644
index 000000000000..88f0ccb4ad73
--- /dev/null
+++ b/pack.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#ifdef __APPLE__
+# include "apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/uio.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "log.h"
+
+#define N(ary)          (sizeof(ary) / sizeof((ary)[0])) /* element count */
+#define STRING_SIZE(s)  (L9P_WORD + ((s) != NULL ? (uint16_t)strlen(s) : 0))
+#define QID_SIZE        (L9P_BYTE + L9P_DWORD + L9P_QWORD) /* type+vers+path */
+/* File-local helpers; "pu" = pack or unpack, chosen by the message's mode. */
+static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t);
+static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *);
+static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *);
+static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *);
+static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *);
+static ssize_t l9p_pustring(struct l9p_message *, char **s);
+static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t);
+static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *);
+static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q);
+
+/*
+ * Copy len bytes between buffer and the iovec array tracked by msg:
+ * buffer -> iovecs when msg is in L9P_PACK mode, iovecs -> buffer
+ * when it is in L9P_UNPACK mode.  The cursor in msg is advanced as
+ * bytes are moved, crossing iovec boundaries as needed.
+ *
+ * Returns len (as ssize_t) on success, or -1 if the iovecs were
+ * exhausted first.  Once that happens the cursor sits past the last
+ * iovec, so every later call with len > 0 on the same (un-reset)
+ * msg fails too; most callers can therefore check only the last call.
+ */
+static ssize_t
+l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len)
+{
+	char *user = buffer;
+	size_t moved, remaining;
+
+	assert(msg != NULL);
+
+	if (len == 0)
+		return (0);
+
+	/* Already past the last iovec: nothing more can be transferred. */
+	if (msg->lm_cursor_iov >= msg->lm_niov)
+		return (-1);
+
+	assert(buffer != NULL);
+
+	/* Move the data in chunks, each confined to a single iovec. */
+	for (moved = 0, remaining = len; remaining > 0;) {
+		size_t cur = msg->lm_cursor_iov;
+		char *wire = (char *)msg->lm_iov[cur].iov_base +
+		    msg->lm_cursor_offset;
+		size_t avail = msg->lm_iov[cur].iov_len - msg->lm_cursor_offset;
+		size_t chunk = MIN(avail, remaining);
+
+		if (msg->lm_mode == L9P_PACK)
+			memcpy(wire, user + moved, chunk);
+		else if (msg->lm_mode == L9P_UNPACK)
+			memcpy(user + moved, wire, chunk);
+
+		msg->lm_cursor_offset += chunk;
+		moved += chunk;
+		remaining -= chunk;
+
+		/* Current iovec not used up yet, so the request is done. */
+		if (chunk != avail)
+			continue;
+
+		/* Step to the start of the next iovec. */
+		msg->lm_cursor_iov++;
+		msg->lm_cursor_offset = 0;
+
+		/* No iovec left but bytes still pending: out of room. */
+		if (remaining > 0 && msg->lm_cursor_iov >= msg->lm_niov)
+			return (-1);
+	}
+
+	/* Only a fully satisfied request is counted in lm_size. */
+	msg->lm_size += moved;
+	return ((ssize_t)moved);
+}
+
+/*
+ * Pack or unpack a single octet.
+ *
+ * Returns 1 when the byte was transferred, -1 when msg had no room.
+ */
+static inline ssize_t
+l9p_pu8(struct l9p_message *msg, uint8_t *val)
+{
+
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+}
+
+/*
+ * Pack or unpack a 16-bit value (little-endian on the wire).
+ *
+ * Returns 2 or -1.
+ */
+static inline ssize_t
+l9p_pu16(struct l9p_message *msg, uint16_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	/* Host order already matches the wire: transfer in place. */
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	/*
+	 * Big-endian host.  When packing, swap into a scratch copy so
+	 * the caller's value stays untouched; when unpacking, read the
+	 * wire bytes into *val and swap them in place afterwards.
+	 */
+	uint16_t wire;
+	ssize_t n;
+
+	if (msg->lm_mode != L9P_PACK) {
+		n = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le16toh(*val);
+		return (n);
+	}
+	wire = htole16(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Pack or unpack a 32-bit value (little-endian on the wire).
+ *
+ * Returns 4 or -1.
+ */
+static inline ssize_t
+l9p_pu32(struct l9p_message *msg, uint32_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	uint32_t wire;
+	ssize_t n;
+
+	if (msg->lm_mode != L9P_PACK) {
+		n = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le32toh(*val);
+		return (n);
+	}
+	wire = htole32(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Pack or unpack a 64-bit value (little-endian on the wire).
+ * Returns 8 or -1.
+ */
+static inline ssize_t
+l9p_pu64(struct l9p_message *msg, uint64_t *val)
+{
+#if _BYTE_ORDER != _LITTLE_ENDIAN
+	uint64_t copy;
+	ssize_t ret;
+
+	if (msg->lm_mode == L9P_PACK) {
+		copy = htole64(*val);
+		return (l9p_iov_io(msg, &copy, sizeof (copy)));
+	}
+	/* Read all 8 bytes; sizeof (*val) keeps the width tied to val. */
+	ret = l9p_iov_io(msg, val, sizeof (*val));
+	*val = le64toh(*val);
+	return (ret);
+#else
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#endif
+}
+
+/*
+ * Pack or unpack one string.  On the wire a string is a 2-byte
+ * length followed by that many bytes, with no terminator; the
+ * value returned is therefore the string length plus 2.
+ *
+ * When unpacking, *s is set to a freshly allocated, NUL-terminated
+ * copy from l9p_calloc(); it is not released here on error.
+ *
+ * Returns -1 on error: out of space in msg, allocation failure,
+ * or an illegal (NUL-containing) string.  The last two can occur
+ * even though l9p_iov_io() itself succeeded, so pustring (and
+ * hence pustrings) results must not be ignored.
+ */
+static ssize_t
+l9p_pustring(struct l9p_message *msg, char **s)
+{
+	const int unpacking = (msg->lm_mode == L9P_UNPACK);
+	uint16_t nbytes;
+
+	/* Packing: the length prefix comes from the caller's string. */
+	if (msg->lm_mode == L9P_PACK)
+		nbytes = (*s == NULL) ? 0 : (uint16_t)strlen(*s);
+
+	if (l9p_pu16(msg, &nbytes) < 0)
+		return (-1);
+
+	/* Unpacking: zero-filled buffer with room for the final NUL. */
+	if (unpacking) {
+		*s = l9p_calloc(1, nbytes + 1);
+		if (*s == NULL)
+			return (-1);
+	}
+
+	if (l9p_iov_io(msg, *s, nbytes) < 0)
+		return (-1);
+
+	/*
+	 * A NUL inside the received bytes is illegal.  Accepting it
+	 * would merely yield a shorter C string, but rejecting the
+	 * message is the more cautious choice.
+	 */
+	if (unpacking && memchr(*s, '\0', nbytes) != NULL)
+		return (-1);
+
+	return ((ssize_t)nbytes + 2);
+}
+
+/*
+ * Pack or unpack a counted list of strings: a 2-byte count followed
+ * by that many strings.  At most max strings are transferred.
+ *
+ * The clamping is asymmetric.  When packing, only the count written
+ * to the wire is limited to max; the caller's *num is left as it
+ * was.  When unpacking, *num itself is overwritten with the wire
+ * count and then reduced to max if it was larger; strings beyond
+ * max are left unread in msg.
+ *
+ * Returns the number of bytes transferred, including the 2-byte
+ * count, or -1 on error.
+ */
+static ssize_t
+l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings,
+    size_t max)
+{
+	size_t count, k;
+	ssize_t one, total;
+	uint16_t wirecount;
+
+	if (msg->lm_mode != L9P_PACK) {
+		/* Unpack: take the wire count, then clamp the caller's. */
+		total = l9p_pu16(msg, num);
+		count = *num;
+		if (count > max)
+			*num = (uint16_t)(count = max);
+	} else {
+		/* Pack: send a clamped count but leave *num alone. */
+		count = (*num > max) ? max : *num;
+		wirecount = (uint16_t)count;
+		total = l9p_pu16(msg, &wirecount);
+	}
+	if (total < 0)
+		return (-1);
+
+	for (k = 0; k < count; k++) {
+		one = l9p_pustring(msg, &strings[k]);
+		if (one < 1)
+			return (-1);
+
+		total += one;
+	}
+
+	return (total);
+}
+
+/*
+ * Pack or unpack a qid: 1-byte type, 4-byte version, 8-byte path.
+ *
+ * Returns QID_SIZE (13) on success, -1 on error.
+ */
+static ssize_t
+l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid)
+{
+	const int packing = (msg->lm_mode == L9P_PACK);
+	uint8_t wiretype;
+	ssize_t n;
+
+	if (packing)
+		wiretype = qid->type;
+	n = l9p_pu8(msg, &wiretype);
+	if (!packing)
+		qid->type = wiretype;
+	if (n <= 0)
+		return (n);
+	n = l9p_pu32(msg, &qid->version);
+	if (n <= 0)
+		return (n);
+	n = l9p_pu64(msg, &qid->path);
+	return (n > 0 ? QID_SIZE : n);
+}
+
+/*
+ * Pack or unpack a 2-byte count followed by *num qids (when unpacking,
+ * *num is first set from the wire and is NOT checked against the
+ * capacity of qids[]).  Returns 2 + 13 * *num, or -1 on error.
+ */
+static ssize_t
+l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids)
+{
+	size_t count, k;
+	ssize_t one, total;
+
+	total = l9p_pu16(msg, num);
+	if (total <= 0)
+		return (total);
+	for (count = *num, k = 0; k < count; k++) {
+		one = l9p_puqid(msg, &qids[k]);
+		if (one < 0)
+			return (-1);
+		total += one;
+	}
+	return (total);
+}
+
+/*
+ * Pack or unpack a l9p_stat.
+ *
+ * The wire form is variable-length (it embeds strings) and gains
+ * extra trailing fields when version is L9P_2000U or later.
+ *
+ * Returns the number of bytes packed/unpacked, or -1 on error.
+ */
+ssize_t
+l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat,
+    enum l9p_version version)
+{
+	ssize_t total;
+	uint16_t wiresize;
+
+	/* The size prefix counts everything after itself, hence the - 2. */
+	if (msg->lm_mode == L9P_PACK)
+		wiresize = l9p_sizeof_stat(stat, version) - 2;
+
+	/* Fields present in every protocol version, in wire order. */
+	total = l9p_pu16(msg, &wiresize);
+	total += l9p_pu16(msg, &stat->type);
+	total += l9p_pu32(msg, &stat->dev);
+	total += l9p_puqid(msg, &stat->qid);
+	total += l9p_pu32(msg, &stat->mode);
+	total += l9p_pu32(msg, &stat->atime);
+	total += l9p_pu32(msg, &stat->mtime);
+	total += l9p_pu64(msg, &stat->length);
+	total += l9p_pustring(msg, &stat->name);
+	total += l9p_pustring(msg, &stat->uid);
+	total += l9p_pustring(msg, &stat->gid);
+	total += l9p_pustring(msg, &stat->muid);
+
+	/* L9P_2000U and later append four more fields. */
+	if (version >= L9P_2000U) {
+		total += l9p_pustring(msg, &stat->extension);
+		total += l9p_pu32(msg, &stat->n_uid);
+		total += l9p_pu32(msg, &stat->n_gid);
+		total += l9p_pu32(msg, &stat->n_muid);
+	}
+
+	/* A failed step adds -1, so a short total signals an error. */
+	return (total < wiresize + 2 ? -1 : total);
+}
+
+/*
+ * Pack or unpack one directory entry: qid, 8-byte offset, 1-byte
+ * type, then the name as a length-prefixed string.
+ *
+ * When unpacking, de->name is newly allocated by l9p_pustring() and
+ * the caller must free it.  Returns the wire-format length of the
+ * entry, or -1 if any part could not be transferred.
+ */
+ssize_t
+l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de)
+{
+	ssize_t fixed, namelen;
+
+	fixed = l9p_puqid(msg, &de->qid);
+	fixed += l9p_pu64(msg, &de->offset);
+	fixed += l9p_pu8(msg, &de->type);
+	namelen = l9p_pustring(msg, &de->name);
+	if (namelen < 0 || fixed < QID_SIZE + 8 + 1)
+		return (-1);
+	return (fixed + namelen);
+}
+
+/*
+ * Pack or unpack one request or response (fcall); the body layout
+ * is chosen by fcall->hdr.type and, for some types, by version.
+ * Returns 0 on success, -1 on error (it is then up to the caller
+ * to call l9p_freefcall()).
+ */
+int
+l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version)
+{
+	uint32_t length = 0;
+	ssize_t r;
+
+	/*
+	 * Header: length (a 0 placeholder when packing, patched at the
+	 * end), type and tag.  If not even that works, abort immediately.
+	 */
+	l9p_pu32(msg, &length);
+	l9p_pu8(msg, &fcall->hdr.type);
+	r = l9p_pu16(msg, &fcall->hdr.tag);
+	if (r < 0)
+		return (-1);
+
+	/*
+	 * Decode remainder of message.  Unpacking may allocate memory
+	 * even when it fails; the fcall starts out zeroed, so the
+	 * caller's freefcall() releases whatever was allocated.  Only
+	 * the last l9p_pu*() of a run is checked since iov failures are
+	 * sticky; l9p_pustring() can fail separately, so its result is
+	 * never discarded.  RSTAT/TWSTAT prefix the stat with its size.
+	 */
+	switch (fcall->hdr.type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		l9p_pu32(msg, &fcall->version.msize);
+		r = l9p_pustring(msg, &fcall->version.version);
+		break;
+
+	case L9P_TAUTH:
+		l9p_pu32(msg, &fcall->tauth.afid);
+		r = l9p_pustring(msg, &fcall->tauth.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tauth.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tauth.n_uname);
+		break;
+
+	case L9P_RAUTH:
+		r = l9p_puqid(msg, &fcall->rauth.aqid);
+		break;
+
+	case L9P_TATTACH:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tattach.afid);
+		r = l9p_pustring(msg, &fcall->tattach.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tattach.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tattach.n_uname);
+		break;
+
+	case L9P_RATTACH:
+		r = l9p_puqid(msg, &fcall->rattach.qid);
+		break;
+
+	case L9P_RERROR:
+		r = l9p_pustring(msg, &fcall->error.ename);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_RLERROR:
+		r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_TFLUSH:
+		r = l9p_pu16(msg, &fcall->tflush.oldtag);
+		break;
+
+	case L9P_RFLUSH:
+		break;
+
+	case L9P_TWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->twalk.newfid);
+		r = l9p_pustrings(msg, &fcall->twalk.nwname,
+		    fcall->twalk.wname, N(fcall->twalk.wname));
+		break;
+
+	case L9P_RWALK:
+		r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid);
+		break;
+
+	case L9P_TOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu8(msg, &fcall->topen.mode);
+		break;
+
+	case L9P_ROPEN:
+		l9p_puqid(msg, &fcall->ropen.qid);
+		r = l9p_pu32(msg, &fcall->ropen.iounit);
+		break;
+
+	case L9P_TCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tcreate.perm);
+		r = l9p_pu8(msg, &fcall->tcreate.mode);
+		if (version >= L9P_2000U)
+			r = l9p_pustring(msg, &fcall->tcreate.extension);
+		break;
+
+	case L9P_RCREATE:
+		l9p_puqid(msg, &fcall->rcreate.qid);
+		r = l9p_pu32(msg, &fcall->rcreate.iounit);
+		break;
+
+	case L9P_TREAD:
+	case L9P_TREADDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RREAD:
+	case L9P_RREADDIR:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TWRITE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RWRITE:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TCLUNK:
+	case L9P_TSTAT:
+	case L9P_TREMOVE:
+	case L9P_TSTATFS:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RCLUNK:
+	case L9P_RREMOVE:
+		break;
+
+	case L9P_RSTAT:
+	{
+		uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat,
+		    version);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->rstat.stat, version);
+	}
+		break;
+
+	case L9P_TWSTAT:
+	{
+		uint16_t size = l9p_sizeof_stat(&fcall->twstat.stat, version);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->twstat.stat, version);
+	}
+		break;
+
+	case L9P_RWSTAT:
+		break;
+
+	case L9P_RSTATFS:
+		l9p_pu32(msg, &fcall->rstatfs.statfs.type);
+		l9p_pu32(msg, &fcall->rstatfs.statfs.bsize);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.blocks);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bfree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bavail);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.files);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.ffree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.fsid);
+		r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen);
+		break;
+
+	case L9P_TLOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu32(msg, &fcall->tlopen.flags);
+		break;
+
+	case L9P_RLOPEN:
+		l9p_puqid(msg, &fcall->rlopen.qid);
+		r = l9p_pu32(msg, &fcall->rlopen.iounit);
+		break;
+
+	case L9P_TLCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tlcreate.flags);
+		l9p_pu32(msg, &fcall->tlcreate.mode);
+		r = l9p_pu32(msg, &fcall->tlcreate.gid);
+		break;
+
+	case L9P_RLCREATE:
+		l9p_puqid(msg, &fcall->rlcreate.qid);
+		r = l9p_pu32(msg, &fcall->rlcreate.iounit);
+		break;
+
+	case L9P_TSYMLINK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tsymlink.name);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tsymlink.symtgt);
+		if (r < 0)
+			break;
+		r = l9p_pu32(msg, &fcall->tsymlink.gid);	/* not tlcreate's */
+		break;
+
+	case L9P_RSYMLINK:
+		r = l9p_puqid(msg, &fcall->rsymlink.qid);
+		break;
+
+	case L9P_TMKNOD:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmknod.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmknod.mode);
+		l9p_pu32(msg, &fcall->tmknod.major);
+		l9p_pu32(msg, &fcall->tmknod.minor);
+		r = l9p_pu32(msg, &fcall->tmknod.gid);
+		break;
+
+	case L9P_RMKNOD:
+		r = l9p_puqid(msg, &fcall->rmknod.qid);
+		break;
+
+	case L9P_TRENAME:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->trename.dfid);
+		r = l9p_pustring(msg, &fcall->trename.name);
+		break;
+
+	case L9P_RRENAME:
+		break;
+
+	case L9P_TREADLINK:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RREADLINK:
+		r = l9p_pustring(msg, &fcall->rreadlink.target);
+		break;
+
+	case L9P_TGETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu64(msg, &fcall->tgetattr.request_mask);
+		break;
+
+	case L9P_RGETATTR:
+		l9p_pu64(msg, &fcall->rgetattr.valid);
+		l9p_puqid(msg, &fcall->rgetattr.qid);
+		l9p_pu32(msg, &fcall->rgetattr.mode);
+		l9p_pu32(msg, &fcall->rgetattr.uid);
+		l9p_pu32(msg, &fcall->rgetattr.gid);
+		l9p_pu64(msg, &fcall->rgetattr.nlink);
+		l9p_pu64(msg, &fcall->rgetattr.rdev);
+		l9p_pu64(msg, &fcall->rgetattr.size);
+		l9p_pu64(msg, &fcall->rgetattr.blksize);
+		l9p_pu64(msg, &fcall->rgetattr.blocks);
+		l9p_pu64(msg, &fcall->rgetattr.atime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.gen);
+		r = l9p_pu64(msg, &fcall->rgetattr.data_version);
+		break;
+
+	case L9P_TSETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tsetattr.valid);
+		l9p_pu32(msg, &fcall->tsetattr.mode);
+		l9p_pu32(msg, &fcall->tsetattr.uid);
+		l9p_pu32(msg, &fcall->tsetattr.gid);
+		l9p_pu64(msg, &fcall->tsetattr.size);
+		l9p_pu64(msg, &fcall->tsetattr.atime_sec);
+		l9p_pu64(msg, &fcall->tsetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->tsetattr.mtime_sec);
+		r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec);
+		break;
+
+	case L9P_RSETATTR:
+		break;
+
+	case L9P_TXATTRWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->txattrwalk.newfid);
+		r = l9p_pustring(msg, &fcall->txattrwalk.name);
+		break;
+
+	case L9P_RXATTRWALK:
+		r = l9p_pu64(msg, &fcall->rxattrwalk.size);
+		break;
+
+	case L9P_TXATTRCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->txattrcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu64(msg, &fcall->txattrcreate.attr_size);
+		r = l9p_pu32(msg, &fcall->txattrcreate.flags);
+		break;
+
+	case L9P_RXATTRCREATE:
+		break;
+
+	case L9P_TFSYNC:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RFSYNC:
+		break;
+
+	case L9P_TLOCK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu8(msg, &fcall->tlock.type);
+		l9p_pu32(msg, &fcall->tlock.flags);
+		l9p_pu64(msg, &fcall->tlock.start);
+		l9p_pu64(msg, &fcall->tlock.length);
+		l9p_pu32(msg, &fcall->tlock.proc_id);
+		r = l9p_pustring(msg, &fcall->tlock.client_id);
+		break;
+
+	case L9P_RLOCK:
+		r = l9p_pu8(msg, &fcall->rlock.status);
+		break;
+
+	case L9P_TGETLOCK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		l9p_pu8(msg, &fcall->getlock.type);
+		l9p_pu64(msg, &fcall->getlock.start);
+		l9p_pu64(msg, &fcall->getlock.length);
+		l9p_pu32(msg, &fcall->getlock.proc_id);
+		r = l9p_pustring(msg, &fcall->getlock.client_id);
+		break;
+
+	case L9P_TLINK:
+		l9p_pu32(msg, &fcall->tlink.dfid);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlink.name);
+		break;
+
+	case L9P_RLINK:
+		break;
+
+	case L9P_TMKDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmkdir.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmkdir.mode);
+		r = l9p_pu32(msg, &fcall->tmkdir.gid);
+		break;
+
+	case L9P_RMKDIR:
+		r = l9p_puqid(msg, &fcall->rmkdir.qid);
+		break;
+
+	case L9P_TRENAMEAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->trenameat.oldname);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->trenameat.newdirfid);
+		r = l9p_pustring(msg, &fcall->trenameat.newname);
+		break;
+
+	case L9P_RRENAMEAT:
+		break;
+
+	case L9P_TUNLINKAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tunlinkat.name);
+		if (r < 0)
+			break;
+		r = l9p_pu32(msg, &fcall->tunlinkat.flags);
+		break;
+
+	case L9P_RUNLINKAT:
+		break;
+
+	default:
+		L9P_LOG(L9P_ERROR, "%s(): missing case for type %d",
+		    __func__, fcall->hdr.type);
+		break;
+	}
+
+	/* Check for over- or under-run, or pustring error. */
+	if (r < 0)
+		return (-1);
+
+	if (msg->lm_mode == L9P_PACK) {
+		/* Rewind to the beginning and install size at front. */
+		uint32_t len = (uint32_t)msg->lm_size;
+		msg->lm_cursor_offset = 0;
+		msg->lm_cursor_iov = 0;
+
+		/*
+		 * Subtract 4 bytes from current size, because we're
+		 * overwriting size (rewinding message to the beginning)
+		 * and writing again, which will increase it 4 more.
+		 */
+		msg->lm_size -= sizeof(uint32_t);
+
+		if (fcall->hdr.type == L9P_RREAD ||
+		    fcall->hdr.type == L9P_RREADDIR)
+			len += fcall->io.count;
+
+		l9p_pu32(msg, &len);
+	}
+
+	return (0);
+}
+
+/*
+ * Free the strings that unpacking an fcall allocated (see
+ * l9p_pustring()), selected by fcall->hdr.type.  Types without
+ * a case below are left untouched.
+ *
+ * NOTE(review): RERROR's ename is unpacked as a string too but is
+ * not freed here, and freed pointers are not reset to NULL --
+ * confirm ownership with the callers before extending this.
+ */
+void
+l9p_freefcall(union l9p_fcall *fcall)
+{
+	uint16_t i;
+
+	switch (fcall->hdr.type) {
+	/* Version, auth and attach. */
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		free(fcall->version.version);
+		return;
+	case L9P_TAUTH:
+		/* Unpacked with uname and aname, just like TATTACH. */
+		free(fcall->tauth.aname);
+		free(fcall->tauth.uname);
+		return;
+	case L9P_TATTACH:
+		free(fcall->tattach.aname);
+		free(fcall->tattach.uname);
+		return;
+
+	/* Walk, create and stat. */
+	case L9P_TWALK:
+		for (i = 0; i < fcall->twalk.nwname; i++)
+			free(fcall->twalk.wname[i]);
+		return;
+	case L9P_TCREATE:
+		/* TOPEN is deliberately absent: it carries no strings. */
+		free(fcall->tcreate.name);
+		free(fcall->tcreate.extension);
+		return;
+	case L9P_RSTAT:
+		l9p_freestat(&fcall->rstat.stat);
+		return;
+	case L9P_TWSTAT:
+		l9p_freestat(&fcall->twstat.stat);
+		return;
+
+	/* Everything else that carries one or two strings. */
+	case L9P_TLCREATE:
+		free(fcall->tlcreate.name);
+		return;
+	case L9P_TSYMLINK:
+		free(fcall->tsymlink.name);
+		free(fcall->tsymlink.symtgt);
+		return;
+	case L9P_TMKNOD:
+		free(fcall->tmknod.name);
+		return;
+	case L9P_TRENAME:
+		free(fcall->trename.name);
+		return;
+	case L9P_RREADLINK:
+		free(fcall->rreadlink.target);
+		return;
+	case L9P_TXATTRWALK:
+		free(fcall->txattrwalk.name);
+		return;
+	case L9P_TXATTRCREATE:
+		free(fcall->txattrcreate.name);
+		return;
+	case L9P_TLOCK:
+		free(fcall->tlock.client_id);
+		return;
+	case L9P_TGETLOCK:
+	case L9P_RGETLOCK:
+		free(fcall->getlock.client_id);
+		return;
+	case L9P_TLINK:
+		free(fcall->tlink.name);
+		return;
+	case L9P_TMKDIR:
+		free(fcall->tmkdir.name);
+		return;
+	case L9P_TRENAMEAT:
+		free(fcall->trenameat.oldname);
+		free(fcall->trenameat.newname);
+		return;
+	case L9P_TUNLINKAT:
+		free(fcall->tunlinkat.name);
+		return;
+
+	default:
+		/* No other message type is handled here. */
+		return;
+	}
+}
+
+void
+l9p_freestat(struct l9p_stat *stat)
+{
+	free(stat->muid);	/* releases only the strings, not *stat */
+	free(stat->gid);	/* (free() ignores members that are NULL) */
+	free(stat->uid);
+	free(stat->extension);
+	free(stat->name);
+}
+
+/* Compute the wire size of stat, leading size field included. */
+uint16_t
+l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version)
+{
+	/* Fixed-width part: size, type, dev, qid, mode/atime/mtime, length. */
+	uint16_t total = L9P_WORD + L9P_WORD + L9P_DWORD + QID_SIZE +
+	    3 * L9P_DWORD + L9P_QWORD;
+
+	/* Each string costs its length prefix (L9P_WORD) plus its bytes. */
+	total += STRING_SIZE(stat->name);
+	total += STRING_SIZE(stat->uid);
+	total += STRING_SIZE(stat->gid);
+	total += STRING_SIZE(stat->muid);
+
+	/* L9P_2000U and later: extension string plus three 32-bit ids. */
+	if (version >= L9P_2000U) {
+		total += STRING_SIZE(stat->extension) + 3 * L9P_DWORD;
+	}
+
+	return (total);
+}
diff --git a/pytest/.gitignore b/pytest/.gitignore
new file mode 100644
index 000000000000..72be9ceecd4d
--- /dev/null
+++ b/pytest/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+__pycache__
+testconf.ini
diff --git a/pytest/Makefile b/pytest/Makefile
new file mode 100644
index 000000000000..14dd17777636
--- /dev/null
+++ b/pytest/Makefile
@@ -0,0 +1,9 @@
+PYTHON?=python
+# Run each module's selftest script in turn; stop at the first failure.
+selftest:
+	for f in lerrno p9err pfod protocol sequencer; do \
+	    ${PYTHON} $$f.py || exit 1; \
+	done
+
+clean cleandir:
+	rm -rf *.pyc __pycache__ *.log
diff --git a/pytest/README b/pytest/README
new file mode 100644
index 000000000000..6c8369d9521d
--- /dev/null
+++ b/pytest/README
@@ -0,0 +1,32 @@
+Here are some very skeletal instructions for using
+the client test code.
+
+on server (assumes BSD style LD_LIBRARY_PATH):
+
+mkdir /tmp/foo
+cd lib9p
+env LD_LIBRARY_PATH=. LIB9P_LOGGING=stderr example/server -h localhost -p 12345 /tmp/foo
+
+(this can be run as a non-root user for now, but some things
+only work when run as root)
+
+on client (same machine as server, but can always be run as
+non-root user):
+
+cd lib9p/pytest
+ONE TIME ONLY: copy testconf.ini.sample to testconf.ini, adjust to taste
+./client.py
+
+TODO: rework ./client.py so it can locate the .ini file better
+
+########
+
+IF USING diod (http://github.com/chaos/diod) AS THE SERVER ON
+A LINUX MACHINE:
+
+ - The instructions for running the server are (or were):
+     sudo ./diod -f -d 1 -n -e /tmp/9
+ - You must mkdir the exported 9pfs file system (e.g., mkdir /tmp/9).
+ - While uname is not really used, aname (the attach name) IS used
+   and must match the exported file system, e.g., testconf.ini
+   must have "aname = /tmp/9".
diff --git a/pytest/client.py b/pytest/client.py
new file mode 100755
index 000000000000..1746d4f6e277
--- /dev/null
+++ b/pytest/client.py
@@ -0,0 +1,643 @@
+#! /usr/bin/env python
+
+"""
+Run various tests, as a client.
+"""
+
+from __future__ import print_function
+
+import argparse
+try:
+    import ConfigParser as configparser
+except ImportError:
+    import configparser
+import functools
+import logging
+import os
+import socket
+import struct
+import sys
+import time
+import traceback
+
+import p9conn
+import protocol
+
+LocalError = p9conn.LocalError
+RemoteError = p9conn.RemoteError
+TEError = p9conn.TEError
+
+class TestState(object):
+    "shared state for one test run: config, logger, tallies, client table"
+    def __init__(self):
+        self.config = None
+        self.logger = None
+        self.successes = 0
+        self.skips = 0
+        self.failures = 0
+        self.exceptions = 0
+        self.clnt_tab = {}
+        self.mkclient = None
+        self.stop = False
+        self.gid = 0
+
+    def ccc(self, cid=None):
+        """
+        Connect or reconnect as client (ccc = check and connect client).
+        If caller provides a cid (client ID) we check that specific
+        client.  Otherwise the default ID ('base') is used.
+        In any case we return the [client, attachment] pair; the
+        attachment (session info) stays None until ccs() fills it in.
+        """
+        if cid is None:
+            cid = 'base'
+        pair = self.clnt_tab.get(cid)
+        if pair is None:
+            clnt = self.mkclient()
+            pair = [clnt, None]
+            self.clnt_tab[cid] = pair
+        else:
+            clnt = pair[0]
+        if not clnt.is_connected():
+            clnt.connect()
+        return pair
+
+    def dcc(self, cid=None):
+        """
+        Disconnect client (disconnect checked client).  If no specific
+        client ID is provided, this disconnects ALL checked clients!
+        """
+        if cid is None:
+            for each in list(self.clnt_tab.keys()):
+                self.dcc(each)
+            return
+        pair = self.clnt_tab.get(cid)
+        if pair is not None:
+            clnt = pair[0]
+            if clnt.is_connected():
+                clnt.shutdown()
+            del self.clnt_tab[cid]
+
+    def ccs(self, cid=None):
+        """
+        Like ccc, but establish a session as well, by attaching with
+        the configured aname/uname/n_uname (if not already attached).
+        Return the client instance (only).
+        """
+        pair = self.ccc(cid)
+        clnt = pair[0]
+        if pair[1] is None:
+            # No session yet - establish one.  Note, this may fail.
+            section = None if cid is None else ('client-' + cid)
+            aname = getconf(self.config, section, 'aname', '')
+            uname = getconf(self.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(self.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            clnt.attach(afid=None, aname=aname, uname=uname, n_uname=n_uname)
+            pair[1] = (aname, uname, n_uname)
+        return clnt
+
+def getconf(conf, section, name, default=None, rtype=str):
+    """
+    Get configuration item for given section, or for "client" if
+    there is no entry for that particular section (or if section
+    is None).
+
+    This lets us get specific values for specific tests or
+    groups ([foo] name=value), falling back to general values
+    ([client] name=value).
+
+    The type of the returned value <rtype> can be str, int, bool,
+    or float.  The default is str (and see getint, getbool,
+    getfloat below).
+
+    A default value may be supplied; if it is, that's the default
+    return value (this default should have the right type).  If
+    no default is supplied, a missing value raises LocalError.
+    """
+    try:
+        # note: conf.get(None, 'foo') raises NoSectionError
+        where = section
+        result = conf.get(where, name)
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        try:
+            where = 'client'
+            result = conf.get(where, name)
+        except configparser.NoSectionError:
+            sys.exit('no [{0}] section in configuration!'.format(where))
+        except configparser.NoOptionError:
+            if default is not None:
+                return default
+            if section is not None:
+                where = '[{0}] or [{1}]'.format(section, where)
+            else:
+                where = '[{0}]'.format(where)
+            raise LocalError('need {0}=value in {1}'.format(name, where))
+    where = '[{0}]'.format(where)
+    if rtype is str:
+        return result
+    if rtype is int:
+        return int(result)
+    if rtype is float:
+        return float(result)
+    if rtype is bool:
+        if result.lower() in ('1', 't', 'true', 'y', 'yes'):
+            return True
+        if result.lower() in ('0', 'f', 'false', 'n', 'no'):
+            return False
+        raise ValueError('{0} {1}={2}: invalid boolean'.format(where, name,
+                                                              result))
+    raise ValueError('{0} {1}={2}: internal error: bad result type '
+                     '{3!r}'.format(where, name, result, rtype))
+
+def getint(conf, section, name, default=None):
+    "get integer config item (getconf with rtype=int)"
+    return getconf(conf, section, name, default, int)
+
+def getfloat(conf, section, name, default=None):
+    "get float config item (getconf with rtype=float)"
+    return getconf(conf, section, name, default, float)
+
+def getbool(conf, section, name, default=None):
+    "get boolean config item (getconf with rtype=bool)"
+    return getconf(conf, section, name, default, bool)
+
+def pluralize(n, singular, plural):
+    "return singular if n == 1, otherwise plural"
+    return plural if n != 1 else singular
+
+class TCDone(Exception):
+    "raised by succ/fail/skip - skips the rest of the testcase with-block"
+    pass
+
+class TestCase(object):
+    """
+    Start a test case.  Most callers must then do a ccs() to connect.
+    Use as a context manager: __exit__ logs and tallies the outcome.
+    A failed test will generally disconnect from the server; a
+    new ccs() will reconnect, if the server is still alive.
+    """
+    def __init__(self, name, tstate):
+        self.name = name
+        self.status = None
+        self.detail = None
+        self.tstate = tstate
+        self._shutdown = None
+        self._autoclunk = None
+        self._acconn = None
+
+    def auto_disconnect(self, conn):
+        self._shutdown = conn  # __exit__ will call conn.shutdown()
+
+    def succ(self, detail=None):
+        "set success status"
+        self.status = 'SUCC'
+        self.detail = detail
+        raise TCDone()
+
+    def fail(self, detail):
+        "set failure status"
+        self.status = 'FAIL'
+        self.detail = detail
+        raise TCDone()
+
+    def skip(self, detail=None):
+        "set skip status"
+        self.status = 'SKIP'
+        self.detail = detail
+        raise TCDone()
+
+    def autoclunk(self, fid):
+        "mark fid to be closed/clunked on test exit"
+        if self._acconn is None:
+            raise ValueError('autoclunk: no _acconn')
+        self._autoclunk.append(fid)
+
+    def trace(self, msg, *args, **kwargs):
+        "add tracing info to log-file output"
+        level = kwargs.pop('level', logging.INFO)
+        self.tstate.logger.log(level, '      ' + msg, *args, **kwargs)
+
+    def ccs(self):
+        "call tstate ccs, turn socket.error connect failure into test fail"
+        try:
+            self.detail = 'connecting'
+            ret = self.tstate.ccs()
+            self.detail = None
+            self._acconn = ret
+            return ret
+        except socket.error as err:
+            self.fail(str(err))
+
+    def __enter__(self):
+        self.tstate.logger.log(logging.DEBUG, 'ENTER: %s', self.name)
+        self._autoclunk = []
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        tstate = self.tstate
+        eat_exc = False
+        tb_detail = None
+        if exc_type is TCDone:
+            # we exited with succ, fail, or skip
+            eat_exc = True
+            exc_type = None
+        if exc_type is not None:
+            if self.status is None:
+                self.status = 'EXCP'
+            else:
+                self.status += ' EXC'
+            if exc_type == TEError:
+                # timeout/eof - best guess is that we crashed the server!
+                eat_exc = True
+                tb_detail = ['timeout or EOF']
+            elif exc_type in (socket.error, RemoteError, LocalError):
+                eat_exc = True
+                tb_detail = traceback.format_exception(exc_type, exc_val,
+                                                       exc_tb)
+            level = logging.ERROR
+            tstate.failures += 1
+            tstate.exceptions += 1
+        else:
+            if self.status is None:
+                self.status = 'SUCC'
+            if self.status == 'SUCC':
+                level = logging.INFO
+                tstate.successes += 1
+            elif self.status == 'SKIP':
+                level = logging.INFO
+                tstate.skips += 1
+            else:
+                level = logging.ERROR
+                tstate.failures += 1
+        tstate.logger.log(level, '%s: %s', self.status, self.name)
+        if self.detail:
+            tstate.logger.log(level, '      detail: %s', self.detail)
+        if tb_detail:
+            for line in tb_detail:
+                tstate.logger.log(level, '      %s', line.rstrip())
+        for fid in self._autoclunk:
+            self._acconn.clunk(fid, ignore_error=True)
+        if self._shutdown:
+            self._shutdown.shutdown()
+        return eat_exc
+
+def main():
+    "parse config, run the tests, print a summary; returns exit status"
+    parser = argparse.ArgumentParser(description='run tests against a server')
+
+    parser.add_argument('-c', '--config',
+        action='append',
+        help='specify additional file(s) to read (beyond testconf.ini)')
+
+    args = parser.parse_args()
+    config = configparser.SafeConfigParser()
+    # use case sensitive keys
+    config.optionxform = str
+
+    try:
+        with open('testconf.ini', 'r') as stream:
+            config.readfp(stream)
+    except (OSError, IOError) as err:
+        sys.exit(str(err))
+    if args.config:
+        # config.read() returns only the files it managed to parse
+        ok = config.read(args.config)
+        failed = sorted(set(args.config) - set(ok))
+        if failed:
+            print('failed to read {0} {1}: {2}'.format(
+                len(failed), pluralize(len(failed), 'file', 'files'),
+                ', '.join(failed)))
+            sys.exit(1)
+
+    logging.basicConfig(level=config.get('client', 'loglevel').upper())
+    logger = logging.getLogger(__name__)
+    tstate = TestState()
+    tstate.logger = logger
+    tstate.config = config
+
+    server = config.get('client', 'server')
+    port = config.getint('client', 'port')
+    proto = config.get('client', 'protocol')
+    may_downgrade = config.getboolean('client', 'may_downgrade')
+    timeout = config.getfloat('client', 'timeout')
+
+    tstate.stop = True # unless overwritten below
+    with TestCase('send bad packet', tstate) as tc:
+        tc.detail = 'connecting to {0}:{1}'.format(server, port)
+        try:
+            conn = p9conn.P9SockIO(logger, server=server, port=port)
+        except socket.error:
+            tc.fail('cannot connect at all (server down?)')
+        tc.auto_disconnect(conn)
+        tc.detail = None
+        pkt = struct.pack('<I', 256)
+        conn.write(pkt)
+        # ignore reply if any, we're just trying to trip the server
+        tstate.stop = False
+        tc.succ()
+
+    if not tstate.stop:
+        tstate.mkclient = functools.partial(p9conn.P9Client, logger,
+                                           timeout, proto, may_downgrade,
+                                           server=server, port=port)
+        tstate.stop = True
+        with TestCase('send bad Tversion', tstate) as tc:
+            try:
+                clnt = tstate.mkclient()
+            except socket.error:
+                tc.fail('can no longer connect, did bad pkt crash server?')
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'wrongo, fishbreath!')
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except RemoteError as err:
+                tstate.stop = False
+                tc.succ(err.args[0])
+            tc.fail('server accepted a bad Tversion')
+
+    if not tstate.stop:
+        # All NUL characters in strings are invalid.
+        with TestCase('send illegal NUL in Tversion', tstate) as tc:
+            clnt = tstate.mkclient()
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'9P2000\0')
+            # Forcibly allow downgrade so that Tversion
+            # succeeds if they ignore the \0.
+            clnt.may_downgrade = True
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except (TEError, RemoteError) as err:
+                tc.succ(err.args[0])
+            tc.fail('server accepted NUL in Tversion')
+
+    if not tstate.stop:
+        with TestCase('connect normally', tstate) as tc:
+            tc.detail = 'connecting'
+            try:
+                tstate.ccc()
+            except RemoteError as err:
+                # can't test any further, but this might be success
+                tstate.stop = True
+                if 'they only support version' in err.args[0]:
+                    tc.succ(err.args[0])
+                tc.fail(err.args[0])
+            tc.succ()
+
+    if not tstate.stop:
+        with TestCase('attach with bad afid', tstate) as tc:
+            clnt = tstate.ccc()[0]
+            section = 'attach-with-bad-afid'
+            aname = getconf(tstate.config, section, 'aname', '')
+            uname = getconf(tstate.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(tstate.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            try:
+                clnt.attach(afid=42, aname=aname, uname=uname, n_uname=n_uname)
+            except RemoteError as err:
+                tc.succ(err.args[0])
+            tstate.dcc()    # drop the bogus session; later tests reconnect
+            tc.fail('bad attach afid not rejected')
+
+    try:
+        if not tstate.stop:
+            # Various Linux tests need gids.  Just get them for everyone.
+            tstate.gid = getint(tstate.config, 'client', 'gid', 0)
+            more_test_cases(tstate)
+    finally:
+        tstate.dcc()
+
+    n_tests = tstate.successes + tstate.failures
+    print('summary:')
+    if tstate.successes:
+        print('{0}/{1} tests succeeded'.format(tstate.successes, n_tests))
+    if tstate.failures:
+        print('{0}/{1} tests failed'.format(tstate.failures, n_tests))
+    if tstate.skips:
+        print('{0} {1} skipped'.format(tstate.skips,
+                                       pluralize(tstate.skips,
+                                                 'test', 'tests')))
+    if tstate.exceptions:
+        print('{0} {1} occurred'.format(tstate.exceptions,
+                                       pluralize(tstate.exceptions,
+                                                 'exception', 'exceptions')))
+    if tstate.stop:
+        print('tests stopped early')
+    return 1 if tstate.stop or tstate.exceptions or tstate.failures else 0
+
+def more_test_cases(tstate):
+    "run cases that can only proceed if connecting works at all"
+    with TestCase('attach normally', tstate) as tc:
+        tc.ccs()
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Empty string is not technically illegal.  It's not clear
+    # whether it should be accepted or rejected.  However, it
+    # used to crash the server entirely, so it's a desirable
+    # test case.
+    with TestCase('empty string in Twalk request', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b''])
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        clnt.clunk(fid)
+        tc.succ('note: empty Twalk component name not rejected')
+
+    # Name components may not contain /
+    with TestCase('embedded / in lookup component name', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b'/'])
+            tc.autoclunk(fid)
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        tc.fail('/ in lookup component name not rejected')
+
+    # Proceed from a clean tree.  As a side effect, this also tests
+    # either the old style readdir (read() on a directory fid) or
+    # the dot-L readdir().
+    #
+    # The test case will fail if we don't have permission to remove
+    # some file(s).
+    with TestCase('clean up tree (readdir+remove)', tstate) as tc:
+        clnt = tc.ccs()
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's there initially: {0!r}".format(fset))
+        try:
+            clnt.uxremove(b'/', force=False, recurse=True)
+        except RemoteError as err:
+            tc.trace('failed to read or clean up tree', level=logging.ERROR)
+            tc.trace('this might be a permissions error', level=logging.ERROR)
+            tstate.stop = True
+            tc.fail(str(err))
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's left after removing everything: {0!r}".format(fset))
+        if fset:
+            tstate.stop = True
+            tc.trace('note: could be a permissions error', level=logging.ERROR)
+            tc.fail('/ not empty after removing all: {0!r}'.format(fset))
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Name supplied to create, mkdir, etc, may not contain /.
+    # Note that this test may fail for the wrong reason if /dir
+    # itself does not already exist, so first let's make /dir.
+    only_dotl = getbool(tstate.config, 'client', 'only_dotl', False)
+    with TestCase('mkdir', tstate) as tc:
+        clnt = tc.ccs()
+        if only_dotl and not clnt.supports(protocol.td.Tmkdir):
+            tc.skip('cannot test dot-L mkdir on {0}'.format(clnt.proto))
+        try:
+            fid, qid = clnt.uxlookup(b'/dir', None)
+            tc.autoclunk(fid)
+            tstate.stop = True
+            tc.fail('found existing /dir after cleaning tree')
+        except RemoteError as err:
+            # we'll just assume it's "no such file or directory"
+            pass
+        if only_dotl:
+            qid = clnt.mkdir(clnt.rootfid, b'dir', 0o777, tstate.gid)
+        else:
+            qid, _ = clnt.create(clnt.rootfid, b'dir',
+                                 protocol.td.DMDIR | 0o777,
+                                 protocol.td.OREAD)
+        if qid.type != protocol.td.QTDIR:
+            tstate.stop = True
+            tc.fail('creating /dir: result is not a directory')
+        tc.trace('now attempting to create /dir/sub the wrong way')
+        try:
+            if only_dotl:
+                qid = clnt.mkdir(clnt.rootfid, b'dir/sub', 0o777, tstate.gid)
+            else:
+                qid, _ = clnt.create(clnt.rootfid, b'dir/sub',
+                                     protocol.td.DMDIR | 0o777,
+                                     protocol.td.OREAD)
+            # it's not clear what happened on the server at this point!
+            tc.trace("creating dir/sub (with embedded '/') should have "
+                     'failed but did not')
+            tstate.stop = True
+            fset = clnt.uxreaddir(b'/dir')
+            if 'sub' in fset:
+                tc.trace('(found our dir/sub detritus)')
+                clnt.uxremove(b'dir/sub', force=True)
+                fset = clnt.uxreaddir(b'/dir')
+                if 'sub' not in fset:
+                    tc.trace('(successfully removed our dir/sub detritus)')
+                    tstate.stop = False
+            tc.fail('created dir/sub as single directory with embedded slash')
+        except RemoteError as err:
+            # we'll just assume it's the right kind of error
+            tc.trace('invalid path dir/sub failed with: %s', str(err))
+            tc.succ('embedded slash in mkdir correctly refused')
+    if tstate.stop:
+        return
+
+    with TestCase('getattr/setattr', tstate) as tc:
+        # This test is not really thorough enough, need to test
+        # all combinations of settings.  Should also test that
+        # old values are restored on failure, although it is not
+        # clear how to trigger failures.
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Tgetattr):
+            tc.skip('{0} does not support Tgetattr'.format(clnt))
+        fid, _, _, _ = clnt.uxopen(b'/dir/file', os.O_CREAT | os.O_RDWR, 0o666,
+            gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'bytes\n')
+        if written != 6:
+            tc.trace('expected to write 6 bytes, actually wrote %d', written,
+                     level=logging.WARN)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after write, before setattr: got %s', attrs)
+        if attrs.size != written:
+            tc.fail('getattr: expected size={0}, got size='
+                    '{1}'.format(written, attrs.size))
+        # now truncate, set mtime to (sec=0, nsec=140000000), and check result
+        set_time_to = p9conn.Timespec(sec=0, nsec=140000000)
+        clnt.Tsetattr(fid, size=0, mtime=set_time_to)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after setattr: got %s', attrs)
+        if attrs.mtime.sec != set_time_to.sec or attrs.size != 0:
+            tc.fail('setattr: expected to get back mtime.sec={0}, size=0; '
+                    'got mtime.sec={1}, size='
+                    '{2}'.format(set_time_to.sec, attrs.mtime.sec, attrs.size))
+        # nsec is not as stable but let's check
+        if attrs.mtime.nsec != set_time_to.nsec:
+            tc.trace('setattr: expected to get back mtime_nsec=%d; '
+                     'got %d', set_time_to.nsec, attrs.mtime.nsec)
+        tc.succ('able to set and see size and mtime')
+
+    # this test should be much later, but we know the current
+    # server is broken...
+    with TestCase('rename adjusts other fids', tstate) as tc:
+        clnt = tc.ccs()
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        clnt.uxmkdir(b'd1', 0o777, tstate.gid, startdir=dirfid)
+        clnt.uxmkdir(b'd1/sub', 0o777, tstate.gid, startdir=dirfid)
+        d1fid, _ = clnt.uxlookup(b'd1', dirfid)
+        tc.autoclunk(d1fid)
+        subfid, _ = clnt.uxlookup(b'sub', d1fid)
+        tc.autoclunk(subfid)
+        fid, _, _, _ = clnt.uxopen(b'file', os.O_CREAT | os.O_RDWR,
+                                   0o666, startdir=subfid, gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'filedata\n')
+        if written != 9:
+            tc.trace('expected to write 9 bytes, actually wrote %d', written,
+                     level=logging.WARN)
+        # Now if we rename /dir/d1 to /dir/d2, the fids for both
+        # sub/file and sub itself should still be usable.  This
+        # holds for both Trename (Linux only) and Twstat based
+        # rename ops.
+        #
+        # Note that some servers may cache some number of files and/or
+        # directories held open, so we should open many fids to wipe
+        # out the cache (XXX notyet).
+        if clnt.supports(protocol.td.Trename):
+            clnt.rename(d1fid, dirfid, name=b'd2')
+        else:
+            clnt.wstat(d1fid, name=b'd2')
+        try:
+            rofid, _, _, _ = clnt.uxopen(b'file', os.O_RDONLY, startdir=subfid)
+            clnt.clunk(rofid)
+        except RemoteError as err:
+            tc.fail('open file in renamed dir/d2/sub: {0}'.format(err))
+        tc.succ()
+
+    # Even if xattrwalk is supported by the protocol, it's optional
+    # on the server.
+    with TestCase('xattrwalk', tstate) as tc:
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Txattrwalk):
+            tc.skip('{0} does not support Txattrwalk'.format(clnt))
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        try:
+            # need better tests...
+            attrfid, size = clnt.xattrwalk(dirfid)
+            tc.autoclunk(attrfid)
+            data = clnt.read(attrfid, 0, size)
+            tc.trace('xattrwalk with no name: data=%r', data)
+            tc.succ('xattrwalk size={0} datalen={1}'.format(size, len(data)))
+        except RemoteError as err:
+            tc.trace('xattrwalk on /dir: {0}'.format(err))
+        tc.succ('xattrwalk apparently not implemented')
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        sys.exit('\nInterrupted')
diff --git a/pytest/lerrno.py b/pytest/lerrno.py
new file mode 100644
index 000000000000..80a9a5098b87
--- /dev/null
+++ b/pytest/lerrno.py
@@ -0,0 +1,291 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for Linux.
+"""
+
+EPERM =                 1
+ENOENT =                2
+ESRCH =                 3
+EINTR =                 4
+EIO =                   5
+ENXIO =                 6
+E2BIG =                 7
+ENOEXEC =               8
+EBADF =                 9
+ECHILD =                10
+EAGAIN =                11
+ENOMEM =                12
+EACCES =                13
+EFAULT =                14
+ENOTBLK =               15
+EBUSY =                 16
+EEXIST =                17
+EXDEV =                 18
+ENODEV =                19
+ENOTDIR =               20
+EISDIR =                21
+EINVAL =                22
+ENFILE =                23
+EMFILE =                24
+ENOTTY =                25
+ETXTBSY =               26
+EFBIG =                 27
+ENOSPC =                28
+ESPIPE =                29
+EROFS =                 30
+EMLINK =                31
+EPIPE =                 32
+EDOM =                  33
+ERANGE =                34
+EDEADLK =               35
+ENAMETOOLONG =          36
+ENOLCK =                37
+ENOSYS =                38
+ENOTEMPTY =             39
+ELOOP =                 40
+#                       41 unused
+ENOMSG =                42
+EIDRM =                 43
+ECHRNG =                44
+EL2NSYNC =              45
+EL3HLT =                46
+EL3RST =                47
+ELNRNG =                48
+EUNATCH =               49
+ENOCSI =                50
+EL2HLT =                51
+EBADE =                 52
+EBADR =                 53
+EXFULL =                54
+ENOANO =                55
+EBADRQC =               56
+EBADSLT =               57
+#                       58 unused
+EBFONT =                59
+ENOSTR =                60
+ENODATA =               61
+ETIME =                 62
+ENOSR =                 63
+ENONET =                64
+ENOPKG =                65
+EREMOTE =               66
+ENOLINK =               67
+EADV =                  68
+ESRMNT =                69
+ECOMM =                 70
+EPROTO =                71
+EMULTIHOP =             72
+EDOTDOT =               73
+EBADMSG =               74
+EOVERFLOW =             75
+ENOTUNIQ =              76
+EBADFD =                77
+EREMCHG =               78
+ELIBACC =               79
+ELIBBAD =               80
+ELIBSCN =               81
+ELIBMAX =               82
+ELIBEXEC =              83
+EILSEQ =                84
+ERESTART =              85
+ESTRPIPE =              86
+EUSERS =                87
+ENOTSOCK =              88
+EDESTADDRREQ =          89
+EMSGSIZE =              90
+EPROTOTYPE =            91
+ENOPROTOOPT =           92
+EPROTONOSUPPORT =       93
+ESOCKTNOSUPPORT =       94
+EOPNOTSUPP =            95
+EPFNOSUPPORT =          96
+EAFNOSUPPORT =          97
+EADDRINUSE =            98
+EADDRNOTAVAIL =         99
+ENETDOWN =              100
+ENETUNREACH =           101
+ENETRESET =             102
+ECONNABORTED =          103
+ECONNRESET =            104
+ENOBUFS =               105
+EISCONN =               106
+ENOTCONN =              107
+ESHUTDOWN =             108
+ETOOMANYREFS =          109
+ETIMEDOUT =             110
+ECONNREFUSED =          111
+EHOSTDOWN =             112
+EHOSTUNREACH =          113
+EALREADY =              114
+EINPROGRESS =           115
+ESTALE =                116
+EUCLEAN =               117
+ENOTNAM =               118
+ENAVAIL =               119
+EISNAM =                120
+EREMOTEIO =             121
+EDQUOT =                122
+ENOMEDIUM =             123
+EMEDIUMTYPE =           124
+ECANCELED =             125
+ENOKEY =                126
+EKEYEXPIRED =           127
+EKEYREVOKED =           128
+EKEYREJECTED =          129
+EOWNERDEAD =            130
+ENOTRECOVERABLE =       131
+ERFKILL =               132
+EHWPOISON =             133
+
+_strerror = {
+    EPERM:              'Permission denied',
+    ENOENT:             'No such file or directory',
+    ESRCH:              'No such process',
+    EINTR:              'Interrupted system call',
+    EIO:                'Input/output error',
+    ENXIO:              'Device not configured',
+    E2BIG:              'Argument list too long',
+    ENOEXEC:            'Exec format error',
+    EBADF:              'Bad file descriptor',
+    ECHILD:             'No child processes',
+    EAGAIN:             'Resource temporarily unavailable',
+    ENOMEM:             'Cannot allocate memory',
+    EACCES:             'Permission denied',
+    EFAULT:             'Bad address',
+    ENOTBLK:            'Block device required',
+    EBUSY:              'Device busy',
+    EEXIST:             'File exists',
+    EXDEV:              'Cross-device link',
+    ENODEV:             'Operation not supported by device',
+    ENOTDIR:            'Not a directory',
+    EISDIR:             'Is a directory',
+    EINVAL:             'Invalid argument',
+    ENFILE:             'Too many open files in system',
+    EMFILE:             'Too many open files',
+    ENOTTY:             'Inappropriate ioctl for device',
+    ETXTBSY:            'Text file busy',
+    EFBIG:              'File too large',
+    ENOSPC:             'No space left on device',
+    ESPIPE:             'Illegal seek',
+    EROFS:              'Read-only filesystem',
+    EMLINK:             'Too many links',
+    EPIPE:              'Broken pipe',
+    EDOM:               'Numerical argument out of domain',
+    ERANGE:             'Result too large',
+    EDEADLK:            'Resource deadlock avoided',
+    ENAMETOOLONG:       'File name too long',
+    ENOLCK:             'No locks available',
+    ENOSYS:             'Function not implemented',
+    ENOTEMPTY:          'Directory not empty',
+    ELOOP:              'Too many levels of symbolic links',
+    ENOMSG:             'No message of desired type',
+    EIDRM:              'Identifier removed',
+    ECHRNG:             'Channel number out of range',
+    EL2NSYNC:           'Level 2 not synchronized',
+    EL3HLT:             'Level 3 halted',
+    EL3RST:             'Level 3 reset',
+    ELNRNG:             'Link number out of range',
+    EUNATCH:            'Protocol driver not attached',
+    ENOCSI:             'No CSI structure available',
+    EL2HLT:             'Level 2 halted',
+    EBADE:              'Invalid exchange',
+    EBADR:              'Invalid request descriptor',
+    EXFULL:             'Exchange full',
+    ENOANO:             'No anode',
+    EBADRQC:            'Invalid request code',
+    EBADSLT:            'Invalid slot',
+    EBFONT:             'Bad font file format',
+    ENOSTR:             'Device not a stream',
+    ENODATA:            'No data available',
+    ETIME:              'Timer expired',
+    ENOSR:              'Out of streams resources',
+    ENONET:             'Machine is not on the network',
+    ENOPKG:             'Package not installed',
+    EREMOTE:            'Object is remote',
+    ENOLINK:            'Link has been severed',
+    EADV:               'Advertise error',
+    ESRMNT:             'Srmount error',
+    ECOMM:              'Communication error on send',
+    EPROTO:             'Protocol error',
+    EMULTIHOP:          'Multihop attempted',
+    EDOTDOT:            'RFS specific error',
+    EBADMSG:            'Bad message',
+    EOVERFLOW:          'Value too large for defined data type',
+    ENOTUNIQ:           'Name not unique on network',
+    EBADFD:             'File descriptor in bad state',
+    EREMCHG:            'Remote address changed',
+    ELIBACC:            'Can not access a needed shared library',
+    ELIBBAD:            'Accessing a corrupted shared library',
+    ELIBSCN:            '.lib section in a.out corrupted',
+    ELIBMAX:            'Attempting to link in too many shared libraries',
+    ELIBEXEC:           'Cannot exec a shared library directly',
+    EILSEQ:             'Invalid or incomplete multibyte or wide character',
+    ERESTART:           'Interrupted system call should be restarted',
+    ESTRPIPE:           'Streams pipe error',
+    EUSERS:             'Too many users',
+    ENOTSOCK:           'Socket operation on non-socket',
+    EDESTADDRREQ:       'Destination address required',
+    EMSGSIZE:           'Message too long',
+    EPROTOTYPE:         'Protocol wrong type for socket',
+    ENOPROTOOPT:        'Protocol not available',
+    EPROTONOSUPPORT:    'Protocol not supported',
+    ESOCKTNOSUPPORT:    'Socket type not supported',
+    EOPNOTSUPP:         'Operation not supported',
+    EPFNOSUPPORT:       'Protocol family not supported',
+    EAFNOSUPPORT:       'Address family not supported by protocol',
+    EADDRINUSE:         'Address already in use',
+    EADDRNOTAVAIL:      'Cannot assign requested address',
+    ENETDOWN:           'Network is down',
+    ENETUNREACH:        'Network is unreachable',
+    ENETRESET:          'Network dropped connection on reset',
+    ECONNABORTED:       'Software caused connection abort',
+    ECONNRESET:         'Connection reset by peer',
+    ENOBUFS:            'No buffer space available',
+    EISCONN:            'Transport endpoint is already connected',
+    ENOTCONN:           'Transport endpoint is not connected',
+    ESHUTDOWN:          'Cannot send after transport endpoint shutdown',
+    ETOOMANYREFS:       'Too many references: cannot splice',
+    ETIMEDOUT:          'Connection timed out',
+    ECONNREFUSED:       'Connection refused',
+    EHOSTDOWN:          'Host is down',
+    EHOSTUNREACH:       'No route to host',
+    EALREADY:           'Operation already in progress',
+    EINPROGRESS:        'Operation now in progress',
+    ESTALE:             'Stale file handle',
+    EUCLEAN:            'Structure needs cleaning',
+    ENOTNAM:            'Not a XENIX named type file',
+    ENAVAIL:            'No XENIX semaphores available',
+    EISNAM:             'Is a named type file',
+    EREMOTEIO:          'Remote I/O error',
+    EDQUOT:             'Quota exceeded',
+    ENOMEDIUM:          'No medium found',
+    EMEDIUMTYPE:        'Wrong medium type',
+    ECANCELED:          'Operation canceled',
+    ENOKEY:             'Required key not available',
+    EKEYEXPIRED:        'Key has expired',
+    EKEYREVOKED:        'Key has been revoked',
+    EKEYREJECTED:       'Key was rejected by service',
+    EOWNERDEAD:         'Owner died',
+    ENOTRECOVERABLE:    'State not recoverable',
+    ERFKILL:            'Operation not possible due to RF-kill',
+    EHWPOISON:          'Memory page has hardware error',
+}
+
+def strerror(errnum):
+    """
+    Translate Linux errno to string.
+
+    >>> strerror(ENOKEY)
+    'Required key not available'
+    >>> strerror(41)
+    'Unknown error 41'
+    """
+    ret = _strerror.get(errnum)
+    if ret:
+        return ret
+    return 'Unknown error {0}'.format(errnum)
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/pytest/numalloc.py b/pytest/numalloc.py
new file mode 100644
index 000000000000..4623e88e6c79
--- /dev/null
+++ b/pytest/numalloc.py
@@ -0,0 +1,379 @@
+#! /usr/bin/env python
+
+"""
+Integer number allocator.
+
+Basically, these keep track of a set of allocatable values in
+some range (you provide min and max) and let you allocate out of
+the range and return values into the range.
+
+You may pick a value using "next since last time", or "next
+available after provided value".  Note that next-after will
+wrap around as needed (modular arithmetic style).
+
+The free lists are thread-locked so that this code can be used
+with threads.
+
+    >>> a = NumAlloc(5, 10) # note closed interval: 5..10 inclusive
+    >>> a
+    NumAlloc(5, 10)
+    >>> a.avail
+    [[5, 10]]
+    >>> a.alloc()
+    5
+    >>> a.avail
+    [[6, 10]]
+    >>> a.alloc(8)
+    8
+    >>> a.avail
+    [[6, 7], [9, 10]]
+    >>> a.free(5)
+    >>> a.avail
+    [[5, 7], [9, 10]]
+    >>> a.free(8)
+    >>> a.avail
+    [[5, 10]]
+
+Attempting to free a value that is already free is an error:
+
+    >>> a.free(5)
+    Traceback (most recent call last):
+       ...
+    ValueError: free: 5 already available
+
+You can, however, free a value that is outside the min/max
+range.  You can also free multiple values at once:
+
+    >>> a.free_multi([0, 1, 2, 4])
+    >>> a.avail
+    [[0, 2], [4, 10]]
+    >>> a.free_multi([3, 12])
+    >>> a.avail
+    [[0, 10], [12, 12]]
+
+Note that this changes the min/max values:
+
+    >>> a
+    NumAlloc(0, 12)
+
+To prevent adding values outside the min/max range, create the
+NumAlloc with autoextend=False, or set .autoextend=False at any
+time:
+
+    >>> a.autoextend = False
+    >>> a
+    NumAlloc(0, 12, autoextend=False)
+    >>> a.free(13)
+    Traceback (most recent call last):
+       ...
+    ValueError: free: 13 is outside range limit
+
+You can create an empty range, which is really only useful once
+you free values into it:
+
+    >>> r = NumAlloc(0, -1)
+    >>> r
+    NumAlloc(0, -1)
+    >>> r.alloc() is None
+    True
+    >>> r.free_multi(range(50))
+    >>> r
+    NumAlloc(0, 49)
+
+Note that r.alloc() starts from where you last left off, even if
+you've freed a value:
+
+    >>> r.alloc()
+    0
+    >>> r.free(0)
+    >>> r.alloc()
+    1
+
+Of course, in multithreaded code you can't really depend on this
+since it will race other threads.  Still, it generally makes for
+efficient allocation.  To force allocation to start from the
+range's minimum, provide the minimum (e.g., r.min_val) as an
+argument to r.alloc():
+
+    >>> r.alloc()
+    2
+    >>> r.alloc(r.min_val)
+    0
+
+Providing a number to alloc() tries to allocate that number,
+but wraps around to the next one if needed:
+
+    >>> r.alloc(49)
+    49
+    >>> r.alloc(49)
+    3
+    >>> r.alloc(99999)
+    4
+    >>> r.avail
+    [[5, 48]]
+
+There is currently no way to find all allocated values, although
+the obvious method (going through r.avail) will work.  Any iterator
+would not be thread-safe.
+"""
+
+import threading
+
+class NumAlloc(object):
+    """
+    Number allocator object.
+    """
+    def __init__(self, min_val, max_val, autoextend=True):
+        self.min_val = min_val
+        self.max_val = max_val
+        if min_val <= max_val:
+            self.avail = [[min_val, max_val]]
+        else:
+            self.avail = []
+        self.autoextend = autoextend
+        self.last = None
+        self.lock = threading.Lock()
+
+    def __repr__(self):
+        myname = self.__class__.__name__
+        if self.autoextend:
+            ae = ''
+        else:
+            ae = ', autoextend=False'
+        return '{0}({1}, {2}{3})'.format(myname, self.min_val, self.max_val, ae)
+
+    def _find_block(self, val):
+        """
+        Find the block that contains val, or that should contain val.
+        Remember that self.avail is a list of available ranges of
+        the form [[min1, max1], [min2, max2], ..., [minN, maxN]]
+        where max1 < min2, max2 < min3, ..., < minN.
+
+        The input value either falls into one of the available
+        blocks, or falls into a gap between two available blocks.
+        We want to know which block it goes in, or if it goes
+        between two, which block it comes before.
+
+        We can do a binary search to find this block.  When we
+        find it, return its index and its values.
+
+        If we find that val is not in a block, return the position
+        where the value should go, were it to be put into a new
+        block by itself.  E.g., suppose val is 17, and there is a
+        block [14,16] and a block [18,20]. We would make this
+        [14,16],[17,17],[18,20] by inserting [17,17] between them.
+        (Afterward, we will want to fuse all three blocks to make
+        [14,20].  However, if we insert as block 0, e.g., if the
+        list starts with [18,20] and we insert to get
+        [17,17][18,20], we really end up just modifying block 0 to
+        [17,20].  Or, if we insert as the new final block, we
+        might end up modifying the last block.)
+        """
+        low = 0
+        high = len(self.avail) - 1
+        while low <= high:
+            mid = low + ((high - low) // 2)
+            pair = self.avail[mid]
+            if val < pair[0]:
+                # must go before block mid
+                high = mid - 1
+            elif val > pair[1]:
+                # must go after block mid
+                low = mid + 1
+            else:
+                # val >= first and val <= last, so we found it
+                return mid, pair
+        # low > high: no block actually contains val, or there are
+        # no blocks at all (then low is 0).  Return the index at
+        # which a new block holding val would be inserted, and None.
+        return low, None
+
+    def alloc(self, val=None):
+        """
+        Get new available value.
+
+        If val is None, we start from the most recently
+        allocated value, plus 1.
+
+        If val is a numeric value, we start from that value.
+        Hence, since the range is min_val..max_val, you can
+        provide min_val to take the first available value.
+
+        This may return None, if no values are still available.
+        """
+        with self.lock:
+            if val is None:
+                val = self.last + 1 if self.last is not None else self.min_val
+            if val is None or val > self.max_val or val < self.min_val:
+                val = self.min_val
+            i, pair = self._find_block(val)
+            if pair is None:
+                # Value is not available.  _find_block gave us
+                # the insertion index i, so block i (if there is
+                # one) is the first block above val and holds
+                # the next available value; do not step past it.
+                # If there is no block i, wrap around: the next
+                # available value is in block 0.  If there is
+                # no block 0, there are no available values at
+                # all, so return None.
+                nblocks = len(self.avail)
+                if i >= nblocks:
+                    if nblocks == 0:
+                        return None
+                    i = 0
+                pair = self.avail[i]
+                val = pair[0]
+            # Value val is available - take it.
+            #
+            # There are four special cases to handle.
+            #
+            # 1. pair[0] < val < pair[1]: split the pair.
+            # 2. pair[0] == val < pair[1]: increase pair[0].
+            # 3. pair[0] == val == pair[1]: delete the pair
+            # 4. pair[0] < val == pair[1]: decrease pair[1].
+            assert pair[0] <= val <= pair[1]
+            if pair[0] == val:
+                # case 2 or 3: Take the left edge or delete the pair.
+                if val == pair[1]:
+                    del self.avail[i]
+                else:
+                    pair[0] = val + 1
+            else:
+                # case 1 or 4: split the pair or take the right edge.
+                if val == pair[1]:
+                    pair[1] = val - 1
+                else:
+                    newpair = [val + 1, pair[1]]
+                    pair[1] = val - 1
+                    self.avail.insert(i + 1, newpair)
+            self.last = val
+            return val
+
+    def free(self, val):
+        "Free one value"
+        self._free_multi('free', [val])
+
+    def free_multi(self, values):
+        "Free many values (provide any iterable)"
+        values = list(values)
+        values.sort()
+        self._free_multi('free_multi', values)
+
+    def _free_multi(self, how, values):
+        """
+        Free a (sorted) list of values.
+        """
+        if len(values) == 0:
+            return
+        with self.lock:
+            while values:
+                # Take highest value, and any contiguous lower values.
+                # Note that it can be significantly faster this way
+                # since coalesced ranges make for shorter copies.
+                highval = values.pop()
+                val = highval
+                while len(values) and values[-1] == val - 1:
+                    val = values.pop()
+                self._free_range(how, val, highval)
+
+    def _maybe_increase_max(self, how, val):
+        """
+        If needed, widen our range to include new high val -- i.e.,
+        possibly increase self.max_val.  Do nothing if this is not a
+        new all time high; fail if we have autoextend disabled.
+        """
+        if val <= self.max_val:
+            return
+        if self.autoextend:
+            self.max_val = val
+            return
+        raise ValueError('{0}: {1} is outside range limit'.format(how, val))
+
+    def _maybe_decrease_min(self, how, val):
+        """
+        If needed, widen our range to include new low val -- i.e.,
+        possibly decrease self.min_val.  Do nothing if this is not a
+        new all time low; fail if we have autoextend disabled.
+        """
+        if val >= self.min_val:
+            return
+        if self.autoextend:
+            self.min_val = val
+            return
+        raise ValueError('{0}: {1} is outside range limit'.format(how, val))
+
+    def _free_range(self, how, val, highval):
+        """
+        Free the range [val..highval].  Note, val==highval it's just
+        a one-element range.
+
+        The lock is already held.
+        """
+        # Find the place to store the lower value.
+        # We should never find an actual pair here.
+        i, pair = self._find_block(val)
+        if pair:
+            raise ValueError('{0}: {1} already available'.format(how, val))
+        # If we're freeing a range, check that the high val
+        # does not span into the *next* range, either.
+        if highval > val and i < len(self.avail):
+            if self.avail[i][0] <= highval:
+                raise ValueError('{0}: {2} (from {1}..{2}) already '
+                                 'available'.format(how, val, highval))
+
+        # We'll need to insert a block and perhaps fuse it
+        # with blocks before and/or after.  First, check
+        # whether there *is* a before and/or after, and find
+        # their corresponding edges and whether we abut them.
+        if i > 0:
+            abuts_below = self.avail[i - 1][1] + 1 == val
+        else:
+            abuts_below = False
+        if i < len(self.avail):
+            abuts_above = self.avail[i][0] - 1 == highval
+        else:
+            abuts_above = False
+        # Now there are these four cases:
+        # 1. abuts below and above: fuse the two blocks.
+        # 2. abuts below only: adjust previous (i-1'th) block
+        # 3. abuts above only: adjust next (i'th) block
+        # 4. doesn't abut: insert new block
+        if abuts_below:
+            if abuts_above:
+                # case 1
+                self.avail[i - 1][1] = self.avail[i][1]
+                del self.avail[i]
+            else:
+                # case 2
+                self._maybe_increase_max(how, highval)
+                self.avail[i - 1][1] = highval
+        else:
+            if abuts_above:
+                # case 3
+                self._maybe_decrease_min(how, val)
+                self.avail[i][0] = val
+            else:
+                # case 4
+                self._maybe_decrease_min(how, val)
+                self._maybe_increase_max(how, highval)
+                newblock = [val, highval]
+                self.avail.insert(i, newblock)
+
+if __name__ == '__main__':
+    import doctest
+    import sys
+
+    doctest.testmod()
+    if sys.version_info[0] >= 3:
+        xrange = range
+    # run some worst case tests
+    # NB: coalesce is terribly slow when done bottom up
+    r = NumAlloc(0, 2**16 - 1)
+    for i in xrange(r.min_val, r.max_val, 2):
+        r.alloc(i)
+    print('worst case alloc: len(r.avail) = {0}'.format(len(r.avail)))
+    for i in xrange(r.max_val - 1, r.min_val, -2):
+        r.free(i)
+    print('free again; len(r.avail) should be 1; is {0}'.format(len(r.avail)))
+    if len(r.avail) != 1:
+        sys.exit('failure')
diff --git a/pytest/p9conn.py b/pytest/p9conn.py
new file mode 100644
index 000000000000..459cd1169944
--- /dev/null
+++ b/pytest/p9conn.py
@@ -0,0 +1,1788 @@
+#! /usr/bin/env python
+
+"""
+handle plan9 server <-> client connections
+
+(We can act as either server or client.)
+
+This code needs some doctests or other unit tests...
+"""
+
+import collections
+import errno
+import logging
+import math
+import os
+import socket
+import stat
+import struct
+import sys
+import threading
+import time
+
+import lerrno
+import numalloc
+import p9err
+import pfod
+import protocol
+
+# Timespec based timestamps, if present, have
+# both seconds and nanoseconds.
+Timespec = collections.namedtuple('Timespec', 'sec nsec')
+
+# File attributes from Tgetattr, or given to Tsetattr.
+# (move to protocol.py?)  We use pfod here instead of
+# namedtuple so that we can create instances with all-None
+# fields easily.
+Fileattrs = pfod.pfod('Fileattrs',
+    'ino mode uid gid nlink rdev size blksize blocks '
+    'atime mtime ctime btime gen data_version')
+
+qt2n = protocol.qid_type2name
+
+STD_P9_PORT=564
+
+class P9Error(Exception):
+    pass
+
+class RemoteError(P9Error):
+    """
+    Used when the remote returns an error.  We track the client
+    (connection instance), the operation being attempted, the
+    message, and an error number and type.  The message may be
+    from the Rerror reply, or from converting the errno in a dot-L
+    or dot-u Rerror reply.  The error number may be None if the
+    type is 'Rerror' rather than 'Rlerror'.  The message may be
+    None or empty string if a non-None errno supplies the error
+    instead.
+    """
+    def __init__(self, client, op, msg, etype, errno):
+        self.client = str(client)
+        self.op = op
+        self.msg = msg
+        self.etype = etype # 'Rerror' or 'Rlerror'
+        self.errno = errno # may be None
+        self.message = self._get_message()
+        super(RemoteError, self).__init__(self.message)
+
+    def __repr__(self):
+        return ('{0!r}({1}, {2}, {3}, {4}, '
+                '{5})'.format(self.__class__.__name__, self.client, self.op,
+                              self.msg, self.errno, self.etype))
+    def __str__(self):
+        prefix = '{0}: {1}: '.format(self.client, self.op)
+        if self.errno: # check for "is not None", or just non-false-y?
+            name = {'Rerror': '.u', 'Rlerror': 'Linux'}[self.etype]
+            middle = '[{0} error {1}] '.format(name, self.errno)
+        else:
+            middle = ''
+        return '{0}{1}{2}'.format(prefix, middle, self.message)
+
+    def is_ENOTSUP(self):
+        if self.etype == 'Rlerror':
+            return self.errno == lerrno.EOPNOTSUPP
+        return self.errno == errno.EOPNOTSUPP
+
+    def _get_message(self):
+        "get message based on self.msg or self.errno"
+        if self.errno is not None:
+            return {
+                'Rlerror': p9err.dotl_strerror,
+                'Rerror' : p9err.dotu_strerror,
+            }[self.etype](self.errno)
+        return self.msg
+
+class LocalError(P9Error):
+    pass
+
+class TEError(LocalError):
+    pass
+
+class P9SockIO(object):
+    """
+    Common base for server and client, handle send and
+    receive to communications channel.  Note that this
+    need not set up the channel initially, only the logger.
+    The channel is typically connected later.  However, you
+    can provide one initially.
+    """
+    def __init__(self, logger, name=None, server=None, port=STD_P9_PORT):
+        self.logger = logger
+        self.channel = None
+        self.name = name
+        self.maxio = None
+        self.size_coder = struct.Struct('<I')
+        if server is not None:
+            self.connect(server, port)
+        self.max_payload = 2**32 - self.size_coder.size
+
+    def __str__(self):
+        if self.name:
+            return self.name
+        return repr(self)
+
+    def get_recommended_maxio(self):
+        "suggest a max I/O size, for when self.maxio is 0 / unset"
+        return 16 * 4096
+
+    def min_maxio(self):
+        "return a minimum size below which we refuse to work"
+        return self.size_coder.size + 100
+
+    def connect(self, server, port=STD_P9_PORT):
+        """
+        Connect to given server name / IP address.
+
+        If self.name was none, sets self.name to ip:port on success.
+        """
+        if self.is_connected():
+            raise LocalError('already connected')
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
+        sock.connect((server, port))
+        if self.name is None:
+            if port == STD_P9_PORT:
+                name = server
+            else:
+                name = '{0}:{1}'.format(server, port)
+        else:
+            name = None
+        self.declare_connected(sock, name, None)
+
+    def is_connected(self):
+        "predicate: are we connected?"
+        return self.channel is not None
+
+    def declare_connected(self, chan, name, maxio):
+        """
+        Now available for normal protocol (size-prefixed) I/O.
+        
+        Replaces chan and name and adjusts maxio, if those
+        parameters are not None.
+        """
+        if maxio:
+            minio = self.min_maxio()
+            if maxio < minio:
+                raise LocalError('maxio={0} < minimum {1}'.format(maxio, minio))
+        if chan is not None:
+            self.channel = chan
+        if name is not None:
+            self.name = name
+        if maxio is not None:
+            self.maxio = maxio
+            self.max_payload = maxio - self.size_coder.size
+
+    def reduce_maxio(self, maxio):
+        "Reduce maximum I/O size per other-side request"
+        minio = self.min_maxio()
+        if maxio < minio:
+            raise LocalError('new maxio={0} < minimum {1}'.format(maxio, minio))
+        if maxio > self.maxio:
+            raise LocalError('new maxio={0} > current {1}'.format(maxio,
+                                                                  self.maxio))
+        self.maxio = maxio
+        self.max_payload = maxio - self.size_coder.size
+
+    def declare_disconnected(self):
+        "Declare comm channel dead (note: leaves self.name set!)"
+        self.channel = None
+        self.maxio = None
+
+    def shutwrite(self):
+        "Do a SHUT_WR on the outbound channel - can't send more"
+        chan = self.channel
+        # we're racing other threads here
+        try:
+            chan.shutdown(socket.SHUT_WR)
+        except (OSError, AttributeError):
+            pass
+
+    def shutdown(self):
+        "Shut down comm channel"
+        if self.channel:
+            try:
+                self.channel.shutdown(socket.SHUT_RDWR)
+            except socket.error:
+                pass
+            self.channel.close()
+            self.declare_disconnected()
+
+    def read(self):
+        """
+        Try to read a complete packet.
+
+        Returns '' for EOF, as read() usually does.
+
+        If we can't even get the size, this still returns ''.
+        If we get a sensible size but are missing some data,
+        we can return a short packet.  Since we know if we did
+        this, we also return a boolean: True means "really got a
+        complete packet."
+
+        Note that '' EOF always returns False: EOF is never a
+        complete packet.
+        """
+        if self.channel is None:
+            return b'', False
+        size_field = self.xread(self.size_coder.size)
+        if len(size_field) < self.size_coder.size:
+            if len(size_field) == 0:
+                self.logger.log(logging.INFO, '%s: normal EOF', self)
+            else:
+                self.logger.log(logging.ERROR,
+                               '%s: EOF while reading size (got %d bytes)',
+                               self, len(size_field))
+                # should we raise an error here?
+            return b'', False
+
+        size = self.size_coder.unpack(size_field)[0] - self.size_coder.size
+        if size <= 0 or size > self.max_payload:
+            self.logger.log(logging.ERROR,
+                            '%s: incoming size %d is insane '
+                            '(max payload is %d)',
+                            self, size, self.max_payload)
+            # indicate EOF - should we raise an error instead, here?
+            return b'', False
+        data = self.xread(size)
+        return data, len(data) == size
+
+    def xread(self, nbytes):
+        """
+        Read nbytes bytes, looping if necessary.  Return '' for
+        EOF; may return a short count if we get some data, then
+        EOF.
+        """
+        assert nbytes > 0
+        # Try to get everything at once (should usually succeed).
+        # Return immediately for EOF or got-all-data.
+        data = self.channel.recv(nbytes)
+        if data == b'' or len(data) == nbytes:
+            return data
+
+        # Gather data fragments into an array, then join it all at
+        # the end.
+        count = len(data)
+        data = [data]
+        while count < nbytes:
+            more = self.channel.recv(nbytes - count)
+            if more == b'':
+                break
+            count += len(more)
+            data.append(more)
+        return b''.join(data)
+
+    def write(self, data):
+        """
+        Write all the data, in the usual encoding.  Note that
+        the length of the data, including the length of the length
+        itself, is already encoded in the first 4 bytes of the
+        data.
+
+        Raises IOError if we can't write everything.
+
+        Raises LocalError if len(data) exceeds max_payload.
+        """
+        size = len(data)
+        assert size >= 4
+        if size > self.max_payload:
+            raise LocalError('data length {0} exceeds '
+                             'maximum {1}'.format(size, self.max_payload))
+        self.channel.sendall(data)
+
+def _pathcat(prefix, suffix):
+    """
+    Concatenate paths we are using on the server side.  This is
+    basically just prefix + / + suffix, with two complications:
+
+    It's possible we don't have a prefix path, in which case
+    we want the suffix without a leading slash.
+
+    It's possible that the prefix is just b'/', in which case we
+    want prefix + suffix.
+    """
+    if prefix:
+        if prefix == b'/':  # or prefix.endswith(b'/')?
+            return prefix + suffix
+        return prefix + b'/' + suffix
+    return suffix
+
+class P9Client(P9SockIO):
+    """
+    Act as client.
+
+    We need a logger (see logging), a timeout, and a protocol
+    version to request.  By default, we will downgrade to a lower
+    version if asked.
+
+    If server and port are supplied, they are remembered and become
+    the default for .connect() (which is still deferred).
+
+    Note that we keep a table of fid-to-path in self.live_fids,
+    but at any time (except while holding the lock) a fid can
+    be deleted entirely, and the table entry may just be True
+    if we have no path name.  In general, we update the name
+    when we can.
+    """
+    def __init__(self, logger, timeout, version, may_downgrade=True,
+                 server=None, port=None):
+        super(P9Client, self).__init__(logger)
+        self.timeout = timeout
+        self.iproto = protocol.p9_version(version)
+        self.may_downgrade = may_downgrade
+        self.tagalloc = numalloc.NumAlloc(0, 65534)
+        self.tagstate = {}
+        # The next bit is slightly dirty: perhaps we should just
+        # allocate NOFID out of the 2**32-1 range, so as to avoid
+        # "knowing" that it's 2**32-1.
+        self.fidalloc = numalloc.NumAlloc(0, protocol.td.NOFID - 1)
+        self.live_fids = {}
+        self.rootfid = None
+        self.rootqid = None
+        self.rthread = None
+        self.lock = threading.Lock()
+        self.new_replies = threading.Condition(self.lock)
+        self._monkeywrench = {}
+        self._server = server
+        self._port = port
+        self._unsup = {}
+
+    def get_monkey(self, what):
+        "check for a monkey-wrench"
+        with self.lock:
+            wrench = self._monkeywrench.get(what)
+            if wrench is None:
+                return None
+            if isinstance(wrench, list):
+                # repeats wrench[0] times, or forever if that's 0
+                ret = wrench[1]
+                if wrench[0] > 0:
+                    wrench[0] -= 1
+                    if wrench[0] == 0:
+                        del self._monkeywrench[what]
+            else:
+                ret = wrench
+                del self._monkeywrench[what]
+        return ret
+
+    def set_monkey(self, what, how, repeat=None):
+        """
+        Set a monkey-wrench.  If repeat is not None it is the number of
+        times the wrench is applied (0 means forever, or until you call
+        set again with how=None).  What is what to monkey-wrench, which
+        depends on the op.  How is generally a replacement value.
+        """
+        if how is None:
+            with self.lock:
+                try:
+                    del self._monkeywrench[what]
+                except KeyError:
+                    pass
+            return
+        if repeat is not None:
+            how = [repeat, how]
+        with self.lock:
+            self._monkeywrench[what] = how
+
+    def get_tag(self, for_Tversion=False):
+        "get next available tag ID"
+        with self.lock:
+            if for_Tversion:
+                tag = 65535
+            else:
+                tag = self.tagalloc.alloc()
+            if tag is None:
+                raise LocalError('all tags in use')
+            self.tagstate[tag] = True # ie, in use, still waiting
+        return tag
+
+    def set_tag(self, tag, reply):
+        "store reply as the answer for tag and wake waiters; log oddities"
+        assert 0 <= tag < 65536
+        with self.lock:
+            # What do we currently hold for this tag?
+            current = self.tagstate.get(tag)
+            if current is True:
+                # Somebody is waiting for it: store the answer and
+                # wake every thread blocked on new_replies.
+                self.tagstate[tag] = reply
+                self.new_replies.notify_all()
+            elif current is False:
+                # The waiter gave up on this tag earlier; drop the
+                # late reply and retire the tag.
+                self.logger.log(logging.INFO,
+                                '%s: got tag %d = %r after timing out on it',
+                                self, tag, reply)
+                self.retire_tag_locked(tag)
+            elif current is None:
+                # The server answered a tag that has no tagstate
+                # entry, i.e. one that is not outstanding.
+                self.logger.log(logging.WARNING,
+                                '%s: got tag %d = %r when tag %d not in use!',
+                                self, tag, reply, tag)
+            else:
+                # A previous reply is still stored, not yet picked
+                # up; keep it and merely log the second one.
+                self.logger.log(logging.WARNING,
+                                '%s: got tag %d = %r when tag %d = %r!',
+                                self, tag, reply, tag, current)
+
+    def retire_tag(self, tag):
+        "release tag once its reply was handled; tag 65535 is left alone"
+        if tag != 65535:
+            # every other tag must lie in the range 0..65534
+            assert 0 <= tag < 65535
+            with self.lock:
+                self.retire_tag_locked(tag)
+
+    def retire_tag_locked(self, tag):
+        "forget tag and free it in tagalloc; caller must hold self.lock"
+        # retire_all_tags may have emptied tagstate already: no-op then
+        if tag not in self.tagstate:
+            return
+        del self.tagstate[tag]
+        self.tagalloc.free(tag)
+
+    def retire_all_tags(self):
+        "drop every tag, whatever its state, and wake waiters (connection drop)"
+        with self.lock:
+            in_use = self.tagstate.keys()
+            self.tagalloc.free_multi(in_use)
+            self.tagstate = {}
+            self.new_replies.notify_all()
+
+    def alloc_fid(self):
+        "take a fid from fidalloc; register it as live with placeholder True"
+        with self.lock:
+            newfid = self.fidalloc.alloc()
+            self.live_fids[newfid] = True
+            return newfid
+
+    def getpath(self, fid):
+        "path recorded for fid; None when fid is not live or has no path"
+        with self.lock:
+            known = self.live_fids.get(fid)
+        # True is only the "live, path unknown" marker, so hide it
+        # from callers exactly like a missing entry.
+        return None if known is True else known
+
+    def getpathX(self, fid):
+        """
+        Printable form of fid's path: repr() of the stored path, or
+        the text <fid N, unknown path> when getpath() has nothing.
+        """
+        known = self.getpath(fid)
+        if known is not None:
+            return repr(known)
+        return '<fid {0}, unknown path>'.format(fid)
+
+    def setpath(self, fid, path):
+        "record path for a live fid; an int path means copy that fid's path"
+        with self.lock:
+            # An integer names another fid whose stored value we reuse.
+            newval = path
+            if isinstance(newval, int):
+                newval = self.live_fids.get(newval)
+            # None (source fid not live), True (source has no path)
+            # and the empty string all collapse to the placeholder
+            # True, i.e. "live but path unknown".
+            newval = newval or True
+            # Only fids that are currently live (entry is True or an
+            # old path) are updated; unknown fids are left alone.
+            if self.live_fids.get(fid):
+                self.live_fids[fid] = newval
+
+    def did_rename(self, fid, ncomp, newdir=None):
+        """
+        Announce that we renamed using a fid - we'll try to update
+        other fids based on this (we can't really do it perfectly).
+
+        NOTE: caller must provide a final-component.
+        The caller can supply the new path (and should
+        do so if the rename is not based on the retained path
+        for the supplied fid, i.e., for rename ops where fid
+        can move across directories).  The rules:
+
+         - If newdir is None (default), we use stored path.
+         - Otherwise, newdir provides the best approximation
+           we have to the path that needs ncomp appended.
+
+        (This is based on the fact that renames happen via Twstat
+        or Trename, or Trenameat, which change just one tail component,
+        but the path names vary.)
+        """
+        if ncomp is None:
+            return
+        opath = self.getpath(fid)
+        if newdir is None:
+            if opath is None:
+                return
+            ocomps = opath.split(b'/')
+            ncomps = ocomps[0:-1]
+        else:
+            ocomps = None           # well, none yet anyway
+            ncomps = newdir.split(b'/')
+        ncomps.append(ncomp)
+        if opath is None or not opath.startswith(b'/'):
+            # Not an absolute path (startswith, because indexing bytes
+            # yields an int on Python 3): we cannot fix other fids, so
+            # just store the new, at least partial, path and return.
+            npath = b'/'.join(ncomps)
+            with self.lock:
+                if fid in self.live_fids:
+                    self.live_fids[fid] = npath
+            return
+        if ocomps is None:
+            ocomps = opath.split(b'/')
+        olen = len(ocomps)
+        ofinal = ocomps[olen - 1]
+        # The old path is absolute.  Rewrite every recorded path that
+        # begins with all of ocomps, e.g. after /one/two -> /one/six
+        # the path /one/two/b becomes /one/six/b.  items() is copied
+        # (and is spelled so it exists on Python 2 and 3) because the
+        # loop body stores into live_fids.
+        with self.lock:
+            for fid2, path2 in list(self.live_fids.items()):
+                # Skip fids without byte-string paths
+                if not isinstance(path2, bytes):
+                    continue
+                # Before splitting (which is a bit expensive), try
+                # a straightforward prefix match.  This might give
+                # some false hits, e.g., prefix /one/two/threepenny
+                # starts with /one/two/three, but it quickly eliminates
+                # /raz/baz/mataz and the like.
+                if not path2.startswith(opath):
+                    continue
+                # Split up the path, and use that to make sure that
+                # the final component is a full match.
+                parts2 = path2.split(b'/')
+                if parts2[olen - 1] != ofinal:
+                    continue
+                # OK, path2 starts with the old (renamed) sequence.
+                # Replace the old components with the new ones.
+                # This updates the renamed fid when we come across
+                # it!  It also handles a change in the number of
+                # components, thanks to Python's slice assignment.
+                parts2[0:olen] = ncomps
+                self.live_fids[fid2] = b'/'.join(parts2)
+
+    def retire_fid(self, fid):
+        "give fid back to fidalloc and drop its live_fids entry"
+        with self.lock:
+            self.fidalloc.free(fid)
+            self.live_fids.pop(fid)
+
+    def retire_all_fids(self):
+        "give every live fid back to fidalloc and forget all paths"
+        # (When chasing fid leaks, print each fid with getpathX(fid)
+        # here before the table is emptied.)
+        with self.lock:
+            leftover = self.live_fids.keys()
+            self.fidalloc.free_multi(leftover)
+            self.live_fids = {}
+
+    def read_responses(self):
+        "Reader-thread body: decode incoming packets and file them by tag."
+        while self.is_connected():
+            packet, complete = super(P9Client, self).read()
+            if packet == b'':
+                # Empty read: call shutwrite(), release all tags, exit.
+                self.shutwrite()
+                self.retire_all_tags()
+                return
+            if not complete:
+                self.logger.log(logging.WARNING, '%s: got short packet', self)
+            try:
+                # One special case: until a version has been agreed
+                # on, the packet must be unpacked *as if* it were a
+                # plain 9P2000 response.
+                decoder = self.proto if self.have_version else protocol.plain
+                resp = decoder.unpack(packet)
+            except protocol.SequenceError as err:
+                self.logger.log(logging.ERROR, '%s: bad response: %s',
+                                self, err)
+                # Retry with noerror=True, purely to log what decodes.
+                try:
+                    resp = self.proto.unpack(packet, noerror=True)
+                except protocol.SequenceError:
+                    header = self.proto.unpack_header(packet, noerror=True)
+                    self.logger.log(logging.ERROR,
+                                    '%s: (not even raw-decodable)', self)
+                    self.logger.log(logging.ERROR,
+                                    '%s: header decode produced %r',
+                                    self, header)
+                else:
+                    self.logger.log(logging.ERROR,
+                                    '%s: raw decode produced %r',
+                                    self, resp)
+                # A shutdown is probably warranted after this kind of
+                # problem, but for now the loop simply carries on.
+            else:
+                # Every protocol response carries a tag, so resp.tag
+                # is always present here.
+                self.logger.log(logging.DEBUG, "read_resp: tag %d resp %r", resp.tag, resp)
+                self.set_tag(resp.tag, resp)
+
+    def wait_for(self, tag):
+        """
+        Wait for a response to the given tag.  Return the response,
+        releasing the tag.  If self.timeout is not None, wait at most
+        that long in total; on timeout the tag is marked timed-out and
+        stays reserved until set_tag() sees its late reply.
+
+        If this returns None, either the tag was bad initially, or
+        a timeout occurred, or the connection got shut down.
+        """
+        self.logger.log(logging.DEBUG, "wait_for: tag %d", tag)
+        if self.timeout is None:
+            deadline = None
+        else:
+            deadline = time.time() + self.timeout
+        with self.lock:
+            while True:
+                # tagstate is True (waiting) or False (timedout) or
+                # a valid response, or None if we've reset the tag
+                # states (retire_all_tags, after connection drop).
+                resp = self.tagstate.get(tag, None)
+                if resp is None:
+                    # out of sync, exit loop
+                    break
+                if resp is True:
+                    # Still unanswered: sleep for the remaining budget only.
+                    left = None if deadline is None else deadline - time.time()
+                    if left is None or left > 0:
+                        self.new_replies.wait(left)
+                    else:
+                        # Out of time, and (checked under the lock) no reply yet.
+                        self.tagstate[tag] = False
+                    continue
+                # resp is either False (timeout) or a reply.
+                # If resp is False, change it to None; the tag
+                # is now dead until we get a reply (then we
+                # just toss the reply).
+                # Otherwise, we're done with the tag: free it.
+                # In either case, stop now.
+                if resp is False:
+                    resp = None
+                else:
+                    self.tagalloc.free(tag)
+                    del self.tagstate[tag]
+                break
+        return resp
+
+    def badresp(self, req, resp):
+        """
+        Raise the exception matching an unexpected response to req.
+        """
+        if resp is None:
+            self.shutdown()
+            raise TEError('{0}: {1}: timeout or EOF'.format(self, req))
+        if isinstance(resp, protocol.rrd.Rlerror):
+            raise RemoteError(self, req, None, 'Rlerror', resp.ecode)
+        if isinstance(resp, protocol.rrd.Rerror):
+            numeric = resp.errnum is not None
+            raise RemoteError(self, req, None if numeric else resp.errstr,
+                              'Rerror', resp.errnum)
+        raise LocalError('{0}: {1} got response {2!r}'.format(self, req, resp))
+
+    def supports(self, req_code):
+        "does self.proto support req_code, with no unsupported() recorded?"
+        if req_code in self._unsup:
+            # recorded earlier via unsupported(): skip the protocol check
+            return False
+        return self.proto.supports(req_code)
+
+    def supports_all(self, *req_codes):
+        "true only when every one of req_codes passes supports()"
+        return not any(not self.supports(code) for code in req_codes)
+
+    def unsupported(self, req_code):
+        """
+        Record that req_code is unsupported (the op got ENOTSUP in a
+        RemoteError), so that supports() answers False for it later.
+        Ops must call this themselves; nothing does so automatically.
+        """
+        self._unsup[req_code] = True
+
+    def connect(self, server=None, port=None):
+        """
+        Connect to given server/port pair and negotiate the version.
+
+        The server and port are remembered, but only after a fully
+        *successful* connect; arguments given as None fall back to
+        the remembered values (initially those from the creation of
+        this client instance) and then, for the port, STD_P9_PORT.
+        This starts the read_responses thread, sends Tversion, and
+        passes the reply's msize to reduce_maxio(); a version mismatch
+        is a LocalError unless may_downgrade allows downgrading.
+        """
+        if server is None:
+            server = self._server
+            if server is None:
+                raise LocalError('connect: no server specified and no default')
+        if port is None:
+            port = self._port
+            if port is None:
+                port = STD_P9_PORT
+        self.name = None            # wipe out previous name, if any
+        super(P9Client, self).connect(server, port)
+        maxio = self.get_recommended_maxio()
+        self.declare_connected(None, None, maxio)
+        self.proto = self.iproto    # revert to initial protocol
+        self.have_version = False
+        self.rthread = threading.Thread(target=self.read_responses)
+        self.rthread.start()
+        tag = self.get_tag(for_Tversion=True)
+        req = protocol.rrd.Tversion(tag=tag, msize=maxio,
+                                    version=self.get_monkey('version'))
+        super(P9Client, self).write(self.proto.pack_from(req))
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rversion):
+            self.shutdown()
+            if isinstance(resp, protocol.rrd.Rerror):
+                version = req.version or self.proto.get_version()
+                # 'version ' + version below needs a str: decode bytes
+                if not isinstance(version, str):
+                    version = version.decode('utf-8', 'surrogateescape')
+                raise RemoteError(self, 'version ' + version,
+                                  resp.errstr, 'Rerror', None)
+            self.badresp('version', resp)
+        their_maxio = resp.msize
+        try:
+            self.reduce_maxio(their_maxio)
+        except LocalError as err:
+            raise LocalError('{0}: sent maxio={1}, they tried {2}: '
+                             '{3}'.format(self, maxio, their_maxio,
+                                          err.args[0]))
+        if resp.version != self.proto.get_version():
+            if not self.may_downgrade:
+                self.shutdown()
+                raise LocalError('{0}: they only support '
+                                 'version {1!r}'.format(self, resp.version))
+            # raises LocalError if the version is bad
+            # (should we wrap it with a connect-to-{0} msg?)
+            self.proto = self.proto.downgrade_to(resp.version)
+        self._server = server
+        self._port = port
+        self.have_version = True
+
+    def attach(self, afid, uname, aname, n_uname):
+        """
+        Send Tattach on a freshly allocated fid and, on success, make
+        that fid the root: rootfid and rootqid are set and the fid's
+        path is recorded as b'/'.
+
+        Authentication is not implemented; a caller-provided afid
+        is simply passed through.
+        """
+        # None arguments get defaults: NOFID, '', '' and NONUNAME.
+        afid = protocol.td.NOFID if afid is None else afid
+        uname = '' if uname is None else uname
+        aname = '' if aname is None else aname
+        n_uname = protocol.td.NONUNAME if n_uname is None else n_uname
+        tag = self.get_tag()
+        rootfid = self.alloc_fid()
+        request = self.proto.Tattach(tag=tag, fid=rootfid, afid=afid,
+                                     uname=uname, aname=aname,
+                                     n_uname=n_uname)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rattach):
+            # give the unused fid back before raising
+            self.retire_fid(rootfid)
+            self.badresp('attach', reply)
+        # probably should check reply.qid
+        self.rootfid = rootfid
+        self.rootqid = reply.qid
+        self.setpath(rootfid, b'/')
+
+    def shutdown(self):
+        "clunk the root fid, drop all tags and fids, close, join the reader"
+        if self.rootfid is not None:
+            # best effort: a failed clunk must not stop the teardown
+            self.clunk(self.rootfid, ignore_error=True)
+        self.retire_all_tags()
+        self.retire_all_fids()
+        self.rootfid = self.rootqid = None
+        super(P9Client, self).shutdown()
+        if self.rthread:
+            self.rthread.join()
+            self.rthread = None
+
+    def dupfid(self, fid):
+        """
+        Clone fid: Twalk with no names onto a new fid, which is returned.
+        """
+        tag = self.get_tag()
+        clone = self.alloc_fid()
+        request = self.proto.Twalk(tag=tag, fid=fid, newfid=clone, nwname=0,
+                                   wname=[])
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rwalk):
+            self.retire_fid(clone)
+            self.badresp('walk {0}'.format(self.getpathX(fid)), reply)
+        # The clone inherits whatever path is recorded for fid.
+        self.setpath(clone, fid)
+        return clone
+
+    def lookup(self, fid, components):
+        """
+        Do Twalk.  Caller must provide a starting fid, which should
+        be rootfid to look up from '/' - we do not do / vs . here.
+        Caller must also provide a component-ized path (on purpose,
+        so that caller can provide invalid components like '' or '/')
+        as a list (not a bare string) of byte-strings; anything else
+        is a LocalError.  Returns (newfid, resp.wqid).
+
+        We do allocate the new fid ourselves here, though.  A walk
+        that stops short raises OSError(ENOENT) and clunks the fid.
+        There's no logic here to split up long walks (yet?).
+        """
+        # these are too easy to screw up, so check
+        if self.rootfid is None:
+            raise LocalError('{0}: not attached'.format(self))
+        if (isinstance(components, (str, bytes)) or
+            not all(isinstance(i, bytes) for i in components)):
+            raise LocalError('{0}: lookup: invalid '
+                             'components {1!r}'.format(self, components))
+        tag = self.get_tag()
+        newfid = self.alloc_fid()
+        startpath = self.getpath(fid)
+        pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid,
+                               nwname=len(components), wname=components)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rwalk):
+            self.retire_fid(newfid)
+            self.badresp('walk {0} in '
+                         '{1}'.format(components, self.getpathX(fid)),
+                         resp)
+        # Just because we got Rwalk does not mean we got ALL the
+        # way down the path.  Raise OSError(ENOENT) if we're short.
+        if resp.nwqid > len(components):
+            # ??? this should be impossible. Local error?  Remote error?
+            # OS Error?
+            self.clunk(newfid, ignore_error=True)
+            raise LocalError('{0}: walk {1} in {2} returned {3} '
+                             'items'.format(self, components,
+                                            self.getpathX(fid), resp.nwqid))
+        if resp.nwqid < len(components):
+            self.clunk(newfid, ignore_error=True)
+            # Looking up a/b/c and got just a/b, c is what's missing.
+            # Looking up a/b/c and got just a, b is what's missing.
+            missing = components[resp.nwqid]
+            within = _pathcat(startpath, b'/'.join(components[:resp.nwqid]))
+            raise OSError(errno.ENOENT,
+                          '{0}: {1} in {2}'.format(os.strerror(errno.ENOENT),
+                                                   missing, within))
+        self.setpath(newfid, _pathcat(startpath, b'/'.join(components)))
+        return newfid, resp.wqid
+
+    def lookup_last(self, fid, components):
+        """
+        Like lookup, but return only the last component's qid (for an
+        empty components list: rootqid, or the qid that Tstat reports).
+        """
+        rfid, wqid = self.lookup(fid, components)
+        if len(wqid):
+            return rfid, wqid[-1]
+        if fid == self.rootfid:         # usually true, if we get here at all
+            return rfid, self.rootqid
+        tag = self.get_tag()
+        super(P9Client, self).write(self.proto.Tstat(tag=tag, fid=rfid))
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rstat):
+            if resp is not None:        # no reply: badresp() shuts down anyway
+                self.clunk(rfid, ignore_error=True)     # don't leak rfid
+            self.badresp('stat {0}'.format(self.getpathX(fid)), resp)
+        # unpack_wirestat takes an offset and returns (stat, next offset).
+        return rfid, self.proto.unpack_wirestat(resp.data, 0)[0].qid
+
+    def clunk(self, fid, ignore_error=False):
+        "send Tclunk for fid; the fid is retired locally only on Rclunk"
+        tag = self.get_tag()
+        request = self.proto.Tclunk(tag=tag, fid=fid)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rclunk):
+            if ignore_error:
+                return
+            self.badresp('clunk {0}'.format(self.getpathX(fid)), reply)
+        self.retire_fid(fid)
+
+    def remove(self, fid, ignore_error=False):
+        "send Tremove (old style) for fid; on Rremove the fid is retired too"
+        tag = self.get_tag()
+        request = self.proto.Tremove(tag=tag, fid=fid)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rremove):
+            if ignore_error:
+                # the remove failed, so clunk the fid explicitly
+                self.clunk(fid, True)
+                return
+            self.badresp('remove {0}'.format(self.getpathX(fid)), reply)
+        self.retire_fid(fid)
+
+    def create(self, fid, name, perm, mode, filetype=None, extension=b''):
+        """
+        Issue create op (note that this may be mkdir, symlink, etc).
+        fid is the directory in which the create happens, and for
+        regular files, it becomes, on success, a fid referring to
+        the now-open file.  perm is, e.g., 0644, 0755, etc.,
+        optionally with additional high bits.  mode is a mode
+        byte (e.g., protocol.td.ORDWR, or OWRONLY|OTRUNC, etc.).
+        Returns (qid, iounit) from the Rcreate reply.
+
+        As a service to callers, we take two optional arguments
+        specifying the file type ('dir', 'symlink', 'device',
+        'fifo', or 'socket') and additional info if needed.
+        The additional info for a symlink is the target of the
+        link (a byte string), and the additional info for a device
+        is a byte string with "b <major> <minor>" or "c <major> <minor>".
+        Any other filetype raises KeyError before a tag is taken.
+
+        Otherwise, callers can leave filetype=None and encode the bits
+        into the perm (caller must still provide extension if needed).
+        We do NOT check whether the extension matches extra DM bits,
+        or that there's only one DM bit set, since this is a testing setup.
+        """
+        if filetype is not None:
+            perm |= {
+                'dir': protocol.td.DMDIR,
+                'symlink': protocol.td.DMSYMLINK,
+                'device': protocol.td.DMDEVICE,
+                'fifo': protocol.td.DMNAMEDPIPE,
+                'socket': protocol.td.DMSOCKET,
+            }[filetype]
+        # Take the tag only now, so a bad filetype cannot leak one.
+        tag = self.get_tag()
+        pkt = self.proto.Tcreate(tag=tag, fid=fid, name=name,
+            perm=perm, mode=mode, extension=extension)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rcreate):
+            self.badresp('create {0} in {1}'.format(name, self.getpathX(fid)),
+                         resp)
+        if resp.qid.type == protocol.td.QTFILE:
+            # A created regular file is now open on fid: new path.
+            self.setpath(fid, _pathcat(self.getpath(fid), name))
+        return resp.qid, resp.iounit
+
+    def open(self, fid, mode):
+        "send Topen for fid with the 1-byte mode; return (qid, iounit)"
+        tag = self.get_tag()
+        request = self.proto.Topen(tag=tag, fid=fid, mode=mode)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Ropen):
+            self.badresp('open {0}'.format(self.getpathX(fid)), reply)
+        return reply.qid, reply.iounit
+
+    def lopen(self, fid, flags):
+        "send Tlopen for fid with L_O_* flags; return (qid, iounit)"
+        tag = self.get_tag()
+        request = self.proto.Tlopen(tag=tag, fid=fid, flags=flags)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rlopen):
+            self.badresp('lopen {0}'.format(self.getpathX(fid)), reply)
+        return reply.qid, reply.iounit
+
+    def read(self, fid, offset, count):
+        "send Tread for up to count bytes at offset on open fid; return data"
+        tag = self.get_tag()
+        request = self.proto.Tread(tag=tag, fid=fid, offset=offset, count=count)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rread):
+            self.badresp('read {0} bytes at offset {1} in '
+                         '{2}'.format(count, offset, self.getpathX(fid)),
+                         reply)
+        return reply.data
+
+    def write(self, fid, offset, data):
+        "send Twrite with data at offset on open fid; return the reply's count"
+        tag = self.get_tag()
+        request = self.proto.Twrite(tag=tag, fid=fid, offset=offset,
+                                    count=len(data), data=data)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rwrite):
+            self.badresp('write {0} bytes at offset {1} in '
+                         '{2}'.format(len(data), offset, self.getpathX(fid)),
+                         reply)
+        return reply.count
+
+    # Caller may
+    #  - pass an actual stat object, or
+    #  - pass in all the individual to-set items by keyword, or
+    #  - mix and match a bit: get an existing stat, then use
+    #    keywords to override fields.
+    # We convert "None"s to the internal "do not change" values,
+    # and for diagnostic purposes, can turn "do not change" back
+    # to None at the end, too.
+    def wstat(self, fid, statobj=None, **kwargs):
+        "send Twstat for fid built from statobj and/or keyword overrides"
+        if statobj is None:
+            statobj = protocol.td.stat()
+        else:
+            statobj = statobj._copy()
+        # Fields in stat that you can't send as a wstat: the type
+        # and qid are informative, and the 'extension' is an input
+        # when creating a file but read-only when stat-ing.  (Plain
+        # 9P2000 additionally forbids the numeric n_* fields.)
+        #
+        # It's not clear what it means to set dev, but we'll leave it
+        # in as an optional parameter; fs/backend.c just errors out.
+        if self.proto == protocol.plain:
+            forbid = ('type', 'qid', 'extension',
+                      'n_uid', 'n_gid', 'n_muid')
+        else:
+            forbid = ('type', 'qid', 'extension')
+        nochange = {
+            'type': 0,
+            'qid': protocol.td.qid(0, 0, 0),
+            'dev': 2**32 - 1,
+            'mode': 2**32 - 1,
+            'atime': 2**32 - 1,
+            'mtime': 2**32 - 1,
+            'length': 2**64 - 1,
+            'name': b'',
+            'uid': b'',
+            'gid': b'',
+            'muid': b'',
+            'extension': b'',
+            'n_uid': 2**32 - 1,
+            'n_gid': 2**32 - 1,
+            'n_muid': 2**32 - 1,
+        }
+        for field in statobj._fields:
+            if field in kwargs:
+                if field in forbid:
+                    raise ValueError('cannot wstat a stat.{0}'.format(field))
+                statobj[field] = kwargs.pop(field)
+            else:
+                if field in forbid or statobj[field] is None:
+                    statobj[field] = nochange[field]
+        if kwargs:
+            raise TypeError('wstat() got an unexpected keyword argument '
+                            '{0!r}'.format(kwargs.popitem()[0]))  # name only
+
+        data = self.proto.pack_wirestat(statobj)
+        tag = self.get_tag()
+        pkt = self.proto.Twstat(tag=tag, fid=fid, data=data)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rwstat):
+            # For error viewing, switch all the do-not-change
+            # and can't-change fields to None.
+            statobj.qid = None
+            for field in statobj._fields:
+                if field in forbid:
+                    statobj[field] = None
+                elif field in nochange and statobj[field] == nochange[field]:
+                    statobj[field] = None
+            self.badresp('wstat {0}={1}'.format(self.getpathX(fid), statobj),
+                         resp)
+        # wstat worked - change path names if needed
+        if statobj.name != b'':
+            self.did_rename(fid, statobj.name)
+
+    def readdir(self, fid, offset, count):
+        "send Treaddir for up to count bytes at offset on open fid; return data"
+        tag = self.get_tag()
+        request = self.proto.Treaddir(tag=tag, fid=fid, offset=offset, count=count)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rreaddir):
+            self.badresp('readdir {0} bytes at offset {1} in '
+                         '{2}'.format(count, offset, self.getpathX(fid)),
+                         reply)
+        return reply.data
+
+    def rename(self, fid, dfid, name):
+        "send Trename moving file <fid> to <dfid>/name; update known paths"
+        tag = self.get_tag()
+        request = self.proto.Trename(tag=tag, fid=fid, dfid=dfid, name=name)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rrename):
+            self.badresp('rename {0} to {2} in '
+                         '{1}'.format(self.getpathX(fid),
+                                      self.getpathX(dfid), name),
+                         reply)
+        self.did_rename(fid, name, self.getpath(dfid))
+
+    def renameat(self, olddirfid, oldname, newdirfid, newname):
+        "invoke Trenameat: rename <olddirfid>/oldname to <newdirfid>/newname"
+        tag = self.get_tag()
+        pkt = self.proto.Trenameat(tag=tag,
+                                   olddirfid=olddirfid, oldname=oldname,
+                                   newdirfid=newdirfid, newname=newname)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rrenameat):
+            self.badresp('rename {0} in {1} to {2} in '
+                         '{3}'.format(oldname, self.getpathX(olddirfid),
+                                      newname, self.getpathX(newdirfid)),
+                         resp)
+        # There's no renamed *fid*, just a renamed file!  So no
+        # call to self.did_rename().
+
+    def unlinkat(self, dirfd, name, flags):
+        "send Tunlinkat for name in dirfd; flags: 0 or protocol.td.AT_REMOVEDIR"
+        tag = self.get_tag()
+        request = self.proto.Tunlinkat(tag=tag, dirfd=dirfd,
+                                       name=name, flags=flags)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Runlinkat):
+            self.badresp('unlinkat {0} in '
+                         '{1}'.format(name, self.getpathX(dirfd)), reply)
+
+    def decode_stat_objects(self, bstring, noerror=False):
+        """
+        Split the result of reading a directory into its stat
+        objects (for .u these carry extra data) and return the list.
+
+        Incorrect data can make this raise a SequenceError unless
+        noerror=True is passed.
+        """
+        stats = []
+        pos = 0
+        while pos < len(bstring):
+            entry, pos = self.proto.unpack_wirestat(bstring, pos, noerror)
+            stats.append(entry)
+        return stats
+
+    def decode_readdir_dirents(self, bstring, noerror=False):
+        """
+        Readdir on a directory returns an array of dirent objects.
+
+        It's possible for this to produce a SequenceError, if
+        the data are incorrect, unless you pass noerror=True.
+        """
+        objlist = []
+        offset = 0
+        while offset < len(bstring):
+            obj, offset = self.proto.unpack_dirent(bstring, offset, noerror)
+            objlist.append(obj)
+        return objlist
+
+    def lcreate(self, fid, name, lflags, mode, gid):
+        "issue lcreate (.L)"
+        tag = self.get_tag()
+        pkt = self.proto.Tlcreate(tag=tag, fid=fid, name=name,
+                                  flags=lflags, mode=mode, gid=gid)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rlcreate):
+            self.badresp('create {0} in '
+                         '{1}'.format(name, self.getpathX(fid)), resp)
+        # Creating a file opens the file,
+        # thus changing the fid's path.
+        self.setpath(fid, _pathcat(self.getpath(fid), name))
+        return resp.qid, resp.iounit
+
+    def mkdir(self, dfid, name, mode, gid):
+        "issue mkdir (.L)"
+        tag = self.get_tag()
+        pkt = self.proto.Tmkdir(tag=tag, dfid=dfid, name=name,
+                                mode=mode, gid=gid)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rmkdir):
+            self.badresp('mkdir {0} in '
+                         '{1}'.format(name, self.getpathX(dfid)), resp)
+        return resp.qid
+
+    # We don't call this getattr(), for the obvious reason.
+    def Tgetattr(self, fid, request_mask=protocol.td.GETATTR_ALL):
+        "issue Tgetattr.L - get what you ask for, or everything by default"
+        tag = self.get_tag()
+        pkt = self.proto.Tgetattr(tag=tag, fid=fid, request_mask=request_mask)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rgetattr):
+            self.badresp('Tgetattr {0} of '
+                         '{1}'.format(request_mask, self.getpathX(fid)), resp)
+        attrs = Fileattrs()
+        # Handle the simplest valid-bit tests:
+        for name in ('mode', 'nlink', 'uid', 'gid', 'rdev',
+                     'size', 'blocks', 'gen', 'data_version'):
+            bit = getattr(protocol.td, 'GETATTR_' + name.upper())
+            if resp.valid & bit:
+                attrs[name] = resp[name]
+        # Handle the timestamps, which are timespec pairs
+        for name in ('atime', 'mtime', 'ctime', 'btime'):
+            bit = getattr(protocol.td, 'GETATTR_' + name.upper())
+            if resp.valid & bit:
+                attrs[name] = Timespec(sec=resp[name + '_sec'],
+                                       nsec=resp[name + '_nsec'])
+        # There is no control bit for blksize; qemu and Linux always
+        # provide one.
+        attrs.blksize = resp.blksize
+        # Handle ino, which comes out of qid.path
+        if resp.valid & protocol.td.GETATTR_INO:
+            attrs.ino = resp.qid.path
+        return attrs
+
+    # We don't call this setattr(), for the obvious reason.
+    # See wstat for usage.  Note that time fields can be set
+    # with either second or nanosecond resolutions, and some
+    # can be set without supplying an actual timestamp, so
+    # this is all pretty ad-hoc.
+    #
+    # There's also one keyword-only argument, ctime=<anything>,
+    # which means "set SETATTR_CTIME".  This has the same effect
+    # as supplying valid=protocol.td.SETATTR_CTIME.
+    def Tsetattr(self, fid, valid=0, attrs=None, **kwargs):
+        "issue Tsetattr.L - set fields taken from attrs and/or keyword args"
+        attrs = Fileattrs() if attrs is None else attrs._copy()
+
+        # Start with an empty (all-zero) Tsetattr instance.  We
+        # don't really need to zero out tag and fid, but it doesn't
+        # hurt.  Note that if caller says, e.g., valid=SETATTR_SIZE
+        # but does not supply an incoming size (via "attrs" or a size=
+        # argument), we'll ask to set that field to 0.
+        attrobj = protocol.rrd.Tsetattr()
+        for field in attrobj._fields:
+            attrobj[field] = 0
+
+        # Consume the keyword-only ctime= argument before the field loop:
+        # if attrs has a ctime field, the loop would otherwise swallow it
+        # as a plain (ignored) value and never set SETATTR_CTIME.
+        if 'ctime' in kwargs:
+            kwargs.pop('ctime')
+            valid |= protocol.td.SETATTR_CTIME
+
+        # In this case, forbid means "only as kwargs": these values
+        # in an incoming attrs object are merely ignored.
+        forbid = ('ino', 'nlink', 'rdev', 'blksize', 'blocks', 'btime',
+                  'gen', 'data_version')
+        for field in attrs._fields:
+            if field in kwargs:
+                if field in forbid:
+                    raise ValueError('cannot Tsetattr {0}'.format(field))
+                attrs[field] = kwargs.pop(field)
+            elif field in forbid or attrs[field] is None:
+                # Unset, or a forbidden field we were told to ignore.
+                continue
+            # OK, we're setting this attribute.  Plain numeric fields
+            # just need the value stored and their valid bit set.
+            bitname = 'SETATTR_' + field.upper()
+            bit = getattr(protocol.td, bitname)
+            if field in ('mode', 'uid', 'gid', 'size'):
+                valid |= bit
+                attrobj[field] = attrs[field]
+                continue
+            # Timestamps are special:  The value may be given as
+            # an integer (seconds), or as a float (which we convert
+            # to sec+nsec), or as a timespec (sec+nsec).
+            # If specified as 0, we mean "we are not providing the
+            # actual time, use the server's time."
+            #
+            # The ctime field's value, if any, is *ignored*.
+            if field in ('atime', 'mtime'):
+                value = attrs[field]
+                if hasattr(value, '__len__'):
+                    if len(value) != 2:
+                        raise ValueError('invalid {0}={1!r}'.format(field,
+                                                                    value))
+                    sec = value[0]
+                    nsec = value[1]
+                else:
+                    sec, nsec = value, 0
+                    if isinstance(value, float):
+                        frac, whole = math.modf(value)
+                        sec, nsec = int(whole), int(round(frac * 1000000000))
+                valid |= bit
+                attrobj[field + '_sec'] = sec
+                attrobj[field + '_nsec'] = nsec
+                if sec != 0 or nsec != 0:
+                    # Add SETATTR_ATIME_SET or SETATTR_MTIME_SET
+                    # as appropriate, to tell the server to use *this
+                    # specific* time, instead of just "server now".
+                    bit = getattr(protocol.td, bitname + '_SET')
+                    valid |= bit
+        if kwargs:
+            raise TypeError('Tsetattr() got an unexpected keyword argument '
+                            '{0!r}'.format(kwargs.popitem()))
+
+        tag = self.get_tag()
+        attrobj.valid = valid
+        attrobj.tag = tag
+        attrobj.fid = fid
+        pkt = self.proto.pack(attrobj)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rsetattr):
+            self.badresp('Tsetattr {0} {1} of '
+                         '{2}'.format(valid, attrs, self.getpathX(fid)), resp)
+
+    def xattrwalk(self, fid, name=None):
+        "walk one name or all names: caller should read() the returned fid"
+        tag = self.get_tag()
+        newfid = self.alloc_fid()
+        pkt = self.proto.Txattrwalk(tag=tag, fid=fid, newfid=newfid,
+                                    name=name or '')
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rxattrwalk):
+            self.retire_fid(newfid)
+            self.badresp('Txattrwalk {0} of '
+                         '{1}'.format(name, self.getpathX(fid)), resp)
+        if name:
+            self.setpath(newfid, 'xattr:' + name)
+        else:
+            self.setpath(newfid, 'xattr')
+        return newfid, resp.size
+
+    def _pathsplit(self, path, startdir, allow_empty=False):
+        "common code for uxlookup and uxopen"
+        if self.rootfid is None:
+            raise LocalError('{0}: not attached'.format(self))
+        if path.startswith(b'/') or startdir is None:
+            startdir = self.rootfid
+        components = [i for i in path.split(b'/') if i != b'']
+        if len(components) == 0 and not allow_empty:
+            raise LocalError('{0}: {1!r}: empty path'.format(self, path))
+        return components, startdir
+
+    def uxlookup(self, path, startdir=None):
+        """
+        Unix-style lookup.  That is, lookup('/foo/bar') or
+        lookup('foo/bar').  If startdir is not None and the
+        path does not start with '/' we look up from there.
+        """
+        components, startdir = self._pathsplit(path, startdir, allow_empty=True)
+        return self.lookup_last(startdir, components)
+
+    def uxopen(self, path, oflags=0, perm=None, gid=None,
+               startdir=None, filetype=None):
+        """
+        Unix-style open()-with-option-to-create, or mkdir().
+        oflags is 0/1/2 with optional os.O_CREAT, perm defaults
+        to 0o666 (files) or 0o777 (directories).  A gid is required
+        whenever os.O_CREAT is given (the Linux create / mkdir ops
+        need one), but not when opening an existing file.
+
+        Returns (fid, qid, iounit, didcreate); the final boolean
+        says whether we actually created the file or directory.
+        Raises OSError if you ask for a directory but it's a file,
+        or vice versa.  (??? reconsider this later)
+
+        Note that filetype may only be None (file) or 'dir'.
+        """
+        needtype = {
+            'dir': protocol.td.QTDIR,
+            None: protocol.td.QTFILE,
+        }[filetype]
+        omode_byte = oflags & 3 # cheating
+        # allow looking up /, but not creating /
+        allow_empty = (oflags & os.O_CREAT) == 0
+        components, startdir = self._pathsplit(path, startdir,
+                                               allow_empty=allow_empty)
+        if not (oflags & os.O_CREAT):
+            # Not creating, i.e., just look up and open existing file/dir.
+            fid, qid = self.lookup_last(startdir, components)
+            # If we got this far, use Topen on the fid; we did not
+            # create the file.
+            return self._uxopen2(needtype, fid, qid, omode_byte, False)
+
+        # Only used if using dot-L, but make sure it's always provided
+        # since this is generic.
+        if gid is None:
+            raise ValueError('gid is required when creating file or dir')
+
+        if len(components) > 1:
+            # Look up all but last component; this part must succeed.
+            fid, _ = self.lookup(startdir, components[:-1])
+
+            # Now proceed with the final component, using fid as the
+            # start dir.  Clunk it later, unless the walk moves it!
+            startdir = fid
+            clunk_startdir = True
+            components = components[-1:]
+        else:
+            # Use startdir as the start dir, and get a new fid.
+            # Do not clunk startdir!
+            clunk_startdir = False
+            fid = self.alloc_fid()
+
+        # Now look up the (single) component.  If this fails,
+        # assume the file or directory needs to be created.
+        tag = self.get_tag()
+        pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
+                               nwname=1, wname=components)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if isinstance(resp, protocol.rrd.Rwalk):
+            # fid successfully walked to refer to final component.
+            # When startdir was our own intermediate fid, the walk moved
+            # it in place (fid is startdir), so it must NOT be clunked.
+            # Just need to actually open the file.
+            self.setpath(fid, _pathcat(self.getpath(startdir), components[0]))
+            qid = resp.wqid[0]
+            return self._uxopen2(needtype, fid, qid, omode_byte, False)
+
+        # Walk failed.  If we allocated a fid, retire it.  Then set up
+        # a fid that points to the parent directory in which to create
+        # the file or directory.  Note that if we're creating a file,
+        # this fid will get changed so that it points to the file
+        # instead of the directory.
+        if fid != startdir:
+            self.retire_fid(fid)
+        fid = self.dupfid(startdir)
+
+        try:
+            qid, iounit = self._uxcreate(filetype, fid, components[0],
+                                         oflags, omode_byte, perm, gid)
+
+            # Success.  If we created an ordinary file, we have everything
+            # now as create alters the incoming (dir) fid to open the file.
+            # Otherwise (mkdir), we need to open the directory, as with
+            # a successful lookup.  Note that qid type should match
+            # "needtype".
+            if filetype != 'dir':
+                if qid.type == needtype:
+                    return fid, qid, iounit, True
+                self.clunk(fid, ignore_error=True)
+                raise OSError(_wrong_file_type(qid),
+                             '{0}: server told to create {1} but '
+                             'created {2} instead'.format(path,
+                                                          qt2n(needtype),
+                                                          qt2n(qid.type)))
+
+            # Success: created dir; but now need to walk to and open it.
+            # First clunk the fid handed to _uxcreate, so it can't leak.
+            self.clunk(fid, ignore_error=True)
+            fid = self.alloc_fid()
+            tag = self.get_tag()
+            pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
+                                   nwname=1, wname=components)
+            super(P9Client, self).write(pkt)
+            resp = self.wait_for(tag)
+            if not isinstance(resp, protocol.rrd.Rwalk):
+                self.retire_fid(fid)
+                raise OSError(errno.ENOENT,
+                              '{0}: server made dir but then failed to '
+                              'find it again'.format(path))
+            self.setpath(fid, _pathcat(self.getpath(startdir), components[0]))
+            return self._uxopen2(needtype, fid, qid, omode_byte, True)
+        finally:
+            # Regardless of success/failure/exception, make sure
+            # we clunk startdir if needed.
+            if clunk_startdir:
+                self.clunk(startdir, ignore_error=True)
+
+    def _uxcreate(self, filetype, fid, name, oflags, omode_byte, perm, gid):
+        """
+        Helper for creating dir-or-file.  The fid argument is the parent
+        directory on input, but will point to the file (if we're creating
+        a file) on return.  oflags only applies if we're creating a file
+        (even then we use omode_byte if we are using the plan9 create op).
+        Returns (qid, iounit); iounit is None after a .L mkdir.
+        """
+        # Try to create or mkdir as appropriate.
+        if self.supports_all(protocol.td.Tlcreate, protocol.td.Tmkdir):
+            # Use Linux style create / mkdir.
+            if filetype == 'dir':
+                if perm is None:
+                    perm = 0o777
+                return self.mkdir(fid, name, perm, gid), None
+            if perm is None:
+                perm = 0o666
+            lflags = flags_to_linux_flags(oflags)
+            return self.lcreate(fid, name, lflags, perm, gid)
+
+        if filetype == 'dir':
+            if perm is None:
+                perm = protocol.td.DMDIR | 0o777
+            else:
+                perm |= protocol.td.DMDIR
+        else:
+            if perm is None:
+                perm = 0o666
+        return self.create(fid, name, perm, omode_byte)
+
+    def _uxopen2(self, needtype, fid, qid, omode_byte, didcreate):
+        "common code for finishing up uxopen: type-check qid, then open fid"
+        if qid.type != needtype:
+            path = self.getpathX(fid)   # fetch before the clunk below
+            self.clunk(fid, ignore_error=True)
+            raise OSError(_wrong_file_type(qid), '{0}: is {1}, expected '
+                          '{2}'.format(path, qt2n(qid.type), qt2n(needtype)))
+        qid, iounit = self.open(fid, omode_byte)
+        # ? should we re-check qid? it should not have changed
+        return fid, qid, iounit, didcreate
+
+    def uxmkdir(self, path, perm, gid, startdir=None):
+        """
+        Unix-style mkdir.  Returns the qid of the new directory.
+
+        The gid is only applied if we are using .L style mkdir.
+        """
+        components, startdir = self._pathsplit(path, startdir)
+        clunkme = None
+        if len(components) > 1:
+            fid, _ = self.lookup(startdir, components[:-1])
+            startdir = fid
+            clunkme = fid
+            components = components[-1:]
+        try:
+            if self.supports(protocol.td.Tmkdir):
+                qid = self.mkdir(startdir, components[0], perm, gid)
+            else:
+                qid, _ = self.create(startdir, components[0],
+                                     protocol.td.DMDIR | perm,
+                                     protocol.td.OREAD)
+                # Should we chown/chgrp the dir?
+        finally:
+            if clunkme is not None:     # a fid numbered 0 is still a fid
+                self.clunk(clunkme, ignore_error=True)
+        return qid
+
+    def uxreaddir(self, path, startdir=None, no_dotl=False):
+        """
+        Read a directory to get a list of names (which may or may not
+        include '.' and '..').
+
+        If no_dotl is True (or anything non-false-y), this uses the
+        plain or .u readdir format, otherwise it uses dot-L readdir
+        if possible.
+        """
+        components, startdir = self._pathsplit(path, startdir, allow_empty=True)
+        fid, qid = self.lookup_last(startdir, components)
+        try:
+            if qid.type != protocol.td.QTDIR:
+                raise OSError(errno.ENOTDIR,
+                              '{0}: {1}'.format(self.getpathX(fid),
+                                                os.strerror(errno.ENOTDIR)))
+            # We need both Tlopen and Treaddir to use Treaddir.
+            if not self.supports_all(protocol.td.Tlopen, protocol.td.Treaddir):
+                no_dotl = True
+            if no_dotl:
+                statvals = self.uxreaddir_stat_fid(fid)
+                return [i.name for i in statvals]
+
+            dirents = self.uxreaddir_dotl_fid(fid)
+            return [dirent.name for dirent in dirents]
+        finally:
+            self.clunk(fid, ignore_error=True)
+
+    def uxreaddir_stat(self, path, startdir=None):
+        """
+        Use directory read to get plan9 style stat data (plain or .u readdir).
+
+        Note that this gets a fid, then opens it, reads, then clunks
+        the fid.  If you already have a fid, you may want to use
+        uxreaddir_stat_fid (but note that this opens, yet does not
+        clunk, the fid).
+
+        We return the qid plus the list of the contents.
+
+        Raises OSError (ENOTDIR) if this is applied to a non-directory;
+        unlike uxreaddir_stat_fid, we never return an empty list for
+        a plain file.
+        """
+        components, startdir = self._pathsplit(path, startdir)
+        fid, qid = self.lookup_last(startdir, components)
+        try:
+            if qid.type != protocol.td.QTDIR:
+                raise OSError(errno.ENOTDIR,
+                              '{0}: {1}'.format(self.getpathX(fid),
+                                                os.strerror(errno.ENOTDIR)))
+            statvals = self.uxreaddir_stat_fid(fid)
+            return qid, statvals
+        finally:
+            self.clunk(fid, ignore_error=True)
+
+    def uxreaddir_stat_fid(self, fid):
+        """
+        Implement readdir loop that extracts stat values.
+        This opens, but does not clunk, the given fid.
+
+        Unlike uxreaddir_stat(), if this is applied to a file,
+        rather than a directory, it just returns no entries.
+        """
+        statvals = []
+        qid, iounit = self.open(fid, protocol.td.OREAD)
+        # ?? is a zero iounit allowed? if so, what do we use here?
+        if qid.type == protocol.td.QTDIR:
+            if iounit <= 0:
+                iounit = 512 # probably good enough
+            offset = 0
+            while True:
+                bstring = self.read(fid, offset, iounit)
+                if bstring == b'':
+                    break
+                statvals.extend(self.decode_stat_objects(bstring))
+                offset += len(bstring)
+        return statvals
+
+    def uxreaddir_dotl_fid(self, fid):
+        """
+        Implement readdir loop that uses dot-L style dirents.
+        This opens, but does not clunk, the given fid.
+
+        If applied to a file, the lopen should fail, because of the
+        L_O_DIRECTORY flag.
+        """
+        dirents = []
+        qid, iounit = self.lopen(fid, protocol.td.OREAD |
+                                      protocol.td.L_O_DIRECTORY)
+        # ?? is a zero iounit allowed? if so, what do we use here?
+        # but, we want a minimum of over 256 anyway, let's go for 512
+        if iounit < 512:
+            iounit = 512
+        offset = 0
+        while True:
+            bstring = self.readdir(fid, offset, iounit)
+            if bstring == b'':
+                break
+            ents = self.decode_readdir_dirents(bstring)
+            if len(ents) == 0:
+                break               # ???
+            dirents.extend(ents)
+            offset = ents[-1].offset
+        return dirents
+
+    def uxremove(self, path, startdir=None, filetype=None,
+                 force=False, recurse=False):
+        """
+        Implement rm / rmdir, with optional -rf.
+        if filetype is None, remove dir or file.  If 'dir' or 'file'
+        remove only if it's one of those.  If force is set, ignore
+        failures to remove.  If recurse is True, remove contents of
+        directories (recursively).
+
+        File type mismatches (when filetype!=None) raise OSError (?).
+        """
+        components, startdir = self._pathsplit(path, startdir, allow_empty=True)
+        # Look up all components. If
+        # we get an error we'll just assume the file does not
+        # exist (is this good?).
+        try:
+            fid, qid = self.lookup_last(startdir, components)
+        except RemoteError:
+            return
+        if qid.type == protocol.td.QTDIR:
+            # it's a directory, remove only if allowed.
+            # Note that we must check for "rm -r /" (len(components)==0).
+            if filetype == 'file':
+                self.clunk(fid, ignore_error=True)
+                raise OSError(_wrong_file_type(qid),
+                              '{0}: is dir, expected file'.format(path))
+            isroot = len(components) == 0
+            closer = self.clunk if isroot else self.remove
+            if recurse:
+                # NB: _rm_recursive does not clunk fid
+                self._rm_recursive(fid, filetype, force)
+            # This will fail if the directory is non-empty, unless of
+            # course we tell it to ignore error.
+            closer(fid, ignore_error=force)
+            return
+        # Not a directory, call it a file (even if socket or fifo etc).
+        if filetype == 'dir':
+            self.clunk(fid, ignore_error=True)
+            raise OSError(_wrong_file_type(qid),
+                          '{0}: is file, expected dir'.format(path))
+        self.remove(fid, ignore_error=force)
+
+    def _rm_file_by_dfid(self, dfid, name, force=False):
+        """
+        Remove a file whose name is <name> (no path, just a component
+        name) whose parent directory is <dfid>.  We may assume that the
+        file really is a file (or a socket, or fifo, or some such, but
+        definitely not a directory).
+
+        If force is set, ignore failures.
+        """
+        # If we have unlinkat, that's the fast way.  But it may
+        # return an ENOTSUP error.  If it does we shouldn't bother
+        # doing this again.
+        if self.supports(protocol.td.Tunlinkat):
+            try:
+                self.unlinkat(dfid, name, 0)
+                return
+            except RemoteError as err:
+                if not err.is_ENOTSUP():
+                    raise
+                self.unsupported(protocol.td.Tunlinkat)
+                # fall through to remove() op
+        # Fall back to lookup + remove.
+        try:
+            fid, qid = self.lookup_last(dfid, [name])
+        except RemoteError:
+            # If this has an errno we could tell ENOENT from EPERM,
+            # and actually raise an error for the latter.  Should we?
+            return
+        self.remove(fid, ignore_error=force)
+
+    def _rm_recursive(self, dfid, filetype, force):
+        """
+        Recursively remove a directory.  filetype is probably None,
+        but if it's 'dir' we fail if the directory contains non-dir
+        files.
+
+        If force is set, ignore failures.
+
+        Although we open dfid (via the readdir.*_fid calls) we
+        do not clunk it here; that's the caller's job.
+        """
+        # first, remove contents
+        if self.supports_all(protocol.td.Tlopen, protocol.td.Treaddir):
+            for entry in self.uxreaddir_dotl_fid(dfid):
+                if entry.name in (b'.', b'..'):
+                    continue
+                fid, qid = self.lookup(dfid, [entry.name])
+                try:
+                    attrs = self.Tgetattr(fid, protocol.td.GETATTR_MODE)
+                    if stat.S_ISDIR(attrs.mode):
+                        self.uxremove(entry.name, dfid, filetype, force, True)
+                    else:
+                        self.remove(fid, ignore_error=force)
+                        fid = None
+                finally:
+                    if fid is not None:
+                        self.clunk(fid, ignore_error=True)
+        else:
+            for statobj in self.uxreaddir_stat_fid(dfid):
+                # skip . and ..
+                name = statobj.name
+                if name in (b'.', b'..'):
+                    continue
+                if statobj.qid.type == protocol.td.QTDIR:
+                    self.uxremove(name, dfid, filetype, force, True)
+                else:
+                    self._rm_file_by_dfid(dfid, name, force)
+
+def _wrong_file_type(qid):
+    "return EISDIR or ENOTDIR for passing to OSError"
+    if qid.type == protocol.td.QTDIR:
+        return errno.EISDIR
+    return errno.ENOTDIR
+
+def flags_to_linux_flags(flags):
+    "convert os.O_* flags to Linux flags (protocol.td.L_O_CREAT etc)"
+    flagmap = {
+        os.O_CREAT: protocol.td.L_O_CREAT,
+        os.O_EXCL: protocol.td.L_O_EXCL,
+        os.O_NOCTTY: protocol.td.L_O_NOCTTY,
+        os.O_TRUNC: protocol.td.L_O_TRUNC,
+        os.O_APPEND: protocol.td.L_O_APPEND,
+        os.O_DIRECTORY: protocol.td.L_O_DIRECTORY,
+    }
+    # The access mode is a small field, not a flag: mask all of it, or
+    # O_WRONLY would be reported below as an untranslated bit.
+    accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
+    result = flags & accmode
+    flags &= ~accmode
+    for key, value in flagmap.items():
+        if flags & key:
+            result |= value
+            flags &= ~key
+    if flags:
+        raise ValueError('untranslated bits 0x{0:x} in os flags'.format(flags))
+    return result
diff --git a/pytest/p9err.py b/pytest/p9err.py
new file mode 100644
index 000000000000..407278b2a404
--- /dev/null
+++ b/pytest/p9err.py
@@ -0,0 +1,146 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for 9P2000, .u, and .L.
+
+Note that there is no native-to-9P2000 (plain) translation
+table since 9P2000 takes error *strings* rather than error
+*numbers*.
+"""
+
+import errno as _errno
+import lerrno as _lerrno
+import os as _os
+
+_native_to_dotu = {
+    # These are in the "standard" range(1, errno.ERANGE)
+    # but do not map to themselves, so map them here first.
+    _errno.ENOTEMPTY: _errno.EPERM,
+    _errno.EDQUOT: _errno.EPERM,
+    _errno.ENOSYS: _errno.EPERM,
+}
+
+_native_to_dotl = {}
+
+# Add standard errno's (1 through ERANGE, inclusive).
+for _i in range(1, _errno.ERANGE + 1):
+    _native_to_dotu.setdefault(_i, _i)
+    _native_to_dotl[_i] = _i
+
+# Add linux errno's.  Note that Linux EAGAIN at #11 overrides BSD EDEADLK,
+# but Linux has EDEADLK at #35 which overrides BSD EAGAIN, so it all
+# works out.
+#
+# We just list every BSD error name here, since the hasattr()s do
+# the real work.
+for _i in (
+    'EDEADLK',
+    'EAGAIN',
+    'EINPROGRESS',
+    'EALREADY',
+    'ENOTSOCK',
+    'EDESTADDRREQ',
+    'EMSGSIZE',
+    'EPROTOTYPE',
+    'ENOPROTOOPT',
+    'EPROTONOSUPPORT',
+    'ESOCKTNOSUPPORT',
+    'EOPNOTSUPP',
+    'EPFNOSUPPORT',
+    'EAFNOSUPPORT',
+    'EADDRINUSE',
+    'EADDRNOTAVAIL',
+    'ENETDOWN',
+    'ENETUNREACH',
+    'ENETRESET',
+    'ECONNABORTED',
+    'ECONNRESET',
+    'ENOBUFS',
+    'EISCONN',
+    'ENOTCONN',
+    'ESHUTDOWN',
+    'ETOOMANYREFS',
+    'ETIMEDOUT',
+    'ECONNREFUSED',
+    'ELOOP',
+    'ENAMETOOLONG',
+    'EHOSTDOWN',
+    'EHOSTUNREACH',
+    'ENOTEMPTY',
+    'EPROCLIM',
+    'EUSERS',
+    'EDQUOT',
+    'ESTALE',
+    'EREMOTE',
+    'EBADRPC',
+    'ERPCMISMATCH',
+    'EPROGUNAVAIL',
+    'EPROGMISMATCH',
+    'EPROCUNAVAIL',
+    'ENOLCK',
+    'ENOSYS',
+    'EFTYPE',
+    'EAUTH',
+    'ENEEDAUTH',
+    'EIDRM',
+    'ENOMSG',
+    'EOVERFLOW',
+    'ECANCELED',
+    'EILSEQ',
+    'EDOOFUS',
+    'EBADMSG',
+    'EMULTIHOP',
+    'ENOLINK',
+    'EPROTO',
+    'ENOTCAPABLE',
+    'ECAPMODE',
+    'ENOTRECOVERABLE',
+    'EOWNERDEAD',
+):
+    if hasattr(_errno, _i) and hasattr(_lerrno, _i):
+        _native_to_dotl[getattr(_errno, _i)] = getattr(_lerrno, _i)
+del _i
+
+def to_dotu(errnum):
+    """
+    Translate native errno to 9P2000.u errno.
+
+    >>> import errno
+    >>> to_dotu(errno.EIO)
+    5
+    >>> to_dotu(errno.EDQUOT)
+    1
+    >>> to_dotu(errno.ELOOP)
+    5
+
+    There is a corresponding dotu_strerror() (which is really
+    just os.strerror):
+
+    >>> dotu_strerror(5)
+    'Input/output error'
+
+    """
+    return _native_to_dotu.get(errnum, _errno.EIO) # default to EIO
+
+def to_dotl(errnum):
+    """
+    Translate native errno to 9P2000.L errno.
+
+    >>> import errno
+    >>> to_dotl(errno.ELOOP)
+    40
+
+    There is a corresponding dotl_strerror():
+
+    >>> dotl_strerror(40)
+    'Too many levels of symbolic links'
+    """
+    return _native_to_dotl.get(errnum, _lerrno.ENOTRECOVERABLE)
+
+dotu_strerror = _os.strerror
+
+dotl_strerror = _lerrno.strerror
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/pytest/pfod.py b/pytest/pfod.py
new file mode 100644
index 000000000000..6167354e88cc
--- /dev/null
+++ b/pytest/pfod.py
@@ -0,0 +1,204 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+
+__all__ = ['pfod', 'OrderedDict']
+
+### shameless stealing from namedtuple here
+
+"""
+pfod - prefilled OrderedDict
+
+This is basically a hybrid of a class and an OrderedDict,
+or, sort of a data-only class.  When an instance of the
+class is created, all its fields are set to None if not
+initialized.
+
+Because it is an OrderedDict you can add extra fields to an
+instance, and they will be in inst.keys().  Because it
+behaves in a class-like way, if the keys are 'foo' and 'bar'
+you can write print(inst.foo) or inst.bar = 3.  Setting an
+attribute that does not currently exist causes a new key
+to be added to the instance.
+"""
+
+import sys as _sys
+from keyword import iskeyword as _iskeyword
+from collections import OrderedDict
+from collections import deque as _deque
+
+_class_template = '''\
+class {typename}(OrderedDict):
+    '{typename}({arg_list})'
+    __slots__ = ()
+
+    _fields = {field_names!r}
+
+    def __init__(self, *args, **kwargs):
+        'Create new instance of {typename}()'
+        super({typename}, self).__init__()
+        args = _deque(args)
+        for field in self._fields:
+            if field in kwargs:
+                self[field] = kwargs.pop(field)
+            elif len(args) > 0:
+                self[field] = args.popleft()
+            else:
+                self[field] = None
+        if len(kwargs):     # sorted list: same text on py2k and py3k
+            raise TypeError('unexpected kwargs %s' % sorted(kwargs))
+        if len(args):       # 1-tuple wrapper: one %r shows every leftover
+            raise TypeError('unconsumed args %r' % (tuple(args),))
+
+    def _copy(self):
+        'copy to new instance'
+        new = {typename}()
+        new.update(self)
+        return new
+
+    def __getattr__(self, attr):
+        if attr in self:
+            return self[attr]
+        raise AttributeError('%r object has no attribute %r' %
+            (self.__class__.__name__, attr))
+
+    def __setattr__(self, attr, val):
+        if attr.startswith('_OrderedDict_'):
+            super({typename}, self).__setattr__(attr, val)
+        else:
+            self[attr] = val
+
+    def __repr__(self):
+        'Return a nicely formatted representation string'
+        return '{typename}({repr_fmt})'.format(**self)
+'''
+
+_repr_template = '{name}={{{name}!r}}'
+
+# Workaround for py2k exec-as-statement, vs py3k exec-as-function: each
+# branch would be a syntax error to the other parser, so both are exec'd.
+if _sys.version_info[0] < 3:
+    # py2k: need a real function.  (There is a way to deal with
+    # this without a function if the py2k is new enough, but this
+    # works in more cases.)
+    exec("""def _exec(string, gdict, ldict):
+        "Python 2: exec string in gdict, ldict"
+        exec string in gdict, ldict""")
+else:
+    # py3k: just make an alias for builtin function exec
+    exec("_exec = exec")
+
+def pfod(typename, field_names, verbose=False, rename=False):
+    """
+    Return a new subclass of OrderedDict with named fields.
+
+    Fields are accessible by name.  Note that this means that to
+    copy a PFOD you must use _copy() - field names may not start
+    with '_' unless they are all numeric.  When creating an instance
+    of the new class, fields that are not initialized are set to None.
+
+    If verbose is true, print the generated source (to verbose itself
+    if it has write(), else stdout); rename turns bad names into '_<index>'.
+
+    >>> Point = pfod('Point', ['x', 'y'])
+    >>> Point.__doc__                   # docstring for the new class
+    'Point(x, y)'
+    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
+    >>> p
+    Point(x=11, y=22)
+    >>> p['x'] + p['y']                 # indexable
+    33
+    >>> p.x + p.y                       # fields also accessible by name
+    33
+    >>> p._copy()
+    Point(x=11, y=22)
+    >>> p2 = Point()
+    >>> p2.extra = 2
+    >>> p2
+    Point(x=None, y=None)
+    >>> p2.extra
+    2
+    >>> p2['extra']
+    2
+    """
+
+    # Validate the field names.  At the user's option, either generate an error
+    # message or automatically replace the field name with a valid name.
+    if _sys.version_info[0] >= 3:
+        string_type = str
+    else:
+        string_type = basestring
+    if isinstance(field_names, string_type):
+        field_names = field_names.replace(',', ' ').split()
+    field_names = list(map(str, field_names))
+    typename = str(typename)
+    if rename:
+        seen = set()
+        for index, name in enumerate(field_names):
+            if (not all(c.isalnum() or c=='_' for c in name)
+                or _iskeyword(name)
+                or not name
+                or name[0].isdigit()
+                or name.startswith('_')
+                or name in seen):
+                field_names[index] = '_%d' % index
+            seen.add(name)
+    for name in [typename] + field_names:
+        if type(name) != str:
+            raise TypeError('Type names and field names must be strings')
+        if not all(c.isalnum() or c=='_' for c in name):
+            raise ValueError('Type names and field names can only contain '
+                             'alphanumeric characters and underscores: %r' % name)
+        if _iskeyword(name):
+            raise ValueError('Type names and field names cannot be a '
+                             'keyword: %r' % name)
+        if name[0].isdigit():
+            raise ValueError('Type names and field names cannot start with '
+                             'a number: %r' % name)
+    seen = set()
+    for name in field_names:
+        if name.startswith('_OrderedDict_'):
+            raise ValueError('Field names cannot start with _OrderedDict_: '
+                             '%r' % name)
+        if name.startswith('_') and not rename:
+            raise ValueError('Field names cannot start with an underscore: '
+                             '%r' % name)
+        if name in seen:
+            raise ValueError('Encountered duplicate field name: %r' % name)
+        seen.add(name)
+
+    # Fill-in the class template
+    class_definition = _class_template.format(
+        typename = typename,
+        field_names = tuple(field_names),
+        arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
+        repr_fmt = ', '.join(_repr_template.format(name=name)
+                             for name in field_names),
+    )
+    if verbose:     # py3k has no builtin file type, so duck-type on write()
+        print(class_definition,
+            file=verbose if hasattr(verbose, 'write') else _sys.stdout)
+
+    # Execute the template string in a temporary namespace and support
+    # tracing utilities by setting a value for frame.f_globals['__name__']
+    namespace = dict(__name__='PFOD%s' % typename,
+                     OrderedDict=OrderedDict, _deque=_deque)
+    try:
+        _exec(class_definition, namespace, namespace)
+    except SyntaxError as e:    # e.message does not exist on py3k
+        raise SyntaxError(str(e) + ':\n' + class_definition)
+    result = namespace[typename]
+
+    # For pickling to work, __module__ must be set to the frame where the
+    # class is created.  Bypass this where sys._getframe is not defined
+    # (e.g. Jython) or not defined for arguments greater than 0 (IronPython).
+    try:
+        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
+    except (AttributeError, ValueError):
+        pass
+
+    return result
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/pytest/protocol.py b/pytest/protocol.py
new file mode 100644
index 000000000000..6b0cd9ad0327
--- /dev/null
+++ b/pytest/protocol.py
@@ -0,0 +1,1998 @@
+#! /usr/bin/env python
+
+"""
+Protocol definitions for python based lib9p server/client.
+
+The sub-namespace td has type definitions (qid, stat) and values
+that are "#define" constants in C code (e.g., DMDIR, QTFILE, etc).
+This also contains the byte values for protocol codes like Tversion,
+Rversion, Rerror, and so on.
+
+    >>> td.Tversion
+    100
+    >>> td.Rlerror
+    7
+
+The qid and stat types are PFOD classes and generate instances that
+are a cross between namedtuple and OrderedDictionary (see pfod.py
+for details):
+
+    >>> td.qid(type=td.QTFILE, path=2, version=1)
+    qid(type=0, version=1, path=2)
+
+The td.stat() type output is pretty long, since it has all the
+dotu-specific members (used only when packing for dotu/dotl and
+set only when unpacking those), so here's just one field:
+
+    >>> td.stat(*(15 * [0])).mode
+    0
+    >>> import pprint; pprint.pprint(td.stat()._fields)
+    ('type',
+     'dev',
+     'qid',
+     'mode',
+     'atime',
+     'mtime',
+     'length',
+     'name',
+     'uid',
+     'gid',
+     'muid',
+     'extension',
+     'n_uid',
+     'n_gid',
+     'n_muid')
+
+Stat objects sent across the protocol must first be encoded into
+wirestat objects, which are basically size-counted pre-sequenced
+stat objects.  The pre-sequencing uses:
+
+    >>> td.stat_seq
+    Sequencer('stat')
+
+For parsing bytes returned in a Tread on a directory, td.wirestat_seq
+is the sequencer.  However, most users should rely on the packers and
+unpackers in each protocol (see {pack,unpack}_wirestat below).
+
+    >>> td.wirestat_seq
+    Sequencer('wirestat')
+
+There is a dictionary fcall_names that maps from byte value
+to protocol code.  Names map to themselves as well:
+
+    >>> fcall_names[101]
+    'Rversion'
+    >>> fcall_names['Tversion']
+    'Tversion'
+
+The sub-namespace rrd has request (Tversion, Topen, etc) and
+response (Rversion, Ropen, etc) data definitions.  Each of these
+is a PFOD class:
+
+    >>> rrd.Tversion(1000, 'hello', tag=0)
+    Tversion(tag=0, msize=1000, version='hello')
+
+The function p9_version() looks up the instance of each supported
+protocol, or raises a KeyError when given an invalid protocol.
+The names may be spelled in any mixture of cases.
+
+The names plain, dotu, and dotl are predefined as the three
+supported protocols:
+
+    >>> p9_version('invalid')
+    Traceback (most recent call last):
+        ...
+    KeyError: 'invalid'
+    >>> p9_version('9p2000') == plain
+    True
+    >>> p9_version('9P2000') == plain
+    True
+    >>> p9_version('9P2000.u') == dotu
+    True
+    >>> p9_version('9p2000.L') == dotl
+    True
+
+Protocol instances have a pack() method that encodes a set of
+arguments into a packet.  To know what to encode, pack() must
+receive an fcall value and a dictionary containing argument
+values, or something equivalent.  The required argument values
+depend on the fcall.  For instance, a Tversion fcall needs three
+arguments: the version name, the tag, and the msize (these of
+course are the pre-filled fields in a Tversion PFOD instance).
+
+    >>> args = {'version': '!', 'tag': 1, 'msize': 1000}
+    >>> pkt = dotu.pack(fcall='Tversion', args=args)
+    >>> len(pkt)
+    14
+
+The length of string '!' is 1, and the packet (or wire) format of
+a Tversion request is:
+
+   size[4] fcall[1] tag[2] msize[4] version[s]
+
+which corresponds to a struct's IBHIH (for the fixed size parts)
+followed by 1 B (for the string).  The overall packet is 14 bytes
+long, so we have size=14, fcall=100, tag=1, msize=1000, and the
+version string is length=1, value=33 (ord('!')).
+
+    >>> import struct
+    >>> struct.unpack('<IBHIHB', pkt)
+    (14, 100, 1, 1000, 1, 33)
+
+Of course, this packed a completely bogus "version" string, but
+that's what we told it to do.  Protocol instances remember their
+version, so we can get it right by omitting the version from the
+arguments:
+
+    >>> dotu.version
+    '9P2000.u'
+    >>> args = {'tag': 99, 'msize': 1000}
+    >>> pkt = dotu.pack(fcall='Tversion', args=args)
+    >>> len(pkt)
+    21
+
+The fcall can be supplied numerically:
+
+    >>> pkt2 = dotu.pack(fcall=td.Tversion, args=args)
+    >>> pkt == pkt2
+    True
+
+Instead of providing an fcall you can provide an instance of
+the appropriate PFOD.  In this case pack() finds the type from
+the PFOD instance.  As usual, the version parameter is filled in
+for you:
+
+    >>> pkt2 = dotu.pack(rrd.Tversion(tag=99, msize=1000))
+    >>> pkt == pkt2
+    True
+
+Note that it's up to you to check the other end's version and
+switch to a "lower" protocol as needed.  Each instance does provide
+a downgrade_to() method that gets you a possibly-downgraded instance.
+This will fail if you are actually trying to upgrade, and also if
+you provide a bogus version:
+
+    >>> dotu.downgrade_to('9P2000.L')
+    Traceback (most recent call last):
+        ...
+    KeyError: '9P2000.L'
+    >>> dotu.downgrade_to('we never heard of this protocol')
+    Traceback (most recent call last):
+        ...
+    KeyError: 'we never heard of this protocol'
+
+Hence you might use:
+
+    try:
+        proto = protocol.dotl.downgrade_to(vstr)
+    except KeyError:
+        pkt = protocol.plain.pack(fcall='Rerror',
+            args={'tag': tag, 'errstr': 'unknown protocol version '
+                    '{0!r}'.format(vstr)})
+    else:
+        pkt = proto.pack(fcall='Rversion', args={'tag': tag, 'msize': msize})
+
+When using a PFOD instance, it is slightly more efficient to use
+pack_from():
+
+    try:
+        proto = protocol.dotl.downgrade_to(vstr)
+        reply = protocol.rrd.Rversion(tag=tag, msize=msize)
+    except KeyError:
+        proto = protocol.plain
+        reply = protocol.rrd.Rerror(tag=tag,
+            errstr='unknown protocol version {0!r}'.format(vstr))
+    pkt = proto.pack_from(reply)
+
+does the equivalent of the try/except/else variant.  Note that
+the protocol.rrd.Rversion() instance has version=None.  Like
+proto.pack, the pack_from will detect this "missing" value and
+fill it in.
+
+Because errors vary (one should use Rlerror for dotl and Rerror
+for dotu and plain), and it's convenient to use an Exception
+instance for an error, all protocols provide .error().  This
+builds the appropriate kind of error response, extracting and
+converting errno's and error messages as appropriate.
+
+If <err> is an instance of Exception, err.errno provides the errnum
+or ecode value (if used, for dotu and dotl) and err.strerror as the
+errstr value (if used, for plain 9p2000).  Otherwise err should be
+an integer, and we'll use os.strerror() to get a message.
+
+When using plain 9P2000 this sends error *messages*:
+
+    >>> import errno, os
+    >>> utf8 = os.strerror(errno.ENOENT).encode('utf-8')
+    >>> pkt = None
+    >>> try:
+    ...     os.open('presumably this file does not exist here', 0)
+    ... except OSError as err:
+    ...     pkt = plain.error(1, err)
+    ...
+    >>> pkt[-len(utf8):] == utf8
+    True
+    >>> pkt2 = plain.error(1, errno.ENOENT)
+    >>> pkt == pkt2
+    True
+
+When using 9P2000.u it sends the error code as well, and when
+using 9P2000.L it sends only the error code (and more error
+codes can pass through):
+
+    >>> len(pkt)
+    34
+    >>> len(dotu.error(1, errno.ENOENT))
+    38
+    >>> len(dotl.error(1, errno.ENOENT))
+    11
+
+For even more convenience (and another slight speed hack), the
+protocol has member functions for each valid pfod, which
+effectively do a pack_from of a pfod built from the arguments.  In
+the above example this is not very useful (because we want two
+different replies), but for Rlink, for instance, which has only
+a tag, a server might implement Tlink() as:
+
+    def do_Tlink(proto, data): # data will be a protocol.rrd.Tlink(...)
+        tag = data.tag
+        dfid = data.dfid
+        fid = data.fid
+        name = data.name
+        ... some code to set up for doing the link ...
+        try:
+            os.link(path1, path2)
+        except OSError as err:
+            return proto.error(tag, err)
+        else:
+            return proto.Rlink(tag)
+
+    >>> pkt = dotl.Rlink(12345)
+    >>> struct.unpack('<IBH', pkt)
+    (7, 71, 12345)
+
+Similarly, a client can build a Tversion packet quite trivially:
+
+    >>> vpkt = dotl.Tversion(tag=0, msize=12345)
+
+To see that this is a valid version packet, let's unpack its bytes.
+The overall length is 21 bytes: 4 bytes of size, 1 byte of code 100
+for Tversion, 2 bytes of tag, 4 bytes of msize, 2 bytes of string
+length, and 8 bytes of string '9P2000.L'.
+
+    >>> tup = struct.unpack('<IBHIH8B', vpkt)
+    >>> tup[0:5]
+    (21, 100, 0, 12345, 8)
+    >>> ''.join(chr(i) for i in tup[5:])
+    '9P2000.L'
+
+Of course, since you can *pack*, you can also *unpack*.  It's
+possible that the incoming packet is malformed.  If so, this
+raises various errors (see below).
+
+Unpack is actually a two step process: first we unpack a header
+(where the size is already removed and is implied by len(data)),
+then we unpack the data within the packet.  You can invoke the
+first step separately.  Furthermore, there's a noerror argument
+that leaves some fields set to None or empty strings, if the
+packet is too short.  (Note that we need a hack for py2k vs py3k
+strings here, for doctests.  Also, encoding 12345 into a byte
+string produces '90', by ASCII luck!)
+
+    >>> pkt = pkt[4:] # strip generated size
+    >>> import sys
+    >>> py3k = sys.version_info[0] >= 3
+    >>> b2s = lambda x: x.decode('utf-8') if py3k else x
+    >>> d = plain.unpack_header(pkt[0:1], noerror=True)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=5, dsize=0, fcall=71, data='')
+    >>> d = plain.unpack_header(pkt[0:2], noerror=True)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=6, dsize=1, fcall=71, data='9')
+
+Without noerror=True a short packet raises a SequenceError:
+
+    >>> plain.unpack_header(pkt[0:0])   # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'fcall'
+
+Of course, a normal packet decodes fine:
+
+    >>> d = plain.unpack_header(pkt)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=7, dsize=2, fcall=71, data='90')
+
+but one that is too *long* potentially raises a SequenceError.
+(This is impossible for a header, though, since the size and
+data size are both implied: either there is an fcall code, and
+the rest of the bytes are "data", or there isn't and the packet
+is too short.  So we can only demonstrate this for regular
+unpack; see below.)
+
+Note that all along, this has been decoding Rlink (fcall=71),
+which is not valid for plain 9P2000 protocol.  It's up to the
+caller to check:
+
+    >>> plain.supports(71)
+    False
+
+    >>> plain.unpack(pkt)           # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall 'Rlink' for 9P2000
+    >>> dotl.unpack(pkt)
+    Rlink(tag=12345)
+
+However, the unpack() method DOES check that the fcall type is
+valid, even if you supply noerror=True.  This is because we can
+only really decode the header, not the data, if the fcall is
+invalid:
+
+    >>> plain.unpack(pkt, noerror=True)     # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall 'Rlink' for 9P2000
+
+The same applies to much-too-short packets even if noerror is set.
+Specifically, if the (post-"size") header shortens down to the empty
+string, the fcall will be None:
+
+    >>> dotl.unpack(b'', noerror=True)      # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall None for 9P2000.L
+
+If there is at least a full header, though, noerror will do the obvious:
+
+    >>> dotl.unpack(pkt[0:1], noerror=True)
+    Rlink(tag=None)
+    >>> dotl.unpack(pkt[0:2], noerror=True)
+    Rlink(tag=None)
+
+If the packet is too long, noerror suppresses the SequenceError:
+
+    >>> dotl.unpack(pkt + b'x')             # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: 1 byte(s) unconsumed
+    >>> dotl.unpack(pkt + b'x', noerror=True)
+    Rlink(tag=12345)
+
+To pack a stat object when producing data for reading a directory,
+use pack_wirestat.  This puts a size in front of the packed stat
+data (they're represented this way in read()-of-directory data,
+but not elsewhere).
+
+To unpack the result of a Tstat or a read() on a directory, use
+unpack_wirestat.  The stat values are variable length so this
+works with offsets.  If the packet is truncated, you'll get a
+SequenceError, but just as for header unpacking, you can use
+noerror to suppress this.
+
+(First, we'll need to build some valid packet data.)
+
+    >>> statobj = td.stat(type=0,dev=0,qid=td.qid(0,0,0),mode=0,
+    ... atime=0,mtime=0,length=0,name=b'foo',uid=b'0',gid=b'0',muid=b'0')
+    >>> data = plain.pack_wirestat(statobj)
+    >>> len(data)
+    55
+
+Now we can unpack it:
+
+    >>> newobj, offset = plain.unpack_wirestat(data, 0)
+    >>> newobj == statobj
+    True
+    >>> offset
+    55
+
+Since the packed data do not include the dotu extensions, we get
+a SequenceError if we try to unpack with dotu or dotl:
+
+    >>> dotu.unpack_wirestat(data, 0)       # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'extension'
+
+When using noerror, the returned new offset will be greater
+than the length of the packet, after a failed unpack, and some
+elements may be None:
+
+    >>> newobj, offset = plain.unpack_wirestat(data[0:10], 0, noerror=True)
+    >>> offset
+    55
+    >>> newobj.length is None
+    True
+
+Similarly, use unpack_dirent to unpack the result of a dot-L
+readdir(), using offsets.  (Build them with pack_dirent.)
+
+    >>> dirent = td.dirent(qid=td.qid(1,2,3),offset=0,
+    ... type=td.DT_REG,name=b'foo')
+    >>> pkt = dotl.pack_dirent(dirent)
+    >>> len(pkt)
+    27
+
+and then:
+
+    >>> newde, offset = dotl.unpack_dirent(pkt, 0)
+    >>> newde == dirent
+    True
+    >>> offset
+    27
+
+"""
+
+from __future__ import print_function
+
+import collections
+import os
+import re
+import sys
+
+import p9err
+import pfod
+import sequencer
+
+SequenceError = sequencer.SequenceError
+
+fcall_names = {}
+
+# begin ???
+# to interfere with (eg) the size part of the packet:
+#   pkt = proto.pack(fcall=protocol.td.Tversion,
+#       size=123, # wrong
+#       args={ 'tag': 1, 'msize': 1000, 'version': '9p2000.u' })
+# a standard Twrite:
+#   pkt = proto.pack(fcall=protocol.td.Twrite,
+#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'data': b'rawdata' })
+# or:
+#   pkt = proto.pack(fcall=protocol.td.Twrite,
+#       data=proto.Twrite(tag=1, fid=2, offset=0, data=b'rawdata'))
+# a broken Twrite:
+#   pkt = proto.pack(fcall=protocol.td.Twrite,
+#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'count': 99,
+#           'data': b'rawdata' })  -- XXX won't work (yet?)
+#
+# build a QID: (td => typedefs and defines)
+#    qid = protocol.td.qid(type=protocol.td.QTFILE, version=1, path=2)
+# build the Twrite data as a data structure:
+#    wrdata = protocol.rrd.Twrite(tag=1, fid=2, offset=0, data=b'rawdata')
+#
+# turn incoming byte stream data into a Header and remaining data:
+#    foo = proto.unpack_header(data)
+
+class _PackInfo(object):
+    """
+    Essentially just a Sequencer (self.seq), except that self.autos
+    remembers the (cond, coder) pairs whose coder.aux is 'auto' (e.g.
+    version) or 'len' (e.g. data[count]); it is None if there are none.
+
+    This could in theory be a recursive check, but in practice
+    all the automatics are at the top level, and we have no mechanism
+    to pass down inner automatics.
+    """
+    def __init__(self, seq):
+        self.seq = seq
+        self.autos = None
+        for pair in seq:        # (cond, code) pair
+            sub = pair[1]
+            if sub.aux is None:
+                continue
+            assert sub.aux == 'auto' or sub.aux == 'len'
+            if self.autos is None:
+                self.autos = []
+            self.autos.append(pair)
+
+    def __repr__(self):
+        return '{0}({1!r})'.format(self.__class__.__name__, self.seq)
+
+    def pack(self, auto_vars, conditions, data, rodata):
+        """
+        Pack data via self.seq.pack(data, conditions), first filling in
+        automatic (from auto_vars[name]) and counted (len of the item)
+        variables that are still None in the data.
+        If rodata ("read-only data") is True we make sure not
+        to modify the caller's data.  Since data is a PFOD rather
+        than a normal ordered dictionary, we use _copy().
+        """
+        if self.autos:
+            for cond, sub in self.autos:
+                # False conditionals don't need to be filled-in.
+                if cond is not None and not conditions[cond]:
+                    continue
+                if sub.aux == 'auto':
+                    # Automatic variable, e.g., version.  The
+                    # sub-coder's name ('version') is the test item.
+                    if data.get(sub.name) is None:
+                        if rodata:
+                            data = data._copy()
+                            rodata = False
+                        data[sub.name] = auto_vars[sub.name]
+                else:
+                    # Automatic length, e.g., data[count].  The
+                    # sub-coder's repeat item ('count') is the
+                    # test item.  Of course, it's possible that
+                    # the counted item is missing as well.  If so
+                    # we just leave both None and take the
+                    # encoding error.
+                    assert sub.aux == 'len'
+                    if data.get(sub.repeat) is not None:
+                        continue
+                    item = data.get(sub.name)
+                    if item is not None:
+                        if rodata:
+                            data = data._copy()
+                            rodata = False
+                        data[sub.repeat] = len(item)
+        return self.seq.pack(data, conditions)
+
+class _P9Proto(object):
+    def __init__(self, auto_vars, conditions, p9_data, pfods, index):
+        self.auto_vars = auto_vars      # currently, just version
+        self.conditions = conditions    # '.u'
+        self.pfods = pfods # dictionary, maps pfod to packinfo
+        self.index = index # for comparison: plain < dotu < dotl
+
+        self.use_rlerror = rrd.Rlerror in pfods # error() picks Rlerror if set
+
+        for dtype in pfods:
+            name = dtype.__name__
+            # For each Txxx/Rxxx, define a <name>() method that packs
+            # through self._pack_from().  NB: setattr() below puts it
+            # on self.__class__, not on this one instance.
+            # The packinfo is from _PackInfo(seq); the fcall and
+            # seq come from p9_data.protocol[<name>].
+            proto_tuple = p9_data.protocol[name]
+            assert dtype == proto_tuple[0]
+            packinfo = pfods[dtype]
+            # in theory we can do this with no names using nested
+            # lambdas, but that's just too confusing, so let's
+            # do it with nested functions instead.
+            def builder(constructor=dtype, packinfo=packinfo):
+                "return function that calls _pack_from with built PFOD"
+                def invoker(self, *args, **kwargs):
+                    "build PFOD and call _pack_from"
+                    return self._pack_from(constructor(*args, **kwargs),
+                                           rodata=False, caller=None,
+                                           packinfo=packinfo)
+                return invoker
+            func = builder()
+            func.__name__ = name
+            func.__doc__ = 'pack from {0}'.format(name)
+            setattr(self.__class__, name, func)
+
+    def __repr__(self):
+        return '{0}({1!r})'.format(self.__class__.__name__, self.version)
+
+    def __str__(self):
+        return self.version
+
+    # define rich-comparison operators, so we can, e.g., test vers > plain
+    def __lt__(self, other):
+        return self.index < other.index
+    def __le__(self, other):
+        return self.index <= other.index
+    def __eq__(self, other):
+        return self.index == other.index
+    def __ne__(self, other):
+        return self.index != other.index
+    def __gt__(self, other):
+        return self.index > other.index
+    def __ge__(self, other):
+        return self.index >= other.index
+
+    def downgrade_to(self, other_name):
+        """
+        Downgrade from this protocol to a not-greater one.
+
+        Raises KeyError if other_name is not a valid protocol,
+        or this is not a downgrade (with setting back to self
+        considered a valid "downgrade", i.e., we're doing subseteq
+        rather than subset).
+        """
+        if not isinstance(other_name, str) and isinstance(other_name, bytes):
+            other_name = other_name.decode('utf-8', 'surrogateescape')
+        other = p9_version(other_name)
+        if other > self:
+            raise KeyError(other_name)
+        return other
+
+    def error(self, tag, err):
+        "produce Rerror or Rlerror, whichever is appropriate"
+        if isinstance(err, Exception):  # may lack errno/strerror entirely
+            errnum = getattr(err, 'errno', None)    # None -> p9err default
+            errmsg = getattr(err, 'strerror', None) or str(err)
+        else:
+            errnum = err                # plain integer errno
+            errmsg = os.strerror(errnum)
+        if self.use_rlerror:            # Rlerror carries only the code
+            return self.Rlerror(tag=tag, ecode=p9err.to_dotl(errnum))
+        return self.Rerror(tag=tag, errstr=errmsg,
+                           errnum=p9err.to_dotu(errnum))
+
+    def pack(self, *args, **kwargs):
+        "pack up a pfod or fcall-and-arguments"
+        fcall = kwargs.pop('fcall', None)
+        if fcall is None:
+            # Called without fcall=...
+            # This requires that args have one argument that
+            # is the PFOD; kwargs should be empty (but we'll take
+            # data=pfod as well).  The size is implied, and
+            # fcall comes from the pfod.
+            data = kwargs.pop('data', None)
+            if data is None:
+                if len(args) != 1:
+                    raise TypeError('pack() with no fcall requires 1 argument')
+                data = args[0]
+            if len(kwargs):
+                raise TypeError('pack() got an unexpected keyword argument '
+                                '{0}'.format(kwargs.popitem()[0]))
+            return self._pack_from(data, True, 'pack', None)
+
+        # Called as pack(fcall=whatever, args={...}).
+        # The args value must be a dictionary since we're going to
+        # apply ** to it in the call to build the PFOD.  Note that
+        # it could already be a PFOD, which is OK, but we're going to
+        # copy it to a new one regardless (callers that have a PFOD
+        # should use pack_from instead).
+        if len(args):
+            raise TypeError('pack() got unexpected arguments '
+                            '{0!r}'.format(args))
+        data = kwargs.pop('args', None)
+        if len(kwargs):
+            raise TypeError('pack() got an unexpected keyword argument '
+                            '{0}'.format(kwargs.popitem()[0]))
+        if not isinstance(data, dict):
+            raise TypeError('pack() with fcall and data '
+                            'requires data to be a dictionary')
+        try:
+            name = fcall_names[fcall]
+        except KeyError:
+            raise TypeError('pack(): {0} is not a valid '
+                            'fcall value'.format(fcall))
+        cls = getattr(rrd, name)
+        data = cls(**data)
+        return self._pack_from(data, False, 'pack', None)
+
+    def pack_from(self, data):
+        "pack from pfod data, using its type to determine fcall"
+        return self._pack_from(data, True, 'pack_from', None)
+
+    def _pack_from(self, data, rodata, caller, packinfo):
+        """
+        Common back end of pack(), pack_from(), and the predefined
+        invokers (self.Tversion, self.Rwalk, etc.).
+
+        pack() and pack_from() identify themselves through caller
+        ('pack' or 'pack_from') and pass packinfo=None; we then
+        find the packinfo here, from the class of the PFOD.  With
+        caller=None, as the predefined invokers do it, the packinfo
+        passed in is used as is, so there is nothing to look up.
+
+        rodata=True says that the incoming data may belong to
+        someone else, so that we must not modify it.  Callers
+        that built the PFOD themselves pass False.
+
+        Returns the complete packet, header included.
+        """
+        if caller is not None:
+            assert caller in ('pack', 'pack_from') and packinfo is None
+            # We came from pack_from(), or from pack() once it had
+            # built a PFOD.  The lookup doubles as the check that
+            # this protocol allows this kind of PFOD at all.
+            packinfo = self.pfods.get(data.__class__, None)
+            if packinfo is None:
+                raise TypeError('{0}({1!r}): invalid '
+                                'input'.format(caller, data))
+
+        # The inner data goes first.
+        body = packinfo.pack(self.auto_vars, self.conditions, data, rodata)
+
+        # Around it goes the header: the fcall (the PFOD's class
+        # name, translated back to its byte code value) and the
+        # outer data.  The header has no auto variables other than
+        # the size, which len(body) implies, and no conditions;
+        # hence None (logically {}) as the conditions below.
+        opcode = getattr(td, data.__class__.__name__)
+        # NB: size counts the 4-byte size field itself and the
+        # fcall code byte, plus the size of the data.
+        header = _9p_data.header_pfod(size=4 + 1 + len(body), dsize=len(body),
+                                      fcall=opcode, data=body)
+        return _9p_data.header_pack_seq.pack(header, None)
+
+    @staticmethod
+    def unpack_header(bstring, noerror=False):
+        """
+        Unpack the header of a packet, returning a header PFOD.
+
+        Our caller has already stripped off the overall size field
+        (4 bytes), so bstring holds the fcall (1 byte) and then the
+        data (len(bstring)-1 bytes).  An empty bstring makes for
+        an invalid header: dsize is set to 0, and fcall is allowed
+        to become None, if noerror is set.
+        """
+        hdr = _9p_data.header_pfod()
+        nbytes = len(bstring)
+        hdr['size'], hdr['dsize'] = nbytes + 4, max(0, nbytes - 1)
+        _9p_data.header_unpack_seq.unpack(hdr, None, bstring, noerror)
+        return hdr
+
+    def unpack(self, bstring, noerror=False):
+        "decode the fcall found in packet, returning a filled PFOD"
+        header = self.unpack_header(bstring, noerror)
+        # NB: header['size'] and header['dsize'] are not needed
+        # here.  In particular dsize is used only internally,
+        # during the header unpack, to find out how many bytes to
+        # copy to header['data'].
+        code = header['fcall']
+        payload = header['data']
+
+        # Note: when noerror is true, size and/or fcall may be
+        # None.  But an fcall that we support is clearly not None,
+        # and fcall follows size in the packet, so a supported
+        # fcall means we can always proceed.
+        if self.supports(code):
+            name = fcall_names[code]
+            cls = getattr(rrd, name)
+            seq = self.pfods[cls].seq
+        elif code == td.Rlerror:
+            # Special case, for diod: Rlerror is accepted even
+            # if it's not formally part of the protocol, and is
+            # decoded by way of the dotl definition.
+            cls = rrd.Rlerror
+            seq = dotl.pfods[cls].seq
+        else:
+            name = fcall_names.get(code, code)
+            raise SequenceError('invalid fcall {0!r} for '
+                                '{1}'.format(name, self))
+        result = cls()
+        seq.unpack(result, self.conditions, payload, noerror)
+        return result
+
+    def pack_wirestat(self, statobj):
+        """
+        Encode a stat object the way it appears in the data that
+        read() on a directory returns: the packed stat, size-prefixed.
+        """
+        packed = td.stat_seq.pack(statobj, self.conditions)
+        return td.wirestat_seq.pack(dict(size=len(packed), data=packed), {})
+
+    def unpack_wirestat(self, bstring, offset, noerror=False):
+        """
+        Decode the wirestat (size, then stat) found at offset in
+        byte-string, returning a td.stat object and the new offset.
+        """
+        result = td.stat()
+        wire = { 'size': None }
+        end = td.wirestat_seq.unpack_from(wire, self.conditions, bstring,
+                                          offset, noerror)
+        if wire['size'] is not None:
+            # We now have size and data.  If noerror, data might
+            # be too short, and we'll then unpack a partial stat.
+            # Or (with or without noerror), data might be too
+            # long: len(data) == size, yet not all of the data
+            # get used.  The protocol may allow that: it's not clear.
+            payload = wire['data']
+            used = td.stat_seq.unpack_from(result, self.conditions,
+                                           payload, 0, noerror)
+            # if size != used ... then what?
+        # A size of None implies noerror (and end==offset+2); the
+        # stat object is then returned just as td.stat() made it.
+        return result, end
+
+    def pack_dirent(self, dirent):
+        """
+        Encode a dirent (dot-L only).  Trivial, but provided for symmetry
+        with unpack_dirent.  (Should plain or dotu raise an error here?)
+        """
+        encode = td.dirent_seq.pack
+        return encode(dirent, self.conditions)
+
+    def unpack_dirent(self, bstring, offset, noerror=False):
+        """
+        Decode the td.dirent object that starts at offset in
+        byte-string, returning the dirent and the new offset.
+        """
+        entry = td.dirent()
+        end = td.dirent_seq.unpack_from(entry, self.conditions, bstring,
+                                        offset, noerror)
+        return entry, end
+
+    def supports(self, fcall):
+        """
+        Return True if and only if this protocol supports the
+        given fcall (one unknown to fcall_names never is).
+
+        >>> plain.supports(100)
+        True
+        >>> plain.supports('Tversion')
+        True
+        >>> plain.supports('Rlink')
+        False
+        """
+        name = fcall_names.get(fcall, None)
+        if name is not None:
+            return getattr(rrd, name) in self.pfods
+        # no name for this fcall: it cannot be supported
+        return False
+
+    def get_version(self, as_bytes=True):
+        "Plan 9 protocol version: utf-8 bytes by default, else as stored"
+        version = self.auto_vars['version']
+        if not as_bytes or isinstance(version, bytes):
+            return version
+        return version.encode('utf-8')
+
+    @property
+    def version(self):
+        "Plan 9 protocol version, as get_version(as_bytes=False) gives it"
+        return self.get_version(False)
+
+DEBUG = False   # NOTE(review): no reader of DEBUG is visible nearby; confirm
+
+# This defines a special en/decoder named "s" using a magic builtin.
+# This and stat are the only variable-length decoders, and this is the
+# only recursively-variable-length one (i.e., stat decoding is effectively
+# fixed size once we handle strings).  So this magic avoids recursion.
+#
+# Note that _string_ is, in effect, size[2] orig_var[size], i.e.,
+# a 2-byte size followed by that many bytes of data.
+_STRING_MAGIC = '_string_'
+SDesc = "typedef s: " + _STRING_MAGIC
+
+# This defines an en/decoder for type "qid", which en/decodes
+# three fixed-size fields: 1 byte called type, 4 called version,
+# and 8 called path (for a total of 13 bytes).
+#
+# It also defines QTDIR, QTAPPEND, etc.  (These are not used
+# for en/decode, or at least not yet.)
+QIDDesc = """\
+typedef qid: type[1] version[4] path[8]
+
+    #define QTDIR       0x80
+    #define QTAPPEND    0x40
+    #define QTEXCL      0x20
+    #define QTMOUNT     0x10
+    #define QTAUTH      0x08
+    #define QTTMP       0x04
+    #define QTSYMLINK   0x02
+    #define QTFILE      0x00
+"""
+
+# This defines a stat decoder, which has a 9p2000 standard front,
+# followed by an optional additional portion (the {.u: ...} group).
+#
+# The constants are named DMDIR etc.
+STATDesc = """
+typedef stat: type[2] dev[4] qid[qid] mode[4] atime[4] mtime[4] \
+length[8] name[s] uid[s] gid[s] muid[s] \
+{.u: extension[s] n_uid[4] n_gid[4] n_muid[4] }
+
+    #define DMDIR           0x80000000
+    #define DMAPPEND        0x40000000
+    #define DMMOUNT         0x10000000
+    #define DMAUTH          0x08000000
+    #define DMTMP           0x04000000
+    #define DMSYMLINK       0x02000000
+            /* 9P2000.u extensions */
+    #define DMDEVICE        0x00800000
+    #define DMNAMEDPIPE     0x00200000
+    #define DMSOCKET        0x00100000
+    #define DMSETUID        0x00080000
+    #define DMSETGID        0x00040000
+"""
+
+# This defines a wirestat decoder.  A wirestat is a 2-byte size and
+# then size bytes of (previously encoded, or future-decoded) stat.
+WirestatDesc = """
+typedef wirestat: size[2] data[size]
+"""
+
+# This defines a dirent decoder, which has a dot-L specific format:
+# a qid, an 8-byte offset, a 1-byte type, and a name string.
+# The type field's values are defined as DT_* (same as BSD and Linux).
+DirentDesc = """
+typedef dirent: qid[qid] offset[8] type[1] name[s]
+
+    #define DT_UNKNOWN       0
+    #define DT_FIFO          1
+    #define DT_CHR           2
+    #define DT_DIR           4
+    #define DT_BLK           6
+    #define DT_REG           8
+    #define DT_LNK          10
+    #define DT_SOCK         12
+    #define DT_WHT          14
+"""
+
+# N.B.: this is largely a slightly more rigidly formatted variant of
+# the contents of:
+# https://github.com/chaos/diod/blob/master/protocol.md
+#
+# Note that <name> = <value>: ... assigns names for the fcall
+# (function call) table.  Names without "= value" are
+# assumed to be the previous value +1 (and the two names are
+# also checked to make sure they are Tfoo,Rfoo).
+ProtocolDesc = """\
+Rlerror.L = 7: tag[2] ecode[4]
+    ecode is a numerical Linux errno
+
+Tstatfs.L = 8: tag[2] fid[4]
+Rstatfs.L: tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] \
+         files[8] ffree[8] fsid[8] namelen[4]
+    Rstatfs corresponds to Linux statfs structure:
+    struct statfs {
+        long    f_type;     /* type of file system */
+        long    f_bsize;    /* optimal transfer block size */
+        long    f_blocks;   /* total data blocks in file system */
+        long    f_bfree;    /* free blocks in fs */
+        long    f_bavail;   /* free blocks avail to non-superuser */
+        long    f_files;    /* total file nodes in file system */
+        long    f_ffree;    /* free file nodes in fs */
+        fsid_t  f_fsid;     /* file system id */
+        long    f_namelen;  /* maximum length of filenames */
+    };
+
+    This comes from nowhere obvious...
+        #define FSTYPE      0x01021997
+
+Tlopen.L = 12: tag[2] fid[4] flags[4]
+Rlopen.L: tag[2] qid[qid] iounit[4]
+    lopen prepares fid for file (or directory) I/O.
+
+    flags contains Linux open(2) flag bits, e.g., O_RDONLY, O_RDWR, O_WRONLY.
+
+        #define L_O_CREAT       000000100
+        #define L_O_EXCL        000000200
+        #define L_O_NOCTTY      000000400
+        #define L_O_TRUNC       000001000
+        #define L_O_APPEND      000002000
+        #define L_O_NONBLOCK    000004000
+        #define L_O_DSYNC       000010000
+        #define L_O_FASYNC      000020000
+        #define L_O_DIRECT      000040000
+        #define L_O_LARGEFILE   000100000
+        #define L_O_DIRECTORY   000200000
+        #define L_O_NOFOLLOW    000400000
+        #define L_O_NOATIME     001000000
+        #define L_O_CLOEXEC     002000000
+        #define L_O_SYNC        004000000
+        #define L_O_PATH        010000000
+        #define L_O_TMPFILE     020000000
+
+Tlcreate.L = 14: tag[2] fid[4] name[s] flags[4] mode[4] gid[4]
+Rlcreate.L: tag[2] qid[qid] iounit[4]
+    lcreate creates a regular file name in directory fid and prepares
+    it for I/O.
+
+    fid initially represents the parent directory of the new file.
+    After the call it represents the new file.
+
+    flags contains Linux open(2) flag bits (including O_CREAT).
+
+    mode contains Linux creat(2) mode (permissions) bits.
+
+    gid is the effective gid of the caller.
+
+Tsymlink.L = 16: tag[2] dfid[4] name[s] symtgt[s] gid[4]
+Rsymlink.L: tag[2] qid[qid]
+    symlink creates a symbolic link name in directory dfid.  The
+    link will point to symtgt.
+
+    gid is the effective group id of the caller.
+
+    The qid for the new symbolic link is returned in the reply.
+
+Tmknod.L = 18: tag[2] dfid[4] name[s] mode[4] major[4] minor[4] gid[4]
+Rmknod.L: tag[2] qid[qid]
+    mknod creates a device node name in directory dfid with major
+    and minor numbers.
+
+    mode contains Linux mknod(2) mode bits.  (Note that these
+    include the S_IFMT bits which may be S_IFBLK, S_IFCHR, or
+    S_IFSOCK.)
+
+    gid is the effective group id of the caller.
+
+    The qid for the new device node is returned in the reply.
+
+Trename.L = 20: tag[2] fid[4] dfid[4] name[s]
+Rrename.L: tag[2]
+    rename renames a file system object referenced by fid, to name
+    in the directory referenced by dfid.
+
+    This operation will eventually be replaced by renameat.
+
+Treadlink.L = 22: tag[2] fid[4]
+Rreadlink.L: tag[2] target[s]
+    readlink returns the contents of teh symbolic link referenced by fid.
+
+Tgetattr.L = 24: tag[2] fid[4] request_mask[8]
+Rgetattr.L: tag[2] valid[8] qid[qid] mode[4] uid[4] gid[4] nlink[8] \
+          rdev[8] size[8] blksize[8] blocks[8] \
+          atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] \
+          ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8] \
+          gen[8] data_version[8]
+
+    getattr gets attributes of a file system object referenced by fid.
+    The response is intended to follow pretty closely the fields
+    returned by the stat(2) system call:
+
+    struct stat {
+        dev_t     st_dev;     /* ID of device containing file */
+        ino_t     st_ino;     /* inode number */
+        mode_t    st_mode;    /* protection */
+        nlink_t   st_nlink;   /* number of hard links */
+        uid_t     st_uid;     /* user ID of owner */
+        gid_t     st_gid;     /* group ID of owner */
+        dev_t     st_rdev;    /* device ID (if special file) */
+        off_t     st_size;    /* total size, in bytes */
+        blksize_t st_blksize; /* blocksize for file system I/O */
+        blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
+        time_t    st_atime;   /* time of last access */
+        time_t    st_mtime;   /* time of last modification */
+        time_t    st_ctime;   /* time of last status change */
+    };
+
+    The differences are:
+
+     * st_dev is omitted
+     * st_ino is contained in the path component of qid
+     * times are nanosecond resolution
+     * btime, gen and data_version fields are reserved for future use
+
+    Not all fields are valid in every call. request_mask is a bitmask
+    indicating which fields are requested. valid is a bitmask
+    indicating which fields are valid in the response. The mask
+    values are as follows:
+
+    #define GETATTR_MODE        0x00000001
+    #define GETATTR_NLINK       0x00000002
+    #define GETATTR_UID         0x00000004
+    #define GETATTR_GID         0x00000008
+    #define GETATTR_RDEV        0x00000010
+    #define GETATTR_ATIME       0x00000020
+    #define GETATTR_MTIME       0x00000040
+    #define GETATTR_CTIME       0x00000080
+    #define GETATTR_INO         0x00000100
+    #define GETATTR_SIZE        0x00000200
+    #define GETATTR_BLOCKS      0x00000400
+
+    #define GETATTR_BTIME       0x00000800
+    #define GETATTR_GEN         0x00001000
+    #define GETATTR_DATA_VERSION 0x00002000
+
+    #define GETATTR_BASIC       0x000007ff  /* Mask for fields up to BLOCKS */
+    #define GETATTR_ALL         0x00003fff  /* Mask for All fields above */
+
+Tsetattr.L = 26: tag[2] fid[4] valid[4] mode[4] uid[4] gid[4] size[8] \
+               atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8]
+Rsetattr.L: tag[2]
+    setattr sets attributes of a file system object referenced by
+    fid.  As with getattr, valid is a bitmask selecting which
+    fields to set, which can be any combination of:
+
+    mode - Linux chmod(2) mode bits.
+
+    uid, gid - New owner, group of the file as described in Linux chown(2).
+
+    size - New file size as handled by Linux truncate(2).
+
+    atime_sec, atime_nsec - Time of last file access.
+
+    mtime_sec, mtime_nsec - Time of last file modification.
+
+    The valid bits are defined as follows:
+
+    #define SETATTR_MODE        0x00000001
+    #define SETATTR_UID         0x00000002
+    #define SETATTR_GID         0x00000004
+    #define SETATTR_SIZE        0x00000008
+    #define SETATTR_ATIME       0x00000010
+    #define SETATTR_MTIME       0x00000020
+    #define SETATTR_CTIME       0x00000040
+    #define SETATTR_ATIME_SET   0x00000080
+    #define SETATTR_MTIME_SET   0x00000100
+
+    If a time bit is set without the corresponding SET bit, the
+    current system time on the server is used instead of the value
+    sent in the request.
+
+Txattrwalk.L = 30: tag[2] fid[4] newfid[4] name[s]
+Rxattrwalk.L: tag[2] size[8]
+    xattrwalk gets a newfid pointing to xattr name.  This fid can
+    later be used to read the xattr value.  If name is NULL newfid
+    can be used to get the list of extended attributes associated
+    with the file system object.
+
+Txattrcreate.L = 32: tag[2] fid[4] name[s] attr_size[8] flags[4]
+Rxattrcreate.L: tag[2]
+    xattrcreate gets a fid pointing to the xattr name.  This fid
+    can later be used to set the xattr value.
+
+    flag is derived from set Linux setxattr. The manpage says
+
+        The flags parameter can be used to refine the semantics of
+        the operation.  XATTR_CREATE specifies a pure create,
+        which fails if the named attribute exists already.
+        XATTR_REPLACE specifies a pure replace operation, which
+        fails if the named attribute does not already exist.  By
+        default (no flags), the extended attribute will be created
+        if need be, or will simply replace the value if the
+        attribute exists.
+
+    The actual setxattr operation happens when the fid is clunked.
+    At that point the written byte count and the attr_size
+    specified in TXATTRCREATE should be same otherwise an error
+    will be returned.
+
+Treaddir.L = 40: tag[2] fid[4] offset[8] count[4]
+Rreaddir.L: tag[2] count[4] data[count]
+    readdir requests that the server return directory entries from
+    the directory represented by fid, previously opened with
+    lopen.  offset is zero on the first call.
+
+    Directory entries are represented as variable-length records:
+        qid[qid] offset[8] type[1] name[s]
+    At most count bytes will be returned in data.  If count is not
+    zero in the response, more data is available.  On subsequent
+    calls, offset is the offset returned in the last directory
+    entry of the previous call.
+
+Tfsync.L = 50: tag[2] fid[4]
+Rfsync.L: tag[2]
+    fsync tells the server to flush any cached data associated
+    with fid, previously opened with lopen.
+
+Tlock.L = 52: tag[2] fid[4] type[1] flags[4] start[8] length[8] \
+       proc_id[4] client_id[s]
+Rlock.L: tag[2] status[1]
+    lock is used to acquire or release a POSIX record lock on fid
+    and has semantics similar to Linux fcntl(F_SETLK).
+
+    type has one of the values:
+
+        #define LOCK_TYPE_RDLCK 0
+        #define LOCK_TYPE_WRLCK 1
+        #define LOCK_TYPE_UNLCK 2
+
+    start, length, and proc_id correspond to the analagous fields
+    passed to Linux fcntl(F_SETLK):
+
+    struct flock {
+        short l_type;  /* Type of lock: F_RDLCK, F_WRLCK, F_UNLCK */
+        short l_whence;/* How to intrprt l_start: SEEK_SET,SEEK_CUR,SEEK_END */
+        off_t l_start; /* Starting offset for lock */
+        off_t l_len;   /* Number of bytes to lock */
+        pid_t l_pid;   /* PID of process blocking our lock (F_GETLK only) */
+    };
+
+    flags bits are:
+
+        #define LOCK_SUCCESS    0
+        #define LOCK_BLOCKED    1
+        #define LOCK_ERROR      2
+        #define LOCK_GRACE      3
+
+    The Linux v9fs client implements the fcntl(F_SETLKW)
+    (blocking) lock request by calling lock with
+    LOCK_FLAGS_BLOCK set.  If the response is LOCK_BLOCKED,
+    it retries the lock request in an interruptible loop until
+    status is no longer LOCK_BLOCKED.
+
+    The Linux v9fs client translates BSD advisory locks (flock) to
+    whole-file POSIX record locks.  v9fs does not implement
+    mandatory locks and will return ENOLCK if use is attempted.
+
+    Because of POSIX record lock inheritance and upgrade
+    properties, pass-through servers must be implemented
+    carefully.
+
+Tgetlock.L = 54: tag[2] fid[4] type[1] start[8] length[8] proc_id[4] \
+               client_id[s]
+Rgetlock.L: tag[2] type[1] start[8] length[8] proc_id[4] client_id[s]
+    getlock tests for the existence of a POSIX record lock and has
+    semantics similar to Linux fcntl(F_GETLK).
+
+    As with lock, type has one of the values defined above, and
+    start, length, and proc_id correspond to the analagous fields
+    in struct flock passed to Linux fcntl(F_GETLK), and client_Id
+    is an additional mechanism for uniquely identifying the lock
+    requester and is set to the nodename by the Linux v9fs client.
+
+Tlink.L = 70: tag[2] dfid[4] fid[4] name[s]
+Rlink.L: tag[2]
+    link creates a hard link name in directory dfid.  The link
+    target is referenced by fid.
+
+Tmkdir.L = 72: tag[2] dfid[4] name[s] mode[4] gid[4]
+Rmkdir.L: tag[2] qid[qid]
+    mkdir creates a new directory name in parent directory dfid.
+
+    mode contains Linux mkdir(2) mode bits.
+
+    gid is the effective group ID of the caller.
+
+    The qid of the new directory is returned in the response.
+
+Trenameat.L = 74: tag[2] olddirfid[4] oldname[s] newdirfid[4] newname[s]
+Rrenameat.L: tag[2]
+    Change the name of a file from oldname to newname, possible
+    moving it from old directory represented by olddirfid to new
+    directory represented by newdirfid.
+
+    If the server returns ENOTSUPP, the client should fall back to
+    the rename operation.
+
+Tunlinkat.L = 76: tag[2] dirfd[4] name[s] flags[4]
+Runlinkat.L: tag[2]
+    Unlink name from directory represented by dirfd.  If the file
+    is represented by a fid, that fid is not clunked.  If the
+    server returns ENOTSUPP, the client should fall back to the
+    remove operation.
+
+    There seems to be only one defined flag:
+
+        #define AT_REMOVEDIR    0x200
+
+Tversion = 100: tag[2] msize[4] version[s]:auto
+Rversion: tag[2] msize[4] version[s]
+
+    negotiate protocol version
+
+    version establishes the msize, which is the maximum message
+    size inclusive of the size value that can be handled by both
+    client and server.
+
+    It also establishes the protocol version.  For 9P2000.L
+    version must be the string 9P2000.L.
+
+Tauth = 102: tag[2] afid[4] uname[s] aname[s] n_uname[4]
+Rauth: tag[2] aqid[qid]
+    auth initiates an authentication handshake for n_uname.
+    Rlerror is returned if authentication is not required.  If
+    successful, afid is used to read/write the authentication
+    handshake (protocol does not specify what is read/written),
+    and afid is presented in the attach.
+
+Tattach = 104: tag[2] fid[4] afid[4] uname[s] aname[s] {.u: n_uname[4] }
+Rattach: tag[2] qid[qid]
+    attach introduces a new user to the server, and establishes
+    fid as the root for that user on the file tree selected by
+    aname.
+
+    afid can be NOFID (~0) or the fid from a previous auth
+    handshake.  The afid can be clunked immediately after the
+    attach.
+
+        #define NOFID       0xffffffff
+
+    n_uname, if not set to NONUNAME (~0), is the uid of the
+    user and is used in preference to uname.  Note that it appears
+    in both .u and .L (unlike most .u-specific features).
+
+        #define NONUNAME    0xffffffff
+
+    v9fs has several modes of access which determine how it uses
+    attach.  In the default access=user, an initial attach is sent
+    for the user provided in the uname=name mount option, and for
+    each user that accesses the file system thereafter.  For
+    access=, only the initial attach is sent for and all other
+    users are denied access by the client.
+
+Rerror = 107: tag[2] errstr[s] {.u: errnum[4] }
+
+Tflush = 108: tag[2] oldtag[2]
+Rflush: tag[2]
+    flush aborts an in-flight request referenced by oldtag, if any.
+
+Twalk = 110: tag[2] fid[4] newfid[4] nwname[2] nwname*(wname[s])
+Rwalk: tag[2] nwqid[2] nwqid*(wqid[qid])
+    walk is used to descend a directory represented by fid using
+    successive path elements provided in the wname array.  If
+    succesful, newfid represents the new path.
+
+    fid can be cloned to newfid by calling walk with nwname set to
+    zero.
+
+    if nwname==0, fid need not represent a directory.
+
+Topen = 112: tag[2] fid[4] mode[1]
+Ropen: tag[2] qid[qid] iounit[4]
+    open prepares fid for file (or directory) I/O.
+
+    mode is:
+        #define OREAD       0   /* open for read */
+        #define OWRITE      1   /* open for write */
+        #define ORDWR       2   /* open for read and write */
+        #define OEXEC       3   /* open for execute */
+
+        #define OTRUNC      16  /* truncate (illegal if OEXEC) */
+        #define OCEXEC      32  /* close on exec (nonsensical) */
+        #define ORCLOSE     64  /* remove on close */
+        #define ODIRECT     128 /* direct access (.u extension?) */
+
+Tcreate = 114: tag[2] fid[4] name[s] perm[4] mode[1] {.u: extension[s] }
+Rcreate: tag[2] qid[qid] iounit[4]
+    create is similar to open; however, the incoming fid is the
+    diretory in which the file is to be created, and on success,
+    return, the fid refers to the then-created file.
+
+Tread = 116: tag[2] fid[4] offset[8] count[4]
+Rread: tag[2] count[4] data[count]
+    perform a read on the file represented by fid.  Note that in
+    v9fs, a read(2) or write(2) system call for a chunk of the
+    file that won't fit in a single request is broken up into
+    multiple requests.
+
+    Under 9P2000.L, read cannot be used on directories.  See readdir.
+
+Twrite = 118: tag[2] fid[4] offset[8] count[4] data[count]
+Rwrite: tag[2] count[4]
+    perform a write on the file represented by fid.  Note that in
+    v9fs, a read(2) or write(2) system call for a chunk of the
+    file that won't fit in a single request is broken up into
+    multiple requests.
+
+    write cannot be used on directories.
+
+Tclunk = 120: tag[2] fid[4]
+Rclunk: tag[2]
+    clunk signifies that fid is no longer needed by the client.
+
+Tremove = 122: tag[2] fid[4]
+Rremove: tag[2]
+    remove removes the file system object represented by fid.
+
+    The fid is always clunked (even on error).
+
+Tstat = 124: tag[2] fid[4]
+Rstat: tag[2] size[2] data[size]
+
+Twstat = 126: tag[2] fid[4] size[2] data[size]
+Rwstat: tag[2]
+"""
+
+class _Token(object):
+    r"""
+    One scanned token.
+
+    Attributes:
+     - ttype: the token type (see below)
+     - value: generally the token itself, although for 'label',
+       'word*', 'aux' (':aux'), and 'type' ('[type]') tokens a
+       prefix and/or suffix has been removed
+     - orig: the full original token (same as value if nothing
+       was removed); this is also what str() gives
+     - ival: int(value) for an all-digits 'type' token, else None
+
+    Token types are:
+     - 'word', 'word*', or 'label':
+         '[.\w]+' followed by optional '*' or ':'
+
+     - 'aux': ':' followed by '\w+' (used for :auto annotation)
+
+     - 'type':
+       open bracket '[', followed by '\w+' or '\d+' (only one of
+       these), followed by close bracket ']'
+
+     - '(', ')', '{', '}': themselves
+
+    Any white space in front of a token is discarded.
+
+    (Probably should return ':' as a char and handle it in parser,
+    but oh well.)
+    """
+    def __init__(self, ttype, value, orig=None):
+        if orig is None:
+            orig = value
+        self.ttype, self.value, self.orig = ttype, value, orig
+        numeric = ttype == 'type' and value.isdigit()
+        self.ival = int(value) if numeric else None
+    def __str__(self):
+        return self.orig
+
+# one pattern for all tokens; group 1 excludes the leading white space
+_Token.tok_expr = re.compile(r'\s*([.\w]+(?:\*|:)?'
+                             r'|:\w+'
+                             r'|\[(?:\w+|\d+)\]'
+                             r'|[(){}])')
+
+def _scan(string):
+    """
+    Tokenize a string, returning a list of _Token instances.
+    White space before any token, or after the last one, is skipped.
+
+    Note: This raises a ValueError with the position of any unmatched
+    character in the string (after printing a diagnostic).
+    """
+    def unmatched(where):
+        print('error: unmatched character(s) in input\n{0}\n{1}^'.format(
+            string, ' ' * where))
+        raise ValueError('unmatched lexeme', where)
+
+    # Matches must tile the string: each one has to begin where the
+    # previous one ended, or something in between went unmatched.
+    lexemes = []
+    pos = 0
+    for match in _Token.tok_expr.finditer(string):
+        start, end = match.span()
+        if start != pos:
+            unmatched(pos)
+        pos = end
+        lexemes.append(match.group(1))
+    # Past the last token, only white space is acceptable.
+    if string[pos:].strip():
+        unmatched(pos)
+
+    # classify each token, stripping decorations
+    result = []
+    for item in lexemes:
+        if item in ('(', ')', '{', '}'):
+            tok = _Token(item, item)
+        elif item[0] == ':':
+            tok = _Token('aux', item[1:], item)
+        elif item.endswith(':'):
+            tok = _Token('label', item[0:-1], item)
+        elif item.endswith('*'):
+            tok = _Token('word*', item[0:-1], item)
+        elif item[0] == '[':
+            # integer or named type; tok_expr ensures the closing ']'
+            tok = _Token('type', item[1:-1], item)
+        else:
+            tok = _Token('word', item)
+        result.append(tok)
+    return result
+
+def _debug_print_sequencer(seq):
+    """debugging aid: show seq itself, then its numbered entries"""
+    print('sequencer is {0!r}'.format(seq), file=sys.stderr)
+    for index, encdec in enumerate(seq):
+        print(' [{0:d}] = {1}'.format(index, encdec), file=sys.stderr)
+
+def _parse_expr(seq, string, typedefs):
+    """
+    Parse "expression-ish" items, which is a list of:
+        name[type]
+        name*(subexpr)    (a literal asterisk)
+        { label ... }
+
+    The "type" may be an integer or a second name.  In the case
+    of a second name it must be something from <typedefs>.
+
+    The meaning of name[integer] is that we are going to encode
+    or decode a fixed-size field of <integer> bytes, using the
+    given name.
+
+    For name[name2], we can look up name2 in our typedefs table.
+    The only real typedefs used here are "stat" and "s"; each
+    of these expands to a variable-size encode/decode.  See the
+    special case below, though.
+
+    The meaning of name*(...) is: the earlier name will have been
+    defined by an earlier _parse_expr for this same line.  That
+    earlier name provides a repeat-count.
+
+    Inside the parens we get a name[type] sub-expression.  This may
+    not recurse further, so we can use a pretty cheesy parser.
+
+    As a special case, given name[name2], we first check whether
+    name2 is an earlier name a la name*(...).  Here the meaning
+    is much like name2*(name[1]), except that the result is a
+    simple byte string, rather than an array.
+
+    The meaning of "{ label ... " is that everything following up
+    to "}" is optional and used only with 9P2000.u and/or 9P2000.L.
+    Inside the {...} pair is the usual set of tokens, but again
+    {...} cannot recurse.
+
+    The parse fills in a Sequencer instance, and returns a list
+    of the parsed names.
+    """
+    names = []
+    cond = None
+
+    tokens = collections.deque(_scan(string))
+
+    def get_subscripted(tokens):
+        """
+        Allows name[integer] and name1[name2] only; returns
+        tuple after stripping off both tokens, or returns None
+        and does not strip tokens.
+        """
+        if len(tokens) == 0 or tokens[0].ttype != 'word':
+            return None
+        if len(tokens) > 1 and tokens[1].ttype == 'type':
+            word = tokens.popleft()
+            return word, tokens.popleft()
+        return None
+
+    def lookup(name, typeinfo, aux=None):
+        """
+        Convert cond (if not None) to its .value, so that instead
+        of (x, '.u') we get '.u'.
+
+        Convert typeinfo to an encdec.  Typeinfo may be 1/2/4/8, or
+        one of our typedef names.  If it's a typedef name it will
+        normally correspond to an EncDecTyped, but we have one special
+        case for string types, and another for using an earlier-defined
+        variable.
+        """
+        condval = None if cond is None else cond.value
+        if typeinfo.ival is None:
+            try:
+                cls, sub = typedefs[typeinfo.value]
+            except KeyError:
+                raise ValueError('unknown type name {0}'.format(typeinfo))
+            # the type name is typeinfo.value; the corresponding
+            # pfod class is cls; the *variable* name is name;
+            # and the sub-sequence is sub.  But if cls is None
+            # then it's our string type.
+            if cls is None:
+                encdec = sequencer.EncDecSimple(name, _STRING_MAGIC, aux)
+            else:
+                encdec = sequencer.EncDecTyped(cls, name, sub, aux)
+        else:
+            if typeinfo.ival not in (1, 2, 4, 8):
+                raise ValueError('bad integer code in {0}'.format(typeinfo))
+            encdec = sequencer.EncDecSimple(name, typeinfo.ival, aux)
+        return condval, encdec
+
+    def emit_simple(name, typeinfo, aux=None):
+        """
+        Emit name[type].  We may be inside a conditional; if so
+        cond is not None.
+        """
+        condval, encdec = lookup(name, typeinfo, aux)
+        seq.append_encdec(condval, encdec)
+        names.append(name)
+
+    def emit_repeat(name1, name2, typeinfo):
+        """
+        Emit name1*(name2[type]).
+
+        Note that the conditional is buried in the sub-coder for
+        name2.  It must be passed through anyway in case the sub-
+        coder is only partly conditional.  If the sub-coder is
+        fully conditional, each sub-coding uses or produces no
+        bytes and hence the array itself is effectively conditional
+        as well (it becomes name1 * [None]).
+
+        We don't (currently) have any auxiliary data for arrays.
+        """
+        if name1 not in names:
+            raise ValueError('{0}*({1}[{2}]): '
+                             '{0} undefined'.format(name1, name2,
+                                                    typeinfo.value))
+        condval, encdec = lookup(name2, typeinfo)
+        encdec = sequencer.EncDecA(name1, name2, encdec)
+        seq.append_encdec(condval, encdec)
+        names.append(name2)
+
+    def emit_bytes_repeat(name1, name2):
+        """
+        Emit name1[name2], e.g., data[count].
+        """
+        condval = None if cond is None else cond.value
+        # Note that the two names are reversed when compared to
+        # count*(data[type]).  The "sub-coder" is handled directly
+        # by EncDecA, hence is None.
+        #
+        # As a peculiar side effect, all bytes-repeats cause the
+        # count itself to become automatic (to have an aux of 'len').
+        encdec = sequencer.EncDecA(name2, name1, None, 'len')
+        seq.append_encdec(condval, encdec)
+        names.append(name1)
+
+    supported_conditions = ('.u')
+    while tokens:
+        token = tokens.popleft()
+        if token.ttype == 'label':
+            raise ValueError('misplaced label')
+        if token.ttype == 'aux':
+            raise ValueError('misplaced auxiliary')
+        if token.ttype == '{':
+            if cond is not None:
+                raise ValueError('nested "{"')
+            if len(tokens) == 0:
+                raise ValueError('unclosed "{"')
+            cond = tokens.popleft()
+            if cond.ttype != 'label':
+                raise ValueError('"{" not followed by cond label')
+            if cond.value not in supported_conditions:
+                raise ValueError('unsupported condition "{0}"'.format(
+                    cond.value))
+            continue
+        if token.ttype == '}':
+            if cond is None:
+                raise ValueError('closing "}" w/o opening "{"')
+            cond = None
+            continue
+        if token.ttype == 'word*':
+            if len(tokens) == 0 or tokens[0].ttype != '(':
+                raise ValueError('{0} not followed by (...)'.format(token))
+            tokens.popleft()
+            repeat = get_subscripted(tokens)
+            if repeat is None:
+                raise ValueError('parse error after {0}('.format(token))
+            if len(tokens) == 0 or tokens[0].ttype != ')':
+                raise ValueError('missing ")" after {0}({1}{2}'.format(
+                    token, repeat[0], repeat[1]))
+            tokens.popleft()
+            # N.B.: a repeat cannot have an auxiliary info (yet?).
+            emit_repeat(token.value, repeat[0].value, repeat[1])
+            continue
+        if token.ttype == 'word':
+            # Special case: _STRING_MAGIC turns into a string
+            # sequencer.  This should be used with just one
+            # typedef (typedef s: _string_).
+            if token.value == _STRING_MAGIC:
+                names.append(_STRING_MAGIC) # XXX temporary
+                continue
+            if len(tokens) == 0 or tokens[0].ttype != 'type':
+                raise ValueError('parse error after {0}'.format(token))
+            type_or_size = tokens.popleft()
+            # Check for name[name2] where name2 is a word (not a
+            # number) that is in the names[] array.
+            if type_or_size.value in names:
+                # NB: this cannot have auxiliary info.
+                emit_bytes_repeat(token.value, type_or_size.value)
+                continue
+            if len(tokens) > 0 and tokens[0].ttype == 'aux':
+                aux = tokens.popleft()
+                if aux.value != 'auto':
+                    raise ValueError('{0}{1}: only know "auto", not '
+                                     '{2}'.format(token, type_or_size,
+                                                  aux.value))
+                emit_simple(token.value, type_or_size, aux.value)
+            else:
+                emit_simple(token.value, type_or_size)
+            continue
+        raise ValueError('"{0}" not valid here"'.format(token))
+
+    if cond is not None:
+        raise ValueError('unclosed "}"')
+
+    return names
+
+class _ProtoDefs(object):
+    def __init__(self):
+        # Scan our typedefs. This may execute '#define's as well.
+        self.typedefs = {}
+        self.defines = {}
+        typedef_re = re.compile(r'\s*typedef\s+(\w+)\s*:\s*(.*)')
+        self.parse_lines('SDesc', SDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('QIDDesc', QIDDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('STATDesc', STATDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('WirestatDesc', WirestatDesc, typedef_re,
+                         self.handle_typedef)
+        self.parse_lines('DirentDesc', DirentDesc, typedef_re,
+                         self.handle_typedef)
+
+        # Scan protocol (the bulk of the work).  This, too, may
+        # execute '#define's.
+        self.protocol = {}
+        proto_re = re.compile(r'(\*?\w+)(\.\w+)?\s*(?:=\s*(\d+))?\s*:\s*(.*)')
+        self.prev_proto_value = None
+        self.parse_lines('ProtocolDesc', ProtocolDesc,
+                         proto_re, self.handle_proto_def)
+
+        self.setup_header()
+
+        # set these up for export()
+        self.plain = {}
+        self.dotu = {}
+        self.dotl = {}
+
+    def parse_lines(self, name, text, regexp, match_handler):
+        """
+        Parse a sequence of lines.  Match each line using the
+        given regexp, or (first) as a #define line.  Note that
+        indented lines are either #defines or are commentary!
+
+        If handling raises a ValueError, we complain and include
+        the appropriate line offset.  Then we sys.exit(1) (!).
+        """
+        define = re.compile(r'\s*#define\s+(\w+)\s+([^/]*)'
+                            r'(\s*/\*.*\*/)?\s*$')
+        for lineoff, line in enumerate(text.splitlines()):
+            try:
+                match = define.match(line)
+                if match:
+                    self.handle_define(*match.groups())
+                    continue
+                match = regexp.match(line)
+                if match:
+                    match_handler(*match.groups())
+                    continue
+                if len(line) and not line[0].isspace():
+                    raise ValueError('unhandled line: {0}'.format(line))
+            except ValueError as err:
+                print('Internal error while parsing {0}:\n'
+                      '    {1}\n'
+                      '(at line offset +{2}, discounting \\-newline)\n'
+                      'The original line in question reads:\n'
+                      '{3}'.format(name, err.args[0], lineoff, line),
+                      file=sys.stderr)
+                sys.exit(1)
+
+    def handle_define(self, name, value, comment):
+        """
+        Handle #define match.
+
+        The regexp has three fields, matching the name, value,
+        and possibly-empty comment; these are our arguments.
+        """
+        # Obnoxious: int(,0) requires new 0o syntax in py3k;
+        # work around by trying twice, once with base 0, then again
+        # with explicit base 8 if the first attempt fails.
+        try:
+            value = int(value, 0)
+        except ValueError:
+            value = int(value, 8)
+        if DEBUG:
+            print('define: defining {0} as {1:x}'.format(name, value),
+                  file=sys.stderr)
+        if name in self.defines:
+            raise ValueError('redefining {0}'.format(name))
+        self.defines[name] = (value, comment)
+
+    def handle_typedef(self, name, expr):
+        """
+        Handle typedef match.
+
+        The regexp has just two fields, the name and the expression
+        to parse (note that the expression must fit all on one line,
+        using backslash-newline if needed).
+
+        Typedefs may refer back to existing typedefs, so we pass
+        self.typedefs to _parse_expr().
+        """
+        seq = sequencer.Sequencer(name)
+        fields = _parse_expr(seq, expr, self.typedefs)
+        # Check for special string magic typedef.  (The name
+        # probably should be just 's' but we won't check that
+        # here.)
+        if len(fields) == 1 and fields[0] == _STRING_MAGIC:
+            cls = None
+        else:
+            cls = pfod.pfod(name, fields)
+        if DEBUG:
+            print('typedef: {0} = {1!r}; '.format(name, fields),
+                  end='', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        if name in self.typedefs:
+            raise ValueError('redefining {0}'.format(name))
+        self.typedefs[name] = cls, seq
+
+    def handle_proto_def(self, name, proto_version, value, expr):
+        """
+        Handle protocol definition.
+
+        The regexp matched:
+        - The name of the protocol option such as Tversion,
+          Rversion, Rlerror, etc.
+        - The protocol version, if any (.u or .L).
+        - The value, if specified.  If no value is specified
+          we use "the next value".
+        - The expression to parse.
+
+        As with typedefs, the expression must fit all on one
+        line.
+        """
+        if value:
+            value = int(value)
+        elif self.prev_proto_value is not None:
+            value = self.prev_proto_value + 1
+        else:
+            raise ValueError('{0}: missing protocol value'.format(name))
+        if value < 0 or value > 255:
+            raise ValueError('{0}: protocol value {1} out of '
+                             'range'.format(name, value))
+        self.prev_proto_value = value
+
+        seq = sequencer.Sequencer(name)
+        fields = _parse_expr(seq, expr, self.typedefs)
+        cls = pfod.pfod(name, fields)
+        if DEBUG:
+            print('proto: {0} = {1}; '.format(name, value),
+                  end='', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        if name in self.protocol:
+            raise ValueError('redefining {0}'.format(name))
+        self.protocol[name] = cls, value, proto_version, seq
+
+    def setup_header(self):
+        """
+        Handle header definition.
+
+        This is a bit gimmicky and uses some special cases,
+        because data is sized to dsize which is effectively
+        just size - 5.  We can't express this in our mini language,
+        so we just hard-code the sequencer and pfod.
+
+        In addition, the unpacker never gets the original packet's
+        size field, only the fcall and the data.
+        """
+        self.header_pfod = pfod.pfod('Header', 'size dsize fcall data')
+
+        seq = sequencer.Sequencer('Header-pack')
+        # size: 4 bytes
+        seq.append_encdec(None, sequencer.EncDecSimple('size', 4, None))
+        # fcall: 1 byte
+        seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+        # data: string of length dsize
+        seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+        if DEBUG:
+            print('Header-pack:', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        self.header_pack_seq = seq
+
+        seq = sequencer.Sequencer('Header-unpack')
+        seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+        seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+        if DEBUG:
+            print('Header-unpack:', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        self.header_unpack_seq = seq
+
+    def export(self, mod):
+        """
+        Dump results of internal parsing process
+        into our module namespace.
+
+        Note that we do not export the 's' typedef, which
+        did not define a data structure.
+
+        Check for name collisions while we're at it.
+        """
+        namespace = type('td', (object,), {})
+
+        # Export the typedefs (qid, stat).
+        setattr(mod, 'td', namespace)
+        for key in self.typedefs:
+            cls = self.typedefs[key][0]
+            if cls is None:
+                continue
+            setattr(namespace, key, cls)
+
+        # Export two sequencers for en/decoding stat fields
+        # (needed for reading directories and doing Twstat).
+        setattr(namespace, 'stat_seq', self.typedefs['stat'][1])
+        setattr(namespace, 'wirestat_seq', self.typedefs['wirestat'][1])
+
+        # Export the similar dirent decoder.
+        setattr(namespace, 'dirent_seq', self.typedefs['dirent'][1])
+
+        # Export the #define values
+        for key, val in self.defines.items():
+            if hasattr(namespace, key):
+                print('{0!r} is both a #define and a typedef'.format(key))
+                raise AssertionError('bad internal names')
+            setattr(namespace, key, val[0])
+
+        # Export Tattach, Rattach, Twrite, Rversion, etc values.
+        # Set up fcall_names[] table to map from value back to name.
+        # We also map fcall names to themselves, so given either a
+        # name or a byte code we can find out whether it's a valid
+        # fcall.
+        for key, val in self.protocol.items():
+            if hasattr(namespace, key):
+                prev_def = '#define' if key in self.defines else 'typedef'
+                print('{0!r} is both a {1} and a protocol '
+                      'value'.format(key, prev_def))
+                raise AssertionError('bad internal names')
+            setattr(namespace, key, val[1])
+            fcall_names[key] = key
+            fcall_names[val[1]] = key
+
+        # Hook up PFOD's for each protocol object -- for
+        # Tversion/Rversion, Twrite/Rwrite, Tlopen/Rlopen, etc.
+        # They go in the rrd name-space, and also in dictionaries
+        # per-protocol here, with the lookup pointing to a _PackInfo
+        # for the corresponding sequencer.
+        #
+        # Note that each protocol PFOD is optionally annotated with
+        # its specific version.  We know that .L > .u > plain; but
+        # all the "lesser" PFODs are available to all "greater"
+        # protocols at all times.
+        #
+        # (This is sort-of-wrong for Rerror vs Rlerror, but we
+        # don't bother to exclude Rerror from .L.)
+        #
+        # The PFODs themselves were already created, at parse time.
+        namespace = type('rrd', (object,), {})
+        setattr(mod, 'rrd', namespace)
+        for key, val in self.protocol.items():
+            cls = val[0]
+            proto_version = val[2]
+            seq = val[3]
+            packinfo = _PackInfo(seq)
+            if proto_version is None:
+                # all three protocols have it
+                self.plain[cls] = packinfo
+                self.dotu[cls] = packinfo
+                self.dotl[cls] = packinfo
+            elif proto_version == '.u':
+                # only .u and .L have it
+                self.dotu[cls] = packinfo
+                self.dotl[cls] = packinfo
+            elif proto_version == '.L':
+                # only .L has it
+                self.dotl[cls] = packinfo
+            else:
+                raise AssertionError('unknown protocol {1} for '
+                                     '{0}'.format(key, proto_version))
+            setattr(namespace, key, cls)
+
+_9p_data = _ProtoDefs()
+_9p_data.export(sys.modules[__name__])   # defines td and rrd in this module
+
+# Keyed by lower-cased version string; p9_version() does the lowering.
+_9p_versions = {
+    '9p2000': _P9Proto({'version': '9P2000'},
+                       {'.u': False},
+                       _9p_data,
+                       _9p_data.plain,
+                       0),
+    '9p2000.u': _P9Proto({'version': '9P2000.u'},
+                         {'.u': True},
+                         _9p_data,
+                         _9p_data.dotu,
+                         1),
+    '9p2000.l': _P9Proto({'version': '9P2000.L'},
+                         {'.u': True},
+                         _9p_data,
+                         _9p_data.dotl,
+                         2),
+}
+def p9_version(vers_string):
+    """
+    Look up the protocol implementation for a version string (str, or
+    bytes in py3k, decoded as UTF-8 with surrogateescape).  An unknown
+    version raises KeyError on the lower-cased text form of the argument.
+    """
+    key = vers_string
+    if isinstance(key, bytes) and not isinstance(key, str):
+        key = key.decode('utf-8', 'surrogateescape')
+    return _9p_versions[key.lower()]
+
+plain = p9_version('9p2000')
+dotu = p9_version('9p2000.u')
+dotl = p9_version('9p2000.L')   # p9_version() lower-cases, so '.L' is fine
+
+def qid_type2name(qidtype):
+    """
+    Return a short printable name for a qid type code.
+
+    >>> qid_type2name(td.QTDIR)
+    'dir'
+    >>> qid_type2name(td.QTAPPEND)
+    'append-only'
+    >>> qid_type2name(0xff)
+    'invalid(0xff)'
+    """
+    # Exact-match lookup.  Is it ever OK to have multiple bits set,
+    # e.g., both QTAPPEND and QTEXCL?  Anything not listed below is
+    # reported as invalid(<hex>).
+    known = {
+        td.QTDIR: 'dir',
+        td.QTAPPEND: 'append-only',
+        td.QTEXCL: 'exclusive',
+        td.QTMOUNT: 'mount',
+        td.QTAUTH: 'auth',
+        td.QTTMP: 'tmp',
+        td.QTSYMLINK: 'symlink',
+        td.QTFILE: 'file',
+    }
+    if qidtype in known:
+        return known[qidtype]
+    return 'invalid({0:#x})'.format(qidtype)
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()   # run the >>> examples in this module's docstrings
diff --git a/pytest/sequencer.py b/pytest/sequencer.py
new file mode 100644
index 000000000000..207d22986470
--- /dev/null
+++ b/pytest/sequencer.py
@@ -0,0 +1,653 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+
+#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA',
+#    'SequenceError', 'Sequencer']
+
+import abc
+import struct
+import sys
+
+_ProtoStruct = {
+    '1': struct.Struct('<B'),
+    '2': struct.Struct('<H'),
+    '4': struct.Struct('<I'),
+    '8': struct.Struct('<Q'),
+    '_string_': None,   # no Struct: EncDecSimple packs these itself
+}
+for _i in (1, 2, 4, 8):
+    _ProtoStruct[_i] = _ProtoStruct[str(_i)]
+del _i
+
+class EncDec(object):
+    """
+    Base class for en/de-coders, which are put into sequencers.
+
+    All have a name and arbitrary user-supplied auxiliary data
+    (default=None).
+
+    All provide a pack() and unpack().  The pack() function
+    returns a "bytes" value.  This is internally implemented as a
+    function apack() that returns a list of struct.pack() bytes,
+    and pack() just joins them up as needed.
+
+    The pack/unpack functions take a dictionary of variable names
+    and values, and a second dictionary for conditionals, but at
+    this level conditionals don't apply: they are just being
+    passed through.  Variable names do apply to array encoders.
+
+    EncDec also provides b2s() and s2b() static methods, which
+    convert strings to bytes and vice versa, as reversibly as
+    possible (using surrogateescape encoding).  In Python2 b2s() is
+    a no-op since the string type *is* the bytes type (<type
+    'unicode'> is the unicode-ized string type).
+
+    EncDec also provides b2u() and u2b() to convert to/from Unicode.
+
+    These are partly for internal use (all strings get converted
+    to UTF-8 byte sequences when coding a _string_ type) and partly
+    for doctests, where we just want some py2k/py3k compat hacks.
+    """
+    # py2 spelling; Python 3 ignores __metaclass__ (no abstract enforcement).
+    __metaclass__ = abc.ABCMeta
+    def __init__(self, name, aux):
+        self.name = name
+        self.aux = aux
+
+    @staticmethod
+    def b2u(byte_sequence):
+        "transform bytes to unicode"
+        return byte_sequence.decode('utf-8', 'surrogateescape')
+
+    @staticmethod
+    def u2b(unicode_sequence):
+        "transform unicode to bytes"
+        return unicode_sequence.encode('utf-8', 'surrogateescape')
+
+    if sys.version_info[0] >= 3:
+        b2s = b2u
+        @staticmethod
+        def s2b(string):
+            "transform string to bytes (leaves raw byte sequence unchanged)"
+            if isinstance(string, bytes):
+                return string
+            return string.encode('utf-8', 'surrogateescape')
+    else:
+        @staticmethod
+        def b2s(byte_sequence):
+            "transform bytes to string - no-op in python2.7"
+            return byte_sequence
+        @staticmethod
+        def s2b(string):
+            "transform string or unicode to bytes"
+            if isinstance(string, unicode):
+                return string.encode('utf-8', 'surrogateescape')
+            return string
+
+    def pack(self, vdict, cdict, val):
+        "encode value <val> into a byte-string"
+        return b''.join(self.apack(vdict, cdict, val))
+
+    @abc.abstractmethod
+    def apack(self, vdict, cdict, val):
+        "encode value <val> into [bytes1, b2, ..., bN]"
+
+    @abc.abstractmethod
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "unpack bytes from <bstring> at <offset>"
+
+
+class EncDecSimple(EncDec):
+    r"""
+    Encode/decode a simple (but named) field.  The field is not an
+    array, which requires using EncDecA, nor a typed object
+    like a qid or stat instance -- those require a Sequence and
+    EncDecTyped.
+
+    The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'.
+
+    Note: using b2s here is purely a doctest/testmod python2/python3
+    compat hack.  The output of e.pack is <type 'bytes'>; b2s
+    converts it to a string, purely for display purposes.  (It might
+    be better to map py2 output to bytes but they just print as a
+    string anyway.)  In normal use, you should not call b2s here.
+
+    >>> e = EncDecSimple('eggs', 2)
+    >>> e.b2s(e.pack({}, {}, 0))
+    '\x00\x00'
+    >>> e.b2s(e.pack({}, {}, 256))
+    '\x00\x01'
+
+    Values that cannot be packed produce a SequenceError:
+
+    >>> e.pack({}, {}, None)
+    Traceback (most recent call last):
+        ...
+    SequenceError: failed while packing 'eggs'=None
+    >>> e.pack({}, {}, -1)
+    Traceback (most recent call last):
+        ...
+    SequenceError: failed while packing 'eggs'=-1
+
+    Unpacking both returns a value, and tells how many bytes it
+    used out of the bytestring or byte-array argument.  If there
+    are not enough bytes remaining at the starting offset, it
+    raises a SequenceError, unless noerror=True (then unset
+    values are None)
+
+    >>> e.unpack({}, {}, b'\x00\x01', 0)
+    (256, 2)
+    >>> e.unpack({}, {}, b'', 0)
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'eggs'
+    >>> e.unpack({}, {}, b'', 0, noerror=True)
+    (None, 2)
+
+    Note that strings can be provided as regular strings, byte
+    strings (same as regular strings in py2k), or Unicode strings
+    (same as regular strings in py3k).  Unicode strings will be
+    converted to UTF-8 before being packed.  Since this leaves
+    7-bit characters alone, these examples work in both py2k and
+    py3k.  (Note: the UTF-8 encoding of u'\u1234' is
+    '\xe1\x88\xb4' or 225, 136, 180. The b2i trick below is
+    another py2k vs py3k special case just for doctests: py2k
+    tries to display the utf-8 encoded data as a string.)
+
+    >>> e = EncDecSimple('spam', '_string_')
+    >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes'))
+    '\x13\x00p3=unicode,p2=bytes'
+
+    >>> e.b2s(e.pack({}, {}, b'bytes'))
+    '\x05\x00bytes'
+
+    >>> import sys
+    >>> ispy3k = sys.version_info[0] >= 3
+
+    >>> b2i = lambda x: x if ispy3k else ord(x)
+    >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')]
+    [3, 0, 225, 136, 180]
+
+    The byte length of the utf-8 data cannot exceed 65535 since
+    the encoding has the length as a 2-byte field (a la the
+    encoding for 'eggs' here).  A too-long string produces
+    a SequenceError as well.
+
+    >>> e.pack({}, {}, 16384 * 'spam')
+    Traceback (most recent call last):
+        ...
+    SequenceError: string too long (len=65536) while packing 'spam'
+
+    Unpacking strings produces byte arrays.  (Of course,
+    in py2k these are also known as <type 'str'>.)
+
+    >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0)
+    >>> etype = bytes if ispy3k else str
+    >>> print(isinstance(unpacked[0], etype))
+    True
+    >>> e.b2s(unpacked[0])
+    'data'
+    >>> unpacked[1]
+    6
+
+    You may use e.b2s() to convert them to unicode strings in py3k,
+    or you may set e.autob2s.  This still only really does
+    anything in py3k, since py2k strings *are* bytes, so it's
+    really just intended for doctest purposes (see EncDecA):
+
+    >>> e.autob2s = True
+    >>> e.unpack({}, {}, b'\x07\x00stringy', 0)
+    ('stringy', 9)
+    """
+    def __init__(self, name, fmt, aux=None):
+        super(EncDecSimple, self).__init__(name, aux)
+        self.fmt = fmt
+        self.struct = _ProtoStruct[fmt]
+        self.autob2s = False
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r})'.format(self.__class__.__name__,
+                self.name, self.fmt)
+        return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+            self.name, self.fmt, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        "encode a value"
+        try:
+            if self.struct:
+                return [self.struct.pack(val)]
+            sval = self.s2b(val)
+            if len(sval) > 65535:
+                raise SequenceError('string too long (len={0:d}) '
+                    'while packing {1!r}'.format(len(sval), self.name))
+            return [EncDecSimple.string_len.pack(len(sval)), sval]
+        # Include AttributeError in case someone tries to, e.g.,
+        # pack name=None and self.s2b() tries to use .encode on it.
+        except (struct.error, AttributeError):
+            raise SequenceError('failed '
+                'while packing {0!r}={1!r}'.format(self.name, val))
+
+    def _unpack1(self, via, bstring, offset, noerror):
+        "internal function to unpack single item"
+        try:
+            tup = via.unpack_from(bstring, offset)
+        except struct.error as err:
+            if 'unpack_from requires a buffer of at least' in str(err):
+                if noerror:
+                    return None, offset + via.size
+                raise SequenceError('out of data '
+                    'while unpacking {0!r}'.format(self.name))
+            # not clear what to do here if noerror
+            raise SequenceError('failed '
+                'while unpacking {0!r}'.format(self.name))
+        assert len(tup) == 1
+        return tup[0], offset + via.size
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "decode a value; return the value and the new offset"
+        if self.struct:
+            return self._unpack1(self.struct, bstring, offset, noerror)
+        slen, offset = self._unpack1(EncDecSimple.string_len, bstring, offset,
+            noerror)
+        if slen is None:
+            return None, offset
+        nexto = offset + slen
+        if len(bstring) < nexto:
+            if noerror:
+                val = None
+            else:
+                raise SequenceError('out of data '
+                    'while unpacking {0!r}'.format(self.name))
+        else:
+            val = bstring[offset:nexto]
+            if self.autob2s:
+                val = self.b2s(val)
+        return val, nexto
+
+# Length prefix for _string_ values: 2-byte little-endian unsigned ('<H').
+EncDecSimple.string_len = _ProtoStruct[2]
+
+class EncDecTyped(EncDec):
+    r"""
+    EncDec for typed objects (which are built from PFODs, which are
+    a sneaky class variant of OrderedDict similar to namedtuple).
+
+    Calling the klass() function with no arguments must create an
+    instance with all-None members.
+
+    We also require a Sequencer to pack and unpack the members of
+    the underlying pfod.
+
+    >>> qid_s = Sequencer('qid')
+    >>> qid_s.append_encdec(None, EncDecSimple('type', 1))
+    >>> qid_s.append_encdec(None, EncDecSimple('version', 4))
+    >>> qid_s.append_encdec(None, EncDecSimple('path', 8))
+    >>> len(qid_s)
+    3
+
+    >>> from pfod import pfod
+    >>> qid = pfod('qid', ['type', 'version', 'path'])
+    >>> len(qid._fields)
+    3
+    >>> qid_inst = qid(1, 2, 3)
+    >>> qid_inst
+    qid(type=1, version=2, path=3)
+
+    >>> e = EncDecTyped(qid, 'aqid', qid_s)
+    >>> e.b2s(e.pack({}, {}, qid_inst))
+    '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'
+    >>> e.unpack({}, {},
+    ... b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0)
+    (qid(type=1, version=2, path=3), 13)
+
+    If an EncDecTyped instance has a conditional sequencer, note
+    that unpacking will leave un-selected items set to None (see
+    the Sequencer example below):
+
+    >>> breakfast = pfod('breakfast', 'eggs spam ham')
+    >>> breakfast()
+    breakfast(eggs=None, spam=None, ham=None)
+    >>> bfseq = Sequencer('breakfast')
+    >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1))
+    >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1))
+    >>> bfseq.append_encdec(None, EncDecSimple('ham', 1))
+    >>> e = EncDecTyped(breakfast, 'bfname', bfseq)
+    >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0)
+    (breakfast(eggs=2, spam=None, ham=1), 2)
+
+    This used just two of the three bytes: eggs=2, ham=1.
+
+    >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0)
+    (breakfast(eggs=2, spam=1, ham=4), 3)
+
+    This used the third byte, so ham=4.
+    """
+    def __init__(self, klass, name, sequence, aux=None):
+        assert len(sequence) == len(klass()._fields) # temporary
+        super(EncDecTyped, self).__init__(name, aux)
+        self.klass = klass
+        self.name = name
+        self.sequence = sequence
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+                self.klass, self.name, self.sequence)
+        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+            self.klass, self.name, self.sequence, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        """
+        Pack each of our instance variables.
+
+        Note that some packing may be conditional.
+        """
+        return self.sequence.apack(val, cdict)
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        """
+        Unpack each instance variable, into a new object of
+        self.klass.  Return the new instance and new offset.
+
+        Note that some unpacking may be conditional.
+        """
+        obj = self.klass()
+        offset = self.sequence.unpack_from(obj, cdict, bstring, offset, noerror)
+        return obj, offset
+
+class EncDecA(EncDec):
+    r"""
+    EncDec for arrays (repeated objects).
+
+    We take the name of repeat count variable, and a sub-coder
+    (Sequencer instance).  For instance, we can en/de-code
+    repeat='nwname' copies of name='wname', or nwname of
+    name='wqid', in a Twalk en/de-code.
+
+    Note that we don't pack or unpack the repeat count itself --
+    that must be done by higher level code.  We just get its value
+    from vdict.
+
+    >>> subcode = EncDecSimple('wname', '_string_')
+    >>> e = EncDecA('nwname', 'wname', subcode)
+    >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC']))
+    '\x01\x00A\x02\x00BC'
+
+    >>> subcode.autob2s = True # so that A and BC decode to py3k str
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0)
+    (['A', 'BC'], 7)
+
+    When using noerror, the first sub-item that fails to decode
+    completely starts the None-s.  Strings whose length fails to
+    decode are assumed to be zero bytes long as well, for the
+    purpose of showing the expected packet length:
+
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True)
+    (['A', None], 7)
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True)
+    (['A', None], 5)
+    >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True)
+    (['A', None, None], 7)
+
+    As a special case, supplying None for the sub-coder
+    makes the repeated item pack or unpack a simple byte
+    string.  (Note that autob2s is not supported here.)
+    A too-short byte string is simply truncated!
+
+    >>> e = EncDecA('count', 'data', None)
+    >>> e.b2s(e.pack({'count': 5}, {}, b'12345'))
+    '12345'
+    >>> x = list(e.unpack({'count': 3}, {}, b'123', 0))
+    >>> x[0] = e.b2s(x[0])
+    >>> x
+    ['123', 3]
+    >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True))
+    >>> x[0] = e.b2s(x[0])
+    >>> x
+    ['12', 3]
+    """
+    def __init__(self, repeat, name, sub, aux=None):
+        super(EncDecA, self).__init__(name, aux)
+        self.repeat = repeat
+        self.name = name
+        self.sub = sub
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+                self.repeat, self.name, self.sub)
+        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+            self.repeat, self.name, self.sub, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        "pack each val[i], for i in range(vdict[self.repeat])"
+        num = vdict[self.repeat]
+        assert num == len(val)
+        if self.sub is None:
+            assert isinstance(val, bytes)
+            return [val]
+        parts = []
+        for i in val:
+            parts.extend(self.sub.apack(vdict, cdict, i))
+        return parts
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "unpack repeatedly, per self.repeat, into new array."
+        num = vdict[self.repeat]
+        if num is None and noerror:
+            num = 0
+        else:
+            assert num >= 0
+        if self.sub is None:
+            nexto = offset + num
+            if len(bstring) < nexto and not noerror:
+                raise SequenceError('out of data '
+                    'while unpacking {0!r}'.format(self.name))
+            return bstring[offset:nexto], nexto
+        array = []
+        for i in range(num):
+            obj, offset = self.sub.unpack(vdict, cdict, bstring, offset,
+                noerror)
+            array.append(obj)
+        return array, offset
+
+class SequenceError(Exception):
+    "sequence error: item too big, or ran out of data"
+    pass
+
+class Sequencer(object):
+    r"""
+    A sequencer is an object that packs (marshals) or unpacks
+    (unmarshals) a series of objects, according to their EncDec
+    instances.
+
+    The objects themselves (and their values) come from, or
+    go into, a dictionary: <vdict>, the first argument to
+    pack/unpack.
+
+    Some fields may be conditional.  The conditions are in a
+    separate dictionary (the second or <cdict> argument).
+
+    Some objects may be dictionaries or PFODs, e.g., they may
+    be a Plan9 qid or stat structure.  These have their own
+    sub-encoding.
+
+    As with each encoder, we have both an apack() function
+    (returns a list of parts) and a plain pack().  Users should
+    mostly stick with plain pack().
+
+    >>> s = Sequencer('monty')
+    >>> s
+    Sequencer('monty')
+    >>> e = EncDecSimple('eggs', 2)
+    >>> s.append_encdec(None, e)
+    >>> s.append_encdec(None, EncDecSimple('spam', 1))
+    >>> s[0]
+    (None, EncDecSimple('eggs', 2))
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {}))
+    '\x01\x02A'
+
+    When particular fields are conditional, they appear in
+    packed output, or are taken from the byte-string during
+    unpacking, only if their condition is true.
+
+    As with struct, use unpack_from to start at an arbitrary
+    offset and/or omit verification that the entire byte-string
+    is consumed.
+
+    >>> s = Sequencer('python')
+    >>> s.append_encdec(None, e)
+    >>> s.append_encdec('.u', EncDecSimple('spam', 1))
+    >>> s[1]
+    ('.u', EncDecSimple('spam', 1))
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True}))
+    '\x01\x02A'
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False}))
+    '\x01\x02'
+
+    >>> d = {}
+    >>> s.unpack(d, {'.u': True}, b'\x01\x02A')
+    >>> print(d['eggs'], d['spam'])
+    513 65
+    >>> d = {}
+    >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0)
+    Traceback (most recent call last):
+        ...
+    SequenceError: 1 byte(s) unconsumed
+    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+    2
+    >>> print(d)
+    {'eggs': 513}
+
+    The incoming dictionary-like object may be pre-initialized
+    if you like; only sequences that decode are filled-in:
+
+    >>> d = {'eggs': None, 'spam': None}
+    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+    2
+    >>> print(d['eggs'], d['spam'])
+    513 None
+
+    Some objects may be arrays; if so their EncDec is actually
+    an EncDecA, the repeat count must be in the dictionary, and
+    the object itself must have a len() and be index-able:
+
+    >>> s = Sequencer('arr')
+    >>> s.append_encdec(None, EncDecSimple('n', 1))
+    >>> ae = EncDecSimple('array', 2)
+    >>> s.append_encdec(None, EncDecA('n', 'array', ae))
+    >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {}))
+    '\x02\x01\x01\x02\x02'
+
+    Unpacking an array creates a list of the number of items.
+    The EncDec encoder that decodes the number of items needs to
+    occur first in the sequencer, so that the dictionary will have
+    acquired the repeat-count variable's value by the time we hit
+    the array's encdec:
+
+    >>> d = {}
+    >>> s.unpack(d, {}, b'\x01\x04\x00')
+    >>> d['n'], d['array']
+    (1, [4])
+    """
+    def __init__(self, name):
+        self.name = name
+        self._codes = []
+        self.debug = False # or sys.stderr
+
+    def __repr__(self):
+        return '{0}({1!r})'.format(self.__class__.__name__, self.name)
+
+    __str__ = __repr__
+
+    def __len__(self):
+        return len(self._codes)
+
+    def __iter__(self):
+        return iter(self._codes)
+
+    def __getitem__(self, index):
+        return self._codes[index]
+
+    def dprint(self, *args, **kwargs):
+        if not self.debug:
+            return
+        if isinstance(self.debug, bool):
+            dest = sys.stdout
+        else:
+            dest = self.debug
+        print(*args, file=dest, **kwargs)
+
+    def append_encdec(self, cond, code):
+        "add EncDec en/de-coder, conditional on cond"
+        self._codes.append((cond, code))
+
+    def apack(self, vdict, cdict):
+        """
+        Produce packed representation of each field.
+        """
+        packed_data = []
+        for cond, code in self._codes:
+            # Skip this item if it's conditional on a false thing.
+            if cond is not None and not cdict[cond]:
+                self.dprint('skip %r - %r is False' % (code, cond))
+                continue
+
+            # Pack the item.
+            self.dprint('pack %r - no cond or %r is True' % (code, cond))
+            packed_data.extend(code.apack(vdict, cdict, vdict[code.name]))
+
+        return packed_data
+
+    def pack(self, vdict, cdict):
+        """
+        Flatten packed data.
+        """
+        return b''.join(self.apack(vdict, cdict))
+
+    def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False):
+        """
+        Unpack from byte string.
+
+        The values are unpacked into a dictionary vdict;
+        some of its entries may themselves be ordered
+        dictionaries created by typedefed codes.
+
+        Raises SequenceError if the string is too short,
+        unless you set noerror, in which case we assume
+        you want see what you can get out of the data.
+        """
+        for cond, code in self._codes:
+            # Skip this item if it's conditional on a false thing.
+            if cond is not None and not cdict[cond]:
+                self.dprint('skip %r - %r is False' % (code, cond))
+                continue
+
+            # Unpack the item.
+            self.dprint('unpack %r - no cond or %r is True' % (code, cond))
+            obj, offset = code.unpack(vdict, cdict, bstring, offset, noerror)
+            vdict[code.name] = obj
+
+        return offset
+
+    def unpack(self, vdict, cdict, bstring, noerror=False):
+        """
+        Like unpack_from but unless noerror=True, requires that
+        we completely use up the given byte string.
+        """
+        offset = self.unpack_from(vdict, cdict, bstring, 0, noerror)
+        if not noerror and offset != len(bstring):
+            raise SequenceError('{0} byte(s) unconsumed'.format(
+                len(bstring) - offset))
+
+if __name__ == '__main__':  # only when run as a script, not on import
+    import doctest
+    doctest.testmod()  # run the doctest examples embedded in this module's docstrings
diff --git a/pytest/testconf.ini.sample b/pytest/testconf.ini.sample
new file mode 100644
index 000000000000..a9e252ac7419
--- /dev/null
+++ b/pytest/testconf.ini.sample
@@ -0,0 +1,16 @@
+# test configuration
+
+[client]
+server = localhost
+port = 12345
+# timeout is in seconds
+timeout = 0.1
+loglevel = INFO
+logfile = ./ctest.log
+# logfmt = ...
+# protocol = 9p2000, 9p2000.u, or 9p2000.L
+protocol = 9p2000.L
+only_dotl = true
+may_downgrade = False
+uname = anonymous
+n_uname = 1001
diff --git a/request.c b/request.c
new file mode 100644
index 000000000000..357bd23bf98a
--- /dev/null
+++ b/request.c
@@ -0,0 +1,1440 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fcall.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "linux_errno.h"
+#include "backend/backend.h"
+#include "threadpool.h"
+
+#define N(x)    (sizeof(x) / sizeof(*(x)))	/* element count; x must be a true array */
+
+static int l9p_dispatch_tversion(struct l9p_request *req);
+static int l9p_dispatch_tattach(struct l9p_request *req);
+static int l9p_dispatch_tclunk(struct l9p_request *req);
+static int l9p_dispatch_tcreate(struct l9p_request *req);
+static int l9p_dispatch_topen(struct l9p_request *req);
+static int l9p_dispatch_tread(struct l9p_request *req);
+static int l9p_dispatch_tremove(struct l9p_request *req);
+static int l9p_dispatch_tstat(struct l9p_request *req);
+static int l9p_dispatch_twalk(struct l9p_request *req);
+static int l9p_dispatch_twrite(struct l9p_request *req);
+static int l9p_dispatch_twstat(struct l9p_request *req);
+static int l9p_dispatch_tstatfs(struct l9p_request *req);
+static int l9p_dispatch_tlopen(struct l9p_request *req);
+static int l9p_dispatch_tlcreate(struct l9p_request *req);
+static int l9p_dispatch_tsymlink(struct l9p_request *req);
+static int l9p_dispatch_tmknod(struct l9p_request *req);
+static int l9p_dispatch_trename(struct l9p_request *req);
+static int l9p_dispatch_treadlink(struct l9p_request *req);
+static int l9p_dispatch_tgetattr(struct l9p_request *req);
+static int l9p_dispatch_tsetattr(struct l9p_request *req);
+static int l9p_dispatch_txattrwalk(struct l9p_request *req);
+static int l9p_dispatch_txattrcreate(struct l9p_request *req);
+static int l9p_dispatch_treaddir(struct l9p_request *req);
+static int l9p_dispatch_tfsync(struct l9p_request *req);
+static int l9p_dispatch_tlock(struct l9p_request *req);
+static int l9p_dispatch_tgetlock(struct l9p_request *req);
+static int l9p_dispatch_tlink(struct l9p_request *req);
+static int l9p_dispatch_tmkdir(struct l9p_request *req);
+static int l9p_dispatch_trenameat(struct l9p_request *req);
+static int l9p_dispatch_tunlinkat(struct l9p_request *req);
+
+/*
+ * Each Txxx handler has a "must run" flag.  If it is false,
+ * we check for a flush request before calling the handler.
+ * If a flush is already requested we can instantly fail the
+ * request with EINTR.
+ *
+ * Tclunk and Tremove must run because they make their fids
+ * become invalid.  Tversion and Tattach should never get
+ * a flush request applied (it makes no sense as the connection
+ * is not really running yet), so it should be harmless to
+ * set them either way, but for now we have them as must-run.
+ * Flushing a Tflush is not really allowed either, so we keep
+ * Tflush as must-run too (although Tflush requests run without
+ * being threaded anyway).
+ */
+struct l9p_handler {
+	enum l9p_ftype type;			/* request type this entry handles */
+	int (*handler)(struct l9p_request *);	/* returns 0 or an errno value */
+	bool must_run;				/* run even if a flush is already requested */
+};
+
+/* Before a version is negotiated, only Tversion is accepted. */
+static const struct l9p_handler l9p_handlers_no_version[] = {
+	{ .type = L9P_TVERSION, .handler = l9p_dispatch_tversion, .must_run = true },
+};
+
+/* Requests accepted on a plain 9P2000 connection. */
+static const struct l9p_handler l9p_handlers_base[] = {
+	{ .type = L9P_TVERSION, .handler = l9p_dispatch_tversion, .must_run = true },
+	{ .type = L9P_TATTACH, .handler = l9p_dispatch_tattach, .must_run = true },
+	{ .type = L9P_TCLUNK, .handler = l9p_dispatch_tclunk, .must_run = true },
+	{ .type = L9P_TFLUSH, .handler = l9p_threadpool_tflush, .must_run = true },
+	{ .type = L9P_TCREATE, .handler = l9p_dispatch_tcreate, .must_run = false },
+	{ .type = L9P_TOPEN, .handler = l9p_dispatch_topen, .must_run = false },
+	{ .type = L9P_TREAD, .handler = l9p_dispatch_tread, .must_run = false },
+	{ .type = L9P_TWRITE, .handler = l9p_dispatch_twrite, .must_run = false },
+	{ .type = L9P_TREMOVE, .handler = l9p_dispatch_tremove, .must_run = true },
+	{ .type = L9P_TSTAT, .handler = l9p_dispatch_tstat, .must_run = false },
+	{ .type = L9P_TWALK, .handler = l9p_dispatch_twalk, .must_run = false },
+	{ .type = L9P_TWSTAT, .handler = l9p_dispatch_twstat, .must_run = false },
+};
+/* Requests accepted on a 9P2000.u connection (currently the same set as 9P2000). */
+static const struct l9p_handler l9p_handlers_dotu[] = {
+	{ .type = L9P_TVERSION, .handler = l9p_dispatch_tversion, .must_run = true },
+	{ .type = L9P_TATTACH, .handler = l9p_dispatch_tattach, .must_run = true },
+	{ .type = L9P_TCLUNK, .handler = l9p_dispatch_tclunk, .must_run = true },
+	{ .type = L9P_TFLUSH, .handler = l9p_threadpool_tflush, .must_run = true },
+	{ .type = L9P_TCREATE, .handler = l9p_dispatch_tcreate, .must_run = false },
+	{ .type = L9P_TOPEN, .handler = l9p_dispatch_topen, .must_run = false },
+	{ .type = L9P_TREAD, .handler = l9p_dispatch_tread, .must_run = false },
+	{ .type = L9P_TWRITE, .handler = l9p_dispatch_twrite, .must_run = false },
+	{ .type = L9P_TREMOVE, .handler = l9p_dispatch_tremove, .must_run = true },
+	{ .type = L9P_TSTAT, .handler = l9p_dispatch_tstat, .must_run = false },
+	{ .type = L9P_TWALK, .handler = l9p_dispatch_twalk, .must_run = false },
+	{ .type = L9P_TWSTAT, .handler = l9p_dispatch_twstat, .must_run = false },
+};
+/* Requests accepted on a 9P2000.L connection: the base set plus the .L additions. */
+static const struct l9p_handler l9p_handlers_dotL[] = {
+	{ .type = L9P_TVERSION, .handler = l9p_dispatch_tversion, .must_run = true },
+	{ .type = L9P_TATTACH, .handler = l9p_dispatch_tattach, .must_run = true },
+	{ .type = L9P_TCLUNK, .handler = l9p_dispatch_tclunk, .must_run = true },
+	{ .type = L9P_TFLUSH, .handler = l9p_threadpool_tflush, .must_run = true },
+	{ .type = L9P_TCREATE, .handler = l9p_dispatch_tcreate, .must_run = false },
+	{ .type = L9P_TOPEN, .handler = l9p_dispatch_topen, .must_run = false },
+	{ .type = L9P_TREAD, .handler = l9p_dispatch_tread, .must_run = false },
+	{ .type = L9P_TWRITE, .handler = l9p_dispatch_twrite, .must_run = false },
+	{ .type = L9P_TREMOVE, .handler = l9p_dispatch_tremove, .must_run = true },
+	{ .type = L9P_TSTAT, .handler = l9p_dispatch_tstat, .must_run = false },
+	{ .type = L9P_TWALK, .handler = l9p_dispatch_twalk, .must_run = false },
+	{ .type = L9P_TWSTAT, .handler = l9p_dispatch_twstat, .must_run = false },
+	{ .type = L9P_TSTATFS, .handler = l9p_dispatch_tstatfs, .must_run = false },
+	{ .type = L9P_TLOPEN, .handler = l9p_dispatch_tlopen, .must_run = false },
+	{ .type = L9P_TLCREATE, .handler = l9p_dispatch_tlcreate, .must_run = false },
+	{ .type = L9P_TSYMLINK, .handler = l9p_dispatch_tsymlink, .must_run = false },
+	{ .type = L9P_TMKNOD, .handler = l9p_dispatch_tmknod, .must_run = false },
+	{ .type = L9P_TRENAME, .handler = l9p_dispatch_trename, .must_run = false },
+	{ .type = L9P_TREADLINK, .handler = l9p_dispatch_treadlink, .must_run = false },
+	{ .type = L9P_TGETATTR, .handler = l9p_dispatch_tgetattr, .must_run = false },
+	{ .type = L9P_TSETATTR, .handler = l9p_dispatch_tsetattr, .must_run = false },
+	{ .type = L9P_TXATTRWALK, .handler = l9p_dispatch_txattrwalk, .must_run = false },
+	{ .type = L9P_TXATTRCREATE, .handler = l9p_dispatch_txattrcreate, .must_run = false },
+	{ .type = L9P_TREADDIR, .handler = l9p_dispatch_treaddir, .must_run = false },
+	{ .type = L9P_TFSYNC, .handler = l9p_dispatch_tfsync, .must_run = false },
+	{ .type = L9P_TLOCK, .handler = l9p_dispatch_tlock, .must_run = true },
+	{ .type = L9P_TGETLOCK, .handler = l9p_dispatch_tgetlock, .must_run = true },
+	{ .type = L9P_TLINK, .handler = l9p_dispatch_tlink, .must_run = false },
+	{ .type = L9P_TMKDIR, .handler = l9p_dispatch_tmkdir, .must_run = false },
+	{ .type = L9P_TRENAMEAT, .handler = l9p_dispatch_trenameat, .must_run = false },
+	{ .type = L9P_TUNLINKAT, .handler = l9p_dispatch_tunlinkat, .must_run = false },
+};
+
+/*
+ * Known protocol versions, indexed by the connection's version
+ * number.  Each entry names the version and gives its dispatch
+ * table.
+ *
+ * NB: index 0 is reserved for new connections, and is a protocol
+ * that handles only L9P_TVERSION.  Once we get a valid version,
+ * we start a new session using its dispatch table.
+ */
+static const struct {
+	const char *name;
+	const struct l9p_handler *handlers;
+	int n_handlers;
+} l9p_versions[] = {
+	{
+		.name = "<none>",
+		.handlers = l9p_handlers_no_version,
+		.n_handlers = N(l9p_handlers_no_version),
+	},
+	{
+		.name = "9P2000",
+		.handlers = l9p_handlers_base,
+		.n_handlers = N(l9p_handlers_base),
+	},
+	{
+		.name = "9P2000.u",
+		.handlers = l9p_handlers_dotu,
+		.n_handlers = N(l9p_handlers_dotu),
+	},
+	{
+		.name = "9P2000.L",
+		.handlers = l9p_handlers_dotL,
+		.n_handlers = N(l9p_handlers_dotL),
+	},
+};
+
+/*
+ * Look up the handler for this request in the dispatch table of
+ * the connection's current protocol version and run it.
+ *
+ * Returns the handler's result, EINTR if a flush was requested
+ * before the request started and the handler is not must-run,
+ * or ENOSYS if the request type has no handler in this version.
+ *
+ * It's our caller's responsibility to respond.
+ */
+int
+l9p_dispatch_request(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	const struct l9p_handler *table, *hp;
+	size_t idx, count;
+	bool flushed;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+#endif
+
+	flushed = (req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START);
+
+	/* Linear search of this version's table; hp stays NULL if no match. */
+	table = l9p_versions[conn->lc_version].handlers;
+	count = (size_t)l9p_versions[conn->lc_version].n_handlers;
+	hp = NULL;
+	for (idx = 0; idx < count; idx++) {
+		if (table[idx].type == req->lr_req.hdr.type) {
+			hp = &table[idx];
+			break;
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	if (flushed) {
+		sbuf_cat(sb, "FLUSH requested pre-dispatch");
+		if (hp != NULL && hp->must_run)
+			sbuf_cat(sb, ", but must run");
+		sbuf_cat(sb, ": ");
+	}
+	l9p_describe_fcall(&req->lr_req, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	if (hp == NULL) {
+		L9P_LOG(L9P_WARNING, "unknown request of type %d",
+		    req->lr_req.hdr.type);
+		return (ENOSYS);
+	}
+
+	/* An already-flushed request is skipped unless it must run. */
+	if (flushed && !hp->must_run)
+		return (EINTR);
+	return (hp->handler(req));
+}
+
+/*
+ * Translate BSD errno to 9P2000/9P2000.u errno.
+ *
+ * A few values are remapped through a small table; anything else
+ * up to and including ERANGE is passed through unchanged, and
+ * everything above that becomes EIO.
+ */
+static inline int
+e29p(int errnum)
+{
+	static int const remap[] = {
+		[ENOTEMPTY] = EPERM,
+		[EDQUOT] = EPERM,
+		[ENOSYS] = EPERM,	/* ??? */
+	};
+	size_t idx = (size_t)errnum;
+
+	/* A zero entry means "no explicit mapping". */
+	if (idx < N(remap) && remap[idx] != 0)
+		return (remap[idx]);
+	return (errnum <= ERANGE ? errnum : EIO);	/* EIO: ??? */
+}
+
+/*
+ * Translate BSD errno to Linux errno.
+ *
+ * The table is indexed by the local errno value and holds the
+ * corresponding LINUX_* number; entries left at zero have no
+ * explicit mapping.  Lookup order is: negative values (returned
+ * negated), the table, pass-through of values up to ERANGE, and
+ * finally a logged fallback to LINUX_ENOTRECOVERABLE.
+ */
+static inline int
+e2linux(int errnum)
+{
+	static int const table[] = {
+		[EDEADLK] = LINUX_EDEADLK,
+		[EAGAIN] = LINUX_EAGAIN,
+		[EINPROGRESS] = LINUX_EINPROGRESS,
+		[EALREADY] = LINUX_EALREADY,
+		[ENOTSOCK] = LINUX_ENOTSOCK,
+		[EDESTADDRREQ] = LINUX_EDESTADDRREQ,
+		[EMSGSIZE] = LINUX_EMSGSIZE,
+		[EPROTOTYPE] = LINUX_EPROTOTYPE,
+		[ENOPROTOOPT] = LINUX_ENOPROTOOPT,
+		[EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT,
+		[ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT,
+		[EOPNOTSUPP] = LINUX_EOPNOTSUPP,
+		[EPFNOSUPPORT] = LINUX_EPFNOSUPPORT,
+		[EAFNOSUPPORT] = LINUX_EAFNOSUPPORT,
+		[EADDRINUSE] = LINUX_EADDRINUSE,
+		[EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL,
+		[ENETDOWN] = LINUX_ENETDOWN,
+		[ENETUNREACH] = LINUX_ENETUNREACH,
+		[ENETRESET] = LINUX_ENETRESET,
+		[ECONNABORTED] = LINUX_ECONNABORTED,
+		[ECONNRESET] = LINUX_ECONNRESET,
+		[ENOBUFS] = LINUX_ENOBUFS,
+		[EISCONN] = LINUX_EISCONN,
+		[ENOTCONN] = LINUX_ENOTCONN,
+		[ESHUTDOWN] = LINUX_ESHUTDOWN,
+		[ETOOMANYREFS] = LINUX_ETOOMANYREFS,
+		[ETIMEDOUT] = LINUX_ETIMEDOUT,
+		[ECONNREFUSED] = LINUX_ECONNREFUSED,
+		[ELOOP] = LINUX_ELOOP,
+		[ENAMETOOLONG] = LINUX_ENAMETOOLONG,
+		[EHOSTDOWN] = LINUX_EHOSTDOWN,
+		[EHOSTUNREACH] = LINUX_EHOSTUNREACH,
+		[ENOTEMPTY] = LINUX_ENOTEMPTY,
+		[EPROCLIM] = LINUX_EAGAIN,
+		[EUSERS] = LINUX_EUSERS,
+		[EDQUOT] = LINUX_EDQUOT,
+		[ESTALE] = LINUX_ESTALE,
+		[EREMOTE] = LINUX_EREMOTE,
+		/* EBADRPC = unmappable? */
+		/* ERPCMISMATCH = unmappable? */
+		/* EPROGUNAVAIL = unmappable? */
+		/* EPROGMISMATCH = unmappable? */
+		/* EPROCUNAVAIL = unmappable? */
+		[ENOLCK] = LINUX_ENOLCK,
+		[ENOSYS] = LINUX_ENOSYS,
+		/* EFTYPE = unmappable? */
+		/* EAUTH = unmappable? */
+		/* ENEEDAUTH = unmappable? */
+		[EIDRM] = LINUX_EIDRM,
+		[ENOMSG] = LINUX_ENOMSG,
+		[EOVERFLOW] = LINUX_EOVERFLOW,
+		[ECANCELED] = LINUX_ECANCELED,
+		[EILSEQ] = LINUX_EILSEQ,
+		/* EDOOFUS = unmappable? */
+		[EBADMSG] = LINUX_EBADMSG,
+		[EMULTIHOP] = LINUX_EMULTIHOP,
+		[ENOLINK] = LINUX_ENOLINK,
+		[EPROTO] = LINUX_EPROTO,
+		/* ENOTCAPABLE = unmappable? */
+#ifdef ECAPMODE
+		[ECAPMODE] = EPERM,
+#endif
+#ifdef ENOTRECOVERABLE
+		[ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE,
+#endif
+#ifdef EOWNERDEAD
+		[EOWNERDEAD] = LINUX_EOWNERDEAD,
+#endif
+	};
+
+	/*
+	 * In case we want to return a raw Linux errno, allow negative
+	 * values a la Linux kernel internals: a caller passes -N to
+	 * have N sent as-is, bypassing the table below.
+	 *
+	 * Values up to ERANGE are shared across systems (see
+	 * linux_errno.h), except for EAGAIN.
+	 */
+	if (errnum < 0)
+		return (-errnum);
+
+	if ((size_t)errnum < N(table) && table[errnum] != 0)	/* explicit mapping */
+		return (table[errnum]);
+
+	if (errnum <= ERANGE)	/* same number on both systems */
+		return (errnum);
+
+	L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable",
+	    errnum);
+
+	return (LINUX_ENOTRECOVERABLE);	/* ??? */
+}
+
+/*
+ * Send response to request, or possibly just drop request.
+ * We also need to know whether to remove the request from
+ * the tag hash table.
+ *
+ * The response header is filled in from req->lr_error: on
+ * success the reply type is the request type plus one; on
+ * failure it is Rlerror (9P2000.L, with a Linux errno) or
+ * Rerror (other versions, with strerror() text and a 9P errno).
+ *
+ * If drop is true, or the response cannot be packed, the
+ * transport's lt_drop_response is called instead of
+ * lt_send_response.  If rmtag is true the request's tag is
+ * removed from the connection's request table.
+ *
+ * In all cases the request and response fcalls and req itself
+ * are freed, so req must not be used after this returns.
+ */
+void
+l9p_respond(struct l9p_request *req, bool drop, bool rmtag)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	size_t iosize;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+	const char *ftype;
+#endif
+	int error;
+
+	req->lr_resp.hdr.tag = req->lr_req.hdr.tag;
+
+	error = req->lr_error;
+	if (error == 0)
+		req->lr_resp.hdr.type = req->lr_req.hdr.type + 1;
+	else {
+		if (conn->lc_version == L9P_2000L) {
+			req->lr_resp.hdr.type = L9P_RLERROR;
+			req->lr_resp.error.errnum = (uint32_t)e2linux(error);
+		} else {
+			req->lr_resp.hdr.type = L9P_RERROR;
+			req->lr_resp.error.ename = strerror(error);
+			req->lr_resp.error.errnum = (uint32_t)e29p(error);
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	/*
+	 * Start from the empty prefix so that ftype is never read
+	 * uninitialized, even if lr_flushstate holds a value that
+	 * has no case below.
+	 */
+	ftype = "";
+	switch (req->lr_flushstate) {
+	case L9P_FLUSH_NONE:
+		break;
+	case L9P_FLUSH_REQUESTED_PRE_START:
+		ftype = "FLUSH requested pre-dispatch: ";
+		break;
+	case L9P_FLUSH_REQUESTED_POST_START:
+		ftype = "FLUSH requested while running: ";
+		break;
+	case L9P_FLUSH_TOOLATE:
+		ftype = "FLUSH requested too late: ";
+		break;
+	}
+	L9P_LOG(L9P_DEBUG, "%s%s%s",
+	    drop ? "DROP: " : "", ftype, sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	/* Nothing to pack when the response is being dropped anyway. */
+	error = drop ? 0 :
+	    l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version);
+	if (rmtag)
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+	if (error != 0) {
+		/* A response we cannot pack is treated as a drop. */
+		L9P_LOG(L9P_ERROR, "cannot pack response");
+		drop = true;
+	}
+
+	if (drop) {
+		conn->lc_lt.lt_drop_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    conn->lc_lt.lt_aux);
+	} else {
+		iosize = req->lr_resp_msg.lm_size;
+
+		/*
+		 * Include I/O size in calculation for Rread and
+		 * Rreaddir responses.
+		 */
+		if (req->lr_resp.hdr.type == L9P_RREAD ||
+		    req->lr_resp.hdr.type == L9P_RREADDIR)
+			iosize += req->lr_resp.io.count;
+
+		conn->lc_lt.lt_send_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    iosize, conn->lc_lt.lt_aux);
+	}
+
+	l9p_freefcall(&req->lr_req);
+	l9p_freefcall(&req->lr_resp);
+
+	free(req);
+}
+
+/*
+ * Set up msg as a fresh cursor over the request's data iovecs:
+ * size and cursor are reset to zero and the iovec array is copied
+ * from req->lr_data_iov.
+ *
+ * This allows a caller to iterate through the data in a
+ * read or write request (creating the data if packing,
+ * scanning through it if unpacking).  This is used for
+ * writing readdir entries, so mode should be L9P_PACK
+ * (but we allow L9P_UNPACK so that debug code can also scan
+ * through the data later, if desired).
+ *
+ * This relies on the Tread op having positioned the request's
+ * iov to the beginning of the data buffer (note the l9p_seek_iov
+ * in l9p_dispatch_tread).
+ */
+void
+l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode)
+{
+
+	msg->lm_mode = mode;
+	msg->lm_size = 0;
+
+	/* Cursor starts at the first byte of the first iovec. */
+	msg->lm_cursor_offset = 0;
+	msg->lm_cursor_iov = 0;
+
+	msg->lm_niov = req->lr_data_niov;
+	memcpy(&msg->lm_iov[0], &req->lr_data_iov[0],
+	    req->lr_data_niov * sizeof (struct iovec));
+}
+
+/* Constraint bits for fid_lookup(); OR them together as needed. */
+enum fid_lookup_flags {
+	F_REQUIRE_OPEN = 1 << 0,	/* require that the file be marked OPEN */
+	F_REQUIRE_DIR = 1 << 1,		/* require that the file be marked ISDIR */
+	F_REQUIRE_XATTR = 1 << 2,	/* require that the file be marked XATTR */
+	F_REQUIRE_AUTH = 1 << 3,	/* require that the fid be marked AUTH */
+	F_FORBID_OPEN = 1 << 4,		/* forbid that the file be marked OPEN */
+	F_FORBID_DIR = 1 << 5,		/* forbid that the file be marked ISDIR */
+	F_FORBID_XATTR = 1 << 6,	/* forbid that the file be marked XATTR */
+	F_ALLOW_AUTH = 1 << 7,		/* allow that the fid be marked AUTH */
+};
+
+/*
+ * Look up a fid.  It must correspond to a valid file, else we return
+ * the given errno (some "not a valid fid" calls must return EIO and
+ * some must return EINVAL and qemu returns ENOENT in other cases and
+ * so on, so we just provide a general "return this error number").
+ *
+ * Callers may also set constraints: fid must be (or not be) open,
+ * must be (or not be) a directory, must be (or not be) an xattr.
+ * A violated open or xattr constraint yields EINVAL; a violated
+ * directory constraint yields ENOTDIR or EISDIR.
+ *
+ * Only one op has a fid that *must* be an auth fid.  Most ops forbid
+ * auth fids.  So instead of FORBID we have ALLOW here and the default
+ * is FORBID.
+ *
+ * On success, stores the fid in *afile and returns 0; on any failure
+ * *afile is left untouched.
+ */
+static inline int
+fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags,
+    struct l9p_fid **afile)
+{
+	struct l9p_fid *fp;
+
+	fp = ht_find(&conn->lc_files, fid);
+	if (fp == NULL)
+		return (err);
+
+	/*
+	 * As soon as we go multithreaded / async, this
+	 * assert has to become "return EINVAL" or "return err".
+	 *
+	 * We may also need a way to mark a fid as
+	 * "in async op" (valid for some purposes, but cannot be
+	 * used elsewhere until async op is completed or aborted).
+	 *
+	 * For now, this serves for bug-detecting.
+	 */
+	assert(l9p_fid_isvalid(fp));
+
+	/*
+	 * Note that we're inline expanded and flags is constant,
+	 * so unnecessary tests just drop out entirely.
+	 */
+	if ((flags & F_REQUIRE_OPEN) != 0 && !l9p_fid_isopen(fp))
+		return (EINVAL);
+	if ((flags & F_FORBID_OPEN) != 0 && l9p_fid_isopen(fp))
+		return (EINVAL);
+
+	if ((flags & F_REQUIRE_DIR) != 0 && !l9p_fid_isdir(fp))
+		return (ENOTDIR);
+	if ((flags & F_FORBID_DIR) != 0 && l9p_fid_isdir(fp))
+		return (EISDIR);
+
+	if ((flags & F_REQUIRE_XATTR) != 0 && !l9p_fid_isxattr(fp))
+		return (EINVAL);
+	if ((flags & F_FORBID_XATTR) != 0 && l9p_fid_isxattr(fp))
+		return (EINVAL);
+
+	if (l9p_fid_isauth(fp)) {
+		/* Auth fids are rejected unless required or allowed. */
+		if ((flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) == 0)
+			return (EINVAL);
+	} else {
+		/* Not an auth fid, but the caller insists on one. */
+		if ((flags & F_REQUIRE_AUTH) != 0)
+			return (EINVAL);
+	}
+
+	*afile = fp;
+	return (0);
+}
+
+/*
+ * Append variable-size stat object and adjust io count.
+ * Returns 0 if the entire stat object was packed, -1 if not
+ * (either it would exceed the count the request asked for, or
+ * packing it into msg failed).
+ * A fully packed object updates the request's io count.
+ *
+ * Caller must use their own private l9p_message object since
+ * a partially packed object will leave the message object in
+ * a useless state.
+ *
+ * Frees the stat object, whether or not it was packed.
+ */
+int
+l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *st)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	uint16_t size;
+	int ret;
+
+	size = l9p_sizeof_stat(st, conn->lc_version);
+	assert(msg->lm_mode == L9P_PACK);
+
+	/* Only attempt to pack when the object still fits. */
+	if (req->lr_resp.io.count + size <= req->lr_req.io.count &&
+	    l9p_pustat(msg, st, conn->lc_version) >= 0) {
+		req->lr_resp.io.count += size;
+		ret = 0;
+	} else {
+		ret = -1;
+	}
+
+	l9p_freestat(st);
+	return (ret);
+}
+
+/*
+ * Handle Tversion: negotiate the protocol version and msize.
+ *
+ * The client's version string is matched against l9p_versions[];
+ * an unrecognized string fails with ENOSYS.  The connection then
+ * runs the lower of the client's version and the server's
+ * ls_max_version, and the smaller of the two msize values.
+ *
+ * The Rversion reply carries the version actually selected (which
+ * may be lower than the one the client asked for), so that both
+ * sides agree on the dialect in use.
+ *
+ * Returns 0 on success, ENOSYS for an unsupported version, or
+ * ENOMEM if the reply string cannot be allocated; connection
+ * state is left untouched on failure.
+ */
+static int
+l9p_dispatch_tversion(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_server *server = conn->lc_server;
+	enum l9p_version remote_version = L9P_INVALID_VERSION;
+	enum l9p_version version;
+	char *version_name;
+	size_t i;
+
+	/*
+	 * Entry 0 is the "<none>" placeholder for connections that
+	 * have not negotiated yet; it is never a valid client request,
+	 * so start matching at entry 1.
+	 */
+	for (i = 1; i < N(l9p_versions); i++) {
+		if (strcmp(req->lr_req.version.version,
+		    l9p_versions[i].name) == 0) {
+			remote_version = (enum l9p_version)i;
+			break;
+		}
+	}
+
+	if (remote_version == L9P_INVALID_VERSION) {
+		L9P_LOG(L9P_ERROR, "unsupported remote version: %s",
+		    req->lr_req.version.version);
+		return (ENOSYS);
+	}
+
+	L9P_LOG(L9P_INFO, "remote version: %s",
+	    l9p_versions[remote_version].name);
+	L9P_LOG(L9P_INFO, "local version: %s",
+	    l9p_versions[server->ls_max_version].name);
+
+	/*
+	 * Allocate the reply string before touching the connection,
+	 * so an allocation failure leaves the connection unchanged.
+	 */
+	version = MIN(remote_version, server->ls_max_version);
+	version_name = strdup(l9p_versions[version].name);
+	if (version_name == NULL)
+		return (ENOMEM);
+
+	conn->lc_version = version;
+	conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize);
+	/*
+	 * NOTE(review): 24 is presumably the per-message header
+	 * overhead; an msize below 24 would wrap here -- confirm
+	 * whether such clients should be rejected.
+	 */
+	conn->lc_max_io_size = conn->lc_msize - 24;
+	req->lr_resp.version.version = version_name;
+	req->lr_resp.version.msize = conn->lc_msize;
+	return (0);
+}
+
+/*
+ * Handle Tattach: allocate the new fid and hand the request to
+ * the backend's attach method.
+ *
+ * Returns 0 on success.  Fails with the auth-fid lookup error,
+ * with EINVAL if the fid cannot be allocated, or with whatever
+ * the backend returns (in which case the new fid is removed).
+ */
+static int
+l9p_dispatch_tattach(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/*
+	 * We still don't have Tauth yet, but let's code this part
+	 * anyway.
+	 *
+	 * Look up the auth fid first since if it fails we can just
+	 * return immediately.
+	 */
+	if (req->lr_req.tattach.afid == L9P_NOFID) {
+		req->lr_fid2 = NULL;
+	} else {
+		error = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL,
+		    F_REQUIRE_AUTH, &req->lr_fid2);
+		if (error != 0)
+			return (error);
+	}
+
+	fid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid);
+	if (fid == NULL)
+		return (EINVAL);
+	req->lr_fid = fid;
+
+	be = conn->lc_server->ls_backend;
+
+	/* For backend convenience, set NONUNAME on 9P2000. */
+	if (conn->lc_version == L9P_2000)
+		req->lr_req.tattach.n_uname = L9P_NONUNAME;
+
+	error = be->attach(be->softc, req);
+	if (error != 0) {
+		/* On failure, disconnect: the fid never became valid. */
+		l9p_connection_remove_fid(conn, fid);
+		return (error);
+	}
+
+	/*
+	 * On success, fid becomes valid.
+	 * It certainly *should* be a directory here...
+	 */
+	l9p_fid_setvalid(fid);
+	if (req->lr_resp.rattach.qid.type & L9P_QTDIR)
+		l9p_fid_setdir(fid);
+	return (0);
+}
+
+/*
+ * Tclunk: give up a fid.  The fid is removed from the connection
+ * whether or not the backend's clunk method reports an error.
+ */
+static int
+l9p_dispatch_tclunk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/* Note that clunk is the only way to dispose of an auth fid. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+
+	/*
+	 * xattrclunk is called without a NULL check: it can only be
+	 * NULL if xattrwalk and xattrcreate are NULL or always fail,
+	 * in which case no xattr fid exists.  (Async xattrclunk for
+	 * very large xattr creates was considered; probably not.)
+	 */
+	rc = l9p_fid_isxattr(victim) ?
+	    backend->xattrclunk(backend->softc, victim) :
+	    backend->clunk(backend->softc, victim);
+
+	/* fid is now gone regardless of any error return */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Tcreate: pass a create in an unopened directory fid to the backend.
+ * Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	uint32_t perm;
+	int rc;
+
+	/* Incoming fid must represent a directory that has not been opened. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+#define MKDIR_OR_SIMILAR \
+    (L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE)
+	perm = req->lr_req.tcreate.perm;	/* read before the backend runs */
+	backend = conn->lc_server->ls_backend;
+
+	/*
+	 * TODO: check new file name, break out the kinds of create (file
+	 * vs mkdir etc), add async file-create (req->lr_fid in limbo).
+	 */
+	rc = backend->create(backend->softc, req);
+	/* A successful file-create changes the fid into an open file. */
+	if (rc == 0 && (perm & MKDIR_OR_SIMILAR) == 0) {
+		l9p_fid_unsetdir(req->lr_fid);
+		l9p_fid_setopen(req->lr_fid);
+	}
+	return (rc);
+}
+
+/*
+ * Topen: call the backend's open method; on success flag the fid open.
+ */
+static int
+l9p_dispatch_topen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: add async open (leaves req->lr_fid in limbo) */
+	backend = conn->lc_server->ls_backend;
+	rc = backend->open(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Tread: read from an open fid or from an xattr fid; any other fid
+ * gets EINVAL.  Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_tread(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *fid;
+	int rc;
+
+	/* Xattr fids are not open, so we need our own tests. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Aim the data iov just past the fixed part of the reply so
+	 * that packed data lands right after the count field:
+	 *
+	 * size[4] + Rread[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	/*
+	 * xattrread is called unchecked: it can only be NULL if
+	 * xattrwalk and xattrcreate are NULL or always return error,
+	 * and then no xattr fid exists.
+	 *
+	 * TODO:
+	 *   separate out directory-read
+	 *   allow async read
+	 */
+	backend = conn->lc_server->ls_backend;
+	fid = req->lr_fid;
+	if (l9p_fid_isxattr(fid))
+		return (backend->xattrread(backend->softc, req));
+	if (l9p_fid_isopen(fid))
+		return (backend->read(backend->softc, req));
+	return (EINVAL);
+}
+
+/*
+ * Tremove: call the backend's remove method, then drop the fid from
+ * the connection whether or not that worked.
+ */
+static int
+l9p_dispatch_tremove(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *victim;
+	int rc;
+
+	/* ?? Allow Tremove on auth fids, perhaps treated as a Tclunk? */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &victim);
+	if (rc != 0)
+		return (rc);
+
+	backend = conn->lc_server->ls_backend;
+	l9p_fid_unsetvalid(victim);
+	rc = backend->remove(backend->softc, victim);
+	/* fid is now gone regardless of any error return */
+	l9p_connection_remove_fid(conn, victim);
+	return (rc);
+}
+
+/*
+ * Tstat: call backend stat, then sync the fid's dir flag to the reply.
+ */
+static int
+l9p_dispatch_tstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/* Allow Tstat on auth fid?  Seems harmless enough... */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	error = be->stat(be->softc, req);
+	if (error != 0)
+		return (error);
+	if (l9p_fid_isauth(fid))
+		req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH;
+	/* Test with '&', never '&=': the reply's qid type must survive. */
+	if (req->lr_resp.rstat.stat.qid.type & L9P_QTDIR)
+		l9p_fid_setdir(fid);
+	else
+		l9p_fid_unsetdir(fid);
+	return (0);
+}
+
+/*
+ * Twalk: walk fid to newfid (possibly the same fid) through the backend.
+ */
+static int
+l9p_dispatch_twalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid, *newfid;
+	uint16_t n;
+	int error, isdir;
+
+	/* Can forbid XATTR, but cannot require DIR. */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &fid);
+	if (error)
+		return (error);
+
+	if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) {
+		newfid = l9p_connection_alloc_fid(conn,
+		    req->lr_req.twalk.newfid);
+		if (newfid == NULL)
+			return (EINVAL);
+	} else
+		newfid = fid;
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	req->lr_newfid = newfid;
+	error = be->walk(be->softc, req);
+
+	/*
+	 * An in-place walk (newfid == fid) leaves fid valid.  A new
+	 * newfid becomes valid on success and is destroyed on error.
+	 */
+	if (newfid != fid) {
+		if (error == 0)
+			l9p_fid_setvalid(newfid);
+		else
+			l9p_connection_remove_fid(conn, newfid);
+	}
+	if (error != 0)
+		return (error);
+
+	/*
+	 * The last walked qid (or fid itself, if no names were walked)
+	 * says whether newfid is a directory.  Clear the flag as well
+	 * as set it: an in-place walk from a directory to a file would
+	 * otherwise leave a stale dir flag behind.
+	 */
+	n = req->lr_resp.rwalk.nwqid;
+	isdir = n > 0 ?
+	    (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR) != 0 :
+	    l9p_fid_isdir(fid) != 0;
+	if (isdir)
+		l9p_fid_setdir(newfid);
+	else
+		l9p_fid_unsetdir(newfid);
+	return (0);
+}
+
+/*
+ * Twrite: write to an open fid or to an xattr fid.  The backend write
+ * methods are optional, so a missing one yields ENOSYS.
+ */
+static int
+l9p_dispatch_twrite(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *fid;
+	int rc;
+
+	/* Cannot require open due to xattr write, but can forbid dir. */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_DIR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * As in l9p_dispatch_tread, aim the data iov past the fixed
+	 * header, except that here it is the request buffer:
+	 *
+	 * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23
+	 */
+	l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 23);
+
+	/* Unlike read, write and xattrwrite are optional (for R/O fs). */
+	/* TODO: allow async write */
+	backend = conn->lc_server->ls_backend;
+	fid = req->lr_fid;
+	if (l9p_fid_isxattr(fid)) {
+		if (backend->xattrwrite == NULL)
+			return (ENOSYS);
+		return (backend->xattrwrite(backend->softc, req));
+	}
+	if (l9p_fid_isopen(fid)) {
+		if (backend->write == NULL)
+			return (ENOSYS);
+		return (backend->write(backend->softc, req));
+	}
+
+	return (EINVAL);
+}
+
+/* Twstat: look up the fid, then defer to the backend's optional wstat. */
+static int
+l9p_dispatch_twstat(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->wstat == NULL)
+		return (ENOSYS);
+	return (backend->wstat(backend->softc, req));
+}
+
+/* Tstatfs: look up the fid and call the backend's statfs method. */
+static int
+l9p_dispatch_tstatfs(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Should we allow statfs on auth fids? */
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, EINVAL, 0,
+	    &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	return (backend->statfs(backend->softc, req));
+}
+
+/*
+ * Tlopen: optional backend lopen (else ENOSYS); success marks fid open.
+ */
+static int
+l9p_dispatch_tlopen(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: add async open (leaves req->lr_fid in limbo) */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->lopen == NULL)
+		return (ENOSYS);
+	rc = backend->lopen(backend->softc, req);
+	if (rc == 0)
+		l9p_fid_setopen(req->lr_fid);
+	return (rc);
+}
+
+/*
+ * Tlcreate: optional backend lcreate (else ENOSYS).  On success the
+ * directory fid becomes an open, non-directory fid.
+ */
+static int
+l9p_dispatch_tlcreate(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check new file name; add async create (lr_fid in limbo) */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->lcreate == NULL)
+		return (ENOSYS);
+	rc = backend->lcreate(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+	l9p_fid_unsetdir(req->lr_fid);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+/*
+ * Tsymlink: look up the directory fid, then call the backend's
+ * optional symlink method (ENOSYS if the backend lacks one).
+ */
+static int
+l9p_dispatch_tsymlink(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/* This doesn't affect the containing dir; maybe allow OPEN? */
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check new file name */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->symlink == NULL)
+		return (ENOSYS);
+	return (backend->symlink(backend->softc, req));
+}
+
+/*
+ * Tmknod: look up the directory fid, then call the backend's
+ * optional mknod method (ENOSYS if the backend lacks one).
+ */
+static int
+l9p_dispatch_tmknod(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/* This doesn't affect the containing dir; maybe allow OPEN? */
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check new file name */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->mknod == NULL)
+		return (ENOSYS);
+	return (backend->mknod(backend->softc, req));
+}
+
+/*
+ * Trename: resolve fid and dfid, then call the optional backend rename.
+ */
+static int
+l9p_dispatch_trename(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Rename directory or file (including symlink etc). */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* Doesn't affect new dir fid; maybe allow OPEN? */
+	rc = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check new file name (trename.name) */
+	backend = conn->lc_server->ls_backend;
+	if (backend->rename == NULL)
+		return (ENOSYS);
+	return (backend->rename(backend->softc, req));
+}
+
+/* Treadlink: look up the fid and call the optional backend readlink. */
+static int
+l9p_dispatch_treadlink(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * Only the back end can tell whether this is a symlink, and
+	 * it has to check; refusing directories and open files here
+	 * is merely a cheap early rejection.
+	 */
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->readlink == NULL)
+		return (ENOSYS);
+	return (backend->readlink(backend->softc, req));
+}
+
+/* Tgetattr: look up the fid and call the optional backend getattr. */
+static int
+l9p_dispatch_tgetattr(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->getattr == NULL)
+		return (ENOSYS);
+	return (backend->getattr(backend->softc, req));
+}
+
+/* Tsetattr: look up the fid and call the optional backend setattr. */
+static int
+l9p_dispatch_tsetattr(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->setattr == NULL)
+		return (ENOSYS);
+	return (backend->setattr(backend->softc, req));
+}
+
+/*
+ * Txattrwalk: allocate newfid and have the backend's optional xattrwalk
+ * turn it into an xattr fid; newfid is removed again on any failure.
+ */
+static int
+l9p_dispatch_txattrwalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *base, *xfid;
+	int rc;
+
+	/*
+	 * This is not very much like Twalk: newfid must be supplied,
+	 * must differ from fid, and always becomes a new xattr.  The
+	 * fid argument should always be a file or dir; it is not
+	 * clear that we care whether it is open.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &base);
+	if (rc != 0)
+		return (rc);
+
+	xfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid);
+	if (xfid == NULL)
+		return (EINVAL);
+
+	req->lr_fid = base;
+	req->lr_newfid = xfid;
+	backend = conn->lc_server->ls_backend;
+
+	rc = backend->xattrwalk != NULL ?
+	    backend->xattrwalk(backend->softc, req) : ENOSYS;
+	if (rc != 0) {
+		l9p_connection_remove_fid(conn, xfid);
+		return (rc);
+	}
+
+	/* Success: newfid is now valid and carries the xattr type bit. */
+	l9p_fid_setvalid(xfid);
+	l9p_fid_setxattr(xfid);
+	return (0);
+}
+
+/*
+ * Txattrcreate: have the backend's optional xattrcreate convert an
+ * existing fid into an xattr fid.  Returns 0 or an errno value.
+ */
+static int
+l9p_dispatch_txattrcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *backend;
+	struct l9p_fid *target;
+	int rc;
+
+	/*
+	 * An open fid is refused because the fid is about to become
+	 * an xattr fid instead; if that ever has to be allowed, the
+	 * fs code will need to cope.
+	 *
+	 * EINVAL (not ENOENT as in txattrwalk) for a bad fid follows
+	 * what qemu 9pfs does.
+	 */
+	rc = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR | F_FORBID_OPEN, &target);
+	if (rc != 0)
+		return (rc);
+
+	req->lr_fid = target;
+	backend = conn->lc_server->ls_backend;
+	if (backend->xattrcreate == NULL)
+		return (ENOSYS);
+	rc = backend->xattrcreate(backend->softc, req);
+	if (rc != 0)
+		return (rc);
+	/* The regular (file or dir) fid has become an xattr fid. */
+	l9p_fid_unsetdir(target);
+	l9p_fid_setxattr(target);
+	return (0);
+}
+
+/* Treaddir: directory read through the optional backend readdir. */
+static int
+l9p_dispatch_treaddir(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Aim the data iov just past the fixed part of the reply so
+	 * that packed data lands right after the count field:
+	 *
+	 * size[4] + Rreaddir[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->readdir == NULL)
+		return (ENOSYS);
+	return (backend->readdir(backend->softc, req));
+}
+
+/* Tfsync: look up the open fid and call the optional backend fsync. */
+static int
+l9p_dispatch_tfsync(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->fsync == NULL)
+		return (ENOSYS);
+	return (backend->fsync(backend->softc, req));
+}
+
+/*
+ * Tlock: look up the open fid and call the optional backend lock.
+ */
+static int
+l9p_dispatch_tlock(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/* Forbid directories? */
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: multiple client handling; perhaps async locking. */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->lock == NULL)
+		return (ENOSYS);
+	return (backend->lock(backend->softc, req));
+}
+
+/*
+ * Tgetlock: look up the open fid and call the optional backend getlock.
+ */
+static int
+l9p_dispatch_tgetlock(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: multiple client handling; perhaps async locking. */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->getlock == NULL)
+		return (ENOSYS);
+	return (backend->getlock(backend->softc, req));
+}
+
+/*
+ * Tlink: resolve dfid and fid, then call the optional backend link.
+ */
+static int
+l9p_dispatch_tlink(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	/*
+	 * dfid goes into fid2.  (Allow an open dir?  It isn't modified.)
+	 */
+	rc = fid_lookup(req->lr_conn, req->lr_req.tlink.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->link == NULL)
+		return (ENOSYS);
+	return (backend->link(backend->softc, req));
+}
+
+/* Tmkdir: reject names containing '/', then call optional backend mkdir. */
+static int
+l9p_dispatch_tmkdir(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+	/* NOTE(review): name is read via the tlcreate member; confirm layout. */
+	if (strchr(req->lr_req.tlcreate.name, '/') != NULL)
+		return (EINVAL);
+
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->mkdir == NULL)
+		return (ENOSYS);
+	return (backend->mkdir(backend->softc, req));
+}
+
+/* Trenameat: resolve both directory fids, then optional backend renameat. */
+static int
+l9p_dispatch_trenameat(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.trenameat.newdirfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check old and new names */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->renameat == NULL)
+		return (ENOSYS);
+	return (backend->renameat(backend->softc, req));
+}
+
+/* Tunlinkat: look up the directory fid, then optional backend unlinkat. */
+static int
+l9p_dispatch_tunlinkat(struct l9p_request *req)
+{
+	struct l9p_backend *backend;
+	int rc;
+
+	rc = fid_lookup(req->lr_conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rc != 0)
+		return (rc);
+
+	/* TODO: check dir-or-file name */
+	backend = req->lr_conn->lc_server->ls_backend;
+	if (backend->unlinkat == NULL)
+		return (ENOSYS);
+	return (backend->unlinkat(backend->softc, req));
+}
diff --git a/rfuncs.c b/rfuncs.c
new file mode 100644
index 000000000000..3995d413e3a6
--- /dev/null
+++ b/rfuncs.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#include <casper/cap_pwd.h>
+#include <casper/cap_grp.h>
+#endif
+
+#include "rfuncs.h"
+
+/*
+ * This is essentially a clone of the BSD basename_r function,
+ * which is like POSIX basename() but puts the result in a user
+ * supplied buffer.
+ *
+ * In BSD basename_r, the buffer must be at least MAXPATHLEN bytes
+ * long.  In our case we take the size of the buffer as an argument.
+ *
+ * Note that it's impossible in general to do this without
+ * a temporary buffer since basename("foo/bar") is "bar",
+ * but basename("foo/bar/") is still "bar" -- no trailing
+ * slash is allowed.
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the basename of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * basename, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_basename(const char *path, char *buf, size_t bufsize)
+{
+	const char *src;	/* first byte of the result */
+	size_t end;		/* index one past the last basename byte */
+	size_t start;		/* index of the first basename byte */
+	size_t n;		/* result length, not counting the NUL */
+
+	if (path == NULL || path[0] == '\0') {
+		/*
+		 * NULL and "" both yield ".".  That is generous, but it
+		 * is what libc basename_r() does, and it guarantees the
+		 * scans below always have at least one byte to look at.
+		 */
+		src = ".";
+		n = 1;
+	} else {
+		/*
+		 * Drop trailing slashes, but never the very first byte:
+		 * if the scan gets that far and the byte is a slash, the
+		 * path was nothing but slashes ("/", "//", "///", ...).
+		 */
+		end = strlen(path);
+		while (end > 1 && path[end - 1] == '/')
+			end--;
+
+		/* Now path[end - 1] != '/' unless end == 1. */
+		if (path[end - 1] == '/') {
+			/* end == 1, so the entire path amounts to "/". */
+			src = "/";
+			n = 1;
+		} else {
+			/*
+			 * path[end - 1] is the last byte of a non-empty
+			 * basename.  Move start backwards until it is
+			 * either 0 or the index just after a slash.
+			 *
+			 * If path[0] is '/', the scan is certain to stop
+			 * at a slash (path[0] or a later one) because
+			 * path[end - 1] is not one.
+			 *
+			 * If path[0] is not '/', the scan stops at
+			 * index 0 when there are no slashes at all, and
+			 * after the nearest preceding slash otherwise.
+			 *
+			 * Either way the basename is the byte range
+			 * path[start] .. path[end - 1], inclusive.
+			 */
+			start = end;
+			while (start > 0 && path[start - 1] != '/')
+				start--;
+			src = path + start;
+			n = end - start;
+		}
+	}
+
+	if (buf != NULL) {
+		/* Caller's buffer: result plus NUL must fit in bufsize. */
+		if (n >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	} else {
+		/* No buffer supplied: allocate exactly what is needed. */
+		buf = malloc(n + 1);
+		if (buf == NULL)
+			return (NULL);
+	}
+
+	/* src need not be NUL-terminated at n; terminate the copy here. */
+	memcpy(buf, src, n);
+	buf[n] = '\0';
+	return (buf);
+}
+
+/*
+ * This is much like POSIX dirname(), but is reentrant.
+ *
+ * We examine a path, find the directory portion, and copy that
+ * to a user supplied buffer <buf> of the given size <bufsize>.
+ *
+ * Note that dirname("/foo/bar/") is "/foo", dirname("/foo") is "/",
+ * and dirname("////") is "/". However, dirname("////foo/bar") is
+ * "////foo" (we do not resolve these leading slashes away -- this
+ * matches the BSD libc behavior).
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the dirname of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * dirname, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_dirname(const char *path, char *buf, size_t bufsize)
+{
+	const char *src;	/* where the result is copied from */
+	size_t last;		/* index of the final byte of the dirname */
+	size_t n;		/* result length, not counting the NUL */
+
+	if (path == NULL || path[0] == '\0') {
+		/*
+		 * NULL and "" both yield ".".  That is generous, but
+		 * it is what libc dirname() does, and it keeps the
+		 * scans below away from empty input.
+		 */
+		src = ".";
+		n = 1;
+	} else {
+		/*
+		 * Three backward scans, none of which ever moves below
+		 * index 0, so we cannot back out of the string:
+		 *
+		 *  1. skip trailing slashes, so that "/foo/bar//"
+		 *     is treated like "/foo/bar";
+		 *  2. skip the final component (the non-dir part);
+		 *  3. skip the slashes that preceded it, leaving
+		 *     last on the final byte of the directory.
+		 *
+		 * For "/foo" or "//foo" the scans stop on the leading
+		 * slash, which is the answer we want.  For "foo" or
+		 * "foo/" they stop on a non-slash at index 0, and then
+		 * (and only then) the answer is ".".  In every case
+		 * the result is last + 1 bytes long.
+		 */
+		last = strlen(path) - 1;
+		while (last > 0 && path[last] == '/')
+			last--;
+		while (last > 0 && path[last] != '/')
+			last--;
+		while (last > 0 && path[last] == '/')
+			last--;
+
+		n = last + 1;
+		if (last == 0 && path[0] != '/')
+			src = ".";
+		else
+			src = path;
+	}
+
+	if (buf != NULL) {
+		/* Caller's buffer: result plus NUL must fit in bufsize. */
+		if (n >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	} else {
+		/* No buffer supplied: allocate exactly what is needed. */
+		buf = malloc(n + 1);
+		if (buf == NULL)
+			return (NULL);
+	}
+
+	/* Copy only the first n bytes of src and terminate the result. */
+	memcpy(buf, src, n);
+	buf[n] = '\0';
+	return (buf);
+}
+
+/* Start with no buffer; the first r_pgexpand() doubles this to 1024. */
+static void
+r_pginit(struct r_pgdata *pg)
+{
+	pg->r_pgbufsize = 1 << 9;
+	pg->r_pgbuf = NULL;	/* note that realloc(NULL) == malloc */
+}
+
+/* Double pg's buffer.  Returns 0, or ENOMEM (size cap or no memory). */
+static int
+r_pgexpand(struct r_pgdata *pg)
+{
+	size_t nsize = pg->r_pgbufsize << 1;
+	char *nbuf = nsize < (1 << 20) ? realloc(pg->r_pgbuf, nsize) : NULL;
+	if (nbuf == NULL)	/* the old buffer stays owned by pg */
+		return (ENOMEM);
+	pg->r_pgbuf = nbuf;
+	pg->r_pgbufsize = nsize;	/* so ERANGE retries really grow */
+	return (0);
+}
+
+/* Release the buffer that an r_get*() lookup left in pg. */
+void
+r_pgfree(struct r_pgdata *pg)
+{
+	free(pg->r_pgbuf);	/* free(NULL) is a no-op */
+}
+
+/* getpwuid_r() wrapper: retry after r_pgexpand() while it says ERANGE. */
+struct passwd *
+r_getpwuid(uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *found = NULL;
+	int rc;
+
+	r_pginit(pg);
+	do {
+		if ((rc = r_pgexpand(pg)) == 0)
+			rc = getpwuid_r(uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+	} while (rc == ERANGE);
+
+	return (rc == 0 ? found : NULL);
+}
+
+/* getgrgid_r() wrapper: retry after r_pgexpand() while it says ERANGE. */
+struct group *
+r_getgrgid(gid_t gid, struct r_pgdata *pg)
+{
+	struct group *found = NULL;
+	int rc;
+
+	r_pginit(pg);
+	do {
+		if ((rc = r_pgexpand(pg)) == 0)
+			rc = getgrgid_r(gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+	} while (rc == ERANGE);
+
+	return (rc == 0 ? found : NULL);
+}
+
+#if defined(WITH_CASPER)
+/* cap_getpwuid_r() wrapper: retry after r_pgexpand() while ERANGE. */
+struct passwd *
+r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *found = NULL;
+	int rc;
+
+	r_pginit(pg);
+	do {
+		if ((rc = r_pgexpand(pg)) == 0)
+			rc = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+	} while (rc == ERANGE);
+
+	return (rc == 0 ? found : NULL);
+}
+
+/* cap_getgrgid_r() wrapper: retry after r_pgexpand() while ERANGE. */
+struct group *
+r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *found = NULL;
+	int rc;
+
+	r_pginit(pg);
+	do {
+		if ((rc = r_pgexpand(pg)) == 0)
+			rc = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &found);
+	} while (rc == ERANGE);
+
+	return (rc == 0 ? found : NULL);
+}
+#endif
diff --git a/rfuncs.h b/rfuncs.h
new file mode 100644
index 000000000000..22d329311d9f
--- /dev/null
+++ b/rfuncs.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_RFUNCS_H
+#define LIB9P_RFUNCS_H
+
+#include <grp.h>
+#include <pwd.h>
+#include <string.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#endif
+
+/*
+ * Reentrant, optionally-malloc-ing versions of
+ * basename() and dirname().
+ */
+char	*r_basename(const char *, char *, size_t);
+char	*r_dirname(const char *, char *, size_t);
+
+/*
+ * Yuck: getpwuid, getgrgid are not thread-safe, and the
+ * POSIX replacements (getpwuid_r, getgrgid_r) are horrible.
+ * This is to allow us to loop over the get.*_r calls with ever
+ * increasing buffers until they succeed or get unreasonable
+ * (same idea as the libc code for the non-reentrant versions,
+ * although prettier).
+ *
+ * The getpwuid/getgrgid functions auto-init one of these,
+ * but the caller must call r_pgfree() when done with the
+ * return values.
+ *
+ * If we need more later, we may have to expose the init function.
+ */
+struct r_pgdata {
+	char	*r_pgbuf;	/* scratch buffer handed to the get*_r call */
+	size_t	r_pgbufsize;	/* buffer size handed to the get*_r call */
+	union {
+		struct passwd un_pw;	/* result storage, passwd lookups */
+		struct group un_gr;	/* result storage, group lookups */
+	} r_pgun;
+};
+
+/* void r_pginit(struct r_pgdata *); */
+void r_pgfree(struct r_pgdata *);
+struct passwd *r_getpwuid(uid_t, struct r_pgdata *);
+struct group *r_getgrgid(gid_t, struct r_pgdata *);
+
+#if defined(WITH_CASPER)
+struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *);
+struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *);
+#endif
+
+#endif	/* LIB9P_RFUNCS_H */
diff --git a/sbuf/sbuf.c b/sbuf/sbuf.c
new file mode 100644
index 000000000000..525bb52db3f6
--- /dev/null
+++ b/sbuf/sbuf.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include "sbuf.h"
+
+#define	SBUF_INITIAL_SIZE	128
+
+struct sbuf *
+sbuf_new_auto(void)
+{
+	struct sbuf *s;
+
+	/* calloc() zeroes s_size and s_position; return NULL when out of memory. */
+	if ((s = calloc(1, sizeof(*s))) == NULL)
+		return (NULL);
+	s->s_buf = calloc(1, SBUF_INITIAL_SIZE + 1);
+	s->s_capacity = s->s_buf != NULL ? SBUF_INITIAL_SIZE : 0;
+	return (s);
+}
+
+int
+sbuf_cat(struct sbuf *s, const char *str)
+{
+	int req = (int)strlen(str);
+	int need = s->s_size + req + 1;	/* bytes incl. terminating NUL */
+
+	if (need > s->s_capacity) {	/* grow; keep s_buf if realloc fails */
+		char *nbuf = realloc(s->s_buf, (size_t)need);
+		if (nbuf == NULL)
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = need;
+	}
+	strcpy(s->s_buf + s->s_size, str);
+	s->s_size += req;
+	return (0);
+}
+
+int
+sbuf_printf(struct sbuf *s, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+
+	va_start(ap, fmt);
+	ret = sbuf_vprintf(s, fmt, ap);	/* does the real formatting work */
+	va_end(ap);
+
+	return (ret);			/* sbuf_vprintf()'s result, unchanged */
+}
+
+int
+sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args)
+{
+	va_list copy;
+	int req;
+
+	va_copy(copy, args);	/* dry run to learn the formatted length */
+	req = vsnprintf(NULL, 0, fmt, copy);
+	va_end(copy);
+	if (req < 0)		/* formatting error: leave s untouched */
+		return (-1);
+	if (s->s_size + req >= s->s_capacity) {
+		char *nbuf = realloc(s->s_buf, (size_t)(s->s_size + req + 1));
+		if (nbuf == NULL)	/* old buffer is still owned by s */
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = s->s_size + req + 1;
+	}
+	(void)vsnprintf(s->s_buf + s->s_size, (size_t)req + 1, fmt, args);
+	s->s_size += req;
+	return (0);
+}
+
+char *
+sbuf_data(struct sbuf *s)
+{
+	return (s->s_buf);	/* owned by s; NULL if allocation failed */
+}
+
+int
+sbuf_finish(struct sbuf *s)
+{
+	if (s->s_buf != NULL)		/* nothing to terminate otherwise */
+		s->s_buf[s->s_size] = '\0';
+	return (0);			/* always reports success */
+}
+
+void
+sbuf_delete(struct sbuf *s)
+{
+	free(s->s_buf);		/* free(NULL) is harmless if s_buf is unset */
+	free(s);		/* s must not be used after this call */
+}
diff --git a/sbuf/sbuf.h b/sbuf/sbuf.h
new file mode 100644
index 000000000000..e3ac87dd4142
--- /dev/null
+++ b/sbuf/sbuf.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#ifndef LIB9P_SBUF_H
+#define LIB9P_SBUF_H
+
+#include <stdarg.h>
+
+struct sbuf
+{
+	char *s_buf;		/* heap buffer, or NULL if allocation failed */
+	int s_size;		/* bytes of string data currently in s_buf */
+	int s_capacity;		/* sbuf.c grows s_buf when s_size reaches this */
+	int s_position;		/* not referenced anywhere in sbuf.c */
+};
+
+struct sbuf *sbuf_new_auto(void);
+int sbuf_cat(struct sbuf *s, const char *str);
+int sbuf_printf(struct sbuf *s, const char *fmt, ...);
+int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args);
+int sbuf_done(struct sbuf *s);	/* NOTE(review): sbuf.c defines no sbuf_done() */
+void sbuf_delete(struct sbuf *s);
+int sbuf_finish(struct sbuf *s);
+char *sbuf_data(struct sbuf *s);
+
+#endif /* LIB9P_SBUF_H */
+
diff --git a/threadpool.c b/threadpool.c
new file mode 100644
index 000000000000..a973a143e9e2
--- /dev/null
+++ b/threadpool.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#if defined(__FreeBSD__)
+#include <pthread_np.h>
+#endif
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "threadpool.h"
+
+static void l9p_threadpool_rflush(struct l9p_threadpool *tp,
+    struct l9p_request *req);
+
+static void *
+l9p_responder(void *arg)
+{
+	struct l9p_worker *self = arg;
+	struct l9p_threadpool *pool = self->ltw_tp;
+	struct l9p_request *reply;
+
+	/* loops until l9p_threadpool_shutdown() sets our ltw_exiting flag */
+	for (;;) {
+		/* sleep until a reply is queued or we are told to quit */
+		pthread_mutex_lock(&pool->ltp_mtx);
+		while (!self->ltw_exiting && STAILQ_EMPTY(&pool->ltp_replyq))
+			pthread_cond_wait(&pool->ltp_reply_cv, &pool->ltp_mtx);
+		if (self->ltw_exiting) {
+			pthread_mutex_unlock(&pool->ltp_mtx);
+			break;
+		}
+
+		/* take the oldest finished request off the reply queue */
+		reply = STAILQ_FIRST(&pool->ltp_replyq);
+		STAILQ_REMOVE_HEAD(&pool->ltp_replyq, lr_worklink);
+
+		/* past this point a Tflush can no longer affect it */
+		reply->lr_workstate = L9P_WS_REPLYING;
+
+		/* queue up any Tflush requests that were waiting on it */
+		if (reply->lr_flushstate != L9P_FLUSH_NONE)
+			l9p_threadpool_rflush(pool, reply);
+
+		pthread_mutex_unlock(&pool->ltp_mtx);
+
+		/* transmit with the pool lock released */
+		l9p_respond(reply, false, true);
+	}
+	return (NULL);
+}
+
+static void *
+l9p_worker(void *arg)
+{
+	struct l9p_worker *self = arg;
+	struct l9p_threadpool *pool = self->ltw_tp;
+	struct l9p_request *job;
+
+	/* the pool mutex is held every time the loop condition is tested */
+	pthread_mutex_lock(&pool->ltp_mtx);
+	while (!self->ltw_exiting) {
+		if (STAILQ_EMPTY(&pool->ltp_workq)) {
+			/* idle: sleep, then re-check both conditions */
+			pthread_cond_wait(&pool->ltp_work_cv, &pool->ltp_mtx);
+			continue;
+		}
+
+		/* claim the oldest queued request and mark it as ours */
+		job = STAILQ_FIRST(&pool->ltp_workq);
+		STAILQ_REMOVE_HEAD(&pool->ltp_workq, lr_worklink);
+		job->lr_workstate = L9P_WS_INPROGRESS;
+		job->lr_worker = self;
+		pthread_mutex_unlock(&pool->ltp_mtx);
+
+		/* run the request without holding the lock */
+		job->lr_error = l9p_dispatch_request(job);
+
+		/* hand the finished request over to the responder */
+		pthread_mutex_lock(&pool->ltp_mtx);
+		job->lr_workstate = L9P_WS_RESPQUEUED;
+		job->lr_worker = NULL;
+		STAILQ_INSERT_TAIL(&pool->ltp_replyq, job, lr_worklink);
+
+		pthread_cond_signal(&pool->ltp_reply_cv);
+	}
+	pthread_mutex_unlock(&pool->ltp_mtx);
+	return (NULL);
+}
+
+/*
+ * Just before finally replying to a request that got touched by a
+ * Tflush, enqueue its flushers (the Tflush requests sitting on the
+ * flushee's lr_flushq) onto the response queue, in arrival order.
+ * The only caller, l9p_responder(), holds tp->ltp_mtx across the call.
+ */
+static void
+l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+	struct l9p_request *flusher;
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/flush.html says:
+	 *
+	 * "Should multiple Tflushes be received for a pending
+	 * request, they must be answered in order.  A Rflush for
+	 * any of the multiple Tflushes implies an answer for all
+	 * previous ones.  Therefore, should a server receive a
+	 * request and then multiple flushes for that request, it
+	 * need respond only to the last flush."  This means
+	 * we could march through the queue of flushers here,
+	 * marking all but the last one as "to be dropped" rather
+	 * than "to be replied-to".
+	 *
+	 * However, we'll leave that for later, if ever -- it
+	 * should be harmless to respond to each, in order.
+	 */
+	STAILQ_FOREACH(flusher, &req->lr_flushq, lr_flushlink) {
+		flusher->lr_workstate = L9P_WS_RESPQUEUED;
+#ifdef notdef
+		if (not the last) {
+			flusher->lr_flushstate = L9P_FLUSH_NOT_RUN;
+			/* or, flusher->lr_drop = true ? */
+		}
+#endif
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, flusher, lr_worklink);
+	}
+}
+
+int
+l9p_threadpool_init(struct l9p_threadpool *tp, int size)
+{
+	struct l9p_worker *worker;
+#if defined(__FreeBSD__)
+	char threadname[16];
+#endif
+	int error, i, nworkers, nresponders;
+
+	if (size <= 0)
+		return (EINVAL);
+	error = pthread_mutex_init(&tp->ltp_mtx, NULL);
+	if (error)
+		return (error);
+	error = pthread_cond_init(&tp->ltp_work_cv, NULL);
+	if (error)
+		goto fail_work_cv;
+	error = pthread_cond_init(&tp->ltp_reply_cv, NULL);
+	if (error)
+		goto fail_reply_cv;
+
+	STAILQ_INIT(&tp->ltp_workq);
+	STAILQ_INIT(&tp->ltp_replyq);
+	LIST_INIT(&tp->ltp_workers);
+
+	/* thread 0 is the responder; threads 1..size are the workers */
+	nresponders = nworkers = 0;
+	for (i = 0; i <= size; i++) {
+		worker = calloc(1, sizeof(*worker));
+		if (worker == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		worker->ltw_tp = tp;
+		worker->ltw_responder = i == 0;
+		error = pthread_create(&worker->ltw_thread, NULL,
+		    worker->ltw_responder ? l9p_responder : l9p_worker,
+		    (void *)worker);
+		if (error) {
+			free(worker);
+			break;
+		}
+		if (worker->ltw_responder)
+			nresponders++;
+		else
+			nworkers++;
+#if defined(__FreeBSD__)
+		if (worker->ltw_responder) {
+			pthread_set_name_np(worker->ltw_thread, "9p-responder");
+		} else {
+			/* bounded: a six-digit index would overflow 16 bytes */
+			snprintf(threadname, sizeof(threadname),
+			    "9p-worker:%d", i - 1);
+			pthread_set_name_np(worker->ltw_thread, threadname);
+		}
+#endif
+		LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link);
+	}
+	if (nresponders == 0 || nworkers == 0) {
+		/* need the one responder, and at least one worker */
+		l9p_threadpool_shutdown(tp);
+		return (error);
+	}
+	return (0);
+
+	/*
+	 * Unwind in reverse order of initialization.  Per-case destroy paths
+	 * or "was initialized" booleans would avoid the labels, no more nicely.
+	 */
+fail_reply_cv:
+	pthread_cond_destroy(&tp->ltp_work_cv);
+fail_work_cv:
+	pthread_mutex_destroy(&tp->ltp_mtx);
+	return (error);
+}
+
+/*
+ * Run a request, usually by queueing it.
+ */
+void
+l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+
+	if (req->lr_req.hdr.type != L9P_TFLUSH) {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_NOTSTARTED;
+		STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink);
+		pthread_cond_signal(&tp->ltp_work_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+		return;
+	}
+
+	/*
+	 * A Tflush can cancel or kill off regular requests, so it is
+	 * dispatched right here rather than queued.  It is not on any
+	 * work queue yet, so its state can be touched without the lock.
+	 */
+	req->lr_workstate = L9P_WS_IMMEDIATE;
+	(void) l9p_dispatch_request(req);
+}
+
+/*
+ * Run a Tflush request.  Called via l9p_dispatch_request() since
+ * it has some debug code in it, but not called from worker thread.
+ */
+int
+l9p_threadpool_tflush(struct l9p_request *req)
+{
+	struct l9p_connection *conn;
+	struct l9p_threadpool *tp;
+	struct l9p_request *flushee;
+	uint16_t oldtag;
+	enum l9p_flushstate nstate;
+
+	/*
+	 * Find what we're supposed to flush (the flushee, as it were).
+	 */
+	req->lr_error = 0;	/* Tflush always succeeds */
+	conn = req->lr_conn;
+	tp = &conn->lc_tp;
+	oldtag = req->lr_req.tflush.oldtag;
+	ht_wrlock(&conn->lc_requests);
+	flushee = ht_find_locked(&conn->lc_requests, oldtag);
+	if (flushee == NULL) {
+		/*
+		 * Nothing to flush!  The old request must have
+		 * been done and gone already.  Just queue this
+		 * Tflush for a success reply.
+		 */
+		ht_unlock(&conn->lc_requests);
+		pthread_mutex_lock(&tp->ltp_mtx);
+		goto done;
+	}
+
+	/*
+	 * Found the original request.  We'll need to inspect its
+	 * work-state to figure out what to do.
+	 */
+	pthread_mutex_lock(&tp->ltp_mtx);
+	ht_unlock(&conn->lc_requests);
+
+	switch (flushee->lr_workstate) {
+
+	case L9P_WS_NOTSTARTED:
+		/*
+		 * Flushee is on work queue, but not yet being
+		 * handled by a worker.
+		 *
+		 * The documentation -- see
+		 * http://ericvh.github.io/9p-rfc/rfc9p2000.html
+		 * https://swtch.com/plan9port/man/man9/flush.html
+		 * -- says that "the server should answer the
+		 * flush message immediately".  However, Linux
+		 * sends flush requests for operations that
+		 * must finish, such as Tclunk, and it's not
+		 * possible to *answer* the flush request until
+		 * it has been handled (if necessary) or aborted
+		 * (if allowed).
+		 *
+		 * We therefore now just mark the original request
+		 * and let the request-handler do whatever is
+		 * appropriate.  NOTE: we could have a table of
+		 * "requests that can be aborted without being
+		 * run" vs "requests that must be run to be
+		 * aborted", but for now that seems like an
+		 * unnecessary complication.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_PRE_START;
+		break;
+
+	case L9P_WS_IMMEDIATE:
+		/*
+		 * This state only applies to Tflush requests, and
+		 * flushing a Tflush is illegal.  But we'll do nothing
+		 * special here, which will make us act like a flush
+		 * request for the flushee that arrived too late to
+		 * do anything about the flushee.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_INPROGRESS:
+		/*
+		 * Worker thread flushee->lr_worker is working on it.
+		 * Kick it to get it out of blocking system calls.
+		 * (This requires that it carefully set up some
+		 * signal handlers, and may be FreeBSD-dependent,
+		 * it probably cannot be handled this way on MacOS.)
+		 */
+#ifdef notyet
+		pthread_kill(...);
+#endif
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_RESPQUEUED:
+		/*
+		 * The flushee is already in the response queue.
+		 * We'll just mark it as having had some flush
+		 * action applied.
+		 */
+		nstate = L9P_FLUSH_TOOLATE;
+		break;
+
+	case L9P_WS_REPLYING:
+		/*
+		 * Although we found the flushee, it's too late to
+		 * make us depend on it: it's already heading out
+		 * the door as a reply.
+		 *
+		 * We don't want to do anything to the flushee.
+		 * Instead, we want to work the same way as if
+		 * we had never found the tag.
+		 */
+		goto done;
+	}
+
+	/*
+	 * Now add us to the list of Tflush-es that are waiting
+	 * for the flushee (creating the list if needed, i.e., if
+	 * this is the first Tflush for the flushee).  We (req)
+	 * will get queued for reply later, when the responder
+	 * processes the flushee and calls l9p_threadpool_rflush().
+	 */
+	if (flushee->lr_flushstate == L9P_FLUSH_NONE)
+		STAILQ_INIT(&flushee->lr_flushq);
+	flushee->lr_flushstate = nstate;
+	STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink);
+
+	pthread_mutex_unlock(&tp->ltp_mtx);
+
+	return (0);
+
+done:
+	/*
+	 * This immediate op is ready to be replied-to now, so just
+	 * stick it onto the reply queue.
+	 */
+	req->lr_workstate = L9P_WS_RESPQUEUED;
+	STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+	pthread_mutex_unlock(&tp->ltp_mtx);
+	pthread_cond_signal(&tp->ltp_reply_cv);
+	return (0);
+}
+
+int
+l9p_threadpool_shutdown(struct l9p_threadpool *tp)
+{
+	struct l9p_worker *victim;
+
+	while ((victim = LIST_FIRST(&tp->ltp_workers)) != NULL) {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		victim->ltw_exiting = true;
+		/* wake it so it sees the flag; all workers share one cv */
+		if (!victim->ltw_responder)
+			pthread_cond_broadcast(&tp->ltp_work_cv);
+		else
+			pthread_cond_signal(&tp->ltp_reply_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+		pthread_join(victim->ltw_thread, NULL);
+		LIST_REMOVE(victim, ltw_link);
+		free(victim);
+	}
+	pthread_cond_destroy(&tp->ltp_reply_cv);
+	pthread_cond_destroy(&tp->ltp_work_cv);
+	pthread_mutex_destroy(&tp->ltp_mtx);
+	return (0);
+}
diff --git a/threadpool.h b/threadpool.h
new file mode 100644
index 000000000000..2855c1c54577
--- /dev/null
+++ b/threadpool.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef	LIB9P_THREADPOOL_H
+#define	LIB9P_THREADPOOL_H
+
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+
+STAILQ_HEAD(l9p_request_queue, l9p_request);
+
+/*
+ * Most of the workers in the threadpool run requests.
+ *
+ * One distinguished worker delivers responses from the
+ * response queue.  The reason this worker exists is to
+ * guarantee response order, so that flush responses go
+ * after their flushed requests.
+ */
+struct l9p_threadpool {
+    struct l9p_connection *	ltp_conn;	/* the connection (not set by threadpool.c) */
+    struct l9p_request_queue	ltp_workq;	/* requests awaiting a worker */
+    struct l9p_request_queue	ltp_replyq;	/* requests that are done */
+    pthread_mutex_t		ltp_mtx;	/* locks queues and cond vars */
+    pthread_cond_t		ltp_work_cv;	/* to signal regular workers */
+    pthread_cond_t		ltp_reply_cv;	/* to signal reply-worker */
+    LIST_HEAD(, l9p_worker)	ltp_workers;	/* list of all workers */
+};
+
+/*
+ * All workers, including the responder, use this as their
+ * control structure.  (The only thing that distinguishes the
+ * responder is that it runs different code and waits on the
+ * reply_cv.)
+ */
+struct l9p_worker {
+    struct l9p_threadpool *	ltw_tp;		/* pool this thread serves */
+    pthread_t			ltw_thread;	/* filled in by pthread_create */
+    bool			ltw_exiting;	/* set under ltp_mtx to stop it */
+    bool			ltw_responder;	/* true for the one responder */
+    LIST_ENTRY(l9p_worker)	ltw_link;	/* linkage on ltp_workers */
+};
+
+/*
+ * Each request has a "work state" telling where the request is,
+ * in terms of workers working on it.  That is, this tells us
+ * which threadpool queue, if any, the request is in now or would
+ * go in, or what's happening with it.
+ */
+enum l9p_workstate {
+	L9P_WS_NOTSTARTED,		/* on work queue, no worker yet */
+	L9P_WS_IMMEDIATE,		/* Tflush being done sans worker */
+	L9P_WS_INPROGRESS,		/* worker is working on it */
+	L9P_WS_RESPQUEUED,		/* done; sitting on the reply queue */
+	L9P_WS_REPLYING,		/* responder is in final reply path */
+};
+
+/*
+ * Each request has a "flush state", initially NONE meaning no
+ * Tflush affected the request.
+ *
+ * If a Tflush comes in before we ever assign a work thread,
+ * the flush state goes to FLUSH_REQUESTED_PRE_START.
+ *
+ * If a Tflush comes in after we assign a work thread, the
+ * flush state goes to FLUSH_REQUESTED_POST_START.  The flush
+ * request may be too late: the request might finish anyway.
+ * Or it might be soon enough to abort.  In all cases, though, the
+ * operation requesting the flush (the "flusher") must wait for
+ * the other request (the "flushee") to go through the respond
+ * path.  The respond routine gets to decide whether to send a
+ * normal response, send an error, or drop the request
+ * entirely.
+ *
+ * There's one especially annoying case: what if a Tflush comes in
+ * *while* we're sending a response?  In this case it's too late:
+ * the flush just waits for the fully-composed response.
+ */
+enum l9p_flushstate {
+	L9P_FLUSH_NONE = 0,		/* must be zero */
+	L9P_FLUSH_REQUESTED_PRE_START,	/* not even started before flush */
+	L9P_FLUSH_REQUESTED_POST_START,	/* started, then someone said flush */
+	L9P_FLUSH_TOOLATE		/* too late, response already queued */
+};
+
+void	l9p_threadpool_flushee_done(struct l9p_request *); /* NOTE(review): not defined in threadpool.c */
+int	l9p_threadpool_init(struct l9p_threadpool *, int);
+void	l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *);
+int	l9p_threadpool_shutdown(struct l9p_threadpool *);
+int	l9p_threadpool_tflush(struct l9p_request *);
+
+#endif	/* LIB9P_THREADPOOL_H  */
diff --git a/transport/socket.c b/transport/socket.c
new file mode 100644
index 000000000000..8b6a9e59c8e9
--- /dev/null
+++ b/transport/socket.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef __APPLE__
+# include "../apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/socket.h>
+#include <sys/event.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../log.h"
+#include "socket.h"
+
+struct l9p_socket_softc
+{
+	struct l9p_connection *ls_conn;	/* connection served over ls_fd */
+	struct sockaddr ls_sockaddr;	/* not filled in anywhere in this file */
+	socklen_t ls_socklen;		/* not filled in anywhere in this file */
+	pthread_t ls_thread;		/* reader thread (l9p_socket_thread) */
+	int ls_fd;			/* accepted client socket */
+};
+
+static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *);
+static int l9p_socket_get_response_buffer(struct l9p_request *,
+    struct iovec *, size_t *, void *);
+static int l9p_socket_send_response(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *,
+    size_t, void *);
+static void *l9p_socket_thread(void *);
+static ssize_t xread(int, void *, size_t);
+static ssize_t xwrite(int, void *, size_t);
+
+int
+l9p_start_server(struct l9p_server *server, const char *host, const char *port)
+{
+	struct addrinfo *res, *res0, hints;
+	struct kevent kev[2];
+	struct kevent event[2];
+	int err, kq, i, val, evs, nsockets = 0;
+
+	/* Serves forever: the only ways out are the (-1) error returns. */
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	err = getaddrinfo(host, port, &hints, &res0);
+	if (err)
+		return (-1);
+
+	/* kev[] and event[] hold two entries: stop after two listeners */
+	for (res = res0; res != NULL && nsockets < 2; res = res->ai_next) {
+		int s = socket(res->ai_family, res->ai_socktype,
+		    res->ai_protocol);
+
+		if (s < 0)
+			continue;
+
+		val = 1;
+		setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+
+		if (bind(s, res->ai_addr, res->ai_addrlen) < 0 ||
+		    listen(s, 10) < 0) {
+			close(s);
+			continue;
+		}
+
+		EV_SET(&kev[nsockets++], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0,
+		    0, 0);
+	}
+	freeaddrinfo(res0);
+
+	if (nsockets < 1) {
+		L9P_LOG(L9P_ERROR, "bind(): %s", strerror(errno));
+		return(-1);
+	}
+
+	kq = kqueue();
+
+	if (kevent(kq, kev, nsockets, NULL, 0, NULL) < 0) {
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+		return (-1);
+	}
+
+	for (;;) {
+		evs = kevent(kq, NULL, 0, event, nsockets, NULL);
+		if (evs < 0) {
+			if (errno == EINTR)
+				continue;
+
+			L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+			return (-1);
+		}
+
+		for (i = 0; i < evs; i++) {
+			/* storage type: struct sockaddr cannot hold IPv6 */
+			struct sockaddr_storage client_addr;
+			socklen_t client_addr_len = sizeof(client_addr);
+			int news = accept((int)event[i].ident,
+			    (struct sockaddr *)&client_addr, &client_addr_len);
+
+			if (news < 0) {
+				L9P_LOG(L9P_WARNING, "accept(): %s",
+				    strerror(errno));
+				continue;
+			}
+
+			l9p_socket_accept(server, news,
+			    (struct sockaddr *)&client_addr, client_addr_len);
+		}
+	}
+}
+
+void
+l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len)
+{
+	struct l9p_socket_softc *sc;
+	struct l9p_connection *conn;
+	char host[NI_MAXHOST + 1];
+	char serv[NI_MAXSERV + 1];
+	int err;
+
+	err = getnameinfo(client_addr, client_addr_len, host, NI_MAXHOST, serv,
+	    NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+	if (err != 0) {
+		L9P_LOG(L9P_WARNING, "cannot look up client name: %s",
+		    gai_strerror(err));
+		strcpy(host, "?");	/* both are logged again further down */
+		strcpy(serv, "?");
+	} else {
+		L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv);
+	}
+
+	if (l9p_connection_init(server, &conn) != 0) {
+		L9P_LOG(L9P_ERROR, "cannot create new connection");
+		close(conn_fd);	/* nobody else will: we return no status */
+		return;
+	}
+
+	sc = l9p_calloc(1, sizeof(*sc));
+	sc->ls_conn = conn;
+	sc->ls_fd = conn_fd;
+	/* Fill in transport handler functions and aux argument. */
+	conn->lc_lt.lt_aux = sc;
+	conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer;
+	conn->lc_lt.lt_send_response = l9p_socket_send_response;
+	conn->lc_lt.lt_drop_response = l9p_socket_drop_response;
+
+	err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc);
+	if (err) {
+		L9P_LOG(L9P_ERROR,
+		    "pthread_create (for connection from %s:%s): error %s",
+		    host, serv, strerror(err));
+		l9p_connection_close(sc->ls_conn);
+		close(conn_fd);
+		free(sc);
+	}
+}
+
+static void *
+l9p_socket_thread(void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	struct iovec iov;
+	void *buf;
+	size_t length;
+
+	/* nothing ever joins this thread, so let it clean up after itself */
+	pthread_detach(pthread_self());
+	while (l9p_socket_readmsg(sc, &buf, &length) == 0) {
+		iov.iov_base = buf;
+		iov.iov_len = length;
+		l9p_connection_recv(sc->ls_conn, &iov, 1, NULL);
+		free(buf);
+	}
+
+	L9P_LOG(L9P_INFO, "connection closed");
+	l9p_connection_close(sc->ls_conn);
+	close(sc->ls_fd);	/* this file keeps the fd only in sc */
+	free(sc);
+	return (NULL);
+}
+
+static int
+l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size)
+{
+	uint32_t msize;
+	size_t toread;
+	ssize_t ret;
+	void *buffer;
+
+	assert(sc->ls_fd >= 0);
+	/* every message begins with its own total length (little-endian) */
+	buffer = l9p_malloc(sizeof(uint32_t));
+	ret = xread(sc->ls_fd, buffer, sizeof(uint32_t));
+	if (ret != (ssize_t)sizeof(uint32_t)) {
+		if (ret < 0)
+			L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		else if (ret == 0)
+			L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn);
+		else
+			L9P_LOG(L9P_ERROR,
+			    "short read: %zd bytes of %zu expected",
+			    ret, sizeof(uint32_t));
+		goto fail;
+	}
+
+	msize = le32toh(*(uint32_t *)buffer);
+	if (msize < sizeof(uint32_t)) {
+		/* toread would wrap and the read below overrun the buffer */
+		L9P_LOG(L9P_ERROR, "bogus message size %u", msize);
+		goto fail;
+	}
+	toread = msize - sizeof(uint32_t);
+	buffer = l9p_realloc(buffer, msize);
+	ret = xread(sc->ls_fd, (char *)buffer + sizeof(uint32_t), toread);
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+	if (ret != (ssize_t)toread) {
+		L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zu expected",
+		    ret, toread);
+		goto fail;
+	}
+
+	*size = msize;
+	*buf = buffer;
+	L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%u",
+	    (void *)sc->ls_conn, buffer, msize);
+	return (0);
+fail:
+	free(buffer);	/* ownership passes to the caller only on success */
+	return (-1);
+}
+
+static int
+l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov,
+    size_t *niovp, void *arg __unused)
+{
+	/* one contiguous buffer, sized by the connection's lc_msize */
+	const size_t msize = req->lr_conn->lc_msize;
+
+	iov[0].iov_len = msize;
+	iov[0].iov_base = l9p_malloc(msize);
+	*niovp = 1;
+
+	/* the send/drop response handlers free() the buffer again */
+	return (0);
+}
+
+static int
+l9p_socket_send_response(struct l9p_request *req __unused,
+    const struct iovec *iov, const size_t niov __unused, const size_t iolen,
+    void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	int error = 0;
+
+	assert(sc->ls_fd >= 0);
+
+	L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%zu", arg,
+	    iov[0].iov_base, iolen);
+	if (xwrite(sc->ls_fd, iov[0].iov_base, iolen) != (ssize_t)iolen) {
+		L9P_LOG(L9P_ERROR, "short write: %s", strerror(errno));
+		error = -1;
+	}
+	/* freed on failure too -- NOTE(review): confirm no later drop */
+	free(iov[0].iov_base);
+	return (error);
+}
+
+static void
+l9p_socket_drop_response(struct l9p_request *req __unused,
+    const struct iovec *iov, size_t niov __unused, void *arg)
+{
+
+	L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, iov[0].iov_base);
+	free(iov[0].iov_base);	/* from l9p_socket_get_response_buffer() */
+}
+
+static ssize_t
+xread(int fd, void *buf, size_t count)
+{
+	char *cursor = buf;
+	size_t left = count;
+	ssize_t got;
+
+	/* keep reading until the request is satisfied, EOF, or a real error */
+	while (left > 0) {
+		got = read(fd, cursor, left);
+		if (got == 0)
+			break;			/* EOF: short count */
+		if (got > 0) {
+			cursor += got;
+			left -= (size_t)got;
+			continue;
+		}
+		if (errno != EINTR)
+			return (-1);
+		/* interrupted by a signal: just retry */
+	}
+	return ((ssize_t)(count - left));
+}
+
+static ssize_t
+xwrite(int fd, void *buf, size_t count)
+{
+	const char *cursor = buf;
+	size_t left = count;
+	ssize_t put;
+
+	/* keep writing until all is sent, no progress is made, or an error */
+	while (left > 0) {
+		put = write(fd, cursor, left);
+		if (put == 0)
+			break;			/* no progress: short count */
+		if (put > 0) {
+			cursor += put;
+			left -= (size_t)put;
+			continue;
+		}
+		if (errno != EINTR)
+			return (-1);
+		/* interrupted by a signal: just retry */
+	}
+	return ((ssize_t)(count - left));
+}
diff --git a/transport/socket.h b/transport/socket.h
new file mode 100644
index 000000000000..b022da1a923e
--- /dev/null
+++ b/transport/socket.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_SOCKET_H
+#define LIB9P_SOCKET_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "../lib9p.h"
+
+int l9p_start_server(struct l9p_server *server, const char *host,
+    const char *port);
+void l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len);
+
+#endif /* LIB9P_SOCKET_H */
diff --git a/utils.c b/utils.c
new file mode 100644
index 000000000000..609d7a216c82
--- /dev/null
+++ b/utils.c
@@ -0,0 +1,1268 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "fcall.h"
+#include "linux_errno.h"
+
+#ifdef __APPLE__
+  #define GETGROUPS_GROUP_TYPE_IS_INT
+#endif
+
+#define N(ary)          (sizeof(ary) / sizeof((ary)[0]))
+
+/* One l9p_describe_bits() table row; see that function below. */
+struct descbits {
+	uint64_t	db_mask;	/* bits to test in the value */
+	uint64_t	db_match;	/* (value & db_mask) must equal this */
+	const char	*db_name;	/* name to print; NULL ends table */
+};
+
+
+static bool l9p_describe_bits(const char *, uint64_t, const char *,
+    const struct descbits *, struct sbuf *);
+static void l9p_describe_fid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_mode(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_name(const char *, char *, struct sbuf *);
+static void l9p_describe_perm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *);
+static void l9p_describe_l9stat(const char *, struct l9p_stat *,
+    enum l9p_version, struct sbuf *);
+static void l9p_describe_statfs(const char *, struct l9p_statfs *,
+    struct sbuf *);
+static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t);
+static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *);
+static void l9p_describe_size(const char *, uint64_t, struct sbuf *);
+static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_getattr_mask(uint64_t, struct sbuf *);
+static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *);
+static const char *lookup_linux_errno(uint32_t);
+
+/*
+ * Using indexed initializers, we can have these occur in any order.
+ * Using adjacent-string concatenation ("T" name, "R" name), we
+ * get both Tfoo and Rfoo strings with one copy of the name.
+ * Alas, there is no stupid cpp trick to lowercase-ify, so we
+ * have to write each name twice.  In which case we might as well
+ * make the second one a string in the first place and not bother
+ * with the stringizing.  Slots that get no entry stay NULL.
+ *
+ * This table should have entries for each enum value in fcall.h.
+ */
+#define X(NAME, name)	[L9P_T##NAME - L9P__FIRST] = "T" name, \
+			[L9P_R##NAME - L9P__FIRST] = "R" name
+static const char *ftype_names[] = {
+	X(VERSION,	"version"),
+	X(AUTH,		"auth"),
+	X(ATTACH,	"attach"),
+	X(ERROR,	"error"),
+	X(LERROR,	"lerror"),
+	X(FLUSH,	"flush"),
+	X(WALK,		"walk"),
+	X(OPEN,		"open"),
+	X(CREATE,	"create"),
+	X(READ,		"read"),
+	X(WRITE,	"write"),
+	X(CLUNK,	"clunk"),
+	X(REMOVE,	"remove"),
+	X(STAT,		"stat"),
+	X(WSTAT,	"wstat"),
+	X(STATFS,	"statfs"),
+	X(LOPEN,	"lopen"),
+	X(LCREATE,	"lcreate"),
+	X(SYMLINK,	"symlink"),
+	X(MKNOD,	"mknod"),
+	X(RENAME,	"rename"),
+	X(READLINK,	"readlink"),
+	X(GETATTR,	"getattr"),
+	X(SETATTR,	"setattr"),
+	X(XATTRWALK,	"xattrwalk"),
+	X(XATTRCREATE,	"xattrcreate"),
+	X(READDIR,	"readdir"),
+	X(FSYNC,	"fsync"),
+	X(LOCK,		"lock"),
+	X(GETLOCK,	"getlock"),
+	X(LINK,		"link"),
+	X(MKDIR,	"mkdir"),
+	X(RENAMEAT,	"renameat"),
+	X(UNLINKAT,	"unlinkat"),
+};
+#undef X
+
+void
+l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek)
+{
+	size_t first, skip = seek, out = 0;
+
+	/*
+	 * Pass over every leading element that the seek swallows whole
+	 * (zero-length ones included); afterwards "skip" is the offset
+	 * into the first element that survives, if there is one.
+	 */
+	for (first = 0; first < niov1; first++) {
+		if (skip < iov1[first].iov_len)
+			break;
+		skip -= iov1[first].iov_len;
+	}
+
+	/* Copy the tail; only its first element is entered part-way. */
+	while (first + out < niov1) {
+		const struct iovec *src = &iov1[first + out];
+
+		iov2[out].iov_base = (char *)src->iov_base + skip;
+		iov2[out].iov_len = src->iov_len - skip;
+		skip = 0;
+		out++;
+	}
+
+	*niov2 = out;
+}
+
+size_t
+l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t idx, budget = length;
+
+	/* Spend the byte budget element by element, front to back. */
+	for (idx = 0; idx < niov; idx++) {
+		if (budget < iov[idx].iov_len) {
+			/* First element that does not fit: cut it here. */
+			iov[idx].iov_len = budget;
+			return (idx + 1);
+		}
+		budget -= iov[idx].iov_len;
+	}
+
+	return (niov);
+}
+
+/*
+ * Wrapper for getgrouplist() that returns malloc'ed memory, which the
+ * caller must free, and papers over FreeBSD vs Mac argument types.
+ *
+ * Note that this function guarantees that *either*:
+ *     return value != NULL and *angroups has been set
+ * or: return value == NULL and *angroups is 0
+ */
+gid_t *
+l9p_getgrlist(const char *name, gid_t basegid, int *angroups)
+{
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int i, *int_groups;
+#endif
+	gid_t *groups;
+	int ngroups;
+
+	/* Todo, perhaps: grow and retry while getgrouplist() returns -1. */
+	ngroups = NGROUPS_MAX;
+	groups = malloc((size_t)ngroups * sizeof(*groups));
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int_groups = groups ? malloc((size_t)ngroups * sizeof(*int_groups)) :
+	    NULL;
+	if (int_groups == NULL) {
+		free(groups);
+		groups = NULL;
+	}
+#endif
+	if (groups == NULL) {
+		*angroups = 0;
+		return (NULL);
+	}
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	(void) getgrouplist(name, (int)basegid, int_groups, &ngroups);
+#else
+	(void) getgrouplist(name, basegid, groups, &ngroups);
+#endif
+	ngroups = MIN(ngroups, NGROUPS_MAX);	/* count can exceed stored */
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	for (i = 0; i < ngroups; i++)
+		groups[i] = (gid_t)int_groups[i];
+	free(int_groups);	/* scratch copy is no longer needed */
+#endif
+	*angroups = ngroups;
+	return (groups);
+}
+
+/*
+ * For the various debug describe ops: decode bits in a bit-field-y
+ * value.  For example, we might produce:
+ *     value=0x3c[FOO,BAR,QUUX,?0x20]
+ * when FOO is bit 0x10, BAR is 0x08, and QUUX is 0x04 (as defined
+ * by the table).  This leaves 0x20 (bit 5) as a mystery, while bits
+ * 4, 3, and 2 were decoded.  (Bits 0 and 1 were 0 on input hence
+ * were not attempted here.)
+ *
+ * For general use we take a uint64_t <value>.  The bit description
+ * table <db> is an array of {mask, match, str} values ending with
+ * {0, 0, NULL}.
+ *
+ * If <str> is non-NULL we'll print it and the value as well (if
+ * str is NULL we'll print neither).  The value is always printed in
+ * hex at the moment.
+ *
+ * For convenience, you can use a mask-and-match value, e.g., to
+ * decode a 2-bit field in bits 0 and 1 you can mask against 3 and
+ * match the values 0, 1, 2, and 3.  To handle this, make sure that
+ * all entries that share a mask are sequential.
+ *
+ * If there are any nonzero undecoded bits, print them after
+ * all the decode-able bits have been handled.
+ *
+ * The <oc> argument defines the open and close bracket characters,
+ * typically "[]", that surround the entire string.  If NULL, no
+ * brackets are added, else oc[0] goes in the front and oc[1] at
+ * the end, after printing any <str><value> part.
+ *
+ * Returns true if it printed anything (other than the implied
+ * str-and-value, that is).
+ */
+static bool
+l9p_describe_bits(const char *str, uint64_t value, const char *oc,
+    const struct descbits *db, struct sbuf *sb)
+{
+	const char *sep;
+	char bracketbuf[2] = "";
+	bool printed = false;
+
+	if (str != NULL)
+		sbuf_printf(sb, "%s0x%" PRIx64, str, value);
+
+	if (oc != NULL)
+		bracketbuf[0] = oc[0];
+	sep = bracketbuf;	/* first item is preceded by oc[0] */
+	for (; db->db_name != NULL; db++) {
+		if ((value & db->db_mask) == db->db_match) {
+			sbuf_printf(sb, "%s%s", sep, db->db_name);
+			sep = ",";
+			printed = true;
+
+			/*
+			 * Clear the field, and make sure we
+			 * won't match a zero-valued field with
+			 * this same mask.
+			 */
+			value &= ~db->db_mask;
+			while (db[1].db_mask == db->db_mask &&
+			    db[1].db_name != NULL)
+				db++;
+		}
+	}
+	if (value != 0) {
+		sbuf_printf(sb, "%s?0x%" PRIx64, sep, value);
+		printed = true;
+	}
+	if (printed && oc != NULL) {
+		bracketbuf[0] = oc[1];
+		sbuf_cat(sb, bracketbuf);
+	}
+	return (printed);
+}
+
+/*
+ * Show file ID: <str> followed by the fid in decimal.
+ */
+static void
+l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu32, str, fid);
+}
+
+/*
+ * Show user or group ID: <str> followed by the id in decimal.
+ */
+static void
+l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu32, str, ugid);
+}
+
+/*
+ * Show file mode (OREAD, ORDWR, etc).  The argument is an
+ * l9p_omode, not a Linux flags mode; Linux flags are decoded with
+ * l9p_describe_lflags.  The access mode is a field, not a bit.
+ */
+static void
+l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9P_OACCMODE,	L9P_OREAD,	"OREAD" },
+		{ L9P_OACCMODE,	L9P_OWRITE,	"OWRITE" },
+		{ L9P_OACCMODE,	L9P_ORDWR,	"ORDWR" },
+		{ L9P_OACCMODE,	L9P_OEXEC,	"OEXEC" },
+
+		{ L9P_OCEXEC,	L9P_OCEXEC,	"OCEXEC" },
+		{ L9P_ODIRECT,	L9P_ODIRECT,	"ODIRECT" },
+		{ L9P_ORCLOSE,	L9P_ORCLOSE,	"ORCLOSE" },
+		{ L9P_OTRUNC,	L9P_OTRUNC,	"OTRUNC" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, mode, "[]", bits, sb);
+}
+
+/*
+ * Show Linux mode/flags: the access-mode field first, then each
+ * flag bit that is set.
+ */
+static void
+l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+	    { L9P_OACCMODE,	L9P_OREAD,		"O_READ" },
+	    { L9P_OACCMODE,	L9P_OWRITE,		"O_WRITE" },
+	    { L9P_OACCMODE,	L9P_ORDWR,		"O_RDWR" },
+	    { L9P_OACCMODE,	L9P_OEXEC,		"O_EXEC" },
+
+	    { L9P_L_O_APPEND,	L9P_L_O_APPEND,		"O_APPEND" },
+	    { L9P_L_O_CLOEXEC,	L9P_L_O_CLOEXEC,	"O_CLOEXEC" },
+	    { L9P_L_O_CREAT,	L9P_L_O_CREAT,		"O_CREAT" },
+	    { L9P_L_O_DIRECT,	L9P_L_O_DIRECT,		"O_DIRECT" },
+	    { L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY,	"O_DIRECTORY" },
+	    { L9P_L_O_DSYNC,	L9P_L_O_DSYNC,		"O_DSYNC" },
+	    { L9P_L_O_EXCL,	L9P_L_O_EXCL,		"O_EXCL" },
+	    { L9P_L_O_FASYNC,	L9P_L_O_FASYNC,		"O_FASYNC" },
+	    { L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE,	"O_LARGEFILE" },
+	    { L9P_L_O_NOATIME,	L9P_L_O_NOATIME,	"O_NOATIME" },
+	    { L9P_L_O_NOCTTY,	L9P_L_O_NOCTTY,		"O_NOCTTY" },
+	    { L9P_L_O_NOFOLLOW,	L9P_L_O_NOFOLLOW,	"O_NOFOLLOW" },
+	    { L9P_L_O_NONBLOCK,	L9P_L_O_NONBLOCK,	"O_NONBLOCK" },
+	    { L9P_L_O_PATH,	L9P_L_O_PATH,		"O_PATH" },
+	    { L9P_L_O_SYNC,	L9P_L_O_SYNC,		"O_SYNC" },
+	    { L9P_L_O_TMPFILE,	L9P_L_O_TMPFILE,	"O_TMPFILE" },
+	    { L9P_L_O_TRUNC,	L9P_L_O_TRUNC,		"O_TRUNC" },
+	    { 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Show file name or other similar, potentially-very-long string.
+ * Actual strings get quotes, a NULL name (if it occurs) gets
+ * <null> (no quotes), so you can tell the difference.  Strings
+ * longer than 32 characters are cut to their first 29 characters
+ * followed by "...".
+ */
+static void
+l9p_describe_name(const char *str, char *name, struct sbuf *sb)
+{
+	size_t namelen;
+
+	if (name == NULL) {
+		sbuf_printf(sb, "%s<null>", str);
+	} else if ((namelen = strlen(name)) <= 32) {
+		/* Short enough: print the whole name. */
+		sbuf_printf(sb, "%s\"%.*s\"", str, (int)namelen, name);
+	} else {
+		/* Too long: print the head and mark the cut. */
+		sbuf_printf(sb, "%s\"%.*s...\"", str, 32 - 3, name);
+	}
+}
+
+/*
+ * Show permissions as an rwx string.  The raw value is also printed,
+ * in hex, when bits outside the low nine (0777) are set.
+ */
+static void
+l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	char perm[12];
+
+	strmode(mode & 0777, perm);
+	if ((mode & ~(uint32_t)0777) == 0)
+		sbuf_printf(sb, "%s<%.9s>", str, &perm[1]);
+	else
+		sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, &perm[1]);
+}
+
+/*
+ * Show "extended" permissions: any DM* extension bits from 9P2000.u
+ * by name, then the regular rwx permissions, all inside [...].
+ */
+static void
+l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9P_DMDIR,	L9P_DMDIR,	"DMDIR" },
+		{ L9P_DMAPPEND,	L9P_DMAPPEND,	"DMAPPEND" },
+		{ L9P_DMEXCL,	L9P_DMEXCL,	"DMEXCL" },
+		{ L9P_DMMOUNT,	L9P_DMMOUNT,	"DMMOUNT" },
+		{ L9P_DMAUTH,	L9P_DMAUTH,	"DMAUTH" },
+		{ L9P_DMTMP,	L9P_DMTMP,	"DMTMP" },
+		{ L9P_DMSYMLINK, L9P_DMSYMLINK,	"DMSYMLINK" },
+		{ L9P_DMDEVICE,	L9P_DMDEVICE,	"DMDEVICE" },
+		{ L9P_DMNAMEDPIPE, L9P_DMNAMEDPIPE, "DMNAMEDPIPE" },
+		{ L9P_DMSOCKET,	L9P_DMSOCKET,	"DMSOCKET" },
+		{ L9P_DMSETUID,	L9P_DMSETUID,	"DMSETUID" },
+		{ L9P_DMSETGID,	L9P_DMSETGID,	"DMSETGID" },
+		{ 0, 0, NULL }
+	};
+	const char *sep = "";
+
+	sbuf_printf(sb, "%s[", str);
+	if (l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, bits, sb))
+		sep = ",";	/* names were printed before the rwx part */
+	l9p_describe_perm(sep, mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show Linux-specific permissions: the S_IFMT file-type field by
+ * name, then the regular rwx permissions, all inside [...].
+ */
+static void
+l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ S_IFMT,	S_IFIFO,	"S_IFIFO" },
+		{ S_IFMT,	S_IFCHR,	"S_IFCHR" },
+		{ S_IFMT,	S_IFDIR,	"S_IFDIR" },
+		{ S_IFMT,	S_IFBLK,	"S_IFBLK" },
+		{ S_IFMT,	S_IFREG,	"S_IFREG" },
+		{ S_IFMT,	S_IFLNK,	"S_IFLNK" },
+		{ S_IFMT,	S_IFSOCK,	"S_IFSOCK" },
+		{ 0, 0, NULL }
+	};
+	const char *sep = "";
+
+	sbuf_printf(sb, "%s[", str);
+	if (l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, bits, sb))
+		sep = ",";	/* a type name was printed before the rwx part */
+	l9p_describe_perm(sep, mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show qid as <type[bits],version,path>; path is 16 hex digits.
+ */
+static void
+l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		/*
+		 * NB: L9P_QTFILE is 0, i.e., is implied by no
+		 * other bits being set.  We get this produced
+		 * when we mask against 0xff and compare for
+		 * L9P_QTFILE, but we must do it first so that
+		 * we mask against the original (not-adjusted)
+		 * value.
+		 */
+		{ 0xff,		L9P_QTFILE,	"FILE" },
+		{ L9P_QTDIR,	L9P_QTDIR,	"DIR" },
+		{ L9P_QTAPPEND,	L9P_QTAPPEND,	"APPEND" },
+		{ L9P_QTEXCL,	L9P_QTEXCL,	"EXCL" },
+		{ L9P_QTMOUNT,	L9P_QTMOUNT,	"MOUNT" },
+		{ L9P_QTAUTH,	L9P_QTAUTH,	"AUTH" },
+		{ L9P_QTTMP,	L9P_QTTMP,	"TMP" },
+		{ L9P_QTSYMLINK, L9P_QTSYMLINK,	"SYMLINK" },
+		{ 0, 0, NULL }
+	};
+
+	assert(qid != NULL);
+
+	sbuf_cat(sb, str);
+	(void) l9p_describe_bits("<", qid->type, "[]", bits, sb);
+	sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">",
+	    qid->version, qid->path);
+}
+
+/*
+ * Show size: <str> followed by the 64-bit size in decimal.
+ */
+static void
+l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu64, str, size);
+}
+
+/*
+ * Show l9stat (including 9P2000.u extensions if appropriate).
+ */
+static void
+l9p_describe_l9stat(const char *str, struct l9p_stat *st,
+    enum l9p_version version, struct sbuf *sb)
+{
+	const bool has_dotu = (version >= L9P_2000U);
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str,
+	    st->type, st->dev);
+	l9p_describe_qid(" qid=", &st->qid, sb);
+	l9p_describe_ext_perm(" mode=", st->mode, sb);
+	if (st->atime != UINT32_MAX)
+		sbuf_printf(sb, " atime=%" PRIu32, st->atime);
+	if (st->mtime != UINT32_MAX)
+		sbuf_printf(sb, " mtime=%" PRIu32, st->mtime);
+	if (st->length != UINT64_MAX)
+		sbuf_printf(sb, " length=%" PRIu64, st->length);
+	l9p_describe_name(" name=", st->name, sb);
+	/*
+	 * With dot-u a NULL uid, gid or muid is common and is only
+	 * noise, so it is left out; without dot-u all three are
+	 * always shown, NULL or not.
+	 */
+	if (!has_dotu || st->uid != NULL)
+		l9p_describe_name(" uid=", st->uid, sb);
+	if (!has_dotu || st->gid != NULL)
+		l9p_describe_name(" gid=", st->gid, sb);
+	if (!has_dotu || st->muid != NULL)
+		l9p_describe_name(" muid=", st->muid, sb);
+	if (!has_dotu)
+		return;
+	if (st->extension != NULL)
+		l9p_describe_name(" extension=", st->extension, sb);
+	sbuf_printf(sb,
+	    " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32,
+	    st->n_uid, st->n_gid, st->n_muid);
+}
+
+static void
+l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb)
+{
+	/* Show every l9p_statfs field on one line; st must not be NULL. */
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64
+	    " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64
+	    " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32,
+	    str, (u_long)st->type, (u_long)st->bsize, st->blocks,
+	    st->bfree, st->bavail, st->files,
+	    st->ffree, st->fsid, st->namelen);
+}
+
+/*
+ * Decode a <seconds,nsec> timestamp as sec.nnnnnnnnn; an nsec above
+ * 999999999 is reported as invalid instead.
+ * Perhaps should use asctime_r.  For now, raw values.
+ */
+static void
+l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec)
+{
+
+	sbuf_cat(sb, s);
+	if (nsec > 999999999)
+		sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">",
+		    sec, nsec);
+	else
+		sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+/*
+ * Show readdir data (.L format); for now only the byte count.
+ */
+static void
+l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io)
+{
+	uint32_t count;
+#ifdef notyet
+	int i;
+	struct l9p_message msg;
+	struct l9p_dirent de;
+#endif
+
+	if ((count = io->count) == 0) {
+		sbuf_printf(sb, " EOF (count=0)");
+		return;
+	}
+
+	/*
+	 * Can't do this yet because we do not have the original
+	 * req.
+	 */
+#ifdef notyet
+	sbuf_printf(sb, " count=%" PRIu32 " [", count);
+
+	l9p_init_msg(&msg, req, L9P_UNPACK);
+	for (i = 0; msg.lm_size < count; i++) {
+		if (l9p_pudirent(&msg, &de) < 0) {
+			sbuf_printf(sb, " bad count");
+			break;
+		}
+
+		sbuf_printf(sb, i ? ", " : " ");
+		l9p_describe_qid(" qid=", &de.qid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " type=%d",
+		    de.offset, de.type);
+		l9p_describe_name(" name=", de.name, sb);
+		free(de.name);
+	}
+	sbuf_printf(sb, "]=%d dir entries", i);
+#else /* notyet */
+	sbuf_printf(sb, " count=%" PRIu32, count);
+#endif
+}
+
+/*
+ * Decode Tgetattr request_mask field.
+ */
+static void
+l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		/*
+		 * Note: ALL and BASIC must occur first and second.
+		 * This is a little dirty: it depends on the way the
+		 * describe_bits code clears the values.  If we
+		 * match ALL, we clear all those bits and do not
+		 * match BASIC; if we match BASIC, we clear all
+		 * those bits and do not match individual bits.  Thus
+		 * if we have BASIC but not all the additional bits,
+		 * we'll see, e.g., [BASIC,BTIME,GEN]; if we have
+		 * all the additional bits too, we'll see [ALL].
+		 *
+		 * Since l9p_describe_bits() prints leftover bits, we'll
+		 * also spot any bits added since we made this table.
+		 */
+		{ L9PL_GETATTR_ALL,	L9PL_GETATTR_ALL,	"ALL" },
+		{ L9PL_GETATTR_BASIC,	L9PL_GETATTR_BASIC,	"BASIC" },
+
+		/* individual bits in BASIC */
+		{ L9PL_GETATTR_MODE,	L9PL_GETATTR_MODE,	"MODE" },
+		{ L9PL_GETATTR_NLINK,	L9PL_GETATTR_NLINK,	"NLINK" },
+		{ L9PL_GETATTR_UID,	L9PL_GETATTR_UID,	"UID" },
+		{ L9PL_GETATTR_GID,	L9PL_GETATTR_GID,	"GID" },
+		{ L9PL_GETATTR_RDEV,	L9PL_GETATTR_RDEV,	"RDEV" },
+		{ L9PL_GETATTR_ATIME,	L9PL_GETATTR_ATIME,	"ATIME" },
+		{ L9PL_GETATTR_MTIME,	L9PL_GETATTR_MTIME,	"MTIME" },
+		{ L9PL_GETATTR_CTIME,	L9PL_GETATTR_CTIME,	"CTIME" },
+		{ L9PL_GETATTR_INO,	L9PL_GETATTR_INO,	"INO" },
+		{ L9PL_GETATTR_SIZE,	L9PL_GETATTR_SIZE,	"SIZE" },
+		{ L9PL_GETATTR_BLOCKS,	L9PL_GETATTR_BLOCKS,	"BLOCKS" },
+
+		/* additional bits in ALL */
+		{ L9PL_GETATTR_BTIME,	L9PL_GETATTR_BTIME,	"BTIME" },
+		{ L9PL_GETATTR_GEN,	L9PL_GETATTR_GEN,	"GEN" },
+		{ L9PL_GETATTR_DATA_VERSION, L9PL_GETATTR_DATA_VERSION,
+							"DATA_VERSION" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(" request_mask=", request_mask, "[]", bits,
+	    sb);
+}
+
+/*
+ * Decode Tunlinkat flags; only AT_REMOVEDIR is known by name.
+ */
+static void
+l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/* Map a Linux errno to text; the result may be a reused static buffer. */
+static const char *
+lookup_linux_errno(uint32_t linux_errno)
+{
+	static char unknown[50];
+
+	/*
+	 * Error numbers in the "base" range (1..ERANGE) are common
+	 * across BSD, MacOS, Linux, and Plan 9.
+	 *
+	 * Error numbers outside that range require translation.
+	 */
+	static const char *const table[] = {
+#define	X(name) [name] = name ## _STR
+		X(LINUX_EAGAIN),
+		X(LINUX_EDEADLK),
+		X(LINUX_ENAMETOOLONG),
+		X(LINUX_ENOLCK),
+		X(LINUX_ENOSYS),
+		X(LINUX_ENOTEMPTY),
+		X(LINUX_ELOOP),
+		X(LINUX_ENOMSG),
+		X(LINUX_EIDRM),
+		X(LINUX_ECHRNG),
+		X(LINUX_EL2NSYNC),
+		X(LINUX_EL3HLT),
+		X(LINUX_EL3RST),
+		X(LINUX_ELNRNG),
+		X(LINUX_EUNATCH),
+		X(LINUX_ENOCSI),
+		X(LINUX_EL2HLT),
+		X(LINUX_EBADE),
+		X(LINUX_EBADR),
+		X(LINUX_EXFULL),
+		X(LINUX_ENOANO),
+		X(LINUX_EBADRQC),
+		X(LINUX_EBADSLT),
+		X(LINUX_EBFONT),
+		X(LINUX_ENOSTR),
+		X(LINUX_ENODATA),
+		X(LINUX_ETIME),
+		X(LINUX_ENOSR),
+		X(LINUX_ENONET),
+		X(LINUX_ENOPKG),
+		X(LINUX_EREMOTE),
+		X(LINUX_ENOLINK),
+		X(LINUX_EADV),
+		X(LINUX_ESRMNT),
+		X(LINUX_ECOMM),
+		X(LINUX_EPROTO),
+		X(LINUX_EMULTIHOP),
+		X(LINUX_EDOTDOT),
+		X(LINUX_EBADMSG),
+		X(LINUX_EOVERFLOW),
+		X(LINUX_ENOTUNIQ),
+		X(LINUX_EBADFD),
+		X(LINUX_EREMCHG),
+		X(LINUX_ELIBACC),
+		X(LINUX_ELIBBAD),
+		X(LINUX_ELIBSCN),
+		X(LINUX_ELIBMAX),
+		X(LINUX_ELIBEXEC),
+		X(LINUX_EILSEQ),
+		X(LINUX_ERESTART),
+		X(LINUX_ESTRPIPE),
+		X(LINUX_EUSERS),
+		X(LINUX_ENOTSOCK),
+		X(LINUX_EDESTADDRREQ),
+		X(LINUX_EMSGSIZE),
+		X(LINUX_EPROTOTYPE),
+		X(LINUX_ENOPROTOOPT),
+		X(LINUX_EPROTONOSUPPORT),
+		X(LINUX_ESOCKTNOSUPPORT),
+		X(LINUX_EOPNOTSUPP),
+		X(LINUX_EPFNOSUPPORT),
+		X(LINUX_EAFNOSUPPORT),
+		X(LINUX_EADDRINUSE),
+		X(LINUX_EADDRNOTAVAIL),
+		X(LINUX_ENETDOWN),
+		X(LINUX_ENETUNREACH),
+		X(LINUX_ENETRESET),
+		X(LINUX_ECONNABORTED),
+		X(LINUX_ECONNRESET),
+		X(LINUX_ENOBUFS),
+		X(LINUX_EISCONN),
+		X(LINUX_ENOTCONN),
+		X(LINUX_ESHUTDOWN),
+		X(LINUX_ETOOMANYREFS),
+		X(LINUX_ETIMEDOUT),
+		X(LINUX_ECONNREFUSED),
+		X(LINUX_EHOSTDOWN),
+		X(LINUX_EHOSTUNREACH),
+		X(LINUX_EALREADY),
+		X(LINUX_EINPROGRESS),
+		X(LINUX_ESTALE),
+		X(LINUX_EUCLEAN),
+		X(LINUX_ENOTNAM),
+		X(LINUX_ENAVAIL),
+		X(LINUX_EISNAM),
+		X(LINUX_EREMOTEIO),
+		X(LINUX_EDQUOT),
+		X(LINUX_ENOMEDIUM),
+		X(LINUX_EMEDIUMTYPE),
+		X(LINUX_ECANCELED),
+		X(LINUX_ENOKEY),
+		X(LINUX_EKEYEXPIRED),
+		X(LINUX_EKEYREVOKED),
+		X(LINUX_EKEYREJECTED),
+		X(LINUX_EOWNERDEAD),
+		X(LINUX_ENOTRECOVERABLE),
+		X(LINUX_ERFKILL),
+		X(LINUX_EHWPOISON),
+#undef X
+	};
+
+	if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL)
+		return (table[linux_errno]);
+	if (linux_errno <= ERANGE)
+		return (strerror((int)linux_errno));
+	(void) snprintf(unknown, sizeof(unknown),
+	    "Unknown error %" PRIu32, linux_errno);
+	return (unknown);
+}
+
+void
+l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb)
+{
+	uint64_t mask;
+	uint8_t type;
+	int i;
+
+	assert(fcall != NULL);
+	assert(sb != NULL);
+	assert(version <= L9P_2000L && version >= L9P_INVALID_VERSION);
+
+	type = fcall->hdr.type;
+
+	if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 ||
+	    ftype_names[type - L9P__FIRST] == NULL) {
+		const char *rr;
+
+		/*
+		 * Can't say for sure that this distinction --
+		 * an even number is a request, an odd one is
+		 * a response -- will be maintained forever,
+		 * but it's good enough for now.
+		 */
+		rr = (type & 1) != 0 ? "response" : "request";
+		sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type,
+		    fcall->hdr.tag);
+	} else {
+		sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST],
+		    fcall->hdr.tag);
+	}
+
+	switch (type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version,
+		    fcall->version.msize);
+		return;
+
+	case L9P_TAUTH:
+		l9p_describe_fid(" afid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tauth.uname, fcall->tauth.aname);
+		return;
+
+	case L9P_TATTACH:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" afid=", fcall->tattach.afid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tattach.uname, fcall->tattach.aname);
+		if (version >= L9P_2000U)
+			sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname);
+		return;
+
+	case L9P_RATTACH:
+		l9p_describe_qid(" ", &fcall->rattach.qid, sb);
+		return;
+
+	case L9P_RERROR:
+		sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename,
+		    fcall->error.errnum);
+		return;
+
+	case L9P_RLERROR:
+		sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum,
+		    lookup_linux_errno(fcall->error.errnum));
+		return;
+
+	case L9P_TFLUSH:
+		sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag);
+		return;
+
+	case L9P_RFLUSH:
+		return;
+
+	case L9P_TWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb);
+		if (fcall->twalk.nwname) {
+			sbuf_cat(sb, " wname=\"");
+			for (i = 0; i < fcall->twalk.nwname; i++)
+				sbuf_printf(sb, "%s%s", i == 0 ? "" : "/",
+				    fcall->twalk.wname[i]);
+			sbuf_cat(sb, "\"");
+		}
+		return;
+
+	case L9P_RWALK:
+		sbuf_printf(sb, " wqid=[");
+		for (i = 0; i < fcall->rwalk.nwqid; i++)
+			l9p_describe_qid(i == 0 ? "" : ",",
+			    &fcall->rwalk.wqid[i], sb);
+		sbuf_cat(sb, "]");
+		return;
+
+	case L9P_TOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		return;
+
+	case L9P_ROPEN:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit);
+		return;
+
+	case L9P_TCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tcreate.name, sb);
+		l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		if (version >= L9P_2000U && fcall->tcreate.extension != NULL)
+			l9p_describe_name(" extension=",
+			    fcall->tcreate.extension, sb);
+		return;
+
+	case L9P_RCREATE:
+		l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit);
+		return;
+
+	case L9P_TREAD:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_RREAD:
+	case L9P_RWRITE:
+		sbuf_printf(sb, " count=%" PRIu32, fcall->io.count);
+		return;
+
+	case L9P_TWRITE:
+	case L9P_TREADDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_TCLUNK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RCLUNK:
+		return;
+
+	case L9P_TREMOVE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREMOVE:
+		return;
+
+	case L9P_TSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTAT:
+		l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb);
+		return;
+
+	case L9P_RWSTAT:
+		return;
+
+	case L9P_TSTATFS:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTATFS:
+		l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb);
+		return;
+
+	case L9P_TLOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		return;
+
+	case L9P_RLOPEN:
+		l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit);
+		return;
+
+	case L9P_TLCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlcreate.name, sb);
+		/* confusing: "flags" is open-mode, "mode" is permissions */
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		/* TLCREATE mode/permissions have S_IFREG (0x8000) set */
+		l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb);
+		return;
+
+	case L9P_RLCREATE:
+		l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit);
+		return;
+
+	case L9P_TSYMLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tsymlink.name, sb);
+		l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb);
+		l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb);
+		return;
+
+	case L9P_RSYMLINK:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		return;
+
+	case L9P_TMKNOD:
+		l9p_describe_fid(" dfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmknod.name, sb);
+		/*
+		 * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO
+		 * bits.  The major and minor values are only meaningful
+		 * for S_IFBLK and S_IFCHR, but just decode always here.
+		 */
+		l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb);
+		sbuf_printf(sb, " major=%u minor=%u",
+		    fcall->tmknod.major, fcall->tmknod.minor);
+		l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb);
+		return;
+
+	case L9P_RMKNOD:
+		l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb);
+		return;
+
+	case L9P_TRENAME:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" dfid=", fcall->trename.dfid, sb);
+		l9p_describe_name(" name=", fcall->trename.name, sb);
+		return;
+
+	case L9P_RRENAME:
+		return;
+
+	case L9P_TREADLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREADLINK:
+		l9p_describe_name(" target=", fcall->rreadlink.target, sb);
+		return;
+
+	case L9P_TGETATTR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb);
+		return;
+
+	case L9P_RGETATTR:
+		/* Don't need to decode bits: they're implied by the output */
+		mask = fcall->rgetattr.valid;
+		sbuf_printf(sb, " valid=0x%016" PRIx64, mask);
+		l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb);
+		if (mask & L9PL_GETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb);
+		if (mask & L9PL_GETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb);
+		if (mask & L9PL_GETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb);
+		if (mask & L9PL_GETATTR_NLINK)
+			sbuf_printf(sb, " nlink=%" PRIu64,
+			    fcall->rgetattr.nlink);
+		if (mask & L9PL_GETATTR_RDEV)
+			sbuf_printf(sb, " rdev=0x%" PRIx64,
+			    fcall->rgetattr.rdev);
+		if (mask & L9PL_GETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->rgetattr.size, sb);
+		if (mask & L9PL_GETATTR_BLOCKS)
+			sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64,
+			    fcall->rgetattr.blksize, fcall->rgetattr.blocks);
+		if (mask & L9PL_GETATTR_ATIME)
+			l9p_describe_time(sb, " atime=",
+			    fcall->rgetattr.atime_sec,
+			    fcall->rgetattr.atime_nsec);
+		if (mask & L9PL_GETATTR_MTIME)
+			l9p_describe_time(sb, " mtime=",
+			    fcall->rgetattr.mtime_sec,
+			    fcall->rgetattr.mtime_nsec);
+		if (mask & L9PL_GETATTR_CTIME)
+			l9p_describe_time(sb, " ctime=",
+			    fcall->rgetattr.ctime_sec,
+			    fcall->rgetattr.ctime_nsec);
+		if (mask & L9PL_GETATTR_BTIME)
+			l9p_describe_time(sb, " btime=",
+			    fcall->rgetattr.btime_sec,
+			    fcall->rgetattr.btime_nsec);
+		if (mask & L9PL_GETATTR_GEN)
+			sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen);
+		if (mask & L9PL_GETATTR_DATA_VERSION)
+			sbuf_printf(sb, " data_version=0x%" PRIx64,
+			    fcall->rgetattr.data_version);
+		return;
+
+	case L9P_TSETATTR:
+		/* As with RGETATTR, the output implies the mask bits. */
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		mask = fcall->tsetattr.valid;
+		/* NB: tsetattr valid mask is only 32 bits, hence %08x */
+		sbuf_printf(sb, " valid=0x%08" PRIx64, mask);
+		if (mask & L9PL_SETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb);
+		if (mask & L9PL_SETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb);
+		if (mask & L9PL_SETATTR_GID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.gid, sb);
+		if (mask & L9PL_SETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->tsetattr.size, sb);
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET)
+				l9p_describe_time(sb, " atime=",
+				    fcall->tsetattr.atime_sec,
+				    fcall->tsetattr.atime_nsec);
+			else
+				sbuf_cat(sb, " atime=now");
+		}
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET)
+				l9p_describe_time(sb, " mtime=",
+				    fcall->tsetattr.mtime_sec,
+				    fcall->tsetattr.mtime_nsec);
+			else
+				sbuf_cat(sb, " mtime=now");
+		}
+		if (mask & L9PL_SETATTR_CTIME)
+			sbuf_cat(sb, " ctime=now");
+		return;
+
+	case L9P_RSETATTR:
+		return;
+
+	case L9P_TXATTRWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb);
+		l9p_describe_name(" name=", fcall->txattrwalk.name, sb);
+		return;
+
+	case L9P_RXATTRWALK:
+		l9p_describe_size(" size=", fcall->rxattrwalk.size, sb);
+		return;
+
+	case L9P_TXATTRCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->txattrcreate.name, sb);
+		l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb);
+		sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags);
+		return;
+
+	case L9P_RXATTRCREATE:
+		return;
+
+	case L9P_RREADDIR:
+		l9p_describe_readdir(sb, &fcall->io);
+		return;
+
+	case L9P_TFSYNC:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RFSYNC:
+		return;
+
+	case L9P_TLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* decode better later */
+		sbuf_printf(sb, " type=%d flags=0x%" PRIx32
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->tlock.type, fcall->tlock.flags,
+		    fcall->tlock.start, fcall->tlock.length,
+		    fcall->tlock.proc_id, fcall->tlock.client_id);
+		return;
+
+	case L9P_RLOCK:
+		sbuf_printf(sb, " status=%d", fcall->rlock.status);
+		return;
+
+	case L9P_TGETLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		/* decode better later */
+		sbuf_printf(sb, " type=%d "
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->getlock.type,
+		    fcall->getlock.start, fcall->getlock.length,
+		    fcall->getlock.proc_id, fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb);
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlink.name, sb);
+		return;
+
+	case L9P_RLINK:
+		return;
+
+	case L9P_TMKDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmkdir.name, sb);
+		/* TMKDIR mode/permissions have S_IFDIR set */
+		l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb);
+		return;
+
+	case L9P_RMKDIR:
+		l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb);
+		return;
+
+	case L9P_TRENAMEAT:
+		l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" oldname=", fcall->trenameat.oldname,
+		    sb);
+		l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb);
+		l9p_describe_name(" newname=", fcall->trenameat.newname,
+		    sb);
+		return;
+
+	case L9P_RRENAMEAT:
+		return;
+
+	case L9P_TUNLINKAT:
+		l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tunlinkat.name, sb);
+		l9p_describe_unlinkat_flags(" flags=",
+		    fcall->tunlinkat.flags, sb);
+		return;
+
+	case L9P_RUNLINKAT:
+		return;
+
+	default:
+		sbuf_printf(sb, " <missing case in %s()>", __func__);
+	}
+}
-- 
cgit v1.3