diff options
53 files changed, 28580 insertions, 0 deletions
diff --git a/sys/kern/Make.tags.inc b/sys/kern/Make.tags.inc new file mode 100644 index 000000000000..79cb83a0a22d --- /dev/null +++ b/sys/kern/Make.tags.inc @@ -0,0 +1,19 @@ +# @(#)Make.tags.inc 8.2 (Berkeley) 11/23/94 + +# Common files for "make tags". +# Included by the Makefile for each architecture. + +# Put the ../sys stuff near the end so that subroutine definitions win when +# there is a struct tag with the same name (eg., vmmeter). The real +# solution would probably be for ctags to generate "struct vmmeter" tags. + +COMM= /sys/conf/*.[ch] \ + /sys/dev/*.[ch] /sys/dev/scsi/*.[ch] \ + /sys/isofs/*/*.[ch] \ + /sys/kern/*.[ch] /sys/libkern/*.[ch] \ + /sys/miscfs/*/*.[ch] \ + /sys/net/*.[ch] /sys/netccitt/*.[ch] /sys/netinet/*.[ch] \ + /sys/netiso/*.[ch] /sys/netns/*.[ch] \ + /sys/nfs/*.[ch] /sys/sys/*.[ch] \ + /sys/ufs/*/*.[ch] \ + /sys/vm/*.[ch] diff --git a/sys/kern/Makefile b/sys/kern/Makefile new file mode 100644 index 000000000000..3159d20e9691 --- /dev/null +++ b/sys/kern/Makefile @@ -0,0 +1,50 @@ +# @(#)Makefile 8.3 (Berkeley) 2/14/95 + +# Makefile for kernel tags files, init_sysent, etc. + +ARCH= hp300 i386 luna68k news3400 pmax sparc tahoe vax + +all: + @echo "make tags, make links or init_sysent.c only" + +init_sysent.c syscalls.c ../sys/syscall.h ../sys/syscallargs.h: makesyscalls.sh syscalls.master + -mv -f init_sysent.c init_sysent.c.bak + -mv -f syscalls.c syscalls.c.bak + -mv -f ../sys/syscall.h ../sys/syscall.h.bak + sh makesyscalls.sh syscalls.conf syscalls.master + +# Kernel tags: +# Tags files are built in the top-level directory for each architecture, +# with a makefile listing the architecture-dependent files, etc. The list +# of common files is in ./Make.tags.inc. Links to the correct tags file +# are placed in each source directory. 
We need to have links to tags files +# from the generic directories that are relative to the machine type, even +# via remote mounts; therefore we use symlinks to $SYSTAGS, which points at +# ${SYSDIR}/${MACHINE}/tags. + +SYSTAGS=/var/db/sys_tags +SYSDIR=/sys + +# Directories in which to place tags links (other than machine-dependent) +DGEN= conf \ + dev dev/scsi \ + hp hp/dev hp/hpux \ + kern libkern \ + miscfs miscfs/deadfs miscfs/fdesc miscfs/fifofs miscfs/kernfs \ + miscfs/lofs miscfs/nullfs miscfs/portal miscfs/procfs \ + miscfs/specfs miscfs/umapfs miscfs/union \ + net netccitt netinet netiso netns nfs scripts sys \ + ufs ufs/ffs ufs/lfs ufs/mfs ufs/ufs \ + vm + +tags:: + -for i in ${ARCH}; do \ + (cd ../$$i && make ${MFLAGS} tags); done + +links:: + rm -f ${SYSTAGS} + ln -s ${SYSDIR}/${MACHINE}/tags ${SYSTAGS} + -for i in ${DGEN}; do \ + (cd ../$$i && { rm -f tags; ln -s ${SYSTAGS} tags; }) done + -for i in ${ARCH}; do \ + (cd ../$$i && make ${MFLAGS} SYSTAGS=${SYSTAGS} links); done diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c new file mode 100644 index 000000000000..61a0a14d5087 --- /dev/null +++ b/sys/kern/init_main.c @@ -0,0 +1,412 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)init_main.c 8.16 (Berkeley) 5/14/95 + */ + +#include <sys/param.h> +#include <sys/filedesc.h> +#include <sys/errno.h> +#include <sys/exec.h> +#include <sys/kernel.h> +#include <sys/mount.h> +#include <sys/map.h> +#include <sys/proc.h> +#include <sys/resourcevar.h> +#include <sys/signalvar.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/conf.h> +#include <sys/buf.h> +#include <sys/clist.h> +#include <sys/device.h> +#include <sys/protosw.h> +#include <sys/reboot.h> +#include <sys/user.h> +#include <sys/syscallargs.h> + +#include <ufs/ufs/quota.h> + +#include <machine/cpu.h> + +#include <vm/vm.h> + +#ifdef HPFPLIB +char copyright[] = +"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California.\nCopyright (c) 1992 Hewlett-Packard Company\nCopyright (c) 1992 Motorola Inc.\nAll rights reserved.\n\n"; +#else +char copyright[] = +"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California. All rights reserved.\n\n"; +#endif + +/* Components of the first process -- never freed. */ +struct session session0; +struct pgrp pgrp0; +struct proc proc0; +struct pcred cred0; +struct filedesc0 filedesc0; +struct plimit limit0; +struct vmspace vmspace0; +struct proc *curproc = &proc0; +struct proc *initproc, *pageproc; + +int cmask = CMASK; +extern struct user *proc0paddr; + +struct vnode *rootvp, *swapdev_vp; +int boothowto; +struct timeval boottime; +struct timeval runtime; + +static void start_init __P((struct proc *p, void *framep)); + +/* + * System startup; initialize the world, create process 0, mount root + * filesystem, and fork to create init and pagedaemon. Most of the + * hard work is done in the lower-level initialization routines including + * startup(), which does memory initialization and autoconfiguration. 
+ */ +main(framep) + void *framep; +{ + register struct proc *p; + register struct filedesc0 *fdp; + register struct pdevinit *pdev; + register int i; + int s; + register_t rval[2]; + extern struct pdevinit pdevinit[]; + extern void roundrobin __P((void *)); + extern void schedcpu __P((void *)); + + /* + * Initialize the current process pointer (curproc) before + * any possible traps/probes to simplify trap processing. + */ + p = &proc0; + curproc = p; + /* + * Attempt to find console and initialize + * in case of early panic or other messages. + */ + consinit(); + printf(copyright); + + vm_mem_init(); + kmeminit(); + cpu_startup(); + + /* + * Initialize process and pgrp structures. + */ + procinit(); + + /* + * Create process 0 (the swapper). + */ + LIST_INSERT_HEAD(&allproc, p, p_list); + p->p_pgrp = &pgrp0; + LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); + LIST_INIT(&pgrp0.pg_members); + LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); + + pgrp0.pg_session = &session0; + session0.s_count = 1; + session0.s_leader = p; + + p->p_flag = P_INMEM | P_SYSTEM; + p->p_stat = SRUN; + p->p_nice = NZERO; + bcopy("swapper", p->p_comm, sizeof ("swapper")); + + /* Create credentials. */ + cred0.p_refcnt = 1; + p->p_cred = &cred0; + p->p_ucred = crget(); + p->p_ucred->cr_ngroups = 1; /* group 0 */ + + /* Create the file descriptor table. */ + fdp = &filedesc0; + p->p_fd = &fdp->fd_fd; + fdp->fd_fd.fd_refcnt = 1; + fdp->fd_fd.fd_cmask = cmask; + fdp->fd_fd.fd_ofiles = fdp->fd_dfiles; + fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags; + fdp->fd_fd.fd_nfiles = NDFILE; + + /* Create the limits structures. 
*/ + p->p_limit = &limit0; + for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) + limit0.pl_rlimit[i].rlim_cur = + limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; + limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; + limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC; + i = ptoa(cnt.v_free_count); + limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i; + limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i; + limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3; + limit0.p_refcnt = 1; + + /* Allocate a prototype map so we have something to fork. */ + p->p_vmspace = &vmspace0; + vmspace0.vm_refcnt = 1; + pmap_pinit(&vmspace0.vm_pmap); + vm_map_init(&p->p_vmspace->vm_map, round_page(VM_MIN_ADDRESS), + trunc_page(VM_MAX_ADDRESS), TRUE); + vmspace0.vm_map.pmap = &vmspace0.vm_pmap; + p->p_addr = proc0paddr; /* XXX */ + + /* + * We continue to place resource usage info and signal + * actions in the user struct so they're pageable. + */ + p->p_stats = &p->p_addr->u_stats; + p->p_sigacts = &p->p_addr->u_sigacts; + + /* + * Charge root for one process. + */ + (void)chgproccnt(0, 1); + + rqinit(); + + /* Configure virtual memory system, set vm rlimits. */ + vm_init_limits(p); + + /* Initialize the file systems. */ + vfsinit(); + + /* Start real time and statistics clocks. */ + initclocks(); + + /* Initialize mbuf's. */ + mbinit(); + + /* Initialize clists. */ + clist_init(); + +#ifdef SYSVSHM + /* Initialize System V style shared memory. */ + shminit(); +#endif + + /* Attach pseudo-devices. */ + for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++) + (*pdev->pdev_attach)(pdev->pdev_count); + + /* + * Initialize protocols. Block reception of incoming packets + * until everything is ready. + */ + s = splimp(); + ifinit(); + domaininit(); + splx(s); + +#ifdef GPROF + /* Initialize kernel profiling. */ + kmstartup(); +#endif + + /* Kick off timeout driven events by calling first time. */ + roundrobin(NULL); + schedcpu(NULL); + + /* Mount the root file system. 
*/ + if (vfs_mountroot()) + panic("cannot mount root"); + mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; + + /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ + if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) + panic("cannot find root vnode"); + fdp->fd_fd.fd_cdir = rootvnode; + VREF(fdp->fd_fd.fd_cdir); + VOP_UNLOCK(rootvnode, 0, p); + fdp->fd_fd.fd_rdir = NULL; + swapinit(); + + /* + * Now can look at time, having had a chance to verify the time + * from the file system. Reset p->p_rtime as it may have been + * munched in mi_switch() after the time got set. + */ + p->p_stats->p_start = runtime = mono_time = boottime = time; + p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; + + /* Initialize signal state for process 0. */ + siginit(p); + + /* Create process 1 (init(8)). */ + if (fork(p, NULL, rval)) + panic("fork init"); + if (rval[1]) { + start_init(curproc, framep); + return; + } + + /* Create process 2 (the pageout daemon). */ + if (fork(p, NULL, rval)) + panic("fork pager"); + if (rval[1]) { + /* + * Now in process 2. + */ + p = curproc; + pageproc = p; + p->p_flag |= P_INMEM | P_SYSTEM; /* XXX */ + bcopy("pagedaemon", curproc->p_comm, sizeof ("pagedaemon")); + vm_pageout(); + /* NOTREACHED */ + } + + /* The scheduler is an infinite loop. */ + scheduler(); + /* NOTREACHED */ +} + +/* + * List of paths to try when searching for "init". + */ +static char *initpaths[] = { + "/sbin/init", + "/sbin/oinit", + "/sbin/init.bak", + NULL, +}; + +/* + * Start the initial user process; try exec'ing each pathname in "initpaths". + * The program is invoked with one argument containing the boot flags. 
+ */ +static void +start_init(p, framep) + struct proc *p; + void *framep; +{ + vm_offset_t addr; + struct execve_args /* { + syscallarg(char *) path; + syscallarg(char **) argp; + syscallarg(char **) envp; + } */ args; + int options, i, error; + register_t retval[2]; + char flags[4] = "-", *flagsp; + char **pathp, *path, *ucp, **uap, *arg0, *arg1; + + initproc = p; + + /* + * We need to set the system call frame as if we were entered through + * a syscall() so that when we call execve() below, it will be able + * to set the entry point (see setregs) when it tries to exec. The + * startup code in "locore.s" has allocated space for the frame and + * passed a pointer to that space as main's argument. + */ + cpu_set_init_frame(p, framep); + + /* + * Need just enough stack to hold the faked-up "execve()" arguments. + */ + addr = trunc_page(VM_MAX_ADDRESS - PAGE_SIZE); + if (vm_allocate(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, FALSE) != 0) + panic("init: couldn't allocate argument space"); + p->p_vmspace->vm_maxsaddr = (caddr_t)addr; + + for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) { + /* + * Construct the boot flag argument. + */ + options = 0; + flagsp = flags + 1; + ucp = (char *)USRSTACK; + if (boothowto & RB_SINGLE) { + *flagsp++ = 's'; + options = 1; + } +#ifdef notyet + if (boothowto & RB_FASTBOOT) { + *flagsp++ = 'f'; + options = 1; + } +#endif + /* + * Move out the flags (arg 1), if necessary. + */ + if (options != 0) { + *flagsp++ = '\0'; + i = flagsp - flags; + (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i); + arg1 = ucp; + } + + /* + * Move out the file name (also arg 0). + */ + i = strlen(path) + 1; + (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i); + arg0 = ucp; + + /* + * Move out the arg pointers. 
+ */ + uap = (char **)((long)ucp & ~ALIGNBYTES); + (void)suword((caddr_t)--uap, 0); /* terminator */ + if (options != 0) + (void)suword((caddr_t)--uap, (long)arg1); + (void)suword((caddr_t)--uap, (long)arg0); + + /* + * Point at the arguments. + */ + SCARG(&args, path) = arg0; + SCARG(&args, argp) = uap; + SCARG(&args, envp) = NULL; + + /* + * Now try to exec the program. If can't for any reason + * other than it doesn't exist, complain. + */ + if ((error = execve(p, &args, retval)) == 0) + return; + if (error != ENOENT) + printf("exec %s: error %d\n", path, error); + } + printf("init: not found\n"); + panic("no init"); +} diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c new file mode 100644 index 000000000000..0bbdd2025b6b --- /dev/null +++ b/sys/kern/init_sysent.c @@ -0,0 +1,767 @@ +/* + * System call switch table. + * + * DO NOT EDIT-- this file is automatically generated. + * created from @(#)syscalls.master 8.6 (Berkeley) 3/30/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/signal.h> +#include <sys/mount.h> +#include <sys/syscallargs.h> +int nosys(); +int exit(); +int fork(); +int read(); +int write(); +int open(); +int close(); +int wait4(); +int link(); +int unlink(); +int chdir(); +int fchdir(); +int mknod(); +int chmod(); +int chown(); +int obreak(); +int getfsstat(); +int getpid(); +int mount(); +int unmount(); +int setuid(); +int getuid(); +int geteuid(); +int ptrace(); +int recvmsg(); +int sendmsg(); +int recvfrom(); +int accept(); +int getpeername(); +int getsockname(); +int access(); +int chflags(); +int fchflags(); +int sync(); +int kill(); +int getppid(); +int dup(); +int pipe(); +int getegid(); +int profil(); +#ifdef KTRACE +int ktrace(); +#else +#endif +int sigaction(); +int getgid(); +int sigprocmask(); +int getlogin(); +int setlogin(); +int acct(); +int sigpending(); +int sigaltstack(); +int ioctl(); +int reboot(); +int revoke(); +int symlink(); +int readlink(); +int execve(); +int umask(); +int chroot(); 
+int msync(); +int vfork(); +int sbrk(); +int sstk(); +int ovadvise(); +int munmap(); +int mprotect(); +int madvise(); +int mincore(); +int getgroups(); +int setgroups(); +int getpgrp(); +int setpgid(); +int setitimer(); +int swapon(); +int getitimer(); +int getdtablesize(); +int dup2(); +int fcntl(); +int select(); +int fsync(); +int setpriority(); +int socket(); +int connect(); +int getpriority(); +int sigreturn(); +int bind(); +int setsockopt(); +int listen(); +int sigsuspend(); +#ifdef TRACE +int vtrace(); +#else +#endif +int gettimeofday(); +int getrusage(); +int getsockopt(); +#ifdef vax +int resuba(); +#else +#endif +int readv(); +int writev(); +int settimeofday(); +int fchown(); +int fchmod(); +int rename(); +int flock(); +int mkfifo(); +int sendto(); +int shutdown(); +int socketpair(); +int mkdir(); +int rmdir(); +int utimes(); +int adjtime(); +int setsid(); +int quotactl(); +#ifdef NFS +int nfssvc(); +#else +#endif +int statfs(); +int fstatfs(); +#ifdef NFS +int getfh(); +#else +#endif +#if defined(SYSVSHM) && !defined(alpha) +#else +#endif +int setgid(); +int setegid(); +int seteuid(); +#ifdef LFS +int lfs_bmapv(); +int lfs_markv(); +int lfs_segclean(); +int lfs_segwait(); +#else +#endif +int stat(); +int fstat(); +int lstat(); +int pathconf(); +int fpathconf(); +int getrlimit(); +int setrlimit(); +int getdirentries(); +int mmap(); +int nosys(); +int lseek(); +int truncate(); +int ftruncate(); +int __sysctl(); +int mlock(); +int munlock(); +int undelete(); +#if defined(SYSVSHM) && 0 +int shmat(); +int shmctl(); +int shmdt(); +int shmget(); +#else +#endif + +#ifdef COMPAT_43 +#define compat_43(func) __CONCAT(compat_43_,func) + +int compat_43(creat)(); +int compat_43(lseek)(); +int compat_43(stat)(); +int compat_43(lstat)(); +#ifdef KTRACE +#else +#endif +int compat_43(fstat)(); +int compat_43(getkerninfo)(); +int compat_43(getpagesize)(); +int compat_43(mmap)(); +int compat_43(wait)(); +int compat_43(gethostname)(); +int compat_43(sethostname)(); +int 
compat_43(accept)(); +int compat_43(send)(); +int compat_43(recv)(); +int compat_43(sigvec)(); +int compat_43(sigblock)(); +int compat_43(sigsetmask)(); +int compat_43(sigstack)(); +int compat_43(recvmsg)(); +int compat_43(sendmsg)(); +#ifdef TRACE +#else +#endif +#ifdef vax +#else +#endif +int compat_43(recvfrom)(); +int compat_43(setreuid)(); +int compat_43(setregid)(); +int compat_43(truncate)(); +int compat_43(ftruncate)(); +int compat_43(getpeername)(); +int compat_43(gethostid)(); +int compat_43(sethostid)(); +int compat_43(getrlimit)(); +int compat_43(setrlimit)(); +int compat_43(killpg)(); +int compat_43(quota)(); +int compat_43(getsockname)(); +#ifdef NFS +#else +#endif +int compat_43(getdirentries)(); +#ifdef NFS +#else +#endif +#if defined(SYSVSHM) && !defined(alpha) +int compat_43(shmsys)(); +#else +#endif +#ifdef LFS +#else +#endif +#if defined(SYSVSHM) && 0 +#else +#endif + +#else /* COMPAT_43 */ +#define compat_43(func) nosys +#endif /* COMPAT_43 */ + +#define s(type) sizeof(type) + +struct sysent sysent[] = { + { 0, 0, + nosys }, /* 0 = syscall */ + { 1, s(struct exit_args), + exit }, /* 1 = exit */ + { 0, 0, + fork }, /* 2 = fork */ + { 3, s(struct read_args), + read }, /* 3 = read */ + { 3, s(struct write_args), + write }, /* 4 = write */ + { 3, s(struct open_args), + open }, /* 5 = open */ + { 1, s(struct close_args), + close }, /* 6 = close */ + { 4, s(struct wait4_args), + wait4 }, /* 7 = wait4 */ + { 2, s(struct compat_43_creat_args), + compat_43(creat) }, /* 8 = compat_43 creat */ + { 2, s(struct link_args), + link }, /* 9 = link */ + { 1, s(struct unlink_args), + unlink }, /* 10 = unlink */ + { 0, 0, + nosys }, /* 11 = obsolete execv */ + { 1, s(struct chdir_args), + chdir }, /* 12 = chdir */ + { 1, s(struct fchdir_args), + fchdir }, /* 13 = fchdir */ + { 3, s(struct mknod_args), + mknod }, /* 14 = mknod */ + { 2, s(struct chmod_args), + chmod }, /* 15 = chmod */ + { 3, s(struct chown_args), + chown }, /* 16 = chown */ + { 1, s(struct 
obreak_args), + obreak }, /* 17 = break */ + { 3, s(struct getfsstat_args), + getfsstat }, /* 18 = getfsstat */ + { 3, s(struct compat_43_lseek_args), + compat_43(lseek) }, /* 19 = compat_43 lseek */ + { 0, 0, + getpid }, /* 20 = getpid */ + { 4, s(struct mount_args), + mount }, /* 21 = mount */ + { 2, s(struct unmount_args), + unmount }, /* 22 = unmount */ + { 1, s(struct setuid_args), + setuid }, /* 23 = setuid */ + { 0, 0, + getuid }, /* 24 = getuid */ + { 0, 0, + geteuid }, /* 25 = geteuid */ + { 4, s(struct ptrace_args), + ptrace }, /* 26 = ptrace */ + { 3, s(struct recvmsg_args), + recvmsg }, /* 27 = recvmsg */ + { 3, s(struct sendmsg_args), + sendmsg }, /* 28 = sendmsg */ + { 6, s(struct recvfrom_args), + recvfrom }, /* 29 = recvfrom */ + { 3, s(struct accept_args), + accept }, /* 30 = accept */ + { 3, s(struct getpeername_args), + getpeername }, /* 31 = getpeername */ + { 3, s(struct getsockname_args), + getsockname }, /* 32 = getsockname */ + { 2, s(struct access_args), + access }, /* 33 = access */ + { 2, s(struct chflags_args), + chflags }, /* 34 = chflags */ + { 2, s(struct fchflags_args), + fchflags }, /* 35 = fchflags */ + { 0, 0, + sync }, /* 36 = sync */ + { 2, s(struct kill_args), + kill }, /* 37 = kill */ + { 2, s(struct compat_43_stat_args), + compat_43(stat) }, /* 38 = compat_43 stat */ + { 0, 0, + getppid }, /* 39 = getppid */ + { 2, s(struct compat_43_lstat_args), + compat_43(lstat) }, /* 40 = compat_43 lstat */ + { 1, s(struct dup_args), + dup }, /* 41 = dup */ + { 0, 0, + pipe }, /* 42 = pipe */ + { 0, 0, + getegid }, /* 43 = getegid */ + { 4, s(struct profil_args), + profil }, /* 44 = profil */ +#ifdef KTRACE + { 4, s(struct ktrace_args), + ktrace }, /* 45 = ktrace */ +#else + { 0, 0, + nosys }, /* 45 = unimplemented ktrace */ +#endif + { 3, s(struct sigaction_args), + sigaction }, /* 46 = sigaction */ + { 0, 0, + getgid }, /* 47 = getgid */ + { 2, s(struct sigprocmask_args), + sigprocmask }, /* 48 = sigprocmask */ + { 2, s(struct 
getlogin_args), + getlogin }, /* 49 = getlogin */ + { 1, s(struct setlogin_args), + setlogin }, /* 50 = setlogin */ + { 1, s(struct acct_args), + acct }, /* 51 = acct */ + { 0, 0, + sigpending }, /* 52 = sigpending */ + { 2, s(struct sigaltstack_args), + sigaltstack }, /* 53 = sigaltstack */ + { 3, s(struct ioctl_args), + ioctl }, /* 54 = ioctl */ + { 1, s(struct reboot_args), + reboot }, /* 55 = reboot */ + { 1, s(struct revoke_args), + revoke }, /* 56 = revoke */ + { 2, s(struct symlink_args), + symlink }, /* 57 = symlink */ + { 3, s(struct readlink_args), + readlink }, /* 58 = readlink */ + { 3, s(struct execve_args), + execve }, /* 59 = execve */ + { 1, s(struct umask_args), + umask }, /* 60 = umask */ + { 1, s(struct chroot_args), + chroot }, /* 61 = chroot */ + { 2, s(struct compat_43_fstat_args), + compat_43(fstat) }, /* 62 = compat_43 fstat */ + { 4, s(struct compat_43_getkerninfo_args), + compat_43(getkerninfo) }, /* 63 = compat_43 getkerninfo */ + { 0, 0, + compat_43(getpagesize) }, /* 64 = compat_43 getpagesize */ + { 2, s(struct msync_args), + msync }, /* 65 = msync */ + { 0, 0, + vfork }, /* 66 = vfork */ + { 0, 0, + nosys }, /* 67 = obsolete vread */ + { 0, 0, + nosys }, /* 68 = obsolete vwrite */ + { 1, s(struct sbrk_args), + sbrk }, /* 69 = sbrk */ + { 1, s(struct sstk_args), + sstk }, /* 70 = sstk */ + { 6, s(struct compat_43_mmap_args), + compat_43(mmap) }, /* 71 = compat_43 mmap */ + { 1, s(struct ovadvise_args), + ovadvise }, /* 72 = vadvise */ + { 2, s(struct munmap_args), + munmap }, /* 73 = munmap */ + { 3, s(struct mprotect_args), + mprotect }, /* 74 = mprotect */ + { 3, s(struct madvise_args), + madvise }, /* 75 = madvise */ + { 0, 0, + nosys }, /* 76 = obsolete vhangup */ + { 0, 0, + nosys }, /* 77 = obsolete vlimit */ + { 3, s(struct mincore_args), + mincore }, /* 78 = mincore */ + { 2, s(struct getgroups_args), + getgroups }, /* 79 = getgroups */ + { 2, s(struct setgroups_args), + setgroups }, /* 80 = setgroups */ + { 0, 0, + getpgrp }, 
/* 81 = getpgrp */ + { 2, s(struct setpgid_args), + setpgid }, /* 82 = setpgid */ + { 3, s(struct setitimer_args), + setitimer }, /* 83 = setitimer */ + { 0, 0, + compat_43(wait) }, /* 84 = compat_43 wait */ + { 1, s(struct swapon_args), + swapon }, /* 85 = swapon */ + { 2, s(struct getitimer_args), + getitimer }, /* 86 = getitimer */ + { 2, s(struct compat_43_gethostname_args), + compat_43(gethostname) }, /* 87 = compat_43 gethostname */ + { 2, s(struct compat_43_sethostname_args), + compat_43(sethostname) }, /* 88 = compat_43 sethostname */ + { 0, 0, + getdtablesize }, /* 89 = getdtablesize */ + { 2, s(struct dup2_args), + dup2 }, /* 90 = dup2 */ + { 0, 0, + nosys }, /* 91 = unimplemented getdopt */ + { 3, s(struct fcntl_args), + fcntl }, /* 92 = fcntl */ + { 5, s(struct select_args), + select }, /* 93 = select */ + { 0, 0, + nosys }, /* 94 = unimplemented setdopt */ + { 1, s(struct fsync_args), + fsync }, /* 95 = fsync */ + { 3, s(struct setpriority_args), + setpriority }, /* 96 = setpriority */ + { 3, s(struct socket_args), + socket }, /* 97 = socket */ + { 3, s(struct connect_args), + connect }, /* 98 = connect */ + { 3, s(struct compat_43_accept_args), + compat_43(accept) }, /* 99 = compat_43 accept */ + { 2, s(struct getpriority_args), + getpriority }, /* 100 = getpriority */ + { 4, s(struct compat_43_send_args), + compat_43(send) }, /* 101 = compat_43 send */ + { 4, s(struct compat_43_recv_args), + compat_43(recv) }, /* 102 = compat_43 recv */ + { 1, s(struct sigreturn_args), + sigreturn }, /* 103 = sigreturn */ + { 3, s(struct bind_args), + bind }, /* 104 = bind */ + { 5, s(struct setsockopt_args), + setsockopt }, /* 105 = setsockopt */ + { 2, s(struct listen_args), + listen }, /* 106 = listen */ + { 0, 0, + nosys }, /* 107 = obsolete vtimes */ + { 3, s(struct compat_43_sigvec_args), + compat_43(sigvec) }, /* 108 = compat_43 sigvec */ + { 1, s(struct compat_43_sigblock_args), + compat_43(sigblock) }, /* 109 = compat_43 sigblock */ + { 1, s(struct 
compat_43_sigsetmask_args), + compat_43(sigsetmask) }, /* 110 = compat_43 sigsetmask */ + { 1, s(struct sigsuspend_args), + sigsuspend }, /* 111 = sigsuspend */ + { 2, s(struct compat_43_sigstack_args), + compat_43(sigstack) }, /* 112 = compat_43 sigstack */ + { 3, s(struct compat_43_recvmsg_args), + compat_43(recvmsg) }, /* 113 = compat_43 recvmsg */ + { 3, s(struct compat_43_sendmsg_args), + compat_43(sendmsg) }, /* 114 = compat_43 sendmsg */ +#ifdef TRACE + { 2, s(struct vtrace_args), + vtrace }, /* 115 = vtrace */ +#else + { 0, 0, + nosys }, /* 115 = obsolete vtrace */ +#endif + { 2, s(struct gettimeofday_args), + gettimeofday }, /* 116 = gettimeofday */ + { 2, s(struct getrusage_args), + getrusage }, /* 117 = getrusage */ + { 5, s(struct getsockopt_args), + getsockopt }, /* 118 = getsockopt */ +#ifdef vax + { 1, s(struct resuba_args), + resuba }, /* 119 = resuba */ +#else + { 0, 0, + nosys }, /* 119 = unimplemented resuba */ +#endif + { 3, s(struct readv_args), + readv }, /* 120 = readv */ + { 3, s(struct writev_args), + writev }, /* 121 = writev */ + { 2, s(struct settimeofday_args), + settimeofday }, /* 122 = settimeofday */ + { 3, s(struct fchown_args), + fchown }, /* 123 = fchown */ + { 2, s(struct fchmod_args), + fchmod }, /* 124 = fchmod */ + { 6, s(struct compat_43_recvfrom_args), + compat_43(recvfrom) }, /* 125 = compat_43 recvfrom */ + { 2, s(struct compat_43_setreuid_args), + compat_43(setreuid) }, /* 126 = compat_43 setreuid */ + { 2, s(struct compat_43_setregid_args), + compat_43(setregid) }, /* 127 = compat_43 setregid */ + { 2, s(struct rename_args), + rename }, /* 128 = rename */ + { 2, s(struct compat_43_truncate_args), + compat_43(truncate) }, /* 129 = compat_43 truncate */ + { 2, s(struct compat_43_ftruncate_args), + compat_43(ftruncate) }, /* 130 = compat_43 ftruncate */ + { 2, s(struct flock_args), + flock }, /* 131 = flock */ + { 2, s(struct mkfifo_args), + mkfifo }, /* 132 = mkfifo */ + { 6, s(struct sendto_args), + sendto }, /* 133 = 
sendto */ + { 2, s(struct shutdown_args), + shutdown }, /* 134 = shutdown */ + { 4, s(struct socketpair_args), + socketpair }, /* 135 = socketpair */ + { 2, s(struct mkdir_args), + mkdir }, /* 136 = mkdir */ + { 1, s(struct rmdir_args), + rmdir }, /* 137 = rmdir */ + { 2, s(struct utimes_args), + utimes }, /* 138 = utimes */ + { 0, 0, + nosys }, /* 139 = obsolete 4.2 sigreturn */ + { 2, s(struct adjtime_args), + adjtime }, /* 140 = adjtime */ + { 3, s(struct compat_43_getpeername_args), + compat_43(getpeername) }, /* 141 = compat_43 getpeername */ + { 0, 0, + compat_43(gethostid) }, /* 142 = compat_43 gethostid */ + { 1, s(struct compat_43_sethostid_args), + compat_43(sethostid) }, /* 143 = compat_43 sethostid */ + { 2, s(struct compat_43_getrlimit_args), + compat_43(getrlimit) }, /* 144 = compat_43 getrlimit */ + { 2, s(struct compat_43_setrlimit_args), + compat_43(setrlimit) }, /* 145 = compat_43 setrlimit */ + { 2, s(struct compat_43_killpg_args), + compat_43(killpg) }, /* 146 = compat_43 killpg */ + { 0, 0, + setsid }, /* 147 = setsid */ + { 4, s(struct quotactl_args), + quotactl }, /* 148 = quotactl */ + { 0, 0, + compat_43(quota) }, /* 149 = compat_43 quota */ + { 3, s(struct compat_43_getsockname_args), + compat_43(getsockname) }, /* 150 = compat_43 getsockname */ + { 0, 0, + nosys }, /* 151 = unimplemented */ + { 0, 0, + nosys }, /* 152 = unimplemented */ + { 0, 0, + nosys }, /* 153 = unimplemented */ + { 0, 0, + nosys }, /* 154 = unimplemented */ +#ifdef NFS + { 2, s(struct nfssvc_args), + nfssvc }, /* 155 = nfssvc */ +#else + { 0, 0, + nosys }, /* 155 = unimplemented nfssvc */ +#endif + { 4, s(struct compat_43_getdirentries_args), + compat_43(getdirentries) }, /* 156 = compat_43 getdirentries */ + { 2, s(struct statfs_args), + statfs }, /* 157 = statfs */ + { 2, s(struct fstatfs_args), + fstatfs }, /* 158 = fstatfs */ + { 0, 0, + nosys }, /* 159 = unimplemented */ + { 0, 0, + nosys }, /* 160 = unimplemented */ +#ifdef NFS + { 2, s(struct getfh_args), + 
getfh }, /* 161 = getfh */ +#else + { 0, 0, + nosys }, /* 161 = unimplemented getfh */ +#endif + { 0, 0, + nosys }, /* 162 = unimplemented getdomainname */ + { 0, 0, + nosys }, /* 163 = unimplemented setdomainname */ + { 0, 0, + nosys }, /* 164 = unimplemented */ + { 0, 0, + nosys }, /* 165 = unimplemented */ + { 0, 0, + nosys }, /* 166 = unimplemented */ + { 0, 0, + nosys }, /* 167 = unimplemented */ + { 0, 0, + nosys }, /* 168 = unimplemented */ + { 0, 0, + nosys }, /* 169 = unimplemented semsys */ + { 0, 0, + nosys }, /* 170 = unimplemented msgsys */ +#if defined(SYSVSHM) && !defined(alpha) + { 4, s(struct compat_43_shmsys_args), + compat_43(shmsys) }, /* 171 = compat_43 shmsys */ +#else + { 0, 0, + nosys }, /* 171 = unimplemented shmsys */ +#endif + { 0, 0, + nosys }, /* 172 = unimplemented */ + { 0, 0, + nosys }, /* 173 = unimplemented */ + { 0, 0, + nosys }, /* 174 = unimplemented */ + { 0, 0, + nosys }, /* 175 = unimplemented */ + { 0, 0, + nosys }, /* 176 = unimplemented */ + { 0, 0, + nosys }, /* 177 = unimplemented */ + { 0, 0, + nosys }, /* 178 = unimplemented */ + { 0, 0, + nosys }, /* 179 = unimplemented */ + { 0, 0, + nosys }, /* 180 = unimplemented */ + { 1, s(struct setgid_args), + setgid }, /* 181 = setgid */ + { 1, s(struct setegid_args), + setegid }, /* 182 = setegid */ + { 1, s(struct seteuid_args), + seteuid }, /* 183 = seteuid */ +#ifdef LFS + { 3, s(struct lfs_bmapv_args), + lfs_bmapv }, /* 184 = lfs_bmapv */ + { 3, s(struct lfs_markv_args), + lfs_markv }, /* 185 = lfs_markv */ + { 2, s(struct lfs_segclean_args), + lfs_segclean }, /* 186 = lfs_segclean */ + { 2, s(struct lfs_segwait_args), + lfs_segwait }, /* 187 = lfs_segwait */ +#else + { 0, 0, + nosys }, /* 184 = unimplemented lfs_bmapv */ + { 0, 0, + nosys }, /* 185 = unimplemented lfs_markv */ + { 0, 0, + nosys }, /* 186 = unimplemented lfs_segclean */ + { 0, 0, + nosys }, /* 187 = unimplemented lfs_segwait */ +#endif + { 2, s(struct stat_args), + stat }, /* 188 = stat */ + { 2, s(struct 
fstat_args), + fstat }, /* 189 = fstat */ + { 2, s(struct lstat_args), + lstat }, /* 190 = lstat */ + { 2, s(struct pathconf_args), + pathconf }, /* 191 = pathconf */ + { 2, s(struct fpathconf_args), + fpathconf }, /* 192 = fpathconf */ + { 0, 0, + nosys }, /* 193 = unimplemented */ + { 2, s(struct getrlimit_args), + getrlimit }, /* 194 = getrlimit */ + { 2, s(struct setrlimit_args), + setrlimit }, /* 195 = setrlimit */ + { 4, s(struct getdirentries_args), + getdirentries }, /* 196 = getdirentries */ + { 7, s(struct mmap_args), + mmap }, /* 197 = mmap */ + { 0, 0, + nosys }, /* 198 = __syscall */ + { 4, s(struct lseek_args), + lseek }, /* 199 = lseek */ + { 3, s(struct truncate_args), + truncate }, /* 200 = truncate */ + { 3, s(struct ftruncate_args), + ftruncate }, /* 201 = ftruncate */ + { 6, s(struct __sysctl_args), + __sysctl }, /* 202 = __sysctl */ + { 2, s(struct mlock_args), + mlock }, /* 203 = mlock */ + { 2, s(struct munlock_args), + munlock }, /* 204 = munlock */ + { 1, s(struct undelete_args), + undelete }, /* 205 = undelete */ + { 0, 0, + nosys }, /* 206 = unimplemented */ + { 0, 0, + nosys }, /* 207 = unimplemented */ + { 0, 0, + nosys }, /* 208 = unimplemented */ + { 0, 0, + nosys }, /* 209 = unimplemented */ + { 0, 0, + nosys }, /* 210 = unimplemented */ + { 0, 0, + nosys }, /* 211 = unimplemented */ + { 0, 0, + nosys }, /* 212 = unimplemented */ + { 0, 0, + nosys }, /* 213 = unimplemented */ + { 0, 0, + nosys }, /* 214 = unimplemented */ + { 0, 0, + nosys }, /* 215 = unimplemented */ + { 0, 0, + nosys }, /* 216 = unimplemented */ + { 0, 0, + nosys }, /* 217 = unimplemented */ + { 0, 0, + nosys }, /* 218 = unimplemented */ + { 0, 0, + nosys }, /* 219 = unimplemented */ + { 0, 0, + nosys }, /* 220 = unimplemented semctl */ + { 0, 0, + nosys }, /* 221 = unimplemented semget */ + { 0, 0, + nosys }, /* 222 = unimplemented semop */ + { 0, 0, + nosys }, /* 223 = unimplemented semconfig */ + { 0, 0, + nosys }, /* 224 = unimplemented msgctl */ + { 0, 0, + 
nosys }, /* 225 = unimplemented msgget */ + { 0, 0, + nosys }, /* 226 = unimplemented msgsnd */ + { 0, 0, + nosys }, /* 227 = unimplemented msgrcv */ +#if defined(SYSVSHM) && 0 + { 3, s(struct shmat_args), + shmat }, /* 228 = shmat */ + { 3, s(struct shmctl_args), + shmctl }, /* 229 = shmctl */ + { 1, s(struct shmdt_args), + shmdt }, /* 230 = shmdt */ + { 3, s(struct shmget_args), + shmget }, /* 231 = shmget */ +#else + { 0, 0, + nosys }, /* 228 = unimplemented shmat */ + { 0, 0, + nosys }, /* 229 = unimplemented shmctl */ + { 0, 0, + nosys }, /* 230 = unimplemented shmdt */ + { 0, 0, + nosys }, /* 231 = unimplemented shmget */ +#endif +}; + +int nsysent= sizeof(sysent) / sizeof(sysent[0]); diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c new file mode 100644 index 000000000000..a23543ce0e38 --- /dev/null +++ b/sys/kern/kern_acct.c @@ -0,0 +1,127 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)kern_acct.c	8.8 (Berkeley) 5/14/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/syslog.h>
#include <sys/kernel.h>

/*
 * acct(2) system call: enable or disable process accounting.
 *
 * The implementation was deleted from this (4.4BSD-Lite) release for
 * licensing reasons; this stub always fails with ENOSYS.
 *
 * a1 = calling process, a2 = syscall arguments (accounting file path),
 * a3 = return-value pointer (unused by the stub).
 */
acct(a1, a2, a3)
	struct proc *a1;
	struct acct_args /* {
		syscallarg(char *) path;
	} */ *a2;
	int *a3;
{

	/*
	 * Body deleted.
	 */
	return (ENOSYS);
}

/*
 * Write an accounting record for the exiting process a1; invoked from
 * exit1() in kern_exit.c.  Body deleted in this release: a no-op stub.
 */
acct_process(a1)
	struct proc *a1;
{

	/*
	 * Body deleted.
	 */
	return;
}

/*
 * Periodically check the file system to see if accounting
 * should be turned on or off.  Beware the case where the vnode
 * has been vgone()'d out from underneath us, e.g. when the file
 * system containing the accounting file has been forcibly unmounted.
 */

/*
 * Values associated with enabling and disabling accounting
 */
int	acctsuspend = 2;	/* stop accounting when < 2% free space left */
int	acctresume = 4;		/* resume when free space risen to > 4% */
int	acctchkfreq = 15;	/* frequency (in seconds) to check space */

/*
 * SHOULD REPLACE THIS WITH A DRIVER THAT CAN BE READ TO SIMPLIFY.
 */
struct	vnode *acctp;		/* vnode of the active accounting file, or NULL */
struct	vnode *savacctp;	/* saved vnode while accounting is suspended */

/*
 * Self-rearming timeout handler: every acctchkfreq seconds, compare the
 * free space on the filesystem holding the accounting file against the
 * acctsuspend/acctresume thresholds and move the vnode between acctp
 * (active) and savacctp (suspended) accordingly.  The argument `a' is
 * the unused timeout cookie.
 */
/* ARGSUSED */
void
acctwatch(a)
	void *a;
{
	struct statfs sb;

	if (savacctp) {
		/* Accounting is currently suspended. */
		if (savacctp->v_type == VBAD) {
			/* File vgone()'d (e.g. forced unmount): drop it. */
			(void) vn_close(savacctp, FWRITE, NOCRED, NULL);
			savacctp = NULL;
			return;
		}
		(void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0);
		if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
			acctp = savacctp;
			savacctp = NULL;
			log(LOG_NOTICE, "Accounting resumed\n");
		}
	} else {
		if (acctp == NULL)
			return;
		if (acctp->v_type == VBAD) {
			/* File vgone()'d out from under us: drop it. */
			(void) vn_close(acctp, FWRITE, NOCRED, NULL);
			acctp = NULL;
			return;
		}
		(void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0);
		if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
			savacctp = acctp;
			acctp = NULL;
			log(LOG_NOTICE, "Accounting suspended\n");
		}
	}
	/* Re-arm ourselves for the next periodic check. */
	timeout(acctwatch, NULL, acctchkfreq * hz);
}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
new file mode 100644
index 000000000000..3f2e4241b49a
--- /dev/null
+++ b/sys/kern/kern_descrip.c
@@ -0,0 +1,930 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */

/*
 * System calls on descriptors.
 */

/*
 * getdtablesize(2): return the per-process descriptor table limit,
 * capped by the system-wide maxfiles.
 */
/* ARGSUSED */
int
getdtablesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	return (0);
}

/*
 * Duplicate a file descriptor.
 *
 * dup(2): copy descriptor fd to the lowest free slot.  Returns EBADF for
 * an out-of-range or closed fd; otherwise the new descriptor number is
 * stored through retval by finishdup().
 */
/* ARGSUSED */
int
dup(p, uap, retval)
	struct proc *p;
	struct dup_args /* {
		syscallarg(u_int) fd;
	} */ *uap;
	register_t *retval;
{
	register struct filedesc *fdp;
	u_int old;
	int new, error;

	old = SCARG(uap, fd);
	/*
	 * XXX Compatibility
	 *
	 * NOTE(review): any bit above 077 set in the fd diverts to dup2()
	 * with the fd masked to 6 bits -- presumably an old dup()-encoding
	 * compatibility hack; confirm against historical libc before
	 * removing.
	 */
	if (old &~ 077) {
		SCARG(uap, fd) &= 077;
		return (dup2(p, uap, retval));
	}

	fdp = p->p_fd;
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
		return (EBADF);
	if (error = fdalloc(p, 0, &new))
		return (error);
	return (finishdup(fdp, (int)old, new, retval));
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * dup2(2): duplicate descriptor `from' onto descriptor `to', closing
 * `to' first if it is open.  Per the comment below, the implicit close
 * may not fail the call.
 */
/* ARGSUSED */
int
dup2(p, uap, retval)
	struct proc *p;
	struct dup2_args /* {
		syscallarg(u_int) from;
		syscallarg(u_int) to;
	} */ *uap;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int old = SCARG(uap, from), new = SCARG(uap, to);
	int i, error;

	/*
	 * NOTE(review): an out-of-range `to' yields EBADF here; POSIX
	 * specifies EBADF only for a bad `from'.  Left as-is (historical
	 * behavior).
	 */
	if (old >= fdp->fd_nfiles ||
	    fdp->fd_ofiles[old] == NULL ||
	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfiles)
		return (EBADF);
	if (old == new) {
		*retval = new;
		return (0);
	}
	if (new >= fdp->fd_nfiles) {
		/* Grow the table so slot `new' exists. */
		if (error = fdalloc(p, new, &i))
			return (error);
		if (new != i)
			panic("dup2: fdalloc");
	} else if (fdp->fd_ofiles[new]) {
		if (fdp->fd_ofileflags[new] & UF_MAPPED)
			(void) munmapfd(p, new);
		/*
		 * dup2() must succeed even if the close has an error.
		 */
		(void) closef(fdp->fd_ofiles[new], p);
	}
	return (finishdup(fdp, (int)old, (int)new, retval));
}

/*
 * The file control system call.
 *
 * fcntl(2): dispatch on cmd.  `arg' is a void * whose meaning is
 * per-command (an int for F_DUPFD/F_SETFD/F_SETFL/F_SETOWN, a user
 * struct flock pointer for the locking commands).
 */
/* ARGSUSED */
int
fcntl(p, uap, retval)
	struct proc *p;
	register struct fcntl_args /* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	register char *pop;
	struct vnode *vp;
	int i, tmp, error, flg = F_POSIX;
	struct flock fl;
	u_int newmin;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	pop = &fdp->fd_ofileflags[fd];	/* per-descriptor flag byte */
	switch (SCARG(uap, cmd)) {

	case F_DUPFD:
		/* Duplicate to the lowest free descriptor >= arg. */
		newmin = (long)SCARG(uap, arg);
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfiles)
			return (EINVAL);
		if (error = fdalloc(p, newmin, &i))
			return (error);
		return (finishdup(fdp, fd, i, retval));

	case F_GETFD:
		/* Low bit of the flag byte is the close-on-exec flag. */
		*retval = *pop & 1;
		return (0);

	case F_SETFD:
		*pop = (*pop &~ 1) | ((long)SCARG(uap, arg) & 1);
		return (0);

	case F_GETFL:
		*retval = OFLAGS(fp->f_flag);
		return (0);

	case F_SETFL:
		/*
		 * Install the new FCNTLFLAGS bits, then push FNONBLOCK and
		 * FASYNC down to the object via ioctl.  If FIOASYNC fails,
		 * undo the FIONBIO change before returning the error.
		 */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
		tmp = fp->f_flag & FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		if (error)
			return (error);
		tmp = fp->f_flag & FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		if (!error)
			return (0);
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		return (error);

	case F_GETOWN:
		/*
		 * Sockets keep the pgid directly; other types answer
		 * TIOCGPGRP, negated for the historical F_GETOWN encoding.
		 */
		if (fp->f_type == DTYPE_SOCKET) {
			*retval = ((struct socket *)fp->f_data)->so_pgid;
			return (0);
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCGPGRP, (caddr_t)retval, p);
		*retval = -*retval;
		return (error);

	case F_SETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid =
			    (long)SCARG(uap, arg);
			return (0);
		}
		/*
		 * arg <= 0 names a process group (negated); arg > 0 names a
		 * process, which must exist, and is translated to its pgrp.
		 */
		if ((long)SCARG(uap, arg) <= 0) {
			SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg));
		} else {
			struct proc *p1 = pfind((long)SCARG(uap, arg));
			if (p1 == 0)
				return (ESRCH);
			SCARG(uap, arg) = (void *)(long)p1->p_pgrp->pg_id;
		}
		return ((*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p));

	case F_SETLKW:
		flg |= F_WAIT;
		/* Fall into F_SETLK */

	case F_SETLK:
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			return (error);
		if (fl.l_whence == SEEK_CUR)
			fl.l_start += fp->f_offset;
		switch (fl.l_type) {

		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0)
				return (EBADF);
			/* Remember we hold POSIX locks, for closef(). */
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0)
				return (EBADF);
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_UNLCK:
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
				F_POSIX));

		default:
			return (EINVAL);
		}

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
		    sizeof (fl));
		if (error)
			return (error);
		if (fl.l_whence == SEEK_CUR)
			fl.l_start += fp->f_offset;
		if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
			return (error);
		return (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
		    sizeof (fl)));

	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * Point slot `new' at the file in slot `old', propagate the flag byte
 * (clearing close-on-exec per POSIX dup semantics), bump the file's
 * reference count, update fd_lastfile, and report `new' via retval.
 */
int
finishdup(fdp, old, new, retval)
	register struct filedesc *fdp;
	register int old, new;
	register_t *retval;
{
	register struct file *fp;

	fp = fdp->fd_ofiles[old];
	fdp->fd_ofiles[new] = fp;
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	fp->f_count++;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;
	return (0);
}

/*
 * Close a file descriptor.
 *
 * close(2): detach the descriptor slot (unmapping any mapped-file
 * descriptor first), maintain the fd_lastfile/fd_freefile hints, then
 * drop the file reference via closef().
 */
/* ARGSUSED */
int
close(p, uap, retval)
	struct proc *p;
	struct close_args /* {
		syscallarg(int) fd;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	register u_char *pf;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	pf = (u_char *)&fdp->fd_ofileflags[fd];
	if (*pf & UF_MAPPED)
		(void) munmapfd(p, fd);
	fdp->fd_ofiles[fd] = NULL;
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	*pf = 0;
	return (closef(fp, p));
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
 *
 * 4.3BSD-compatible fstat: stat through the descriptor into the modern
 * struct stat, then convert to the old struct ostat for the caller.
 */
/* ARGSUSED */
int
compat_43_fstat(p, uap, retval)
	struct proc *p;
	register struct compat_43_fstat_args /* {
		syscallarg(int) fd;
		syscallarg(struct ostat *) sb;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	default:
		/* Only vnodes and sockets exist in this kernel. */
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * NOTE(review): cvtstat() runs before the error check, so on
	 * failure it converts an uninitialized stat buffer (the result is
	 * never copied out, so this is harmless but untidy).
	 */
	cvtstat(&ub, &oub);
	if (error == 0)
		error = copyout((caddr_t)&oub, (caddr_t)SCARG(uap, sb),
		    sizeof (oub));
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

/*
 * Return status information about a file descriptor.
 *
 * fstat(2): dispatch on descriptor type (vnode or socket) and copy the
 * resulting struct stat out to userland.
 */
/* ARGSUSED */
int
fstat(p, uap, retval)
	struct proc *p;
	register struct fstat_args /* {
		syscallarg(int) fd;
		syscallarg(struct stat *) sb;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	int error;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	default:
		panic("fstat");
		/*NOTREACHED*/
	}
	if (error == 0)
		error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb),
		    sizeof (ub));
	return (error);
}

/*
 * Return pathconf information about a file descriptor.
 *
 * fpathconf(2): sockets answer only _PC_PIPE_BUF; vnodes defer to the
 * filesystem via VOP_PATHCONF.
 */
/* ARGSUSED */
int
fpathconf(p, uap, retval)
	struct proc *p;
	register struct fpathconf_args /* {
		syscallarg(int) fd;
		syscallarg(int) name;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_SOCKET:
		if (SCARG(uap, name) != _PC_PIPE_BUF)
			return (EINVAL);
		*retval = PIPE_BUF;
		return (0);

	case DTYPE_VNODE:
		vp = (struct vnode *)fp->f_data;
		return (VOP_PATHCONF(vp, SCARG(uap, name), retval));

	default:
		panic("fpathconf");
	}
	/*NOTREACHED*/
}

/*
 * Allocate a file descriptor for the process.
 */
int fdexpand;		/* statistic: number of ofile-array expansions */

/*
 * Find a free descriptor slot >= `want' for process p, growing the
 * ofile/ofileflags arrays as needed (geometric doubling from NDEXTENT).
 * On success the chosen index is stored through `result' and the slot's
 * flag byte is cleared; returns EMFILE when the per-process limit is hit.
 */
int
fdalloc(p, want, result)
	struct proc *p;
	int want;
	int *result;
{
	register struct filedesc *fdp = p->p_fd;
	register int i;
	int lim, last, nfiles;
	struct file **newofile;
	char *newofileflags;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (fdp->fd_nfiles >= lim)
			return (EMFILE);
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		/* Pointer array and flag bytes share one allocation. */
		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		bcopy(fdp->fd_ofiles, newofile,
			(i = sizeof(struct file *) * fdp->fd_nfiles));
		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
		bcopy(fdp->fd_ofileflags, newofileflags,
			(i = sizeof(char) * fdp->fd_nfiles));
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/* The initial NDFILE slots live inside struct filedesc0. */
		if (fdp->fd_nfiles > NDFILE)
			FREE(fdp->fd_ofiles, M_FILEDESC);
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
	}
}

/*
 * Check to see whether n user file descriptors
 * are available to the process p.
 *
 * Counts headroom below the rlimit plus free slots already in the
 * table; returns 1 if at least n descriptors could be allocated,
 * 0 otherwise.
 */
int
fdavail(p, n)
	struct proc *p;
	register int n;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file **fpp;
	register int i, lim;

	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++)
		if (*fpp == NULL && --n <= 0)
			return (1);
	return (0);
}

/*
 * Create a new open file structure and allocate
 * a file decriptor for the process that refers to it.
 *
 * On success the new struct file (f_count == 1, credentials held) is
 * installed in the descriptor table; the file and/or index are returned
 * through resultfp/resultfd when non-NULL.  Fails with ENFILE when the
 * system-wide file table is full.
 */
int
falloc(p, resultfp, resultfd)
	register struct proc *p;
	struct file **resultfp;
	int *resultfd;
{
	register struct file *fp, *fq;
	int error, i;

	if (error = fdalloc(p, 0, &i))
		return (error);
	if (nfiles >= maxfiles) {
		tablefull("file");
		return (ENFILE);
	}
	/*
	 * Allocate a new file descriptor.
	 * If the process has file descriptor zero open, add to the list
	 * of open files at that point, otherwise put it at the front of
	 * the list of open files.
	 */
	nfiles++;
	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
	bzero(fp, sizeof(struct file));
	if (fq = p->p_fd->fd_ofiles[0]) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	p->p_fd->fd_ofiles[i] = fp;
	fp->f_count = 1;
	fp->f_cred = p->p_ucred;
	crhold(fp->f_cred);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Free a file descriptor.
 *
 * Unlink the struct file from the global list, release its credential
 * reference, and return the storage.  Caller must already hold the last
 * reference (see closef()).
 */
void
ffree(fp)
	register struct file *fp;
{
	register struct file *fq;

	LIST_REMOVE(fp, f_list);
	crfree(fp->f_cred);
#ifdef DIAGNOSTIC
	fp->f_count = 0;	/* catch use-after-free via refcount */
#endif
	nfiles--;
	FREE(fp, M_FILE);
}

/*
 * Copy a filedesc structure.
 *
 * Used on fork: clones p's descriptor table, taking a reference on the
 * current/root directory vnodes and on every open file.
 */
struct filedesc *
fdcopy(p)
	struct proc *p;
{
	register struct filedesc *newfdp, *fdp = p->p_fd;
	register struct file **fpp;
	register int i;

	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
		if (*fpp != NULL)
			(*fpp)->f_count++;
	return (newfdp);
}

/*
 * Release a filedesc structure.
 *
 * Drops one reference; on the last reference, closes every open file,
 * frees an externally-allocated ofile array, releases the directory
 * vnodes, and frees the structure itself.
 */
void
fdfree(p)
	struct proc *p;
{
	register struct filedesc *fdp = p->p_fd;
	struct file **fpp;
	register int i;

	if (--fdp->fd_refcnt > 0)
		return;
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
		if (*fpp)
			(void) closef(*fpp, p);
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	vrele(fdp->fd_cdir);
	if (fdp->fd_rdir)
		vrele(fdp->fd_rdir);
	FREE(fdp, M_FILEDESC);
}

/*
 * Internal form of close.
 * Decrement reference count on file structure.
 * Note: p may be NULL when closing a file
 * that was being passed in a message.
 */
int
closef(fp, p)
	register struct file *fp;
	register struct proc *p;
{
	struct vnode *vp;
	struct flock lf;
	int error;

	if (fp == NULL)
		return (0);
	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fp->f_data;
		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
	}
	if (--fp->f_count > 0)
		return (0);
	if (fp->f_count < 0)
		panic("closef: count < 0");
	/*
	 * Last reference: release any flock()-style lock (keyed by the
	 * struct file, not the process) before tearing the file down.
	 */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fp->f_data;
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	}
	if (fp->f_ops)
		error = (*fp->f_ops->fo_close)(fp, p);
	else
		error = 0;
	ffree(fp);
	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on
 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
/* ARGSUSED */
int
flock(p, uap, retval)
	struct proc *p;
	register struct flock_args /* {
		syscallarg(int) fd;
		syscallarg(int) how;
	} */ *uap;
	register_t *retval;
{
	int fd = SCARG(uap, fd);
	int how = SCARG(uap, how);
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	struct flock lf;

	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE)
		return (EOPNOTSUPP);
	vp = (struct vnode *)fp->f_data;
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		fp->f_flag &= ~FHASLOCK;
		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
	}
	if (how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else
		return (EBADF);
	/* flock()-style locks are keyed by struct file (see closef()). */
	fp->f_flag |= FHASLOCK;
	if (how & LOCK_NB)
		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
}

/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 */
/* ARGSUSED */
int
fdopen(dev, mode, type, p)
	dev_t dev;
	int mode, type;
	struct proc *p;
{

	/*
	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
	 * the file descriptor being sought for duplication.  The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open.  Open will detect this special error and take the
	 * actions in dupfdopen below.  Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	p->p_dupfd = minor(dev);
	return (ENODEV);
}

/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * Second half of the /dev/fd kludge above: called from the open path
 * with the ENODEV/ENXIO error produced by fdopen().  indx is the slot
 * already allocated for the open; dfd is the descriptor to duplicate.
 */
int
dupfdopen(fdp, indx, dfd, mode, error)
	register struct filedesc *fdp;
	register int indx, dfd;
	int mode;
	int error;
{
	register struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject.  Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	fp = fdp->fd_ofiles[indx];
	if ((u_int)dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
		return (EBADF);

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
			return (EACCES);
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		wfp->f_count++;
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd, and stuff it into indx.
		 */
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;
		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		else
			while (fdp->fd_lastfile > 0 &&
			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		if (dfd < fdp->fd_freefile)
			fdp->fd_freefile = dfd;
		return (0);

	default:
		return (error);
	}
	/* NOTREACHED */
}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
new file mode 100644
index 000000000000..4ed48ac9110e
--- /dev/null
+++ b/sys/kern/kern_exit.c
@@ -0,0 +1,453 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)kern_exit.c	8.10 (Berkeley) 2/23/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/map.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/tty.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/ptrace.h>

#include <machine/cpu.h>
#ifdef COMPAT_43
#include <machine/reg.h>
#include <machine/psl.h>
#endif

#include <vm/vm.h>
#include <vm/vm_kern.h>

__dead void cpu_exit __P((struct proc *));
__dead void exit1 __P((struct proc *, int));

/*
 * exit --
 *	Death of process.
 *
 * System call entry point: encodes the user-supplied exit value with
 * W_EXITCODE() and hands off to exit1(), which never returns.
 */
struct rexit_args {
	int	rval;
};
__dead void
exit(p, uap, retval)
	struct proc *p;
	struct rexit_args *uap;
	int *retval;
{

	exit1(p, W_EXITCODE(uap->rval, 0));
	/* NOTREACHED */
}

/*
 * Exit: deallocate address space and other resources, change proc state
 * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 * status and rusage for wait().  Check for child processes and orphan them.
 *
 * p  - the exiting process (the current process)
 * rv - exit status already encoded with W_EXITCODE()/signal macros
 *
 * Never returns: finishes by calling cpu_exit(), which switches away
 * for the last time.
 */
__dead void
exit1(p, rv)
	register struct proc *p;
	int rv;
{
	register struct proc *q, *nq;
	register struct proc **pp;
	register struct vmspace *vm;

	/* init (pid 1) must never exit; everything reparents to it. */
	if (p->p_pid == 1)
		panic("init died (signal %d, exit %d)",
		    WTERMSIG(rv), WEXITSTATUS(rv));
#ifdef PGINPROF
	vmsizmon();
#endif
	if (p->p_flag & P_PROFIL)
		stopprofclock(p);
	/*
	 * Allocate the zombie's rusage record now, while sleeping is
	 * still safe; it is filled in below and handed to wait().
	 */
	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
	    M_ZOMBIE, M_WAITOK);
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	p->p_flag &= ~(P_TRACED | P_PPWAIT);
	p->p_flag |= P_WEXIT;
	p->p_sigignore = ~0;
	p->p_siglist = 0;
	/* Cancel any pending interval timer before becoming a zombie. */
	untimeout(realitexpire, (caddr_t)p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(p);

	/* The next two chunks should probably be moved to vmspace_exit. */
	vm = p->p_vmspace;
#ifdef SYSVSHM
	if (vm->vm_shm)
		shmexit(p);
#endif
	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 * Can't free the entire vmspace as the kernel stack
	 * may be mapped within that space also.
	 */
	if (vm->vm_refcnt == 1)
		(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
		    VM_MAXUSER_ADDRESS);

	if (SESS_LEADER(p)) {
		register struct session *sp = p->p_session;

		if (sp->s_ttyvp) {
			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp->t_session == sp) {
				if (sp->s_ttyp->t_pgrp)
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
				(void) ttywait(sp->s_ttyp);
				/*
				 * The tty could have been revoked
				 * if we blocked.
				 */
				if (sp->s_ttyvp)
					VOP_REVOKE(sp->s_ttyvp, REVOKEALL);
			}
			if (sp->s_ttyvp)
				vrele(sp->s_ttyvp);
			sp->s_ttyvp = NULL;
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
		}
		sp->s_leader = NULL;
	}
	fixjobc(p, p->p_pgrp, 0);
	/*
	 * Lift the file-size limit so the accounting record can always
	 * be appended, then write it.
	 */
	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	(void)acct_process(p);
#ifdef KTRACE
	/*
	 * release trace file
	 */
	p->p_traceflag = 0;	/* don't trace the vrele() */
	if (p->p_tracep)
		vrele(p->p_tracep);
#endif
	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	p->p_stat = SZOMB;

	LIST_REMOVE(p, p_hash);

	/* Hand all children over to init, which will reap the zombies. */
	q = p->p_children.lh_first;
	if (q)		/* only need this if any child is S_ZOMB */
		wakeup((caddr_t) initproc);
	for (; q != 0; q = nq) {
		nq = q->p_sibling.le_next;
		LIST_REMOVE(q, p_sibling);
		LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling);
		q->p_pptr = initproc;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			q->p_flag &= ~P_TRACED;
			psignal(q, SIGKILL);
		}
	}

	/*
	 * Save exit status and final rusage info, adding in child rusage
	 * info and self times.
	 */
	p->p_xstat = rv;
	*p->p_ru = p->p_stats->p_ru;
	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
	ruadd(p->p_ru, &p->p_stats->p_cru);

	/*
	 * Notify parent that we're gone.
	 */
	psignal(p->p_pptr, SIGCHLD);
	wakeup((caddr_t)p->p_pptr);
#if defined(tahoe)
	/* move this to cpu_exit */
	p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL;
#endif
	/*
	 * Clear curproc after we've done all operations
	 * that could block, and before tearing down the rest
	 * of the process state that might be used from clock, etc.
	 * Also, can't clear curproc while we're still runnable,
	 * as we're not on a run queue (we are current, just not
	 * a proper proc any longer!).
	 *
	 * Other substructures are freed from wait().
	 */
	curproc = NULL;
	if (--p->p_limit->p_refcnt == 0)
		FREE(p->p_limit, M_SUBPROC);

	/*
	 * Finally, call machine-dependent code to release the remaining
	 * resources including address space, the kernel stack and pcb.
	 * The address space is released by "vmspace_free(p->p_vmspace)";
	 * This is machine-dependent, as we may have to change stacks
	 * or ensure that the current one isn't reallocated before we
	 * finish.  cpu_exit will end with a call to cpu_swtch(), finishing
	 * our execution (pun intended).
	 */
	cpu_exit(p);
}

struct wait_args {
	int	pid;
	int	*status;
	int	options;
	struct	rusage *rusage;
#ifdef COMPAT_43
	int	compat;		/* pseudo */
#endif
};

#ifdef COMPAT_43
/*
 * 4.3BSD wait() compatibility: the old wait(2) passed its optional
 * arguments in registers, flagged by the condition codes in the PSW.
 */
#if defined(hp300) || defined(luna68k)
#include <machine/frame.h>
#define GETPS(rp)	((struct frame *)(rp))->f_sr
#else
#define GETPS(rp)	(rp)[PS]
#endif

compat_43_wait(p, uap, retval)
	struct proc *p;
	register struct wait_args *uap;
	int *retval;
{

#ifdef PSL_ALLCC
	/* All condition codes set => caller passed options/rusage in R0/R1. */
	if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) {
		uap->options = 0;
		uap->rusage = NULL;
	} else {
		uap->options = p->p_md.md_regs[R0];
		uap->rusage = (struct rusage *)p->p_md.md_regs[R1];
	}
#else
	uap->options = 0;
	uap->rusage = NULL;
#endif
	uap->pid = WAIT_ANY;
	uap->status = NULL;
	uap->compat = 1;
	return (wait1(p, uap, retval));
}

wait4(p, uap, retval)
	struct proc *p;
	struct wait_args *uap;
	int *retval;
{

	uap->compat = 0;
	return (wait1(p, uap, retval));
}
#else
#define	wait1	wait4
#endif

/*
 * Common guts of wait4()/compat wait(): find a matching child that is a
 * zombie (reap it) or stopped (report it), or sleep until one appears.
 *
 * q      - the waiting (parent) process
 * uap    - wait arguments; uap->pid selects which children match
 * retval - retval[0] gets the pid; retval[1] the 4.3BSD status, if compat
 */
int
wait1(q, uap, retval)
	register struct proc *q;
	register struct wait_args *uap;
	int retval[];
{
	register int nfound;
	register struct proc *p, *t;
	int status, error;

	/* pid 0 means "any child in my process group". */
	if (uap->pid == 0)
		uap->pid = -q->p_pgid;
#ifdef notyet
	if (uap->options &~ (WUNTRACED|WNOHANG))
		return (EINVAL);
#endif
loop:
	nfound = 0;
	for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
		if (uap->pid != WAIT_ANY &&
		    p->p_pid != uap->pid && p->p_pgid != -uap->pid)
			continue;
		nfound++;
		if (p->p_stat == SZOMB) {
			retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (uap->compat)
				retval[1] = p->p_xstat;
			else
#endif
			if (uap->status) {
				status = p->p_xstat;	/* convert to int */
				if (error = copyout((caddr_t)&status,
				    (caddr_t)uap->status, sizeof(status)))
					return (error);
			}
			if (uap->rusage && (error = copyout((caddr_t)p->p_ru,
			    (caddr_t)uap->rusage, sizeof (struct rusage))))
				return (error);
			/*
			 * If we got the child via a ptrace 'attach',
			 * we need to give it back to the old parent.
			 */
			if (p->p_oppid && (t = pfind(p->p_oppid))) {
				p->p_oppid = 0;
				proc_reparent(p, t);
				psignal(t, SIGCHLD);
				wakeup((caddr_t)t);
				return (0);
			}
			p->p_xstat = 0;
			ruadd(&q->p_stats->p_cru, p->p_ru);
			FREE(p->p_ru, M_ZOMBIE);

			/*
			 * Decrement the count of procs running with this uid.
			 */
			(void)chgproccnt(p->p_cred->p_ruid, -1);

			/*
			 * Free up credentials.
			 */
			if (--p->p_cred->p_refcnt == 0) {
				crfree(p->p_cred->pc_ucred);
				FREE(p->p_cred, M_SUBPROC);
			}

			/*
			 * Release reference to text vnode
			 */
			if (p->p_textvp)
				vrele(p->p_textvp);

			/*
			 * Finally finished with old proc entry.
			 * Unlink it from its process group and free it.
			 */
			leavepgrp(p);
			LIST_REMOVE(p, p_list);	/* off zombproc */
			LIST_REMOVE(p, p_sibling);

			/*
			 * Give machine-dependent layer a chance
			 * to free anything that cpu_exit couldn't
			 * release while still running in process context.
			 */
			cpu_wait(p);
			FREE(p, M_PROC);
			nprocs--;
			return (0);
		}
		if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
		    (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
			/* Report each stop only once (P_WAITED). */
			p->p_flag |= P_WAITED;
			retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (uap->compat) {
				retval[1] = W_STOPCODE(p->p_xstat);
				error = 0;
			} else
#endif
			if (uap->status) {
				status = W_STOPCODE(p->p_xstat);
				error = copyout((caddr_t)&status,
				    (caddr_t)uap->status, sizeof(status));
			} else
				error = 0;
			return (error);
		}
	}
	if (nfound == 0)
		return (ECHILD);
	if (uap->options & WNOHANG) {
		retval[0] = 0;
		return (0);
	}
	/* Sleep until a child changes state, then rescan from the top. */
	if (error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))
		return (error);
	goto loop;
}

/*
 * make process 'parent' the new parent of process 'child'.
+ */ +void +proc_reparent(child, parent) + register struct proc *child; + register struct proc *parent; +{ + + if (child->p_pptr == parent) + return; + + LIST_REMOVE(child, p_sibling); + LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); + child->p_pptr = parent; +} diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c new file mode 100644 index 000000000000..6c5f22f0d037 --- /dev/null +++ b/sys/kern/kern_fork.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_fork.c	8.8 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/map.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acct.h>
#include <sys/ktrace.h>

/*
 * fork system call: thin wrapper over the common fork1() path.
 */
/* ARGSUSED */
fork(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	return (fork1(p, 0, retval));
}

/*
 * vfork system call: like fork but shares address space with the parent
 * and blocks the parent until the child execs or exits (isvfork flag).
 */
/* ARGSUSED */
vfork(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	return (fork1(p, 1, retval));
}

int	nprocs = 1;		/* process 0 */

/*
 * Common fork code.
 *
 * p1      - forking (parent) process
 * isvfork - nonzero for vfork semantics (shared VM, parent waits)
 * retval  - retval[0] gets the other process' pid,
 *           retval[1] is 1 in the child and 0 in the parent
 *
 * Returns 0 on success (in both processes) or EAGAIN when the process
 * table or the per-uid process limit would be exceeded.
 */
fork1(p1, isvfork, retval)
	register struct proc *p1;
	int isvfork;
	register_t *retval;
{
	register struct proc *p2;
	register uid_t uid;
	struct proc *newproc;
	struct proc **hash;
	int count;
	static int nextpid, pidchecked = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		tablefull("proc");
		return (EAGAIN);
	}

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		return (EAGAIN);
	}

	/* Allocate new proc. */
	MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from nextpid+1 through pidchecked-1).
	 */
	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 */
		p2 = allproc.lh_first;
again:
		for (; p2 != 0; p2 = p2->p_list.le_next) {
			/* pids and pgrp ids share one namespace. */
			while (p2->p_pid == nextpid ||
			    p2->p_pgrp->pg_id == nextpid) {
				nextpid++;
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > nextpid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
		}
		if (!doingzomb) {
			/* Second pass: zombies still own their pids. */
			doingzomb = 1;
			p2 = zombproc.lh_first;
			goto again;
		}
	}

	nprocs++;
	p2 = newproc;
	p2->p_stat = SIDL;			/* protect against others */
	p2->p_pid = nextpid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	p2->p_forw = p2->p_back = NULL;		/* shouldn't be necessary */
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = P_INMEM;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
	    M_SUBPROC, M_WAITOK);
	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
	p2->p_cred->p_refcnt = 1;
	crhold(p1->p_ucred);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	p2->p_fd = fdcopy(p1);
	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	if (isvfork)
		p2->p_flag |= P_PPWAIT;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	p2->p_pptr = p1;
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag&KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	p1->p_flag |= P_NOSWAP;
	/*
	 * Set return values for child before vm_fork,
	 * so they can be copied to child stack.
	 * We return parent pid, and mark as child in retval[1].
	 * NOTE: the kernel stack may be at a different location in the child
	 * process, and thus addresses of automatic variables (including retval)
	 * may be invalid after vm_fork returns in the child process.
	 */
	retval[0] = p1->p_pid;
	retval[1] = 1;
	if (vm_fork(p1, p2, isvfork)) {
		/*
		 * Child process.  Set start time and get to work.
		 */
		(void) splclock();
		p2->p_stats->p_start = time;
		(void) spl0();
		p2->p_acflag = AFORK;
		return (0);
	}

	/*
	 * Make child runnable and add to run queue.
	 */
	(void) splhigh();
	p2->p_stat = SRUN;
	setrunqueue(p2);
	(void) spl0();

	/*
	 * Now can be swapped.
	 */
	p1->p_flag &= ~P_NOSWAP;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (isvfork)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	retval[0] = p2->p_pid;
	retval[1] = 0;
	return (0);
}
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
new file mode 100644
index 000000000000..b84175439a85
--- /dev/null
+++ b/sys/kern/kern_ktrace.c
@@ -0,0 +1,475 @@
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95 + */ + +#ifdef KTRACE + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/ktrace.h> +#include <sys/malloc.h> +#include <sys/syslog.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +struct ktr_header * +ktrgetheader(type) + int type; +{ + register struct ktr_header *kth; + struct proc *p = curproc; /* XXX */ + + MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header), + M_TEMP, M_WAITOK); + kth->ktr_type = type; + microtime(&kth->ktr_time); + kth->ktr_pid = p->p_pid; + bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN); + return (kth); +} + +void +ktrsyscall(vp, code, argsize, args) + struct vnode *vp; + int code, argsize; + register_t args[]; +{ + struct ktr_header *kth; + struct ktr_syscall *ktp; + register len = sizeof(struct ktr_syscall) + argsize; + struct proc *p = curproc; /* XXX */ + register_t *argp; + int i; + + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_SYSCALL); + MALLOC(ktp, struct ktr_syscall *, len, M_TEMP, M_WAITOK); + ktp->ktr_code = code; + ktp->ktr_argsize = argsize; + argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall)); + for (i = 0; i < (argsize / sizeof *argp); i++) + *argp++ = args[i]; + kth->ktr_buf = (caddr_t)ktp; + kth->ktr_len = len; + ktrwrite(vp, kth); + FREE(ktp, M_TEMP); + FREE(kth, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +void +ktrsysret(vp, code, error, retval) + struct vnode *vp; + int code, error, retval; +{ + struct ktr_header *kth; + struct ktr_sysret ktp; + struct proc *p = curproc; /* XXX */ + + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_SYSRET); + ktp.ktr_code = code; + ktp.ktr_error = error; + ktp.ktr_retval = retval; /* what about val2 ? 
*/ + + kth->ktr_buf = (caddr_t)&ktp; + kth->ktr_len = sizeof(struct ktr_sysret); + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +void +ktrnamei(vp, path) + struct vnode *vp; + char *path; +{ + struct ktr_header *kth; + struct proc *p = curproc; /* XXX */ + + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_NAMEI); + kth->ktr_len = strlen(path); + kth->ktr_buf = path; + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +void +ktrgenio(vp, fd, rw, iov, len, error) + struct vnode *vp; + int fd; + enum uio_rw rw; + register struct iovec *iov; + int len, error; +{ + struct ktr_header *kth; + register struct ktr_genio *ktp; + register caddr_t cp; + register int resid = len, cnt; + struct proc *p = curproc; /* XXX */ + + if (error) + return; + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_GENIO); + MALLOC(ktp, struct ktr_genio *, sizeof(struct ktr_genio) + len, + M_TEMP, M_WAITOK); + ktp->ktr_fd = fd; + ktp->ktr_rw = rw; + cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio)); + while (resid > 0) { + if ((cnt = iov->iov_len) > resid) + cnt = resid; + if (copyin(iov->iov_base, cp, (unsigned)cnt)) + goto done; + cp += cnt; + resid -= cnt; + iov++; + } + kth->ktr_buf = (caddr_t)ktp; + kth->ktr_len = sizeof (struct ktr_genio) + len; + + ktrwrite(vp, kth); +done: + FREE(kth, M_TEMP); + FREE(ktp, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +void +ktrpsig(vp, sig, action, mask, code) + struct vnode *vp; + int sig; + sig_t action; + int mask, code; +{ + struct ktr_header *kth; + struct ktr_psig kp; + struct proc *p = curproc; /* XXX */ + + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_PSIG); + kp.signo = (char)sig; + kp.action = action; + kp.mask = mask; + kp.code = code; + kth->ktr_buf = (caddr_t)&kp; + kth->ktr_len = sizeof (struct ktr_psig); + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +void +ktrcsw(vp, out, user) + struct 
vnode *vp; + int out, user; +{ + struct ktr_header *kth; + struct ktr_csw kc; + struct proc *p = curproc; /* XXX */ + + p->p_traceflag |= KTRFAC_ACTIVE; + kth = ktrgetheader(KTR_CSW); + kc.out = out; + kc.user = user; + kth->ktr_buf = (caddr_t)&kc; + kth->ktr_len = sizeof (struct ktr_csw); + + ktrwrite(vp, kth); + FREE(kth, M_TEMP); + p->p_traceflag &= ~KTRFAC_ACTIVE; +} + +/* Interface and common routines */ + +/* + * ktrace system call + */ +/* ARGSUSED */ +int +ktrace(curp, uap, retval) + struct proc *curp; + register struct ktrace_args /* { + syscallarg(char *) fname; + syscallarg(int) ops; + syscallarg(int) facs; + syscallarg(int) pid; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp = NULL; + register struct proc *p; + struct pgrp *pg; + int facs = SCARG(uap, facs) & ~KTRFAC_ROOT; + int ops = KTROP(SCARG(uap, ops)); + int descend = SCARG(uap, ops) & KTRFLAG_DESCEND; + int ret = 0; + int error = 0; + struct nameidata nd; + + curp->p_traceflag |= KTRFAC_ACTIVE; + if (ops != KTROP_CLEAR) { + /* + * an operation which requires a file argument. + */ + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname), + curp); + if (error = vn_open(&nd, FREAD|FWRITE, 0)) { + curp->p_traceflag &= ~KTRFAC_ACTIVE; + return (error); + } + vp = nd.ni_vp; + VOP_UNLOCK(vp, 0, p); + if (vp->v_type != VREG) { + (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); + curp->p_traceflag &= ~KTRFAC_ACTIVE; + return (EACCES); + } + } + /* + * Clear all uses of the tracefile + */ + if (ops == KTROP_CLEARFILE) { + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + if (p->p_tracep == vp) { + if (ktrcanset(curp, p)) { + p->p_tracep = NULL; + p->p_traceflag = 0; + (void) vn_close(vp, FREAD|FWRITE, + p->p_ucred, p); + } else + error = EPERM; + } + } + goto done; + } + /* + * need something to (un)trace (XXX - why is this here?) 
+ */ + if (!facs) { + error = EINVAL; + goto done; + } + /* + * do it + */ + if (SCARG(uap, pid) < 0) { + /* + * by process group + */ + pg = pgfind(-SCARG(uap, pid)); + if (pg == NULL) { + error = ESRCH; + goto done; + } + for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) + if (descend) + ret |= ktrsetchildren(curp, p, ops, facs, vp); + else + ret |= ktrops(curp, p, ops, facs, vp); + + } else { + /* + * by pid + */ + p = pfind(SCARG(uap, pid)); + if (p == NULL) { + error = ESRCH; + goto done; + } + if (descend) + ret |= ktrsetchildren(curp, p, ops, facs, vp); + else + ret |= ktrops(curp, p, ops, facs, vp); + } + if (!ret) + error = EPERM; +done: + if (vp != NULL) + (void) vn_close(vp, FWRITE, curp->p_ucred, curp); + curp->p_traceflag &= ~KTRFAC_ACTIVE; + return (error); +} + +int +ktrops(curp, p, ops, facs, vp) + struct proc *p, *curp; + int ops, facs; + struct vnode *vp; +{ + + if (!ktrcanset(curp, p)) + return (0); + if (ops == KTROP_SET) { + if (p->p_tracep != vp) { + /* + * if trace file already in use, relinquish + */ + if (p->p_tracep != NULL) + vrele(p->p_tracep); + VREF(vp); + p->p_tracep = vp; + } + p->p_traceflag |= facs; + if (curp->p_ucred->cr_uid == 0) + p->p_traceflag |= KTRFAC_ROOT; + } else { + /* KTROP_CLEAR */ + if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { + /* no more tracing */ + p->p_traceflag = 0; + if (p->p_tracep != NULL) { + vrele(p->p_tracep); + p->p_tracep = NULL; + } + } + } + + return (1); +} + +ktrsetchildren(curp, top, ops, facs, vp) + struct proc *curp, *top; + int ops, facs; + struct vnode *vp; +{ + register struct proc *p; + register int ret = 0; + + p = top; + for (;;) { + ret |= ktrops(curp, p, ops, facs, vp); + /* + * If this process has children, descend to them next, + * otherwise do any siblings, and if done with this level, + * follow back up the tree (but not past top). 
+ */ + if (p->p_children.lh_first) + p = p->p_children.lh_first; + else for (;;) { + if (p == top) + return (ret); + if (p->p_sibling.le_next) { + p = p->p_sibling.le_next; + break; + } + p = p->p_pptr; + } + } + /*NOTREACHED*/ +} + +ktrwrite(vp, kth) + struct vnode *vp; + register struct ktr_header *kth; +{ + struct uio auio; + struct iovec aiov[2]; + register struct proc *p = curproc; /* XXX */ + int error; + + if (vp == NULL) + return; + auio.uio_iov = &aiov[0]; + auio.uio_offset = 0; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + aiov[0].iov_base = (caddr_t)kth; + aiov[0].iov_len = sizeof(struct ktr_header); + auio.uio_resid = sizeof(struct ktr_header); + auio.uio_iovcnt = 1; + auio.uio_procp = (struct proc *)0; + if (kth->ktr_len > 0) { + auio.uio_iovcnt++; + aiov[1].iov_base = kth->ktr_buf; + aiov[1].iov_len = kth->ktr_len; + auio.uio_resid += kth->ktr_len; + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); + VOP_UNLOCK(vp, 0, p); + if (!error) + return; + /* + * If error encountered, give up tracing on this vnode. + */ + log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", + error); + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + if (p->p_tracep == vp) { + p->p_tracep = NULL; + p->p_traceflag = 0; + vrele(vp); + } + } +} + +/* + * Return true if caller has permission to set the ktracing state + * of target. Essentially, the target can't possess any + * more permissions than the caller. KTRFAC_ROOT signifies that + * root previously set the tracing status on the target process, and + * so, only root may further change it. + * + * TODO: check groups. use caller effective gid. 
+ */ +ktrcanset(callp, targetp) + struct proc *callp, *targetp; +{ + register struct pcred *caller = callp->p_cred; + register struct pcred *target = targetp->p_cred; + + if ((caller->pc_ucred->cr_uid == target->p_ruid && + target->p_ruid == target->p_svuid && + caller->p_rgid == target->p_rgid && /* XXX */ + target->p_rgid == target->p_svgid && + (targetp->p_traceflag & KTRFAC_ROOT) == 0) || + caller->pc_ucred->cr_uid == 0) + return (1); + + return (0); +} + +#endif diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c new file mode 100644 index 000000000000..363cde5d682f --- /dev/null +++ b/sys/kern/kern_malloc.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_malloc.c	8.4 (Berkeley) 5/20/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/map.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

/* Power-of-two freelist buckets; bucket[i] holds blocks of 1 << i bytes. */
struct kmembuckets bucket[MINBUCKET + 16];
/* Per-type allocation statistics, indexed by the M_* type codes. */
struct kmemstats kmemstats[M_LAST];
/* Per-page usage records for the kmem arena (see btokup()). */
struct kmemusage *kmemusage;
char *kmembase, *kmemlimit;
char *memname[] = INITKMEMNAMES;

#ifdef DIAGNOSTIC
/*
 * This structure provides a set of masks to catch unaligned frees.
 */
long addrmask[] = { 0,
	0x00000001, 0x00000003, 0x00000007, 0x0000000f,
	0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
	0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
	0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
};

/*
 * The WEIRD_ADDR is used as known text to copy into free objects so
 * that modifications after frees can be detected.
 */
#define WEIRD_ADDR	0xdeadbeef
#define MAX_COPY	32

/*
 * Normally the first word of the structure is used to hold the list
 * pointer for free objects. However, when running with diagnostics,
 * we use the third and fourth fields, so as to catch modifications
 * in the most commonly trashed first two words.
 */
struct freelist {
	long	spare0;
	short	type;
	long	spare1;
	caddr_t	next;
};
#else /* !DIAGNOSTIC */
struct freelist {
	caddr_t	next;
};
#endif /* DIAGNOSTIC */

/*
 * Allocate a block of memory
 *
 * size  - requested size in bytes; rounded up to a power-of-two bucket
 *         (or to pages if larger than MAXALLOCSAVE)
 * type  - M_* allocation type for accounting
 * flags - M_WAITOK to sleep for memory, M_NOWAIT to fail with NULL
 *
 * Returns a pointer to the block, or NULL only when M_NOWAIT is set
 * and the request cannot be satisfied immediately.  Runs at splimp()
 * to exclude interrupt-level allocators.
 */
void *
malloc(size, type, flags)
	unsigned long size;
	int type, flags;
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	register struct freelist *freep;
	long indx, npg, allocsize;
	int s;
	caddr_t va, cp, savedlist;
#ifdef DIAGNOSTIC
	long *end, *lp;
	int copysize;
	char *savedtype;
#endif
#ifdef DEBUG
	extern int simplelockrecurse;
#endif
#ifdef KMEMSTATS
	register struct kmemstats *ksp = &kmemstats[type];

	if (((unsigned long)type) > M_LAST)
		panic("malloc - bogus type");
#endif
	indx = BUCKETINDX(size);
	kbp = &bucket[indx];
	s = splimp();
#ifdef KMEMSTATS
	/* Block (or fail, for M_NOWAIT) while this type is over its limit. */
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_NOWAIT) {
			splx(s);
			return ((void *) NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		tsleep((caddr_t)ksp, PSWP+2, memname[type], 0);
	}
	ksp->ks_size |= 1 << indx;
#endif
#ifdef DIAGNOSTIC
	copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
#endif
#ifdef DEBUG
	if (flags & M_NOWAIT)
		simplelockrecurse++;
#endif
	/* Bucket empty: grab fresh pages from the kmem arena and carve them. */
	if (kbp->kb_next == NULL) {
		kbp->kb_last = NULL;
		if (size > MAXALLOCSAVE)
			allocsize = roundup(size, CLBYTES);
		else
			allocsize = 1 << indx;
		npg = clrnd(btoc(allocsize));
		va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg),
					   !(flags & M_NOWAIT));
		if (va == NULL) {
			splx(s);
#ifdef DEBUG
			if (flags & M_NOWAIT)
				simplelockrecurse--;
#endif
			return ((void *) NULL);
		}
#ifdef KMEMSTATS
		kbp->kb_total += kbp->kb_elmpercl;
#endif
		kup = btokup(va);
		kup->ku_indx = indx;
		/* Oversize requests are tracked by page count, not freelist. */
		if (allocsize > MAXALLOCSAVE) {
			if (npg > 65535)
				panic("malloc: allocation too large");
			kup->ku_pagecnt = npg;
#ifdef KMEMSTATS
			ksp->ks_memuse += allocsize;
#endif
			goto out;
		}
#ifdef KMEMSTATS
		kup->ku_freecnt = kbp->kb_elmpercl;
		kbp->kb_totalfree += kbp->kb_elmpercl;
#endif
		/*
		 * Just in case we blocked while allocating memory,
		 * and someone else also allocated memory for this
		 * bucket, don't assume the list is still empty.
		 */
		savedlist = kbp->kb_next;
		/* Thread the new chunk onto the freelist from high to low. */
		kbp->kb_next = cp = va + (npg * NBPG) - allocsize;
		for (;;) {
			freep = (struct freelist *)cp;
#ifdef DIAGNOSTIC
			/*
			 * Copy in known text to detect modification
			 * after freeing.
			 */
			end = (long *)&cp[copysize];
			for (lp = (long *)cp; lp < end; lp++)
				*lp = WEIRD_ADDR;
			freep->type = M_FREE;
#endif /* DIAGNOSTIC */
			if (cp <= va)
				break;
			cp -= allocsize;
			freep->next = cp;
		}
		freep->next = savedlist;
		if (kbp->kb_last == NULL)
			kbp->kb_last = (caddr_t)freep;
	}
	/* Pop the first free block off the bucket's list. */
	va = kbp->kb_next;
	kbp->kb_next = ((struct freelist *)va)->next;
#ifdef DIAGNOSTIC
	freep = (struct freelist *)va;
	savedtype = (unsigned)freep->type < M_LAST ?
		memname[freep->type] : "???";
	/* Validate the new list head; a garbage pointer means the free
	 * block's link word was scribbled on after it was freed. */
	if (kbp->kb_next &&
	    !kernacc(kbp->kb_next, sizeof(struct freelist), 0)) {
		printf("%s of object 0x%x size %d %s %s (invalid addr 0x%x)\n",
			"Data modified on freelist: word 2.5", va, size,
			"previous type", savedtype, kbp->kb_next);
		kbp->kb_next = NULL;
	}
#if BYTE_ORDER == BIG_ENDIAN
	freep->type = WEIRD_ADDR >> 16;
#endif
#if BYTE_ORDER == LITTLE_ENDIAN
	freep->type = (short)WEIRD_ADDR;
#endif
	if (((long)(&freep->next)) & 0x2)
		freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
	else
		freep->next = (caddr_t)WEIRD_ADDR;
	/* Verify the poison pattern survived while the block was free. */
	end = (long *)&va[copysize];
	for (lp = (long *)va; lp < end; lp++) {
		if (*lp == WEIRD_ADDR)
			continue;
		printf("%s %d of object 0x%x size %d %s %s (0x%x != 0x%x)\n",
			"Data modified on freelist: word", lp - (long *)va,
			va, size, "previous type", savedtype, *lp, WEIRD_ADDR);
		break;
	}
	freep->spare0 = 0;
#endif /* DIAGNOSTIC */
#ifdef KMEMSTATS
	kup = btokup(va);
	if (kup->ku_indx != indx)
		panic("malloc: wrong bucket");
	if (kup->ku_freecnt == 0)
		panic("malloc: lost data");
	kup->ku_freecnt--;
	kbp->kb_totalfree--;
	ksp->ks_memuse += 1 << indx;
out:
	kbp->kb_calls++;
	ksp->ks_inuse++;
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
#else
out:
#endif
	splx(s);
#ifdef DEBUG
	if (flags & M_NOWAIT)
		simplelockrecurse--;
#endif
	return ((void *) va);
}

/*
 * Free a block of memory allocated by malloc.
+ */ +void +free(addr, type) + void *addr; + int type; +{ + register struct kmembuckets *kbp; + register struct kmemusage *kup; + register struct freelist *freep; + long size; + int s; +#ifdef DIAGNOSTIC + caddr_t cp; + long *end, *lp, alloc, copysize; +#endif +#ifdef KMEMSTATS + register struct kmemstats *ksp = &kmemstats[type]; +#endif + + kup = btokup(addr); + size = 1 << kup->ku_indx; + kbp = &bucket[kup->ku_indx]; + s = splimp(); +#ifdef DIAGNOSTIC + /* + * Check for returns of data that do not point to the + * beginning of the allocation. + */ + if (size > NBPG * CLSIZE) + alloc = addrmask[BUCKETINDX(NBPG * CLSIZE)]; + else + alloc = addrmask[kup->ku_indx]; + if (((u_long)addr & alloc) != 0) + panic("free: unaligned addr 0x%x, size %d, type %s, mask %d\n", + addr, size, memname[type], alloc); +#endif /* DIAGNOSTIC */ + if (size > MAXALLOCSAVE) { + kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt)); +#ifdef KMEMSTATS + size = kup->ku_pagecnt << PGSHIFT; + ksp->ks_memuse -= size; + kup->ku_indx = 0; + kup->ku_pagecnt = 0; + if (ksp->ks_memuse + size >= ksp->ks_limit && + ksp->ks_memuse < ksp->ks_limit) + wakeup((caddr_t)ksp); + ksp->ks_inuse--; + kbp->kb_total -= 1; +#endif + splx(s); + return; + } + freep = (struct freelist *)addr; +#ifdef DIAGNOSTIC + /* + * Check for multiple frees. Use a quick check to see if + * it looks free before laboriously searching the freelist. + */ + if (freep->spare0 == WEIRD_ADDR) { + for (cp = kbp->kb_next; cp; cp = *(caddr_t *)cp) { + if (addr != cp) + continue; + printf("multiply freed item 0x%x\n", addr); + panic("free: duplicated free"); + } + } + /* + * Copy in known text to detect modification after freeing + * and to make it look free. Also, save the type being freed + * so we can list likely culprit if modification is detected + * when the object is reallocated. + */ + copysize = size < MAX_COPY ? 
size : MAX_COPY; + end = (long *)&((caddr_t)addr)[copysize]; + for (lp = (long *)addr; lp < end; lp++) + *lp = WEIRD_ADDR; + freep->type = type; +#endif /* DIAGNOSTIC */ +#ifdef KMEMSTATS + kup->ku_freecnt++; + if (kup->ku_freecnt >= kbp->kb_elmpercl) + if (kup->ku_freecnt > kbp->kb_elmpercl) + panic("free: multiple frees"); + else if (kbp->kb_totalfree > kbp->kb_highwat) + kbp->kb_couldfree++; + kbp->kb_totalfree++; + ksp->ks_memuse -= size; + if (ksp->ks_memuse + size >= ksp->ks_limit && + ksp->ks_memuse < ksp->ks_limit) + wakeup((caddr_t)ksp); + ksp->ks_inuse--; +#endif + if (kbp->kb_next == NULL) + kbp->kb_next = addr; + else + ((struct freelist *)kbp->kb_last)->next = addr; + freep->next = NULL; + kbp->kb_last = addr; + splx(s); +} + +/* + * Initialize the kernel memory allocator + */ +kmeminit() +{ + register long indx; + int npg; + +#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0) + ERROR!_kmeminit:_MAXALLOCSAVE_not_power_of_2 +#endif +#if (MAXALLOCSAVE > MINALLOCSIZE * 32768) + ERROR!_kmeminit:_MAXALLOCSAVE_too_big +#endif +#if (MAXALLOCSAVE < CLBYTES) + ERROR!_kmeminit:_MAXALLOCSAVE_too_small +#endif + npg = VM_KMEM_SIZE/ NBPG; + kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, + (vm_size_t)(npg * sizeof(struct kmemusage))); + kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, + (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * NBPG), FALSE); +#ifdef KMEMSTATS + for (indx = 0; indx < MINBUCKET + 16; indx++) { + if (1 << indx >= CLBYTES) + bucket[indx].kb_elmpercl = 1; + else + bucket[indx].kb_elmpercl = CLBYTES / (1 << indx); + bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; + } + for (indx = 0; indx < M_LAST; indx++) + kmemstats[indx].ks_limit = npg * NBPG * 6 / 10; +#endif +} diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c new file mode 100644 index 000000000000..67017933bc1d --- /dev/null +++ b/sys/kern/kern_proc.c @@ -0,0 +1,374 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of 
California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/map.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>

/*
 * Structure associated with user cacheing.
 * One record per uid that currently owns at least one process;
 * records are created and destroyed on demand by chgproccnt().
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;
	uid_t	ui_uid;
	long	ui_proccnt;	/* number of processes owned by ui_uid */
};
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */

/*
 * Other process lists
 */
struct pidhashhead *pidhashtbl;
u_long pidhash;
struct pgrphashhead *pgrphashtbl;
u_long pgrphash;
struct proclist allproc;	/* all live processes */
struct proclist zombproc;	/* processes awaiting wait() collection */

/*
 * Initialize global process hashing structures.
 * Hash table sizes are scaled from maxproc; hashinit() stores the
 * mask (size - 1) through its third argument.
 */
void
procinit()
{

	LIST_INIT(&allproc);
	LIST_INIT(&zombproc);
	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
	uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  Returns the new count.  The uidinfo
 * record is allocated on first use and freed when the count
 * drops back to zero; going negative is a fatal accounting bug.
 */
int
chgproccnt(uid, diff)
	uid_t	uid;
	int	diff;
{
	register struct uidinfo *uip;
	register struct uihashhead *uipp;

	uipp = UIHASH(uid);
	for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next)
		if (uip->ui_uid == uid)
			break;
	if (uip) {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt > 0)
			return (uip->ui_proccnt);
		if (uip->ui_proccnt < 0)
			panic("chgproccnt: procs < 0");
		/* Count reached zero: discard the record. */
		LIST_REMOVE(uip, ui_hash);
		FREE(uip, M_PROC);
		return (0);
	}
	/* No record: only a positive delta may create one. */
	if (diff <= 0) {
		if (diff == 0)
			return(0);
		panic("chgproccnt: lost user");
	}
	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
	LIST_INSERT_HEAD(uipp, uip, ui_hash);
	uip->ui_uid = uid;
	uip->ui_proccnt = diff;
	return (diff);
}

/*
 * Is p an inferior of the current process?
 * Walks the parent chain from p; returns 1 if curproc is reached,
 * 0 if the chain terminates at pid 0 first.
 */
inferior(p)
	register struct proc *p;
{

	for (; p != curproc; p = p->p_pptr)
		if (p->p_pid == 0)
			return (0);
	return (1);
}

/*
 * Locate a process by number.
 * Returns NULL if no live process has that pid.
 */
struct proc *
pfind(pid)
	register pid_t pid;
{
	register struct proc *p;

	for (p = PIDHASH(pid)->lh_first; p != 0; p = p->p_hash.le_next)
		if (p->p_pid == pid)
			return (p);
	return (NULL);
}

/*
 * Locate a process group by number.
 * Returns NULL if no group has that id.
 */
struct pgrp *
pgfind(pgid)
	register pid_t pgid;
{
	register struct pgrp *pgrp;

	for (pgrp = PGRPHASH(pgid)->lh_first; pgrp != 0;
	    pgrp = pgrp->pg_hash.le_next)
		if (pgrp->pg_id == pgid)
			return (pgrp);
	return (NULL);
}

/*
 * Move p to a new or existing process group (and session).
 * If no group pgid exists it is created (p must then be its leader,
 * pgid == p->p_pid); with mksess a new session is created as well.
 * Returns 0 on success or an errno (ESRCH if p vanished while the
 * pgrp allocation could have slept).
 */
int
enterpgrp(p, pgid, mksess)
	register struct proc *p;
	pid_t pgid;
	int mksess;
{
	register struct pgrp *pgrp = pgfind(pgid);

#ifdef DIAGNOSTIC
	if (pgrp != NULL && mksess)	/* firewalls */
		panic("enterpgrp: setsid into non-empty pgrp");
	if (SESS_LEADER(p))
		panic("enterpgrp: session leader attempted setpgrp");
#endif
	if (pgrp == NULL) {
		pid_t savepid = p->p_pid;
		struct proc *np;
		/*
		 * new process group
		 */
#ifdef DIAGNOSTIC
		if (p->p_pid != pgid)
			panic("enterpgrp: new pgrp and pid != pgid");
#endif
		MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP,
		    M_WAITOK);
		/*
		 * The M_WAITOK allocation may have blocked; re-look up the
		 * pid to make sure p still exists (presumably it could have
		 * exited meanwhile — NOTE(review): confirm against callers).
		 */
		if ((np = pfind(savepid)) == NULL || np != p)
			return (ESRCH);
		if (mksess) {
			register struct session *sess;

			/*
			 * new session
			 */
			MALLOC(sess, struct session *, sizeof(struct session),
			    M_SESSION, M_WAITOK);
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			/* Inherit the login name from the old session. */
			bcopy(p->p_session->s_login, sess->s_login,
			    sizeof(sess->s_login));
			/* A new session starts with no controlling tty. */
			p->p_flag &= ~P_CONTROLT;
			pgrp->pg_session = sess;
#ifdef DIAGNOSTIC
			if (p != curproc)
				panic("enterpgrp: mksession and p != curproc");
#endif
		} else {
			pgrp->pg_session = p->p_session;
			pgrp->pg_session->s_count++;
		}
		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
		LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
		pgrp->pg_jobc = 0;
	} else if (pgrp == p->p_pgrp)
		return (0);

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	LIST_REMOVE(p, p_pglist);
	if (p->p_pgrp->pg_members.lh_first == 0)
		pgdelete(p->p_pgrp);
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
	return (0);
}

/*
 * remove process from process group; deletes the group if p was
 * its last member.
 */
int
leavepgrp(p)
	register struct proc *p;
{

	LIST_REMOVE(p, p_pglist);
	if (p->p_pgrp->pg_members.lh_first == 0)
		pgdelete(p->p_pgrp);
	p->p_pgrp = 0;
	return (0);
}

/*
 * delete a process group: detach it from its controlling tty,
 * unhash it, and drop the session reference (freeing the session
 * when this was its last group).
 */
void
pgdelete(pgrp)
	register struct pgrp *pgrp;
{

	if (pgrp->pg_session->s_ttyp != NULL &&
	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
	LIST_REMOVE(pgrp, pg_hash);
	if (--pgrp->pg_session->s_count == 0)
		FREE(pgrp->pg_session, M_SESSION);
	FREE(pgrp, M_PGRP);
}

static void orphanpg();

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 */
void
fixjobc(p, pgrp, entering)
	register struct proc *p;
	register struct pgrp *pgrp;
	int entering;
{
	register struct pgrp *hispgrp;
	register struct session *mysession = pgrp->pg_session;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
	    hispgrp->pg_session == mysession)
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.  (Zombies no longer qualify anything.)
	 */
	for (p = p->p_children.lh_first; p != 0; p = p->p_sibling.le_next)
		if ((hispgrp = p->p_pgrp) != pgrp &&
		    hispgrp->pg_session == mysession &&
		    p->p_stat != SZOMB)
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang-up all process in that group.
 * (The outer loop's variable is reused by the inner loop; that is
 * safe because the function returns immediately afterwards.)
 */
static void
orphanpg(pg)
	struct pgrp *pg;
{
	register struct proc *p;

	for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
		if (p->p_stat == SSTOP) {
			for (p = pg->pg_members.lh_first; p != 0;
			    p = p->p_pglist.le_next) {
				psignal(p, SIGHUP);
				psignal(p, SIGCONT);
			}
			return;
		}
	}
}

#ifdef DEBUG
/*
 * Dump the process-group hash table to the console (debugging aid).
 */
pgrpdump()
{
	register struct pgrp *pgrp;
	register struct proc *p;
	register i;

	for (i = 0; i <= pgrphash; i++) {
		if (pgrp = pgrphashtbl[i].lh_first) {
			printf("\tindx %d\n", i);
			for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) {
				printf("\tpgrp %x, pgid %d, sess %x, sesscnt %d, mem %x\n",
				    pgrp, pgrp->pg_id, pgrp->pg_session,
				    pgrp->pg_session->s_count,
				    pgrp->pg_members.lh_first);
				for (p = pgrp->pg_members.lh_first; p != 0;
				    p = p->p_pglist.le_next) {
					printf("\t\tpid %d addr %x pgrp %x\n",
					    p->p_pid, p, p->p_pgrp);
				}
			}
		}
	}
}
#endif /* DEBUG */
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
new file mode 100644
index 000000000000..29e4c679c6f2
--- /dev/null
+++ b/sys/kern/kern_prot.c
@@ -0,0 +1,601 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_prot.c	8.9 (Berkeley) 2/14/95
 */

/*
 * System calls related to processes and protection
 */

#include <sys/param.h>
#include <sys/acct.h>
#include <sys/systm.h>
#include <sys/ucred.h>
#include <sys/proc.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/malloc.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

/*
 * Return the caller's pid; under 4.3BSD/SunOS compatibility the parent
 * pid is additionally returned in the second return register.
 */
/* ARGSUSED */
int
getpid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_pid;
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
	retval[1] = p->p_pptr->p_pid;
#endif
	return (0);
}

/* Return the parent's pid. */
/* ARGSUSED */
int
getppid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_pptr->p_pid;
	return (0);
}

/* Get process group ID; note that POSIX getpgrp takes no parameter */
int
getpgrp(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_pgrp->pg_id;
	return (0);
}

/*
 * Return the real uid; under compatibility the effective uid is also
 * returned in the second return register.
 */
/* ARGSUSED */
int
getuid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_cred->p_ruid;
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
	retval[1] = p->p_ucred->cr_uid;
#endif
	return (0);
}

/* Return the effective uid. */
/* ARGSUSED */
int
geteuid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_ucred->cr_uid;
	return (0);
}

/*
 * Return the real gid; under compatibility the effective gid
 * (cr_groups[0]) is also returned in the second return register.
 */
/* ARGSUSED */
int
getgid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_cred->p_rgid;
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
	retval[1] = p->p_ucred->cr_groups[0];
#endif
	return (0);
}

/*
 * Get effective group ID.  The "egid" is groups[0], and could be obtained
 * via getgroups.  This syscall exists because it is somewhat painful to do
 * correctly in a library function.
 */
/* ARGSUSED */
int
getegid(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = p->p_ucred->cr_groups[0];
	return (0);
}

/*
 * Copy out the supplementary group list.  A gidsetsize of 0 is a size
 * probe: only the count is returned.  EINVAL if the caller's buffer is
 * too small for the full list.
 */
int
getgroups(p, uap, retval)
	struct proc *p;
	register struct getgroups_args /* {
		syscallarg(u_int) gidsetsize;
		syscallarg(gid_t *) gidset;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register u_int ngrp;
	int error;

	if ((ngrp = SCARG(uap, gidsetsize)) == 0) {
		*retval = pc->pc_ucred->cr_ngroups;
		return (0);
	}
	if (ngrp < pc->pc_ucred->cr_ngroups)
		return (EINVAL);
	ngrp = pc->pc_ucred->cr_ngroups;
	if (error = copyout((caddr_t)pc->pc_ucred->cr_groups,
	    (caddr_t)SCARG(uap, gidset), ngrp * sizeof(gid_t)))
		return (error);
	*retval = ngrp;
	return (0);
}

/*
 * Create a new session with the caller as leader of a new process
 * group whose id is the caller's pid.  Fails with EPERM if the caller
 * already leads a group or such a group exists.
 */
/* ARGSUSED */
int
setsid(p, uap, retval)
	register struct proc *p;
	void *uap;
	register_t *retval;
{

	if (p->p_pgid == p->p_pid || pgfind(p->p_pid)) {
		return (EPERM);
	} else {
		(void)enterpgrp(p, p->p_pid, 1);
		*retval = p->p_pid;
		return (0);
	}
}

/*
 * set process group (setpgid/old setpgrp)
 *
 * caller does setpgid(targpid, targpgid)
 *
 * pid must be caller or child of caller (ESRCH)
 * if a child
 *	pid must be in same session (EPERM)
 *	pid can't have done an exec (EACCES)
 * if pgid != pid
 * 	there must exist some pid in same session having pgid (EPERM)
 * pid must not be session leader (EPERM)
 */
/* ARGSUSED */
int
setpgid(curp, uap, retval)
	struct proc *curp;
	register struct setpgid_args /* {
		syscallarg(int) pid;
		syscallarg(int) pgid;
	} */ *uap;
	register_t *retval;
{
	register struct proc *targp;		/* target process */
	register struct pgrp *pgrp;		/* target pgrp */

	if (SCARG(uap, pid) != 0 && SCARG(uap, pid) != curp->p_pid) {
		if ((targp = pfind(SCARG(uap, pid))) == 0 || !inferior(targp))
			return (ESRCH);
		if (targp->p_session != curp->p_session)
			return (EPERM);
		if (targp->p_flag & P_EXEC)
			return (EACCES);
	} else
		targp = curp;
	if (SESS_LEADER(targp))
		return (EPERM);
	if (SCARG(uap, pgid) == 0)
		/* pgid 0 means "use the target's own pid". */
		SCARG(uap, pgid) = targp->p_pid;
	else if (SCARG(uap, pgid) != targp->p_pid)
		if ((pgrp = pgfind(SCARG(uap, pgid))) == 0 ||
		    pgrp->pg_session != curp->p_session)
			return (EPERM);
	return (enterpgrp(targp, SCARG(uap, pgid), 0));
}

/*
 * Set real, effective and saved uids.  Non-superuser callers may only
 * set the uid to the current real uid.
 */
/* ARGSUSED */
int
setuid(p, uap, retval)
	struct proc *p;
	struct setuid_args /* {
		syscallarg(uid_t) uid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register uid_t uid;
	int error;

	uid = SCARG(uap, uid);
	if (uid != pc->p_ruid &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	/*
	 * Everything's okay, do it.
	 * Transfer proc count to new user.
	 * Copy credentials so other references do not see our changes.
	 */
	(void)chgproccnt(pc->p_ruid, -1);
	(void)chgproccnt(uid, 1);
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_uid = uid;
	pc->p_ruid = uid;
	pc->p_svuid = uid;
	p->p_flag |= P_SUGID;
	return (0);
}

/*
 * Set the effective uid only.  Permitted targets without superuser
 * privilege are the real and saved uids.
 */
/* ARGSUSED */
int
seteuid(p, uap, retval)
	struct proc *p;
	struct seteuid_args /* {
		syscallarg(uid_t) euid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register uid_t euid;
	int error;

	euid = SCARG(uap, euid);
	if (euid != pc->p_ruid && euid != pc->p_svuid &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	/*
	 * Everything's okay, do it.  Copy credentials so other references do
	 * not see our changes.
	 */
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_uid = euid;
	p->p_flag |= P_SUGID;
	return (0);
}

/*
 * Set real, effective and saved gids.  Non-superuser callers may only
 * set the gid to the current real gid.
 */
/* ARGSUSED */
int
setgid(p, uap, retval)
	struct proc *p;
	struct setgid_args /* {
		syscallarg(gid_t) gid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register gid_t gid;
	int error;

	gid = SCARG(uap, gid);
	if (gid != pc->p_rgid && (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_groups[0] = gid;
	pc->p_rgid = gid;
	pc->p_svgid = gid;		/* ??? */
	p->p_flag |= P_SUGID;
	return (0);
}

/*
 * Set the effective gid (cr_groups[0]) only.  Permitted targets
 * without superuser privilege are the real and saved gids.
 */
/* ARGSUSED */
int
setegid(p, uap, retval)
	struct proc *p;
	struct setegid_args /* {
		syscallarg(gid_t) egid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register gid_t egid;
	int error;

	egid = SCARG(uap, egid);
	if (egid != pc->p_rgid && egid != pc->p_svgid &&
	    (error = suser(pc->pc_ucred, &p->p_acflag)))
		return (error);
	pc->pc_ucred = crcopy(pc->pc_ucred);
	pc->pc_ucred->cr_groups[0] = egid;
	p->p_flag |= P_SUGID;
	return (0);
}

/*
 * Replace the supplementary group list (superuser only).
 * Note that the effective gid lives in cr_groups[0], so it is
 * overwritten as well.
 */
/* ARGSUSED */
int
setgroups(p, uap, retval)
	struct proc *p;
	struct setgroups_args /* {
		syscallarg(u_int) gidsetsize;
		syscallarg(gid_t *) gidset;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	register u_int ngrp;
	int error;

	if (error = suser(pc->pc_ucred, &p->p_acflag))
		return (error);
	ngrp = SCARG(uap, gidsetsize);
	if (ngrp < 1 || ngrp > NGROUPS)
		return (EINVAL);
	pc->pc_ucred = crcopy(pc->pc_ucred);
	if (error = copyin((caddr_t)SCARG(uap, gidset),
	    (caddr_t)pc->pc_ucred->cr_groups, ngrp * sizeof(gid_t)))
		return (error);
	pc->pc_ucred->cr_ngroups = ngrp;
	p->p_flag |= P_SUGID;
	return (0);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Old setreuid(2), emulated in terms of setuid/seteuid.
 */
/* ARGSUSED */
int
compat_43_setreuid(p, uap, retval)
	register struct proc *p;
	struct compat_43_setreuid_args /* {
		syscallarg(int) ruid;
		syscallarg(int) euid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	union {
		struct setuid_args sa;
		struct seteuid_args ea;
	} args;

	/*
	 * If ruid == euid then setreuid is being used to emulate setuid,
	 * just do it.
	 */
	if (SCARG(uap, ruid) != -1 && SCARG(uap, ruid) == SCARG(uap, euid)) {
		SCARG(&args.sa, uid) = SCARG(uap, ruid);
		return (setuid(p, &args.sa, retval));
	}
	/*
	 * Otherwise we assume that the intent of setting ruid is to be
	 * able to get back ruid priviledge (i.e. swapping ruid and euid).
	 * So we make sure that we will be able to do so, but do not
	 * actually set the ruid.
	 */
	if (SCARG(uap, ruid) != (uid_t)-1 && SCARG(uap, ruid) != pc->p_ruid &&
	    SCARG(uap, ruid) != pc->p_svuid)
		return (EPERM);
	if (SCARG(uap, euid) == (uid_t)-1)
		return (0);
	SCARG(&args.ea, euid) = SCARG(uap, euid);
	return (seteuid(p, &args.ea, retval));
}

/*
 * Old setregid(2), emulated in terms of setgid/setegid.
 */
/* ARGSUSED */
int
compat_43_setregid(p, uap, retval)
	register struct proc *p;
	struct compat_43_setregid_args /* {
		syscallarg(int) rgid;
		syscallarg(int) egid;
	} */ *uap;
	register_t *retval;
{
	register struct pcred *pc = p->p_cred;
	union {
		struct setgid_args sa;
		struct setegid_args ea;
	} args;

	/*
	 * If rgid == egid then setreuid is being used to emulate setgid,
	 * just do it.
	 */
	if (SCARG(uap, rgid) != -1 && SCARG(uap, rgid) == SCARG(uap, egid)) {
		SCARG(&args.sa, gid) = SCARG(uap, rgid);
		return (setgid(p, &args.sa, retval));
	}
	/*
	 * Otherwise we assume that the intent of setting rgid is to be
	 * able to get back rgid priviledge (i.e. swapping rgid and egid).
	 * So we make sure that we will be able to do so, but do not
	 * actually set the rgid.
	 */
	if (SCARG(uap, rgid) != (gid_t)-1 && SCARG(uap, rgid) != pc->p_rgid &&
	    SCARG(uap, rgid) != pc->p_svgid)
		return (EPERM);
	if (SCARG(uap, egid) == (gid_t)-1)
		return (0);
	SCARG(&args.ea, egid) = SCARG(uap, egid);
	return (setegid(p, &args.ea, retval));
}
#endif /* defined(COMPAT_43) || defined(COMPAT_SUNOS) */

/*
 * Check if gid is a member of the group set.
 */
int
groupmember(gid, cred)
	gid_t gid;
	register struct ucred *cred;
{
	register gid_t *gp;
	gid_t *egp;

	egp = &(cred->cr_groups[cred->cr_ngroups]);
	for (gp = cred->cr_groups; gp < egp; gp++)
		if (*gp == gid)
			return (1);
	return (0);
}

/*
 * Test whether the specified credentials imply "super-user"
 * privilege; if so, and we have accounting info, set the flag
 * indicating use of super-powers.
 * Returns 0 or error.
 */
int
suser(cred, acflag)
	struct ucred *cred;
	u_short *acflag;
{
	if (cred->cr_uid == 0) {
		if (acflag)
			*acflag |= ASU;
		return (0);
	}
	return (EPERM);
}

/*
 * Allocate a zeroed cred structure.
 * The new credential starts with a reference count of 1.
 */
struct ucred *
crget()
{
	register struct ucred *cr;

	MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK);
	bzero((caddr_t)cr, sizeof(*cr));
	cr->cr_ref = 1;
	return (cr);
}

/*
 * Free a cred structure.
 * Throws away space when ref count gets to 0.
 */
void
crfree(cr)
	struct ucred *cr;
{
	int s;

	s = splimp();				/* ??? */
	if (--cr->cr_ref == 0)
		FREE((caddr_t)cr, M_CRED);
	(void) splx(s);
}

/*
 * Copy cred structure to a new one and free the old one.
 * If the credential is not shared (ref count 1) it is returned
 * unchanged, implementing copy-on-write for shared credentials.
 */
struct ucred *
crcopy(cr)
	struct ucred *cr;
{
	struct ucred *newcr;

	if (cr->cr_ref == 1)
		return (cr);
	newcr = crget();
	*newcr = *cr;
	crfree(cr);
	newcr->cr_ref = 1;
	return (newcr);
}

/*
 * Dup cred struct to a new held one.
 */
struct ucred *
crdup(cr)
	struct ucred *cr;
{
	struct ucred *newcr;

	newcr = crget();
	*newcr = *cr;
	newcr->cr_ref = 1;
	return (newcr);
}

/*
 * Get login name, if available.
 * The request length is silently clamped to the size of the
 * session's login buffer.
 */
/* ARGSUSED */
int
getlogin(p, uap, retval)
	struct proc *p;
	struct getlogin_args /* {
		syscallarg(char *) namebuf;
		syscallarg(u_int) namelen;
	} */ *uap;
	register_t *retval;
{

	if (SCARG(uap, namelen) > sizeof (p->p_pgrp->pg_session->s_login))
		SCARG(uap, namelen) = sizeof (p->p_pgrp->pg_session->s_login);
	return (copyout((caddr_t) p->p_pgrp->pg_session->s_login,
	    (caddr_t) SCARG(uap, namebuf), SCARG(uap, namelen)));
}

/*
 * Set login name (superuser only).  An over-long name yields EINVAL
 * rather than ENAMETOOLONG.
 */
/* ARGSUSED */
int
setlogin(p, uap, retval)
	struct proc *p;
	struct setlogin_args /* {
		syscallarg(char *) namebuf;
	} */ *uap;
	register_t *retval;
{
	int error;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	error = copyinstr((caddr_t) SCARG(uap, namebuf),
	    (caddr_t) p->p_pgrp->pg_session->s_login,
	    sizeof (p->p_pgrp->pg_session->s_login) - 1, (u_int *)0);
	if (error == ENAMETOOLONG)
		error = EINVAL;
	return (error);
}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
new file mode 100644
index 000000000000..569b9d973a10
--- /dev/null
+++ b/sys/kern/kern_resource.c
@@ -0,0 +1,489 @@
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/resourcevar.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +#include <vm/vm.h> + +int donice __P((struct proc *curp, struct proc *chgp, int n)); +int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp)); + +/* + * Resource controls and accounting. + */ + +int +getpriority(curp, uap, retval) + struct proc *curp; + register struct getpriority_args /* { + syscallarg(int) which; + syscallarg(int) who; + } */ *uap; + register_t *retval; +{ + register struct proc *p; + register int low = PRIO_MAX + 1; + + switch (SCARG(uap, which)) { + + case PRIO_PROCESS: + if (SCARG(uap, who) == 0) + p = curp; + else + p = pfind(SCARG(uap, who)); + if (p == 0) + break; + low = p->p_nice; + break; + + case PRIO_PGRP: { + register struct pgrp *pg; + + if (SCARG(uap, who) == 0) + pg = curp->p_pgrp; + else if ((pg = pgfind(SCARG(uap, who))) == NULL) + break; + for (p = pg->pg_members.lh_first; p != 0; + p = p->p_pglist.le_next) { + if (p->p_nice < low) + low = p->p_nice; + } + break; + } + + case PRIO_USER: + if (SCARG(uap, who) == 0) + SCARG(uap, who) = curp->p_ucred->cr_uid; + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) + if (p->p_ucred->cr_uid == SCARG(uap, who) && + p->p_nice < low) + low = p->p_nice; + break; + + default: + return (EINVAL); + } + if (low == PRIO_MAX + 1) + return (ESRCH); + *retval = low; + return (0); +} + +/* ARGSUSED */ +int +setpriority(curp, uap, retval) + struct proc *curp; + register struct setpriority_args /* { + syscallarg(int) which; + syscallarg(int) who; + syscallarg(int) prio; + } */ *uap; + register_t *retval; +{ + register struct proc *p; + int found = 0, error = 0; + + switch (SCARG(uap, which)) { + + case PRIO_PROCESS: + if (SCARG(uap, who) == 0) + p = curp; + else + p = pfind(SCARG(uap, who)); + 
if (p == 0) + break; + error = donice(curp, p, SCARG(uap, prio)); + found++; + break; + + case PRIO_PGRP: { + register struct pgrp *pg; + + if (SCARG(uap, who) == 0) + pg = curp->p_pgrp; + else if ((pg = pgfind(SCARG(uap, who))) == NULL) + break; + for (p = pg->pg_members.lh_first; p != 0; + p = p->p_pglist.le_next) { + error = donice(curp, p, SCARG(uap, prio)); + found++; + } + break; + } + + case PRIO_USER: + if (SCARG(uap, who) == 0) + SCARG(uap, who) = curp->p_ucred->cr_uid; + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) + if (p->p_ucred->cr_uid == SCARG(uap, who)) { + error = donice(curp, p, SCARG(uap, prio)); + found++; + } + break; + + default: + return (EINVAL); + } + if (found == 0) + return (ESRCH); + return (error); +} + +int +donice(curp, chgp, n) + register struct proc *curp, *chgp; + register int n; +{ + register struct pcred *pcred = curp->p_cred; + + if (pcred->pc_ucred->cr_uid && pcred->p_ruid && + pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && + pcred->p_ruid != chgp->p_ucred->cr_uid) + return (EPERM); + if (n > PRIO_MAX) + n = PRIO_MAX; + if (n < PRIO_MIN) + n = PRIO_MIN; + if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) + return (EACCES); + chgp->p_nice = n; + (void)resetpriority(chgp); + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* ARGSUSED */ +int +compat_43_setrlimit(p, uap, retval) + struct proc *p; + struct compat_43_setrlimit_args /* { + syscallarg(u_int) which; + syscallarg(struct ogetrlimit *) rlp; + } */ *uap; + register_t *retval; +{ + struct orlimit olim; + struct rlimit lim; + int error; + + if (error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&olim, + sizeof (struct orlimit))) + return (error); + lim.rlim_cur = olim.rlim_cur; + lim.rlim_max = olim.rlim_max; + return (dosetrlimit(p, SCARG(uap, which), &lim)); +} + +/* ARGSUSED */ +int +compat_43_getrlimit(p, uap, retval) + struct proc *p; + register struct compat_43_getrlimit_args /* { + syscallarg(u_int) which; + 
syscallarg(struct ogetrlimit *) rlp; + } */ *uap; + register_t *retval; +{ + struct orlimit olim; + + if (SCARG(uap, which) >= RLIM_NLIMITS) + return (EINVAL); + olim.rlim_cur = p->p_rlimit[SCARG(uap, which)].rlim_cur; + if (olim.rlim_cur == -1) + olim.rlim_cur = 0x7fffffff; + olim.rlim_max = p->p_rlimit[SCARG(uap, which)].rlim_max; + if (olim.rlim_max == -1) + olim.rlim_max = 0x7fffffff; + return (copyout((caddr_t)&olim, (caddr_t)SCARG(uap, rlp), + sizeof(olim))); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* ARGSUSED */ +int +setrlimit(p, uap, retval) + struct proc *p; + register struct setrlimit_args /* { + syscallarg(u_int) which; + syscallarg(struct rlimit *) rlp; + } */ *uap; + register_t *retval; +{ + struct rlimit alim; + int error; + + if (error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim, + sizeof (struct rlimit))) + return (error); + return (dosetrlimit(p, SCARG(uap, which), &alim)); +} + +int +dosetrlimit(p, which, limp) + struct proc *p; + u_int which; + struct rlimit *limp; +{ + register struct rlimit *alimp; + extern unsigned maxdmap; + int error; + + if (which >= RLIM_NLIMITS) + return (EINVAL); + alimp = &p->p_rlimit[which]; + if (limp->rlim_cur > alimp->rlim_max || + limp->rlim_max > alimp->rlim_max) + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (limp->rlim_cur > limp->rlim_max) + limp->rlim_cur = limp->rlim_max; + if (p->p_limit->p_refcnt > 1 && + (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { + p->p_limit->p_refcnt--; + p->p_limit = limcopy(p->p_limit); + alimp = &p->p_rlimit[which]; + } + + switch (which) { + + case RLIMIT_DATA: + if (limp->rlim_cur > maxdmap) + limp->rlim_cur = maxdmap; + if (limp->rlim_max > maxdmap) + limp->rlim_max = maxdmap; + break; + + case RLIMIT_STACK: + if (limp->rlim_cur > maxdmap) + limp->rlim_cur = maxdmap; + if (limp->rlim_max > maxdmap) + limp->rlim_max = maxdmap; + /* + * Stack is allocated to the max at exec time with only + * "rlim_cur" bytes accessible. 
If stack limit is going + * up make more accessible, if going down make inaccessible. + */ + if (limp->rlim_cur != alimp->rlim_cur) { + vm_offset_t addr; + vm_size_t size; + vm_prot_t prot; + + if (limp->rlim_cur > alimp->rlim_cur) { + prot = VM_PROT_ALL; + size = limp->rlim_cur - alimp->rlim_cur; + addr = USRSTACK - limp->rlim_cur; + } else { + prot = VM_PROT_NONE; + size = alimp->rlim_cur - limp->rlim_cur; + addr = USRSTACK - alimp->rlim_cur; + } + addr = trunc_page(addr); + size = round_page(size); + (void) vm_map_protect(&p->p_vmspace->vm_map, + addr, addr+size, prot, FALSE); + } + break; + + case RLIMIT_NOFILE: + if (limp->rlim_cur > maxfiles) + limp->rlim_cur = maxfiles; + if (limp->rlim_max > maxfiles) + limp->rlim_max = maxfiles; + break; + + case RLIMIT_NPROC: + if (limp->rlim_cur > maxproc) + limp->rlim_cur = maxproc; + if (limp->rlim_max > maxproc) + limp->rlim_max = maxproc; + break; + } + *alimp = *limp; + return (0); +} + +/* ARGSUSED */ +int +getrlimit(p, uap, retval) + struct proc *p; + register struct getrlimit_args /* { + syscallarg(u_int) which; + syscallarg(struct rlimit *) rlp; + } */ *uap; + register_t *retval; +{ + + if (SCARG(uap, which) >= RLIM_NLIMITS) + return (EINVAL); + return (copyout((caddr_t)&p->p_rlimit[SCARG(uap, which)], + (caddr_t)SCARG(uap, rlp), sizeof (struct rlimit))); +} + +/* + * Transform the running time and tick information in proc p into user, + * system, and interrupt time usage. 
+ */ +void +calcru(p, up, sp, ip) + register struct proc *p; + register struct timeval *up; + register struct timeval *sp; + register struct timeval *ip; +{ + register u_quad_t u, st, ut, it, tot; + register u_long sec, usec; + register int s; + struct timeval tv; + + s = splstatclock(); + st = p->p_sticks; + ut = p->p_uticks; + it = p->p_iticks; + splx(s); + + tot = st + ut + it; + if (tot == 0) { + up->tv_sec = up->tv_usec = 0; + sp->tv_sec = sp->tv_usec = 0; + if (ip != NULL) + ip->tv_sec = ip->tv_usec = 0; + return; + } + + sec = p->p_rtime.tv_sec; + usec = p->p_rtime.tv_usec; + if (p == curproc) { + /* + * Adjust for the current time slice. This is actually fairly + * important since the error here is on the order of a time + * quantum, which is much greater than the sampling error. + */ + microtime(&tv); + sec += tv.tv_sec - runtime.tv_sec; + usec += tv.tv_usec - runtime.tv_usec; + } + u = sec * 1000000 + usec; + st = (u * st) / tot; + sp->tv_sec = st / 1000000; + sp->tv_usec = st % 1000000; + ut = (u * ut) / tot; + up->tv_sec = ut / 1000000; + up->tv_usec = ut % 1000000; + if (ip != NULL) { + it = (u * it) / tot; + ip->tv_sec = it / 1000000; + ip->tv_usec = it % 1000000; + } +} + +/* ARGSUSED */ +int +getrusage(p, uap, retval) + register struct proc *p; + register struct getrusage_args /* { + syscallarg(int) who; + syscallarg(struct rusage *) rusage; + } */ *uap; + register_t *retval; +{ + register struct rusage *rup; + + switch (SCARG(uap, who)) { + + case RUSAGE_SELF: + rup = &p->p_stats->p_ru; + calcru(p, &rup->ru_utime, &rup->ru_stime, NULL); + break; + + case RUSAGE_CHILDREN: + rup = &p->p_stats->p_cru; + break; + + default: + return (EINVAL); + } + return (copyout((caddr_t)rup, (caddr_t)SCARG(uap, rusage), + sizeof (struct rusage))); +} + +void +ruadd(ru, ru2) + register struct rusage *ru, *ru2; +{ + register long *ip, *ip2; + register int i; + + timevaladd(&ru->ru_utime, &ru2->ru_utime); + timevaladd(&ru->ru_stime, &ru2->ru_stime); + if (ru->ru_maxrss 
< ru2->ru_maxrss) + ru->ru_maxrss = ru2->ru_maxrss; + ip = &ru->ru_first; ip2 = &ru2->ru_first; + for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) + *ip++ += *ip2++; +} + +/* + * Make a copy of the plimit structure. + * We share these structures copy-on-write after fork, + * and copy when a limit is changed. + */ +struct plimit * +limcopy(lim) + struct plimit *lim; +{ + register struct plimit *copy; + + MALLOC(copy, struct plimit *, sizeof(struct plimit), + M_SUBPROC, M_WAITOK); + bcopy(lim->pl_rlimit, copy->pl_rlimit, + sizeof(struct rlimit) * RLIM_NLIMITS); + copy->p_lflags = 0; + copy->p_refcnt = 1; + return (copy); +} diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c new file mode 100644 index 000000000000..5683b9c7c935 --- /dev/null +++ b/sys/kern/kern_sig.c @@ -0,0 +1,1219 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_sig.c 8.14 (Berkeley) 5/14/95 + */ + +#define SIGPROP /* include signal properties table */ +#include <sys/param.h> +#include <sys/signalvar.h> +#include <sys/resourcevar.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/timeb.h> +#include <sys/times.h> +#include <sys/buf.h> +#include <sys/acct.h> +#include <sys/file.h> +#include <sys/kernel.h> +#include <sys/wait.h> +#include <sys/ktrace.h> +#include <sys/syslog.h> +#include <sys/stat.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +#include <machine/cpu.h> + +#include <vm/vm.h> +#include <sys/user.h> /* for coredump */ + +void stop __P((struct proc *p)); + +/* + * Can process p, with pcred pc, send the signal signum to process q? 
+ */ +#define CANSIGNAL(p, pc, q, signum) \ + ((pc)->pc_ucred->cr_uid == 0 || \ + (pc)->p_ruid == (q)->p_cred->p_ruid || \ + (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \ + (pc)->p_ruid == (q)->p_ucred->cr_uid || \ + (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \ + ((signum) == SIGCONT && (q)->p_session == (p)->p_session)) + +/* ARGSUSED */ +int +sigaction(p, uap, retval) + struct proc *p; + register struct sigaction_args /* { + syscallarg(int) signum; + syscallarg(struct sigaction *) nsa; + syscallarg(struct sigaction *) osa; + } */ *uap; + register_t *retval; +{ + struct sigaction vec; + register struct sigaction *sa; + register struct sigacts *ps = p->p_sigacts; + register int signum; + int bit, error; + + signum = SCARG(uap, signum); + if (signum <= 0 || signum >= NSIG || + signum == SIGKILL || signum == SIGSTOP) + return (EINVAL); + sa = &vec; + if (SCARG(uap, osa)) { + sa->sa_handler = ps->ps_sigact[signum]; + sa->sa_mask = ps->ps_catchmask[signum]; + bit = sigmask(signum); + sa->sa_flags = 0; + if ((ps->ps_sigonstack & bit) != 0) + sa->sa_flags |= SA_ONSTACK; + if ((ps->ps_sigintr & bit) == 0) + sa->sa_flags |= SA_RESTART; + if (p->p_flag & P_NOCLDSTOP) + sa->sa_flags |= SA_NOCLDSTOP; + if (error = copyout((caddr_t)sa, (caddr_t)SCARG(uap, osa), + sizeof (vec))) + return (error); + } + if (SCARG(uap, nsa)) { + if (error = copyin((caddr_t)SCARG(uap, nsa), (caddr_t)sa, + sizeof (vec))) + return (error); + setsigvec(p, signum, sa); + } + return (0); +} + +void +setsigvec(p, signum, sa) + register struct proc *p; + int signum; + register struct sigaction *sa; +{ + register struct sigacts *ps = p->p_sigacts; + register int bit; + + bit = sigmask(signum); + /* + * Change setting atomically. 
+ */ + (void) splhigh(); + ps->ps_sigact[signum] = sa->sa_handler; + ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask; + if ((sa->sa_flags & SA_RESTART) == 0) + ps->ps_sigintr |= bit; + else + ps->ps_sigintr &= ~bit; + if (sa->sa_flags & SA_ONSTACK) + ps->ps_sigonstack |= bit; + else + ps->ps_sigonstack &= ~bit; +#ifdef COMPAT_SUNOS + if (sa->sa_flags & SA_USERTRAMP) + ps->ps_usertramp |= bit; + else + ps->ps_usertramp &= ~bit; +#endif + if (signum == SIGCHLD) { + if (sa->sa_flags & SA_NOCLDSTOP) + p->p_flag |= P_NOCLDSTOP; + else + p->p_flag &= ~P_NOCLDSTOP; + } + /* + * Set bit in p_sigignore for signals that are set to SIG_IGN, + * and for signals set to SIG_DFL where the default is to ignore. + * However, don't put SIGCONT in p_sigignore, + * as we have to restart the process. + */ + if (sa->sa_handler == SIG_IGN || + (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { + p->p_siglist &= ~bit; /* never to be seen again */ + if (signum != SIGCONT) + p->p_sigignore |= bit; /* easier in psignal */ + p->p_sigcatch &= ~bit; + } else { + p->p_sigignore &= ~bit; + if (sa->sa_handler == SIG_DFL) + p->p_sigcatch &= ~bit; + else + p->p_sigcatch |= bit; + } + (void) spl0(); +} + +/* + * Initialize signal state for process 0; + * set to ignore signals that are ignored by default. + */ +void +siginit(p) + struct proc *p; +{ + register int i; + + for (i = 0; i < NSIG; i++) + if (sigprop[i] & SA_IGNORE && i != SIGCONT) + p->p_sigignore |= sigmask(i); +} + +/* + * Reset signals for an exec of the specified process. + */ +void +execsigs(p) + register struct proc *p; +{ + register struct sigacts *ps = p->p_sigacts; + register int nc, mask; + + /* + * Reset caught signals. Held signals remain held + * through p_sigmask (unless they were caught, + * and are now ignored by default). 
+ */ + while (p->p_sigcatch) { + nc = ffs((long)p->p_sigcatch); + mask = sigmask(nc); + p->p_sigcatch &= ~mask; + if (sigprop[nc] & SA_IGNORE) { + if (nc != SIGCONT) + p->p_sigignore |= mask; + p->p_siglist &= ~mask; + } + ps->ps_sigact[nc] = SIG_DFL; + } + /* + * Reset stack state to the user stack. + * Clear set of signals caught on the signal stack. + */ + ps->ps_sigstk.ss_flags = SA_DISABLE; + ps->ps_sigstk.ss_size = 0; + ps->ps_sigstk.ss_base = 0; + ps->ps_flags = 0; +} + +/* + * Manipulate signal mask. + * Note that we receive new mask, not pointer, + * and return old mask as return value; + * the library stub does the rest. + */ +int +sigprocmask(p, uap, retval) + register struct proc *p; + struct sigprocmask_args /* { + syscallarg(int) how; + syscallarg(sigset_t) mask; + } */ *uap; + register_t *retval; +{ + int error = 0; + + *retval = p->p_sigmask; + (void) splhigh(); + + switch (SCARG(uap, how)) { + case SIG_BLOCK: + p->p_sigmask |= SCARG(uap, mask) &~ sigcantmask; + break; + + case SIG_UNBLOCK: + p->p_sigmask &= ~SCARG(uap, mask); + break; + + case SIG_SETMASK: + p->p_sigmask = SCARG(uap, mask) &~ sigcantmask; + break; + + default: + error = EINVAL; + break; + } + (void) spl0(); + return (error); +} + +/* ARGSUSED */ +int +sigpending(p, uap, retval) + struct proc *p; + void *uap; + register_t *retval; +{ + + *retval = p->p_siglist; + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Generalized interface signal handler, 4.3-compatible. 
+ */ +/* ARGSUSED */ +int +compat_43_sigvec(p, uap, retval) + struct proc *p; + register struct compat_43_sigvec_args /* { + syscallarg(int) signum; + syscallarg(struct sigvec *) nsv; + syscallarg(struct sigvec *) osv; + } */ *uap; + register_t *retval; +{ + struct sigvec vec; + register struct sigacts *ps = p->p_sigacts; + register struct sigvec *sv; + register int signum; + int bit, error; + + signum = SCARG(uap, signum); + if (signum <= 0 || signum >= NSIG || + signum == SIGKILL || signum == SIGSTOP) + return (EINVAL); + sv = &vec; + if (SCARG(uap, osv)) { + *(sig_t *)&sv->sv_handler = ps->ps_sigact[signum]; + sv->sv_mask = ps->ps_catchmask[signum]; + bit = sigmask(signum); + sv->sv_flags = 0; + if ((ps->ps_sigonstack & bit) != 0) + sv->sv_flags |= SV_ONSTACK; + if ((ps->ps_sigintr & bit) != 0) + sv->sv_flags |= SV_INTERRUPT; +#ifndef COMPAT_SUNOS + if (p->p_flag & P_NOCLDSTOP) + sv->sv_flags |= SA_NOCLDSTOP; +#endif + if (error = copyout((caddr_t)sv, (caddr_t)SCARG(uap, osv), + sizeof (vec))) + return (error); + } + if (SCARG(uap, nsv)) { + if (error = copyin((caddr_t)SCARG(uap, nsv), (caddr_t)sv, + sizeof (vec))) + return (error); +#ifdef COMPAT_SUNOS + /* + * SunOS uses this bit (4, aka SA_DISABLE) as SV_RESETHAND, + * `reset to SIG_DFL on delivery'. We have no such option + * now or ever! 
+ */ + if (sv->sv_flags & SA_DISABLE) + return (EINVAL); + sv->sv_flags |= SA_USERTRAMP; +#endif + sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ + setsigvec(p, signum, (struct sigaction *)sv); + } + return (0); +} + +int +compat_43_sigblock(p, uap, retval) + register struct proc *p; + struct compat_43_sigblock_args /* { + syscallarg(int) mask; + } */ *uap; + register_t *retval; +{ + + (void) splhigh(); + *retval = p->p_sigmask; + p->p_sigmask |= SCARG(uap, mask) &~ sigcantmask; + (void) spl0(); + return (0); +} + +int +compat_43_sigsetmask(p, uap, retval) + struct proc *p; + struct compat_43_sigsetmask_args /* { + syscallarg(int) mask; + } */ *uap; + register_t *retval; +{ + + (void) splhigh(); + *retval = p->p_sigmask; + p->p_sigmask = SCARG(uap, mask) &~ sigcantmask; + (void) spl0(); + return (0); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Suspend process until signal, providing mask to be set + * in the meantime. Note nonstandard calling convention: + * libc stub passes mask, not pointer, to save a copyin. + */ +/* ARGSUSED */ +int +sigsuspend(p, uap, retval) + register struct proc *p; + struct sigsuspend_args /* { + syscallarg(int) mask; + } */ *uap; + register_t *retval; +{ + register struct sigacts *ps = p->p_sigacts; + + /* + * When returning from sigpause, we want + * the old mask to be restored after the + * signal handler has finished. Thus, we + * save it here and mark the sigacts structure + * to indicate this. + */ + ps->ps_oldmask = p->p_sigmask; + ps->ps_flags |= SAS_OLDMASK; + p->p_sigmask = SCARG(uap, mask) &~ sigcantmask; + while (tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0) == 0) + /* void */; + /* always return EINTR rather than ERESTART... 
*/ + return (EINTR); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* ARGSUSED */ +int +compat_43_sigstack(p, uap, retval) + struct proc *p; + register struct compat_43_sigstack_args /* { + syscallarg(struct sigstack *) nss; + syscallarg(struct sigstack *) oss; + } */ *uap; + register_t *retval; +{ + struct sigstack ss; + struct sigacts *psp; + int error = 0; + + psp = p->p_sigacts; + ss.ss_sp = psp->ps_sigstk.ss_base; + ss.ss_onstack = psp->ps_sigstk.ss_flags & SA_ONSTACK; + if (SCARG(uap, oss) && (error = copyout((caddr_t)&ss, + (caddr_t)SCARG(uap, oss), sizeof (struct sigstack)))) + return (error); + if (SCARG(uap, nss) && (error = copyin((caddr_t)SCARG(uap, nss), + (caddr_t)&ss, sizeof (ss))) == 0) { + psp->ps_sigstk.ss_base = ss.ss_sp; + psp->ps_sigstk.ss_size = 0; + psp->ps_sigstk.ss_flags |= ss.ss_onstack & SA_ONSTACK; + psp->ps_flags |= SAS_ALTSTACK; + } + return (error); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* ARGSUSED */ +int +sigaltstack(p, uap, retval) + struct proc *p; + register struct sigaltstack_args /* { + syscallarg(struct sigaltstack *) nss; + syscallarg(struct sigaltstack *) oss; + } */ *uap; + register_t *retval; +{ + struct sigacts *psp; + struct sigaltstack ss; + int error; + + psp = p->p_sigacts; + if ((psp->ps_flags & SAS_ALTSTACK) == 0) + psp->ps_sigstk.ss_flags |= SA_DISABLE; + if (SCARG(uap, oss) && (error = copyout((caddr_t)&psp->ps_sigstk, + (caddr_t)SCARG(uap, oss), sizeof (struct sigaltstack)))) + return (error); + if (SCARG(uap, nss) == 0) + return (0); + if (error = copyin((caddr_t)SCARG(uap, nss), (caddr_t)&ss, + sizeof (ss))) + return (error); + if (ss.ss_flags & SA_DISABLE) { + if (psp->ps_sigstk.ss_flags & SA_ONSTACK) + return (EINVAL); + psp->ps_flags &= ~SAS_ALTSTACK; + psp->ps_sigstk.ss_flags = ss.ss_flags; + return (0); + } + if (ss.ss_size < MINSIGSTKSZ) + return (ENOMEM); + psp->ps_flags |= SAS_ALTSTACK; + psp->ps_sigstk= ss; + return (0); +} + +/* ARGSUSED */ +int +kill(cp, uap, retval) + register 
struct proc *cp; + register struct kill_args /* { + syscallarg(int) pid; + syscallarg(int) signum; + } */ *uap; + register_t *retval; +{ + register struct proc *p; + register struct pcred *pc = cp->p_cred; + + if ((u_int)SCARG(uap, signum) >= NSIG) + return (EINVAL); + if (SCARG(uap, pid) > 0) { + /* kill single process */ + if ((p = pfind(SCARG(uap, pid))) == NULL) + return (ESRCH); + if (!CANSIGNAL(cp, pc, p, SCARG(uap, signum))) + return (EPERM); + if (SCARG(uap, signum)) + psignal(p, SCARG(uap, signum)); + return (0); + } + switch (SCARG(uap, pid)) { + case -1: /* broadcast signal */ + return (killpg1(cp, SCARG(uap, signum), 0, 1)); + case 0: /* signal own process group */ + return (killpg1(cp, SCARG(uap, signum), 0, 0)); + default: /* negative explicit process group */ + return (killpg1(cp, SCARG(uap, signum), -SCARG(uap, pid), 0)); + } + /* NOTREACHED */ +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* ARGSUSED */ +int +compat_43_killpg(p, uap, retval) + struct proc *p; + register struct compat_43_killpg_args /* { + syscallarg(int) pgid; + syscallarg(int) signum; + } */ *uap; + register_t *retval; +{ + + if ((u_int)SCARG(uap, signum) >= NSIG) + return (EINVAL); + return (killpg1(p, SCARG(uap, signum), SCARG(uap, pgid), 0)); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Common code for kill process group/broadcast kill. + * cp is calling process. + */ +int +killpg1(cp, signum, pgid, all) + register struct proc *cp; + int signum, pgid, all; +{ + register struct proc *p; + register struct pcred *pc = cp->p_cred; + struct pgrp *pgrp; + int nfound = 0; + + if (all) + /* + * broadcast + */ + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || + p == cp || !CANSIGNAL(cp, pc, p, signum)) + continue; + nfound++; + if (signum) + psignal(p, signum); + } + else { + if (pgid == 0) + /* + * zero pgid means send to my process group. 
+ */ + pgrp = cp->p_pgrp; + else { + pgrp = pgfind(pgid); + if (pgrp == NULL) + return (ESRCH); + } + for (p = pgrp->pg_members.lh_first; p != 0; + p = p->p_pglist.le_next) { + if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || + p->p_stat == SZOMB || + !CANSIGNAL(cp, pc, p, signum)) + continue; + nfound++; + if (signum) + psignal(p, signum); + } + } + return (nfound ? 0 : ESRCH); +} + +/* + * Send a signal to a process group. + */ +void +gsignal(pgid, signum) + int pgid, signum; +{ + struct pgrp *pgrp; + + if (pgid && (pgrp = pgfind(pgid))) + pgsignal(pgrp, signum, 0); +} + +/* + * Send a signal to a process group. If checktty is 1, + * limit to members which have a controlling terminal. + */ +void +pgsignal(pgrp, signum, checkctty) + struct pgrp *pgrp; + int signum, checkctty; +{ + register struct proc *p; + + if (pgrp) + for (p = pgrp->pg_members.lh_first; p != 0; + p = p->p_pglist.le_next) + if (checkctty == 0 || p->p_flag & P_CONTROLT) + psignal(p, signum); +} + +/* + * Send a signal caused by a trap to the current process. + * If it will be caught immediately, deliver it with correct code. + * Otherwise, post it normally. + */ +void +trapsignal(p, signum, code) + struct proc *p; + register int signum; + u_long code; +{ + register struct sigacts *ps = p->p_sigacts; + int mask; + + mask = sigmask(signum); + if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 && + (p->p_sigmask & mask) == 0) { + p->p_stats->p_ru.ru_nsignals++; +#ifdef KTRACE + if (KTRPOINT(p, KTR_PSIG)) + ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum], + p->p_sigmask, code); +#endif + sendsig(ps->ps_sigact[signum], signum, p->p_sigmask, code); + p->p_sigmask |= ps->ps_catchmask[signum] | mask; + } else { + ps->ps_code = code; /* XXX for core dump/debugger */ + ps->ps_sig = signum; /* XXX to verify code */ + psignal(p, signum); + } +} + +/* + * Send the signal to the process. 
If the signal has an action, the action + * is usually performed by the target process rather than the caller; we add + * the signal to the set of pending signals for the process. + * + * Exceptions: + * o When a stop signal is sent to a sleeping process that takes the + * default action, the process is stopped without awakening it. + * o SIGCONT restarts stopped processes (or puts them back to sleep) + * regardless of the signal action (eg, blocked or ignored). + * + * Other ignored signals are discarded immediately. + */ +void +psignal(p, signum) + register struct proc *p; + register int signum; +{ + register int s, prop; + register sig_t action; + int mask; + + if ((u_int)signum >= NSIG || signum == 0) + panic("psignal signal number"); + mask = sigmask(signum); + prop = sigprop[signum]; + + /* + * If proc is traced, always give parent a chance. + */ + if (p->p_flag & P_TRACED) + action = SIG_DFL; + else { + /* + * If the signal is being ignored, + * then we forget about it immediately. + * (Note: we don't set SIGCONT in p_sigignore, + * and if it is set to SIG_IGN, + * action will be SIG_DFL here.) + */ + if (p->p_sigignore & mask) + return; + if (p->p_sigmask & mask) + action = SIG_HOLD; + else if (p->p_sigcatch & mask) + action = SIG_CATCH; + else + action = SIG_DFL; + } + + if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) && + (p->p_flag & P_TRACED) == 0) + p->p_nice = NZERO; + + if (prop & SA_CONT) + p->p_siglist &= ~stopsigmask; + + if (prop & SA_STOP) { + /* + * If sending a tty stop signal to a member of an orphaned + * process group, discard the signal here if the action + * is default; don't stop the process below if sleeping, + * and don't clear any pending SIGCONT. + */ + if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 && + action == SIG_DFL) + return; + p->p_siglist &= ~contsigmask; + } + p->p_siglist |= mask; + + /* + * Defer further processing for signals which are held, + * except that stopped processes must be continued by SIGCONT. 
+ */ + if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) + return; + s = splhigh(); + switch (p->p_stat) { + + case SSLEEP: + /* + * If process is sleeping uninterruptibly + * we can't interrupt the sleep... the signal will + * be noticed when the process returns through + * trap() or syscall(). + */ + if ((p->p_flag & P_SINTR) == 0) + goto out; + /* + * Process is sleeping and traced... make it runnable + * so it can discover the signal in issignal() and stop + * for the parent. + */ + if (p->p_flag & P_TRACED) + goto run; + /* + * If SIGCONT is default (or ignored) and process is + * asleep, we are finished; the process should not + * be awakened. + */ + if ((prop & SA_CONT) && action == SIG_DFL) { + p->p_siglist &= ~mask; + goto out; + } + /* + * When a sleeping process receives a stop + * signal, process immediately if possible. + * All other (caught or default) signals + * cause the process to run. + */ + if (prop & SA_STOP) { + if (action != SIG_DFL) + goto runfast; + /* + * If a child holding parent blocked, + * stopping could cause deadlock. + */ + if (p->p_flag & P_PPWAIT) + goto out; + p->p_siglist &= ~mask; + p->p_xstat = signum; + if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) + psignal(p->p_pptr, SIGCHLD); + stop(p); + goto out; + } else + goto runfast; + /*NOTREACHED*/ + + case SSTOP: + /* + * If traced process is already stopped, + * then no further action is necessary. + */ + if (p->p_flag & P_TRACED) + goto out; + + /* + * Kill signal always sets processes running. + */ + if (signum == SIGKILL) + goto runfast; + + if (prop & SA_CONT) { + /* + * If SIGCONT is default (or ignored), we continue the + * process but don't leave the signal in p_siglist, as + * it has no further action. If SIGCONT is held, we + * continue the process and leave the signal in + * p_siglist. If the process catches SIGCONT, let it + * handle the signal itself. If it isn't waiting on + * an event, then it goes back to run state. 
+ * Otherwise, process goes back to sleep state. + */ + if (action == SIG_DFL) + p->p_siglist &= ~mask; + if (action == SIG_CATCH) + goto runfast; + if (p->p_wchan == 0) + goto run; + p->p_stat = SSLEEP; + goto out; + } + + if (prop & SA_STOP) { + /* + * Already stopped, don't need to stop again. + * (If we did the shell could get confused.) + */ + p->p_siglist &= ~mask; /* take it away */ + goto out; + } + + /* + * If process is sleeping interruptibly, then simulate a + * wakeup so that when it is continued, it will be made + * runnable and can look at the signal. But don't make + * the process runnable, leave it stopped. + */ + if (p->p_wchan && p->p_flag & P_SINTR) + unsleep(p); + goto out; + + default: + /* + * SRUN, SIDL, SZOMB do nothing with the signal, + * other than kicking ourselves if we are running. + * It will either never be noticed, or noticed very soon. + */ + if (p == curproc) + signotify(p); + goto out; + } + /*NOTREACHED*/ + +runfast: + /* + * Raise priority to at least PUSER. + */ + if (p->p_priority > PUSER) + p->p_priority = PUSER; +run: + setrunnable(p); +out: + splx(s); +} + +/* + * If the current process has received a signal (should be caught or cause + * termination, should interrupt current syscall), return the signal number. + * Stop signals with default action are processed immediately, then cleared; + * they aren't returned. This is checked after each entry to the system for + * a syscall or trap (though this can usually be done without calling issignal + * by checking the pending signal masks in the CURSIG macro.) 
The normal call
 * sequence is
 *
 *	while (signum = CURSIG(curproc))
 *		postsig(signum);
 */
int
issignal(p)
	register struct proc *p;
{
	register int signum, mask, prop;

	/*
	 * Loop until a deliverable signal is found or none remain
	 * pending.  Signals are scanned lowest-numbered-first (ffs),
	 * so the lowest pending, unmasked signal is delivered first.
	 */
	for (;;) {
		mask = p->p_siglist & ~p->p_sigmask;
		if (p->p_flag & P_PPWAIT)
			mask &= ~stopsigmask;	/* vfork child: defer stops */
		if (mask == 0)	 	/* no signal to send */
			return (0);
		signum = ffs((long)mask);
		mask = sigmask(signum);
		prop = sigprop[signum];
		/*
		 * We should see pending but ignored signals
		 * only if P_TRACED was on when they were posted.
		 */
		if (mask & p->p_sigignore && (p->p_flag & P_TRACED) == 0) {
			p->p_siglist &= ~mask;
			continue;
		}
		if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
			/*
			 * If traced, always stop, and stay
			 * stopped until released by the parent.
			 *
			 * Note that we must clear the pending signal
			 * before we call trace_req since that routine
			 * might cause a fault, calling tsleep and
			 * leading us back here again with the same signal.
			 * Then we would be deadlocked because the tracer
			 * would still be blocked on the ipc struct from
			 * the initial request.
			 */
			p->p_xstat = signum;	/* report reason to parent */
			p->p_siglist &= ~mask;
			psignal(p->p_pptr, SIGCHLD);
			do {
				stop(p);
				mi_switch();
			} while (!trace_req(p) && p->p_flag & P_TRACED);

			/*
			 * If parent wants us to take the signal,
			 * then it will leave it in p->p_xstat;
			 * otherwise we just look for signals again.
			 */
			signum = p->p_xstat;
			if (signum == 0)
				continue;

			/*
			 * Put the new signal into p_siglist.  If the
			 * signal is being masked, look for other signals.
			 */
			mask = sigmask(signum);
			p->p_siglist |= mask;
			if (p->p_sigmask & mask)
				continue;

			/*
			 * If the traced bit got turned off, go back up
			 * to the top to rescan signals.  This ensures
			 * that p_sig* and ps_sigact are consistent.
			 */
			if ((p->p_flag & P_TRACED) == 0)
				continue;
		}

		/*
		 * Decide whether the signal should be returned.
		 * Return the signal's number, or fall through
		 * to clear it from the pending mask.
		 */
		switch ((long)p->p_sigacts->ps_sigact[signum]) {

		case (long)SIG_DFL:
			/*
			 * Don't take default actions on system processes.
			 */
			if (p->p_pid <= 1) {
#ifdef DIAGNOSTIC
				/*
				 * Are you sure you want to ignore SIGSEGV
				 * in init? XXX
				 */
				printf("Process (pid %d) got signal %d\n",
					p->p_pid, signum);
#endif
				break;		/* == ignore */
			}
			/*
			 * If there is a pending stop signal to process
			 * with default action, stop here,
			 * then clear the signal.  However,
			 * if process is member of an orphaned
			 * process group, ignore tty stop signals.
			 */
			if (prop & SA_STOP) {
				if (p->p_flag & P_TRACED ||
				    (p->p_pgrp->pg_jobc == 0 &&
				    prop & SA_TTYSTOP))
					break;	/* == ignore */
				p->p_xstat = signum;
				stop(p);
				if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
					psignal(p->p_pptr, SIGCHLD);
				mi_switch();
				break;
			} else if (prop & SA_IGNORE) {
				/*
				 * Except for SIGCONT, shouldn't get here.
				 * Default action is to ignore; drop it.
				 */
				break;		/* == ignore */
			} else
				return (signum);
			/*NOTREACHED*/

		case (long)SIG_IGN:
			/*
			 * Masking above should prevent us ever trying
			 * to take action on an ignored signal other
			 * than SIGCONT, unless process is traced.
			 */
			if ((prop & SA_CONT) == 0 &&
			    (p->p_flag & P_TRACED) == 0)
				printf("issignal\n");
			break;		/* == ignore */

		default:
			/*
			 * This signal has an action, let
			 * postsig() process it.
			 */
			return (signum);
		}
		p->p_siglist &= ~mask;		/* take the signal! */
	}
	/* NOTREACHED */
}

/*
 * Put the argument process into the stopped state and notify the parent
 * via wakeup.  Signals are handled elsewhere.  The process must not be
 * on the run queue.  Callers that need the stop reason reported set
 * p_xstat before calling (see issignal() above).
 */
void
stop(p)
	register struct proc *p;
{

	p->p_stat = SSTOP;
	p->p_flag &= ~P_WAITED;		/* parent may report us again */
	wakeup((caddr_t)p->p_pptr);
}

/*
 * Take the action for the specified signal
 * from the current set of pending signals.
 */
void
postsig(signum)
	register int signum;
{
	register struct proc *p = curproc;
	register struct sigacts *ps = p->p_sigacts;
	register sig_t action;
	u_long code;
	int mask, returnmask;

#ifdef DIAGNOSTIC
	if (signum == 0)
		panic("postsig");
#endif
	mask = sigmask(signum);
	p->p_siglist &= ~mask;		/* signal is now being taken */
	action = ps->ps_sigact[signum];
#ifdef KTRACE
	if (KTRPOINT(p, KTR_PSIG))
		ktrpsig(p->p_tracep,
		    signum, action, ps->ps_flags & SAS_OLDMASK ?
		    ps->ps_oldmask : p->p_sigmask, 0);
#endif
	if (action == SIG_DFL) {
		/*
		 * Default action, where the default is to kill
		 * the process.  (Other cases were ignored above.)
		 */
		sigexit(p, signum);
		/* NOTREACHED */
	} else {
		/*
		 * If we get here, the signal must be caught.
		 */
#ifdef DIAGNOSTIC
		if (action == SIG_IGN || (p->p_sigmask & mask))
			panic("postsig action");
#endif
		/*
		 * Set the new mask value and also defer further
		 * occurences of this signal.
		 *
		 * Special case: user has done a sigpause.  Here the
		 * current mask is not of interest, but rather the
		 * mask from before the sigpause is what we want
		 * restored after the signal processing is completed.
		 */
		(void) splhigh();
		if (ps->ps_flags & SAS_OLDMASK) {
			returnmask = ps->ps_oldmask;
			ps->ps_flags &= ~SAS_OLDMASK;
		} else
			returnmask = p->p_sigmask;
		p->p_sigmask |= ps->ps_catchmask[signum] | mask;
		(void) spl0();
		p->p_stats->p_ru.ru_nsignals++;
		if (ps->ps_sig != signum) {
			code = 0;
		} else {
			/*
			 * A code was recorded for this signal; clear the
			 * saved copy so it is delivered only once.
			 */
			code = ps->ps_code;
			ps->ps_code = 0;
			ps->ps_sig = 0;
		}
		sendsig(action, signum, returnmask, code);
	}
}

/*
 * Kill the given process for the stated reason: note it in the system
 * log, print a message on the user's terminal, and post SIGKILL.
 */
void
killproc(p, why)
	struct proc *p;
	char *why;
{

	log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
	uprintf("sorry, pid %d was killed: %s\n", p->p_pid, why);
	psignal(p, SIGKILL);
}

/*
 * Force the current process to exit with the specified signal, dumping core
 * if appropriate.
We bypass the normal tests for masked and caught signals,
 * allowing unrecoverable failures to terminate the process without changing
 * signal state.  Mark the accounting record with the signal termination.
 * If dumping core, save the signal number for the debugger.  Calls exit and
 * does not return.
 */
void
sigexit(p, signum)
	register struct proc *p;
	int signum;
{

	p->p_acflag |= AXSIG;		/* accounting: killed by a signal */
	if (sigprop[signum] & SA_CORE) {
		p->p_sigacts->ps_sig = signum;	/* saved for the debugger */
		/*
		 * Set WCOREFLAG in the exit status only if the dump
		 * actually succeeded.
		 */
		if (coredump(p) == 0)
			signum |= WCOREFLAG;
	}
	exit1(p, W_EXITCODE(0, signum));
	/* NOTREACHED */
}

/*
 * Dump core, into a file named "progname.core", unless the process was
 * setuid/setgid.
 *
 * Returns 0 on success or an errno value; EFAULT also serves as the
 * "refused to dump" indication (credentials changed, dump would exceed
 * RLIMIT_CORE, or the target is not a plain single-link regular file).
 */
int
coredump(p)
	register struct proc *p;
{
	register struct vnode *vp;
	register struct pcred *pcred = p->p_cred;
	register struct ucred *cred = pcred->pc_ucred;
	register struct vmspace *vm = p->p_vmspace;
	struct nameidata nd;
	struct vattr vattr;
	int error, error1;
	char name[MAXCOMLEN+6];		/* progname.core */

	/* Refuse if saved ids differ from real ids (setuid/setgid image). */
	if (pcred->p_svuid != pcred->p_ruid || pcred->p_svgid != pcred->p_rgid)
		return (EFAULT);
	/* Refuse if the dump would exceed the core file size limit. */
	if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >=
	    p->p_rlimit[RLIMIT_CORE].rlim_cur)
		return (EFAULT);
	sprintf(name, "%s.core", p->p_comm);
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
	if (error = vn_open(&nd,
	    O_CREAT | FWRITE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))
		return (error);
	vp = nd.ni_vp;

	/* Don't dump to non-regular files or files with links. */
	if (vp->v_type != VREG ||
	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
		error = EFAULT;
		goto out;
	}
	/* Truncate any existing contents. */
	VATTR_NULL(&vattr);
	vattr.va_size = 0;
	VOP_LEASE(vp, p, cred, LEASE_WRITE);
	VOP_SETATTR(vp, &vattr, cred, p);
	p->p_acflag |= ACORE;		/* accounting: dumped core */
	/*
	 * Refresh the kp_proc/kp_eproc copies in the process's u-area
	 * so the dump carries current state, then write the
	 * machine-dependent part (cpu_coredump), the data segment,
	 * and finally the stack.
	 */
	bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc));
	fill_eproc(p, &p->p_addr->u_kproc.kp_eproc);
	error = cpu_coredump(p, vp, cred);
	if (error == 0)
		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
		    (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
	if (error == 0)
		error = vn_rdwr(UIO_WRITE, vp,
		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
		    round_page(ctob(vm->vm_ssize)),
		    (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
out:
	VOP_UNLOCK(vp, 0, p);
	error1 = vn_close(vp, FWRITE, cred, p);
	if (error == 0)
		error = error1;	/* report close failure if write was clean */
	return (error);
}

/*
 * Nonexistent system call-- signal process (may want to handle it).
 * Flag error in case process won't see signal immediately (blocked or ignored).
 */
/* ARGSUSED */
int
nosys(p, args, retval)
	struct proc *p;
	void *args;
	register_t *retval;
{

	psignal(p, SIGSYS);
	return (ENOSYS);
}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
new file mode 100644
index 000000000000..df8371077adf
--- /dev/null
+++ b/sys/kern/kern_subr.c
@@ -0,0 +1,215 @@
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)kern_subr.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/queue.h>

/*
 * Move n bytes between the kernel buffer cp and the (possibly scattered)
 * iovec list described by uio, in the direction given by uio->uio_rw.
 * Advances the uio (iov_base/iov_len/uio_resid/uio_offset) past the data
 * moved.  Returns 0, or an error from copyin/copyout.
 */
int
uiomove(cp, n, uio)
	register caddr_t cp;
	register int n;
	register struct uio *uio;
{
	register struct iovec *iov;
	u_int cnt;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
		panic("uiomove: mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("uiomove proc");
#endif
	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			/* Empty iovec: step to the next one. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
		case UIO_USERISPACE:
			/*
			 * UIO_READ means data flows from the kernel
			 * buffer out to the caller, hence copyout.
			 */
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error)
				return (error);
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy((caddr_t)cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, (caddr_t)cp, cnt);
			break;
		}
		/* Account for the chunk just transferred. */
		iov->iov_base += cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
	return (error);
}

/*
 * Give next character to user as result of read.
 */
int
ureadc(c, uio)
	register int c;
	register struct uio *uio;
{
	register struct iovec *iov;

	if (uio->uio_resid <= 0)
		panic("ureadc: non-positive resid");
again:
	if (uio->uio_iovcnt <= 0)
		panic("ureadc: non-positive iovcnt");
	iov = uio->uio_iov;
	if (iov->iov_len <= 0) {
		/* Skip exhausted iovecs. */
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;

	case UIO_SYSSPACE:
		*iov->iov_base = c;
		break;

	case UIO_USERISPACE:
		if (suibyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;
	}
	/* Consume one byte of the uio. */
	iov->iov_base++;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

#ifdef vax /* unused except by ct.c, other oddities XXX */
/*
 * Get next character written in by user from uio.
 * Returns the character, or -1 when the uio is exhausted or the
 * user fetch fails.
 */
int
uwritec(uio)
	struct uio *uio;
{
	register struct iovec *iov;
	register int c;

	if (uio->uio_resid <= 0)
		return (-1);
again:
	if (uio->uio_iovcnt <= 0)
		panic("uwritec: non-positive iovcnt");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		uio->uio_iov++;
		if (--uio->uio_iovcnt == 0)
			return (-1);
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		c = fubyte(iov->iov_base);
		break;

	case UIO_SYSSPACE:
		c = *(u_char *) iov->iov_base;
		break;

	case UIO_USERISPACE:
		c = fuibyte(iov->iov_base);
		break;
	}
	if (c < 0)
		return (-1);
	/* Consume one byte of the uio. */
	iov->iov_base++;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (c);
}
#endif /* vax */

/*
 * General routine to allocate a hash table.
 * Rounds the element count down to a power of two, allocates that many
 * list heads, and returns the table; *hashmask receives size-1 for use
 * as an and-mask on hash values.  May sleep (M_WAITOK).
 */
void *
hashinit(elements, type, hashmask)
	int elements, type;
	u_long *hashmask;
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad cnt");
	/* Largest power of two <= elements. */
	for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
		continue;
	hashsize >>= 1;
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
new file mode 100644
index 000000000000..6c8202731f69
--- /dev/null
+++ b/sys/kern/kern_synch.c
@@ -0,0 +1,671 @@
/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

/*
 * Force switch among equal priority processes every 100ms.
 * Driven by the callout queue: each invocation requests a reschedule
 * and re-arms itself hz/10 ticks later.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{

	need_resched();
	timeout(roundrobin, NULL, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
+ * + * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. + * That is, the system wants to compute a value of decay such + * that the following for loop: + * for (i = 0; i < (5 * loadavg); i++) + * p_estcpu *= decay; + * will compute + * p_estcpu *= 0.1; + * for all values of loadavg: + * + * Mathematically this loop can be expressed by saying: + * decay ** (5 * loadavg) ~= .1 + * + * The system computes decay as: + * decay = (2 * loadavg) / (2 * loadavg + 1) + * + * We wish to prove that the system's computation of decay + * will always fulfill the equation: + * decay ** (5 * loadavg) ~= .1 + * + * If we compute b as: + * b = 2 * loadavg + * then + * decay = b / (b + 1) + * + * We now need to prove two things: + * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) + * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) + * + * Facts: + * For x close to zero, exp(x) =~ 1 + x, since + * exp(x) = 0! + x**1/1! + x**2/2! + ... . + * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. + * For x close to zero, ln(1+x) =~ x, since + * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 + * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). + * ln(.1) =~ -2.30 + * + * Proof of (1): + * Solve (factor)**(power) =~ .1 given power (5*loadav): + * solving for factor, + * ln(factor) =~ (-2.30/5*loadav), or + * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = + * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED + * + * Proof of (2): + * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): + * solving for power, + * power*ln(b/(b+1)) =~ -2.30, or + * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. 
QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you dont want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 * Walks allproc once a second: ages p_pctcpu, decays p_estcpu by the
 * load-dependent factor above, and requeues a runnable process when its
 * priority moved to a different run queue.  Re-arms itself via timeout().
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;

	wakeup((caddr_t)&lbolt);	/* rouse once-a-second sleepers */
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
		p->p_estcpu = min(newcpu, UCHAR_MAX);	/* clamp to 8 bits */
		resetpriority(p);
		if (p->p_priority >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				/*
				 * Priority crossed a queue boundary:
				 * remove and reinsert on the new queue.
				 */
				remrq(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	if (bclnlist != NULL)
		wakeup((caddr_t)pageproc);	/* bclnlist non-empty */
	timeout(schedcpu, (void *)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;	/* slept long enough: forget it all */
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		/* Apply one decay step per remaining second slept. */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = min(newcpu, UCHAR_MAX);
	}
	resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
/* Hashed sleep queues: singly linked lists with a tail pointer. */
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	char *wmesg;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register s;			/* implicit int (K&R) */
	int sig, catch = priority & PCATCH;
	extern int cold;
	void endtsleep __P((void *));

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	/* Append ourselves to the tail of the hashed sleep queue. */
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if (sig = CURSIG(p)) {
			/* Signal already pending: abort the sleep. */
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			/* Woken while stopped in CURSIG. */
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;	/* voluntary context switch */
	mi_switch();
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		/* endtsleep() fired; a pending signal takes precedence. */
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		/* ps_sigintr says whether this signal interrupts or restarts. */
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);	/* stopped: stay stopped */
		p->p_flag |= P_TIMEOUT;	/* tells tsleep() the timer fired */
	}
	splx(s);
}

/*
 * Short-term, non-interruptable sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register s;			/* implicit int (K&R) */
	extern int cold;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %x\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	/* Append to the tail of the hashed sleep queue. */
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;	/* voluntary context switch */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	mi_switch();
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		/* Walk the hash chain to find the link pointing at us. */
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;	/* we were the tail */
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified
identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			/* Unlink from the sleep queue. */
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM)
					setrunqueue(p);
				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */
				if ((p->p_flag & P_INMEM) == 0)
					wakeup((caddr_t)&proc0);
				else
					need_resched();
				/* END INLINE EXPANSION */
				/* List may have changed; rescan from the top. */
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	register long s, u;
	struct timeval tv;

#ifdef DEBUG
	if (p->p_simple_locks)
		panic("sleep: holding simple lock");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	/* Normalize the microseconds field into [0, 1000000). */
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			/* 5 more seconds of grace before the next SIGXCPU. */
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	/* Only non-root processes still at the default nice are demoted. */
	if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
		p->p_nice = NZERO + 4;
		resetpriority(p);
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	cpu_switch(p);
	microtime(&runtime);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.  Each queue head points at itself.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	default:
		panic("setrunnable");
	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);	/* decay estcpu for the time slept */
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);	/* awaken the swapper */
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
+ */ +void +resetpriority(p) + register struct proc *p; +{ + register unsigned int newpriority; + + newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; + newpriority = min(newpriority, MAXPRI); + p->p_usrpri = newpriority; + if (newpriority < curpriority) + need_resched(); +} diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c new file mode 100644 index 000000000000..b178da3a0302 --- /dev/null +++ b/sys/kern/kern_sysctl.c @@ -0,0 +1,793 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_sysctl.c 8.9 (Berkeley) 5/20/95 + */ + +/* + * sysctl system call. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/vnode.h> +#include <sys/unistd.h> +#include <sys/buf.h> +#include <sys/ioctl.h> +#include <sys/tty.h> +#include <vm/vm.h> +#include <sys/sysctl.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +sysctlfn kern_sysctl; +sysctlfn hw_sysctl; +#ifdef DEBUG +sysctlfn debug_sysctl; +#endif +extern sysctlfn vm_sysctl; +extern sysctlfn vfs_sysctl; +extern sysctlfn net_sysctl; +extern sysctlfn cpu_sysctl; + +/* + * Locking and stats + */ +static struct sysctl_lock { + int sl_lock; + int sl_want; + int sl_locked; +} memlock; + +int +__sysctl(p, uap, retval) + struct proc *p; + register struct __sysctl_args /* { + syscallarg(int *) name; + syscallarg(u_int) namelen; + syscallarg(void *) old; + syscallarg(size_t *) oldlenp; + syscallarg(void *) new; + syscallarg(size_t) newlen; + } */ *uap; + register_t *retval; +{ + int error, dolock = 1; + size_t savelen, oldlen = 0; + sysctlfn *fn; + int name[CTL_MAXNAME]; + + if (SCARG(uap, new) != NULL && + (error = suser(p->p_ucred, &p->p_acflag))) + return (error); + /* + * all top-level sysctl names are non-terminal + */ + if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2) + return (EINVAL); + if (error = + copyin(SCARG(uap, name), &name, 
SCARG(uap, namelen) * sizeof(int))) + return (error); + + switch (name[0]) { + case CTL_KERN: + fn = kern_sysctl; + if (name[2] == KERN_VNODE) /* XXX */ + dolock = 0; + break; + case CTL_HW: + fn = hw_sysctl; + break; + case CTL_VM: + fn = vm_sysctl; + break; + case CTL_NET: + fn = net_sysctl; + break; + case CTL_VFS: + fn = vfs_sysctl; + break; + case CTL_MACHDEP: + fn = cpu_sysctl; + break; +#ifdef DEBUG + case CTL_DEBUG: + fn = debug_sysctl; + break; +#endif + default: + return (EOPNOTSUPP); + } + + if (SCARG(uap, oldlenp) && + (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen)))) + return (error); + if (SCARG(uap, old) != NULL) { + if (!useracc(SCARG(uap, old), oldlen, B_WRITE)) + return (EFAULT); + while (memlock.sl_lock) { + memlock.sl_want = 1; + sleep((caddr_t)&memlock, PRIBIO+1); + memlock.sl_locked++; + } + memlock.sl_lock = 1; + if (dolock) + vslock(SCARG(uap, old), oldlen); + savelen = oldlen; + } + error = (*fn)(name + 1, SCARG(uap, namelen) - 1, SCARG(uap, old), + &oldlen, SCARG(uap, new), SCARG(uap, newlen), p); + if (SCARG(uap, old) != NULL) { + if (dolock) + vsunlock(SCARG(uap, old), savelen, B_WRITE); + memlock.sl_lock = 0; + if (memlock.sl_want) { + memlock.sl_want = 0; + wakeup((caddr_t)&memlock); + } + } + if (error) + return (error); + if (SCARG(uap, oldlenp)) + error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen)); + *retval = oldlen; + return (0); +} + +/* + * Attributes stored in the kernel. + */ +char hostname[MAXHOSTNAMELEN]; +int hostnamelen; +long hostid; +int securelevel; + +/* + * kernel related system variables. 
+ */ +kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + int error, level, inthostid; + extern char ostype[], osrelease[], version[]; + + /* all sysctl names at this level are terminal */ + if (namelen != 1 && !(name[0] == KERN_PROC || name[0] == KERN_PROF)) + return (ENOTDIR); /* overloaded */ + + switch (name[0]) { + case KERN_OSTYPE: + return (sysctl_rdstring(oldp, oldlenp, newp, ostype)); + case KERN_OSRELEASE: + return (sysctl_rdstring(oldp, oldlenp, newp, osrelease)); + case KERN_OSREV: + return (sysctl_rdint(oldp, oldlenp, newp, BSD)); + case KERN_VERSION: + return (sysctl_rdstring(oldp, oldlenp, newp, version)); + case KERN_MAXVNODES: + return(sysctl_int(oldp, oldlenp, newp, newlen, &desiredvnodes)); + case KERN_MAXPROC: + return (sysctl_int(oldp, oldlenp, newp, newlen, &maxproc)); + case KERN_MAXFILES: + return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles)); + case KERN_ARGMAX: + return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX)); + case KERN_SECURELVL: + level = securelevel; + if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) || + newp == NULL) + return (error); + if (level < securelevel && p->p_pid != 1) + return (EPERM); + securelevel = level; + return (0); + case KERN_HOSTNAME: + error = sysctl_string(oldp, oldlenp, newp, newlen, + hostname, sizeof(hostname)); + if (newp && !error) + hostnamelen = newlen; + return (error); + case KERN_HOSTID: + inthostid = hostid; /* XXX assumes sizeof long <= sizeof int */ + error = sysctl_int(oldp, oldlenp, newp, newlen, &inthostid); + hostid = inthostid; + return (error); + case KERN_CLOCKRATE: + return (sysctl_clockrate(oldp, oldlenp)); + case KERN_BOOTTIME: + return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime, + sizeof(struct timeval))); + case KERN_VNODE: + return (sysctl_vnode(oldp, oldlenp, p)); + case KERN_PROC: + return (sysctl_doproc(name + 1, namelen - 1, oldp, 
oldlenp)); + case KERN_FILE: + return (sysctl_file(oldp, oldlenp)); +#ifdef GPROF + case KERN_PROF: + return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp, + newp, newlen)); +#endif + case KERN_POSIX1: + return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION)); + case KERN_NGROUPS: + return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX)); + case KERN_JOB_CONTROL: + return (sysctl_rdint(oldp, oldlenp, newp, 1)); + case KERN_SAVED_IDS: +#ifdef _POSIX_SAVED_IDS + return (sysctl_rdint(oldp, oldlenp, newp, 1)); +#else + return (sysctl_rdint(oldp, oldlenp, newp, 0)); +#endif + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + +/* + * hardware related system variables. + */ +hw_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + extern char machine[], cpu_model[]; + + /* all sysctl names at this level are terminal */ + if (namelen != 1) + return (ENOTDIR); /* overloaded */ + + switch (name[0]) { + case HW_MACHINE: + return (sysctl_rdstring(oldp, oldlenp, newp, machine)); + case HW_MODEL: + return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model)); + case HW_NCPU: + return (sysctl_rdint(oldp, oldlenp, newp, 1)); /* XXX */ + case HW_BYTEORDER: + return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER)); + case HW_PHYSMEM: + return (sysctl_rdint(oldp, oldlenp, newp, ctob(physmem))); + case HW_USERMEM: + return (sysctl_rdint(oldp, oldlenp, newp, + ctob(physmem - cnt.v_wire_count))); + case HW_PAGESIZE: + return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + +#ifdef DEBUG +/* + * Debugging related system variables. 
+ */ +struct ctldebug debug0, debug1, debug2, debug3, debug4; +struct ctldebug debug5, debug6, debug7, debug8, debug9; +struct ctldebug debug10, debug11, debug12, debug13, debug14; +struct ctldebug debug15, debug16, debug17, debug18, debug19; +static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = { + &debug0, &debug1, &debug2, &debug3, &debug4, + &debug5, &debug6, &debug7, &debug8, &debug9, + &debug10, &debug11, &debug12, &debug13, &debug14, + &debug15, &debug16, &debug17, &debug18, &debug19, +}; +int +debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + struct ctldebug *cdp; + + /* all sysctl names at this level are name and field */ + if (namelen != 2) + return (ENOTDIR); /* overloaded */ + cdp = debugvars[name[0]]; + if (name[0] >= CTL_DEBUG_MAXID || cdp->debugname == 0) + return (EOPNOTSUPP); + switch (name[1]) { + case CTL_DEBUG_NAME: + return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname)); + case CTL_DEBUG_VALUE: + return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} +#endif /* DEBUG */ + +/* + * Validate parameters and get old / set new parameters + * for an integer-valued sysctl function. + */ +sysctl_int(oldp, oldlenp, newp, newlen, valp) + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + int *valp; +{ + int error = 0; + + if (oldp && *oldlenp < sizeof(int)) + return (ENOMEM); + if (newp && newlen != sizeof(int)) + return (EINVAL); + *oldlenp = sizeof(int); + if (oldp) + error = copyout(valp, oldp, sizeof(int)); + if (error == 0 && newp) + error = copyin(newp, valp, sizeof(int)); + return (error); +} + +/* + * As above, but read-only. 
+ */ +sysctl_rdint(oldp, oldlenp, newp, val) + void *oldp; + size_t *oldlenp; + void *newp; + int val; +{ + int error = 0; + + if (oldp && *oldlenp < sizeof(int)) + return (ENOMEM); + if (newp) + return (EPERM); + *oldlenp = sizeof(int); + if (oldp) + error = copyout((caddr_t)&val, oldp, sizeof(int)); + return (error); +} + +/* + * Validate parameters and get old / set new parameters + * for a string-valued sysctl function. + */ +sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen) + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + char *str; + int maxlen; +{ + int len, error = 0; + + len = strlen(str) + 1; + if (oldp && *oldlenp < len) + return (ENOMEM); + if (newp && newlen >= maxlen) + return (EINVAL); + if (oldp) { + *oldlenp = len; + error = copyout(str, oldp, len); + } + if (error == 0 && newp) { + error = copyin(newp, str, newlen); + str[newlen] = 0; + } + return (error); +} + +/* + * As above, but read-only. + */ +sysctl_rdstring(oldp, oldlenp, newp, str) + void *oldp; + size_t *oldlenp; + void *newp; + char *str; +{ + int len, error = 0; + + len = strlen(str) + 1; + if (oldp && *oldlenp < len) + return (ENOMEM); + if (newp) + return (EPERM); + *oldlenp = len; + if (oldp) + error = copyout(str, oldp, len); + return (error); +} + +/* + * Validate parameters and get old / set new parameters + * for a structure oriented sysctl function. + */ +sysctl_struct(oldp, oldlenp, newp, newlen, sp, len) + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + void *sp; + int len; +{ + int error = 0; + + if (oldp && *oldlenp < len) + return (ENOMEM); + if (newp && newlen > len) + return (EINVAL); + if (oldp) { + *oldlenp = len; + error = copyout(sp, oldp, len); + } + if (error == 0 && newp) + error = copyin(newp, sp, len); + return (error); +} + +/* + * Validate parameters and get old parameters + * for a structure oriented sysctl function. 
+ */ +sysctl_rdstruct(oldp, oldlenp, newp, sp, len) + void *oldp; + size_t *oldlenp; + void *newp, *sp; + int len; +{ + int error = 0; + + if (oldp && *oldlenp < len) + return (ENOMEM); + if (newp) + return (EPERM); + *oldlenp = len; + if (oldp) + error = copyout(sp, oldp, len); + return (error); +} + +/* + * Get file structures. + */ +sysctl_file(where, sizep) + char *where; + size_t *sizep; +{ + int buflen, error; + struct file *fp; + char *start = where; + + buflen = *sizep; + if (where == NULL) { + /* + * overestimate by 10 files + */ + *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct file); + return (0); + } + + /* + * first copyout filehead + */ + if (buflen < sizeof(filehead)) { + *sizep = 0; + return (0); + } + if (error = copyout((caddr_t)&filehead, where, sizeof(filehead))) + return (error); + buflen -= sizeof(filehead); + where += sizeof(filehead); + + /* + * followed by an array of file structures + */ + for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { + if (buflen < sizeof(struct file)) { + *sizep = where - start; + return (ENOMEM); + } + if (error = copyout((caddr_t)fp, where, sizeof (struct file))) + return (error); + buflen -= sizeof(struct file); + where += sizeof(struct file); + } + *sizep = where - start; + return (0); +} + +/* + * try over estimating by 5 procs + */ +#define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc)) + +sysctl_doproc(name, namelen, where, sizep) + int *name; + u_int namelen; + char *where; + size_t *sizep; +{ + register struct proc *p; + register struct kinfo_proc *dp = (struct kinfo_proc *)where; + register int needed = 0; + int buflen = where != NULL ? *sizep : 0; + int doingzomb; + struct eproc eproc; + int error = 0; + + if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL)) + return (EINVAL); + p = allproc.lh_first; + doingzomb = 0; +again: + for (; p != 0; p = p->p_list.le_next) { + /* + * Skip embryonic processes. 
+ */ + if (p->p_stat == SIDL) + continue; + /* + * TODO - make more efficient (see notes below). + * do by session. + */ + switch (name[0]) { + + case KERN_PROC_PID: + /* could do this with just a lookup */ + if (p->p_pid != (pid_t)name[1]) + continue; + break; + + case KERN_PROC_PGRP: + /* could do this by traversing pgrp */ + if (p->p_pgrp->pg_id != (pid_t)name[1]) + continue; + break; + + case KERN_PROC_TTY: + if ((p->p_flag & P_CONTROLT) == 0 || + p->p_session->s_ttyp == NULL || + p->p_session->s_ttyp->t_dev != (dev_t)name[1]) + continue; + break; + + case KERN_PROC_UID: + if (p->p_ucred->cr_uid != (uid_t)name[1]) + continue; + break; + + case KERN_PROC_RUID: + if (p->p_cred->p_ruid != (uid_t)name[1]) + continue; + break; + } + if (buflen >= sizeof(struct kinfo_proc)) { + fill_eproc(p, &eproc); + if (error = copyout((caddr_t)p, &dp->kp_proc, + sizeof(struct proc))) + return (error); + if (error = copyout((caddr_t)&eproc, &dp->kp_eproc, + sizeof(eproc))) + return (error); + dp++; + buflen -= sizeof(struct kinfo_proc); + } + needed += sizeof(struct kinfo_proc); + } + if (doingzomb == 0) { + p = zombproc.lh_first; + doingzomb++; + goto again; + } + if (where != NULL) { + *sizep = (caddr_t)dp - where; + if (needed > *sizep) + return (ENOMEM); + } else { + needed += KERN_PROCSLOP; + *sizep = needed; + } + return (0); +} + +/* + * Fill in an eproc structure for the specified process. 
+ */ +void +fill_eproc(p, ep) + register struct proc *p; + register struct eproc *ep; +{ + register struct tty *tp; + + ep->e_paddr = p; + ep->e_sess = p->p_pgrp->pg_session; + ep->e_pcred = *p->p_cred; + ep->e_ucred = *p->p_ucred; + if (p->p_stat == SIDL || p->p_stat == SZOMB) { + ep->e_vm.vm_rssize = 0; + ep->e_vm.vm_tsize = 0; + ep->e_vm.vm_dsize = 0; + ep->e_vm.vm_ssize = 0; +#ifndef sparc + /* ep->e_vm.vm_pmap = XXX; */ +#endif + } else { + register struct vmspace *vm = p->p_vmspace; + +#ifdef pmap_resident_count + ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/ +#else + ep->e_vm.vm_rssize = vm->vm_rssize; +#endif + ep->e_vm.vm_tsize = vm->vm_tsize; + ep->e_vm.vm_dsize = vm->vm_dsize; + ep->e_vm.vm_ssize = vm->vm_ssize; +#ifndef sparc + ep->e_vm.vm_pmap = vm->vm_pmap; +#endif + } + if (p->p_pptr) + ep->e_ppid = p->p_pptr->p_pid; + else + ep->e_ppid = 0; + ep->e_pgid = p->p_pgrp->pg_id; + ep->e_jobc = p->p_pgrp->pg_jobc; + if ((p->p_flag & P_CONTROLT) && + (tp = ep->e_sess->s_ttyp)) { + ep->e_tdev = tp->t_dev; + ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; + ep->e_tsess = tp->t_session; + } else + ep->e_tdev = NODEV; + ep->e_flag = ep->e_sess->s_ttyvp ? 
EPROC_CTTY : 0; + if (SESS_LEADER(p)) + ep->e_flag |= EPROC_SLEADER; + if (p->p_wmesg) + strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); + ep->e_xsize = ep->e_xrssize = 0; + ep->e_xccount = ep->e_xswrss = 0; +} + +#ifdef COMPAT_43 +#include <sys/socket.h> +#define KINFO_PROC (0<<8) +#define KINFO_RT (1<<8) +#define KINFO_VNODE (2<<8) +#define KINFO_FILE (3<<8) +#define KINFO_METER (4<<8) +#define KINFO_LOADAVG (5<<8) +#define KINFO_CLOCKRATE (6<<8) + +compat_43_getkerninfo(p, uap, retval) + struct proc *p; + register struct compat_43_getkerninfo_args /* { + syscallarg(int) op; + syscallarg(char *) where; + syscallarg(int *) size; + syscallarg(int) arg; + } */ *uap; + register_t *retval; +{ + int error, name[5]; + size_t size; + + if (SCARG(uap, size) && (error = copyin((caddr_t)SCARG(uap, size), + (caddr_t)&size, sizeof(size)))) + return (error); + + switch (SCARG(uap, op) & 0xff00) { + + case KINFO_RT: + name[0] = PF_ROUTE; + name[1] = 0; + name[2] = (SCARG(uap, op) & 0xff0000) >> 16; + name[3] = SCARG(uap, op) & 0xff; + name[4] = SCARG(uap, arg); + error = + net_sysctl(name, 5, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_VNODE: + name[0] = KERN_VNODE; + error = + kern_sysctl(name, 1, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_PROC: + name[0] = KERN_PROC; + name[1] = SCARG(uap, op) & 0xff; + name[2] = SCARG(uap, arg); + error = + kern_sysctl(name, 3, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_FILE: + name[0] = KERN_FILE; + error = + kern_sysctl(name, 1, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_METER: + name[0] = VM_METER; + error = + vm_sysctl(name, 1, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_LOADAVG: + name[0] = VM_LOADAVG; + error = + vm_sysctl(name, 1, SCARG(uap, where), &size, NULL, 0, p); + break; + + case KINFO_CLOCKRATE: + name[0] = KERN_CLOCKRATE; + error = + kern_sysctl(name, 1, SCARG(uap, where), &size, NULL, 0, p); + break; + + default: + return 
(EOPNOTSUPP); + } + if (error) + return (error); + *retval = size; + if (SCARG(uap, size)) + error = copyout((caddr_t)&size, (caddr_t)SCARG(uap, size), + sizeof(size)); + return (error); +} +#endif /* COMPAT_43 */ diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c new file mode 100644 index 000000000000..f4facf6f9fa0 --- /dev/null +++ b/sys/kern/kern_time.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_time.c 8.4 (Berkeley) 5/26/95 + */ + +#include <sys/param.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +#include <machine/cpu.h> + +/* + * Time of day and interval timer support. + * + * These routines provide the kernel entry points to get and set + * the time-of-day and per-process interval timers. Subroutines + * here provide support for adding and subtracting timeval structures + * and decrementing interval timers, optionally reloading the interval + * timers when they expire. 
+ */ + +/* ARGSUSED */ +int +gettimeofday(p, uap, retval) + struct proc *p; + register struct gettimeofday_args /* { + syscallarg(struct timeval *) tp; + syscallarg(struct timezone *) tzp; + } */ *uap; + register_t *retval; +{ + struct timeval atv; + int error = 0; + + if (SCARG(uap, tp)) { + microtime(&atv); + if (error = copyout((caddr_t)&atv, (caddr_t)SCARG(uap, tp), + sizeof (atv))) + return (error); + } + if (SCARG(uap, tzp)) + error = copyout((caddr_t)&tz, (caddr_t)SCARG(uap, tzp), + sizeof (tz)); + return (error); +} + +/* ARGSUSED */ +int +settimeofday(p, uap, retval) + struct proc *p; + struct settimeofday_args /* { + syscallarg(struct timeval *) tv; + syscallarg(struct timezone *) tzp; + } */ *uap; + register_t *retval; +{ + struct timeval atv, delta; + struct timezone atz; + int error, s; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + /* Verify all parameters before changing time. */ + if (SCARG(uap, tv) && (error = copyin((caddr_t)SCARG(uap, tv), + (caddr_t)&atv, sizeof(atv)))) + return (error); + if (SCARG(uap, tzp) && (error = copyin((caddr_t)SCARG(uap, tzp), + (caddr_t)&atz, sizeof(atz)))) + return (error); + if (SCARG(uap, tv)) { + /* + * If the system is secure, we do not allow the time to be + * set to an earlier value (it may be slowed using adjtime, + * but not set back). This feature prevent interlopers from + * setting arbitrary time stamps on files. + */ + if (securelevel > 0 && timercmp(&atv, &time, <)) + return (EPERM); + /* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */ + s = splclock(); + /* nb. 
delta.tv_usec may be < 0, but this is OK here */ + delta.tv_sec = atv.tv_sec - time.tv_sec; + delta.tv_usec = atv.tv_usec - time.tv_usec; + time = atv; + (void) splsoftclock(); + timevaladd(&boottime, &delta); + timevalfix(&boottime); + timevaladd(&runtime, &delta); + timevalfix(&runtime); +# ifdef NFS + lease_updatetime(delta.tv_sec); +# endif + splx(s); + resettodr(); + } + if (SCARG(uap, tzp)) + tz = atz; + return (0); +} + +extern int tickadj; /* "standard" clock skew, us./tick */ +int tickdelta; /* current clock skew, us. per tick */ +long timedelta; /* unapplied time correction, us. */ +long bigadj = 1000000; /* use 10x skew above bigadj us. */ + +/* ARGSUSED */ +int +adjtime(p, uap, retval) + struct proc *p; + register struct adjtime_args /* { + syscallarg(struct timeval *) delta; + syscallarg(struct timeval *) olddelta; + } */ *uap; + register_t *retval; +{ + struct timeval atv; + register long ndelta, ntickdelta, odelta; + int s, error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + if (error = copyin((caddr_t)SCARG(uap, delta), (caddr_t)&atv, + sizeof(struct timeval))) + return (error); + + /* + * Compute the total correction and the rate at which to apply it. + * Round the adjustment down to a whole multiple of the per-tick + * delta, so that after some number of incremental changes in + * hardclock(), tickdelta will become zero, lest the correction + * overshoot and start taking us away from the desired final time. + */ + ndelta = atv.tv_sec * 1000000 + atv.tv_usec; + if (ndelta > bigadj) + ntickdelta = 10 * tickadj; + else + ntickdelta = tickadj; + if (ndelta % ntickdelta) + ndelta = ndelta / ntickdelta * ntickdelta; + + /* + * To make hardclock()'s job easier, make the per-tick delta negative + * if we want time to run slower; then hardclock can simply compute + * tick + tickdelta, and subtract tickdelta from timedelta. 
+ */ + if (ndelta < 0) + ntickdelta = -ntickdelta; + s = splclock(); + odelta = timedelta; + timedelta = ndelta; + tickdelta = ntickdelta; + splx(s); + + if (SCARG(uap, olddelta)) { + atv.tv_sec = odelta / 1000000; + atv.tv_usec = odelta % 1000000; + (void) copyout((caddr_t)&atv, (caddr_t)SCARG(uap, olddelta), + sizeof(struct timeval)); + } + return (0); +} + +/* + * Get value of an interval timer. The process virtual and + * profiling virtual time timers are kept in the p_stats area, since + * they can be swapped out. These are kept internally in the + * way they are specified externally: in time until they expire. + * + * The real time interval timer is kept in the process table slot + * for the process, and its value (it_value) is kept as an + * absolute time rather than as a delta, so that it is easy to keep + * periodic real-time signals from drifting. + * + * Virtual time timers are processed in the hardclock() routine of + * kern_clock.c. The real time timer is processed by a timeout + * routine, called from the softclock() routine. Since a callout + * may be delayed in real time due to interrupt processing in the system, + * it is possible for the real time timeout routine (realitexpire, given below), + * to be delayed in real time past when it is supposed to occur. It + * does not suffice, therefore, to reload the real timer .it_value from the + * real time timers .it_interval. Rather, we compute the next time in + * absolute time the timer should go off. + */ +/* ARGSUSED */ +int +getitimer(p, uap, retval) + struct proc *p; + register struct getitimer_args /* { + syscallarg(u_int) which; + syscallarg(struct itimerval *) itv; + } */ *uap; + register_t *retval; +{ + struct itimerval aitv; + int s; + + if (SCARG(uap, which) > ITIMER_PROF) + return (EINVAL); + s = splclock(); + if (SCARG(uap, which) == ITIMER_REAL) { + /* + * Convert from absolute to relative time in .it_value + * part of real time timer. 
If time for real time timer + * has passed return 0, else return difference between + * current time and time for the timer to go off. + */ + aitv = p->p_realtimer; + if (timerisset(&aitv.it_value)) + if (timercmp(&aitv.it_value, &time, <)) + timerclear(&aitv.it_value); + else + timevalsub(&aitv.it_value, + (struct timeval *)&time); + } else + aitv = p->p_stats->p_timer[SCARG(uap, which)]; + splx(s); + return (copyout((caddr_t)&aitv, (caddr_t)SCARG(uap, itv), + sizeof (struct itimerval))); +} + +/* ARGSUSED */ +int +setitimer(p, uap, retval) + struct proc *p; + register struct setitimer_args /* { + syscallarg(u_int) which; + syscallarg(struct itimerval *) itv; + syscallarg(struct itimerval *) oitv; + } */ *uap; + register_t *retval; +{ + struct itimerval aitv; + register struct itimerval *itvp; + int s, error; + + if (SCARG(uap, which) > ITIMER_PROF) + return (EINVAL); + itvp = SCARG(uap, itv); + if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv, + sizeof(struct itimerval)))) + return (error); + if ((SCARG(uap, itv) = SCARG(uap, oitv)) && + (error = getitimer(p, uap, retval))) + return (error); + if (itvp == 0) + return (0); + if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval)) + return (EINVAL); + s = splclock(); + if (SCARG(uap, which) == ITIMER_REAL) { + untimeout(realitexpire, (caddr_t)p); + if (timerisset(&aitv.it_value)) { + timevaladd(&aitv.it_value, (struct timeval *)&time); + timeout(realitexpire, (caddr_t)p, hzto(&aitv.it_value)); + } + p->p_realtimer = aitv; + } else + p->p_stats->p_timer[SCARG(uap, which)] = aitv; + splx(s); + return (0); +} + +/* + * Real interval timer expired: + * send process whose timer expired an alarm signal. + * If time is not set up to reload, then just return. + * Else compute next time timer should go off which is > current time. + * This is where delay in processing this timeout causes multiple + * SIGALRM calls to be compressed into one. 
+ */ +void +realitexpire(arg) + void *arg; +{ + register struct proc *p; + int s; + + p = (struct proc *)arg; + psignal(p, SIGALRM); + if (!timerisset(&p->p_realtimer.it_interval)) { + timerclear(&p->p_realtimer.it_value); + return; + } + for (;;) { + s = splclock(); + timevaladd(&p->p_realtimer.it_value, + &p->p_realtimer.it_interval); + if (timercmp(&p->p_realtimer.it_value, &time, >)) { + timeout(realitexpire, (caddr_t)p, + hzto(&p->p_realtimer.it_value)); + splx(s); + return; + } + splx(s); + } +} + +/* + * Check that a proposed value to load into the .it_value or + * .it_interval part of an interval timer is acceptable, and + * fix it to have at least minimal value (i.e. if it is less + * than the resolution of the clock, round it up.) + */ +int +itimerfix(tv) + struct timeval *tv; +{ + + if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || + tv->tv_usec < 0 || tv->tv_usec >= 1000000) + return (EINVAL); + if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick) + tv->tv_usec = tick; + return (0); +} + +/* + * Decrement an interval timer by a specified number + * of microseconds, which must be less than a second, + * i.e. < 1000000. If the timer expires, then reload + * it. In this case, carry over (usec - old value) to + * reduce the value reloaded into the timer so that + * the timer does not drift. This routine assumes + * that it is called in a context where the timers + * on which it is operating cannot change in value. 
+ */ +int +itimerdecr(itp, usec) + register struct itimerval *itp; + int usec; +{ + + if (itp->it_value.tv_usec < usec) { + if (itp->it_value.tv_sec == 0) { + /* expired, and already in next interval */ + usec -= itp->it_value.tv_usec; + goto expire; + } + itp->it_value.tv_usec += 1000000; + itp->it_value.tv_sec--; + } + itp->it_value.tv_usec -= usec; + usec = 0; + if (timerisset(&itp->it_value)) + return (1); + /* expired, exactly at end of interval */ +expire: + if (timerisset(&itp->it_interval)) { + itp->it_value = itp->it_interval; + itp->it_value.tv_usec -= usec; + if (itp->it_value.tv_usec < 0) { + itp->it_value.tv_usec += 1000000; + itp->it_value.tv_sec--; + } + } else + itp->it_value.tv_usec = 0; /* sec is already 0 */ + return (0); +} + +/* + * Add and subtract routines for timevals. + * N.B.: subtract routine doesn't deal with + * results which are before the beginning, + * it just gets very confused in this case. + * Caveat emptor. + */ +timevaladd(t1, t2) + struct timeval *t1, *t2; +{ + + t1->tv_sec += t2->tv_sec; + t1->tv_usec += t2->tv_usec; + timevalfix(t1); +} + +timevalsub(t1, t2) + struct timeval *t1, *t2; +{ + + t1->tv_sec -= t2->tv_sec; + t1->tv_usec -= t2->tv_usec; + timevalfix(t1); +} + +timevalfix(t1) + struct timeval *t1; +{ + + if (t1->tv_usec < 0) { + t1->tv_sec--; + t1->tv_usec += 1000000; + } + if (t1->tv_usec >= 1000000) { + t1->tv_sec++; + t1->tv_usec -= 1000000; + } +} diff --git a/sys/kern/kern_xxx.c b/sys/kern/kern_xxx.c new file mode 100644 index 000000000000..caa1cdd10c00 --- /dev/null +++ b/sys/kern/kern_xxx.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)kern_xxx.c 8.3 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/reboot.h> +#include <vm/vm.h> +#include <sys/sysctl.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +/* ARGSUSED */ +int +reboot(p, uap, retval) + struct proc *p; + struct reboot_args /* { + syscallarg(int) opt; + } */ *uap; + register_t *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + boot(SCARG(uap, opt)); + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + +/* ARGSUSED */ +int +compat_43_gethostname(p, uap, retval) + struct proc *p; + struct compat_43_gethostname_args /* { + syscallarg(char *) hostname; + syscallarg(u_int) len; + } */ *uap; + register_t *retval; +{ + int name; + + name = KERN_HOSTNAME; + return (kern_sysctl(&name, 1, SCARG(uap, hostname), &SCARG(uap, len), + 0, 0)); +} + +/* ARGSUSED */ +int +compat_43_sethostname(p, uap, retval) + struct proc *p; + register struct compat_43_sethostname_args /* { + syscallarg(char *) hostname; + syscallarg(u_int) len; + } */ *uap; + register_t *retval; +{ + int name; + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + name = KERN_HOSTNAME; + return (kern_sysctl(&name, 1, 0, 0, SCARG(uap, hostname), + SCARG(uap, len))); +} + +/* ARGSUSED */ +int +compat_43_gethostid(p, uap, retval) + struct proc *p; + void *uap; + register_t *retval; +{ + + *(int32_t *)retval = hostid; + return (0); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +#ifdef COMPAT_43 +/* ARGSUSED */ +int +compat_43_sethostid(p, uap, retval) + struct proc *p; + struct compat_43_sethostid_args /* { + syscallarg(int32_t) hostid; + } */ *uap; + register_t *retval; +{ + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + hostid = SCARG(uap, hostid); + return (0); +} + +int +compat_43_quota(p, uap, retval) + struct proc *p; + void *uap; + register_t *retval; +{ + + 
return (ENOSYS); +} +#endif /* COMPAT_43 */ diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh new file mode 100644 index 000000000000..4e2c28c44e02 --- /dev/null +++ b/sys/kern/makesyscalls.sh @@ -0,0 +1,365 @@ +#! /bin/sh - +# +# @(#)makesyscalls.sh 8.2 (Berkeley) 2/14/95 + +set -e + +case $# in + 2) ;; + *) echo "Usage: $0 config-file input-file" 1>&2 + exit 1 + ;; +esac + +# source the config file. +. $1 + +# the config file sets the following variables: +# sysnames the syscall names file +# sysnumhdr the syscall numbers file +# syssw the syscall switch file +# sysarghdr the syscall argument struct definitions +# compatopts those syscall types that are for 'compat' syscalls +# switchname the name for the 'struct sysent' we define +# namesname the name for the 'char *[]' we define +# constprefix the prefix for the system call constants +# +# NOTE THAT THIS makesyscalls.sh DOES NOT SUPPORT 'LIBCOMPAT'. + +# tmp files: +sysdcl="sysent.dcl" +syscompat_pref="sysent." +sysent="sysent.switch" + +syscompat_files="" +for file in $compatopts; do + syscompat_files="$syscompat_files $syscompat_pref$file" +done + +trap "rm $sysdcl $syscompat_files $sysent" 0 + +# Awk program (must support nawk extensions) +# Use "awk" at Berkeley, "nawk" or "gawk" elsewhere. +awk=${AWK:-awk} + +# Does this awk have a "toupper" function? (i.e. is it GNU awk) +isgawk=`$awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null` + +# If this awk does not define "toupper" then define our own. +if [ "$isgawk" = TRUE ] ; then + # GNU awk provides it. + toupper= +else + # Provide our own toupper() + toupper=' +function toupper(str) { + _toupper_cmd = "echo "str" |tr a-z A-Z" + _toupper_cmd | getline _toupper_str; + close(_toupper_cmd); + return _toupper_str; +}' +fi + +# before handing it off to awk, make a few adjustments: +# (1) insert spaces around {, }, (, ), *, and commas. 
+# (2) get rid of any and all dollar signs (so that rcs id use safe) +# +# The awk script will deal with blank lines and lines that +# start with the comment character (';'). + +sed -e ' +s/\$//g +:join + /\\$/{a\ + + N + s/\\\n// + b join + } +2,${ + /^#/!s/\([{}()*,]\)/ \1 /g +} +' < $2 | $awk " +$toupper +BEGIN { + sysnames = \"$sysnames\" + sysnumhdr = \"$sysnumhdr\" + sysarghdr = \"$sysarghdr\" + switchname = \"$switchname\" + namesname = \"$namesname\" + constprefix = \"$constprefix\" + + sysdcl = \"$sysdcl\" + syscompat_pref = \"$syscompat_pref\" + sysent = \"$sysent\" + infile = \"$2\" + + compatopts = \"$compatopts\" + "' + + printf "/*\n * System call switch table.\n *\n" > sysdcl + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysdcl + + ncompat = split(compatopts,compat) + for (i = 1; i <= ncompat; i++) { + compat_upper[i] = toupper(compat[i]) + compat_file[i] = sprintf("%s%s", syscompat_pref, compat[i]) + + printf "\n#ifdef %s\n", compat_upper[i] > compat_file[i] + printf "#define %s(func) __CONCAT(%s_,func)\n\n", \ + compat[i], compat[i] > compat_file[i] + } + + printf "/*\n * System call names.\n *\n" > sysnames + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames + + printf "/*\n * System call numbers.\n *\n" > sysnumhdr + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnumhdr + + printf "/*\n * System call argument lists.\n *\n" > sysarghdr + printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysarghdr +} +NR == 1 { + printf " * created from%s\n */\n\n", $0 > sysdcl + + printf "#define\ts(type)\tsizeof(type)\n\n" > sysent + printf "struct sysent %s[] = {\n",switchname > sysent + + printf " * created from%s\n */\n\n", $0 > sysnames + printf "char *%s[] = {\n",namesname > sysnames + + printf " * created from%s\n */\n\n", $0 > sysnumhdr + + printf " * created from%s\n */\n\n", $0 > sysarghdr + printf "#define\tsyscallarg(x)\tunion { x datum; register_t pad; }\n" \ 
+	    > sysarghdr
+	next
+}
+NF == 0 || $1 ~ /^;/ {
+	next
+}
+$1 ~ /^#[ 	]*include/ {
+	print > sysdcl
+	next
+}
+$1 ~ /^#[ 	]*if/ {
+	print > sysent
+	print > sysdcl
+	for (i = 1; i <= ncompat; i++)
+		print > compat_file[i]
+	print > sysnames
+	savesyscall = syscall
+	next
+}
+$1 ~ /^#[ 	]*else/ {
+	print > sysent
+	print > sysdcl
+	for (i = 1; i <= ncompat; i++)
+		print > compat_file[i]
+	print > sysnames
+	syscall = savesyscall
+	next
+}
+$1 ~ /^#/ {
+	print > sysent
+	print > sysdcl
+	for (i = 1; i <= ncompat; i++)
+		print > compat_file[i]
+	print > sysnames
+	next
+}
+syscall != $1 {
+	printf "%s: line %d: syscall number out of sync at %d\n", \
+	   infile, NR, syscall
+	printf "line is:\n"
+	print
+	exit 1
+}
+# Report a parse error: "was" is the token seen, "wanted" the token expected.
+function parserr(was, wanted) {
+	printf "%s: line %d: unexpected %s (expected %s)\n", \
+	    infile, NR, was, wanted
+	exit 1
+}
+# Parse one "{ type name(args); } [alias]" line from syscalls.master into
+# funcname, funcalias, argc, argtype[], argname[].
+function parseline() {
+	f=3			# toss number and type
+	if ($NF != "}") {
+		funcalias=$NF
+		end=NF-1
+	} else {
+		funcalias=""
+		end=NF
+	}
+	if ($f != "{")
+		parserr($f, "{")
+	f++
+	if ($end != "}")
+		parserr($end, "}")
+	end--
+	if ($end != ";")
+		parserr($end, ";")
+	end--
+	if ($end != ")")
+		parserr($end, ")")
+	end--
+
+	f++			# toss return type
+
+	funcname=$f
+	if (funcalias == "")
+		funcalias=funcname
+	f++
+
+	if ($f != "(")
+		parserr($f, "(")	# was misreported as ")"
+	f++
+
+	argc = 0
+	if (f == end) {
+		if ($f != "void")
+			parserr($f, "argument definition")
+		return
+	}
+
+	while (f <= end) {
+		argc++
+		argtype[argc]=""
+		oldf=""
+		while (f < end && $(f+1) != ",") {
+			if (argtype[argc] != "" && oldf != "*")
+				argtype[argc] = argtype[argc]" ";
+			argtype[argc] = argtype[argc]$f;
+			oldf = $f;
+			f++
+		}
+		if (argtype[argc] == "")
+			parserr($f, "argument definition")
+		argname[argc]=$f;
+		f += 2;			# skip name, and any comma
+	}
+}
+# Emit one syscall's entries into the switch table, names table, number
+# header and argument-struct header. "nodefs" modifies which outputs are
+# produced; "compatwrap" wraps compat syscalls in their COMPAT macro.
+function putent(nodefs, declfile, compatwrap) {
+	# output syscall declaration for switch table
+	if (compatwrap == "")
+		printf("int\t%s();\n", funcname) > declfile
+	else
+		printf("int\t%s(%s)();\n", compatwrap, funcname) > 
declfile + + # output syscall switch entry +# printf("\t{ { %d", argc) > sysent +# for (i = 1; i <= argc; i++) { +# if (i == 5) # wrap the line +# printf(",\n\t ") > sysent +# else +# printf(", ") > sysent +# printf("s(%s)", argtypenospc[i]) > sysent +# } + printf("\t{ %d, ", argc) > sysent + if (argc == 0) + printf("0") > sysent + else if (compatwrap == "") + printf("s(struct %s_args)", funcname) > sysent + else + printf("s(struct %s_%s_args)", compatwrap, funcname) > sysent + if (compatwrap == "") + wfn = sprintf("%s", funcname); + else + wfn = sprintf("%s(%s)", compatwrap, funcname); + printf(",\n\t %s },", wfn) > sysent + for (i = 0; i < (33 - length(wfn)) / 8; i++) + printf("\t") > sysent + if (compatwrap == "") + printf("/* %d = %s */\n", syscall, funcalias) > sysent + else + printf("/* %d = %s %s */\n", syscall, compatwrap, + funcalias) > sysent + + # output syscall name for names table + if (compatwrap == "") + printf("\t\"%s\",\t\t\t/* %d = %s */\n", funcalias, syscall, + funcalias) > sysnames + else + printf("\t\"%s_%s\",\t/* %d = %s %s */\n", compatwrap, + funcalias, syscall, compatwrap, funcalias) > sysnames + + # output syscall number of header, if appropriate + if (nodefs == "" || nodefs == "NOARGS") + printf("#define\t%s%s\t%d\n", constprefix, funcalias, + syscall) > sysnumhdr + else if (nodefs != "NODEF") + printf("\t\t\t\t/* %d is %s %s */\n", syscall, + compatwrap, funcalias) > sysnumhdr + + # output syscall argument structure, if it has arguments + if (argc != 0 && nodefs != "NOARGS") { + if (compatwrap == "") + printf("\nstruct %s_args {\n", funcname) > sysarghdr + else + printf("\nstruct %s_%s_args {\n", compatwrap, + funcname) > sysarghdr + for (i = 1; i <= argc; i++) + printf("\tsyscallarg(%s) %s;\n", argtype[i], + argname[i]) > sysarghdr + printf("};\n") > sysarghdr + } +} +$2 == "STD" { + parseline() + putent("", sysdcl, "") + syscall++ + next +} +$2 == "NODEF" || $2 == "NOARGS" { + parseline() + putent($2, sysdcl, "") + syscall++ + next +} 
+$2 == "OBSOL" || $2 == "UNIMPL" { + if ($2 == "OBSOL") + comment="obsolete" + else + comment="unimplemented" + for (i = 3; i <= NF; i++) + comment=comment " " $i + + printf("\t{ 0, 0,\n\t nosys },\t\t\t\t/* %d = %s */\n", \ + syscall, comment) > sysent + printf("\t\"#%d (%s)\",\t\t/* %d = %s */\n", \ + syscall, comment, syscall, comment) > sysnames + if ($2 != "UNIMPL") + printf("\t\t\t\t/* %d is %s */\n", syscall, comment) > sysnumhdr + syscall++ + next +} +{ + for (i = 1; i <= ncompat; i++) { + if ($2 == compat_upper[i]) { + parseline(); + putent("COMMENT", compat_file[i], compat[i]) + syscall++ + next + } + } + printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2 + exit 1 +} +END { + printf "\n#undef\tsyscallarg\n" > sysarghdr + + for (i = 1; i <= ncompat; i++) { + printf("\n#else /* %s */\n", compat_upper[i]) > compat_file[i] + printf("#define %s(func) nosys\n", compat[i]) > \ + compat_file[i] + printf("#endif /* %s */\n\n", compat_upper[i]) > compat_file[i] + } + + printf("};\n\n") > sysent + printf("int\tn%s= sizeof(%s) / sizeof(%s[0]);\n", switchname, + switchname, switchname) > sysent + + printf("};\n") > sysnames +} ' + +cat $sysdcl $syscompat_files $sysent > $syssw + +#chmod 444 $sysnames $syshdr $syssw diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c new file mode 100644 index 000000000000..728133978adb --- /dev/null +++ b/sys/kern/subr_autoconf.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratories. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)subr_autoconf.c 8.3 (Berkeley) 5/17/94 + * + * from: $Header: subr_autoconf.c,v 1.12 93/02/01 19:31:48 torek Exp $ (LBL) + */ + +#include <sys/param.h> +#include <sys/device.h> +#include <sys/malloc.h> +#include <libkern/libkern.h> + +/* + * Autoconfiguration subroutines. + */ + +/* + * ioconf.c exports exactly two names: cfdata and cfroots. All system + * devices and drivers are found via these tables. + */ +extern struct cfdata cfdata[]; +extern short cfroots[]; + +#define ROOT ((struct device *)NULL) + +struct matchinfo { + cfmatch_t fn; + struct device *parent; + void *aux; + struct cfdata *match; + int pri; +}; + +/* + * Apply the matching function and choose the best. This is used + * a few times and we want to keep the code small. + */ +static void +mapply(m, cf) + register struct matchinfo *m; + register struct cfdata *cf; +{ + register int pri; + + if (m->fn != NULL) + pri = (*m->fn)(m->parent, cf, m->aux); + else + pri = (*cf->cf_driver->cd_match)(m->parent, cf, m->aux); + if (pri > m->pri) { + m->match = cf; + m->pri = pri; + } +} + +/* + * Iterate over all potential children of some device, calling the given + * function (default being the child's match function) for each one. + * Nonzero returns are matches; the highest value returned is considered + * the best match. Return the `found child' if we got a match, or NULL + * otherwise. The `aux' pointer is simply passed on through. + * + * Note that this function is designed so that it can be used to apply + * an arbitrary function to all potential children (its return value + * can be ignored). 
+ */ +struct cfdata * +config_search(fn, parent, aux) + cfmatch_t fn; + register struct device *parent; + void *aux; +{ + register struct cfdata *cf; + register short *p; + struct matchinfo m; + + m.fn = fn; + m.parent = parent; + m.aux = aux; + m.match = NULL; + m.pri = 0; + for (cf = cfdata; cf->cf_driver; cf++) { + /* + * Skip cf if no longer eligible, otherwise scan through + * parents for one matching `parent', and try match function. + */ + if (cf->cf_fstate == FSTATE_FOUND) + continue; + for (p = cf->cf_parents; *p >= 0; p++) + if (parent->dv_cfdata == &cfdata[*p]) + mapply(&m, cf); + } + return (m.match); +} + +/* + * Find the given root device. + * This is much like config_search, but there is no parent. + */ +struct cfdata * +config_rootsearch(fn, rootname, aux) + register cfmatch_t fn; + register char *rootname; + register void *aux; +{ + register struct cfdata *cf; + register short *p; + struct matchinfo m; + + m.fn = fn; + m.parent = ROOT; + m.aux = aux; + m.match = NULL; + m.pri = 0; + /* + * Look at root entries for matching name. We do not bother + * with found-state here since only one root should ever be + * searched (and it must be done first). + */ + for (p = cfroots; *p >= 0; p++) { + cf = &cfdata[*p]; + if (strcmp(cf->cf_driver->cd_name, rootname) == 0) + mapply(&m, cf); + } + return (m.match); +} + +static char *msgs[3] = { "", " not configured\n", " unsupported\n" }; + +/* + * The given `aux' argument describes a device that has been found + * on the given parent, but not necessarily configured. Locate the + * configuration data for that device (using the cd_match configuration + * driver function) and attach it, and return true. If the device was + * not configured, call the given `print' function and return 0. 
+ */ +int +config_found(parent, aux, print) + struct device *parent; + void *aux; + cfprint_t print; +{ + struct cfdata *cf; + + if ((cf = config_search((cfmatch_t)NULL, parent, aux)) != NULL) { + config_attach(parent, cf, aux, print); + return (1); + } + printf(msgs[(*print)(aux, parent->dv_xname)]); + return (0); +} + +/* + * As above, but for root devices. + */ +int +config_rootfound(rootname, aux) + char *rootname; + void *aux; +{ + struct cfdata *cf; + + if ((cf = config_rootsearch((cfmatch_t)NULL, rootname, aux)) != NULL) { + config_attach(ROOT, cf, aux, (cfprint_t)NULL); + return (1); + } + printf("root device %s not configured\n", rootname); + return (0); +} + +/* just like sprintf(buf, "%d") except that it works from the end */ +static char * +number(ep, n) + register char *ep; + register int n; +{ + + *--ep = 0; + while (n >= 10) { + *--ep = (n % 10) + '0'; + n /= 10; + } + *--ep = n + '0'; + return (ep); +} + +/* + * Attach a found device. Allocates memory for device variables. + */ +void +config_attach(parent, cf, aux, print) + register struct device *parent; + register struct cfdata *cf; + register void *aux; + cfprint_t print; +{ + register struct device *dev; + register struct cfdriver *cd; + register size_t lname, lunit; + register char *xunit; + int myunit; + char num[10]; + static struct device **nextp = &alldevs; + + cd = cf->cf_driver; + if (cd->cd_devsize < sizeof(struct device)) + panic("config_attach"); + myunit = cf->cf_unit; + if (cf->cf_fstate == FSTATE_NOTFOUND) + cf->cf_fstate = FSTATE_FOUND; + else + cf->cf_unit++; + + /* compute length of name and decimal expansion of unit number */ + lname = strlen(cd->cd_name); + xunit = number(&num[sizeof num], myunit); + lunit = &num[sizeof num] - xunit; + if (lname + lunit >= sizeof(dev->dv_xname)) + panic("config_attach: device name too long"); + + /* get memory for all device vars */ + dev = (struct device *)malloc(cd->cd_devsize, M_DEVBUF, M_WAITOK); + /* XXX cannot wait! 
*/ + bzero(dev, cd->cd_devsize); + *nextp = dev; /* link up */ + nextp = &dev->dv_next; + dev->dv_class = cd->cd_class; + dev->dv_cfdata = cf; + dev->dv_unit = myunit; + bcopy(cd->cd_name, dev->dv_xname, lname); + bcopy(xunit, dev->dv_xname + lname, lunit); + dev->dv_parent = parent; + if (parent == ROOT) + printf("%s (root)", dev->dv_xname); + else { + printf("%s at %s", dev->dv_xname, parent->dv_xname); + (void) (*print)(aux, (char *)0); + } + + /* put this device in the devices array */ + if (dev->dv_unit >= cd->cd_ndevs) { + /* + * Need to expand the array. + */ + int old = cd->cd_ndevs, oldbytes, new, newbytes; + void **nsp; + + if (old == 0) { + new = max(MINALLOCSIZE / sizeof(void *), + dev->dv_unit + 1); + newbytes = new * sizeof(void *); + nsp = malloc(newbytes, M_DEVBUF, M_WAITOK); /*XXX*/ + bzero(nsp, newbytes); + } else { + new = cd->cd_ndevs; + do { + new *= 2; + } while (new <= dev->dv_unit); + oldbytes = old * sizeof(void *); + newbytes = new * sizeof(void *); + nsp = malloc(newbytes, M_DEVBUF, M_WAITOK); /*XXX*/ + bcopy(cd->cd_devs, nsp, oldbytes); + bzero(&nsp[old], newbytes - oldbytes); + free(cd->cd_devs, M_DEVBUF); + } + cd->cd_ndevs = new; + cd->cd_devs = nsp; + } + if (cd->cd_devs[dev->dv_unit]) + panic("config_attach: duplicate %s", dev->dv_xname); + cd->cd_devs[dev->dv_unit] = dev; + + /* + * Before attaching, clobber any unfound devices that are + * otherwise identical. + */ + for (cf = cfdata; cf->cf_driver; cf++) + if (cf->cf_driver == cd && cf->cf_unit == dev->dv_unit && + cf->cf_fstate == FSTATE_NOTFOUND) + cf->cf_fstate = FSTATE_FOUND; + (*cd->cd_attach)(parent, dev, aux); +} + +/* + * Attach an event. These must come from initially-zero space (see + * commented-out assignments below), but that occurs naturally for + * device instance variables. 
+ */ +void +evcnt_attach(dev, name, ev) + struct device *dev; + const char *name; + struct evcnt *ev; +{ + static struct evcnt **nextp = &allevents; + +#ifdef DIAGNOSTIC + if (strlen(name) >= sizeof(ev->ev_name)) + panic("evcnt_attach"); +#endif + /* ev->ev_next = NULL; */ + ev->ev_dev = dev; + /* ev->ev_count = 0; */ + strcpy(ev->ev_name, name); + *nextp = ev; + nextp = &ev->ev_next; +} diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c new file mode 100644 index 000000000000..792a1cec1b20 --- /dev/null +++ b/sys/kern/subr_log.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)subr_log.c 8.3 (Berkeley) 2/14/95 + */ + +/* + * Error log buffer for kernel printf's. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/ioctl.h> +#include <sys/msgbuf.h> +#include <sys/file.h> + +#define LOG_RDPRI (PZERO + 1) + +#define LOG_ASYNC 0x04 +#define LOG_RDWAIT 0x08 + +struct logsoftc { + int sc_state; /* see above for possibilities */ + struct selinfo sc_selp; /* process waiting on select call */ + int sc_pgid; /* process/group for async I/O */ +} logsoftc; + +int log_open; /* also used in log() */ + +/*ARGSUSED*/ +int +logopen(dev, flags, mode, p) + dev_t dev; + int flags, mode; + struct proc *p; +{ + register struct msgbuf *mbp = msgbufp; + + if (log_open) + return (EBUSY); + log_open = 1; + logsoftc.sc_pgid = p->p_pid; /* signal process only */ + /* + * Potential race here with putchar() but since putchar should be + * called by autoconf, msg_magic should be initialized by the time + * we get here. 
+ */ + if (mbp->msg_magic != MSG_MAGIC) { + register int i; + + mbp->msg_magic = MSG_MAGIC; + mbp->msg_bufx = mbp->msg_bufr = 0; + for (i=0; i < MSG_BSIZE; i++) + mbp->msg_bufc[i] = 0; + } + return (0); +} + +/*ARGSUSED*/ +int +logclose(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + + log_open = 0; + logsoftc.sc_state = 0; + return (0); +} + +/*ARGSUSED*/ +int +logread(dev, uio, flag) + dev_t dev; + struct uio *uio; + int flag; +{ + register struct msgbuf *mbp = msgbufp; + register long l; + register int s; + int error = 0; + + s = splhigh(); + while (mbp->msg_bufr == mbp->msg_bufx) { + if (flag & IO_NDELAY) { + splx(s); + return (EWOULDBLOCK); + } + logsoftc.sc_state |= LOG_RDWAIT; + if (error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH, + "klog", 0)) { + splx(s); + return (error); + } + } + splx(s); + logsoftc.sc_state &= ~LOG_RDWAIT; + + while (uio->uio_resid > 0) { + l = mbp->msg_bufx - mbp->msg_bufr; + if (l < 0) + l = MSG_BSIZE - mbp->msg_bufr; + l = min(l, uio->uio_resid); + if (l == 0) + break; + error = uiomove((caddr_t)&mbp->msg_bufc[mbp->msg_bufr], + (int)l, uio); + if (error) + break; + mbp->msg_bufr += l; + if (mbp->msg_bufr < 0 || mbp->msg_bufr >= MSG_BSIZE) + mbp->msg_bufr = 0; + } + return (error); +} + +/*ARGSUSED*/ +int +logselect(dev, rw, p) + dev_t dev; + int rw; + struct proc *p; +{ + int s = splhigh(); + + switch (rw) { + + case FREAD: + if (msgbufp->msg_bufr != msgbufp->msg_bufx) { + splx(s); + return (1); + } + selrecord(p, &logsoftc.sc_selp); + break; + } + splx(s); + return (0); +} + +void +logwakeup() +{ + struct proc *p; + + if (!log_open) + return; + selwakeup(&logsoftc.sc_selp); + if (logsoftc.sc_state & LOG_ASYNC) { + if (logsoftc.sc_pgid < 0) + gsignal(-logsoftc.sc_pgid, SIGIO); + else if (p = pfind(logsoftc.sc_pgid)) + psignal(p, SIGIO); + } + if (logsoftc.sc_state & LOG_RDWAIT) { + wakeup((caddr_t)msgbufp); + logsoftc.sc_state &= ~LOG_RDWAIT; + } +} + +/*ARGSUSED*/ +int +logioctl(dev, com, data, flag, p) + 
dev_t dev; + u_long com; + caddr_t data; + int flag; + struct proc *p; +{ + long l; + int s; + + switch (com) { + + /* return number of characters immediately available */ + case FIONREAD: + s = splhigh(); + l = msgbufp->msg_bufx - msgbufp->msg_bufr; + splx(s); + if (l < 0) + l += MSG_BSIZE; + *(int *)data = l; + break; + + case FIONBIO: + break; + + case FIOASYNC: + if (*(int *)data) + logsoftc.sc_state |= LOG_ASYNC; + else + logsoftc.sc_state &= ~LOG_ASYNC; + break; + + case TIOCSPGRP: + logsoftc.sc_pgid = *(int *)data; + break; + + case TIOCGPGRP: + *(int *)data = logsoftc.sc_pgid; + break; + + default: + return (-1); + } + return (0); +} diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c new file mode 100644 index 000000000000..8a9a44edaf39 --- /dev/null +++ b/sys/kern/subr_prf.c @@ -0,0 +1,606 @@ +/*- + * Copyright (c) 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. 
+ * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)subr_prf.c 8.4 (Berkeley) 5/4/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/conf.h> +#include <sys/reboot.h> +#include <sys/msgbuf.h> +#include <sys/proc.h> +#include <sys/ioctl.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/tty.h> +#include <sys/tprintf.h> +#include <sys/syslog.h> +#include <sys/malloc.h> + +/* + * Note that stdarg.h and the ANSI style va_start macro is used for both + * ANSI and traditional C compilers. 
+ */ +#include <machine/stdarg.h> + +#ifdef KADB +#include <machine/kdbparam.h> +#endif + +#define TOCONS 0x01 +#define TOTTY 0x02 +#define TOLOG 0x04 + +struct tty *constty; /* pointer to console "window" tty */ + +extern cnputc(); /* standard console putc */ +int (*v_putc)() = cnputc; /* routine to putc on virtual console */ + +void logpri __P((int level)); +static void putchar __P((int ch, int flags, struct tty *tp)); +static char *ksprintn __P((u_long num, int base, int *len)); +void kprintf __P((const char *fmt, int flags, struct tty *tp, va_list ap)); + +int consintr = 1; /* Ok to handle console interrupts? */ + +/* + * Variable panicstr contains argument to first call to panic; used as flag + * to indicate that the kernel has already called panic. + */ +const char *panicstr; + +/* + * Panic is called on unresolvable fatal errors. It prints "panic: mesg", + * and then reboots. If we are called twice, then we avoid trying to sync + * the disks as this often leads to recursive panics. + */ +#ifdef __GNUC__ +volatile void boot(int flags); /* boot() does not return */ +volatile /* panic() does not return */ +#endif +void +#ifdef __STDC__ +panic(const char *fmt, ...) +#else +panic(fmt, va_alist) + char *fmt; +#endif +{ + int bootopt; + va_list ap; + + bootopt = RB_AUTOBOOT | RB_DUMP; + if (panicstr) + bootopt |= RB_NOSYNC; + else + panicstr = fmt; + + va_start(ap, fmt); + printf("panic: %r\n", fmt, ap); + va_end(ap); + +#ifdef KGDB + kgdb_panic(); +#endif +#ifdef KADB + if (boothowto & RB_KDB) + kdbpanic(); +#endif + boot(bootopt); +} + +/* + * Warn that a system table is full. + */ +void +tablefull(tab) + const char *tab; +{ + + log(LOG_ERR, "%s: table is full\n", tab); +} + +/* + * Uprintf prints to the controlling terminal for the current process. + * It may block if the tty queue is overfull. No message is printed if + * the queue does not clear in a reasonable time. + */ +void +#ifdef __STDC__ +uprintf(const char *fmt, ...) 
#else
uprintf(fmt, va_alist)
	char *fmt;
#endif
{
	register struct proc *p = curproc;
	va_list ap;

	/* Print only if the process has a controlling tty attached. */
	if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
		va_start(ap, fmt);
		kprintf(fmt, TOTTY, p->p_session->s_ttyp, ap);
		va_end(ap);
	}
}

/*
 * Return a session handle (with an extra reference held) for later use
 * with tprintf(), or NULL if the process has no controlling terminal.
 * The caller must release the handle with tprintf_close().
 */
tpr_t
tprintf_open(p)
	register struct proc *p;
{

	if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
		SESSHOLD(p->p_session);
		return ((tpr_t) p->p_session);
	}
	return ((tpr_t) NULL);
}

/*
 * Release a session handle obtained from tprintf_open().
 * A NULL handle is ignored.
 */
void
tprintf_close(sess)
	tpr_t sess;
{

	if (sess)
		SESSRELE((struct session *) sess);
}

/*
 * tprintf prints on the controlling terminal associated
 * with the given session.
 */
void
#ifdef __STDC__
tprintf(tpr_t tpr, const char *fmt, ...)
#else
tprintf(tpr, fmt, va_alist)
	tpr_t tpr;
	char *fmt;
#endif
{
	register struct session *sess = (struct session *)tpr;
	struct tty *tp = NULL;
	int flags = TOLOG;
	va_list ap;

	/* Always log; also write to the tty if it has output room. */
	logpri(LOG_INFO);
	if (sess && sess->s_ttyvp && ttycheckoutq(sess->s_ttyp, 0)) {
		flags |= TOTTY;
		tp = sess->s_ttyp;
	}
	va_start(ap, fmt);
	kprintf(fmt, flags, tp, ap);
	va_end(ap);
	logwakeup();
}

/*
 * Ttyprintf displays a message on a tty; it should be used only by
 * the tty driver, or anything that knows the underlying tty will not
 * be revoke(2)'d away.  Other callers should use tprintf.
 */
void
#ifdef __STDC__
ttyprintf(struct tty *tp, const char *fmt, ...)
#else
ttyprintf(tp, fmt, va_alist)
	struct tty *tp;
	char *fmt;
#endif
{
	va_list ap;

	va_start(ap, fmt);
	kprintf(fmt, TOTTY, tp, ap);
	va_end(ap);
}

extern	int log_open;

/*
 * Log writes to the log buffer, and guarantees not to sleep (so can be
 * called by interrupt routines).  If there is no process reading the
 * log yet, it writes to the console also.
 */
void
#ifdef __STDC__
log(int level, const char *fmt, ...)
#else
log(level, fmt, va_alist)
	int level;
	char *fmt;
#endif
{
	register int s;
	va_list ap;

	/* Block interrupts so the <level> prefix and message stay intact. */
	s = splhigh();
	logpri(level);
	va_start(ap, fmt);
	kprintf(fmt, TOLOG, NULL, ap);
	splx(s);
	va_end(ap);
	/* No syslogd reading yet: echo to the console as well. */
	if (!log_open) {
		va_start(ap, fmt);
		kprintf(fmt, TOCONS, NULL, ap);
		va_end(ap);
	}
	logwakeup();
}

/*
 * Write a syslog priority prefix of the form "<level>" to the
 * kernel message buffer.
 */
void
logpri(level)
	int level;
{
	register int ch;
	register char *p;

	putchar('<', TOLOG, NULL);
	/* ksprintn() returns digits in reverse order; walk back down. */
	for (p = ksprintn((u_long)level, 10, NULL); ch = *p--;)
		putchar(ch, TOLOG, NULL);
	putchar('>', TOLOG, NULL);
}

/*
 * Append to the last log message; like log() but without the
 * "<level>" priority prefix.
 */
void
#ifdef __STDC__
addlog(const char *fmt, ...)
#else
addlog(fmt, va_alist)
	char *fmt;
#endif
{
	register int s;
	va_list ap;

	s = splhigh();
	va_start(ap, fmt);
	kprintf(fmt, TOLOG, NULL, ap);
	splx(s);
	va_end(ap);
	if (!log_open) {
		va_start(ap, fmt);
		kprintf(fmt, TOCONS, NULL, ap);
		va_end(ap);
	}
	logwakeup();
}

/*
 * Kernel printf: writes to both the console and the message buffer.
 * Console interrupt handling is disabled (via consintr) for the
 * duration so output is not interleaved.
 */
void
#ifdef __STDC__
printf(const char *fmt, ...)
#else
printf(fmt, va_alist)
	char *fmt;
#endif
{
	va_list ap;
	register int savintr;

	savintr = consintr;		/* disable interrupts */
	consintr = 0;
	va_start(ap, fmt);
	kprintf(fmt, TOCONS | TOLOG, NULL, ap);
	va_end(ap);
	if (!panicstr)
		logwakeup();
	consintr = savintr;		/* reenable interrupts */
}

/*
 * Scaled down version of printf(3).
 *
 * Two additional formats:
 *
 * The format %b is supported to decode error registers.
 * Its usage is:
 *
 *	printf("reg=%b\n", regval, "<base><arg>*");
 *
 * where <base> is the output base expressed as a control character, e.g.
 * \10 gives octal; \20 gives hex.  Each arg is a sequence of characters,
 * the first of which gives the bit number to be inspected (origin 1), and
 * the next characters (up to a control character, i.e. a character <= 32),
 * give the name of the register.
 Thus:
 *
 *	kprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
 *
 * would produce output:
 *
 *	reg=3<BITTWO,BITONE>
 *
 * The format %r passes an additional format string and argument list
 * recursively.  Its usage is:
 *
 * fn(char *fmt, ...)
 * {
 *	va_list ap;
 *	va_start(ap, fmt);
 *	printf("prefix: %r: suffix\n", fmt, ap);
 *	va_end(ap);
 * }
 *
 * Space or zero padding and a field width are supported for the numeric
 * formats only.
 */
void
kprintf(fmt, flags, tp, ap)
	register const char *fmt;
	int flags;
	struct tty *tp;
	va_list ap;
{
	register char *p, *q;
	register int ch, n;
	u_long ul;
	int base, lflag, tmp, width;
	char padc;

	for (;;) {
		padc = ' ';
		width = 0;
		/* Copy literal text until the next '%' or end of format. */
		while ((ch = *(u_char *)fmt++) != '%') {
			if (ch == '\0')
				return;
			putchar(ch, flags, tp);
		}
		lflag = 0;
reswitch:	switch (ch = *(u_char *)fmt++) {
		case '0':
			padc = '0';
			goto reswitch;
		case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Accumulate a decimal field width. */
			for (width = 0;; ++fmt) {
				width = width * 10 + ch - '0';
				ch = *fmt;
				if (ch < '0' || ch > '9')
					break;
			}
			goto reswitch;
		case 'l':
			lflag = 1;
			goto reswitch;
		case 'b':
			/* Bit-decode: value, then "<base><bit><name>..." */
			ul = va_arg(ap, int);
			p = va_arg(ap, char *);
			for (q = ksprintn(ul, *p++, NULL); ch = *q--;)
				putchar(ch, flags, tp);

			if (!ul)
				break;

			for (tmp = 0; n = *p++;) {
				if (ul & (1 << (n - 1))) {
					/* '<' before first name, ',' after */
					putchar(tmp ? ',' : '<', flags, tp);
					for (; (n = *p) > ' '; ++p)
						putchar(n, flags, tp);
					tmp = 1;
				} else
					/* skip the name of an unset bit */
					for (; *p > ' '; ++p)
						continue;
			}
			if (tmp)
				putchar('>', flags, tp);
			break;
		case 'c':
			putchar(va_arg(ap, int), flags, tp);
			break;
		case 'r':
			/* Recursive format: nested (fmt, va_list) pair. */
			p = va_arg(ap, char *);
			kprintf(p, flags, tp, va_arg(ap, va_list));
			break;
		case 's':
			p = va_arg(ap, char *);
			while (ch = *p++)
				putchar(ch, flags, tp);
			break;
		case 'd':
			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
			if ((long)ul < 0) {
				putchar('-', flags, tp);
				ul = -(long)ul;
			}
			base = 10;
			goto number;
		case 'o':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 8;
			goto number;
		case 'u':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 10;
			goto number;
		case 'x':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 16;
number:			p = ksprintn(ul, base, &tmp);
			/* Pad on the left (digits come back reversed). */
			if (width && (width -= tmp) > 0)
				while (width--)
					putchar(padc, flags, tp);
			while (ch = *p--)
				putchar(ch, flags, tp);
			break;
		default:
			/* Unknown conversion: echo it back literally. */
			putchar('%', flags, tp);
			if (lflag)
				putchar('l', flags, tp);
			/* FALLTHROUGH */
		case '%':
			putchar(ch, flags, tp);
		}
	}
}

/*
 * Print a character on console or users terminal.  If destination is
 * the console then the last MSGBUFS characters are saved in msgbuf for
 * inspection later.
 */
static void
putchar(c, flags, tp)
	register int c;
	int flags;
	struct tty *tp;
{
	extern int msgbufmapped;
	register struct msgbuf *mbp;

	/* While panicking, bypass any console "window" redirection. */
	if (panicstr)
		constty = NULL;
	if ((flags & TOCONS) && tp == NULL && constty) {
		tp = constty;
		flags |= TOTTY;
	}
	/* If the window tty rejects output, stop redirecting to it. */
	if ((flags & TOTTY) && tp && tputchar(c, tp) < 0 &&
	    (flags & TOCONS) && tp == constty)
		constty = NULL;
	if ((flags & TOLOG) &&
	    c != '\0' && c != '\r' && c != 0177 && msgbufmapped) {
		mbp = msgbufp;
		/* (Re)initialize the message buffer if it looks corrupt. */
		if (mbp->msg_magic != MSG_MAGIC) {
			bzero((caddr_t)mbp, sizeof(*mbp));
			mbp->msg_magic = MSG_MAGIC;
		}
		mbp->msg_bufc[mbp->msg_bufx++] = c;
		if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE)
			mbp->msg_bufx = 0;
		/* If the buffer is full, keep the most recent data. */
		if (mbp->msg_bufr == mbp->msg_bufx) {
			if (++mbp->msg_bufr >= MSG_BSIZE)
				mbp->msg_bufr = 0;
		}
	}
	if ((flags & TOCONS) && constty == NULL && c != '\0')
		(*v_putc)(c);
}

/*
 * Scaled down version of sprintf(3).
 */
#ifdef __STDC__
sprintf(char *buf, const char *cfmt, ...)
#else
sprintf(buf, cfmt, va_alist)
	char *buf, *cfmt;
#endif
{
	register const char *fmt = cfmt;
	register char *p, *bp;
	register int ch, base;
	u_long ul;
	int lflag;
	va_list ap;

	/*
	 * NOTE(review): no bounds checking is done on buf; callers must
	 * size the buffer for the worst case.  Returns the length of the
	 * formatted string (excluding the NUL), from the return below.
	 */
	va_start(ap, cfmt);
	for (bp = buf; ; ) {
		while ((ch = *(u_char *)fmt++) != '%')
			if ((*bp++ = ch) == '\0')
				return ((bp - buf) - 1);

		lflag = 0;
reswitch:	switch (ch = *(u_char *)fmt++) {
		case 'l':
			lflag = 1;
			goto reswitch;
		case 'c':
			*bp++ = va_arg(ap, int);
			break;
		case 's':
			p = va_arg(ap, char *);
			while (*bp++ = *p++)
				continue;
			--bp;		/* back up over the copied NUL */
			break;
		case 'd':
			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
			if ((long)ul < 0) {
				*bp++ = '-';
				ul = -(long)ul;
			}
			base = 10;
			goto number;
			break;
		case 'o':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 8;
			goto number;
			break;
		case 'u':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 10;
			goto number;
			break;
		case 'x':
			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
			base = 16;
			/* ksprintn() returns digits in reverse order. */
number:			for (p = ksprintn(ul, base, NULL); ch = *p--;)
				*bp++ = ch;
			break;
		default:
			*bp++ = '%';
			if (lflag)
				*bp++ = 'l';
			/* FALLTHROUGH */
		case '%':
			*bp++ = ch;
		}
	}
	/* NOTE(review): unreachable -- the only exit is the return above. */
	va_end(ap);
}

/*
 * Put a number (base <= 16) in a buffer in reverse order; return an
 * optional length and a pointer to the NULL terminated (preceded?)
 * buffer.
 */
static char *
ksprintn(ul, base, lenp)
	register u_long ul;
	register int base, *lenp;
{					/* A long in base 8, plus NULL. */
	static char buf[sizeof(long) * NBBY / 3 + 2];
	register char *p;

	/*
	 * buf[0] is left as the NUL terminator (static storage is
	 * zero-initialized); digits are stored from buf[1] upward and
	 * the caller walks the returned pointer back down to it.
	 */
	p = buf;
	do {
		*++p = "0123456789abcdef"[ul % base];
	} while (ul /= base);
	if (lenp)
		*lenp = p - buf;
	return (p);
}
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
new file mode 100644
index 000000000000..237553d7c8cb
--- /dev/null
+++ b/sys/kern/subr_prof.c
@@ -0,0 +1,262 @@
/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)subr_prof.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/user.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/malloc.h>
#include <sys/gmon.h>

/*
 * Froms is actually a bunch of unsigned shorts indexing tos
 */
struct gmonparam _gmonparam = { GMON_PROF_OFF };

extern char etext[];

/*
 * Set up the kernel profiling buffers (kcount histogram, froms/tos
 * call-graph arrays) covering KERNBASE..etext.  Profiling starts in
 * the GMON_PROF_OFF state; on allocation failure we simply run
 * unprofiled.
 */
void
kmstartup()
{
	char *cp;
	struct gmonparam *p = &_gmonparam;
	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
	p->lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	p->textsize = p->highpc - p->lowpc;
	printf("Profiling kernel, textsize=%d [%x..%x]\n",
	    p->textsize, p->lowpc, p->highpc);
	p->kcountsize = p->textsize / HISTFRACTION;
	p->hashfraction = HASHFRACTION;
	p->fromssize = p->textsize / HASHFRACTION;
	/* Arc table sized as a percentage of text, clamped to sane bounds. */
	p->tolimit = p->textsize * ARCDENSITY / 100;
	if (p->tolimit < MINARCS)
		p->tolimit = MINARCS;
	else if (p->tolimit > MAXARCS)
		p->tolimit = MAXARCS;
	p->tossize = p->tolimit * sizeof(struct tostruct);
	/* One allocation carved into tos, kcount, and froms regions. */
	cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize,
	    M_GPROF, M_NOWAIT);
	if (cp == 0) {
		printf("No memory for profiling.\n");
		return;
	}
	bzero(cp, p->kcountsize + p->tossize + p->fromssize);
	p->tos = (struct tostruct *)cp;
	cp += p->tossize;
	p->kcount = (u_short *)cp;
	cp += p->kcountsize;
	p->froms = (u_short *)cp;
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	struct gmonparam *gp = &_gmonparam;
	int error;

	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case GPROF_STATE:
		/* Writing the state also starts/stops the profile clock. */
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		if (gp->state == GMON_PROF_OFF)
			stopprofclock(&proc0);
		else
			startprofclock(&proc0);
		return (0);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		/* Parameters are read-only through sysctl. */
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
#endif /* GPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
/* ARGSUSED */
int
profil(p, uap, retval)
	struct proc *p;
	register struct profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(u_int) size;
		syscallarg(u_int) offset;
		syscallarg(u_int) scale;
	} */ *uap;
	register_t *retval;
{
	register struct uprof *upp;
	int s;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(p);
		return (0);
	}
	upp = &p->p_stats->p_prof;

	/* Block profile interrupts while changing state.
 */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(p);
	splx(s);

	return (0);
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  We try to update the user profiling buffers
 * cheaply with fuswintr() and suswintr().  If that fails, we revert to
 * an AST that will vector us to trap() with a context in which copyin
 * and copyout will work.  Trap will then call addupc_task().
 *
 * Note that we may (rarely) not get around to the AST soon enough, and
 * lose profile ticks when the next tick overwrites this one, but in this
 * case the system is overloaded and the profile is probably already
 * inaccurate.
 */
void
addupc_intr(p, pc, ticks)
	register struct proc *p;
	register u_long pc;
	u_int ticks;
{
	register struct uprof *prof;
	register caddr_t addr;
	register u_int i;
	register int v;

	if (ticks == 0)
		return;
	prof = &p->p_stats->p_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;			/* out of range; ignore */

	/* Fast path: fetch/store user short from interrupt context. */
	addr = prof->pr_base + i;
	if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) {
		/* Defer to addupc_task() via an AST. */
		prof->pr_addr = pc;
		prof->pr_ticks = ticks;
		need_proftick(p);
	}
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(p, pc, ticks)
	register struct proc *p;
	register u_long pc;
	u_int ticks;
{
	register struct uprof *prof;
	register caddr_t addr;
	register u_int i;
	u_short v;

	/* Testing P_PROFIL may be unnecessary, but is certainly safe. */
	if ((p->p_flag & P_PROFIL) == 0 || ticks == 0)
		return;

	prof = &p->p_stats->p_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	/* Read-modify-write the user's counter; faults are tolerated. */
	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += ticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	/* Copy failed: the sample buffer is bad; stop profiling. */
	stopprofclock(p);
}
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
new file mode 100644
index 000000000000..45b2d64619f7
--- /dev/null
+++ b/sys/kern/subr_xxx.c
@@ -0,0 +1,117 @@
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)subr_xxx.c	8.3 (Berkeley) 3/29/95
 */

/*
 * Miscellaneous trivial functions, including many
 * that are often inline-expanded or done in assembler.
 * These are stock entries for device switch tables and
 * similar function-pointer slots: each simply returns a
 * fixed errno value (or success).
 */
#include <sys/param.h>
#include <sys/systm.h>

#include <machine/cpu.h>

/*
 * Unsupported device function (e.g. writing to read-only device).
 */
int
enodev()
{

	return (ENODEV);
}

/*
 * Unconfigured device function; driver not configured.
 */
int
enxio()
{

	return (ENXIO);
}

/*
 * Unsupported ioctl function.
 */
int
enoioctl()
{

	return (ENOTTY);
}

/*
 * Unsupported system function.
 * This is used for an otherwise-reasonable operation
 * that is not supported by the current system binary.
 */
int
enosys()
{

	return (ENOSYS);
}

/*
 * Return error for operation not supported
 * on a specific object or file type.
 */
int
eopnotsupp()
{

	return (EOPNOTSUPP);
}

/*
 * Return error for an inval operation
 * on a specific object or file type.
 */
int
einval()
{

	return (EINVAL);
}

/*
 * Generic null operation, always returns success.
 */
int
nullop()
{

	return (0);
}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
new file mode 100644
index 000000000000..08385b3276e0
--- /dev/null
+++ b/sys/kern/sys_generic.c
@@ -0,0 +1,690 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

/*
 * Read system call.
 */
/* ARGSUSED */
int
read(p, uap, retval)
	struct proc *p;
	register struct read_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) nbyte;
	} */ *uap;
	register_t *retval;
{
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	/* Validate the descriptor and require read permission. */
	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);
	aiov.iov_base = (caddr_t)SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, nbyte);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = SCARG(uap, nbyte);
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = SCARG(uap, nbyte);
	/* A partial transfer before interruption counts as success. */
	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, &ktriov,
		    cnt, error);
#endif
	*retval = cnt;
	return (error);
}

/*
 * Scatter read system call.
 */
int
readv(p, uap, retval)
	struct proc *p;
	register struct readv_args /* {
		syscallarg(int) fd;
		syscallarg(struct iovec *) iovp;
		syscallarg(u_int) iovcnt;
	} */ *uap;
	register_t *retval;
{
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	register struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		/* Small vectors use the on-stack array; nothing to free. */
		iov = aiov;
		needfree = NULL;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = SCARG(uap, iovcnt);
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	if (error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen))
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < SCARG(uap, iovcnt); i++) {
		/* Guard against total-length overflow while summing. */
		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
			error = EINVAL;
			goto done;
		}
		auio.uio_resid += iov->iov_len;
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, ktriov,
			    cnt, error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		FREE(needfree, M_IOV);
	return (error);
}

/*
 * Write system call
 */
int
write(p, uap, retval)
	struct proc *p;
	register struct write_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) nbyte;
	} */ *uap;
	register_t *retval;
{
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);
	aiov.iov_base = (caddr_t)SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, nbyte);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = SCARG(uap, nbyte);
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = SCARG(uap, nbyte);
	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writes to broken pipes also deliver SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
		    &ktriov, cnt, error);
#endif
	*retval = cnt;
	return (error);
}

/*
 * Gather write system call
 */
int
writev(p, uap, retval)
	struct proc *p;
	register struct writev_args /* {
		syscallarg(int) fd;
		syscallarg(struct iovec *) iovp;
		syscallarg(u_int) iovcnt;
	} */ *uap;
	register_t *retval;
{
	register struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	struct uio auio;
	register struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
	if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
		if (SCARG(uap, iovcnt) > UIO_MAXIOV)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		iov = aiov;
		needfree = NULL;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = SCARG(uap, iovcnt);
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	if (error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen))
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < SCARG(uap, iovcnt); i++) {
		/* Guard against total-length overflow while summing. */
		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
			error = EINVAL;
			goto done;
		}
		auio.uio_resid += iov->iov_len;
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
			    ktriov, cnt, error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		FREE(needfree, M_IOV);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
ioctl(p, uap, retval)
	struct proc *p;
	register struct ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap;
	register_t *retval;
{
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/* Close-on-exec flags live in the descriptor table; no driver call. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	/* Large argument buffers are heap-allocated; small ones on-stack. */
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Track FNONBLOCK in f_flag and forward to the driver. */
		if (tmp = *(int *)data)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if (tmp = *(int *)data)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		/* Positive arg is a pid; map it to its process group id. */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

/* selwait: sleep channel for select(); nselcoll: collision generation. */
int	selwait, nselcoll;

/*
 * Select system call.
+ */ +int +select(p, uap, retval) + register struct proc *p; + register struct select_args /* { + syscallarg(u_int) nd; + syscallarg(fd_set *) in; + syscallarg(fd_set *) ou; + syscallarg(fd_set *) ex; + syscallarg(struct timeval *) tv; + } */ *uap; + register_t *retval; +{ + fd_set ibits[3], obits[3]; + struct timeval atv; + int s, ncoll, error, timo = 0; + u_int ni; + + bzero((caddr_t)ibits, sizeof(ibits)); + bzero((caddr_t)obits, sizeof(obits)); + if (SCARG(uap, nd) > FD_SETSIZE) + return (EINVAL); + if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { + /* forgiving; slightly wrong */ + SCARG(uap, nd) = p->p_fd->fd_nfiles; + } + ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); + +#define getbits(name, x) \ + if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ + (caddr_t)&ibits[x], ni))) \ + goto done; + getbits(in, 0); + getbits(ou, 1); + getbits(ex, 2); +#undef getbits + + if (SCARG(uap, tv)) { + error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, + sizeof (atv)); + if (error) + goto done; + if (itimerfix(&atv)) { + error = EINVAL; + goto done; + } + s = splclock(); + timevaladd(&atv, (struct timeval *)&time); + splx(s); + } +retry: + ncoll = nselcoll; + p->p_flag |= P_SELECT; + error = selscan(p, ibits, obits, SCARG(uap, nd), retval); + if (error || *retval) + goto done; + s = splhigh(); + if (SCARG(uap, tv)) { + if (timercmp(&time, &atv, >=)) { + splx(s); + goto done; + } + /* + * If poll wait was tiny, this could be zero; we will + * have to round it up to avoid sleeping forever. If + * we retry below, the timercmp above will get us out. + * Note that if wait was 0, the timercmp will prevent + * us from getting here the first time. 
+ */ + timo = hzto(&atv); + if (timo == 0) + timo = 1; + } + if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { + splx(s); + goto retry; + } + p->p_flag &= ~P_SELECT; + error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); + splx(s); + if (error == 0) + goto retry; +done: + p->p_flag &= ~P_SELECT; + /* select is not restarted after signals... */ + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) + error = 0; +#define putbits(name, x) \ + if (SCARG(uap, name) && (error2 = copyout((caddr_t)&obits[x], \ + (caddr_t)SCARG(uap, name), ni))) \ + error = error2; + if (error == 0) { + int error2; + + putbits(in, 0); + putbits(ou, 1); + putbits(ex, 2); +#undef putbits + } + return (error); +} + +int +selscan(p, ibits, obits, nfd, retval) + struct proc *p; + fd_set *ibits, *obits; + int nfd; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + register int msk, i, j, fd; + register fd_mask bits; + struct file *fp; + int n = 0; + static int flag[3] = { FREAD, FWRITE, 0 }; + + for (msk = 0; msk < 3; msk++) { + for (i = 0; i < nfd; i += NFDBITS) { + bits = ibits[msk].fds_bits[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (fp == NULL) + return (EBADF); + if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { + FD_SET(fd, &obits[msk]); + n++; + } + } + } + } + *retval = n; + return (0); +} + +/*ARGSUSED*/ +int +seltrue(dev, flag, p) + dev_t dev; + int flag; + struct proc *p; +{ + + return (1); +} + +/* + * Record a select request. + */ +void +selrecord(selector, sip) + struct proc *selector; + struct selinfo *sip; +{ + struct proc *p; + pid_t mypid; + + mypid = selector->p_pid; + if (sip->si_pid == mypid) + return; + if (sip->si_pid && (p = pfind(sip->si_pid)) && + p->p_wchan == (caddr_t)&selwait) + sip->si_flags |= SI_COLL; + else + sip->si_pid = mypid; +} + +/* + * Do a wakeup when a selectable event occurs. 
+ */ +void +selwakeup(sip) + register struct selinfo *sip; +{ + register struct proc *p; + int s; + + if (sip->si_pid == 0) + return; + if (sip->si_flags & SI_COLL) { + nselcoll++; + sip->si_flags &= ~SI_COLL; + wakeup((caddr_t)&selwait); + } + p = pfind(sip->si_pid); + sip->si_pid = 0; + if (p != NULL) { + s = splhigh(); + if (p->p_wchan == (caddr_t)&selwait) { + if (p->p_stat == SSLEEP) + setrunnable(p); + else + unsleep(p); + } else if (p->p_flag & P_SELECT) + p->p_flag &= ~P_SELECT; + splx(s); + } +} diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c new file mode 100644 index 000000000000..abc2dc75ec8f --- /dev/null +++ b/sys/kern/sys_socket.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sys_socket.c 8.3 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/ioctl.h> +#include <sys/stat.h> + +#include <net/if.h> +#include <net/route.h> + +struct fileops socketops = + { soo_read, soo_write, soo_ioctl, soo_select, soo_close }; + +/* ARGSUSED */ +int +soo_read(fp, uio, cred) + struct file *fp; + struct uio *uio; + struct ucred *cred; +{ + + return (soreceive((struct socket *)fp->f_data, (struct mbuf **)0, + uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0)); +} + +/* ARGSUSED */ +int +soo_write(fp, uio, cred) + struct file *fp; + struct uio *uio; + struct ucred *cred; +{ + + return (sosend((struct socket *)fp->f_data, (struct mbuf *)0, + uio, (struct mbuf *)0, (struct mbuf *)0, 0)); +} + +int +soo_ioctl(fp, cmd, data, p) + struct file *fp; + u_long cmd; + register caddr_t data; + struct proc *p; +{ + register struct socket *so = (struct socket *)fp->f_data; + + switch (cmd) { + + case FIONBIO: + if (*(int *)data) + so->so_state |= SS_NBIO; + else + so->so_state &= ~SS_NBIO; + return (0); + + case FIOASYNC: + if (*(int *)data) { + so->so_state |= SS_ASYNC; + so->so_rcv.sb_flags |= SB_ASYNC; + so->so_snd.sb_flags |= SB_ASYNC; + } else { + so->so_state &= ~SS_ASYNC; + so->so_rcv.sb_flags &= ~SB_ASYNC; + so->so_snd.sb_flags &= ~SB_ASYNC; 
+ } + return (0); + + case FIONREAD: + *(int *)data = so->so_rcv.sb_cc; + return (0); + + case SIOCSPGRP: + so->so_pgid = *(int *)data; + return (0); + + case SIOCGPGRP: + *(int *)data = so->so_pgid; + return (0); + + case SIOCATMARK: + *(int *)data = (so->so_state&SS_RCVATMARK) != 0; + return (0); + } + /* + * Interface/routing/protocol specific ioctls: + * interface and routing ioctls should have a + * different entry since a socket's unnecessary + */ + if (IOCGROUP(cmd) == 'i') + return (ifioctl(so, cmd, data, p)); + if (IOCGROUP(cmd) == 'r') + return (rtioctl(cmd, data, p)); + return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, + (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0)); +} + +int +soo_select(fp, which, p) + struct file *fp; + int which; + struct proc *p; +{ + register struct socket *so = (struct socket *)fp->f_data; + register int s = splnet(); + + switch (which) { + + case FREAD: + if (soreadable(so)) { + splx(s); + return (1); + } + selrecord(p, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + + case FWRITE: + if (sowriteable(so)) { + splx(s); + return (1); + } + selrecord(p, &so->so_snd.sb_sel); + so->so_snd.sb_flags |= SB_SEL; + break; + + case 0: + if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { + splx(s); + return (1); + } + selrecord(p, &so->so_rcv.sb_sel); + so->so_rcv.sb_flags |= SB_SEL; + break; + } + splx(s); + return (0); +} + +int +soo_stat(so, ub) + register struct socket *so; + register struct stat *ub; +{ + + bzero((caddr_t)ub, sizeof (*ub)); + ub->st_mode = S_IFSOCK; + return ((*so->so_proto->pr_usrreq)(so, PRU_SENSE, + (struct mbuf *)ub, (struct mbuf *)0, + (struct mbuf *)0)); +} + +/* ARGSUSED */ +int +soo_close(fp, p) + struct file *fp; + struct proc *p; +{ + int error = 0; + + if (fp->f_data) + error = soclose((struct socket *)fp->f_data); + fp->f_data = 0; + return (error); +} diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c new file mode 100644 index 000000000000..91cbdc937f8d --- /dev/null 
+++ b/sys/kern/syscalls.c @@ -0,0 +1,279 @@ +/* + * System call names. + * + * DO NOT EDIT-- this file is automatically generated. + * created from @(#)syscalls.master 8.6 (Berkeley) 3/30/95 + */ + +char *syscallnames[] = { + "syscall", /* 0 = syscall */ + "exit", /* 1 = exit */ + "fork", /* 2 = fork */ + "read", /* 3 = read */ + "write", /* 4 = write */ + "open", /* 5 = open */ + "close", /* 6 = close */ + "wait4", /* 7 = wait4 */ + "compat_43_creat", /* 8 = compat_43 creat */ + "link", /* 9 = link */ + "unlink", /* 10 = unlink */ + "#11 (obsolete execv)", /* 11 = obsolete execv */ + "chdir", /* 12 = chdir */ + "fchdir", /* 13 = fchdir */ + "mknod", /* 14 = mknod */ + "chmod", /* 15 = chmod */ + "chown", /* 16 = chown */ + "break", /* 17 = break */ + "getfsstat", /* 18 = getfsstat */ + "compat_43_lseek", /* 19 = compat_43 lseek */ + "getpid", /* 20 = getpid */ + "mount", /* 21 = mount */ + "unmount", /* 22 = unmount */ + "setuid", /* 23 = setuid */ + "getuid", /* 24 = getuid */ + "geteuid", /* 25 = geteuid */ + "ptrace", /* 26 = ptrace */ + "recvmsg", /* 27 = recvmsg */ + "sendmsg", /* 28 = sendmsg */ + "recvfrom", /* 29 = recvfrom */ + "accept", /* 30 = accept */ + "getpeername", /* 31 = getpeername */ + "getsockname", /* 32 = getsockname */ + "access", /* 33 = access */ + "chflags", /* 34 = chflags */ + "fchflags", /* 35 = fchflags */ + "sync", /* 36 = sync */ + "kill", /* 37 = kill */ + "compat_43_stat", /* 38 = compat_43 stat */ + "getppid", /* 39 = getppid */ + "compat_43_lstat", /* 40 = compat_43 lstat */ + "dup", /* 41 = dup */ + "pipe", /* 42 = pipe */ + "getegid", /* 43 = getegid */ + "profil", /* 44 = profil */ +#ifdef KTRACE + "ktrace", /* 45 = ktrace */ +#else + "#45 (unimplemented ktrace)", /* 45 = unimplemented ktrace */ +#endif + "sigaction", /* 46 = sigaction */ + "getgid", /* 47 = getgid */ + "sigprocmask", /* 48 = sigprocmask */ + "getlogin", /* 49 = getlogin */ + "setlogin", /* 50 = setlogin */ + "acct", /* 51 = acct */ + "sigpending", /* 52 = 
sigpending */ + "sigaltstack", /* 53 = sigaltstack */ + "ioctl", /* 54 = ioctl */ + "reboot", /* 55 = reboot */ + "revoke", /* 56 = revoke */ + "symlink", /* 57 = symlink */ + "readlink", /* 58 = readlink */ + "execve", /* 59 = execve */ + "umask", /* 60 = umask */ + "chroot", /* 61 = chroot */ + "compat_43_fstat", /* 62 = compat_43 fstat */ + "compat_43_getkerninfo", /* 63 = compat_43 getkerninfo */ + "compat_43_getpagesize", /* 64 = compat_43 getpagesize */ + "msync", /* 65 = msync */ + "vfork", /* 66 = vfork */ + "#67 (obsolete vread)", /* 67 = obsolete vread */ + "#68 (obsolete vwrite)", /* 68 = obsolete vwrite */ + "sbrk", /* 69 = sbrk */ + "sstk", /* 70 = sstk */ + "compat_43_mmap", /* 71 = compat_43 mmap */ + "vadvise", /* 72 = vadvise */ + "munmap", /* 73 = munmap */ + "mprotect", /* 74 = mprotect */ + "madvise", /* 75 = madvise */ + "#76 (obsolete vhangup)", /* 76 = obsolete vhangup */ + "#77 (obsolete vlimit)", /* 77 = obsolete vlimit */ + "mincore", /* 78 = mincore */ + "getgroups", /* 79 = getgroups */ + "setgroups", /* 80 = setgroups */ + "getpgrp", /* 81 = getpgrp */ + "setpgid", /* 82 = setpgid */ + "setitimer", /* 83 = setitimer */ + "compat_43_wait", /* 84 = compat_43 wait */ + "swapon", /* 85 = swapon */ + "getitimer", /* 86 = getitimer */ + "compat_43_gethostname", /* 87 = compat_43 gethostname */ + "compat_43_sethostname", /* 88 = compat_43 sethostname */ + "getdtablesize", /* 89 = getdtablesize */ + "dup2", /* 90 = dup2 */ + "#91 (unimplemented getdopt)", /* 91 = unimplemented getdopt */ + "fcntl", /* 92 = fcntl */ + "select", /* 93 = select */ + "#94 (unimplemented setdopt)", /* 94 = unimplemented setdopt */ + "fsync", /* 95 = fsync */ + "setpriority", /* 96 = setpriority */ + "socket", /* 97 = socket */ + "connect", /* 98 = connect */ + "compat_43_accept", /* 99 = compat_43 accept */ + "getpriority", /* 100 = getpriority */ + "compat_43_send", /* 101 = compat_43 send */ + "compat_43_recv", /* 102 = compat_43 recv */ + "sigreturn", /* 103 = 
sigreturn */ + "bind", /* 104 = bind */ + "setsockopt", /* 105 = setsockopt */ + "listen", /* 106 = listen */ + "#107 (obsolete vtimes)", /* 107 = obsolete vtimes */ + "compat_43_sigvec", /* 108 = compat_43 sigvec */ + "compat_43_sigblock", /* 109 = compat_43 sigblock */ + "compat_43_sigsetmask", /* 110 = compat_43 sigsetmask */ + "sigsuspend", /* 111 = sigsuspend */ + "compat_43_sigstack", /* 112 = compat_43 sigstack */ + "compat_43_recvmsg", /* 113 = compat_43 recvmsg */ + "compat_43_sendmsg", /* 114 = compat_43 sendmsg */ +#ifdef TRACE + "vtrace", /* 115 = vtrace */ +#else + "#115 (obsolete vtrace)", /* 115 = obsolete vtrace */ +#endif + "gettimeofday", /* 116 = gettimeofday */ + "getrusage", /* 117 = getrusage */ + "getsockopt", /* 118 = getsockopt */ +#ifdef vax + "resuba", /* 119 = resuba */ +#else + "#119 (unimplemented resuba)", /* 119 = unimplemented resuba */ +#endif + "readv", /* 120 = readv */ + "writev", /* 121 = writev */ + "settimeofday", /* 122 = settimeofday */ + "fchown", /* 123 = fchown */ + "fchmod", /* 124 = fchmod */ + "compat_43_recvfrom", /* 125 = compat_43 recvfrom */ + "compat_43_setreuid", /* 126 = compat_43 setreuid */ + "compat_43_setregid", /* 127 = compat_43 setregid */ + "rename", /* 128 = rename */ + "compat_43_truncate", /* 129 = compat_43 truncate */ + "compat_43_ftruncate", /* 130 = compat_43 ftruncate */ + "flock", /* 131 = flock */ + "mkfifo", /* 132 = mkfifo */ + "sendto", /* 133 = sendto */ + "shutdown", /* 134 = shutdown */ + "socketpair", /* 135 = socketpair */ + "mkdir", /* 136 = mkdir */ + "rmdir", /* 137 = rmdir */ + "utimes", /* 138 = utimes */ + "#139 (obsolete 4.2 sigreturn)", /* 139 = obsolete 4.2 sigreturn */ + "adjtime", /* 140 = adjtime */ + "compat_43_getpeername", /* 141 = compat_43 getpeername */ + "compat_43_gethostid", /* 142 = compat_43 gethostid */ + "compat_43_sethostid", /* 143 = compat_43 sethostid */ + "compat_43_getrlimit", /* 144 = compat_43 getrlimit */ + "compat_43_setrlimit", /* 145 = compat_43 
setrlimit */ + "compat_43_killpg", /* 146 = compat_43 killpg */ + "setsid", /* 147 = setsid */ + "quotactl", /* 148 = quotactl */ + "compat_43_quota", /* 149 = compat_43 quota */ + "compat_43_getsockname", /* 150 = compat_43 getsockname */ + "#151 (unimplemented)", /* 151 = unimplemented */ + "#152 (unimplemented)", /* 152 = unimplemented */ + "#153 (unimplemented)", /* 153 = unimplemented */ + "#154 (unimplemented)", /* 154 = unimplemented */ +#ifdef NFS + "nfssvc", /* 155 = nfssvc */ +#else + "#155 (unimplemented nfssvc)", /* 155 = unimplemented nfssvc */ +#endif + "compat_43_getdirentries", /* 156 = compat_43 getdirentries */ + "statfs", /* 157 = statfs */ + "fstatfs", /* 158 = fstatfs */ + "#159 (unimplemented)", /* 159 = unimplemented */ + "#160 (unimplemented)", /* 160 = unimplemented */ +#ifdef NFS + "getfh", /* 161 = getfh */ +#else + "#161 (unimplemented getfh)", /* 161 = unimplemented getfh */ +#endif + "#162 (unimplemented getdomainname)", /* 162 = unimplemented getdomainname */ + "#163 (unimplemented setdomainname)", /* 163 = unimplemented setdomainname */ + "#164 (unimplemented)", /* 164 = unimplemented */ + "#165 (unimplemented)", /* 165 = unimplemented */ + "#166 (unimplemented)", /* 166 = unimplemented */ + "#167 (unimplemented)", /* 167 = unimplemented */ + "#168 (unimplemented)", /* 168 = unimplemented */ + "#169 (unimplemented semsys)", /* 169 = unimplemented semsys */ + "#170 (unimplemented msgsys)", /* 170 = unimplemented msgsys */ +#if defined(SYSVSHM) && !defined(alpha) + "compat_43_shmsys", /* 171 = compat_43 shmsys */ +#else + "#171 (unimplemented shmsys)", /* 171 = unimplemented shmsys */ +#endif + "#172 (unimplemented)", /* 172 = unimplemented */ + "#173 (unimplemented)", /* 173 = unimplemented */ + "#174 (unimplemented)", /* 174 = unimplemented */ + "#175 (unimplemented)", /* 175 = unimplemented */ + "#176 (unimplemented)", /* 176 = unimplemented */ + "#177 (unimplemented)", /* 177 = unimplemented */ + "#178 (unimplemented)", /* 178 = 
unimplemented */ + "#179 (unimplemented)", /* 179 = unimplemented */ + "#180 (unimplemented)", /* 180 = unimplemented */ + "setgid", /* 181 = setgid */ + "setegid", /* 182 = setegid */ + "seteuid", /* 183 = seteuid */ +#ifdef LFS + "lfs_bmapv", /* 184 = lfs_bmapv */ + "lfs_markv", /* 185 = lfs_markv */ + "lfs_segclean", /* 186 = lfs_segclean */ + "lfs_segwait", /* 187 = lfs_segwait */ +#else + "#184 (unimplemented lfs_bmapv)", /* 184 = unimplemented lfs_bmapv */ + "#185 (unimplemented lfs_markv)", /* 185 = unimplemented lfs_markv */ + "#186 (unimplemented lfs_segclean)", /* 186 = unimplemented lfs_segclean */ + "#187 (unimplemented lfs_segwait)", /* 187 = unimplemented lfs_segwait */ +#endif + "stat", /* 188 = stat */ + "fstat", /* 189 = fstat */ + "lstat", /* 190 = lstat */ + "pathconf", /* 191 = pathconf */ + "fpathconf", /* 192 = fpathconf */ + "#193 (unimplemented)", /* 193 = unimplemented */ + "getrlimit", /* 194 = getrlimit */ + "setrlimit", /* 195 = setrlimit */ + "getdirentries", /* 196 = getdirentries */ + "mmap", /* 197 = mmap */ + "__syscall", /* 198 = __syscall */ + "lseek", /* 199 = lseek */ + "truncate", /* 200 = truncate */ + "ftruncate", /* 201 = ftruncate */ + "__sysctl", /* 202 = __sysctl */ + "mlock", /* 203 = mlock */ + "munlock", /* 204 = munlock */ + "undelete", /* 205 = undelete */ + "#206 (unimplemented)", /* 206 = unimplemented */ + "#207 (unimplemented)", /* 207 = unimplemented */ + "#208 (unimplemented)", /* 208 = unimplemented */ + "#209 (unimplemented)", /* 209 = unimplemented */ + "#210 (unimplemented)", /* 210 = unimplemented */ + "#211 (unimplemented)", /* 211 = unimplemented */ + "#212 (unimplemented)", /* 212 = unimplemented */ + "#213 (unimplemented)", /* 213 = unimplemented */ + "#214 (unimplemented)", /* 214 = unimplemented */ + "#215 (unimplemented)", /* 215 = unimplemented */ + "#216 (unimplemented)", /* 216 = unimplemented */ + "#217 (unimplemented)", /* 217 = unimplemented */ + "#218 (unimplemented)", /* 218 = unimplemented 
*/ + "#219 (unimplemented)", /* 219 = unimplemented */ + "#220 (unimplemented semctl)", /* 220 = unimplemented semctl */ + "#221 (unimplemented semget)", /* 221 = unimplemented semget */ + "#222 (unimplemented semop)", /* 222 = unimplemented semop */ + "#223 (unimplemented semconfig)", /* 223 = unimplemented semconfig */ + "#224 (unimplemented msgctl)", /* 224 = unimplemented msgctl */ + "#225 (unimplemented msgget)", /* 225 = unimplemented msgget */ + "#226 (unimplemented msgsnd)", /* 226 = unimplemented msgsnd */ + "#227 (unimplemented msgrcv)", /* 227 = unimplemented msgrcv */ +#if defined(SYSVSHM) && 0 + "shmat", /* 228 = shmat */ + "shmctl", /* 229 = shmctl */ + "shmdt", /* 230 = shmdt */ + "shmget", /* 231 = shmget */ +#else + "#228 (unimplemented shmat)", /* 228 = unimplemented shmat */ + "#229 (unimplemented shmctl)", /* 229 = unimplemented shmctl */ + "#230 (unimplemented shmdt)", /* 230 = unimplemented shmdt */ + "#231 (unimplemented shmget)", /* 231 = unimplemented shmget */ +#endif +}; diff --git a/sys/kern/syscalls.conf b/sys/kern/syscalls.conf new file mode 100644 index 000000000000..71b82ceff152 --- /dev/null +++ b/sys/kern/syscalls.conf @@ -0,0 +1,12 @@ +# @(#)syscalls.conf 8.1 (Berkeley) 2/14/95 + +sysnames="syscalls.c" +sysnumhdr="../sys/syscall.h" +syssw="init_sysent.c" +sysarghdr="../sys/syscallargs.h" +compatopts="compat_43" +libcompatopts="" + +switchname="sysent" +namesname="syscallnames" +constprefix="SYS_" diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master new file mode 100644 index 000000000000..b57cd73bde30 --- /dev/null +++ b/sys/kern/syscalls.master @@ -0,0 +1,355 @@ + @(#)syscalls.master 8.6 (Berkeley) 3/30/95 +; System call name/number "master" file. +; (See syscalls.conf to see what it is processed into.) +; +; Fields: number type [type-dependent ...] +; number system call number, must be in order +; type one of STD, OBSOL, UNIMPL, NODEF, NOARGS, or one of +; the compatibility options defined in syscalls.conf. 
+; +; types: +; STD always included +; OBSOL obsolete, not included in system +; UNIMPL unimplemented, not included in system +; NODEF included, but don't define the syscall number +; NOARGS included, but don't define the syscall args structure +; +; The compat options are defined in the syscalls.conf file, and the +; compat option name is prefixed to the syscall name. Other than +; that, they're like NODEF (for 'compat' options), or STD (for +; 'libcompat' options). +; +; The type-dependent arguments are as follows: +; For STD, NODEF, NOARGS, and compat syscalls: +; { pseudo-proto } [alias] +; For other syscalls: +; [comment] +; +; #ifdef's, etc. may be included, and are copied to the output files. +; #include's are copied to the syscall switch definition file only. + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/signal.h> +#include <sys/mount.h> +#include <sys/syscallargs.h> + +; Reserved/unimplemented system calls in the range 0-150 inclusive +; are reserved for use in future Berkeley releases. +; Additional system calls implemented in vendor and other +; redistributions should be placed in the reserved range at the end +; of the current calls. 
+ +0 STD { int nosys(void); } syscall +1 STD { int exit(int rval); } +2 STD { int fork(void); } +3 STD { int read(int fd, char *buf, u_int nbyte); } +4 STD { int write(int fd, char *buf, u_int nbyte); } +5 STD { int open(char *path, int flags, int mode); } +6 STD { int close(int fd); } +7 STD { int wait4(int pid, int *status, int options, \ + struct rusage *rusage); } +8 COMPAT_43 { int creat(char *path, int mode); } +9 STD { int link(char *path, char *link); } +10 STD { int unlink(char *path); } +11 OBSOL execv +12 STD { int chdir(char *path); } +13 STD { int fchdir(int fd); } +14 STD { int mknod(char *path, int mode, int dev); } +15 STD { int chmod(char *path, int mode); } +16 STD { int chown(char *path, int uid, int gid); } +17 STD { int obreak(char *nsize); } break +18 STD { int getfsstat(struct statfs *buf, long bufsize, \ + int flags); } +19 COMPAT_43 { long lseek(int fd, long offset, int whence); } +20 STD { pid_t getpid(void); } +21 STD { int mount(char *type, char *path, int flags, \ + caddr_t data); } +22 STD { int unmount(char *path, int flags); } +23 STD { int setuid(uid_t uid); } +24 STD { uid_t getuid(void); } +25 STD { uid_t geteuid(void); } +26 STD { int ptrace(int req, pid_t pid, caddr_t addr, \ + int data); } +27 STD { int recvmsg(int s, struct msghdr *msg, int flags); } +28 STD { int sendmsg(int s, caddr_t msg, int flags); } +29 STD { int recvfrom(int s, caddr_t buf, size_t len, \ + int flags, caddr_t from, int *fromlenaddr); } +30 STD { int accept(int s, caddr_t name, int *anamelen); } +31 STD { int getpeername(int fdes, caddr_t asa, int *alen); } +32 STD { int getsockname(int fdes, caddr_t asa, int *alen); } +33 STD { int access(char *path, int flags); } +34 STD { int chflags(char *path, int flags); } +35 STD { int fchflags(int fd, int flags); } +36 STD { int sync(void); } +37 STD { int kill(int pid, int signum); } +38 COMPAT_43 { int stat(char *path, struct ostat *ub); } +39 STD { pid_t getppid(void); } +40 COMPAT_43 { int lstat(char *path, 
struct ostat *ub); } +41 STD { int dup(u_int fd); } +42 STD { int pipe(void); } +43 STD { gid_t getegid(void); } +44 STD { int profil(caddr_t samples, u_int size, \ + u_int offset, u_int scale); } +#ifdef KTRACE +45 STD { int ktrace(char *fname, int ops, int facs, \ + int pid); } +#else +45 UNIMPL ktrace +#endif +46 STD { int sigaction(int signum, struct sigaction *nsa, \ + struct sigaction *osa); } +47 STD { gid_t getgid(void); } +48 STD { int sigprocmask(int how, sigset_t mask); } +49 STD { int getlogin(char *namebuf, u_int namelen); } +50 STD { int setlogin(char *namebuf); } +51 STD { int acct(char *path); } +52 STD { int sigpending(void); } +53 STD { int sigaltstack(struct sigaltstack *nss, \ + struct sigaltstack *oss); } +54 STD { int ioctl(int fd, u_long com, caddr_t data); } +55 STD { int reboot(int opt); } +56 STD { int revoke(char *path); } +57 STD { int symlink(char *path, char *link); } +58 STD { int readlink(char *path, char *buf, int count); } +59 STD { int execve(char *path, char **argp, char **envp); } +60 STD { int umask(int newmask); } +61 STD { int chroot(char *path); } +62 COMPAT_43 { int fstat(int fd, struct ostat *sb); } +63 COMPAT_43 { int getkerninfo(int op, char *where, int *size, \ + int arg); } +64 COMPAT_43 { int getpagesize(void); } +65 STD { int msync(caddr_t addr, int len); } +66 STD { int vfork(void); } +67 OBSOL vread +68 OBSOL vwrite +69 STD { int sbrk(int incr); } +70 STD { int sstk(int incr); } +71 COMPAT_43 { int mmap(caddr_t addr, int len, int prot, \ + int flags, int fd, long pos); } +72 STD { int ovadvise(int anom); } vadvise +73 STD { int munmap(caddr_t addr, int len); } +74 STD { int mprotect(caddr_t addr, int len, int prot); } +75 STD { int madvise(caddr_t addr, int len, int behav); } +76 OBSOL vhangup +77 OBSOL vlimit +78 STD { int mincore(caddr_t addr, int len, char *vec); } +79 STD { int getgroups(u_int gidsetsize, gid_t *gidset); } +80 STD { int setgroups(u_int gidsetsize, gid_t *gidset); } +81 STD { int getpgrp(void); 
} +82 STD { int setpgid(int pid, int pgid); } +83 STD { int setitimer(u_int which, struct itimerval *itv, \ + struct itimerval *oitv); } +84 COMPAT_43 { int wait(void); } +85 STD { int swapon(char *name); } +86 STD { int getitimer(u_int which, struct itimerval *itv); } +87 COMPAT_43 { int gethostname(char *hostname, u_int len); } +88 COMPAT_43 { int sethostname(char *hostname, u_int len); } +89 STD { int getdtablesize(void); } +90 STD { int dup2(u_int from, u_int to); } +91 UNIMPL getdopt +92 STD { int fcntl(int fd, int cmd, void *arg); } +93 STD { int select(u_int nd, fd_set *in, fd_set *ou, \ + fd_set *ex, struct timeval *tv); } +94 UNIMPL setdopt +95 STD { int fsync(int fd); } +96 STD { int setpriority(int which, int who, int prio); } +97 STD { int socket(int domain, int type, int protocol); } +98 STD { int connect(int s, caddr_t name, int namelen); } +99 COMPAT_43 { int accept(int s, caddr_t name, int *anamelen); } +100 STD { int getpriority(int which, int who); } +101 COMPAT_43 { int send(int s, caddr_t buf, int len, int flags); } +102 COMPAT_43 { int recv(int s, caddr_t buf, int len, int flags); } +103 STD { int sigreturn(struct sigcontext *sigcntxp); } +104 STD { int bind(int s, caddr_t name, int namelen); } +105 STD { int setsockopt(int s, int level, int name, \ + caddr_t val, int valsize); } +106 STD { int listen(int s, int backlog); } +107 OBSOL vtimes +108 COMPAT_43 { int sigvec(int signum, struct sigvec *nsv, \ + struct sigvec *osv); } +109 COMPAT_43 { int sigblock(int mask); } +110 COMPAT_43 { int sigsetmask(int mask); } +111 STD { int sigsuspend(int mask); } +112 COMPAT_43 { int sigstack(struct sigstack *nss, \ + struct sigstack *oss); } +113 COMPAT_43 { int recvmsg(int s, struct omsghdr *msg, int flags); } +114 COMPAT_43 { int sendmsg(int s, caddr_t msg, int flags); } +#ifdef TRACE +115 STD { int vtrace(int request, int value); } +#else +115 OBSOL vtrace +#endif +116 STD { int gettimeofday(struct timeval *tp, \ + struct timezone *tzp); } +117 STD { 
int getrusage(int who, struct rusage *rusage); } +118 STD { int getsockopt(int s, int level, int name, \ + caddr_t val, int *avalsize); } +#ifdef vax +119 STD { int resuba(int value); } +#else +119 UNIMPL resuba +#endif +120 STD { int readv(int fd, struct iovec *iovp, u_int iovcnt); } +121 STD { int writev(int fd, struct iovec *iovp, \ + u_int iovcnt); } +122 STD { int settimeofday(struct timeval *tv, \ + struct timezone *tzp); } +123 STD { int fchown(int fd, int uid, int gid); } +124 STD { int fchmod(int fd, int mode); } +125 COMPAT_43 { int recvfrom(int s, caddr_t buf, size_t len, \ + int flags, caddr_t from, int *fromlenaddr); } +126 COMPAT_43 { int setreuid(int ruid, int euid); } +127 COMPAT_43 { int setregid(int rgid, int egid); } +128 STD { int rename(char *from, char *to); } +129 COMPAT_43 { int truncate(char *path, long length); } +130 COMPAT_43 { int ftruncate(int fd, long length); } +131 STD { int flock(int fd, int how); } +132 STD { int mkfifo(char *path, int mode); } +133 STD { int sendto(int s, caddr_t buf, size_t len, \ + int flags, caddr_t to, int tolen); } +134 STD { int shutdown(int s, int how); } +135 STD { int socketpair(int domain, int type, int protocol, \ + int *rsv); } +136 STD { int mkdir(char *path, int mode); } +137 STD { int rmdir(char *path); } +138 STD { int utimes(char *path, struct timeval *tptr); } +139 OBSOL 4.2 sigreturn +140 STD { int adjtime(struct timeval *delta, \ + struct timeval *olddelta); } +141 COMPAT_43 { int getpeername(int fdes, caddr_t asa, int *alen); } +142 COMPAT_43 { int32_t gethostid(void); } +143 COMPAT_43 { int sethostid(int32_t hostid); } +144 COMPAT_43 { int getrlimit(u_int which, struct ogetrlimit *rlp); } +145 COMPAT_43 { int setrlimit(u_int which, struct ogetrlimit *rlp); } +146 COMPAT_43 { int killpg(int pgid, int signum); } +147 STD { int setsid(void); } +148 STD { int quotactl(char *path, int cmd, int uid, \ + caddr_t arg); } +149 COMPAT_43 { int quota(void); } +150 COMPAT_43 { int getsockname(int fdec, 
caddr_t asa, int *alen); } + +; Syscalls 151-180 inclusive are reserved for vendor-specific +; system calls. (This includes various calls added for compatibity +; with other Unix variants.) +; Some of these calls are now supported by BSD... +151 UNIMPL +152 UNIMPL +153 UNIMPL +154 UNIMPL +#ifdef NFS +155 STD { int nfssvc(int flag, caddr_t argp); } +#else +155 UNIMPL nfssvc +#endif +156 COMPAT_43 { int getdirentries(int fd, char *buf, u_int count, \ + long *basep); } +157 STD { int statfs(char *path, struct statfs *buf); } +158 STD { int fstatfs(int fd, struct statfs *buf); } +159 UNIMPL +160 UNIMPL +#ifdef NFS +161 STD { int getfh(char *fname, fhandle_t *fhp); } +#else +161 UNIMPL getfh +#endif +162 UNIMPL getdomainname +163 UNIMPL setdomainname +164 UNIMPL +165 UNIMPL +166 UNIMPL +167 UNIMPL +168 UNIMPL +169 UNIMPL semsys +170 UNIMPL msgsys +; XXX more generally, never on machines where sizeof(void *) != sizeof(int) +#if defined(SYSVSHM) && !defined(alpha) +171 COMPAT_43 { int shmsys(int which, int a2, int a3, int a4); } +#else +171 UNIMPL shmsys +#endif +172 UNIMPL +173 UNIMPL +174 UNIMPL +175 UNIMPL +176 UNIMPL +177 UNIMPL +178 UNIMPL +179 UNIMPL +180 UNIMPL + +; Syscalls 180-209 are used by/reserved for BSD +181 STD { int setgid(gid_t gid); } +182 STD { int setegid(gid_t egid); } +183 STD { int seteuid(uid_t euid); } +#ifdef LFS +184 STD { int lfs_bmapv(fsid_t *fsidp, \ + struct block_info *blkiov, int blkcnt); } +185 STD { int lfs_markv(fsid_t *fsidp, \ + struct block_info *blkiov, int blkcnt); } +186 STD { int lfs_segclean(fsid_t *fsidp, u_long segment); } +187 STD { int lfs_segwait(fsid_t *fsidp, struct timeval *tv); } +#else +184 UNIMPL lfs_bmapv +185 UNIMPL lfs_markv +186 UNIMPL lfs_segclean +187 UNIMPL lfs_segwait +#endif +188 STD { int stat(char *path, struct stat *ub); } +189 STD { int fstat(int fd, struct stat *sb); } +190 STD { int lstat(char *path, struct stat *ub); } +191 STD { int pathconf(char *path, int name); } +192 STD { int fpathconf(int fd, 
int name); } +193 UNIMPL +194 STD { int getrlimit(u_int which, struct rlimit *rlp); } +195 STD { int setrlimit(u_int which, struct rlimit *rlp); } +196 STD { int getdirentries(int fd, char *buf, u_int count, \ + long *basep); } +197 STD { caddr_t mmap(caddr_t addr, size_t len, int prot, \ + int flags, int fd, long pad, off_t pos); } +198 STD { int nosys(void); } __syscall +199 STD { off_t lseek(int fd, int pad, off_t offset, \ + int whence); } +200 STD { int truncate(char *path, int pad, off_t length); } +201 STD { int ftruncate(int fd, int pad, off_t length); } +202 STD { int __sysctl(int *name, u_int namelen, void *old, \ + size_t *oldlenp, void *new, size_t newlen); } +203 STD { int mlock(caddr_t addr, size_t len); } +204 STD { int munlock(caddr_t addr, size_t len); } +205 STD { int undelete(char *path); } +206 UNIMPL +207 UNIMPL +208 UNIMPL +209 UNIMPL +; Syscalls 210-219 are used by/reserved for vendor-specific system calls +210 UNIMPL +211 UNIMPL +212 UNIMPL +213 UNIMPL +214 UNIMPL +215 UNIMPL +216 UNIMPL +217 UNIMPL +218 UNIMPL +219 UNIMPL +; System calls 220-240 are reserved for use by BSD +220 UNIMPL semctl +221 UNIMPL semget +222 UNIMPL semop +223 UNIMPL semconfig +224 UNIMPL msgctl +225 UNIMPL msgget +226 UNIMPL msgsnd +227 UNIMPL msgrcv +#if defined(SYSVSHM) && 0 +228 STD { int shmat(int shmid, void *shmaddr, int shmflg); } +229 STD { int shmctl(int shmid, int cmd, \ + struct shmid_ds *buf); } +230 STD { int shmdt(void *shmaddr); } +231 STD { int shmget(key_t key, int size, int shmflg); } +#else +228 UNIMPL shmat +229 UNIMPL shmctl +230 UNIMPL shmdt +231 UNIMPL shmget +#endif diff --git a/sys/kern/tty.c b/sys/kern/tty.c new file mode 100644 index 000000000000..5d698b111d6a --- /dev/null +++ b/sys/kern/tty.c @@ -0,0 +1,1927 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty.c 8.13 (Berkeley) 1/9/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#define TTYDEFCHARS +#include <sys/tty.h> +#undef TTYDEFCHARS +#include <sys/file.h> +#include <sys/conf.h> +#include <sys/dkstat.h> +#include <sys/uio.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/syslog.h> + +#include <vm/vm.h> + +static int proc_compare __P((struct proc *p1, struct proc *p2)); +static int ttnread __P((struct tty *)); +static void ttyblock __P((struct tty *tp)); +static void ttyecho __P((int, struct tty *tp)); +static void ttyrubo __P((struct tty *, int)); + +/* Symbolic sleep message strings. */ +char ttclos[] = "ttycls"; +char ttopen[] = "ttyopn"; +char ttybg[] = "ttybg"; +char ttybuf[] = "ttybuf"; +char ttyin[] = "ttyin"; +char ttyout[] = "ttyout"; + +/* + * Table with character classes and parity. The 8th bit indicates parity, + * the 7th bit indicates the character is an alphameric or underscore (for + * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits + * are 0 then the character needs no special processing on output; classes + * other than 0 might be translated or (not currently) require delays. + */ +#define E 0x00 /* Even parity. */ +#define O 0x80 /* Odd parity. */ +#define PARITY(c) (char_type[c] & O) + +#define ALPHA 0x40 /* Alpha or underscore. 
*/ +#define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA) + +#define CCLASSMASK 0x3f +#define CCLASS(c) (char_type[c] & CCLASSMASK) + +#define BS BACKSPACE +#define CC CONTROL +#define CR RETURN +#define NA ORDINARY | ALPHA +#define NL NEWLINE +#define NO ORDINARY +#define TB TAB +#define VT VTAB + +char const char_type[] = { + E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */ + O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */ + O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */ + E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */ + O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */ + E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */ + O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */ + O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */ + E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */ + O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */ + E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */ + O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */ + O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */ + E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */ + /* + * Meta chars; should be settable per character set; + * for now, treat them all as normal characters. 
+ */ + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, +}; +#undef BS +#undef CC +#undef CR +#undef NA +#undef NL +#undef NO +#undef TB +#undef VT + +/* Macros to clear/set/test flags. */ +#define SET(t, f) (t) |= (f) +#define CLR(t, f) (t) &= ~(f) +#define ISSET(t, f) ((t) & (f)) + +/* + * Initial open of tty, or (re)entry to standard tty line discipline. + */ +int +ttyopen(device, tp) + dev_t device; + register struct tty *tp; +{ + int s; + + s = spltty(); + tp->t_dev = device; + if (!ISSET(tp->t_state, TS_ISOPEN)) { + SET(tp->t_state, TS_ISOPEN); + bzero(&tp->t_winsize, sizeof(tp->t_winsize)); + } + CLR(tp->t_state, TS_WOPEN); + splx(s); + return (0); +} + +/* + * Handle close() on a tty line: flush and set to initial state, + * bumping generation number so that pending read/write calls + * can detect recycling of the tty. + */ +int +ttyclose(tp) + register struct tty *tp; +{ + extern struct tty *constty; /* Temporary virtual console. */ + + if (constty == tp) + constty = NULL; + + ttyflush(tp, FREAD | FWRITE); + + tp->t_gen++; + tp->t_pgrp = NULL; + tp->t_session = NULL; + tp->t_state = 0; + return (0); +} + +#define FLUSHQ(q) { \ + if ((q)->c_cc) \ + ndflush(q, (q)->c_cc); \ +} + +/* Is 'c' a line delimiter ("break" character)? */ +#define TTBREAKC(c) \ + ((c) == '\n' || ((c) == cc[VEOF] || \ + (c) == cc[VEOL] || (c) == cc[VEOL2]) && (c) != _POSIX_VDISABLE) + + +/* + * Process input of a single character received on a tty. 
+ */ +int +ttyinput(c, tp) + register int c; + register struct tty *tp; +{ + register int iflag, lflag; + register u_char *cc; + int i, err; + + /* + * If input is pending take it first. + */ + lflag = tp->t_lflag; + if (ISSET(lflag, PENDIN)) + ttypend(tp); + /* + * Gather stats. + */ + if (ISSET(lflag, ICANON)) { + ++tk_cancc; + ++tp->t_cancc; + } else { + ++tk_rawcc; + ++tp->t_rawcc; + } + ++tk_nin; + + /* Handle exceptional conditions (break, parity, framing). */ + cc = tp->t_cc; + iflag = tp->t_iflag; + if (err = (ISSET(c, TTY_ERRORMASK))) { + CLR(c, TTY_ERRORMASK); + if (ISSET(err, TTY_FE) && !c) { /* Break. */ + if (ISSET(iflag, IGNBRK)) + goto endcase; + else if (ISSET(iflag, BRKINT) && + ISSET(lflag, ISIG) && + (cc[VINTR] != _POSIX_VDISABLE)) + c = cc[VINTR]; + else if (ISSET(iflag, PARMRK)) + goto parmrk; + } else if (ISSET(err, TTY_PE) && + ISSET(iflag, INPCK) || ISSET(err, TTY_FE)) { + if (ISSET(iflag, IGNPAR)) + goto endcase; + else if (ISSET(iflag, PARMRK)) { +parmrk: (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); + (void)putc(0 | TTY_QUOTE, &tp->t_rawq); + (void)putc(c | TTY_QUOTE, &tp->t_rawq); + goto endcase; + } else + c = 0; + } + } + /* + * In tandem mode, check high water mark. + */ + if (ISSET(iflag, IXOFF)) + ttyblock(tp); + if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP)) + CLR(c, 0x80); + if (!ISSET(lflag, EXTPROC)) { + /* + * Check for literal nexting very first + */ + if (ISSET(tp->t_state, TS_LNCH)) { + SET(c, TTY_QUOTE); + CLR(tp->t_state, TS_LNCH); + } + /* + * Scan for special characters. This code + * is really just a big case statement with + * non-constant cases. The bottom of the + * case statement is labeled ``endcase'', so goto + * it after a case match, or similar. + */ + + /* + * Control chars which aren't controlled + * by ICANON, ISIG, or IXON. 
+ */ + if (ISSET(lflag, IEXTEN)) { + if (CCEQ(cc[VLNEXT], c)) { + if (ISSET(lflag, ECHO)) { + if (ISSET(lflag, ECHOE)) { + (void)ttyoutput('^', tp); + (void)ttyoutput('\b', tp); + } else + ttyecho(c, tp); + } + SET(tp->t_state, TS_LNCH); + goto endcase; + } + if (CCEQ(cc[VDISCARD], c)) { + if (ISSET(lflag, FLUSHO)) + CLR(tp->t_lflag, FLUSHO); + else { + ttyflush(tp, FWRITE); + ttyecho(c, tp); + if (tp->t_rawq.c_cc + tp->t_canq.c_cc) + ttyretype(tp); + SET(tp->t_lflag, FLUSHO); + } + goto startoutput; + } + } + /* + * Signals. + */ + if (ISSET(lflag, ISIG)) { + if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) { + if (!ISSET(lflag, NOFLSH)) + ttyflush(tp, FREAD | FWRITE); + ttyecho(c, tp); + pgsignal(tp->t_pgrp, + CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1); + goto endcase; + } + if (CCEQ(cc[VSUSP], c)) { + if (!ISSET(lflag, NOFLSH)) + ttyflush(tp, FREAD); + ttyecho(c, tp); + pgsignal(tp->t_pgrp, SIGTSTP, 1); + goto endcase; + } + } + /* + * Handle start/stop characters. + */ + if (ISSET(iflag, IXON)) { + if (CCEQ(cc[VSTOP], c)) { + if (!ISSET(tp->t_state, TS_TTSTOP)) { + SET(tp->t_state, TS_TTSTOP); +#ifdef sun4c /* XXX */ + (*tp->t_stop)(tp, 0); +#else + (*cdevsw[major(tp->t_dev)].d_stop)(tp, + 0); +#endif + return (0); + } + if (!CCEQ(cc[VSTART], c)) + return (0); + /* + * if VSTART == VSTOP then toggle + */ + goto endcase; + } + if (CCEQ(cc[VSTART], c)) + goto restartoutput; + } + /* + * IGNCR, ICRNL, & INLCR + */ + if (c == '\r') { + if (ISSET(iflag, IGNCR)) + goto endcase; + else if (ISSET(iflag, ICRNL)) + c = '\n'; + } else if (c == '\n' && ISSET(iflag, INLCR)) + c = '\r'; + } + if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) { + /* + * From here on down canonical mode character + * processing takes place. + */ + /* + * erase (^H / ^?) 
+ */ + if (CCEQ(cc[VERASE], c)) { + if (tp->t_rawq.c_cc) + ttyrub(unputc(&tp->t_rawq), tp); + goto endcase; + } + /* + * kill (^U) + */ + if (CCEQ(cc[VKILL], c)) { + if (ISSET(lflag, ECHOKE) && + tp->t_rawq.c_cc == tp->t_rocount && + !ISSET(lflag, ECHOPRT)) + while (tp->t_rawq.c_cc) + ttyrub(unputc(&tp->t_rawq), tp); + else { + ttyecho(c, tp); + if (ISSET(lflag, ECHOK) || + ISSET(lflag, ECHOKE)) + ttyecho('\n', tp); + FLUSHQ(&tp->t_rawq); + tp->t_rocount = 0; + } + CLR(tp->t_state, TS_LOCAL); + goto endcase; + } + /* + * word erase (^W) + */ + if (CCEQ(cc[VWERASE], c)) { + int alt = ISSET(lflag, ALTWERASE); + int ctype; + + /* + * erase whitespace + */ + while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t') + ttyrub(c, tp); + if (c == -1) + goto endcase; + /* + * erase last char of word and remember the + * next chars type (for ALTWERASE) + */ + ttyrub(c, tp); + c = unputc(&tp->t_rawq); + if (c == -1) + goto endcase; + if (c == ' ' || c == '\t') { + (void)putc(c, &tp->t_rawq); + goto endcase; + } + ctype = ISALPHA(c); + /* + * erase rest of word + */ + do { + ttyrub(c, tp); + c = unputc(&tp->t_rawq); + if (c == -1) + goto endcase; + } while (c != ' ' && c != '\t' && + (alt == 0 || ISALPHA(c) == ctype)); + (void)putc(c, &tp->t_rawq); + goto endcase; + } + /* + * reprint line (^R) + */ + if (CCEQ(cc[VREPRINT], c)) { + ttyretype(tp); + goto endcase; + } + /* + * ^T - kernel info and generate SIGINFO + */ + if (CCEQ(cc[VSTATUS], c)) { + if (ISSET(lflag, ISIG)) + pgsignal(tp->t_pgrp, SIGINFO, 1); + if (!ISSET(lflag, NOKERNINFO)) + ttyinfo(tp); + goto endcase; + } + } + /* + * Check for input buffer overflow + */ + if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= TTYHOG) { + if (ISSET(iflag, IMAXBEL)) { + if (tp->t_outq.c_cc < tp->t_hiwat) + (void)ttyoutput(CTRL('g'), tp); + } else + ttyflush(tp, FREAD | FWRITE); + goto endcase; + } + /* + * Put data char in q for user and + * wakeup on seeing a line delimiter. 
+ */ + if (putc(c, &tp->t_rawq) >= 0) { + if (!ISSET(lflag, ICANON)) { + ttwakeup(tp); + ttyecho(c, tp); + goto endcase; + } + if (TTBREAKC(c)) { + tp->t_rocount = 0; + catq(&tp->t_rawq, &tp->t_canq); + ttwakeup(tp); + } else if (tp->t_rocount++ == 0) + tp->t_rocol = tp->t_column; + if (ISSET(tp->t_state, TS_ERASE)) { + /* + * end of prterase \.../ + */ + CLR(tp->t_state, TS_ERASE); + (void)ttyoutput('/', tp); + } + i = tp->t_column; + ttyecho(c, tp); + if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) { + /* + * Place the cursor over the '^' of the ^D. + */ + i = min(2, tp->t_column - i); + while (i > 0) { + (void)ttyoutput('\b', tp); + i--; + } + } + } +endcase: + /* + * IXANY means allow any character to restart output. + */ + if (ISSET(tp->t_state, TS_TTSTOP) && + !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP]) + return (0); +restartoutput: + CLR(tp->t_lflag, FLUSHO); + CLR(tp->t_state, TS_TTSTOP); +startoutput: + return (ttstart(tp)); +} + +/* + * Output a single character on a tty, doing output processing + * as needed (expanding tabs, newline processing, etc.). + * Returns < 0 if succeeds, otherwise returns char to resend. + * Must be recursive. + */ +int +ttyoutput(c, tp) + register int c; + register struct tty *tp; +{ + register long oflag; + register int notout, col, s; + + oflag = tp->t_oflag; + if (!ISSET(oflag, OPOST)) { + if (ISSET(tp->t_lflag, FLUSHO)) + return (-1); + if (putc(c, &tp->t_outq)) + return (c); + tk_nout++; + tp->t_outcc++; + return (-1); + } + /* + * Do tab expansion if OXTABS is set. Special case if we external + * processing, we don't do the tab expansion because we'll probably + * get it wrong. If tab expansion needs to be done, let it happen + * externally. + */ + CLR(c, ~TTY_CHARMASK); + if (c == '\t' && + ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) { + c = 8 - (tp->t_column & 7); + if (ISSET(tp->t_lflag, FLUSHO)) { + notout = 0; + } else { + s = spltty(); /* Don't interrupt tabs. 
*/ + notout = b_to_q(" ", c, &tp->t_outq); + c -= notout; + tk_nout += c; + tp->t_outcc += c; + splx(s); + } + tp->t_column += c; + return (notout ? '\t' : -1); + } + if (c == CEOT && ISSET(oflag, ONOEOT)) + return (-1); + + /* + * Newline translation: if ONLCR is set, + * translate newline into "\r\n". + */ + if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) { + tk_nout++; + tp->t_outcc++; + if (putc('\r', &tp->t_outq)) + return (c); + } + tk_nout++; + tp->t_outcc++; + if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq)) + return (c); + + col = tp->t_column; + switch (CCLASS(c)) { + case BACKSPACE: + if (col > 0) + --col; + break; + case CONTROL: + break; + case NEWLINE: + case RETURN: + col = 0; + break; + case ORDINARY: + ++col; + break; + case TAB: + col = (col + 8) & ~7; + break; + } + tp->t_column = col; + return (-1); +} + +/* + * Ioctls for all tty devices. Called after line-discipline specific ioctl + * has been called to do discipline-specific functions and/or reject any + * of these ioctl commands. + */ +/* ARGSUSED */ +int +ttioctl(tp, cmd, data, flag) + register struct tty *tp; + u_long cmd; + void *data; + int flag; +{ + extern struct tty *constty; /* Temporary virtual console. */ + extern int nlinesw; + register struct proc *p; + int s, error; + + p = curproc; /* XXX */ + + /* If the ioctl involves modification, hang if in the background. 
*/ + switch (cmd) { + case TIOCFLUSH: + case TIOCSETA: + case TIOCSETD: + case TIOCSETAF: + case TIOCSETAW: +#ifdef notdef + case TIOCSPGRP: +#endif + case TIOCSTI: + case TIOCSWINSZ: +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + case TIOCLBIC: + case TIOCLBIS: + case TIOCLSET: + case TIOCSETC: + case OTIOCSETD: + case TIOCSETN: + case TIOCSETP: + case TIOCSLTC: +#endif + while (isbackground(curproc, tp) && + p->p_pgrp->pg_jobc && (p->p_flag & P_PPWAIT) == 0 && + (p->p_sigignore & sigmask(SIGTTOU)) == 0 && + (p->p_sigmask & sigmask(SIGTTOU)) == 0) { + pgsignal(p->p_pgrp, SIGTTOU, 1); + if (error = ttysleep(tp, + &lbolt, TTOPRI | PCATCH, ttybg, 0)) + return (error); + } + break; + } + + switch (cmd) { /* Process the ioctl. */ + case FIOASYNC: /* set/clear async i/o */ + s = spltty(); + if (*(int *)data) + SET(tp->t_state, TS_ASYNC); + else + CLR(tp->t_state, TS_ASYNC); + splx(s); + break; + case FIONBIO: /* set/clear non-blocking i/o */ + break; /* XXX: delete. */ + case FIONREAD: /* get # bytes to read */ + *(int *)data = ttnread(tp); + break; + case TIOCEXCL: /* set exclusive use of tty */ + s = spltty(); + SET(tp->t_state, TS_XCLUDE); + splx(s); + break; + case TIOCFLUSH: { /* flush buffers */ + register int flags = *(int *)data; + + if (flags == 0) + flags = FREAD | FWRITE; + else + flags &= FREAD | FWRITE; + ttyflush(tp, flags); + break; + } + case TIOCCONS: /* become virtual console */ + if (*(int *)data) { + if (constty && constty != tp && + ISSET(constty->t_state, TS_CARR_ON | TS_ISOPEN) == + (TS_CARR_ON | TS_ISOPEN)) + return (EBUSY); +#ifndef UCONSOLE + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); +#endif + constty = tp; + } else if (tp == constty) + constty = NULL; + break; + case TIOCDRAIN: /* wait till output drained */ + if (error = ttywait(tp)) + return (error); + break; + case TIOCGETA: { /* get termios struct */ + struct termios *t = (struct termios *)data; + + bcopy(&tp->t_termios, t, sizeof(struct termios)); + break; + } + 
case TIOCGETD: /* get line discipline */ + *(int *)data = tp->t_line; + break; + case TIOCGWINSZ: /* get window size */ + *(struct winsize *)data = tp->t_winsize; + break; + case TIOCGPGRP: /* get pgrp of tty */ + if (!isctty(p, tp)) + return (ENOTTY); + *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; + break; +#ifdef TIOCHPCL + case TIOCHPCL: /* hang up on last close */ + s = spltty(); + SET(tp->t_cflag, HUPCL); + splx(s); + break; +#endif + case TIOCNXCL: /* reset exclusive use of tty */ + s = spltty(); + CLR(tp->t_state, TS_XCLUDE); + splx(s); + break; + case TIOCOUTQ: /* output queue size */ + *(int *)data = tp->t_outq.c_cc; + break; + case TIOCSETA: /* set termios struct */ + case TIOCSETAW: /* drain output, set */ + case TIOCSETAF: { /* drn out, fls in, set */ + register struct termios *t = (struct termios *)data; + + s = spltty(); + if (cmd == TIOCSETAW || cmd == TIOCSETAF) { + if (error = ttywait(tp)) { + splx(s); + return (error); + } + if (cmd == TIOCSETAF) + ttyflush(tp, FREAD); + } + if (!ISSET(t->c_cflag, CIGNORE)) { + /* + * Set device hardware. + */ + if (tp->t_param && (error = (*tp->t_param)(tp, t))) { + splx(s); + return (error); + } else { + if (!ISSET(tp->t_state, TS_CARR_ON) && + ISSET(tp->t_cflag, CLOCAL) && + !ISSET(t->c_cflag, CLOCAL)) { + CLR(tp->t_state, TS_ISOPEN); + SET(tp->t_state, TS_WOPEN); + ttwakeup(tp); + } + tp->t_cflag = t->c_cflag; + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + } + ttsetwater(tp); + } + if (cmd != TIOCSETAF) { + if (ISSET(t->c_lflag, ICANON) != + ISSET(tp->t_lflag, ICANON)) + if (ISSET(t->c_lflag, ICANON)) { + SET(tp->t_lflag, PENDIN); + ttwakeup(tp); + } else { + struct clist tq; + + catq(&tp->t_rawq, &tp->t_canq); + tq = tp->t_rawq; + tp->t_rawq = tp->t_canq; + tp->t_canq = tq; + CLR(tp->t_lflag, PENDIN); + } + } + tp->t_iflag = t->c_iflag; + tp->t_oflag = t->c_oflag; + /* + * Make the EXTPROC bit read only. 
+ */ + if (ISSET(tp->t_lflag, EXTPROC)) + SET(t->c_lflag, EXTPROC); + else + CLR(t->c_lflag, EXTPROC); + tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN); + bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc)); + splx(s); + break; + } + case TIOCSETD: { /* set line discipline */ + register int t = *(int *)data; + dev_t device = tp->t_dev; + + if ((u_int)t >= nlinesw) + return (ENXIO); + if (t != tp->t_line) { + s = spltty(); + (*linesw[tp->t_line].l_close)(tp, flag); + error = (*linesw[t].l_open)(device, tp); + if (error) { + (void)(*linesw[tp->t_line].l_open)(device, tp); + splx(s); + return (error); + } + tp->t_line = t; + splx(s); + } + break; + } + case TIOCSTART: /* start output, like ^Q */ + s = spltty(); + if (ISSET(tp->t_state, TS_TTSTOP) || + ISSET(tp->t_lflag, FLUSHO)) { + CLR(tp->t_lflag, FLUSHO); + CLR(tp->t_state, TS_TTSTOP); + ttstart(tp); + } + splx(s); + break; + case TIOCSTI: /* simulate terminal input */ + if (p->p_ucred->cr_uid && (flag & FREAD) == 0) + return (EPERM); + if (p->p_ucred->cr_uid && !isctty(p, tp)) + return (EACCES); + (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); + break; + case TIOCSTOP: /* stop output, like ^S */ + s = spltty(); + if (!ISSET(tp->t_state, TS_TTSTOP)) { + SET(tp->t_state, TS_TTSTOP); +#ifdef sun4c /* XXX */ + (*tp->t_stop)(tp, 0); +#else + (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0); +#endif + } + splx(s); + break; + case TIOCSCTTY: /* become controlling tty */ + /* Session ctty vnode pointer set in vnode layer. 
*/ + if (!SESS_LEADER(p) || + (p->p_session->s_ttyvp || tp->t_session) && + (tp->t_session != p->p_session)) + return (EPERM); + tp->t_session = p->p_session; + tp->t_pgrp = p->p_pgrp; + p->p_session->s_ttyp = tp; + p->p_flag |= P_CONTROLT; + break; + case TIOCSPGRP: { /* set pgrp of tty */ + register struct pgrp *pgrp = pgfind(*(int *)data); + + if (!isctty(p, tp)) + return (ENOTTY); + else if (pgrp == NULL || pgrp->pg_session != p->p_session) + return (EPERM); + tp->t_pgrp = pgrp; + break; + } + case TIOCSWINSZ: /* set window size */ + if (bcmp((caddr_t)&tp->t_winsize, data, + sizeof (struct winsize))) { + tp->t_winsize = *(struct winsize *)data; + pgsignal(tp->t_pgrp, SIGWINCH, 1); + } + break; + default: +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + return (ttcompat(tp, cmd, data, flag)); +#else + return (-1); +#endif + } + return (0); +} + +int +ttselect(device, rw, p) + dev_t device; + int rw; + struct proc *p; +{ + register struct tty *tp; + int nread, s; + + tp = &cdevsw[major(device)].d_ttys[minor(device)]; + + s = spltty(); + switch (rw) { + case FREAD: + nread = ttnread(tp); + if (nread > 0 || !ISSET(tp->t_cflag, CLOCAL) && + !ISSET(tp->t_state, TS_CARR_ON)) + goto win; + selrecord(p, &tp->t_rsel); + break; + case FWRITE: + if (tp->t_outq.c_cc <= tp->t_lowat) { +win: splx(s); + return (1); + } + selrecord(p, &tp->t_wsel); + break; + } + splx(s); + return (0); +} + +static int +ttnread(tp) + struct tty *tp; +{ + int nread; + + if (ISSET(tp->t_lflag, PENDIN)) + ttypend(tp); + nread = tp->t_canq.c_cc; + if (!ISSET(tp->t_lflag, ICANON)) + nread += tp->t_rawq.c_cc; + return (nread); +} + +/* + * Wait for output to drain. 
+ */ +int +ttywait(tp) + register struct tty *tp; +{ + int error, s; + + error = 0; + s = spltty(); + while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && + (ISSET(tp->t_state, TS_CARR_ON) || ISSET(tp->t_cflag, CLOCAL)) + && tp->t_oproc) { + (*tp->t_oproc)(tp); + SET(tp->t_state, TS_ASLEEP); + if (error = ttysleep(tp, + &tp->t_outq, TTOPRI | PCATCH, ttyout, 0)) + break; + } + splx(s); + return (error); +} + +/* + * Flush if successfully wait. + */ +int +ttywflush(tp) + struct tty *tp; +{ + int error; + + if ((error = ttywait(tp)) == 0) + ttyflush(tp, FREAD); + return (error); +} + +/* + * Flush tty read and/or write queues, notifying anyone waiting. + */ +void +ttyflush(tp, rw) + register struct tty *tp; + int rw; +{ + register int s; + + s = spltty(); + if (rw & FREAD) { + FLUSHQ(&tp->t_canq); + FLUSHQ(&tp->t_rawq); + tp->t_rocount = 0; + tp->t_rocol = 0; + CLR(tp->t_state, TS_LOCAL); + ttwakeup(tp); + } + if (rw & FWRITE) { + CLR(tp->t_state, TS_TTSTOP); +#ifdef sun4c /* XXX */ + (*tp->t_stop)(tp, rw); +#else + (*cdevsw[major(tp->t_dev)].d_stop)(tp, rw); +#endif + FLUSHQ(&tp->t_outq); + wakeup((caddr_t)&tp->t_outq); + selwakeup(&tp->t_wsel); + } + splx(s); +} + +/* + * Copy in the default termios characters. + */ +void +ttychars(tp) + struct tty *tp; +{ + + bcopy(ttydefchars, tp->t_cc, sizeof(ttydefchars)); +} + +/* + * Send stop character on input overflow. + */ +static void +ttyblock(tp) + register struct tty *tp; +{ + register int total; + + total = tp->t_rawq.c_cc + tp->t_canq.c_cc; + if (tp->t_rawq.c_cc > TTYHOG) { + ttyflush(tp, FREAD | FWRITE); + CLR(tp->t_state, TS_TBLOCK); + } + /* + * Block further input iff: current input > threshold + * AND input is available to user program. 
+ */ + if (total >= TTYHOG / 2 && + !ISSET(tp->t_state, TS_TBLOCK) && + !ISSET(tp->t_lflag, ICANON) || tp->t_canq.c_cc > 0 && + tp->t_cc[VSTOP] != _POSIX_VDISABLE) { + if (putc(tp->t_cc[VSTOP], &tp->t_outq) == 0) { + SET(tp->t_state, TS_TBLOCK); + ttstart(tp); + } + } +} + +void +ttrstrt(tp_arg) + void *tp_arg; +{ + struct tty *tp; + int s; + +#ifdef DIAGNOSTIC + if (tp_arg == NULL) + panic("ttrstrt"); +#endif + tp = tp_arg; + s = spltty(); + + CLR(tp->t_state, TS_TIMEOUT); + ttstart(tp); + + splx(s); +} + +int +ttstart(tp) + struct tty *tp; +{ + + if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */ + (*tp->t_oproc)(tp); + return (0); +} + +/* + * "close" a line discipline + */ +int +ttylclose(tp, flag) + struct tty *tp; + int flag; +{ + + if (flag & IO_NDELAY) + ttyflush(tp, FREAD | FWRITE); + else + ttywflush(tp); + return (0); +} + +/* + * Handle modem control transition on a tty. + * Flag indicates new state of carrier. + * Returns 0 if the line should be turned off, otherwise 1. + */ +int +ttymodem(tp, flag) + register struct tty *tp; + int flag; +{ + + if (!ISSET(tp->t_state, TS_WOPEN) && ISSET(tp->t_cflag, MDMBUF)) { + /* + * MDMBUF: do flow control according to carrier flag + */ + if (flag) { + CLR(tp->t_state, TS_TTSTOP); + ttstart(tp); + } else if (!ISSET(tp->t_state, TS_TTSTOP)) { + SET(tp->t_state, TS_TTSTOP); +#ifdef sun4c /* XXX */ + (*tp->t_stop)(tp, 0); +#else + (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0); +#endif + } + } else if (flag == 0) { + /* + * Lost carrier. + */ + CLR(tp->t_state, TS_CARR_ON); + if (ISSET(tp->t_state, TS_ISOPEN) && + !ISSET(tp->t_cflag, CLOCAL)) { + if (tp->t_session && tp->t_session->s_leader) + psignal(tp->t_session->s_leader, SIGHUP); + ttyflush(tp, FREAD | FWRITE); + return (0); + } + } else { + /* + * Carrier now on. + */ + SET(tp->t_state, TS_CARR_ON); + ttwakeup(tp); + } + return (1); +} + +/* + * Default modem control routine (for other line disciplines). + * Return argument flag, to turn off device on carrier drop. 
+ */ +int +nullmodem(tp, flag) + register struct tty *tp; + int flag; +{ + + if (flag) + SET(tp->t_state, TS_CARR_ON); + else { + CLR(tp->t_state, TS_CARR_ON); + if (!ISSET(tp->t_cflag, CLOCAL)) { + if (tp->t_session && tp->t_session->s_leader) + psignal(tp->t_session->s_leader, SIGHUP); + return (0); + } + } + return (1); +} + +/* + * Reinput pending characters after state switch + * call at spltty(). + */ +void +ttypend(tp) + register struct tty *tp; +{ + struct clist tq; + register c; + + CLR(tp->t_lflag, PENDIN); + SET(tp->t_state, TS_TYPEN); + tq = tp->t_rawq; + tp->t_rawq.c_cc = 0; + tp->t_rawq.c_cf = tp->t_rawq.c_cl = 0; + while ((c = getc(&tq)) >= 0) + ttyinput(c, tp); + CLR(tp->t_state, TS_TYPEN); +} + +/* + * Process a read call on a tty device. + */ +int +ttread(tp, uio, flag) + register struct tty *tp; + struct uio *uio; + int flag; +{ + register struct clist *qp; + register int c; + register long lflag; + register u_char *cc = tp->t_cc; + register struct proc *p = curproc; + int s, first, error = 0; + +loop: lflag = tp->t_lflag; + s = spltty(); + /* + * take pending input first + */ + if (ISSET(lflag, PENDIN)) + ttypend(tp); + splx(s); + + /* + * Hang process if it's in the background. + */ + if (isbackground(p, tp)) { + if ((p->p_sigignore & sigmask(SIGTTIN)) || + (p->p_sigmask & sigmask(SIGTTIN)) || + p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0) + return (EIO); + pgsignal(p->p_pgrp, SIGTTIN, 1); + if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0)) + return (error); + goto loop; + } + + /* + * If canonical, use the canonical queue, + * else use the raw queue. + * + * (should get rid of clists...) + */ + qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq; + + /* + * If there is no input, sleep on rawq + * awaiting hardware receipt and notification. + * If we have data, we don't need to check for carrier. 
+ */ + s = spltty(); + if (qp->c_cc <= 0) { + int carrier; + + carrier = ISSET(tp->t_state, TS_CARR_ON) || + ISSET(tp->t_cflag, CLOCAL); + if (!carrier && ISSET(tp->t_state, TS_ISOPEN)) { + splx(s); + return (0); /* EOF */ + } + if (flag & IO_NDELAY) { + splx(s); + return (EWOULDBLOCK); + } + error = ttysleep(tp, &tp->t_rawq, TTIPRI | PCATCH, + carrier ? ttyin : ttopen, 0); + splx(s); + if (error) + return (error); + goto loop; + } + splx(s); + + /* + * Input present, check for input mapping and processing. + */ + first = 1; + while ((c = getc(qp)) >= 0) { + /* + * delayed suspend (^Y) + */ + if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, ISIG)) { + pgsignal(tp->t_pgrp, SIGTSTP, 1); + if (first) { + if (error = ttysleep(tp, + &lbolt, TTIPRI | PCATCH, ttybg, 0)) + break; + goto loop; + } + break; + } + /* + * Interpret EOF only in canonical mode. + */ + if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON)) + break; + /* + * Give user character. + */ + error = ureadc(c, uio); + if (error) + break; + if (uio->uio_resid == 0) + break; + /* + * In canonical mode check for a "break character" + * marking the end of a "line of input". + */ + if (ISSET(lflag, ICANON) && TTBREAKC(c)) + break; + first = 0; + } + /* + * Look to unblock output now that (presumably) + * the input queue has gone down. + */ + s = spltty(); + if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc < TTYHOG/5) { + if (cc[VSTART] != _POSIX_VDISABLE && + putc(cc[VSTART], &tp->t_outq) == 0) { + CLR(tp->t_state, TS_TBLOCK); + ttstart(tp); + } + } + splx(s); + return (error); +} + +/* + * Check the output queue on tp for space for a kernel message (from uprintf + * or tprintf). Allow some space over the normal hiwater mark so we don't + * lose messages due to normal flow control, but don't let the tty run amok. + * Sleeps here are not interruptible, but we return prematurely if new signals + * arrive. 
 */
/*
 * ttycheckoutq --
 *	Returns 1 when there is room on the output queue for a kernel
 *	message, 0 when the caller should give up.  When `wait' is set,
 *	drains over-full output (sleeping uninterruptibly at PZERO - 1,
 *	woken by a one-tick timeout) but bails out early if a new signal
 *	arrives while waiting.
 */
int
ttycheckoutq(tp, wait)
	register struct tty *tp;
	int wait;
{
	int hiwat, s, oldsig;

	hiwat = tp->t_hiwat;
	s = spltty();
	/* Snapshot pending signals so a newly arriving one aborts the wait. */
	oldsig = wait ? curproc->p_siglist : 0;
	if (tp->t_outq.c_cc > hiwat + 200)
		while (tp->t_outq.c_cc > hiwat) {
			ttstart(tp);
			if (wait == 0 || curproc->p_siglist != oldsig) {
				splx(s);
				return (0);
			}
			/* Arrange a wakeup in one second in case output
			 * never drains on its own. */
			timeout((void (*)__P((void *)))wakeup,
			    (void *)&tp->t_outq, hz);
			SET(tp->t_state, TS_ASLEEP);
			sleep((caddr_t)&tp->t_outq, PZERO - 1);
		}
	splx(s);
	return (1);
}

/*
 * Process a write call on a tty device.
 *
 * tp	- tty to write to
 * uio	- source of user data
 * flag	- IO_NDELAY selects non-blocking behavior
 *
 * Waits for carrier (unless CLOCAL), honors TOSTOP job control
 * (SIGTTOU), then copies user data in OBUFSIZ chunks, routing special
 * characters through ttyoutput() and transferring runs of ordinary
 * characters with b_to_q().  Sleeps at `ovhiwat' when the output queue
 * exceeds the high-water mark.
 */
int
ttwrite(tp, uio, flag)
	register struct tty *tp;
	register struct uio *uio;
	int flag;
{
	register char *cp;
	register int cc, ce;
	register struct proc *p;
	int i, hiwat, cnt, error, s;
	char obuf[OBUFSIZ];

	hiwat = tp->t_hiwat;
	cnt = uio->uio_resid;
	error = 0;
	cc = 0;
loop:
	s = spltty();
	if (!ISSET(tp->t_state, TS_CARR_ON) &&
	    !ISSET(tp->t_cflag, CLOCAL)) {
		if (ISSET(tp->t_state, TS_ISOPEN)) {
			splx(s);
			return (EIO);
		} else if (flag & IO_NDELAY) {
			splx(s);
			error = EWOULDBLOCK;
			goto out;
		} else {
			/* Sleep awaiting carrier. */
			error = ttysleep(tp,
			    &tp->t_rawq, TTIPRI | PCATCH,ttopen, 0);
			splx(s);
			if (error)
				goto out;
			goto loop;
		}
	}
	splx(s);
	/*
	 * Hang the process if it's in the background.
	 */
	p = curproc;
	if (isbackground(p, tp) &&
	    ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 &&
	    (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
	    (p->p_sigmask & sigmask(SIGTTOU)) == 0 &&
	     p->p_pgrp->pg_jobc) {
		pgsignal(p->p_pgrp, SIGTTOU, 1);
		if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
			goto out;
		goto loop;
	}
	/*
	 * Process the user's data in at most OBUFSIZ chunks.  Perform any
	 * output translation.  Keep track of high water mark, sleep on
	 * overflow awaiting device aid in acquiring new space.
	 */
	while (uio->uio_resid > 0 || cc > 0) {
		if (ISSET(tp->t_lflag, FLUSHO)) {
			/* ^O discard: pretend everything was written. */
			uio->uio_resid = 0;
			return (0);
		}
		if (tp->t_outq.c_cc > hiwat)
			goto ovhiwat;
		/*
		 * Grab a hunk of data from the user, unless we have some
		 * leftover from last time.
		 */
		if (cc == 0) {
			cc = min(uio->uio_resid, OBUFSIZ);
			cp = obuf;
			error = uiomove(cp, cc, uio);
			if (error) {
				cc = 0;
				break;
			}
		}
		/*
		 * If nothing fancy need be done, grab those characters we
		 * can handle without any of ttyoutput's processing and
		 * just transfer them to the output q.  For those chars
		 * which require special processing (as indicated by the
		 * bits in char_type), call ttyoutput.  After processing
		 * a hunk of data, look for FLUSHO so ^O's will take effect
		 * immediately.
		 */
		while (cc > 0) {
			if (!ISSET(tp->t_oflag, OPOST))
				ce = cc;
			else {
				/* ce = length of the leading run of
				 * "ordinary" characters needing no
				 * ttyoutput() processing. */
				ce = cc - scanc((u_int)cc, (u_char *)cp,
				   (u_char *)char_type, CCLASSMASK);
				/*
				 * If ce is zero, then we're processing
				 * a special character through ttyoutput.
				 */
				if (ce == 0) {
					tp->t_rocount = 0;
					if (ttyoutput(*cp, tp) >= 0) {
						/* No Clists, wait a bit. */
						ttstart(tp);
						if (error = ttysleep(tp, &lbolt,
						    TTOPRI | PCATCH, ttybuf, 0))
							break;
						goto loop;
					}
					cp++;
					cc--;
					if (ISSET(tp->t_lflag, FLUSHO) ||
					    tp->t_outq.c_cc > hiwat)
						goto ovhiwat;
					continue;
				}
			}
			/*
			 * A bunch of normal characters have been found.
			 * Transfer them en masse to the output queue and
			 * continue processing at the top of the loop.
			 * If there are any further characters in this
			 * <= OBUFSIZ chunk, the first should be a character
			 * requiring special handling by ttyoutput.
			 */
			tp->t_rocount = 0;
			i = b_to_q(cp, ce, &tp->t_outq);
			ce -= i;
			tp->t_column += ce;
			cp += ce, cc -= ce, tk_nout += ce;
			tp->t_outcc += ce;
			if (i > 0) {
				/* No Clists, wait a bit. */
				ttstart(tp);
				if (error = ttysleep(tp,
				    &lbolt, TTOPRI | PCATCH, ttybuf, 0))
					break;
				goto loop;
			}
			if (ISSET(tp->t_lflag, FLUSHO) ||
			    tp->t_outq.c_cc > hiwat)
				break;
		}
		ttstart(tp);
	}
out:
	/*
	 * If cc is nonzero, we leave the uio structure inconsistent, as the
	 * offset and iov pointers have moved forward, but it doesn't matter
	 * (the call will either return short or restart with a new uio).
	 */
	uio->uio_resid += cc;
	return (error);

ovhiwat:
	ttstart(tp);
	s = spltty();
	/*
	 * This can only occur if FLUSHO is set in t_lflag,
	 * or if ttstart/oproc is synchronous (or very fast).
	 */
	if (tp->t_outq.c_cc <= hiwat) {
		splx(s);
		goto loop;
	}
	if (flag & IO_NDELAY) {
		splx(s);
		uio->uio_resid += cc;
		/* Report EWOULDBLOCK only if nothing was written at all. */
		return (uio->uio_resid == cnt ? EWOULDBLOCK : 0);
	}
	SET(tp->t_state, TS_ASLEEP);
	error = ttysleep(tp, &tp->t_outq, TTOPRI | PCATCH, ttyout, 0);
	splx(s);
	if (error)
		goto out;
	goto loop;
}

/*
 * Rubout one character from the rawq of tp
 * as cleanly as possible.
 */
/*
 * ttyrub --
 *	Visually erase character `c' being rubbed out of the raw queue.
 *	With ECHOE, erases by character class (ORDINARY backs over one
 *	column, echoed control characters over two, and TAB replays the
 *	line under FLUSHO/TS_CNTTB to recompute the tab's width, capped
 *	at 8 columns).  With ECHOPRT, prints erased characters between
 *	`\' and (elsewhere) `/'.  Otherwise just echoes the erase char.
 */
void
ttyrub(c, tp)
	register int c;
	register struct tty *tp;
{
	register char *cp;
	register int savecol;
	int tabc, s;

	if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC))
		return;
	CLR(tp->t_lflag, FLUSHO);
	if (ISSET(tp->t_lflag, ECHOE)) {
		if (tp->t_rocount == 0) {
			/*
			 * Screwed by ttwrite; retype
			 */
			ttyretype(tp);
			return;
		}
		if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE))
			ttyrubo(tp, 2);
		else {
			CLR(c, ~TTY_CHARMASK);
			switch (CCLASS(c)) {
			case ORDINARY:
				ttyrubo(tp, 1);
				break;
			case BACKSPACE:
			case CONTROL:
			case NEWLINE:
			case RETURN:
			case VTAB:
				/* Echoed as ^X (two columns) under ECHOCTL. */
				if (ISSET(tp->t_lflag, ECHOCTL))
					ttyrubo(tp, 2);
				break;
			case TAB:
				if (tp->t_rocount < tp->t_rawq.c_cc) {
					ttyretype(tp);
					return;
				}
				/* Re-echo the line with output suppressed
				 * (FLUSHO) purely to advance t_column, so we
				 * can compute how wide this tab was. */
				s = spltty();
				savecol = tp->t_column;
				SET(tp->t_state, TS_CNTTB);
				SET(tp->t_lflag, FLUSHO);
				tp->t_column = tp->t_rocol;
				cp = tp->t_rawq.c_cf;
				if (cp)
					tabc = *cp;	/* XXX FIX NEXTC */
				for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc))
					ttyecho(tabc, tp);
				CLR(tp->t_lflag, FLUSHO);
				CLR(tp->t_state, TS_CNTTB);
				splx(s);

				/* savecol will now be length of the tab. */
				savecol -= tp->t_column;
				tp->t_column += savecol;
				if (savecol > 8)
					savecol = 8;	/* overflow screw */
				while (--savecol >= 0)
					(void)ttyoutput('\b', tp);
				break;
			default:			/* XXX */
#define	PANICSTR	"ttyrub: would panic c = %d, val = %d\n"
				(void)printf(PANICSTR, c, CCLASS(c));
#ifdef notdef
				panic(PANICSTR, c, CCLASS(c));
#endif
			}
		}
	} else if (ISSET(tp->t_lflag, ECHOPRT)) {
		if (!ISSET(tp->t_state, TS_ERASE)) {
			SET(tp->t_state, TS_ERASE);
			(void)ttyoutput('\\', tp);
		}
		ttyecho(c, tp);
	} else
		ttyecho(tp->t_cc[VERASE], tp);
	--tp->t_rocount;
}

/*
 * Back over cnt characters, erasing them.
 */
static void
ttyrubo(tp, cnt)
	register struct tty *tp;
	int cnt;
{

	/* Backspace-space-backspace for each column being erased. */
	while (cnt-- > 0) {
		(void)ttyoutput('\b', tp);
		(void)ttyoutput(' ', tp);
		(void)ttyoutput('\b', tp);
	}
}

/*
 * ttyretype --
 *	Reprint the rawq line.  Note, it is assumed that c_cc has already
 *	been checked.
 */
void
ttyretype(tp)
	register struct tty *tp;
{
	register char *cp;
	int s, c;

	/* Echo the reprint character. */
	if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE)
		ttyecho(tp->t_cc[VREPRINT], tp);

	(void)ttyoutput('\n', tp);

	/*
	 * XXX
	 * FIX: NEXTC IS BROKEN - DOESN'T CHECK QUOTE
	 * BIT OF FIRST CHAR.
	 */
	s = spltty();
	/* Re-echo any completed (canonical) input first, then the raw queue. */
	for (cp = tp->t_canq.c_cf, c = (cp != NULL ? *cp : 0);
	    cp != NULL; cp = nextc(&tp->t_canq, cp, &c))
		ttyecho(c, tp);
	for (cp = tp->t_rawq.c_cf, c = (cp != NULL ? *cp : 0);
	    cp != NULL; cp = nextc(&tp->t_rawq, cp, &c))
		ttyecho(c, tp);
	CLR(tp->t_state, TS_ERASE);
	splx(s);

	tp->t_rocount = tp->t_rawq.c_cc;
	tp->t_rocol = 0;
}

/*
 * Echo a typed character to the terminal.
 *
 * Under ECHOCTL, control characters (other than tab and newline) and
 * DEL are echoed as a two-character ^X sequence.
 */
static void
ttyecho(c, tp)
	register int c;
	register struct tty *tp;
{

	if (!ISSET(tp->t_state, TS_CNTTB))
		CLR(tp->t_lflag, FLUSHO);
	if ((!ISSET(tp->t_lflag, ECHO) &&
	    (!ISSET(tp->t_lflag, ECHONL) || c == '\n')) ||
	    ISSET(tp->t_lflag, EXTPROC))
		return;
	if (ISSET(tp->t_lflag, ECHOCTL) &&
	    (ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n' ||
	    ISSET(c, TTY_CHARMASK) == 0177)) {
		(void)ttyoutput('^', tp);
		CLR(c, ~TTY_CHARMASK);
		if (c == 0177)
			c = '?';
		else
			c += 'A' - 1;	/* control char -> printable letter */
	}
	(void)ttyoutput(c, tp);
}

/*
 * Wake up any readers on a tty.
 *
 * Notifies selectors, posts SIGIO to the foreground process group when
 * the tty is in async mode, and wakes sleepers on the raw queue.
 */
void
ttwakeup(tp)
	register struct tty *tp;
{

	selwakeup(&tp->t_rsel);
	if (ISSET(tp->t_state, TS_ASYNC))
		pgsignal(tp->t_pgrp, SIGIO, 1);
	wakeup((caddr_t)&tp->t_rawq);
}

/*
 * Look up a code for a specified speed in a conversion table;
 * used by drivers to map software speed values to hardware parameters.
 */
int
ttspeedtab(speed, table)
	int speed;
	register struct speedtab *table;
{

	/* Table is terminated by an sp_speed of -1; -1 is also the
	 * "not found" return. */
	for ( ; table->sp_speed != -1; table++)
		if (table->sp_speed == speed)
			return (table->sp_code);
	return (-1);
}

/*
 * Set tty hi and low water marks.
 *
 * Try to arrange the dynamics so there's about one second
 * from hi to low water.
 *
 */
void
ttsetwater(tp)
	struct tty *tp;
{
	register int cps, x;

#define CLAMP(x, h, l)	((x) > h ? h : ((x) < l) ? l : (x))

	/* cps = output characters per second at the current speed. */
	cps = tp->t_ospeed / 10;
	tp->t_lowat = x = CLAMP(cps / 2, TTMAXLOWAT, TTMINLOWAT);
	x += cps;
	x = CLAMP(x, TTMAXHIWAT, TTMINHIWAT);
	tp->t_hiwat = roundup(x, CBSIZE);
#undef	CLAMP
}

/*
 * Report on state of foreground process group.
 * (The ^T / SIGINFO status line: load average, chosen process,
 * its state, user/system time, %cpu and resident set size.)
 */
void
ttyinfo(tp)
	register struct tty *tp;
{
	register struct proc *p, *pick;
	struct timeval utime, stime;
	int tmp;

	if (ttycheckoutq(tp,0) == 0)
		return;

	/* Print load average. */
	tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
	ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100);

	if (tp->t_session == NULL)
		ttyprintf(tp, "not a controlling terminal\n");
	else if (tp->t_pgrp == NULL)
		ttyprintf(tp, "no foreground process group\n");
	else if ((p = tp->t_pgrp->pg_members.lh_first) == 0)
		ttyprintf(tp, "empty foreground process group\n");
	else {
		/* Pick interesting process. */
		for (pick = NULL; p != 0; p = p->p_pglist.le_next)
			if (proc_compare(pick, p))
				pick = p;

		ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
		    pick->p_stat == SRUN ? "running" :
		    pick->p_wmesg ? pick->p_wmesg : "iowait");

		calcru(pick, &utime, &stime, NULL);

		/* Print user time. */
		ttyprintf(tp, "%d.%02du ",
		    utime.tv_sec, (utime.tv_usec + 5000) / 10000);

		/* Print system time. */
		ttyprintf(tp, "%d.%02ds ",
		    stime.tv_sec, (stime.tv_usec + 5000) / 10000);

#define	pgtok(a)	(((a) * NBPG) / 1024)
		/* Print percentage cpu, resident set size. */
		tmp = pick->p_pctcpu * 10000 + FSCALE / 2 >> FSHIFT;
		ttyprintf(tp, "%d%% %dk\n",
		    tmp / 100,
		    pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 :
#ifdef pmap_resident_count
		    pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap))
#else
		    pgtok(pick->p_vmspace->vm_rssize)
#endif
		    );
	}
	tp->t_rocount = 0;	/* so pending input will be retyped if BS */
}

/*
 * Returns 1 if p2 is "better" than p1
 *
 * The algorithm for picking the "interesting" process is thus:
 *
 *	1) Only foreground processes are eligible - implied.
 *	2) Runnable processes are favored over anything else.  The runner
 *	   with the highest cpu utilization is picked (p_estcpu).  Ties are
 *	   broken by picking the highest pid.
 *	3) The sleeper with the shortest sleep time is next.  With ties,
 *	   we pick out just "short-term" sleepers (P_SINTR == 0).
 *	4) Further ties are broken by picking the highest pid.
 */
#define ISRUN(p)	(((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
#define TESTAB(a, b)    ((a)<<1 | (b))
#define ONLYA   2
#define ONLYB   1
#define BOTH    3

static int
proc_compare(p1, p2)
	register struct proc *p1, *p2;
{

	if (p1 == NULL)
		return (1);
	/*
	 * see if at least one of them is runnable
	 */
	switch (TESTAB(ISRUN(p1), ISRUN(p2))) {
	case ONLYA:
		return (0);
	case ONLYB:
		return (1);
	case BOTH:
		/*
		 * tie - favor one with highest recent cpu utilization
		 */
		if (p2->p_estcpu > p1->p_estcpu)
			return (1);
		if (p1->p_estcpu > p2->p_estcpu)
			return (0);
		return (p2->p_pid > p1->p_pid);	/* tie - return highest pid */
	}
	/*
 	 * weed out zombies
	 */
	switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) {
	case ONLYA:
		return (1);
	case ONLYB:
		return (0);
	case BOTH:
		return (p2->p_pid > p1->p_pid);	/* tie - return highest pid */
	}
	/*
	 * pick the one with the smallest sleep time
	 */
	if (p2->p_slptime > p1->p_slptime)
		return (0);
	if (p1->p_slptime > p2->p_slptime)
		return (1);
	/*
	 * favor one sleeping in a non-interruptible sleep
	 */
	if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0)
		return (1);
	if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0)
		return (0);
	return (p2->p_pid > p1->p_pid);		/* tie - return highest pid */
}

/*
 * Output char to tty; console putchar style.
 * Returns -1 if the tty is not open with carrier, 0 otherwise.
 * Maps '\n' to CRLF.
 */
int
tputchar(c, tp)
	int c;
	struct tty *tp;
{
	register int s;

	s = spltty();
	if (ISSET(tp->t_state,
	    TS_CARR_ON | TS_ISOPEN) != (TS_CARR_ON | TS_ISOPEN)) {
		splx(s);
		return (-1);
	}
	if (c == '\n')
		(void)ttyoutput('\r', tp);
	(void)ttyoutput(c, tp);
	ttstart(tp);
	splx(s);
	return (0);
}

/*
 * Sleep on chan, returning ERESTART if tty changed while we napped and
 * returning any errors (e.g. EINTR/ETIMEDOUT) reported by tsleep.  If
 * the tty is revoked, restarting a pending call will redo validation done
 * at the start of the call.
 */
int
ttysleep(tp, chan, pri, wmesg, timo)
	struct tty *tp;
	void *chan;
	int pri, timo;
	char *wmesg;
{
	int error;
	short gen;

	/* t_gen is bumped on revoke; compare after sleeping. */
	gen = tp->t_gen;
	if (error = tsleep(chan, pri, wmesg, timo))
		return (error);
	return (tp->t_gen == gen ? 0 : ERESTART);
}
diff --git a/sys/kern/tty_compat.c b/sys/kern/tty_compat.c
new file mode 100644
index 000000000000..ce95853a00eb
--- /dev/null
+++ b/sys/kern/tty_compat.c
@@ -0,0 +1,411 @@
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)tty_compat.c	8.2 (Berkeley) 1/9/95
 */

/*
 * mapping routines for old line discipline (yuck)
 */
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/tty.h>
#include <sys/termios.h>
#include <sys/file.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

int ttydebug = 0;

/* Map termios baud rates to the old sgttyb 0..15 speed codes (and back,
 * via compatspcodes).  Terminated by { -1, -1 } for ttspeedtab(). */
static struct speedtab compatspeeds[] = {
	{ 38400, 15 },
	{ 19200, 14 },
	{ 9600,	13 },
	{ 4800,	12 },
	{ 2400,	11 },
	{ 1800,	10 },
	{ 1200,	9 },
	{ 600,	8 },
	{ 300,	7 },
	{ 200,	6 },
	{ 150,	5 },
	{ 134,	4 },
	{ 110,	3 },
	{ 75,	2 },
	{ 50,	1 },
	{ 0,	0 },
	{ -1,	-1 },
};
static int compatspcodes[16] = {
	0, 50, 75, 110, 134, 150, 200, 300, 600, 1200,
	1800, 2400, 4800, 9600, 19200, 38400,
};

/*
 * ttcompat --
 *	Translate old (4.3BSD sgtty) ioctls into termios equivalents.
 *	"Get" commands read tp directly; "set" commands build a termios
 *	and hand it to ttioctl().  Returns 0 on success, an errno from
 *	ttioctl(), or -1 if the command is not a compat ioctl.
 */
/*ARGSUSED*/
ttcompat(tp, com, data, flag)
	register struct tty *tp;
	u_long com;
	caddr_t data;
	int flag;
{

	switch (com) {
	case TIOCGETP: {
		register struct sgttyb *sg = (struct sgttyb *)data;
		register u_char *cc = tp->t_cc;
		register speed;

		speed = ttspeedtab(tp->t_ospeed, compatspeeds);
		sg->sg_ospeed = (speed == -1) ? 15 : speed;
		if (tp->t_ispeed == 0)
			sg->sg_ispeed = sg->sg_ospeed;
		else {
			speed = ttspeedtab(tp->t_ispeed, compatspeeds);
			sg->sg_ispeed = (speed == -1) ? 15 : speed;
		}
		sg->sg_erase = cc[VERASE];
		sg->sg_kill = cc[VKILL];
		sg->sg_flags = ttcompatgetflags(tp);
		break;
	}

	case TIOCSETP:
	case TIOCSETN: {
		register struct sgttyb *sg = (struct sgttyb *)data;
		struct termios term;
		int speed;

		term = tp->t_termios;
		/* Speed codes outside 0..15 are passed through as literal
		 * baud rates; otherwise decode via compatspcodes. */
		if ((speed = sg->sg_ispeed) > 15 || speed < 0)
			term.c_ispeed = speed;
		else
			term.c_ispeed = compatspcodes[speed];
		if ((speed = sg->sg_ospeed) > 15 || speed < 0)
			term.c_ospeed = speed;
		else
			term.c_ospeed = compatspcodes[speed];
		term.c_cc[VERASE] = sg->sg_erase;
		term.c_cc[VKILL] = sg->sg_kill;
		tp->t_flags = tp->t_flags&0xffff0000 | sg->sg_flags&0xffff;
		ttcompatsetflags(tp, &term);
		/* TIOCSETP drains and flushes; TIOCSETN does not. */
		return (ttioctl(tp, com == TIOCSETP ? TIOCSETAF : TIOCSETA,
		    &term, flag));
	}

	case TIOCGETC: {
		struct tchars *tc = (struct tchars *)data;
		register u_char *cc = tp->t_cc;

		tc->t_intrc = cc[VINTR];
		tc->t_quitc = cc[VQUIT];
		tc->t_startc = cc[VSTART];
		tc->t_stopc = cc[VSTOP];
		tc->t_eofc = cc[VEOF];
		tc->t_brkc = cc[VEOL];
		break;
	}
	case TIOCSETC: {
		struct tchars *tc = (struct tchars *)data;
		register u_char *cc = tp->t_cc;

		cc[VINTR] = tc->t_intrc;
		cc[VQUIT] = tc->t_quitc;
		cc[VSTART] = tc->t_startc;
		cc[VSTOP] = tc->t_stopc;
		cc[VEOF] = tc->t_eofc;
		cc[VEOL] = tc->t_brkc;
		if (tc->t_brkc == -1)
			cc[VEOL2] = _POSIX_VDISABLE;
		break;
	}
	case TIOCSLTC: {
		struct ltchars *ltc = (struct ltchars *)data;
		register u_char *cc = tp->t_cc;

		cc[VSUSP] = ltc->t_suspc;
		cc[VDSUSP] = ltc->t_dsuspc;
		cc[VREPRINT] = ltc->t_rprntc;
		cc[VDISCARD] = ltc->t_flushc;
		cc[VWERASE] = ltc->t_werasc;
		cc[VLNEXT] = ltc->t_lnextc;
		break;
	}
	case TIOCGLTC: {
		struct ltchars *ltc = (struct ltchars *)data;
		register u_char *cc = tp->t_cc;

		ltc->t_suspc = cc[VSUSP];
		ltc->t_dsuspc = cc[VDSUSP];
		ltc->t_rprntc = cc[VREPRINT];
		ltc->t_flushc = cc[VDISCARD];
		ltc->t_werasc = cc[VWERASE];
		ltc->t_lnextc = cc[VLNEXT];
		break;
	}
	case TIOCLBIS:
	case TIOCLBIC:
	case TIOCLSET: {
		struct termios term;

		term = tp->t_termios;
		/* The "local" flag word lives in the top 16 bits of
		 * t_flags; set/bis/bic it, then resynthesize termios. */
		if (com == TIOCLSET)
			tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16;
		else {
			tp->t_flags =
			 (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff);
			if (com == TIOCLBIS)
				tp->t_flags |= *(int *)data<<16;
			else
				tp->t_flags &= ~(*(int *)data<<16);
		}
		ttcompatsetlflags(tp, &term);
		return (ttioctl(tp, TIOCSETA, &term, flag));
	}
	case TIOCLGET:
		*(int *)data = ttcompatgetflags(tp)>>16;
		if (ttydebug)
			printf("CLGET: returning %x\n", *(int *)data);
		break;

	case OTIOCGETD:
		/* Old discipline numbering: 0 meant "old tty"; report 2. */
		*(int *)data = tp->t_line ? tp->t_line : 2;
		break;

	case OTIOCSETD: {
		int ldisczero = 0;

		return (ttioctl(tp, TIOCSETD,
			*(int *)data == 2 ? (caddr_t)&ldisczero : data, flag));
	}

	case OTIOCCONS:
		*(int *)data = 1;
		return (ttioctl(tp, TIOCCONS, data, flag));

	default:
		/* Not a compat ioctl; let the caller try elsewhere. */
		return (-1);
	}
	return (0);
}

/*
 * ttcompatgetflags --
 *	Synthesize the old-style 32-bit sgtty flags word from the tty's
 *	current termios flags.
 */
ttcompatgetflags(tp)
	register struct tty *tp;
{
	register long iflag	= tp->t_iflag;
	register long lflag	= tp->t_lflag;
	register long oflag	= tp->t_oflag;
	register long cflag	= tp->t_cflag;
	register flags = 0;

	if (iflag&IXOFF)
		flags |= TANDEM;
	if (iflag&ICRNL || oflag&ONLCR)
		flags |= CRMOD;
	if (cflag&PARENB) {
		if (iflag&INPCK) {
			if (cflag&PARODD)
				flags |= ODDP;
			else
				flags |= EVENP;
		} else
			flags |= EVENP | ODDP;
	} else {
		if ((tp->t_flags&LITOUT) && !(oflag&OPOST))
			flags |= LITOUT;
		if (tp->t_flags&PASS8)
			flags |= PASS8;
	}

	if ((lflag&ICANON) == 0) {
		/* fudge */
		if (iflag&IXON || lflag&ISIG || lflag&IEXTEN || cflag&PARENB)
			flags |= CBREAK;
		else
			flags |= RAW;
	}
	if (cflag&MDMBUF)
		flags |= MDMBUF;
	if ((cflag&HUPCL) == 0)
		flags |= NOHANG;
	if (oflag&OXTABS)
		flags |= XTABS;
	if (lflag&ECHOE)
		flags |= CRTERA|CRTBS;
	if (lflag&ECHOKE)
		flags |= CRTKIL|CRTBS;
	if (lflag&ECHOPRT)
		flags |= PRTERA;
	if (lflag&ECHOCTL)
		flags |= CTLECH;
	if ((iflag&IXANY) == 0)
		flags |= DECCTQ;
	flags |= lflag&(ECHO|TOSTOP|FLUSHO|PENDIN|NOFLSH);
if (ttydebug)
	printf("getflags: %x\n", flags);
	return (flags);
}

/*
 * ttcompatsetflags --
 *	Apply the low 16 bits of the old flags word (already stored in
 *	tp->t_flags) to the termios image in *t.
 */
ttcompatsetflags(tp, t)
	register struct tty *tp;
	register struct termios *t;
{
	register flags = tp->t_flags;
	register long iflag	= t->c_iflag;
	register long oflag	= t->c_oflag;
	register long lflag	= t->c_lflag;
	register long cflag	= t->c_cflag;

	if (flags & RAW) {
		iflag &= IXOFF;
		oflag &= ~OPOST;
		lflag &= ~(ECHOCTL|ISIG|ICANON|IEXTEN);
	} else {
		iflag |= BRKINT|IXON|IMAXBEL;
		oflag |= OPOST;
		lflag |= ISIG|IEXTEN|ECHOCTL;	/* XXX was echoctl on ? */
		if (flags & XTABS)
			oflag |= OXTABS;
		else
			oflag &= ~OXTABS;
		if (flags & CBREAK)
			lflag &= ~ICANON;
		else
			lflag |= ICANON;
		if (flags&CRMOD) {
			iflag |= ICRNL;
			oflag |= ONLCR;
		} else {
			iflag &= ~ICRNL;
			oflag &= ~ONLCR;
		}
	}
	if (flags&ECHO)
		lflag |= ECHO;
	else
		lflag &= ~ECHO;

	if (flags&(RAW|LITOUT|PASS8)) {
		cflag &= ~(CSIZE|PARENB);
		cflag |= CS8;
		if ((flags&(RAW|PASS8)) == 0)
			iflag |= ISTRIP;
		else
			iflag &= ~ISTRIP;
	} else {
		cflag &= ~CSIZE;
		cflag |= CS7|PARENB;
		iflag |= ISTRIP;
	}
	if ((flags&(EVENP|ODDP)) == EVENP) {
		iflag |= INPCK;
		cflag &= ~PARODD;
	} else if ((flags&(EVENP|ODDP)) == ODDP) {
		iflag |= INPCK;
		cflag |= PARODD;
	} else
		iflag &= ~INPCK;
	if (flags&LITOUT)
		oflag &= ~OPOST;	/* move earlier ? */
	if (flags&TANDEM)
		iflag |= IXOFF;
	else
		iflag &= ~IXOFF;
	t->c_iflag = iflag;
	t->c_oflag = oflag;
	t->c_lflag = lflag;
	t->c_cflag = cflag;
}

/*
 * ttcompatsetlflags --
 *	Apply the high 16 bits ("local mode" word) of the old flags word
 *	(already stored in tp->t_flags) to the termios image in *t.
 */
ttcompatsetlflags(tp, t)
	register struct tty *tp;
	register struct termios *t;
{
	register flags = tp->t_flags;
	register long iflag	= t->c_iflag;
	register long oflag	= t->c_oflag;
	register long lflag	= t->c_lflag;
	register long cflag	= t->c_cflag;

	if (flags&CRTERA)
		lflag |= ECHOE;
	else
		lflag &= ~ECHOE;
	if (flags&CRTKIL)
		lflag |= ECHOKE;
	else
		lflag &= ~ECHOKE;
	if (flags&PRTERA)
		lflag |= ECHOPRT;
	else
		lflag &= ~ECHOPRT;
	if (flags&CTLECH)
		lflag |= ECHOCTL;
	else
		lflag &= ~ECHOCTL;
	if ((flags&DECCTQ) == 0)
		iflag |= IXANY;
	else
		iflag &= ~IXANY;
	if (flags & MDMBUF)
		cflag |= MDMBUF;
	else
		cflag &= ~MDMBUF;
	if (flags&NOHANG)
		cflag &= ~HUPCL;
	else
		cflag |= HUPCL;
	lflag &= ~(TOSTOP|FLUSHO|PENDIN|NOFLSH);
	lflag |= flags&(TOSTOP|FLUSHO|PENDIN|NOFLSH);
	if (flags&(LITOUT|PASS8)) {
		iflag &= ~ISTRIP;
		cflag &= ~(CSIZE|PARENB);
		cflag |= CS8;
		if (flags&LITOUT)
			oflag &= ~OPOST;
		if ((flags&(PASS8|RAW)) == 0)
			iflag |= ISTRIP;
	} else if ((flags&RAW) == 0) {
		cflag &= ~CSIZE;
		cflag |= CS7|PARENB;
		oflag |= OPOST;
	}
	t->c_iflag = iflag;
	t->c_oflag = oflag;
	t->c_lflag = lflag;
	t->c_cflag = cflag;
}
#endif	/* COMPAT_43 || COMPAT_SUNOS */
diff --git a/sys/kern/tty_conf.c b/sys/kern/tty_conf.c
new file mode 100644
index 000000000000..14536758116e
--- /dev/null
+++ b/sys/kern/tty_conf.c
@@ -0,0 +1,126 @@
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * @(#)tty_conf.c	8.5 (Berkeley) 1/9/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/tty.h>
#include <sys/conf.h>

/* Error stubs (all aliases of enodev) used to fill unconfigured or
 * defunct line-discipline slots. */
#define	ttynodisc ((int (*) __P((dev_t, struct tty *)))enodev)
#define	ttyerrclose ((int (*) __P((struct tty *, int flags)))enodev)
#define	ttyerrio ((int (*) __P((struct tty *, struct uio *, int)))enodev)
#define	ttyerrinput ((int (*) __P((int c, struct tty *)))enodev)
#define	ttyerrstart ((int (*) __P((struct tty *)))enodev)

int	nullioctl __P((struct tty *tp, u_long cmd, caddr_t data,
			int flag, struct proc *p));

#include "tb.h"
#if NTB > 0
int	tbopen __P((dev_t dev, struct tty *tp));
int	tbclose __P((struct tty *tp, int flags));
int	tbread __P((struct tty *, struct uio *, int flags));
int	tbioctl __P((struct tty *tp, u_long cmd, caddr_t data,
			int flag, struct proc *p));
int	tbinput __P((int c, struct tty *tp));
#endif

#include "sl.h"
#if NSL > 0
int	slopen __P((dev_t dev, struct tty *tp));
int	slclose __P((struct tty *tp, int flags));
int	sltioctl __P((struct tty *tp, u_long cmd, caddr_t data,
			int flag, struct proc *p));
int	slinput __P((int c, struct tty *tp));
int	slstart __P((struct tty *tp));
#endif


/*
 * The line-discipline switch: indexed by t_line; entries for the
 * optional tablet (TABLDISC) and SLIP (SLIPDISC) disciplines are
 * compiled in only when configured (NTB/NSL from config headers).
 */
struct	linesw linesw[] =
{
	{ ttyopen, ttylclose, ttread, ttwrite, nullioctl,
	  ttyinput, ttstart, ttymodem },		/* 0- termios */

	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
	  ttyerrinput, ttyerrstart, nullmodem },	/* 1- defunct */

	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
	  ttyerrinput, ttyerrstart, nullmodem },	/* 2- defunct */

#if NTB > 0
	{ tbopen, tbclose, tbread, enodev, tbioctl,
	  tbinput, ttstart, nullmodem },		/* 3- TABLDISC */
#else
	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
	  ttyerrinput, ttyerrstart, nullmodem },
#endif

#if NSL > 0
	{ slopen, slclose, ttyerrio, ttyerrio, sltioctl,
	  slinput, slstart, nullmodem },		/* 4- SLIPDISC */
#else
	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
	  ttyerrinput, ttyerrstart, nullmodem },
#endif
};

int	nlinesw = sizeof (linesw) / sizeof (linesw[0]);

/*
 * Do nothing specific version of line
 * discipline specific ioctl command.
 * Returns -1 so the caller falls through to generic tty ioctl handling.
 */
/*ARGSUSED*/
nullioctl(tp, cmd, data, flags, p)
	struct tty *tp;
	u_long cmd;
	char *data;
	int flags;
	struct proc *p;
{

#ifdef lint
	tp = tp; data = data; flags = flags; p = p;
#endif
	return (-1);
}
diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
new file mode 100644
index 000000000000..2c37984ab03d
--- /dev/null
+++ b/sys/kern/tty_pty.c
@@ -0,0 +1,691 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty_pty.c 8.4 (Berkeley) 2/20/95 + */ + +/* + * Pseudo-teletype Driver + * (Actually two drivers, requiring two entries in 'cdevsw') + */ +#include "pty.h" /* XXX */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/tty.h> +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/uio.h> +#include <sys/kernel.h> +#include <sys/vnode.h> + +#if NPTY == 1 +#undef NPTY +#define NPTY 32 /* crude XXX */ +#endif + +#define BUFSIZ 100 /* Chunk size iomoved to/from user */ + +/* + * pts == /dev/tty[pqrs]? + * ptc == /dev/pty[pqrs]? + */ +struct tty pt_tty[NPTY]; /* XXX */ +struct pt_ioctl { + int pt_flags; + struct selinfo pt_selr, pt_selw; + u_char pt_send; + u_char pt_ucntl; +} pt_ioctl[NPTY]; /* XXX */ +int npty = NPTY; /* for pstat -t */ + +#define PF_PKT 0x08 /* packet mode */ +#define PF_STOPPED 0x10 /* user told stopped */ +#define PF_REMOTE 0x20 /* remote and flow controlled input */ +#define PF_NOSTOP 0x40 +#define PF_UCNTL 0x80 /* user control mode */ + +void ptsstop __P((struct tty *, int)); + +/* + * Establish n (or default if n is 1) ptys in the system. + * + * XXX cdevsw & pstat require the array `pty[]' to be an array + */ +void +ptyattach(n) + int n; +{ +#ifdef notyet + char *mem; + register u_long ntb; +#define DEFAULT_NPTY 32 + + /* maybe should allow 0 => none? 
*/ + if (n <= 1) + n = DEFAULT_NPTY; + ntb = n * sizeof(struct tty); + mem = malloc(ntb + ALIGNBYTES + n * sizeof(struct pt_ioctl), + M_DEVBUF, M_WAITOK); + pt_tty = (struct tty *)mem; + mem = (char *)ALIGN(mem + ntb); + pt_ioctl = (struct pt_ioctl *)mem; + npty = n; +#endif +} + +/*ARGSUSED*/ +ptsopen(dev, flag, devtype, p) + dev_t dev; + int flag, devtype; + struct proc *p; +{ + register struct tty *tp; + int error; + + if (minor(dev) >= npty) + return (ENXIO); + tp = &pt_tty[minor(dev)]; + if ((tp->t_state & TS_ISOPEN) == 0) { + tp->t_state |= TS_WOPEN; + ttychars(tp); /* Set up default chars */ + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_cflag = TTYDEF_CFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + ttsetwater(tp); /* would be done in xxparam() */ + } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0) + return (EBUSY); + if (tp->t_oproc) /* Ctrlr still around. */ + tp->t_state |= TS_CARR_ON; + while ((tp->t_state & TS_CARR_ON) == 0) { + tp->t_state |= TS_WOPEN; + if (flag&FNONBLOCK) + break; + if (error = ttysleep(tp, (caddr_t)&tp->t_rawq, TTIPRI | PCATCH, + ttopen, 0)) + return (error); + } + error = (*linesw[tp->t_line].l_open)(dev, tp); + ptcwakeup(tp, FREAD|FWRITE); + return (error); +} + +ptsclose(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + register struct tty *tp; + int err; + + tp = &pt_tty[minor(dev)]; + err = (*linesw[tp->t_line].l_close)(tp, flag); + err |= ttyclose(tp); + ptcwakeup(tp, FREAD|FWRITE); + return (err); +} + +ptsread(dev, uio, flag) + dev_t dev; + struct uio *uio; + int flag; +{ + struct proc *p = curproc; + register struct tty *tp = &pt_tty[minor(dev)]; + register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + int error = 0; + +again: + if (pti->pt_flags & PF_REMOTE) { + while (isbackground(p, tp)) { + if ((p->p_sigignore & sigmask(SIGTTIN)) || + (p->p_sigmask & sigmask(SIGTTIN)) || + p->p_pgrp->pg_jobc == 0 || + p->p_flag & P_PPWAIT) + 
return (EIO); + pgsignal(p->p_pgrp, SIGTTIN, 1); + if (error = ttysleep(tp, (caddr_t)&lbolt, + TTIPRI | PCATCH, ttybg, 0)) + return (error); + } + if (tp->t_canq.c_cc == 0) { + if (flag & IO_NDELAY) + return (EWOULDBLOCK); + if (error = ttysleep(tp, (caddr_t)&tp->t_canq, + TTIPRI | PCATCH, ttyin, 0)) + return (error); + goto again; + } + while (tp->t_canq.c_cc > 1 && uio->uio_resid > 0) + if (ureadc(getc(&tp->t_canq), uio) < 0) { + error = EFAULT; + break; + } + if (tp->t_canq.c_cc == 1) + (void) getc(&tp->t_canq); + if (tp->t_canq.c_cc) + return (error); + } else + if (tp->t_oproc) + error = (*linesw[tp->t_line].l_read)(tp, uio, flag); + ptcwakeup(tp, FWRITE); + return (error); +} + +/* + * Write to pseudo-tty. + * Wakeups of controlling tty will happen + * indirectly, when tty driver calls ptsstart. + */ +ptswrite(dev, uio, flag) + dev_t dev; + struct uio *uio; + int flag; +{ + register struct tty *tp; + + tp = &pt_tty[minor(dev)]; + if (tp->t_oproc == 0) + return (EIO); + return ((*linesw[tp->t_line].l_write)(tp, uio, flag)); +} + +/* + * Start output on pseudo-tty. + * Wake up process selecting or sleeping for input from controlling tty. 
 */
/*
 * ptsstart --
 *	Output-start routine installed as t_oproc on the slave tty.
 *	If output had been stopped, queue a TIOCPKT_START notification
 *	for a packet-mode master, then wake any master-side reader so
 *	it can drain the slave's output queue.
 */
void
ptsstart(tp)
	struct tty *tp;
{
	register struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];

	/* Output currently suspended; nothing to hand to the master. */
	if (tp->t_state & TS_TTSTOP)
		return;
	if (pti->pt_flags & PF_STOPPED) {
		/* Resuming: let a packet-mode master know output restarted. */
		pti->pt_flags &= ~PF_STOPPED;
		pti->pt_send = TIOCPKT_START;
	}
	ptcwakeup(tp, FREAD);
}

/*
 * ptcwakeup --
 *	Wake master-side sleepers and selectors.  FREAD wakes master
 *	readers (they sleep on t_outq.c_cf); FWRITE wakes master
 *	writers (they sleep on t_rawq.c_cf).
 */
ptcwakeup(tp, flag)
	struct tty *tp;
	int flag;
{
	struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];

	if (flag & FREAD) {
		selwakeup(&pti->pt_selr);
		wakeup((caddr_t)&tp->t_outq.c_cf);
	}
	if (flag & FWRITE) {
		selwakeup(&pti->pt_selw);
		wakeup((caddr_t)&tp->t_rawq.c_cf);
	}
}

/*
 * ptcopen --
 *	Open the master (controller) side of a pty.  Only one opener is
 *	allowed: a non-null t_oproc marks the master as already open.
 *	Announces carrier to the slave via the line discipline's l_modem
 *	entry and resets the per-pty packet/ucntl state.
 */
/*ARGSUSED*/
#ifdef __STDC__
ptcopen(dev_t dev, int flag, int devtype, struct proc *p)
#else
ptcopen(dev, flag, devtype, p)
	dev_t dev;
	int flag, devtype;
	struct proc *p;
#endif
{
	register struct tty *tp;
	struct pt_ioctl *pti;

	if (minor(dev) >= npty)
		return (ENXIO);
	tp = &pt_tty[minor(dev)];
	if (tp->t_oproc)
		return (EIO);		/* master side already open */
	tp->t_oproc = ptsstart;
#ifdef sun4c
	tp->t_stop = ptsstop;
#endif
	(void)(*linesw[tp->t_line].l_modem)(tp, 1);	/* carrier up */
	tp->t_lflag &= ~EXTPROC;
	pti = &pt_ioctl[minor(dev)];
	pti->pt_flags = 0;
	pti->pt_send = 0;
	pti->pt_ucntl = 0;
	return (0);
}

/*
 * ptcclose --
 *	Close the master side: drop carrier on the slave and clear
 *	t_oproc (the "master open" marker tested in ptcopen) and the
 *	session pointer.
 */
ptcclose(dev)
	dev_t dev;
{
	register struct tty *tp;

	tp = &pt_tty[minor(dev)];
	(void)(*linesw[tp->t_line].l_modem)(tp, 0);	/* carrier down */
	tp->t_state &= ~TS_CARR_ON;
	tp->t_oproc = 0;		/* mark closed */
	tp->t_session = 0;
	return (0);
}

/*
 * ptcread --
 *	Read from the master side: delivers the slave's output queue,
 *	preceded by packet-mode / user-control status bytes when those
 *	modes are enabled.
 */
ptcread(dev, uio, flag)
	dev_t dev;
	struct uio *uio;
	int flag;
{
	register struct tty *tp = &pt_tty[minor(dev)];
	struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
	char buf[BUFSIZ];
	int error = 0, cc;

	/*
	 * We want to block until the slave
	 * is open, and there's something to read;
	 * but if we lost the slave or we're NBIO,
	 * then return the appropriate error instead.
+ */ + for (;;) { + if (tp->t_state&TS_ISOPEN) { + if (pti->pt_flags&PF_PKT && pti->pt_send) { + error = ureadc((int)pti->pt_send, uio); + if (error) + return (error); + if (pti->pt_send & TIOCPKT_IOCTL) { + cc = min(uio->uio_resid, + sizeof(tp->t_termios)); + uiomove(&tp->t_termios, cc, uio); + } + pti->pt_send = 0; + return (0); + } + if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) { + error = ureadc((int)pti->pt_ucntl, uio); + if (error) + return (error); + pti->pt_ucntl = 0; + return (0); + } + if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) + break; + } + if ((tp->t_state&TS_CARR_ON) == 0) + return (0); /* EOF */ + if (flag & IO_NDELAY) + return (EWOULDBLOCK); + if (error = tsleep((caddr_t)&tp->t_outq.c_cf, TTIPRI | PCATCH, + ttyin, 0)) + return (error); + } + if (pti->pt_flags & (PF_PKT|PF_UCNTL)) + error = ureadc(0, uio); + while (uio->uio_resid > 0 && error == 0) { + cc = q_to_b(&tp->t_outq, buf, min(uio->uio_resid, BUFSIZ)); + if (cc <= 0) + break; + error = uiomove(buf, cc, uio); + } + if (tp->t_outq.c_cc <= tp->t_lowat) { + if (tp->t_state&TS_ASLEEP) { + tp->t_state &= ~TS_ASLEEP; + wakeup((caddr_t)&tp->t_outq); + } + selwakeup(&tp->t_wsel); + } + return (error); +} + +void +ptsstop(tp, flush) + register struct tty *tp; + int flush; +{ + struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + int flag; + + /* note: FLUSHREAD and FLUSHWRITE already ok */ + if (flush == 0) { + flush = TIOCPKT_STOP; + pti->pt_flags |= PF_STOPPED; + } else + pti->pt_flags &= ~PF_STOPPED; + pti->pt_send |= flush; + /* change of perspective */ + flag = 0; + if (flush & FREAD) + flag |= FWRITE; + if (flush & FWRITE) + flag |= FREAD; + ptcwakeup(tp, flag); +} + +ptcselect(dev, rw, p) + dev_t dev; + int rw; + struct proc *p; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + int s; + + if ((tp->t_state&TS_CARR_ON) == 0) + return (1); + switch (rw) { + + case FREAD: + /* + * Need to block timeouts (ttrstart). 
+ */ + s = spltty(); + if ((tp->t_state&TS_ISOPEN) && + tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { + splx(s); + return (1); + } + splx(s); + /* FALLTHROUGH */ + + case 0: /* exceptional */ + if ((tp->t_state&TS_ISOPEN) && + (pti->pt_flags&PF_PKT && pti->pt_send || + pti->pt_flags&PF_UCNTL && pti->pt_ucntl)) + return (1); + selrecord(p, &pti->pt_selr); + break; + + + case FWRITE: + if (tp->t_state&TS_ISOPEN) { + if (pti->pt_flags & PF_REMOTE) { + if (tp->t_canq.c_cc == 0) + return (1); + } else { + if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) + return (1); + if (tp->t_canq.c_cc == 0 && (tp->t_iflag&ICANON)) + return (1); + } + } + selrecord(p, &pti->pt_selw); + break; + + } + return (0); +} + +ptcwrite(dev, uio, flag) + dev_t dev; + register struct uio *uio; + int flag; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + register u_char *cp; + register int cc = 0; + u_char locbuf[BUFSIZ]; + int cnt = 0; + struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + int error = 0; + +again: + if ((tp->t_state&TS_ISOPEN) == 0) + goto block; + if (pti->pt_flags & PF_REMOTE) { + if (tp->t_canq.c_cc) + goto block; + while (uio->uio_resid > 0 && tp->t_canq.c_cc < TTYHOG - 1) { + if (cc == 0) { + cc = min(uio->uio_resid, BUFSIZ); + cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + return (error); + /* check again for safety */ + if ((tp->t_state&TS_ISOPEN) == 0) + return (EIO); + } + if (cc) + (void) b_to_q((char *)cp, cc, &tp->t_canq); + cc = 0; + } + (void) putc(0, &tp->t_canq); + ttwakeup(tp); + wakeup((caddr_t)&tp->t_canq); + return (0); + } + while (uio->uio_resid > 0) { + if (cc == 0) { + cc = min(uio->uio_resid, BUFSIZ); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + return (error); + /* check again for safety */ + if ((tp->t_state&TS_ISOPEN) == 0) + return (EIO); + } + while (cc > 0) { + if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 && + (tp->t_canq.c_cc > 0 || 
!(tp->t_iflag&ICANON))) { + wakeup((caddr_t)&tp->t_rawq); + goto block; + } + (*linesw[tp->t_line].l_rint)(*cp++, tp); + cnt++; + cc--; + } + cc = 0; + } + return (0); +block: + /* + * Come here to wait for slave to open, for space + * in outq, or space in rawq. + */ + if ((tp->t_state&TS_CARR_ON) == 0) + return (EIO); + if (flag & IO_NDELAY) { + /* adjust for data copied in but not written */ + uio->uio_resid += cc; + if (cnt == 0) + return (EWOULDBLOCK); + return (0); + } + if (error = tsleep((caddr_t)&tp->t_rawq.c_cf, TTOPRI | PCATCH, + ttyout, 0)) { + /* adjust for data copied in but not written */ + uio->uio_resid += cc; + return (error); + } + goto again; +} + +/*ARGSUSED*/ +ptyioctl(dev, cmd, data, flag, p) + dev_t dev; + u_long cmd; + caddr_t data; + int flag; + struct proc *p; +{ + register struct tty *tp = &pt_tty[minor(dev)]; + register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; + register u_char *cc = tp->t_cc; + int stop, error; + + /* + * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. + * ttywflush(tp) will hang if there are characters in the outq. + */ + if (cmd == TIOCEXT) { + /* + * When the EXTPROC bit is being toggled, we need + * to send an TIOCPKT_IOCTL if the packet driver + * is turned on. + */ + if (*(int *)data) { + if (pti->pt_flags & PF_PKT) { + pti->pt_send |= TIOCPKT_IOCTL; + ptcwakeup(tp, FREAD); + } + tp->t_lflag |= EXTPROC; + } else { + if ((tp->t_lflag & EXTPROC) && + (pti->pt_flags & PF_PKT)) { + pti->pt_send |= TIOCPKT_IOCTL; + ptcwakeup(tp, FREAD); + } + tp->t_lflag &= ~EXTPROC; + } + return(0); + } else + if (cdevsw[major(dev)].d_open == ptcopen) + switch (cmd) { + + case TIOCGPGRP: + /* + * We aviod calling ttioctl on the controller since, + * in that case, tp must be the controlling terminal. + */ + *(int *)data = tp->t_pgrp ? 
tp->t_pgrp->pg_id : 0; + return (0); + + case TIOCPKT: + if (*(int *)data) { + if (pti->pt_flags & PF_UCNTL) + return (EINVAL); + pti->pt_flags |= PF_PKT; + } else + pti->pt_flags &= ~PF_PKT; + return (0); + + case TIOCUCNTL: + if (*(int *)data) { + if (pti->pt_flags & PF_PKT) + return (EINVAL); + pti->pt_flags |= PF_UCNTL; + } else + pti->pt_flags &= ~PF_UCNTL; + return (0); + + case TIOCREMOTE: + if (*(int *)data) + pti->pt_flags |= PF_REMOTE; + else + pti->pt_flags &= ~PF_REMOTE; + ttyflush(tp, FREAD|FWRITE); + return (0); + +#ifdef COMPAT_43 + case TIOCSETP: + case TIOCSETN: +#endif + case TIOCSETD: + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: + ndflush(&tp->t_outq, tp->t_outq.c_cc); + break; + + case TIOCSIG: + if (*(unsigned int *)data >= NSIG) + return(EINVAL); + if ((tp->t_lflag&NOFLSH) == 0) + ttyflush(tp, FREAD|FWRITE); + pgsignal(tp->t_pgrp, *(unsigned int *)data, 1); + if ((*(unsigned int *)data == SIGINFO) && + ((tp->t_lflag&NOKERNINFO) == 0)) + ttyinfo(tp); + return(0); + } + error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); + if (error < 0) + error = ttioctl(tp, cmd, data, flag); + if (error < 0) { + if (pti->pt_flags & PF_UCNTL && + (cmd & ~0xff) == UIOCCMD(0)) { + if (cmd & 0xff) { + pti->pt_ucntl = (u_char)cmd; + ptcwakeup(tp, FREAD); + } + return (0); + } + error = ENOTTY; + } + /* + * If external processing and packet mode send ioctl packet. 
+ */ + if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { + switch(cmd) { + case TIOCSETA: + case TIOCSETAW: + case TIOCSETAF: +#ifdef COMPAT_43 + case TIOCSETP: + case TIOCSETN: +#endif +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) + case TIOCSETC: + case TIOCSLTC: + case TIOCLBIS: + case TIOCLBIC: + case TIOCLSET: +#endif + pti->pt_send |= TIOCPKT_IOCTL; + ptcwakeup(tp, FREAD); + default: + break; + } + } + stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s')) + && CCEQ(cc[VSTART], CTRL('q')); + if (pti->pt_flags & PF_NOSTOP) { + if (stop) { + pti->pt_send &= ~TIOCPKT_NOSTOP; + pti->pt_send |= TIOCPKT_DOSTOP; + pti->pt_flags &= ~PF_NOSTOP; + ptcwakeup(tp, FREAD); + } + } else { + if (!stop) { + pti->pt_send &= ~TIOCPKT_DOSTOP; + pti->pt_send |= TIOCPKT_NOSTOP; + pti->pt_flags |= PF_NOSTOP; + ptcwakeup(tp, FREAD); + } + } + return (error); +} diff --git a/sys/kern/tty_tb.c b/sys/kern/tty_tb.c new file mode 100644 index 000000000000..05a46baa73d3 --- /dev/null +++ b/sys/kern/tty_tb.c @@ -0,0 +1,368 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty_tb.c 8.2 (Berkeley) 1/9/95 + */ + +#include "tb.h" +#if NTB > 0 + +/* + * Line discipline for RS232 tablets; + * supplies binary coordinate data. + */ +#include <sys/param.h> +#include <sys/tablet.h> +#include <sys/tty.h> + +/* + * Tablet configuration table. 
+ */ +struct tbconf { + short tbc_recsize; /* input record size in bytes */ + short tbc_uiosize; /* size of data record returned user */ + int tbc_sync; /* mask for finding sync byte/bit */ + int (*tbc_decode)();/* decoding routine */ + char *tbc_run; /* enter run mode sequence */ + char *tbc_point; /* enter point mode sequence */ + char *tbc_stop; /* stop sequence */ + char *tbc_start; /* start/restart sequence */ + int tbc_flags; +#define TBF_POL 0x1 /* polhemus hack */ +#define TBF_INPROX 0x2 /* tablet has proximity info */ +}; + +static int tbdecode(), gtcodecode(), poldecode(); +static int tblresdecode(), tbhresdecode(); + +struct tbconf tbconf[TBTYPE] = { +{ 0 }, +{ 5, sizeof (struct tbpos), 0200, tbdecode, "6", "4" }, +{ 5, sizeof (struct tbpos), 0200, tbdecode, "\1CN", "\1RT", "\2", "\4" }, +{ 8, sizeof (struct gtcopos), 0200, gtcodecode }, +{17, sizeof (struct polpos), 0200, poldecode, 0, 0, "\21", "\5\22\2\23", + TBF_POL }, +{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CN", "\1PT", "\2", "\4", + TBF_INPROX }, +{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CN", "\1PT", "\2", "\4", + TBF_INPROX }, +{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CL\33", "\1PT\33", 0, 0}, +{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CL\33", "\1PT\33", 0, 0}, +}; + +/* + * Tablet state + */ +struct tb { + int tbflags; /* mode & type bits */ +#define TBMAXREC 17 /* max input record size */ + char cbuf[TBMAXREC]; /* input buffer */ + union { + struct tbpos tbpos; + struct gtcopos gtcopos; + struct polpos polpos; + } rets; /* processed state */ +#define NTBS 16 +} tb[NTBS]; + +/* + * Open as tablet discipline; called on discipline change. 
 */
/*
 * tbopen --
 *	Attach the tablet line discipline to a tty: flush pending
 *	output, claim a free slot in the static tb[] state table and
 *	initialize the input buffer.  ENODEV if the discipline is
 *	already active on this line, EBUSY if all NTBS slots are used.
 */
/*ARGSUSED*/
tbopen(dev, tp)
	dev_t dev;
	register struct tty *tp;
{
	register struct tb *tbp;

	if (tp->t_line == TABLDISC)
		return (ENODEV);
	ttywflush(tp);
	/* Find an unused tablet-state slot; tbflags == 0 means free. */
	for (tbp = tb; tbp < &tb[NTBS]; tbp++)
		if (tbp->tbflags == 0)
			break;
	if (tbp >= &tb[NTBS])
		return (EBUSY);
	tbp->tbflags = TBTIGER|TBPOINT;		/* default */
	tp->t_cp = tbp->cbuf;
	tp->t_inbuf = 0;
	bzero((caddr_t)&tbp->rets, sizeof (tbp->rets));
	tp->T_LINEP = (caddr_t)tbp;
	tp->t_flags |= LITOUT;
	return (0);
}

/*
 * Line discipline change or last device close.
 * Stops the tablet, releases the tb[] slot and clears the tty's
 * input state.
 */
tbclose(tp)
	register struct tty *tp;
{
	register int s;
	int modebits = TBPOINT|TBSTOP;

	tbioctl(tp, BIOSMODE, &modebits, 0);
	s = spltty();		/* block tty interrupts during teardown */
	((struct tb *)tp->T_LINEP)->tbflags = 0;
	tp->t_cp = 0;
	tp->t_inbuf = 0;
	tp->t_rawq.c_cc = 0;	/* clear queues -- paranoid */
	tp->t_canq.c_cc = 0;
	tp->t_line = 0;		/* paranoid: avoid races */
	splx(s);
}

/*
 * Read from a tablet line.
 * Characters have been buffered in a buffer and decoded.
 * Copies the most recently decoded position record to the user; the
 * record size depends on the configured tablet type.
 */
tbread(tp, uio)
	register struct tty *tp;
	struct uio *uio;
{
	register struct tb *tbp = (struct tb *)tp->T_LINEP;
	register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];
	int ret;

	if ((tp->t_state&TS_CARR_ON) == 0)
		return (EIO);
	ret = uiomove(&tbp->rets, tc->tbc_uiosize, uio);
	/* Polhemus: consume the key so a press is reported only once. */
	if (tc->tbc_flags&TBF_POL)
		tbp->rets.polpos.p_key = ' ';
	return (ret);
}

/*
 * Low level character input routine.
 * Stuff the character in the buffer, and decode
 * if all the chars are there.
 *
 * This routine could be expanded in-line in the receiver
 * interrupt routine to make it run as fast as possible.
 */
tbinput(c, tp)
	register int c;
	register struct tty *tp;
{
	register struct tb *tbp = (struct tb *)tp->T_LINEP;
	register struct tbconf *tc = &tbconf[tbp->tbflags & TBTYPE];

	if (tc->tbc_recsize == 0 || tc->tbc_decode == 0)	/* paranoid? */
		return;
	/*
	 * Locate sync bit/byte or reset input buffer.
+ */ + if (c&tc->tbc_sync || tp->t_inbuf == tc->tbc_recsize) { + tp->t_cp = tbp->cbuf; + tp->t_inbuf = 0; + } + *tp->t_cp++ = c&0177; + /* + * Call decode routine only if a full record has been collected. + */ + if (++tp->t_inbuf == tc->tbc_recsize) + (*tc->tbc_decode)(tc, tbp->cbuf, &tbp->rets); +} + +/* + * Decode GTCO 8 byte format (high res, tilt, and pressure). + */ +static +gtcodecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct gtcopos *tbpos; +{ + + tbpos->pressure = *cp >> 2; + tbpos->status = (tbpos->pressure > 16) | TBINPROX; /* half way down */ + tbpos->xpos = (*cp++ & 03) << 14; + tbpos->xpos |= *cp++ << 7; + tbpos->xpos |= *cp++; + tbpos->ypos = (*cp++ & 03) << 14; + tbpos->ypos |= *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->xtilt = *cp++; + tbpos->ytilt = *cp++; + tbpos->scount++; +} + +/* + * Decode old Hitachi 5 byte format (low res). + */ +static +tbdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + register char byte; + + byte = *cp++; + tbpos->status = (byte&0100) ? TBINPROX : 0; + byte &= ~0100; + if (byte > 036) + tbpos->status |= 1 << ((byte-040)/2); + tbpos->xpos = *cp++ << 7; + tbpos->xpos |= *cp++; + if (tbpos->xpos < 256) /* tablet wraps around at 256 */ + tbpos->status &= ~TBINPROX; /* make it out of proximity */ + tbpos->ypos = *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->scount++; +} + +/* + * Decode new Hitach 5-byte format (low res). + */ +static +tblresdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + + *cp &= ~0100; /* mask sync bit */ + tbpos->status = (*cp++ >> 2) | TBINPROX; + if (tc->tbc_flags&TBF_INPROX && tbpos->status&020) + tbpos->status &= ~(020|TBINPROX); + tbpos->xpos = *cp++; + tbpos->xpos |= *cp++ << 6; + tbpos->ypos = *cp++; + tbpos->ypos |= *cp++ << 6; + tbpos->scount++; +} + +/* + * Decode new Hitach 6-byte format (high res). 
+ */ +static +tbhresdecode(tc, cp, tbpos) + struct tbconf *tc; + register char *cp; + register struct tbpos *tbpos; +{ + char byte; + + byte = *cp++; + tbpos->xpos = (byte & 03) << 14; + tbpos->xpos |= *cp++ << 7; + tbpos->xpos |= *cp++; + tbpos->ypos = *cp++ << 14; + tbpos->ypos |= *cp++ << 7; + tbpos->ypos |= *cp++; + tbpos->status = (byte >> 2) | TBINPROX; + if (tc->tbc_flags&TBF_INPROX && tbpos->status&020) + tbpos->status &= ~(020|TBINPROX); + tbpos->scount++; +} + +/* + * Polhemus decode. + */ +static +poldecode(tc, cp, polpos) + struct tbconf *tc; + register char *cp; + register struct polpos *polpos; +{ + + polpos->p_x = cp[4] | cp[3]<<7 | (cp[9] & 0x03) << 14; + polpos->p_y = cp[6] | cp[5]<<7 | (cp[9] & 0x0c) << 12; + polpos->p_z = cp[8] | cp[7]<<7 | (cp[9] & 0x30) << 10; + polpos->p_azi = cp[11] | cp[10]<<7 | (cp[16] & 0x03) << 14; + polpos->p_pit = cp[13] | cp[12]<<7 | (cp[16] & 0x0c) << 12; + polpos->p_rol = cp[15] | cp[14]<<7 | (cp[16] & 0x30) << 10; + polpos->p_stat = cp[1] | cp[0]<<7; + if (cp[2] != ' ') + polpos->p_key = cp[2]; +} + +/*ARGSUSED*/ +tbioctl(tp, cmd, data, flag) + struct tty *tp; + u_long cmd; + caddr_t data; + int flag; +{ + register struct tb *tbp = (struct tb *)tp->T_LINEP; + + switch (cmd) { + + case BIOGMODE: + *(int *)data = tbp->tbflags & TBMODE; + break; + + case BIOSTYPE: + if (tbconf[*(int *)data & TBTYPE].tbc_recsize == 0 || + tbconf[*(int *)data & TBTYPE].tbc_decode == 0) + return (EINVAL); + tbp->tbflags &= ~TBTYPE; + tbp->tbflags |= *(int *)data & TBTYPE; + /* fall thru... 
to set mode bits */ + + case BIOSMODE: { + register struct tbconf *tc; + + tbp->tbflags &= ~TBMODE; + tbp->tbflags |= *(int *)data & TBMODE; + tc = &tbconf[tbp->tbflags & TBTYPE]; + if (tbp->tbflags&TBSTOP) { + if (tc->tbc_stop) + ttyout(tc->tbc_stop, tp); + } else if (tc->tbc_start) + ttyout(tc->tbc_start, tp); + if (tbp->tbflags&TBPOINT) { + if (tc->tbc_point) + ttyout(tc->tbc_point, tp); + } else if (tc->tbc_run) + ttyout(tc->tbc_run, tp); + ttstart(tp); + break; + } + + case BIOGTYPE: + *(int *)data = tbp->tbflags & TBTYPE; + break; + + case TIOCSETD: + case TIOCGETD: + case TIOCGETP: + case TIOCGETC: + return (-1); /* pass thru... */ + + default: + return (ENOTTY); + } + return (0); +} +#endif diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c new file mode 100644 index 000000000000..d9dd1b46fa59 --- /dev/null +++ b/sys/kern/tty_tty.c @@ -0,0 +1,149 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty_tty.c 8.4 (Berkeley) 5/14/95 + */ + +/* + * Indirect driver for controlling tty. + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/tty.h> +#include <sys/vnode.h> +#include <sys/file.h> + +#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL) + +/*ARGSUSED*/ +cttyopen(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + int error; + + if (ttyvp == NULL) + return (ENXIO); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); +#ifdef PARANOID + /* + * Since group is tty and mode is 620 on most terminal lines + * and since sessions protect terminals from processes outside + * your session, this check is probably no longer necessary. + * Since it inhibits setuid root programs that later switch + * to another user from accessing /dev/tty, we have decided + * to delete this test. (mckusick 5/93) + */ + error = VOP_ACCESS(ttyvp, + (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? 
	    VWRITE : 0), p->p_ucred, p);
	if (!error)
#endif /* PARANOID */
		error = VOP_OPEN(ttyvp, flag, NOCRED, p);
	VOP_UNLOCK(ttyvp, 0, p);
	return (error);
}

/*
 * cttyread --
 *	Read from /dev/tty: forward the request to the process's
 *	controlling-terminal vnode, or fail with EIO if the process
 *	has no controlling terminal (see cttyvp macro above).
 */
/*ARGSUSED*/
cttyread(dev, uio, flag)
	dev_t dev;
	struct uio *uio;
	int flag;
{
	struct proc *p = uio->uio_procp;
	register struct vnode *ttyvp = cttyvp(p);
	int error;

	if (ttyvp == NULL)
		return (EIO);
	vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_READ(ttyvp, uio, flag, NOCRED);
	VOP_UNLOCK(ttyvp, 0, p);
	return (error);
}

/*
 * cttywrite --
 *	Write to /dev/tty: forward to the controlling-terminal vnode
 *	under an exclusive vnode lock, EIO when there is none.
 */
/*ARGSUSED*/
cttywrite(dev, uio, flag)
	dev_t dev;
	struct uio *uio;
	int flag;
{
	struct proc *p = uio->uio_procp;
	struct vnode *ttyvp = cttyvp(uio->uio_procp);
	int error;

	if (ttyvp == NULL)
		return (EIO);
	vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
	VOP_UNLOCK(ttyvp, 0, p);
	return (error);
}

/*
 * cttyioctl --
 *	Ioctl on /dev/tty.  TIOCNOTTY (give up the controlling tty) is
 *	handled here: a non-session-leader just clears P_CONTROLT, a
 *	session leader gets EINVAL.  Everything else is passed through
 *	to the controlling terminal's vnode.
 */
/*ARGSUSED*/
cttyioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	struct vnode *ttyvp = cttyvp(p);

	if (ttyvp == NULL)
		return (EIO);
	if (cmd == TIOCNOTTY) {
		if (!SESS_LEADER(p)) {
			p->p_flag &= ~P_CONTROLT;
			return (0);
		} else
			return (EINVAL);
	}
	return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p));
}

/*
 * cttyselect --
 *	Select on /dev/tty: forward to the controlling terminal; with
 *	no controlling terminal report ready so the subsequent
 *	operation returns the real EOF/failure.
 */
/*ARGSUSED*/
cttyselect(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{
	struct vnode *ttyvp = cttyvp(p);

	if (ttyvp == NULL)
		return (1);	/* try operation to get EOF/failure */
	return (VOP_SELECT(ttyvp, flag, FREAD|FWRITE, NOCRED, p));
}
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
new file mode 100644
index 000000000000..1c91f2af4a8c
--- /dev/null
+++ b/sys/kern/uipc_domain.c
@@ -0,0 +1,223 @@
/*
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)uipc_domain.c 8.3 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <sys/mbuf.h> +#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <vm/vm.h> +#include <sys/sysctl.h> + +void pffasttimo __P((void *)); +void pfslowtimo __P((void *)); + +#define ADDDOMAIN(x) { \ + extern struct domain __CONCAT(x,domain); \ + __CONCAT(x,domain.dom_next) = domains; \ + domains = &__CONCAT(x,domain); \ +} + +void +domaininit() +{ + register struct domain *dp; + register struct protosw *pr; + +#undef unix +#ifndef lint + ADDDOMAIN(unix); + ADDDOMAIN(route); +#ifdef INET + ADDDOMAIN(inet); +#endif +#ifdef NS + ADDDOMAIN(ns); +#endif +#ifdef ISO + ADDDOMAIN(iso); +#endif +#ifdef CCITT + ADDDOMAIN(ccitt); +#endif +#include "imp.h" +#if NIMP > 0 + ADDDOMAIN(imp); +#endif +#endif + + for (dp = domains; dp; dp = dp->dom_next) { + if (dp->dom_init) + (*dp->dom_init)(); + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_init) + (*pr->pr_init)(); + } + +if (max_linkhdr < 16) /* XXX */ +max_linkhdr = 16; + max_hdr = max_linkhdr + max_protohdr; + max_datalen = MHLEN - max_hdr; + timeout(pffasttimo, NULL, 1); + timeout(pfslowtimo, NULL, 1); +} + +struct protosw * +pffindtype(family, type) + int family, type; +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_type && pr->pr_type == type) + return (pr); + return (0); +} + +struct protosw * +pffindproto(family, protocol, type) + int family, protocol, type; +{ + register struct domain *dp; + register struct protosw *pr; + struct protosw *maybe = 0; + + if (family == 0) + return (0); + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + 
		return (0);
found:
	/*
	 * Exact (protocol, type) match wins; otherwise remember the
	 * first wildcard raw entry (SOCK_RAW with protocol 0) as a
	 * fallback for raw-socket requests.
	 * NOTE(review): dom_protoswNPROTOSW appears to be the
	 * one-past-the-end sentinel of the domain's protosw table,
	 * judging by its use as the loop bound throughout this file.
	 */
	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
			return (pr);

		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
		    pr->pr_protocol == 0 && maybe == (struct protosw *)0)
			maybe = pr;
	}
	return (maybe);
}

/*
 * net_sysctl --
 *	Dispatch a "net" sysctl request: name[0] is the protocol
 *	family, name[1] the protocol number; the remainder of the name
 *	is handed to the matching protocol's pr_sysctl handler.
 *	Returns EISDIR for a too-short name, ENOPROTOOPT when no
 *	domain/protocol handler matches.
 */
int
net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	register struct domain *dp;
	register struct protosw *pr;
	int family, protocol;

	/*
	 * All sysctl names at this level are nonterminal;
	 * next two components are protocol family and protocol number,
	 * then at least one addition component.
	 */
	if (namelen < 3)
		return (EISDIR);		/* overloaded */
	family = name[0];
	protocol = name[1];

	if (family == 0)
		return (0);
	for (dp = domains; dp; dp = dp->dom_next)
		if (dp->dom_family == family)
			goto found;
	return (ENOPROTOOPT);
found:
	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
		if (pr->pr_protocol == protocol && pr->pr_sysctl)
			return ((*pr->pr_sysctl)(name + 2, namelen - 2,
			    oldp, oldlenp, newp, newlen));
	return (ENOPROTOOPT);
}

/*
 * pfctlinput --
 *	Broadcast a control-input notification (e.g. a routing or
 *	interface change, identified by cmd) for address sa to every
 *	protocol in every domain that supplies a pr_ctlinput handler.
 */
void
pfctlinput(cmd, sa)
	int cmd;
	struct sockaddr *sa;
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_ctlinput)
				(*pr->pr_ctlinput)(cmd, sa, (caddr_t)0);
}

/*
 * pfslowtimo --
 *	Slow protocol timer: run every protocol's pr_slowtimo handler,
 *	then re-arm itself to fire again in hz/2 ticks (twice a second).
 */
void
pfslowtimo(arg)
	void *arg;
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_slowtimo)
				(*pr->pr_slowtimo)();
	timeout(pfslowtimo, NULL, hz/2);
}

/*
 * pffasttimo --
 *	Fast protocol timer: run every protocol's pr_fasttimo handler,
 *	then re-arm itself to fire again in hz/5 ticks (5 times/sec).
 */
void
pffasttimo(arg)
	void *arg;
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr =
dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_fasttimo) + (*pr->pr_fasttimo)(); + timeout(pffasttimo, NULL, hz/5); +} diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c new file mode 100644 index 000000000000..62abfd5e171a --- /dev/null +++ b/sys/kern/uipc_mbuf.c @@ -0,0 +1,660 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/map.h> +#define MBTYPES +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/syslog.h> +#include <sys/domain.h> +#include <sys/protosw.h> + +#include <vm/vm.h> + +extern vm_map_t mb_map; +struct mbuf *mbutl; +char *mclrefcnt; + +void +mbinit() +{ + int s; + + s = splimp(); + if (m_clalloc(max(4096/CLBYTES, 1), M_DONTWAIT) == 0) + goto bad; + splx(s); + return; +bad: + panic("mbinit"); +} + +/* + * Allocate some number of mbuf clusters + * and place on cluster free list. + * Must be called at splimp. + */ +/* ARGSUSED */ +int +m_clalloc(ncl, nowait) + register int ncl; + int nowait; +{ + static int logged; + register caddr_t p; + register int i; + int npg; + + npg = ncl * CLSIZE; + p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait); + if (p == NULL) { + if (logged == 0) { + logged++; + log(LOG_ERR, "mb_map full\n"); + } + return (0); + } + ncl = ncl * CLBYTES / MCLBYTES; + for (i = 0; i < ncl; i++) { + ((union mcluster *)p)->mcl_next = mclfree; + mclfree = (union mcluster *)p; + p += MCLBYTES; + mbstat.m_clfree++; + } + mbstat.m_clusters += ncl; + return (1); +} + +/* + * When MGET failes, ask protocols to free space when short of memory, + * then re-attempt to allocate an mbuf. 
+ */ +struct mbuf * +m_retry(i, t) + int i, t; +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retry(i, t) (struct mbuf *)0 + MGET(m, i, t); +#undef m_retry + return (m); +} + +/* + * As above; retry an MGETHDR. + */ +struct mbuf * +m_retryhdr(i, t) + int i, t; +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retryhdr(i, t) (struct mbuf *)0 + MGETHDR(m, i, t); +#undef m_retryhdr + return (m); +} + +void +m_reclaim() +{ + register struct domain *dp; + register struct protosw *pr; + int s = splimp(); + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + splx(s); + mbstat.m_drain++; +} + +/* + * Space allocation routines. + * These are also available as macros + * for critical paths. + */ +struct mbuf * +m_get(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGET(m, nowait, type); + return (m); +} + +struct mbuf * +m_gethdr(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGETHDR(m, nowait, type); + return (m); +} + +struct mbuf * +m_getclr(nowait, type) + int nowait, type; +{ + register struct mbuf *m; + + MGET(m, nowait, type); + if (m == 0) + return (0); + bzero(mtod(m, caddr_t), MLEN); + return (m); +} + +struct mbuf * +m_free(m) + struct mbuf *m; +{ + register struct mbuf *n; + + MFREE(m, n); + return (n); +} + +void +m_freem(m) + register struct mbuf *m; +{ + register struct mbuf *n; + + if (m == NULL) + return; + do { + MFREE(m, n); + } while (m = n); +} + +/* + * Mbuffer utility routines. + */ + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. 
+ */ +struct mbuf * +m_prepend(m, len, how) + register struct mbuf *m; + int len, how; +{ + struct mbuf *mn; + + MGET(mn, how, m->m_type); + if (mn == (struct mbuf *)NULL) { + m_freem(m); + return ((struct mbuf *)NULL); + } + if (m->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(mn, m); + m->m_flags &= ~M_PKTHDR; + } + mn->m_next = m; + m = mn; + if (len < MHLEN) + MH_ALIGN(m, len); + m->m_len = len; + return (m); +} + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. + * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. + */ +int MCFail; + +struct mbuf * +m_copym(m, off0, len, wait) + register struct mbuf *m; + int off0, wait; + register int len; +{ + register struct mbuf *n, **np; + register int off = off0; + struct mbuf *top; + int copyhdr = 0; + + if (off < 0 || len < 0) + panic("m_copym"); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + if (m == 0) + panic("m_copym"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = &top; + top = 0; + while (len > 0) { + if (m == 0) { + if (len != M_COPYALL) + panic("m_copym"); + break; + } + MGET(n, wait, m->m_type); + *np = n; + if (n == 0) + goto nospace; + if (copyhdr) { + M_COPY_PKTHDR(n, m); + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (unsigned)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == 0) + MCFail++; + return (top); +nospace: + m_freem(top); + MCFail++; + return (0); +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the 
indicated buffer. + */ +void +m_copydata(m, off, len, cp) + register struct mbuf *m; + register int off; + register int len; + caddr_t cp; +{ + register unsigned count; + + if (off < 0 || len < 0) + panic("m_copydata"); + while (off > 0) { + if (m == 0) + panic("m_copydata"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) + panic("m_copydata"); + count = min(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + +/* + * Concatenate mbuf chain n to m. + * Both chains must be of the same type (e.g. MT_DATA). + * Any m_pkthdr is not updated. + */ +void +m_cat(m, n) + register struct mbuf *m, *n; +{ + while (m->m_next) + m = m->m_next; + while (n) { + if (m->m_flags & M_EXT || + m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + /* just join the two chains */ + m->m_next = n; + return; + } + /* splat the data from one into the other */ + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); + m->m_len += n->m_len; + n = m_free(n); + } +} + +void +m_adj(mp, req_len) + struct mbuf *mp; + int req_len; +{ + register int len = req_len; + register struct mbuf *m; + register count; + + if ((m = mp) == NULL) + return; + if (len >= 0) { + /* + * Trim from head. + */ + while (m != NULL && len > 0) { + if (m->m_len <= len) { + len -= m->m_len; + m->m_len = 0; + m = m->m_next; + } else { + m->m_len -= len; + m->m_data += len; + len = 0; + } + } + m = mp; + if (mp->m_flags & M_PKTHDR) + m->m_pkthdr.len -= (req_len - len); + } else { + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. 
+ */ + len = -len; + count = 0; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len >= len) { + m->m_len -= len; + if (mp->m_flags & M_PKTHDR) + mp->m_pkthdr.len -= len; + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. + */ + m = mp; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len = count; + for (; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + break; + } + count -= m->m_len; + } + while (m = m->m_next) + m->m_len = 0; + } +} + +/* + * Rearange an mbuf chain so that len bytes are contiguous + * and in the data area of an mbuf (so that mtod and dtom + * will work for a structure of size len). Returns the resulting + * mbuf chain on success, frees it and returns null on failure. + * If there is room, it will add up to max_protohdr-len extra bytes to the + * contiguous region in an attempt to avoid being called next time. + */ +int MPFail; + +struct mbuf * +m_pullup(n, len) + register struct mbuf *n; + int len; +{ + register struct mbuf *m; + register int count; + int space; + + /* + * If first mbuf has no cluster, and has room for len bytes + * without shifting current data, pullup into it, + * otherwise allocate a new mbuf to prepend to the chain. 
+ */ + if ((n->m_flags & M_EXT) == 0 && + n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MHLEN) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == 0) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(m, n); + n->m_flags &= ~M_PKTHDR; + } + } + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else + n = m_free(n); + } while (len > 0 && n); + if (len > 0) { + (void) m_free(m); + goto bad; + } + m->m_next = n; + return (m); +bad: + m_freem(n); + MPFail++; + return (0); +} + +/* + * Partition an mbuf chain in two pieces, returning the tail -- + * all but the first len0 bytes. In case of failure, it returns NULL and + * attempts to restore the chain to its original state. 
+ */ +struct mbuf * +m_split(m0, len0, wait) + register struct mbuf *m0; + int len0, wait; +{ + register struct mbuf *m, *n; + unsigned len = len0, remain; + + for (m = m0; m && len > m->m_len; m = m->m_next) + len -= m->m_len; + if (m == 0) + return (0); + remain = m->m_len - len; + if (m0->m_flags & M_PKTHDR) { + MGETHDR(n, wait, m0->m_type); + if (n == 0) + return (0); + n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; + n->m_pkthdr.len = m0->m_pkthdr.len - len0; + m0->m_pkthdr.len = len0; + if (m->m_flags & M_EXT) + goto extpacket; + if (remain > MHLEN) { + /* m can't be the lead packet */ + MH_ALIGN(n, 0); + n->m_next = m_split(m, len, wait); + if (n->m_next == 0) { + (void) m_free(n); + return (0); + } else + return (n); + } else + MH_ALIGN(n, remain); + } else if (remain == 0) { + n = m->m_next; + m->m_next = 0; + return (n); + } else { + MGET(n, wait, m->m_type); + if (n == 0) + return (0); + M_ALIGN(n, remain); + } +extpacket: + if (m->m_flags & M_EXT) { + n->m_flags |= M_EXT; + n->m_ext = m->m_ext; + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ + n->m_data = m->m_data + len; + } else { + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); + } + n->m_len = remain; + m->m_len = len; + n->m_next = m->m_next; + m->m_next = 0; + return (n); +} +/* + * Routine to copy from device local memory into mbufs. + */ +struct mbuf * +m_devget(buf, totlen, off0, ifp, copy) + char *buf; + int totlen, off0; + struct ifnet *ifp; + void (*copy)(); +{ + register struct mbuf *m; + struct mbuf *top = 0, **mp = &top; + register int off = off0, len; + register char *cp; + char *epkt; + + cp = buf; + epkt = cp + totlen; + if (off) { + /* + * If 'off' is non-zero, packet is trailer-encapsulated, + * so we have to skip the type and length fields. 
+ */ + cp += off + 2 * sizeof(u_int16_t); + totlen -= 2 * sizeof(u_int16_t); + } + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == 0) + return (0); + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; + m->m_len = MHLEN; + + while (totlen > 0) { + if (top) { + MGET(m, M_DONTWAIT, MT_DATA); + if (m == 0) { + m_freem(top); + return (0); + } + m->m_len = MLEN; + } + len = min(totlen, epkt - cp); + if (len >= MINCLSIZE) { + MCLGET(m, M_DONTWAIT); + if (m->m_flags & M_EXT) + m->m_len = len = min(len, MCLBYTES); + else + len = m->m_len; + } else { + /* + * Place initial small packet/header at end of mbuf. + */ + if (len < m->m_len) { + if (top == 0 && len + max_linkhdr <= m->m_len) + m->m_data += max_linkhdr; + m->m_len = len; + } else + len = m->m_len; + } + if (copy) + copy(cp, mtod(m, caddr_t), (unsigned)len); + else + bcopy(cp, mtod(m, caddr_t), (unsigned)len); + cp += len; + *mp = m; + mp = &m->m_next; + totlen -= len; + if (cp == epkt) + cp = buf; + } + return (top); +} diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c new file mode 100644 index 000000000000..e89a84c2f51e --- /dev/null +++ b/sys/kern/uipc_proto.c @@ -0,0 +1,72 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_proto.c 8.2 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <sys/mbuf.h> + +/* + * Definitions of protocols supported in the UNIX domain. 
+ */ + +int uipc_usrreq(), raw_usrreq(); +void raw_init(), raw_input(), raw_ctlinput(); +extern struct domain unixdomain; /* or at least forward */ + +struct protosw unixsw[] = { +{ SOCK_STREAM, &unixdomain, 0, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ SOCK_DGRAM, &unixdomain, 0, PR_ATOMIC|PR_ADDR|PR_RIGHTS, + 0, 0, 0, 0, + uipc_usrreq, + 0, 0, 0, 0, +}, +{ 0, 0, 0, 0, + raw_input, 0, raw_ctlinput, 0, + raw_usrreq, + raw_init, 0, 0, 0, +} +}; + +int unp_externalize(), unp_dispose(); + +struct domain unixdomain = + { AF_UNIX, "unix", 0, unp_externalize, unp_dispose, + unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] }; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c new file mode 100644 index 000000000000..a9c5453e96ed --- /dev/null +++ b/sys/kern/uipc_socket.c @@ -0,0 +1,1040 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/kernel.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/resourcevar.h> + +/* + * Socket operation routines. + * These routines are called by the routines in + * sys_socket.c or from a system process, and + * implement the semantics of socket operations by + * switching out to the protocol specific routines. 
+ */ +/*ARGSUSED*/ +int +socreate(dom, aso, type, proto) + int dom; + struct socket **aso; + register int type; + int proto; +{ + struct proc *p = curproc; /* XXX */ + register struct protosw *prp; + register struct socket *so; + register int error; + + if (proto) + prp = pffindproto(dom, proto, type); + else + prp = pffindtype(dom, type); + if (prp == 0 || prp->pr_usrreq == 0) + return (EPROTONOSUPPORT); + if (prp->pr_type != type) + return (EPROTOTYPE); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); + bzero((caddr_t)so, sizeof(*so)); + so->so_type = type; + if (p->p_ucred->cr_uid == 0) + so->so_state = SS_PRIV; + so->so_proto = prp; + error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, + (struct mbuf *)(long)proto, (struct mbuf *)0); + if (error) { + so->so_state |= SS_NOFDREF; + sofree(so); + return (error); + } + *aso = so; + return (0); +} + +int +sobind(so, nam) + struct socket *so; + struct mbuf *nam; +{ + int s = splnet(); + int error; + + error = + (*so->so_proto->pr_usrreq)(so, PRU_BIND, + (struct mbuf *)0, nam, (struct mbuf *)0); + splx(s); + return (error); +} + +int +solisten(so, backlog) + register struct socket *so; + int backlog; +{ + int s = splnet(), error; + + error = + (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); + if (error) { + splx(s); + return (error); + } + if (so->so_q == 0) + so->so_options |= SO_ACCEPTCONN; + if (backlog < 0) + backlog = 0; + so->so_qlimit = min(backlog, SOMAXCONN); + splx(s); + return (0); +} + +int +sofree(so) + register struct socket *so; +{ + + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + return; + if (so->so_head) { + if (!soqremque(so, 0) && !soqremque(so, 1)) + panic("sofree dq"); + so->so_head = 0; + } + sbrelease(&so->so_snd); + sorflush(so); + FREE(so, M_SOCKET); +} + +/* + * Close a socket on last file table reference removal. + * Initiate disconnect if connected. + * Free socket when disconnect complete. 
+ */ +int +soclose(so) + register struct socket *so; +{ + int s = splnet(); /* conservative */ + int error = 0; + + if (so->so_options & SO_ACCEPTCONN) { + while (so->so_q0) + (void) soabort(so->so_q0); + while (so->so_q) + (void) soabort(so->so_q); + } + if (so->so_pcb == 0) + goto discard; + if (so->so_state & SS_ISCONNECTED) { + if ((so->so_state & SS_ISDISCONNECTING) == 0) { + error = sodisconnect(so); + if (error) + goto drop; + } + if (so->so_options & SO_LINGER) { + if ((so->so_state & SS_ISDISCONNECTING) && + (so->so_state & SS_NBIO)) + goto drop; + while (so->so_state & SS_ISCONNECTED) + if (error = tsleep((caddr_t)&so->so_timeo, + PSOCK | PCATCH, netcls, so->so_linger * hz)) + break; + } + } +drop: + if (so->so_pcb) { + int error2 = + (*so->so_proto->pr_usrreq)(so, PRU_DETACH, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); + if (error == 0) + error = error2; + } +discard: + if (so->so_state & SS_NOFDREF) + panic("soclose: NOFDREF"); + so->so_state |= SS_NOFDREF; + sofree(so); + splx(s); + return (error); +} + +/* + * Must be called at splnet... + */ +int +soabort(so) + struct socket *so; +{ + + return ( + (*so->so_proto->pr_usrreq)(so, PRU_ABORT, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)); +} + +int +soaccept(so, nam) + register struct socket *so; + struct mbuf *nam; +{ + int s = splnet(); + int error; + + if ((so->so_state & SS_NOFDREF) == 0) + panic("soaccept: !NOFDREF"); + so->so_state &= ~SS_NOFDREF; + error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, + (struct mbuf *)0, nam, (struct mbuf *)0); + splx(s); + return (error); +} + +int +soconnect(so, nam) + register struct socket *so; + struct mbuf *nam; +{ + int s; + int error; + + if (so->so_options & SO_ACCEPTCONN) + return (EOPNOTSUPP); + s = splnet(); + /* + * If protocol is connection-based, can only connect once. + * Otherwise, if connected, try to disconnect first. + * This allows user to disconnect by connecting to, e.g., + * a null address. 
+ */ + if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) || + (error = sodisconnect(so)))) + error = EISCONN; + else + error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, + (struct mbuf *)0, nam, (struct mbuf *)0); + splx(s); + return (error); +} + +int +soconnect2(so1, so2) + register struct socket *so1; + struct socket *so2; +{ + int s = splnet(); + int error; + + error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, + (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0); + splx(s); + return (error); +} + +int +sodisconnect(so) + register struct socket *so; +{ + int s = splnet(); + int error; + + if ((so->so_state & SS_ISCONNECTED) == 0) { + error = ENOTCONN; + goto bad; + } + if (so->so_state & SS_ISDISCONNECTING) { + error = EALREADY; + goto bad; + } + error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0); +bad: + splx(s); + return (error); +} + +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) +/* + * Send on a socket. + * If send must go all at once and message is larger than + * send buffering, then hard error. + * Lock against other senders. + * If must go all at once and not enough room now, then + * inform user that this would block and do nothing. + * Otherwise, if nonblocking, send as much as possible. + * The data to be sent is described by "uio" if nonzero, + * otherwise by the mbuf chain "top" (which must be null + * if uio is not). Data provided in mbuf chain must be small + * enough to send all at once. + * + * Returns nonzero on error, timeout or signal; callers + * must check for short counts if EINTR/ERESTART are returned. + * Data and control buffers are freed on return. 
+ */ +int +sosend(so, addr, uio, top, control, flags) + register struct socket *so; + struct mbuf *addr; + struct uio *uio; + struct mbuf *top; + struct mbuf *control; + int flags; +{ + struct proc *p = curproc; /* XXX */ + struct mbuf **mp; + register struct mbuf *m; + register long space, len, resid; + int clen = 0, error, s, dontroute, mlen; + int atomic = sosendallatonce(so) || top; + + if (uio) + resid = uio->uio_resid; + else + resid = top->m_pkthdr.len; + /* + * In theory resid should be unsigned. + * However, space must be signed, as it might be less than 0 + * if we over-committed, and we must use a signed comparison + * of space and resid. On the other hand, a negative resid + * causes us to loop sending 0-length segments to the protocol. + */ + if (resid < 0) + return (EINVAL); + dontroute = + (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && + (so->so_proto->pr_flags & PR_ATOMIC); + p->p_stats->p_ru.ru_msgsnd++; + if (control) + clen = control->m_len; +#define snderr(errno) { error = errno; splx(s); goto release; } + +restart: + if (error = sblock(&so->so_snd, SBLOCKWAIT(flags))) + goto out; + do { + s = splnet(); + if (so->so_state & SS_CANTSENDMORE) + snderr(EPIPE); + if (so->so_error) + snderr(so->so_error); + if ((so->so_state & SS_ISCONNECTED) == 0) { + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) + snderr(ENOTCONN); + } else if (addr == 0) + snderr(EDESTADDRREQ); + } + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + if (atomic && resid > so->so_snd.sb_hiwat || + clen > so->so_snd.sb_hiwat) + snderr(EMSGSIZE); + if (space < resid + clen && uio && + (atomic || space < so->so_snd.sb_lowat || space < clen)) { + if (so->so_state & SS_NBIO) + snderr(EWOULDBLOCK); + sbunlock(&so->so_snd); + error = sbwait(&so->so_snd); + splx(s); + if (error) + goto out; + goto restart; + } + splx(s); + mp = &top; + space -= clen; + do { + if (uio == NULL) 
{ + /* + * Data is prepackaged in "top". + */ + resid = 0; + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + } else do { + if (top == 0) { + MGETHDR(m, M_WAIT, MT_DATA); + mlen = MHLEN; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_WAIT, MT_DATA); + mlen = MLEN; + } + if (resid >= MINCLSIZE && space >= MCLBYTES) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) + goto nopages; + mlen = MCLBYTES; +#ifdef MAPPED_MBUFS + len = min(MCLBYTES, resid); +#else + if (atomic && top == 0) { + len = min(MCLBYTES - max_hdr, resid); + m->m_data += max_hdr; + } else + len = min(MCLBYTES, resid); +#endif + space -= MCLBYTES; + } else { +nopages: + len = min(min(mlen, resid), space); + space -= len; + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && top == 0 && len < mlen) + MH_ALIGN(m, len); + } + error = uiomove(mtod(m, caddr_t), (int)len, uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid <= 0) { + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + break; + } + } while (space > 0 && atomic); + if (dontroute) + so->so_options |= SO_DONTROUTE; + s = splnet(); /* XXX */ + error = (*so->so_proto->pr_usrreq)(so, + (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, + top, addr, control); + splx(s); + if (dontroute) + so->so_options &= ~SO_DONTROUTE; + clen = 0; + control = 0; + top = 0; + mp = &top; + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + sbunlock(&so->so_snd); +out: + if (top) + m_freem(top); + if (control) + m_freem(control); + return (error); +} + +/* + * Implement receive operations on a socket. + * We depend on the way that records are added to the sockbuf + * by sbappend*. 
In particular, each record (mbufs linked through m_next) + * must begin with an address if the protocol so specifies, + * followed by an optional mbuf or mbufs containing ancillary data, + * and then zero or more mbufs of data. + * In order to avoid blocking network interrupts for the entire time here, + * we splx() while doing the actual copy to user space. + * Although the sockbuf is locked, new data may still be appended, + * and thus we must maintain consistency of the sockbuf during that time. + * + * The caller may receive the data as a single mbuf chain by supplying + * an mbuf **mp0 for use in returning the chain. The uio is then used + * only for the count in uio_resid. + */ +int +soreceive(so, paddr, uio, mp0, controlp, flagsp) + register struct socket *so; + struct mbuf **paddr; + struct uio *uio; + struct mbuf **mp0; + struct mbuf **controlp; + int *flagsp; +{ + register struct mbuf *m, **mp; + register int flags, len, error, s, offset; + struct protosw *pr = so->so_proto; + struct mbuf *nextrecord; + int moff, type; + int orig_resid = uio->uio_resid; + + mp = mp0; + if (paddr) + *paddr = 0; + if (controlp) + *controlp = 0; + if (flagsp) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + if (flags & MSG_OOB) { + m = m_get(M_WAIT, MT_DATA); + error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, + (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0); + if (error) + goto bad; + do { + error = uiomove(mtod(m, caddr_t), + (int) min(uio->uio_resid, m->m_len), uio); + m = m_free(m); + } while (uio->uio_resid && error == 0 && m); +bad: + if (m) + m_freem(m); + return (error); + } + if (mp) + *mp = (struct mbuf *)0; + if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) + (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, + (struct mbuf *)0, (struct mbuf *)0); + +restart: + if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) + return (error); + s = splnet(); + + m = so->so_rcv.sb_mb; + /* + * If we have less data than requested, block awaiting more + * (subject to 
any timeout) if: + * 1. the current count is less than the low water mark, or + * 2. MSG_WAITALL is set, and it is possible to do the entire + * receive operation at once if we block (resid <= hiwat), or + * 3. MSG_DONTWAIT is not set. + * If MSG_WAITALL is set but resid is larger than the receive buffer, + * we have to do the receive in sections, and thus risk returning + * a short count if a timeout or signal occurs after we start. + */ + if (m == 0 || ((flags & MSG_DONTWAIT) == 0 && + so->so_rcv.sb_cc < uio->uio_resid) && + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) { +#ifdef DIAGNOSTIC + if (m == 0 && so->so_rcv.sb_cc) + panic("receive 1"); +#endif + if (so->so_error) { + if (m) + goto dontblock; + error = so->so_error; + if ((flags & MSG_PEEK) == 0) + so->so_error = 0; + goto release; + } + if (so->so_state & SS_CANTRCVMORE) { + if (m) + goto dontblock; + else + goto release; + } + for (; m; m = m->m_next) + if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { + m = so->so_rcv.sb_mb; + goto dontblock; + } + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + error = ENOTCONN; + goto release; + } + if (uio->uio_resid == 0) + goto release; + if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { + error = EWOULDBLOCK; + goto release; + } + sbunlock(&so->so_rcv); + error = sbwait(&so->so_rcv); + splx(s); + if (error) + return (error); + goto restart; + } +dontblock: + if (uio->uio_procp) + uio->uio_procp->p_stats->p_ru.ru_msgrcv++; + nextrecord = m->m_nextpkt; + if (pr->pr_flags & PR_ADDR) { +#ifdef DIAGNOSTIC + if (m->m_type != MT_SONAME) + panic("receive 1a"); +#endif + orig_resid = 0; + if (flags & MSG_PEEK) { + if (paddr) + *paddr = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (paddr) { + *paddr = m; + so->so_rcv.sb_mb = m->m_next; + 
m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + } + while (m && m->m_type == MT_CONTROL && error == 0) { + if (flags & MSG_PEEK) { + if (controlp) + *controlp = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (controlp) { + if (pr->pr_domain->dom_externalize && + mtod(m, struct cmsghdr *)->cmsg_type == + SCM_RIGHTS) + error = (*pr->pr_domain->dom_externalize)(m); + *controlp = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + if (controlp) { + orig_resid = 0; + controlp = &(*controlp)->m_next; + } + } + if (m) { + if ((flags & MSG_PEEK) == 0) + m->m_nextpkt = nextrecord; + type = m->m_type; + if (type == MT_OOBDATA) + flags |= MSG_OOB; + } + moff = 0; + offset = 0; + while (m && uio->uio_resid > 0 && error == 0) { + if (m->m_type == MT_OOBDATA) { + if (type != MT_OOBDATA) + break; + } else if (type == MT_OOBDATA) + break; +#ifdef DIAGNOSTIC + else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) + panic("receive 3"); +#endif + so->so_state &= ~SS_RCVATMARK; + len = uio->uio_resid; + if (so->so_oobmark && len > so->so_oobmark - offset) + len = so->so_oobmark - offset; + if (len > m->m_len - moff) + len = m->m_len - moff; + /* + * If mp is set, just pass back the mbufs. + * Otherwise copy them out via the uio, then free. + * Sockbuf must be consistent here (points to current mbuf, + * it points to next record) when we drop priority; + * we must note any additions to the sockbuf when we + * block interrupts again. 
+ */ + if (mp == 0) { + splx(s); + error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); + s = splnet(); + } else + uio->uio_resid -= len; + if (len == m->m_len - moff) { + if (m->m_flags & M_EOR) + flags |= MSG_EOR; + if (flags & MSG_PEEK) { + m = m->m_next; + moff = 0; + } else { + nextrecord = m->m_nextpkt; + sbfree(&so->so_rcv, m); + if (mp) { + *mp = m; + mp = &m->m_next; + so->so_rcv.sb_mb = m = m->m_next; + *mp = (struct mbuf *)0; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + if (m) + m->m_nextpkt = nextrecord; + } + } else { + if (flags & MSG_PEEK) + moff += len; + else { + if (mp) + *mp = m_copym(m, 0, len, M_WAIT); + m->m_data += len; + m->m_len -= len; + so->so_rcv.sb_cc -= len; + } + } + if (so->so_oobmark) { + if ((flags & MSG_PEEK) == 0) { + so->so_oobmark -= len; + if (so->so_oobmark == 0) { + so->so_state |= SS_RCVATMARK; + break; + } + } else { + offset += len; + if (offset == so->so_oobmark) + break; + } + } + if (flags & MSG_EOR) + break; + /* + * If the MSG_WAITALL flag is set (for non-atomic socket), + * we must not quit until "uio->uio_resid == 0" or an error + * termination. If a signal/timeout occurs, return + * with a short count but without error. + * Keep sockbuf locked against other readers. 
+ */ + while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && + !sosendallatonce(so) && !nextrecord) { + if (so->so_error || so->so_state & SS_CANTRCVMORE) + break; + error = sbwait(&so->so_rcv); + if (error) { + sbunlock(&so->so_rcv); + splx(s); + return (0); + } + if (m = so->so_rcv.sb_mb) + nextrecord = m->m_nextpkt; + } + } + + if (m && pr->pr_flags & PR_ATOMIC) { + flags |= MSG_TRUNC; + if ((flags & MSG_PEEK) == 0) + (void) sbdroprecord(&so->so_rcv); + } + if ((flags & MSG_PEEK) == 0) { + if (m == 0) + so->so_rcv.sb_mb = nextrecord; + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) + (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, + (struct mbuf *)(long)flags, (struct mbuf *)0, + (struct mbuf *)0); + } + if (orig_resid == uio->uio_resid && orig_resid && + (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { + sbunlock(&so->so_rcv); + splx(s); + goto restart; + } + + if (flagsp) + *flagsp |= flags; +release: + sbunlock(&so->so_rcv); + splx(s); + return (error); +} + +int +soshutdown(so, how) + register struct socket *so; + register int how; +{ + register struct protosw *pr = so->so_proto; + + how++; + if (how & FREAD) + sorflush(so); + if (how & FWRITE) + return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN, + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)); + return (0); +} + +void +sorflush(so) + register struct socket *so; +{ + register struct sockbuf *sb = &so->so_rcv; + register struct protosw *pr = so->so_proto; + register int s; + struct sockbuf asb; + + sb->sb_flags |= SB_NOINTR; + (void) sblock(sb, M_WAITOK); + s = splimp(); + socantrcvmore(so); + sbunlock(sb); + asb = *sb; + bzero((caddr_t)sb, sizeof (*sb)); + splx(s); + if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) + (*pr->pr_domain->dom_dispose)(asb.sb_mb); + sbrelease(&asb); +} + +int +sosetopt(so, level, optname, m0) + register struct socket *so; + int level, optname; + struct mbuf *m0; +{ + int error = 0; + register struct mbuf *m = m0; + + if (level != SOL_SOCKET) { 
+ if (so->so_proto && so->so_proto->pr_ctloutput) + return ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + error = ENOPROTOOPT; + } else { + switch (optname) { + + case SO_LINGER: + if (m == NULL || m->m_len != sizeof (struct linger)) { + error = EINVAL; + goto bad; + } + so->so_linger = mtod(m, struct linger *)->l_linger; + /* fall thru... */ + + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_USELOOPBACK: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_OOBINLINE: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + break; + + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDLOWAT: + case SO_RCVLOWAT: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + switch (optname) { + + case SO_SNDBUF: + case SO_RCVBUF: + if (sbreserve(optname == SO_SNDBUF ? + &so->so_snd : &so->so_rcv, + (u_long) *mtod(m, int *)) == 0) { + error = ENOBUFS; + goto bad; + } + break; + + case SO_SNDLOWAT: + so->so_snd.sb_lowat = *mtod(m, int *); + break; + case SO_RCVLOWAT: + so->so_rcv.sb_lowat = *mtod(m, int *); + break; + } + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + struct timeval *tv; + short val; + + if (m == NULL || m->m_len < sizeof (*tv)) { + error = EINVAL; + goto bad; + } + tv = mtod(m, struct timeval *); + if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { + error = EDOM; + goto bad; + } + val = tv->tv_sec * hz + tv->tv_usec / tick; + + switch (optname) { + + case SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + } + + default: + error = ENOPROTOOPT; + break; + } + if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { + (void) ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + m = NULL; /* freed by protocol */ + } + } +bad: + if (m) + (void) 
m_free(m); + return (error); +} + +int +sogetopt(so, level, optname, mp) + register struct socket *so; + int level, optname; + struct mbuf **mp; +{ + register struct mbuf *m; + + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) { + return ((*so->so_proto->pr_ctloutput) + (PRCO_GETOPT, so, level, optname, mp)); + } else + return (ENOPROTOOPT); + } else { + m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof (int); + + switch (optname) { + + case SO_LINGER: + m->m_len = sizeof (struct linger); + mtod(m, struct linger *)->l_onoff = + so->so_options & SO_LINGER; + mtod(m, struct linger *)->l_linger = so->so_linger; + break; + + case SO_USELOOPBACK: + case SO_DONTROUTE: + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_BROADCAST: + case SO_OOBINLINE: + *mtod(m, int *) = so->so_options & optname; + break; + + case SO_TYPE: + *mtod(m, int *) = so->so_type; + break; + + case SO_ERROR: + *mtod(m, int *) = so->so_error; + so->so_error = 0; + break; + + case SO_SNDBUF: + *mtod(m, int *) = so->so_snd.sb_hiwat; + break; + + case SO_RCVBUF: + *mtod(m, int *) = so->so_rcv.sb_hiwat; + break; + + case SO_SNDLOWAT: + *mtod(m, int *) = so->so_snd.sb_lowat; + break; + + case SO_RCVLOWAT: + *mtod(m, int *) = so->so_rcv.sb_lowat; + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + int val = (optname == SO_SNDTIMEO ? 
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + m->m_len = sizeof(struct timeval); + mtod(m, struct timeval *)->tv_sec = val / hz; + mtod(m, struct timeval *)->tv_usec = + (val % hz) * tick; + break; + } + + default: + (void)m_free(m); + return (ENOPROTOOPT); + } + *mp = m; + return (0); + } +} + +void +sohasoutofband(so) + register struct socket *so; +{ + struct proc *p; + + if (so->so_pgid < 0) + gsignal(-so->so_pgid, SIGURG); + else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) + psignal(p, SIGURG); + selwakeup(&so->so_rcv.sb_sel); +} diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c new file mode 100644 index 000000000000..865108aaee3f --- /dev/null +++ b/sys/kern/uipc_socket2.c @@ -0,0 +1,779 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +/* + * Primitive routines for operating on sockets and socket buffers + */ + +/* strings for sleep message: */ +char netio[] = "netio"; +char netcon[] = "netcon"; +char netcls[] = "netcls"; + +u_long sb_max = SB_MAX; /* patchable */ + +/* + * Procedures to manipulate state flags of socket + * and do appropriate wakeups. Normal sequence from the + * active (originating) side is that soisconnecting() is + * called during processing of connect() call, + * resulting in an eventual call to soisconnected() if/when the + * connection is established. When the connection is torn down + * soisdisconnecting() is called during processing of disconnect() call, + * and soisdisconnected() is called when the connection to the peer + * is totally severed. 
The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Mark a socket as having an outbound connection attempt in progress:
 * clear any stale connected/disconnecting state and set SS_ISCONNECTING.
 */
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark a socket as connected.  For an incoming connection (so_head set,
 * socket still on the partial queue so_q0), move it to the completed
 * queue so_q and wake up the accepting socket.  Otherwise wake up
 * anyone sleeping in connect() (on so_timeo) and any readers/writers.
 */
void
soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* incoming connection: promote from so_q0 to so_q */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Mark a socket as disconnecting: no further data may be sent or
 * received, and sleepers in connect()/accept() and in the send and
 * receive buffers are woken so they can observe the state change.
 */
void
soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Mark a socket as fully disconnected from its peer and wake all
 * sleepers, as in soisdisconnecting() above.
 */
void
soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	/* (tail of soisdisconnected: wake up readers too) */
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int soqueue = connstatus ? 1 : 0;	/* completed queue iff status given */

	/*
	 * Refuse the connection if the listen backlog (both partial and
	 * completed queues) already exceeds 3/2 of the user's limit.
	 */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	/* M_DONTWAIT: called from protocol (interrupt) context; may fail. */
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
	if (so == NULL)
		return ((struct socket *)0);
	bzero((caddr_t)so, sizeof(*so));
	/* Inherit type, options (minus accepting), state, etc. from listener. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	/* Let the protocol attach; on failure undo the queue insert and free. */
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		(void) soqremque(so, soqueue);
		(void) free((caddr_t)so, M_SOCKET);
		return ((struct socket *)0);
	}
	if (connstatus) {
		/* Connection already established: notify the accepting side. */
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket so at the tail of head's partial (q == 0, so_q0) or
 * completed (q != 0, so_q) connection queue.
 */
void
soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	register struct socket **prev;
	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = 0;
		/* walk the singly-linked list to its tail */
		for (prev = &(head->so_q0); *prev; )
			prev = &((*prev)->so_q0);
	} else {
		head->so_qlen++;
		so->so_q = 0;
		for (prev = &(head->so_q); *prev; )
			prev = &((*prev)->so_q);
	}
	*prev = so;
}

/*
 * Remove socket so from its head's partial (q == 0) or completed
 * (q != 0) connection queue.  Returns 1 if the socket was found and
 * unlinked, 0 if it was not on the indicated queue.
 */
int
soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	/* linear search of the singly-linked queue for so */
	for (;;) {
		next = q ? prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == 0)
			return (0);
		prev = next;
	}
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);	/* unblock writers so they see the EOF condition */
}

void
socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);	/* unblock readers so they see the EOF condition */
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Sleeps on sb_cc; the sleep is interruptible (PCATCH) unless
 * SB_NOINTR is set, and is bounded by the buffer's timeout sb_timeo.
 * Returns the tsleep() error (0, EINTR, EWOULDBLOCK, ...).
 */
int
sbwait(sb)
	struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 * NOTE(review): despite the historical wording above, this sleeps while
 * SB_LOCK is held by someone else, then takes the lock itself (it is the
 * slow path of the sblock() macro) -- confirm against socketvar.h.
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;	/* ask the holder to wake us */
		if (error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netio, 0))
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p;

	/* wake select()ers, then any process sleeping in sbwait() */
	selwakeup(&sb->sb_sel);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		/*
		 * so_pgid < 0 names a process group (signal the group);
		 * so_pgid > 0 names a single process (signal it if it
		 * still exists).
		 */
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().
This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive buffer space for a socket and establish
 * default low-water marks.  Returns 0 on success or ENOBUFS, undoing
 * the send-side reservation if the receive-side one fails.
 */
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	/* default low-water marks: 1 byte to read, one cluster to write */
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	/* low-water mark may never exceed the high-water mark */
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 if cc exceeds the system limit derived
 * from sb_max.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{

	/* cap so that sb_max bytes of mbuf+cluster overhead is never exceeded */
	if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.
In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	if (m == 0)
		return;
	if (n = sb->sb_mb) {
		/* advance to the last record, then to its last mbuf */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* last record already ended: start a new one */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	}
	/* n is the last mbuf of the last record (or nil if buffer empty) */
	sbcompress(sb, m, n);
}

#ifdef SOCKBUF_DEBUG
/*
 * Diagnostic: verify that sb_cc and sb_mbcnt agree with the actual
 * contents of the buffer's (single-record) mbuf chain; panic otherwise.
 */
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register int len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return;
	/* find the current last record, if any */
	if (m = sb->sb_mb)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* keep M_EOR on the record's final mbuf before compressing the rest */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return;
	/* skip past existing OOB records (and any leading control mbufs) */
	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if (m = m->m_next)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* as in sbappendrecord(): move M_EOR to the record's last mbuf */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	/* total space needed: address + data + control */
	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* the address must fit in a single small mbuf */
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	/* chain: name mbuf -> control chain -> data chain */
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	/* append the assembled record at the tail of the record list */
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	return (1);
}

/*
 * As above but with no address: append control data followed by data
 * as a new record.  Returns 0 if there is no space, 1 on success.
 */
int
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	/* append the record at the tail of the record list */
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf unless it carries the only M_EOR
		 * and there is no same-type successor to move the flag to.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		/*
		 * Copy small data into the tail of the previous mbuf when
		 * it has internal storage (no M_EXT), does not end a
		 * record, has room, and the types match.
		 */
		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* otherwise link the mbuf in as-is and account for it */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;	/* eor is tracked separately below */
		m = m->m_next;
		n->m_next = 0;
	}
	/* restore M_EOR onto the last mbuf kept, if any mbuf carried it */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 * len bytes are discarded, crossing record boundaries if necessary;
 * trailing zero-length mbufs of the current record are freed as well.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* current record exhausted: move to the next one */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* partial mbuf: trim from the front and stop */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* also discard any now-empty mbufs at the record head */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
+ */ +void +sbdroprecord(sb) + register struct sockbuf *sb; +{ + register struct mbuf *m, *mn; + + m = sb->sb_mb; + if (m) { + sb->sb_mb = m->m_nextpkt; + do { + sbfree(sb, m); + MFREE(m, mn); + } while (m = mn); + } +} diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c new file mode 100644 index 000000000000..800434c7f3ca --- /dev/null +++ b/sys/kern/uipc_syscalls.c @@ -0,0 +1,1263 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_syscalls.c 8.6 (Berkeley) 2/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/filedesc.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#ifdef KTRACE +#include <sys/ktrace.h> +#endif + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +/* + * System call interface to the socket abstraction. 
+ */ +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +#define COMPAT_OLDSOCK +#endif + +extern struct fileops socketops; + +int +socket(p, uap, retval) + struct proc *p; + register struct socket_args /* { + syscallarg(int) domain; + syscallarg(int) type; + syscallarg(int) protocol; + } */ *uap; + register_t *retval; +{ + struct filedesc *fdp = p->p_fd; + struct socket *so; + struct file *fp; + int fd, error; + + if (error = falloc(p, &fp, &fd)) + return (error); + fp->f_flag = FREAD|FWRITE; + fp->f_type = DTYPE_SOCKET; + fp->f_ops = &socketops; + if (error = socreate(SCARG(uap, domain), &so, SCARG(uap, type), + SCARG(uap, protocol))) { + fdp->fd_ofiles[fd] = 0; + ffree(fp); + } else { + fp->f_data = (caddr_t)so; + *retval = fd; + } + return (error); +} + +/* ARGSUSED */ +int +bind(p, uap, retval) + struct proc *p; + register struct bind_args /* { + syscallarg(int) s; + syscallarg(caddr_t) name; + syscallarg(int) namelen; + } */ *uap; + register_t *retval; +{ + struct file *fp; + struct mbuf *nam; + int error; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + if (error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), + MT_SONAME)) + return (error); + error = sobind((struct socket *)fp->f_data, nam); + m_freem(nam); + return (error); +} + +/* ARGSUSED */ +int +listen(p, uap, retval) + struct proc *p; + register struct listen_args /* { + syscallarg(int) s; + syscallarg(int) backlog; + } */ *uap; + register_t *retval; +{ + struct file *fp; + int error; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + return (solisten((struct socket *)fp->f_data, SCARG(uap, backlog))); +} + +#ifdef COMPAT_OLDSOCK +int +accept(p, uap, retval) + struct proc *p; + struct accept_args /* { + syscallarg(int) s; + syscallarg(caddr_t) name; + syscallarg(int *) anamelen; + } */ *uap; + register_t *retval; +{ + + return (accept1(p, uap, retval, 0)); +} + +int +compat_43_accept(p, uap, retval) + struct proc *p; + struct accept_args /* { + 
syscallarg(int) s; + syscallarg(caddr_t) name; + syscallarg(int *) anamelen; + } */ *uap; + register_t *retval; +{ + + return (accept1(p, uap, retval, 1)); +} +#else /* COMPAT_OLDSOCK */ + +#define accept1 accept +#endif + +int +accept1(p, uap, retval, compat_43) + struct proc *p; + register struct accept_args /* { + syscallarg(int) s; + syscallarg(caddr_t) name; + syscallarg(int *) anamelen; + } */ *uap; + register_t *retval; + int compat_43; +{ + struct file *fp; + struct mbuf *nam; + int namelen, error, s, tmpfd; + register struct socket *so; + + if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, anamelen), + (caddr_t)&namelen, sizeof (namelen)))) + return (error); + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + s = splnet(); + so = (struct socket *)fp->f_data; + if ((so->so_options & SO_ACCEPTCONN) == 0) { + splx(s); + return (EINVAL); + } + if ((so->so_state & SS_NBIO) && so->so_qlen == 0) { + splx(s); + return (EWOULDBLOCK); + } + while (so->so_qlen == 0 && so->so_error == 0) { + if (so->so_state & SS_CANTRCVMORE) { + so->so_error = ECONNABORTED; + break; + } + if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0)) { + splx(s); + return (error); + } + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + return (error); + } + if (error = falloc(p, &fp, &tmpfd)) { + splx(s); + return (error); + } + *retval = tmpfd; + { struct socket *aso = so->so_q; + if (soqremque(aso, 1) == 0) + panic("accept"); + so = aso; + } + fp->f_type = DTYPE_SOCKET; + fp->f_flag = FREAD|FWRITE; + fp->f_ops = &socketops; + fp->f_data = (caddr_t)so; + nam = m_get(M_WAIT, MT_SONAME); + (void) soaccept(so, nam); + if (SCARG(uap, name)) { +#ifdef COMPAT_OLDSOCK + if (compat_43) + mtod(nam, struct osockaddr *)->sa_family = + mtod(nam, struct sockaddr *)->sa_family; +#endif + if (namelen > nam->m_len) + namelen = nam->m_len; + /* SHOULD COPY OUT A CHAIN HERE */ + if ((error = copyout(mtod(nam, caddr_t), + 
(caddr_t)SCARG(uap, name), (u_int)namelen)) == 0) + error = copyout((caddr_t)&namelen, + (caddr_t)SCARG(uap, anamelen), + sizeof (*SCARG(uap, anamelen))); + } + m_freem(nam); + splx(s); + return (error); +} + +/* ARGSUSED */ +int +connect(p, uap, retval) + struct proc *p; + register struct connect_args /* { + syscallarg(int) s; + syscallarg(caddr_t) name; + syscallarg(int) namelen; + } */ *uap; + register_t *retval; +{ + struct file *fp; + register struct socket *so; + struct mbuf *nam; + int error, s; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) + return (EALREADY); + if (error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen), + MT_SONAME)) + return (error); + error = soconnect(so, nam); + if (error) + goto bad; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + m_freem(nam); + return (EINPROGRESS); + } + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) + if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + netcon, 0)) + break; + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + splx(s); +bad: + so->so_state &= ~SS_ISCONNECTING; + m_freem(nam); + if (error == ERESTART) + error = EINTR; + return (error); +} + +int +socketpair(p, uap, retval) + struct proc *p; + register struct socketpair_args /* { + syscallarg(int) domain; + syscallarg(int) type; + syscallarg(int) protocol; + syscallarg(int *) rsv; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + struct file *fp1, *fp2; + struct socket *so1, *so2; + int fd, error, sv[2]; + + if (error = socreate(SCARG(uap, domain), &so1, SCARG(uap, type), + SCARG(uap, protocol))) + return (error); + if (error = socreate(SCARG(uap, domain), &so2, SCARG(uap, type), + SCARG(uap, protocol))) + goto free1; + if (error = falloc(p, &fp1, &fd)) + goto free2; + sv[0] = fd; + fp1->f_flag = 
FREAD|FWRITE; + fp1->f_type = DTYPE_SOCKET; + fp1->f_ops = &socketops; + fp1->f_data = (caddr_t)so1; + if (error = falloc(p, &fp2, &fd)) + goto free3; + fp2->f_flag = FREAD|FWRITE; + fp2->f_type = DTYPE_SOCKET; + fp2->f_ops = &socketops; + fp2->f_data = (caddr_t)so2; + sv[1] = fd; + if (error = soconnect2(so1, so2)) + goto free4; + if (SCARG(uap, type) == SOCK_DGRAM) { + /* + * Datagram socket connection is asymmetric. + */ + if (error = soconnect2(so2, so1)) + goto free4; + } + error = copyout((caddr_t)sv, (caddr_t)SCARG(uap, rsv), + 2 * sizeof (int)); + retval[0] = sv[0]; /* XXX ??? */ + retval[1] = sv[1]; /* XXX ??? */ + return (error); +free4: + ffree(fp2); + fdp->fd_ofiles[sv[1]] = 0; +free3: + ffree(fp1); + fdp->fd_ofiles[sv[0]] = 0; +free2: + (void)soclose(so2); +free1: + (void)soclose(so1); + return (error); +} + +int +sendto(p, uap, retval) + struct proc *p; + register struct sendto_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(size_t) len; + syscallarg(int) flags; + syscallarg(caddr_t) to; + syscallarg(int) tolen; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = SCARG(uap, to); + msg.msg_namelen = SCARG(uap, tolen); + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + msg.msg_control = 0; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = 0; +#endif + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, len); + return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval)); +} + +#ifdef COMPAT_OLDSOCK +int +compat_43_send(p, uap, retval) + struct proc *p; + register struct compat_43_send_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(int) len; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, len); + msg.msg_control = 0; + msg.msg_flags = 0; + return 
(sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval)); +} + +#define MSG_COMPAT 0x8000 +int +compat_43_sendmsg(p, uap, retval) + struct proc *p; + register struct compat_43_sendmsg_args /* { + syscallarg(int) s; + syscallarg(caddr_t) msg; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin(SCARG(uap, msg), (caddr_t)&msg, + sizeof (struct omsghdr))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; + if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + msg.msg_flags = MSG_COMPAT; + msg.msg_iov = iov; + error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +int +sendmsg(p, uap, retval) + struct proc *p; + register struct sendmsg_args /* { + syscallarg(int) s; + syscallarg(caddr_t) msg; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin(SCARG(uap, msg), (caddr_t)&msg, sizeof (msg))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; + if (msg.msg_iovlen && + (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) + goto done; + msg.msg_iov = iov; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = 0; +#endif + error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} + +int 
+sendit(p, s, mp, flags, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + int flags; + register_t *retsize; +{ + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + struct mbuf *to, *control; + int len, error; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (error = getsock(p->p_fd, s, &fp)) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_procp = p; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) + return (EINVAL); + auio.uio_resid += iov->iov_len; + } + if (mp->msg_name) { + if (error = sockargs(&to, mp->msg_name, mp->msg_namelen, + MT_SONAME)) + return (error); + } else + to = 0; + if (mp->msg_control) { + if (mp->msg_controllen < sizeof(struct cmsghdr) +#ifdef COMPAT_OLDSOCK + && mp->msg_flags != MSG_COMPAT +#endif + ) { + error = EINVAL; + goto bad; + } + if (error = sockargs(&control, mp->msg_control, + mp->msg_controllen, MT_CONTROL)) + goto bad; +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags == MSG_COMPAT) { + register struct cmsghdr *cm; + + M_PREPEND(control, sizeof(*cm), M_WAIT); + if (control == 0) { + error = ENOBUFS; + goto bad; + } else { + cm = mtod(control, struct cmsghdr *); + cm->cmsg_len = control->m_len; + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_RIGHTS; + } + } +#endif + } else + control = 0; +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + if (error = sosend((struct socket *)fp->f_data, to, &auio, + (struct mbuf *)0, control, flags)) { + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == 
EWOULDBLOCK)) + error = 0; + if (error == EPIPE) + psignal(p, SIGPIPE); + } + if (error == 0) + *retsize = len - auio.uio_resid; +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_WRITE, + ktriov, *retsize, error); + FREE(ktriov, M_TEMP); + } +#endif +bad: + if (to) + m_freem(to); + return (error); +} + +#ifdef COMPAT_OLDSOCK +int +compat_43_recvfrom(p, uap, retval) + struct proc *p; + struct recvfrom_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(size_t) len; + syscallarg(int) flags; + syscallarg(caddr_t) from; + syscallarg(int *) fromlenaddr; + } */ *uap; + register_t *retval; +{ + + SCARG(uap, flags) |= MSG_COMPAT; + return (recvfrom(p, uap, retval)); +} +#endif + +int +recvfrom(p, uap, retval) + struct proc *p; + register struct recvfrom_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(size_t) len; + syscallarg(int) flags; + syscallarg(caddr_t) from; + syscallarg(int *) fromlenaddr; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov; + int error; + + if (SCARG(uap, fromlenaddr)) { + if (error = copyin((caddr_t)SCARG(uap, fromlenaddr), + (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen))) + return (error); + } else + msg.msg_namelen = 0; + msg.msg_name = SCARG(uap, from); + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, len); + msg.msg_control = 0; + msg.msg_flags = SCARG(uap, flags); + return (recvit(p, SCARG(uap, s), &msg, + (caddr_t)SCARG(uap, fromlenaddr), retval)); +} + +#ifdef COMPAT_OLDSOCK +int +compat_43_recv(p, uap, retval) + struct proc *p; + register struct compat_43_recv_args /* { + syscallarg(int) s; + syscallarg(caddr_t) buf; + syscallarg(int) len; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = SCARG(uap, buf); + 
aiov.iov_len = SCARG(uap, len); + msg.msg_control = 0; + msg.msg_flags = SCARG(uap, flags); + return (recvit(p, SCARG(uap, s), &msg, (caddr_t)0, retval)); +} + +/* + * Old recvmsg. This code takes advantage of the fact that the old msghdr + * overlays the new one, missing only the flags, and with the (old) access + * rights where the control fields are now. + */ +int +compat_43_recvmsg(p, uap, retval) + struct proc *p; + register struct compat_43_recvmsg_args /* { + syscallarg(int) s; + syscallarg(struct omsghdr *) msg; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *iov; + int error; + + if (error = copyin((caddr_t)SCARG(uap, msg), (caddr_t)&msg, + sizeof (struct omsghdr))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; + msg.msg_flags = SCARG(uap, flags) | MSG_COMPAT; + if (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + msg.msg_iov = iov; + error = recvit(p, SCARG(uap, s), &msg, + (caddr_t)&SCARG(uap, msg)->msg_namelen, retval); + + if (msg.msg_controllen && error == 0) + error = copyout((caddr_t)&msg.msg_controllen, + (caddr_t)&SCARG(uap, msg)->msg_accrightslen, sizeof (int)); +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} +#endif + +int +recvmsg(p, uap, retval) + struct proc *p; + register struct recvmsg_args /* { + syscallarg(int) s; + syscallarg(struct msghdr *) msg; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct msghdr msg; + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; + register int error; + + if (error = copyin((caddr_t)SCARG(uap, msg), (caddr_t)&msg, + sizeof (msg))) + return (error); + if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { + if ((u_int)msg.msg_iovlen >= 
UIO_MAXIOV) + return (EMSGSIZE); + MALLOC(iov, struct iovec *, + sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, + M_WAITOK); + } else + iov = aiov; +#ifdef COMPAT_OLDSOCK + msg.msg_flags = SCARG(uap, flags) &~ MSG_COMPAT; +#else + msg.msg_flags = SCARG(uap, flags); +#endif + uiov = msg.msg_iov; + msg.msg_iov = iov; + if (error = copyin((caddr_t)uiov, (caddr_t)iov, + (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))) + goto done; + if ((error = recvit(p, SCARG(uap, s), &msg, (caddr_t)0, retval)) == 0) { + msg.msg_iov = uiov; + error = copyout((caddr_t)&msg, (caddr_t)SCARG(uap, msg), + sizeof(msg)); + } +done: + if (iov != aiov) + FREE(iov, M_IOV); + return (error); +} + +int +recvit(p, s, mp, namelenp, retsize) + register struct proc *p; + int s; + register struct msghdr *mp; + caddr_t namelenp; + register_t *retsize; +{ + struct file *fp; + struct uio auio; + register struct iovec *iov; + register int i; + int len, error; + struct mbuf *from = 0, *control = 0; +#ifdef KTRACE + struct iovec *ktriov = NULL; +#endif + + if (error = getsock(p->p_fd, s, &fp)) + return (error); + auio.uio_iov = mp->msg_iov; + auio.uio_iovcnt = mp->msg_iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = p; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + iov = mp->msg_iov; + for (i = 0; i < mp->msg_iovlen; i++, iov++) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) + return (EINVAL); + auio.uio_resid += iov->iov_len; + } +#ifdef KTRACE + if (KTRPOINT(p, KTR_GENIO)) { + int iovlen = auio.uio_iovcnt * sizeof (struct iovec); + + MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); + bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); + } +#endif + len = auio.uio_resid; + if (error = soreceive((struct socket *)fp->f_data, &from, &auio, + (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, + &mp->msg_flags)) { + if (auio.uio_resid != len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } +#ifdef KTRACE + if (ktriov != NULL) { + if (error == 0) + ktrgenio(p->p_tracep, s, UIO_READ, + ktriov, len - auio.uio_resid, error); + FREE(ktriov, M_TEMP); + } +#endif + if (error) + goto out; + *retsize = len - auio.uio_resid; + if (mp->msg_name) { + len = mp->msg_namelen; + if (len <= 0 || from == 0) + len = 0; + else { +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + mtod(from, struct osockaddr *)->sa_family = + mtod(from, struct sockaddr *)->sa_family; +#endif + if (len > from->m_len) + len = from->m_len; + /* else if len < from->m_len ??? */ + if (error = copyout(mtod(from, caddr_t), + (caddr_t)mp->msg_name, (unsigned)len)) + goto out; + } + mp->msg_namelen = len; + if (namelenp && + (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { +#ifdef COMPAT_OLDSOCK + if (mp->msg_flags & MSG_COMPAT) + error = 0; /* old recvfrom didn't check */ + else +#endif + goto out; + } + } + if (mp->msg_control) { +#ifdef COMPAT_OLDSOCK + /* + * We assume that old recvmsg calls won't receive access + * rights and other control info, esp. as control info + * is always optional and those options didn't exist in 4.3. + * If we receive rights, trim the cmsghdr; anything else + * is tossed. 
+ */ + if (control && mp->msg_flags & MSG_COMPAT) { + if (mtod(control, struct cmsghdr *)->cmsg_level != + SOL_SOCKET || + mtod(control, struct cmsghdr *)->cmsg_type != + SCM_RIGHTS) { + mp->msg_controllen = 0; + goto out; + } + control->m_len -= sizeof (struct cmsghdr); + control->m_data += sizeof (struct cmsghdr); + } +#endif + len = mp->msg_controllen; + if (len <= 0 || control == 0) + len = 0; + else { + if (len >= control->m_len) + len = control->m_len; + else + mp->msg_flags |= MSG_CTRUNC; + error = copyout((caddr_t)mtod(control, caddr_t), + (caddr_t)mp->msg_control, (unsigned)len); + } + mp->msg_controllen = len; + } +out: + if (from) + m_freem(from); + if (control) + m_freem(control); + return (error); +} + +/* ARGSUSED */ +int +shutdown(p, uap, retval) + struct proc *p; + register struct shutdown_args /* { + syscallarg(int) s; + syscallarg(int) how; + } */ *uap; + register_t *retval; +{ + struct file *fp; + int error; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + return (soshutdown((struct socket *)fp->f_data, SCARG(uap, how))); +} + +/* ARGSUSED */ +int +setsockopt(p, uap, retval) + struct proc *p; + register struct setsockopt_args /* { + syscallarg(int) s; + syscallarg(int) level; + syscallarg(int) name; + syscallarg(caddr_t) val; + syscallarg(int) valsize; + } */ *uap; + register_t *retval; +{ + struct file *fp; + struct mbuf *m = NULL; + int error; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + if (SCARG(uap, valsize) > MLEN) + return (EINVAL); + if (SCARG(uap, val)) { + m = m_get(M_WAIT, MT_SOOPTS); + if (m == NULL) + return (ENOBUFS); + if (error = copyin(SCARG(uap, val), mtod(m, caddr_t), + (u_int)SCARG(uap, valsize))) { + (void) m_free(m); + return (error); + } + m->m_len = SCARG(uap, valsize); + } + return (sosetopt((struct socket *)fp->f_data, SCARG(uap, level), + SCARG(uap, name), m)); +} + +/* ARGSUSED */ +int +getsockopt(p, uap, retval) + struct proc *p; + register struct getsockopt_args 
/* { + syscallarg(int) s; + syscallarg(int) level; + syscallarg(int) name; + syscallarg(caddr_t) val; + syscallarg(int *) avalsize; + } */ *uap; + register_t *retval; +{ + struct file *fp; + struct mbuf *m = NULL; + int valsize, error; + + if (error = getsock(p->p_fd, SCARG(uap, s), &fp)) + return (error); + if (SCARG(uap, val)) { + if (error = copyin((caddr_t)SCARG(uap, avalsize), + (caddr_t)&valsize, sizeof (valsize))) + return (error); + } else + valsize = 0; + if ((error = sogetopt((struct socket *)fp->f_data, SCARG(uap, level), + SCARG(uap, name), &m)) == 0 && SCARG(uap, val) && valsize && + m != NULL) { + if (valsize > m->m_len) + valsize = m->m_len; + error = copyout(mtod(m, caddr_t), SCARG(uap, val), + (u_int)valsize); + if (error == 0) + error = copyout((caddr_t)&valsize, + (caddr_t)SCARG(uap, avalsize), sizeof (valsize)); + } + if (m != NULL) + (void) m_free(m); + return (error); +} + +/* ARGSUSED */ +int +pipe(p, uap, retval) + struct proc *p; + void *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + struct file *rf, *wf; + struct socket *rso, *wso; + int fd, error; + + if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) + return (error); + if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) + goto free1; + if (error = falloc(p, &rf, &fd)) + goto free2; + retval[0] = fd; + rf->f_flag = FREAD; + rf->f_type = DTYPE_SOCKET; + rf->f_ops = &socketops; + rf->f_data = (caddr_t)rso; + if (error = falloc(p, &wf, &fd)) + goto free3; + wf->f_flag = FWRITE; + wf->f_type = DTYPE_SOCKET; + wf->f_ops = &socketops; + wf->f_data = (caddr_t)wso; + retval[1] = fd; + if (error = unp_connect2(wso, rso)) + goto free4; + return (0); +free4: + ffree(wf); + fdp->fd_ofiles[retval[1]] = 0; +free3: + ffree(rf); + fdp->fd_ofiles[retval[0]] = 0; +free2: + (void)soclose(wso); +free1: + (void)soclose(rso); + return (error); +} + +/* + * Get socket name. 
+ */ +#ifdef COMPAT_OLDSOCK +int +getsockname(p, uap, retval) + struct proc *p; + struct getsockname_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; +{ + + return (getsockname1(p, uap, retval, 0)); +} + +int +compat_43_getsockname(p, uap, retval) + struct proc *p; + struct getsockname_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; +{ + + return (getsockname1(p, uap, retval, 1)); +} +#else /* COMPAT_OLDSOCK */ + +#define getsockname1 getsockname +#endif + +/* ARGSUSED */ +int +getsockname1(p, uap, retval, compat_43) + struct proc *p; + register struct getsockname_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; + int compat_43; +{ + struct file *fp; + register struct socket *so; + struct mbuf *m; + int len, error; + + if (error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) + return (error); + if (error = copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, + sizeof (len))) + return (error); + so = (struct socket *)fp->f_data; + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + if (error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0)) + goto bad; + if (len > m->m_len) + len = m->m_len; +#ifdef COMPAT_OLDSOCK + if (compat_43) + mtod(m, struct osockaddr *)->sa_family = + mtod(m, struct sockaddr *)->sa_family; +#endif + error = copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), (u_int)len); + if (error == 0) + error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen), + sizeof (len)); +bad: + m_freem(m); + return (error); +} + +/* + * Get name of peer for connected socket. 
+ */ +#ifdef COMPAT_OLDSOCK +int +getpeername(p, uap, retval) + struct proc *p; + struct getpeername_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; +{ + + return (getpeername1(p, uap, retval, 0)); +} + +int +compat_43_getpeername(p, uap, retval) + struct proc *p; + struct getpeername_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; +{ + + return (getpeername1(p, uap, retval, 1)); +} +#else /* COMPAT_OLDSOCK */ + +#define getpeername1 getpeername +#endif + +/* ARGSUSED */ +int +getpeername1(p, uap, retval, compat_43) + struct proc *p; + register struct getpeername_args /* { + syscallarg(int) fdes; + syscallarg(caddr_t) asa; + syscallarg(int *) alen; + } */ *uap; + register_t *retval; + int compat_43; +{ + struct file *fp; + register struct socket *so; + struct mbuf *m; + int len, error; + + if (error = getsock(p->p_fd, SCARG(uap, fdes), &fp)) + return (error); + so = (struct socket *)fp->f_data; + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + return (ENOTCONN); + if (error = + copyin((caddr_t)SCARG(uap, alen), (caddr_t)&len, sizeof (len))) + return (error); + m = m_getclr(M_WAIT, MT_SONAME); + if (m == NULL) + return (ENOBUFS); + if (error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0)) + goto bad; + if (len > m->m_len) + len = m->m_len; +#ifdef COMPAT_OLDSOCK + if (compat_43) + mtod(m, struct osockaddr *)->sa_family = + mtod(m, struct sockaddr *)->sa_family; +#endif + if (error = + copyout(mtod(m, caddr_t), (caddr_t)SCARG(uap, asa), (u_int)len)) + goto bad; + error = copyout((caddr_t)&len, (caddr_t)SCARG(uap, alen), sizeof (len)); +bad: + m_freem(m); + return (error); +} + +int +sockargs(mp, buf, buflen, type) + struct mbuf **mp; + caddr_t buf; + int buflen, type; +{ + register struct sockaddr *sa; + register struct mbuf *m; + int error; + + if ((u_int)buflen > MLEN) { +#ifdef COMPAT_OLDSOCK + 
		if (type == MT_SONAME && (u_int)buflen <= 112)
			/*
			 * Old unix-domain compat hack: 112 was presumably the
			 * historical sizeof(struct sockaddr_un) — 4.3 apps pass
			 * the whole struct; clamp to MLEN instead of failing.
			 * TODO confirm the 112 origin.
			 */
			buflen = MLEN;		/* unix domain compat. hack */
		else
#endif
		return (EINVAL);
	}
	m = m_get(M_WAIT, type);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = buflen;
	/* Pull the user's address/option bytes into the mbuf. */
	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
	if (error) {
		(void) m_free(m);
		return (error);
	}
	*mp = m;
	if (type == MT_SONAME) {
		sa = mtod(m, struct sockaddr *);

#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
		/*
		 * 4.3BSD sockaddrs carried a 16-bit sa_family.  On a
		 * little-endian host the family value lands in the byte
		 * that is now sa_len; move it back into sa_family.
		 */
		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
			sa->sa_family = sa->sa_len;
#endif
		/* Kernel always stamps the true length. */
		sa->sa_len = buflen;
	}
	return (0);
}

/*
 * getsock --
 *	Translate descriptor fdes into its file table entry and verify
 *	that it refers to a socket.
 *
 *	Returns 0 with *fpp set on success; EBADF if fdes is out of
 *	range or not open; ENOTSOCK if the file is not a socket.
 */
int
getsock(fdp, fdes, fpp)
	struct filedesc *fdp;
	int fdes;
	struct file **fpp;
{
	register struct file *fp;

	if ((unsigned)fdes >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fdes]) == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_SOCKET)
		return (ENOTSOCK);
	*fpp = fp;
	return (0);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 000000000000..c6bcbfd9e2ed
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,839 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/unpcb.h> +#include <sys/un.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/mbuf.h> + +/* + * Unix communications domain. 
+ * + * TODO: + * SEQPACKET, RDM + * rethink name space problems + * need a proper out-of-band + */ +struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; +ino_t unp_ino; /* prototype for fake inode numbers */ + +/*ARGSUSED*/ +int +uipc_usrreq(so, req, m, nam, control) + struct socket *so; + int req; + struct mbuf *m, *nam, *control; +{ + struct unpcb *unp = sotounpcb(so); + register struct socket *so2; + register int error = 0; + struct proc *p = curproc; /* XXX */ + + if (req == PRU_CONTROL) + return (EOPNOTSUPP); + if (req != PRU_SEND && control && control->m_len) { + error = EOPNOTSUPP; + goto release; + } + if (unp == 0 && req != PRU_ATTACH) { + error = EINVAL; + goto release; + } + switch (req) { + + case PRU_ATTACH: + if (unp) { + error = EISCONN; + break; + } + error = unp_attach(so); + break; + + case PRU_DETACH: + unp_detach(unp); + break; + + case PRU_BIND: + error = unp_bind(unp, nam, p); + break; + + case PRU_LISTEN: + if (unp->unp_vnode == 0) + error = EINVAL; + break; + + case PRU_CONNECT: + error = unp_connect(so, nam, p); + break; + + case PRU_CONNECT2: + error = unp_connect2(so, (struct socket *)nam); + break; + + case PRU_DISCONNECT: + unp_disconnect(unp); + break; + + case PRU_ACCEPT: + /* + * Pass back name of connected socket, + * if it was bound and we are still connected + * (our peer may have closed already!). 
+ */ + if (unp->unp_conn && unp->unp_conn->unp_addr) { + nam->m_len = unp->unp_conn->unp_addr->m_len; + bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else { + nam->m_len = sizeof(sun_noname); + *(mtod(nam, struct sockaddr *)) = sun_noname; + } + break; + + case PRU_SHUTDOWN: + socantsendmore(so); + unp_shutdown(unp); + break; + + case PRU_RCVD: + switch (so->so_type) { + + case SOCK_DGRAM: + panic("uipc 1"); + /*NOTREACHED*/ + + case SOCK_STREAM: +#define rcv (&so->so_rcv) +#define snd (&so2->so_snd) + if (unp->unp_conn == 0) + break; + so2 = unp->unp_conn->unp_socket; + /* + * Adjust backpressure on sender + * and wakeup any waiting to write. + */ + snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; + unp->unp_mbcnt = rcv->sb_mbcnt; + snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; + unp->unp_cc = rcv->sb_cc; + sowwakeup(so2); +#undef snd +#undef rcv + break; + + default: + panic("uipc 2"); + } + break; + + case PRU_SEND: + if (control && (error = unp_internalize(control, p))) + break; + switch (so->so_type) { + + case SOCK_DGRAM: { + struct sockaddr *from; + + if (nam) { + if (unp->unp_conn) { + error = EISCONN; + break; + } + error = unp_connect(so, nam, p); + if (error) + break; + } else { + if (unp->unp_conn == 0) { + error = ENOTCONN; + break; + } + } + so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) + from = mtod(unp->unp_addr, struct sockaddr *); + else + from = &sun_noname; + if (sbappendaddr(&so2->so_rcv, from, m, control)) { + sorwakeup(so2); + m = 0; + control = 0; + } else + error = ENOBUFS; + if (nam) + unp_disconnect(unp); + break; + } + + case SOCK_STREAM: +#define rcv (&so2->so_rcv) +#define snd (&so->so_snd) + if (so->so_state & SS_CANTSENDMORE) { + error = EPIPE; + break; + } + if (unp->unp_conn == 0) + panic("uipc 3"); + so2 = unp->unp_conn->unp_socket; + /* + * Send to paired receive port, and then reduce + * send buffer hiwater marks to maintain backpressure. + * Wake up readers. 
+ */ + if (control) { + if (sbappendcontrol(rcv, m, control)) + control = 0; + } else + sbappend(rcv, m); + snd->sb_mbmax -= + rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; + unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; + snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; + unp->unp_conn->unp_cc = rcv->sb_cc; + sorwakeup(so2); + m = 0; +#undef snd +#undef rcv + break; + + default: + panic("uipc 4"); + } + break; + + case PRU_ABORT: + unp_drop(unp, ECONNABORTED); + break; + + case PRU_SENSE: + ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; + if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { + so2 = unp->unp_conn->unp_socket; + ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; + } + ((struct stat *) m)->st_dev = NODEV; + if (unp->unp_ino == 0) + unp->unp_ino = unp_ino++; + ((struct stat *) m)->st_ino = unp->unp_ino; + return (0); + + case PRU_RCVOOB: + return (EOPNOTSUPP); + + case PRU_SENDOOB: + error = EOPNOTSUPP; + break; + + case PRU_SOCKADDR: + if (unp->unp_addr) { + nam->m_len = unp->unp_addr->m_len; + bcopy(mtod(unp->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else + nam->m_len = 0; + break; + + case PRU_PEERADDR: + if (unp->unp_conn && unp->unp_conn->unp_addr) { + nam->m_len = unp->unp_conn->unp_addr->m_len; + bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); + } else + nam->m_len = 0; + break; + + case PRU_SLOWTIMO: + break; + + default: + panic("piusrreq"); + } +release: + if (control) + m_freem(control); + if (m) + m_freem(m); + return (error); +} + +/* + * Both send and receive buffers are allocated PIPSIZ bytes of buffering + * for stream sockets, although the total for sender and receiver is + * actually only PIPSIZ. + * Datagram sockets really use the sendspace as the maximum datagram size, + * and don't really want to reserve the sendspace. Their recvspace should + * be large enough for at least one max-size datagram plus address. 
+ */ +#define PIPSIZ 4096 +u_long unpst_sendspace = PIPSIZ; +u_long unpst_recvspace = PIPSIZ; +u_long unpdg_sendspace = 2*1024; /* really max datagram size */ +u_long unpdg_recvspace = 4*1024; + +int unp_rights; /* file descriptors in flight */ + +int +unp_attach(so) + struct socket *so; +{ + register struct mbuf *m; + register struct unpcb *unp; + int error; + + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + switch (so->so_type) { + + case SOCK_STREAM: + error = soreserve(so, unpst_sendspace, unpst_recvspace); + break; + + case SOCK_DGRAM: + error = soreserve(so, unpdg_sendspace, unpdg_recvspace); + break; + + default: + panic("unp_attach"); + } + if (error) + return (error); + } + m = m_getclr(M_DONTWAIT, MT_PCB); + if (m == NULL) + return (ENOBUFS); + unp = mtod(m, struct unpcb *); + so->so_pcb = (caddr_t)unp; + unp->unp_socket = so; + return (0); +} + +void +unp_detach(unp) + register struct unpcb *unp; +{ + + if (unp->unp_vnode) { + unp->unp_vnode->v_socket = 0; + vrele(unp->unp_vnode); + unp->unp_vnode = 0; + } + if (unp->unp_conn) + unp_disconnect(unp); + while (unp->unp_refs) + unp_drop(unp->unp_refs, ECONNRESET); + soisdisconnected(unp->unp_socket); + unp->unp_socket->so_pcb = 0; + m_freem(unp->unp_addr); + (void) m_free(dtom(unp)); + if (unp_rights) { + /* + * Normally the receive buffer is flushed later, + * in sofree, but if our receive buffer holds references + * to descriptors that are now garbage, we will dispose + * of those descriptor references after the garbage collector + * gets them (resulting in a "panic: closef: count < 0"). 
+ */ + sorflush(unp->unp_socket); + unp_gc(); + } +} + +int +unp_bind(unp, nam, p) + struct unpcb *unp; + struct mbuf *nam; + struct proc *p; +{ + struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, + soun->sun_path, p); + if (unp->unp_vnode != NULL) + return (EINVAL); + if (nam->m_len == MLEN) { + if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) + return (EINVAL); + } else + *(mtod(nam, caddr_t) + nam->m_len) = 0; +/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp != NULL) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(vp); + return (EADDRINUSE); + } + VATTR_NULL(&vattr); + vattr.va_type = VSOCK; + vattr.va_mode = ACCESSPERMS; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)) + return (error); + vp = nd.ni_vp; + vp->v_socket = unp->unp_socket; + unp->unp_vnode = vp; + unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); + VOP_UNLOCK(vp, 0, p); + return (0); +} + +int +unp_connect(so, nam, p) + struct socket *so; + struct mbuf *nam; + struct proc *p; +{ + register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); + register struct vnode *vp; + register struct socket *so2, *so3; + struct unpcb *unp2, *unp3; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); + if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ + if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) + return (EMSGSIZE); + } else + *(mtod(nam, caddr_t) + nam->m_len) = 0; + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VSOCK) { + error = ENOTSOCK; + goto bad; + } + if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) + goto bad; + 
so2 = vp->v_socket; + if (so2 == 0) { + error = ECONNREFUSED; + goto bad; + } + if (so->so_type != so2->so_type) { + error = EPROTOTYPE; + goto bad; + } + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if ((so2->so_options & SO_ACCEPTCONN) == 0 || + (so3 = sonewconn(so2, 0)) == 0) { + error = ECONNREFUSED; + goto bad; + } + unp2 = sotounpcb(so2); + unp3 = sotounpcb(so3); + if (unp2->unp_addr) + unp3->unp_addr = + m_copy(unp2->unp_addr, 0, (int)M_COPYALL); + so2 = so3; + } + error = unp_connect2(so, so2); +bad: + vput(vp); + return (error); +} + +int +unp_connect2(so, so2) + register struct socket *so; + register struct socket *so2; +{ + register struct unpcb *unp = sotounpcb(so); + register struct unpcb *unp2; + + if (so2->so_type != so->so_type) + return (EPROTOTYPE); + unp2 = sotounpcb(so2); + unp->unp_conn = unp2; + switch (so->so_type) { + + case SOCK_DGRAM: + unp->unp_nextref = unp2->unp_refs; + unp2->unp_refs = unp; + soisconnected(so); + break; + + case SOCK_STREAM: + unp2->unp_conn = unp; + soisconnected(so); + soisconnected(so2); + break; + + default: + panic("unp_connect2"); + } + return (0); +} + +void +unp_disconnect(unp) + struct unpcb *unp; +{ + register struct unpcb *unp2 = unp->unp_conn; + + if (unp2 == 0) + return; + unp->unp_conn = 0; + switch (unp->unp_socket->so_type) { + + case SOCK_DGRAM: + if (unp2->unp_refs == unp) + unp2->unp_refs = unp->unp_nextref; + else { + unp2 = unp2->unp_refs; + for (;;) { + if (unp2 == 0) + panic("unp_disconnect"); + if (unp2->unp_nextref == unp) + break; + unp2 = unp2->unp_nextref; + } + unp2->unp_nextref = unp->unp_nextref; + } + unp->unp_nextref = 0; + unp->unp_socket->so_state &= ~SS_ISCONNECTED; + break; + + case SOCK_STREAM: + soisdisconnected(unp->unp_socket); + unp2->unp_conn = 0; + soisdisconnected(unp2->unp_socket); + break; + } +} + +#ifdef notdef +void +unp_abort(unp) + struct unpcb *unp; +{ + + unp_detach(unp); +} +#endif + +void +unp_shutdown(unp) + struct unpcb *unp; +{ + struct socket *so; + + if 
(unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && + (so = unp->unp_conn->unp_socket)) + socantrcvmore(so); +} + +void +unp_drop(unp, errno) + struct unpcb *unp; + int errno; +{ + struct socket *so = unp->unp_socket; + + so->so_error = errno; + unp_disconnect(unp); + if (so->so_head) { + so->so_pcb = (caddr_t) 0; + m_freem(unp->unp_addr); + (void) m_free(dtom(unp)); + sofree(so); + } +} + +#ifdef notdef +unp_drain() +{ + +} +#endif + +int +unp_externalize(rights) + struct mbuf *rights; +{ + struct proc *p = curproc; /* XXX */ + register int i; + register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); + register struct file **rp = (struct file **)(cm + 1); + register struct file *fp; + int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); + int f; + + if (!fdavail(p, newfds)) { + for (i = 0; i < newfds; i++) { + fp = *rp; + unp_discard(fp); + *rp++ = 0; + } + return (EMSGSIZE); + } + for (i = 0; i < newfds; i++) { + if (fdalloc(p, 0, &f)) + panic("unp_externalize"); + fp = *rp; + p->p_fd->fd_ofiles[f] = fp; + fp->f_msgcount--; + unp_rights--; + *(int *)rp++ = f; + } + return (0); +} + +int +unp_internalize(control, p) + struct mbuf *control; + struct proc *p; +{ + struct filedesc *fdp = p->p_fd; + register struct cmsghdr *cm = mtod(control, struct cmsghdr *); + register struct file **rp; + register struct file *fp; + register int i, fd; + int oldfds; + + if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || + cm->cmsg_len != control->m_len) + return (EINVAL); + oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); + rp = (struct file **)(cm + 1); + for (i = 0; i < oldfds; i++) { + fd = *(int *)rp++; + if ((unsigned)fd >= fdp->fd_nfiles || + fdp->fd_ofiles[fd] == NULL) + return (EBADF); + } + rp = (struct file **)(cm + 1); + for (i = 0; i < oldfds; i++) { + fp = fdp->fd_ofiles[*(int *)rp]; + *rp++ = fp; + fp->f_count++; + fp->f_msgcount++; + unp_rights++; + } + return (0); +} + +int unp_defer, unp_gcing; +extern struct domain 
unixdomain; + +void +unp_gc() +{ + register struct file *fp, *nextfp; + register struct socket *so; + struct file **extra_ref, **fpp; + int nunref, i; + + if (unp_gcing) + return; + unp_gcing = 1; + unp_defer = 0; + for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) + fp->f_flag &= ~(FMARK|FDEFER); + do { + for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { + if (fp->f_count == 0) + continue; + if (fp->f_flag & FDEFER) { + fp->f_flag &= ~FDEFER; + unp_defer--; + } else { + if (fp->f_flag & FMARK) + continue; + if (fp->f_count == fp->f_msgcount) + continue; + fp->f_flag |= FMARK; + } + if (fp->f_type != DTYPE_SOCKET || + (so = (struct socket *)fp->f_data) == 0) + continue; + if (so->so_proto->pr_domain != &unixdomain || + (so->so_proto->pr_flags&PR_RIGHTS) == 0) + continue; +#ifdef notdef + if (so->so_rcv.sb_flags & SB_LOCK) { + /* + * This is problematical; it's not clear + * we need to wait for the sockbuf to be + * unlocked (on a uniprocessor, at least), + * and it's also not clear what to do + * if sbwait returns an error due to receipt + * of a signal. If sbwait does return + * an error, we'll go into an infinite + * loop. Delete all of this for now. + */ + (void) sbwait(&so->so_rcv); + goto restart; + } +#endif + unp_scan(so->so_rcv.sb_mb, unp_mark); + } + } while (unp_defer); + /* + * We grab an extra reference to each of the file table entries + * that are not otherwise accessible and then free the rights + * that are stored in messages on them. + * + * The bug in the orginal code is a little tricky, so I'll describe + * what's wrong with it here. + * + * It is incorrect to simply unp_discard each entry for f_msgcount + * times -- consider the case of sockets A and B that contain + * references to each other. On a last close of some other socket, + * we trigger a gc since the number of outstanding rights (unp_rights) + * is non-zero. 
If during the sweep phase the gc code un_discards, + * we end up doing a (full) closef on the descriptor. A closef on A + * results in the following chain. Closef calls soo_close, which + * calls soclose. Soclose calls first (through the switch + * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply + * returns because the previous instance had set unp_gcing, and + * we return all the way back to soclose, which marks the socket + * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush + * to free up the rights that are queued in messages on the socket A, + * i.e., the reference on B. The sorflush calls via the dom_dispose + * switch unp_dispose, which unp_scans with unp_discard. This second + * instance of unp_discard just calls closef on B. + * + * Well, a similar chain occurs on B, resulting in a sorflush on B, + * which results in another closef on A. Unfortunately, A is already + * being closed, and the descriptor has already been marked with + * SS_NOFDREF, and soclose panics at this point. + * + * Here, we first take an extra reference to each inaccessible + * descriptor. Then, we call sorflush ourself, since we know + * it is a Unix domain socket anyhow. After we destroy all the + * rights carried in messages, we do a last closef to get rid + * of our extra reference. This is the last close, and the + * unp_detach etc will shut down the socket. 
+ * + * 91/09/19, bsy@cs.cmu.edu + */ + extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); + for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; + fp = nextfp) { + nextfp = fp->f_list.le_next; + if (fp->f_count == 0) + continue; + if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { + *fpp++ = fp; + nunref++; + fp->f_count++; + } + } + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) + sorflush((struct socket *)(*fpp)->f_data); + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) + closef(*fpp, (struct proc *)NULL); + free((caddr_t)extra_ref, M_FILE); + unp_gcing = 0; +} + +void +unp_dispose(m) + struct mbuf *m; +{ + + if (m) + unp_scan(m, unp_discard); +} + +void +unp_scan(m0, op) + register struct mbuf *m0; + void (*op) __P((struct file *)); +{ + register struct mbuf *m; + register struct file **rp; + register struct cmsghdr *cm; + register int i; + int qfds; + + while (m0) { + for (m = m0; m; m = m->m_next) + if (m->m_type == MT_CONTROL && + m->m_len >= sizeof(*cm)) { + cm = mtod(m, struct cmsghdr *); + if (cm->cmsg_level != SOL_SOCKET || + cm->cmsg_type != SCM_RIGHTS) + continue; + qfds = (cm->cmsg_len - sizeof *cm) + / sizeof (struct file *); + rp = (struct file **)(cm + 1); + for (i = 0; i < qfds; i++) + (*op)(*rp++); + break; /* XXX, but saves time */ + } + m0 = m0->m_act; + } +} + +void +unp_mark(fp) + struct file *fp; +{ + + if (fp->f_flag & FMARK) + return; + unp_defer++; + fp->f_flag |= (FMARK|FDEFER); +} + +void +unp_discard(fp) + struct file *fp; +{ + + fp->f_msgcount--; + unp_rights--; + (void) closef(fp, (struct proc *)NULL); +} diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c new file mode 100644 index 000000000000..c20966bf7775 --- /dev/null +++ b/sys/kern/vfs_cache.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Poul-Henning Kamp of the FreeBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: vfs_cache.c,v 1.11 1995/03/12 02:01:20 phk Exp $ + * + * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/errno.h> +#include <sys/malloc.h> + +/* + * Name caching works as follows: + * + * Names found by directory scans are retained in a cache + * for future reference. It is managed LRU, so frequently + * used names will hang around. Cache is indexed by hash value + * obtained from (vp, name) where vp refers to the directory + * containing name. + * + * If it is a "negative" entry, (i.e. for a name that is known NOT to + * exist) the vnode pointer will be NULL. + * + * For simplicity (and economy of storage), names longer than + * a maximum length of NCHNAMLEN are not cached; they occur + * infrequently in any case, and are almost never of interest. + * + * Upon reaching the last segment of a path, if the reference + * is for DELETE, or NOCACHE is set (rewrite), and the + * name is located in the cache, it will be dropped. + */ + +/* + * Structures associated with name cacheing. + */ +#define NCHHASH(dvp, cnp) \ + (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash]) +LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ +u_long nchash; /* size of hash table - 1 */ +long numcache; /* number of cache entries allocated */ +TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ +struct nchstats nchstats; /* cache effectiveness statistics */ + +int doingcache = 1; /* 1 => enable the cache */ + +/* + * Delete an entry from its hash list and move it to the front + * of the LRU list for immediate reuse. + */ +#define PURGE(ncp) { \ + LIST_REMOVE(ncp, nc_hash); \ + ncp->nc_hash.le_prev = 0; \ + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ + TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \ +} + +/* + * Move an entry that has been used to the tail of the LRU list + * so that it will be preserved for future use. 
+ */ +#define TOUCH(ncp) { \ + if (ncp->nc_lru.tqe_next != 0) { \ + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); \ + } \ +} + +/* + * Lookup an entry in the cache + * + * We don't do this if the segment name is long, simply so the cache + * can avoid holding long names (which would either waste space, or + * add greatly to the complexity). + * + * Lookup is called with dvp pointing to the directory to search, + * cnp pointing to the name of the entry being sought. If the lookup + * succeeds, the vnode is returned in *vpp, and a status of -1 is + * returned. If the lookup determines that the name does not exist + * (negative cacheing), a status of ENOENT is returned. If the lookup + * fails, a status of zero is returned. + */ + +int +cache_lookup(dvp, vpp, cnp) + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; +{ + register struct namecache *ncp, *nnp; + register struct nchashhead *ncpp; + + if (!doingcache) { + cnp->cn_flags &= ~MAKEENTRY; + return (0); + } + if (cnp->cn_namelen > NCHNAMLEN) { + nchstats.ncs_long++; + cnp->cn_flags &= ~MAKEENTRY; + return (0); + } + + ncpp = NCHHASH(dvp, cnp); + for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { + nnp = ncp->nc_hash.le_next; + /* If one of the vp's went stale, don't bother anymore. */ + if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) || + (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) { + nchstats.ncs_falsehits++; + PURGE(ncp); + continue; + } + /* Now that we know the vp's to be valid, is it ours ? 
*/ + if (ncp->nc_dvp == dvp && + ncp->nc_nlen == cnp->cn_namelen && + !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) + break; + } + + /* We failed to find an entry */ + if (ncp == 0) { + nchstats.ncs_miss++; + return (0); + } + + /* We don't want to have an entry, so dump it */ + if ((cnp->cn_flags & MAKEENTRY) == 0) { + nchstats.ncs_badhits++; + PURGE(ncp); + return (0); + } + + /* We found a "positive" match, return the vnode */ + if (ncp->nc_vp) { + nchstats.ncs_goodhits++; + TOUCH(ncp); + *vpp = ncp->nc_vp; + return (-1); + } + + /* We found a negative match, and want to create it, so purge */ + if (cnp->cn_nameiop == CREATE) { + nchstats.ncs_badhits++; + PURGE(ncp); + return (0); + } + + /* + * We found a "negative" match, ENOENT notifies client of this match. + * The nc_vpid field records whether this is a whiteout. + */ + nchstats.ncs_neghits++; + TOUCH(ncp); + cnp->cn_flags |= ncp->nc_vpid; + return (ENOENT); +} + +/* + * Add an entry to the cache. + */ +void +cache_enter(dvp, vp, cnp) + struct vnode *dvp; + struct vnode *vp; + struct componentname *cnp; +{ + register struct namecache *ncp; + register struct nchashhead *ncpp; + + if (!doingcache) + return; + +#ifdef DIAGNOSTIC + if (cnp->cn_namelen > NCHNAMLEN) + panic("cache_enter: name too long"); +#endif + + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. 
+ */ + if (numcache < desiredvnodes && + ((ncp = nclruhead.tqh_first) == NULL || + ncp->nc_hash.le_prev != 0)) { + /* Add one more entry */ + ncp = (struct namecache *) + malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK); + bzero((char *)ncp, sizeof *ncp); + numcache++; + } else if (ncp = nclruhead.tqh_first) { + /* reuse an old entry */ + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + if (ncp->nc_hash.le_prev != 0) { + LIST_REMOVE(ncp, nc_hash); + ncp->nc_hash.le_prev = 0; + } + } else { + /* give up */ + return; + } + + /* + * Fill in cache info, if vp is NULL this is a "negative" cache entry. + * For negative entries, we have to record whether it is a whiteout. + * the whiteout flag is stored in the nc_vpid field which is + * otherwise unused. + */ + ncp->nc_vp = vp; + if (vp) + ncp->nc_vpid = vp->v_id; + else + ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT; + ncp->nc_dvp = dvp; + ncp->nc_dvpid = dvp->v_id; + ncp->nc_nlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen); + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + ncpp = NCHHASH(dvp, cnp); + LIST_INSERT_HEAD(ncpp, ncp, nc_hash); +} + +/* + * Name cache initialization, from vfs_init() when we are booting + */ +void +nchinit() +{ + + TAILQ_INIT(&nclruhead); + nchashtbl = hashinit(desiredvnodes, M_CACHE, &nchash); +} + +/* + * Invalidate a all entries to particular vnode. + * + * We actually just increment the v_id, that will do it. The entries will + * be purged by lookup as they get found. If the v_id wraps around, we + * need to ditch the entire cache, to avoid confusion. No valid vnode will + * ever have (v_id == 0). + */ +void +cache_purge(vp) + struct vnode *vp; +{ + struct namecache *ncp; + struct nchashhead *ncpp; + + vp->v_id = ++nextvnodeid; + if (nextvnodeid != 0) + return; + for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { + while (ncp = ncpp->lh_first) + PURGE(ncp); + } + vp->v_id = ++nextvnodeid; +} + +/* + * Flush all entries referencing a particular filesystem. 
+ * + * Since we need to check it anyway, we will flush all the invalid + * entriess at the same time. + */ +void +cache_purgevfs(mp) + struct mount *mp; +{ + struct nchashhead *ncpp; + struct namecache *ncp, *nnp; + + /* Scan hash tables for applicable entries */ + for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { + for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { + nnp = ncp->nc_hash.le_next; + if (ncp->nc_dvpid != ncp->nc_dvp->v_id || + (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) || + ncp->nc_dvp->v_mount == mp) { + PURGE(ncp); + } + } + } +} diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c new file mode 100644 index 000000000000..e01d24f099b1 --- /dev/null +++ b/sys/kern/vfs_cluster.c @@ -0,0 +1,756 @@ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/trace.h> +#include <sys/malloc.h> +#include <sys/resourcevar.h> +#include <libkern/libkern.h> + +/* + * Local declarations + */ +struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t, + daddr_t, long, int)); +struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *, + daddr_t, daddr_t, long, int, long)); +void cluster_wbuild __P((struct vnode *, struct buf *, long, + daddr_t, int, daddr_t)); +struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *)); + +#ifdef DIAGNOSTIC +/* + * Set to 1 if reads of block zero should cause readahead to be done. + * Set to 0 treats a read of block zero as a non-sequential read. + * + * Setting to one assumes that most reads of block zero of files are due to + * sequential passes over the files (e.g. cat, sum) where additional blocks + * will soon be needed. Setting to zero assumes that the majority are + * surgical strikes to get particular info (e.g. 
size, file) where readahead + * blocks will not be used and, in fact, push out other potentially useful + * blocks from the cache. The former seems intuitive, but some quick tests + * showed that the latter performed better from a system-wide point of view. + */ +int doclusterraz = 0; +#define ISSEQREAD(vp, blk) \ + (((blk) != 0 || doclusterraz) && \ + ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) +#else +#define ISSEQREAD(vp, blk) \ + ((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) +#endif + +/* + * This replaces bread. If this is a bread at the beginning of a file and + * lastr is 0, we assume this is the first read and we'll read up to two + * blocks if they are sequential. After that, we'll do regular read ahead + * in clustered chunks. + * + * There are 4 or 5 cases depending on how you count: + * Desired block is in the cache: + * 1 Not sequential access (0 I/Os). + * 2 Access is sequential, do read-ahead (1 ASYNC). + * Desired block is not in cache: + * 3 Not sequential access (1 SYNC). + * 4 Sequential access, next block is contiguous (1 SYNC). + * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC) + * + * There are potentially two buffers that require I/O. + * bp is the block requested. + * rbp is the read-ahead block. + * If either is NULL, then you don't have to do the I/O. + */ +cluster_read(vp, filesize, lblkno, size, cred, bpp) + struct vnode *vp; + u_quad_t filesize; + daddr_t lblkno; + long size; + struct ucred *cred; + struct buf **bpp; +{ + struct buf *bp, *rbp; + daddr_t blkno, ioblkno; + long flags; + int error, num_ra, alreadyincore; + +#ifdef DIAGNOSTIC + if (size == 0) + panic("cluster_read: size = 0"); +#endif + + error = 0; + flags = B_READ; + *bpp = bp = getblk(vp, lblkno, size, 0, 0); + if (bp->b_flags & B_CACHE) { + /* + * Desired block is in cache; do any readahead ASYNC. + * Case 1, 2. 
+ */ + trace(TR_BREADHIT, pack(vp, size), lblkno); + flags |= B_ASYNC; + ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1); + alreadyincore = incore(vp, ioblkno) != NULL; + bp = NULL; + } else { + /* Block wasn't in cache, case 3, 4, 5. */ + trace(TR_BREADMISS, pack(vp, size), lblkno); + bp->b_flags |= B_READ; + ioblkno = lblkno; + alreadyincore = 0; + curproc->p_stats->p_ru.ru_inblock++; /* XXX */ + } + /* + * XXX + * Replace 1 with a window size based on some permutation of + * maxcontig and rot_delay. This will let you figure out how + * many blocks you should read-ahead (case 2, 4, 5). + * + * If the access isn't sequential, reset the window to 1. + * Note that a read to the same block is considered sequential. + * This catches the case where the file is being read sequentially, + * but at smaller than the filesystem block size. + */ + rbp = NULL; + if (!ISSEQREAD(vp, lblkno)) { + vp->v_ralen = 0; + vp->v_maxra = lblkno; + } else if ((ioblkno + 1) * size <= filesize && !alreadyincore && + !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) && + blkno != -1) { + /* + * Reading sequentially, and the next block is not in the + * cache. We are going to try reading ahead. + */ + if (num_ra) { + /* + * If our desired readahead block had been read + * in a previous readahead but is no longer in + * core, then we may be reading ahead too far + * or are not using our readahead very rapidly. + * In this case we scale back the window. + */ + if (!alreadyincore && ioblkno <= vp->v_maxra) + vp->v_ralen = max(vp->v_ralen >> 1, 1); + /* + * There are more sequential blocks than our current + * window allows, scale up. Ideally we want to get + * in sync with the filesystem maxcontig value. + */ + else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr) + vp->v_ralen = vp->v_ralen ? 
+ min(num_ra, vp->v_ralen << 1) : 1; + + if (num_ra > vp->v_ralen) + num_ra = vp->v_ralen; + } + + if (num_ra) /* case 2, 4 */ + rbp = cluster_rbuild(vp, filesize, + bp, ioblkno, blkno, size, num_ra, flags); + else if (ioblkno == lblkno) { + bp->b_blkno = blkno; + /* Case 5: check how many blocks to read ahead */ + ++ioblkno; + if ((ioblkno + 1) * size > filesize || + incore(vp, ioblkno) || (error = VOP_BMAP(vp, + ioblkno, NULL, &blkno, &num_ra)) || blkno == -1) + goto skip_readahead; + /* + * Adjust readahead as above. + * Don't check alreadyincore, we know it is 0 from + * the previous conditional. + */ + if (num_ra) { + if (ioblkno <= vp->v_maxra) + vp->v_ralen = max(vp->v_ralen >> 1, 1); + else if (num_ra > vp->v_ralen && + lblkno != vp->v_lastr) + vp->v_ralen = vp->v_ralen ? + min(num_ra,vp->v_ralen<<1) : 1; + if (num_ra > vp->v_ralen) + num_ra = vp->v_ralen; + } + flags |= B_ASYNC; + if (num_ra) + rbp = cluster_rbuild(vp, filesize, + NULL, ioblkno, blkno, size, num_ra, flags); + else { + rbp = getblk(vp, ioblkno, size, 0, 0); + rbp->b_flags |= flags; + rbp->b_blkno = blkno; + } + } else { + /* case 2; read ahead single block */ + rbp = getblk(vp, ioblkno, size, 0, 0); + rbp->b_flags |= flags; + rbp->b_blkno = blkno; + } + + if (rbp == bp) /* case 4 */ + rbp = NULL; + else if (rbp) { /* case 2, 5 */ + trace(TR_BREADMISSRA, + pack(vp, (num_ra + 1) * size), ioblkno); + curproc->p_stats->p_ru.ru_inblock++; /* XXX */ + } + } + + /* XXX Kirk, do we need to make sure the bp has creds? 
 */
skip_readahead:
	/*
	 * Start the I/O on the requested block (if any).  A buffer that
	 * is already DONE or DELWRI here indicates a bookkeeping error
	 * upstream, hence the panic.
	 */
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	/*
	 * Start the read-ahead I/O; on a prior error (or an already
	 * valid buffer) release the read-ahead buffer instead.
	 */
	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	/*
	 * Recalculate our maximum readahead
	 */
	if (rbp == NULL)
		rbp = bp;
	if (rbp)
		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;

	/* Synchronous case: wait only for the originally requested block. */
	if (bp)
		return(biowait(bp));
	return(error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead. We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 *
 * vp       - vnode being read
 * filesize - file size, used to clip the run at end of file
 * bp       - buffer for lbn itself, or NULL for pure read-ahead
 * lbn      - first logical block of the cluster
 * blkno    - disk address corresponding to lbn
 * size     - filesystem block size
 * run      - number of contiguous blocks following lbn
 * flags    - B_* flags to apply to each component buffer
 *
 * Returns the (possibly grown) cluster buffer; I/O is NOT started here.
 */
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
			size, vp->v_mount->mnt_stat.f_iosize);
#endif
	/* Clip the run so the cluster does not extend past end of file. */
	if (size * (lbn + run + 1) > filesize)
		--run;
	/* Degenerate cluster: fall back to a plain single-block buffer. */
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	/*
	 * The bs_children pointer array is carved out of the same
	 * allocation, immediately after the cluster_save header.
	 */
	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = btodb(size);
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		/*
		 * A component of the cluster is already in core,
		 * terminate the cluster early.
		 */
		if (incore(vp, lbn + i))
			break;
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		/*
		 * getblk may return some memory in the buffer if there were
		 * no empty buffers to shed it to. If there is currently
		 * memory in the buffer, we move it down size bytes to make
		 * room for the valid pages that cluster_callback will insert.
		 * We do this now so we don't have to do it at interrupt time
		 * in the callback routine.
		 */
		if (tbp->b_bufsize != 0) {
			caddr_t bdata = (char *)tbp->b_data;

			/*
			 * No room in the buffer to add another page,
			 * terminate the cluster early.
			 */
			if (tbp->b_bufsize + size > MAXBSIZE) {
#ifdef DIAGNOSTIC
				if (tbp->b_bufsize != MAXBSIZE)
					panic("cluster_rbuild: too much memory");
#endif
				brelse(tbp);
				break;
			}
			if (tbp->b_bufsize > size) {
				/*
				 * XXX if the source and destination regions
				 * overlap we have to copy backward to avoid
				 * clobbering any valid pages (i.e. pagemove
				 * implementations typically can't handle
				 * overlap).
				 */
				bdata += tbp->b_bufsize;
				while (bdata > (char *)tbp->b_data) {
					bdata -= CLBYTES;
					pagemove(bdata, bdata + size, CLBYTES);
				}
			} else
				pagemove(bdata, bdata + size, tbp->b_bufsize);
		}
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	/*
	 * The cluster may have been terminated early, adjust the cluster
	 * buffer size accordingly.  If no cluster could be formed,
	 * deallocate the cluster save info.
	 */
	if (i <= run) {
		if (i == 1) {
			bp->b_saveaddr = b_save->bs_saveaddr;
			bp->b_flags &= ~B_CALL;
			bp->b_iodone = NULL;
			free(b_save, M_SEGMENT);
		}
		allocbuf(bp, size * i);
	}
	return(bp);
}

/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
		/* Buffer already valid or dirty: no cluster I/O needed. */
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return(bp);
		}
	}
	/* Grow to cover the whole run and arm the completion callback. */
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return(bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **bpp, *tbp;
	long bsize;
	caddr_t cp;
	int error = 0;

	/*
	 * Must propagate errors to all the components.
	 */
	if (bp->b_flags & B_ERROR)
		error = bp->b_error;

	/* Recover the child list stashed by cluster_rbuild/cluster_wbuild. */
	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	bsize = b_save->bs_bufsize;
	cp = (char *)bp->b_data + bsize;
	/*
	 * Move memory from the large cluster buffer into the component
	 * buffers and mark IO as done on these.
	 */
	for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
		tbp = *bpp;
		pagemove(cp, tbp->b_data, bsize);
		tbp->b_bufsize += bsize;
		tbp->b_bcount = bsize;
		if (error) {
			tbp->b_flags |= B_ERROR;
			tbp->b_error = error;
		}
		biodone(tbp);
		bp->b_bufsize -= bsize;
		cp += bsize;
	}
	/*
	 * If there was excess memory in the cluster buffer,
	 * slide it up adjacent to the remaining valid data.
	 */
	if (bp->b_bufsize != bsize) {
		if (bp->b_bufsize < bsize)
			panic("cluster_callback: too little memory");
		pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
	}
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		/* Synchronous caller is sleeping in biowait(); wake it. */
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Do clustered write for FFS.
 *
 * Three cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2.  beginning of cluster - begin cluster
 *	3.  middle of a cluster - add to cluster
 *	4.  end of a cluster - asynchronously write cluster
 *
 * Cluster state is kept per-vnode in v_cstart/v_clen/v_lastw/v_lasta.
 */
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	/* Not logically and physically sequential with the last write? */
	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * seeked to another point in the file since its
			 * last write, or we have reached our maximum
			 * cluster size, then push the previous cluster.
			 * Otherwise try reallocating to make it sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
			if ((lbn + 1) * bp->b_bcount != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 * (*endbp is bp itself; it is not
					 * released here.)
					 */
					for (bpp = buflist->bs_children;
					     bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    vp->v_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					     bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((lbn + 1) * bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		     bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (maxclen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}


/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
 */
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t	cp;
	int i, s;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_wbuild: size %d != filesize %d\n",
			size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
	/* Skip over blocks not in core (nothing to push) and lbn itself. */
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp) {
			bawrite(last_bp);
		} else if (len) {
			bp = getblk(vp, start_lbn, size, 0, 0);
			bawrite(bp);
		}
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	/* Only delayed-write buffers belong in a write cluster. */
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	/*
	 * Extra memory in the buffer, punt on this buffer.
	 * XXX we could handle this in most cases, but we would have to
	 * push the extra memory down to after our max possible cluster
	 * size and then potentially pull it back up if the cluster was
	 * terminated prematurely--too much hassle.
	 */
	if (bp->b_bcount != bp->b_bufsize) {
		++start_lbn;
		--len;
		bawrite(bp);
		goto redo;
	}

	/* bp itself is the first block; len now counts potential children. */
	--len;
	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bcount = bp->b_bcount;
	b_save->bs_bufsize = bp->b_bufsize;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	bp->b_flags |= B_CALL;
	bp->b_iodone = cluster_callback;
	cp = (char *)bp->b_data + size;
	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
		/*
		 * Block is not in core or the non-sequential block
		 * ending our cluster was part of the cluster (in which
		 * case we don't want to write it twice).
		 *
		 * NOTE(review): the unparenthesized mix of || and && below
		 * relies on && binding tighter, i.e.
		 * (!incore || (last_bp == NULL && start_lbn == lbn)).
		 */
		if (!incore(vp, start_lbn) ||
		    last_bp == NULL && start_lbn == lbn)
			break;

		/*
		 * Get the desired block buffer (unless it is the final
		 * sequential block whose buffer was passed in explicitly
		 * as last_bp).
		 */
		if (last_bp == NULL || start_lbn != lbn) {
			tbp = getblk(vp, start_lbn, size, 0, 0);
			if (!(tbp->b_flags & B_DELWRI)) {
				brelse(tbp);
				break;
			}
		} else
			tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
		if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
			printf("Clustered Block: %d addr %x bufsize: %d\n",
			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
			    tbp->b_blkno);
			panic("Clustered write to wrong blocks");
		}

		pagemove(tbp->b_data, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		/* Child is handed to the parent I/O; mark it clean/async. */
		tbp->b_bufsize -= size;
		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= (B_ASYNC | B_AGE);
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += size;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	/* Cluster terminated early: retry with the remainder of the range. */
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}

/*
 * Collect together all the buffers in a cluster.
 * Plus add one additional buffer.
+ */ +struct cluster_save * +cluster_collectbufs(vp, last_bp) + struct vnode *vp; + struct buf *last_bp; +{ + struct cluster_save *buflist; + daddr_t lbn; + int i, len; + + len = vp->v_lastw - vp->v_cstart + 1; + buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), + M_SEGMENT, M_WAITOK); + buflist->bs_nchildren = 0; + buflist->bs_children = (struct buf **)(buflist + 1); + for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) + (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, + &buflist->bs_children[i]); + buflist->bs_children[i] = last_bp; + buflist->bs_nchildren = i + 1; + return (buflist); +} diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c new file mode 100644 index 000000000000..9b5779767d00 --- /dev/null +++ b/sys/kern/vfs_conf.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_conf.c 8.11 (Berkeley) 5/10/95 + */ + +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +/* + * These define the root filesystem, device, and root filesystem type. + */ +struct mount *rootfs; +struct vnode *rootvnode; +int (*mountroot)() = NULL; + +/* + * Set up the initial array of known filesystem types. + */ +extern struct vfsops ufs_vfsops; +extern int ffs_mountroot(); +extern struct vfsops lfs_vfsops; +extern int lfs_mountroot(); +extern struct vfsops mfs_vfsops; +extern int mfs_mountroot(); +extern struct vfsops cd9660_vfsops; +extern int cd9660_mountroot(); +extern struct vfsops msdos_vfsops; +extern struct vfsops adosfs_vfsops; +extern struct vfsops nfs_vfsops; +extern int nfs_mountroot(); +extern struct vfsops afs_vfsops; +extern struct vfsops procfs_vfsops; +extern struct vfsops null_vfsops; +extern struct vfsops union_vfsops; +extern struct vfsops umap_vfsops; +extern struct vfsops portal_vfsops; +extern struct vfsops fdesc_vfsops; +extern struct vfsops kernfs_vfsops; + +/* + * Set up the filesystem operations for vnodes. 
 */
/*
 * Table of compiled-in filesystems, selected by config #defines.
 * Entry order here is unimportant; vfs_init links the entries and
 * computes maxvfsconf from the per-entry type numbers.
 * (Fields presumably follow struct vfsconf in <sys/mount.h>:
 * ops vector, name, type number, refcount, flags, mountroot hook,
 * next pointer -- confirm against the header.)
 */
static struct vfsconf vfsconflist[] = {

	/* Fast Filesystem */
#ifdef FFS
	{ &ufs_vfsops, "ufs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL },
#endif

	/* Log-based Filesystem */
#ifdef LFS
	{ &lfs_vfsops, "lfs", 5, 0, MNT_LOCAL, lfs_mountroot, NULL },
#endif

	/* Memory-based Filesystem */
#ifdef MFS
	{ &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL },
#endif

	/* ISO9660 (aka CDROM) Filesystem */
#ifdef CD9660
	{ &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL },
#endif

	/* MSDOS Filesystem */
#ifdef MSDOS
	{ &msdos_vfsops, "msdos", 4, 0, MNT_LOCAL, NULL, NULL },
#endif

	/* AmigaDOS Filesystem */
#ifdef ADOSFS
	{ &adosfs_vfsops, "adosfs", 16, 0, MNT_LOCAL, NULL, NULL },
#endif

	/* Sun-compatible Network Filesystem */
#ifdef NFS
	{ &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL },
#endif

	/* Andrew Filesystem */
#ifdef AFS
	{ &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL },
#endif

	/* /proc Filesystem */
#ifdef PROCFS
	{ &procfs_vfsops, "procfs", 12, 0, 0, NULL, NULL },
#endif

	/* Loopback (Minimal) Filesystem Layer */
#ifdef NULLFS
	{ &null_vfsops, "loopback", 9, 0, 0, NULL, NULL },
#endif

	/* Union (translucent) Filesystem */
#ifdef UNION
	{ &union_vfsops, "union", 15, 0, 0, NULL, NULL },
#endif

	/* User/Group Identifier Remapping Filesystem */
#ifdef UMAPFS
	{ &umap_vfsops, "umap", 10, 0, 0, NULL, NULL },
#endif

	/* Portal Filesystem */
#ifdef PORTAL
	{ &portal_vfsops, "portal", 8, 0, 0, NULL, NULL },
#endif

	/* File Descriptor Filesystem */
#ifdef FDESC
	{ &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL },
#endif

	/* Kernel Information Filesystem */
#ifdef KERNFS
	{ &kernfs_vfsops, "kernfs", 11, 0, 0, NULL, NULL },
#endif

};

/*
 * Initially the size of the list, vfs_init will set maxvfsconf
 * to the highest defined type number.
+ */ +int maxvfsconf = sizeof(vfsconflist) / sizeof (struct vfsconf); +struct vfsconf *vfsconf = vfsconflist; + +/* + * + * vfs_opv_descs enumerates the list of vnode classes, each with it's own + * vnode operation vector. It is consulted at system boot to build operation + * vectors. It is NULL terminated. + * + */ +extern struct vnodeopv_desc ffs_vnodeop_opv_desc; +extern struct vnodeopv_desc ffs_specop_opv_desc; +extern struct vnodeopv_desc ffs_fifoop_opv_desc; +extern struct vnodeopv_desc lfs_vnodeop_opv_desc; +extern struct vnodeopv_desc lfs_specop_opv_desc; +extern struct vnodeopv_desc lfs_fifoop_opv_desc; +extern struct vnodeopv_desc mfs_vnodeop_opv_desc; +extern struct vnodeopv_desc dead_vnodeop_opv_desc; +extern struct vnodeopv_desc fifo_vnodeop_opv_desc; +extern struct vnodeopv_desc spec_vnodeop_opv_desc; +extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc; +extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc; +extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc; +extern struct vnodeopv_desc fdesc_vnodeop_opv_desc; +extern struct vnodeopv_desc portal_vnodeop_opv_desc; +extern struct vnodeopv_desc null_vnodeop_opv_desc; +extern struct vnodeopv_desc umap_vnodeop_opv_desc; +extern struct vnodeopv_desc kernfs_vnodeop_opv_desc; +extern struct vnodeopv_desc procfs_vnodeop_opv_desc; +extern struct vnodeopv_desc cd9660_vnodeop_opv_desc; +extern struct vnodeopv_desc cd9660_specop_opv_desc; +extern struct vnodeopv_desc cd9660_fifoop_opv_desc; +extern struct vnodeopv_desc union_vnodeop_opv_desc; + +struct vnodeopv_desc *vfs_opv_descs[] = { + &ffs_vnodeop_opv_desc, + &ffs_specop_opv_desc, +#ifdef FIFO + &ffs_fifoop_opv_desc, +#endif + &dead_vnodeop_opv_desc, +#ifdef FIFO + &fifo_vnodeop_opv_desc, +#endif + &spec_vnodeop_opv_desc, +#ifdef LFS + &lfs_vnodeop_opv_desc, + &lfs_specop_opv_desc, +#ifdef FIFO + &lfs_fifoop_opv_desc, +#endif +#endif +#ifdef MFS + &mfs_vnodeop_opv_desc, +#endif +#ifdef NFS + &nfsv2_vnodeop_opv_desc, + &spec_nfsv2nodeop_opv_desc, +#ifdef 
FIFO + &fifo_nfsv2nodeop_opv_desc, +#endif +#endif +#ifdef FDESC + &fdesc_vnodeop_opv_desc, +#endif +#ifdef PORTAL + &portal_vnodeop_opv_desc, +#endif +#ifdef NULLFS + &null_vnodeop_opv_desc, +#endif +#ifdef UMAPFS + &umap_vnodeop_opv_desc, +#endif +#ifdef KERNFS + &kernfs_vnodeop_opv_desc, +#endif +#ifdef PROCFS + &procfs_vnodeop_opv_desc, +#endif +#ifdef CD9660 + &cd9660_vnodeop_opv_desc, + &cd9660_specop_opv_desc, +#ifdef FIFO + &cd9660_fifoop_opv_desc, +#endif +#endif +#ifdef UNION + &union_vnodeop_opv_desc, +#endif + NULL +}; diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c new file mode 100644 index 000000000000..b5abe5801af4 --- /dev/null +++ b/sys/kern/vfs_init.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed + * to Berkeley by John Heidemann of the UCLA Ficus project. + * + * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_init.c 8.5 (Berkeley) 5/11/95 + */ + + +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/namei.h> +#include <sys/ucred.h> +#include <sys/buf.h> +#include <sys/errno.h> +#include <sys/malloc.h> + +/* + * Sigh, such primitive tools are these... + */ +#if 0 +#define DODEBUG(A) A +#else +#define DODEBUG(A) +#endif + +extern struct vnodeopv_desc *vfs_opv_descs[]; + /* a list of lists of vnodeops defns */ +extern struct vnodeop_desc *vfs_op_descs[]; + /* and the operations they perform */ +/* + * This code doesn't work if the defn is **vnodop_defns with cc. + * The problem is because of the compiler sometimes putting in an + * extra level of indirection for arrays. It's an interesting + * "feature" of C. + */ +int vfs_opv_numops; + +typedef (*PFI)(); /* the standard Pointer to a Function returning an Int */ + +/* + * A miscellaneous routine. + * A generic "default" routine that just returns an error. + */ +int +vn_default_error() +{ + + return (EOPNOTSUPP); +} + +/* + * vfs_init.c + * + * Allocate and fill in operations vectors. 
 *
 * An undocumented feature of this approach to defining operations is that
 * there can be multiple entries in vfs_opv_descs for the same operations
 * vector. This allows third parties to extend the set of operations
 * supported by another layer in a binary compatible way. For example,
 * assume that NFS needed to be modified to support Ficus. NFS has an entry
 * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
 * listing those new operations Ficus adds to NFS, all without modifying the
 * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
 * that is a(whole)nother story.) This is a feature.
 */
void
vfs_opv_init()
{
	int i, j, k;
	int (***opv_desc_vector_p)();
	int (**opv_desc_vector)();
	struct vnodeopv_entry_desc *opve_descp;

	/*
	 * Allocate the dynamic vectors and fill them in.
	 */
	for (i=0; vfs_opv_descs[i]; i++) {
		opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
		/*
		 * Allocate and init the vector, if it needs it.
		 * Also handle backwards compatibility.
		 */
		if (*opv_desc_vector_p == NULL) {
			/* XXX - shouldn't be M_VNODE */
			MALLOC(*opv_desc_vector_p, PFI*,
			       vfs_opv_numops*sizeof(PFI), M_VNODE, M_WAITOK);
			bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI));
			DODEBUG(printf("vector at %x allocated\n",
			    opv_desc_vector_p));
		}
		opv_desc_vector = *opv_desc_vector_p;
		for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
			opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);

			/*
			 * Sanity check:  is this operation listed
			 * in the list of operations?  We check this
			 * by seeing if its offset is zero.  Since
			 * the default routine should always be listed
			 * first, it should be the only one with a zero
			 * offset.  Any other operation with a zero
			 * offset is probably not listed in
			 * vfs_op_descs, and so is probably an error.
			 *
			 * A panic here means the layer programmer
			 * has committed the all-too common bug
			 * of adding a new operation to the layer's
			 * list of vnode operations but
			 * not adding the operation to the system-wide
			 * list of supported operations.
			 */
			if (opve_descp->opve_op->vdesc_offset == 0 &&
			    opve_descp->opve_op->vdesc_offset !=
				VOFFSET(vop_default)) {
				printf("operation %s not listed in %s.\n",
				    opve_descp->opve_op->vdesc_name,
				    "vfs_op_descs");
				panic ("vfs_opv_init: bad operation");
			}
			/*
			 * Fill in this entry.
			 */
			opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
					opve_descp->opve_impl;
		}
	}
	/*
	 * Finally, go back and replace unfilled routines
	 * with their default.  (Sigh, an O(n^3) algorithm.  I
	 * could make it better, but that'd be work, and n is small.)
	 */
	for (i = 0; vfs_opv_descs[i]; i++) {
		opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
		/*
		 * Force every operations vector to have a default routine.
		 */
		if (opv_desc_vector[VOFFSET(vop_default)]==NULL) {
			panic("vfs_opv_init: operation vector without default routine.");
		}
		for (k = 0; k<vfs_opv_numops; k++)
			if (opv_desc_vector[k] == NULL)
				opv_desc_vector[k] =
					opv_desc_vector[VOFFSET(vop_default)];
	}
}

/*
 * Initialize known vnode operations vectors.
 * Must run before vfs_opv_init(): it zeroes every vector pointer and
 * assigns each operation descriptor its offset in the vectors.
 */
void
vfs_op_init()
{
	int i;

	DODEBUG(printf("Vnode_interface_init.\n"));
	/*
	 * Set all vnode vectors to a well known value.
	 */
	for (i = 0; vfs_opv_descs[i]; i++)
		*(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
	/*
	 * Figure out how many ops there are by counting the table,
	 * and assign each its offset.
	 */
	for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) {
		vfs_op_descs[i]->vdesc_offset = vfs_opv_numops;
		vfs_opv_numops++;
	}
	DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
}

/*
 * Routines having to do with the management of the vnode table.
+ */ +extern struct vnodeops dead_vnodeops; +extern struct vnodeops spec_vnodeops; +struct vattr va_null; + +/* + * Initialize the vnode structures and initialize each file system type. + */ +vfsinit() +{ + struct vfsconf *vfsp; + int i, maxtypenum; + + /* + * Initialize the vnode table + */ + vntblinit(); + /* + * Initialize the vnode name cache + */ + nchinit(); + /* + * Build vnode operation vectors. + */ + vfs_op_init(); + vfs_opv_init(); /* finish the job */ + /* + * Initialize each file system type. + */ + vattr_null(&va_null); + maxtypenum = 0; + for (vfsp = vfsconf, i = 1; i <= maxvfsconf; i++, vfsp++) { + if (i < maxvfsconf) + vfsp->vfc_next = vfsp + 1; + if (maxtypenum <= vfsp->vfc_typenum) + maxtypenum = vfsp->vfc_typenum + 1; + (*vfsp->vfc_vfsops->vfs_init)(vfsp); + } + /* next vfc_typenum to be used */ + maxvfsconf = maxtypenum; +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c new file mode 100644 index 000000000000..826fbfeab83b --- /dev/null +++ b/sys/kern/vfs_lookup.c @@ -0,0 +1,645 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 + */ + +#include <sys/param.h> +#include <sys/syslimits.h> +#include <sys/time.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/filedesc.h> +#include <sys/proc.h> + +#ifdef KTRACE +#include <sys/ktrace.h> +#endif + +/* + * Convert a pathname into a pointer to a locked inode. + * + * The FOLLOW flag is set when symbolic links are to be followed + * when they occur at the end of the name translation process. + * Symbolic links are always followed for all other pathname + * components other than the last. + * + * The segflg defines whether the name is to be copied from user + * space or kernel space. 
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Returns 0 with the result vnode in ndp->ni_vp (locking per the
 * LOCKLEAF/LOCKPARENT/WANTPARENT flags), or an errno with ni_vp NULL.
 */
int
namei(ndp)
	register struct nameidata *ndp;
{
	register struct filedesc *fdp;	/* pointer to file descriptor state */
	register char *cp;		/* pointer into pathname argument */
	register struct vnode *dp;	/* the directory we are searching */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	int error, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	struct proc *p = cnp->cn_proc;

	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
#ifdef DIAGNOSTIC
	if (!cnp->cn_cred || !cnp->cn_proc)
		panic ("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic ("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic ("namei: flags contaminated with nameiops");
#endif
	fdp = cnp->cn_proc->p_fd;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		MALLOC(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
	/* ni_segflg selects a kernel- vs user-space source for the path. */
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);
	if (error) {
		free(cnp->cn_pnbuf, M_NAMEI);
		ndp->ni_vp = NULL;
		return (error);
	}
	ndp->ni_loopcnt = 0;
#ifdef KTRACE
	if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
		ktrnamei(cnp->cn_proc->p_tracep, cnp->cn_pnbuf);
#endif

	/*
	 * Get starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
		ndp->ni_rootdir = rootvnode;
	dp = fdp->fd_cdir;
	VREF(dp);
	for (;;) {
		/*
		 * Check if root directory should replace current directory.
		 * Done at start of translation and after symbolic link.
		 */
		cnp->cn_nameptr = cnp->cn_pnbuf;
		if (*(cnp->cn_nameptr) == '/') {
			vrele(dp);
			while (*(cnp->cn_nameptr) == '/') {
				cnp->cn_nameptr++;
				ndp->ni_pathlen--;
			}
			dp = ndp->ni_rootdir;
			VREF(dp);
		}
		ndp->ni_startdir = dp;
		/* lookup() consumes the ni_startdir reference. */
		if (error = lookup(ndp)) {
			FREE(cnp->cn_pnbuf, M_NAMEI);
			return (error);
		}
		/*
		 * Check for symbolic link
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			/* Keep the name buffer only if the caller asked. */
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				FREE(cnp->cn_pnbuf, M_NAMEI);
			else
				cnp->cn_flags |= HASBUF;
			return (0);
		}
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			VOP_UNLOCK(ndp->ni_dvp, 0, p);
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
		/*
		 * Read the link into a fresh buffer when there is a
		 * remaining path to splice after it; otherwise reuse
		 * the pathname buffer in place.
		 */
		if (ndp->ni_pathlen > 1)
			MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_procp = (struct proc *)0;
		auio.uio_resid = MAXPATHLEN;
		if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) {
			if (ndp->ni_pathlen > 1)
				free(cp, M_NAMEI);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			if (ndp->ni_pathlen > 1)
				free(cp, M_NAMEI);
			error = ENAMETOOLONG;
			break;
		}
		/* Splice link target + unconsumed remainder of the path. */
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			FREE(cnp->cn_pnbuf, M_NAMEI);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		dp = ndp->ni_dvp;
	}
	/* Error exit: drop buffer and the references left by lookup(). */
	FREE(cnp->cn_pnbuf, M_NAMEI);
	vrele(ndp->ni_dvp);
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	return (error);
}

/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is taken from ni_startdir.
The pathname is + * descended until done, or a symbolic link is encountered. The variable + * ni_more is clear if the path is completed; it is set to one if a + * symbolic link needing interpretation is encountered. + * + * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on + * whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it, the parent directory is returned + * locked. If flag has WANTPARENT or'ed into it, the parent directory is + * returned unlocked. Otherwise the parent directory is not returned. If + * the target of the pathname exists and LOCKLEAF is or'ed into the flag + * the target is returned locked, otherwise it is returned unlocked. + * When creating or renaming and LOCKPARENT is specified, the target may not + * be ".". When deleting and LOCKPARENT is specified, the target may be ".". + * + * Overall outline of lookup: + * + * dirloop: + * identify next component of name at ndp->ni_ptr + * handle degenerate case where name is null string + * if .. and crossing mount points and on mounted filesys, find parent + * call VOP_LOOKUP routine for next component name + * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set + * component vnode returned in ni_vp (if it exists), locked. 
+ * if result vnode is mounted on and crossing mount points, + * find mounted on vnode + * if more components of name, do next level at dirloop + * return the answer in ni_vp, locked if LOCKLEAF set + * if LOCKPARENT set, return locked parent in ni_dvp + * if WANTPARENT set, return unlocked parent in ni_dvp + */ +int +lookup(ndp) + register struct nameidata *ndp; +{ + register char *cp; /* pointer into pathname argument */ + register struct vnode *dp = 0; /* the directory we are searching */ + struct vnode *tdp; /* saved dp */ + struct mount *mp; /* mount table entry */ + int docache; /* == 0 do not cache last component */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int rdonly; /* lookup read-only flag bit */ + int error = 0; + struct componentname *cnp = &ndp->ni_cnd; + struct proc *p = cnp->cn_proc; + + /* + * Setup: break out flag bits into variables. + */ + wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); + docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; + if (cnp->cn_nameiop == DELETE || + (wantparent && cnp->cn_nameiop != CREATE)) + docache = 0; + rdonly = cnp->cn_flags & RDONLY; + ndp->ni_dvp = NULL; + cnp->cn_flags &= ~ISSYMLINK; + dp = ndp->ni_startdir; + ndp->ni_startdir = NULLVP; + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + +dirloop: + /* + * Search a new directory. + * + * The cn_hash value is for use by vfs_cache. + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. 
+ */ + cnp->cn_consume = 0; + cnp->cn_hash = 0; + for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + cnp->cn_hash += (unsigned char)*cp; + cnp->cn_namelen = cp - cnp->cn_nameptr; + if (cnp->cn_namelen > NAME_MAX) { + error = ENAMETOOLONG; + goto bad; + } +#ifdef NAMEI_DIAGNOSTIC + { char c = *cp; + *cp = '\0'; + printf("{%s}: ", cnp->cn_nameptr); + *cp = c; } +#endif + ndp->ni_pathlen -= cnp->cn_namelen; + ndp->ni_next = cp; + cnp->cn_flags |= MAKEENTRY; + if (*cp == '\0' && docache == 0) + cnp->cn_flags &= ~MAKEENTRY; + if (cnp->cn_namelen == 2 && + cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') + cnp->cn_flags |= ISDOTDOT; + else + cnp->cn_flags &= ~ISDOTDOT; + if (*ndp->ni_next == 0) + cnp->cn_flags |= ISLASTCN; + else + cnp->cn_flags &= ~ISLASTCN; + + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + */ + if (cnp->cn_nameptr[0] == '\0') { + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto bad; + } + if (cnp->cn_nameiop != LOOKUP) { + error = EISDIR; + goto bad; + } + if (wantparent) { + ndp->ni_dvp = dp; + VREF(dp); + } + ndp->ni_vp = dp; + if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) + VOP_UNLOCK(dp, 0, p); + if (cnp->cn_flags & SAVESTART) + panic("lookup: SAVESTART"); + return (0); + } + + /* + * Handle "..": two special cases. + * 1. If at root directory (e.g. after chroot) + * or at absolute root directory + * then ignore it so can't get out. + * 2. If this vnode is the root of a mounted + * filesystem, then replace it with the + * vnode which was mounted on so we take the + * .. in the other file system. 
+ */ + if (cnp->cn_flags & ISDOTDOT) { + for (;;) { + if (dp == ndp->ni_rootdir || dp == rootvnode) { + ndp->ni_dvp = dp; + ndp->ni_vp = dp; + VREF(dp); + goto nextname; + } + if ((dp->v_flag & VROOT) == 0 || + (cnp->cn_flags & NOCROSSMOUNT)) + break; + tdp = dp; + dp = dp->v_mount->mnt_vnodecovered; + vput(tdp); + VREF(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + } + } + + /* + * We now have a segment name to search for, and a directory to search. + */ +unionlookup: + ndp->ni_dvp = dp; + ndp->ni_vp = NULL; + if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) { +#ifdef DIAGNOSTIC + if (ndp->ni_vp != NULL) + panic("leaf should be empty"); +#endif +#ifdef NAMEI_DIAGNOSTIC + printf("not found\n"); +#endif + if ((error == ENOENT) && + (dp->v_flag & VROOT) && + (dp->v_mount->mnt_flag & MNT_UNION)) { + tdp = dp; + dp = dp->v_mount->mnt_vnodecovered; + vput(tdp); + VREF(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + goto unionlookup; + } + + if (error != EJUSTRETURN) + goto bad; + /* + * If creating and at end of pathname, then can consider + * allowing file to be created. + */ + if (rdonly) { + error = EROFS; + goto bad; + } + /* + * We return with ni_vp NULL to indicate that the entry + * doesn't currently exist, leaving a pointer to the + * (possibly locked) directory inode in ndp->ni_dvp. + */ + if (cnp->cn_flags & SAVESTART) { + ndp->ni_startdir = ndp->ni_dvp; + VREF(ndp->ni_startdir); + } + return (0); + } +#ifdef NAMEI_DIAGNOSTIC + printf("found\n"); +#endif + + /* + * Take into account any additional components consumed by + * the underlying filesystem. + */ + if (cnp->cn_consume > 0) { + cnp->cn_nameptr += cnp->cn_consume; + ndp->ni_next += cnp->cn_consume; + ndp->ni_pathlen -= cnp->cn_consume; + cnp->cn_consume = 0; + } + + dp = ndp->ni_vp; + /* + * Check to see if the vnode has been mounted on; + * if so find the root of the mounted file system. 
+ */ + while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && + (cnp->cn_flags & NOCROSSMOUNT) == 0) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + goto bad2; + vput(dp); + ndp->ni_vp = dp = tdp; + } + + /* + * Check for symbolic link + */ + if ((dp->v_type == VLNK) && + ((cnp->cn_flags & FOLLOW) || *ndp->ni_next == '/')) { + cnp->cn_flags |= ISSYMLINK; + return (0); + } + +nextname: + /* + * Not a symbolic link. If more pathname, + * continue at next component, else return. + */ + if (*ndp->ni_next == '/') { + cnp->cn_nameptr = ndp->ni_next; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + vrele(ndp->ni_dvp); + goto dirloop; + } + /* + * Disallow directory write attempts on read-only file systems. + */ + if (rdonly && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + error = EROFS; + goto bad2; + } + if (cnp->cn_flags & SAVESTART) { + ndp->ni_startdir = ndp->ni_dvp; + VREF(ndp->ni_startdir); + } + if (!wantparent) + vrele(ndp->ni_dvp); + if ((cnp->cn_flags & LOCKLEAF) == 0) + VOP_UNLOCK(dp, 0, p); + return (0); + +bad2: + if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0') + VOP_UNLOCK(ndp->ni_dvp, 0, p); + vrele(ndp->ni_dvp); +bad: + vput(dp); + ndp->ni_vp = NULL; + return (error); +} + +/* + * relookup - lookup a path name component + * Used by lookup to re-aquire things. + */ +int +relookup(dvp, vpp, cnp) + struct vnode *dvp, **vpp; + struct componentname *cnp; +{ + struct proc *p = cnp->cn_proc; + struct vnode *dp = 0; /* the directory we are searching */ + int docache; /* == 0 do not cache last component */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int rdonly; /* lookup read-only flag bit */ + int error = 0; +#ifdef NAMEI_DIAGNOSTIC + int newhash; /* DEBUG: check name hash */ + char *cp; /* DEBUG: check name ptr/len */ +#endif + + /* + * Setup: break out flag bits into variables. 
+ */ + wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); + docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; + if (cnp->cn_nameiop == DELETE || + (wantparent && cnp->cn_nameiop != CREATE)) + docache = 0; + rdonly = cnp->cn_flags & RDONLY; + cnp->cn_flags &= ~ISSYMLINK; + dp = dvp; + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + +/* dirloop: */ + /* + * Search a new directory. + * + * The cn_hash value is for use by vfs_cache. + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. + */ +#ifdef NAMEI_DIAGNOSTIC + for (newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + newhash += (unsigned char)*cp; + if (newhash != cnp->cn_hash) + panic("relookup: bad hash"); + if (cnp->cn_namelen != cp - cnp->cn_nameptr) + panic ("relookup: bad len"); + if (*cp != 0) + panic("relookup: not last component"); + printf("{%s}: ", cnp->cn_nameptr); +#endif + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + */ + if (cnp->cn_nameptr[0] == '\0') { + if (cnp->cn_nameiop != LOOKUP || wantparent) { + error = EISDIR; + goto bad; + } + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto bad; + } + if (!(cnp->cn_flags & LOCKLEAF)) + VOP_UNLOCK(dp, 0, p); + *vpp = dp; + if (cnp->cn_flags & SAVESTART) + panic("lookup: SAVESTART"); + return (0); + } + + if (cnp->cn_flags & ISDOTDOT) + panic ("relookup: lookup on dot-dot"); + + /* + * We now have a segment name to search for, and a directory to search. + */ + if (error = VOP_LOOKUP(dp, vpp, cnp)) { +#ifdef DIAGNOSTIC + if (*vpp != NULL) + panic("leaf should be empty"); +#endif + if (error != EJUSTRETURN) + goto bad; + /* + * If creating and at end of pathname, then can consider + * allowing file to be created. 
+ */ + if (rdonly) { + error = EROFS; + goto bad; + } + /* ASSERT(dvp == ndp->ni_startdir) */ + if (cnp->cn_flags & SAVESTART) + VREF(dvp); + /* + * We return with ni_vp NULL to indicate that the entry + * doesn't currently exist, leaving a pointer to the + * (possibly locked) directory inode in ndp->ni_dvp. + */ + return (0); + } + dp = *vpp; + +#ifdef DIAGNOSTIC + /* + * Check for symbolic link + */ + if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) + panic ("relookup: symlink found.\n"); +#endif + + /* + * Disallow directory write attempts on read-only file systems. + */ + if (rdonly && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + error = EROFS; + goto bad2; + } + /* ASSERT(dvp == ndp->ni_startdir) */ + if (cnp->cn_flags & SAVESTART) + VREF(dvp); + + if (!wantparent) + vrele(dvp); + if ((cnp->cn_flags & LOCKLEAF) == 0) + VOP_UNLOCK(dp, 0, p); + return (0); + +bad2: + if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) + VOP_UNLOCK(dvp, 0, p); + vrele(dvp); +bad: + vput(dp); + *vpp = NULL; + return (error); +} diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c new file mode 100644 index 000000000000..f891e02d519e --- /dev/null +++ b/sys/kern/vfs_subr.c @@ -0,0 +1,1782 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 + */ + +/* + * External virtual filesystem routines + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/namei.h> +#include <sys/ucred.h> +#include <sys/buf.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/domain.h> +#include <sys/mbuf.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> + +#include <miscfs/specfs/specdev.h> + +enum vtype iftovt_tab[16] = { + VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, + VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, +}; +int vttoif_tab[9] = { + 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, + S_IFSOCK, S_IFIFO, S_IFMT, +}; + +/* + * Insq/Remq for the vnode usage lists. + */ +#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) +#define bufremvn(bp) { \ + LIST_REMOVE(bp, b_vnbufs); \ + (bp)->b_vnbufs.le_next = NOLIST; \ +} +TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ +struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; + +/* + * Initialize the vnode management data structures. + */ +void +vntblinit() +{ + + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); + TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); + CIRCLEQ_INIT(&mountlist); +} + +/* + * Mark a mount point as busy. Used to synchronize access and to delay + * unmounting. Interlock is not released on failure. 
+ */ +int +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; +{ + int lkflags; + + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); + mp->mnt_flag |= MNT_MWAIT; + if (interlkp) + simple_unlock(interlkp); + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + sleep((caddr_t)mp, PVFS); + if (interlkp) + simple_lock(interlkp); + return (ENOENT); + } + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); + return (0); +} + +/* + * Free a busy filesystem. + */ +void +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; +{ + + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); +} + +/* + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. 
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;	/* filesystem type name, matched against vfsconf */
	char *devname;		/* device name recorded in f_mntfromname */
	struct mount **mpp;	/* OUT: new, zeroed, busied mount structure */
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	/* Find the named filesystem type in the registered vfsconf list. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/*
	 * NOTE(review): the return value is ignored; presumably a
	 * LK_NOWAIT shared lock on a freshly initialized mount lock
	 * is always granted -- confirm.
	 */
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;	/* root is mounted read-only at first */
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	/*
	 * NOTE(review): strncpy leaves f_fstypename without a NUL
	 * terminator if vfc_name fills all MFSNAMELEN bytes -- confirm
	 * consumers treat the field as fixed-width.
	 */
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);	/* preselected root mount routine, if any */
	int error;

	/* A preselected routine short-circuits the search below. */
	if (mountroot != NULL)
		return ((*mountroot)());
	/* Otherwise try every registered type that offers vfc_mountroot. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	/* No filesystem could mount the root. */
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
+ */ +struct mount * +vfs_getvfs(fsid) + fsid_t *fsid; +{ + register struct mount *mp; + + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; + mp = mp->mnt_list.cqe_next) { + if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); + return (mp); + } + } + simple_unlock(&mountlist_slock); + return ((struct mount *)0); +} + +/* + * Get a new unique fsid + */ +void +vfs_getnewfsid(mp) + struct mount *mp; +{ +static u_short xxxfs_mntid; + + fsid_t tfsid; + int mtype; + + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; + mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); + mp->mnt_stat.f_fsid.val[1] = mtype; + if (xxxfs_mntid == 0) + ++xxxfs_mntid; + tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); + tfsid.val[1] = mtype; + if (mountlist.cqh_first != (void *)&mountlist) { + while (vfs_getvfs(&tfsid)) { + tfsid.val[0]++; + xxxfs_mntid++; + } + } + mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); +} + +/* + * Set vnode attributes to VNOVAL + */ +void +vattr_null(vap) + register struct vattr *vap; +{ + + vap->va_type = VNON; + vap->va_size = vap->va_bytes = VNOVAL; + vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = + vap->va_fsid = vap->va_fileid = + vap->va_blocksize = vap->va_rdev = + vap->va_atime.ts_sec = vap->va_atime.ts_nsec = + vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = + vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = + vap->va_flags = vap->va_gen = VNOVAL; + vap->va_vaflags = 0; +} + +/* + * Routines having to do with the management of the vnode table. + */ +extern int (**dead_vnodeop_p)(); +static void vclean __P((struct vnode *vp, int flag, struct proc *p)); +extern void vgonel __P((struct vnode *vp, struct proc *p)); +long numvnodes; +extern struct vattr va_null; + +/* + * Return the next vnode from the free list. 
+ */ +int +getnewvnode(tag, mp, vops, vpp) + enum vtagtype tag; + struct mount *mp; + int (**vops)(); + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + int s; + int cnt; + +top: + simple_lock(&vnode_free_list_slock); + if ((vnode_free_list.tqh_first == NULL && + numvnodes < 2 * desiredvnodes) || + numvnodes < desiredvnodes) { + simple_unlock(&vnode_free_list_slock); + vp = (struct vnode *)malloc((u_long)sizeof *vp, + M_VNODE, M_WAITOK); + bzero((char *)vp, sizeof *vp); + numvnodes++; + } else { + for (vp = vnode_free_list.tqh_first; + vp != NULLVP; vp = vp->v_freelist.tqe_next) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. + */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); + tablefull("vnode"); + *vpp = 0; + return (ENFILE); + } + if (vp->v_usecount) + panic("free vnode isn't"); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + /* see comment on why 0xdeadb is set at end of vgone (below) */ + vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; + simple_unlock(&vnode_free_list_slock); + vp->v_lease = NULL; + if (vp->v_type != VBAD) + vgonel(vp, p); + else + simple_unlock(&vp->v_interlock); +#ifdef DIAGNOSTIC + if (vp->v_data) + panic("cleaned vnode isn't"); + s = splbio(); + if (vp->v_numoutput) + panic("Clean vnode has pending I/O's"); + splx(s); +#endif + vp->v_flag = 0; + vp->v_lastr = 0; + vp->v_ralen = 0; + vp->v_maxra = 0; + vp->v_lastw = 0; + vp->v_lasta = 0; + vp->v_cstart = 0; + vp->v_clen = 0; + vp->v_socket = 0; + } + vp->v_type = VNON; + cache_purge(vp); + vp->v_tag = tag; + vp->v_op = vops; + insmntque(vp, mp); + *vpp = vp; + vp->v_usecount = 1; + vp->v_data = 0; + return (0); +} + +/* + * Move a vnode from one mount queue to another. 
 */
void
insmntque(vp, mp)
	struct vnode *vp;	/* vnode to move */
	struct mount *mp;	/* destination mount point; NULL just removes */
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 * (The assignment inside the condition also records the new mount.)
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;	/* buffer whose write has completed */
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {	/* assignment intended */
		/* One fewer write outstanding on this vnode. */
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in VBWAIT once all output has drained. */
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
+ */ +int +vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) + register struct vnode *vp; + int flags; + struct ucred *cred; + struct proc *p; + int slpflag, slptimeo; +{ + register struct buf *bp; + struct buf *nbp, *blist; + int s, error; + + if (flags & V_SAVE) { + if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) + return (error); + if (vp->v_dirtyblkhd.lh_first != NULL) + panic("vinvalbuf: dirty bufs"); + } + for (;;) { + if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) + while (blist && blist->b_lblkno < 0) + blist = blist->b_vnbufs.le_next; + if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && + (flags & V_SAVEMETA)) + while (blist && blist->b_lblkno < 0) + blist = blist->b_vnbufs.le_next; + if (!blist) + break; + + for (bp = blist; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (flags & V_SAVEMETA && bp->b_lblkno < 0) + continue; + s = splbio(); + if (bp->b_flags & B_BUSY) { + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, + slpflag | (PRIBIO + 1), "vinvalbuf", + slptimeo); + splx(s); + if (error) + return (error); + break; + } + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * XXX Since there are no node locks for NFS, I believe + * there is a slight chance that a delayed write will + * occur while sleeping just above, so check for it. + */ + if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { + (void) VOP_BWRITE(bp); + break; + } + bp->b_flags |= B_INVAL; + brelse(bp); + } + } + if (!(flags & V_SAVEMETA) && + (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) + panic("vinvalbuf: flush failed"); + return (0); +} + +/* + * Associate a buffer with a vnode. + */ +void +bgetvp(vp, bp) + register struct vnode *vp; + register struct buf *bp; +{ + + if (bp->b_vp) + panic("bgetvp: not free"); + VHOLD(vp); + bp->b_vp = vp; + if (vp->v_type == VBLK || vp->v_type == VCHR) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; + /* + * Insert onto list for new vnode. 
 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;	/* buffer to detach; must have a vnode */
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	/* Drop the hold taken by bgetvp(). */
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;	/* buffer to move */
	register struct vnode *newvp;	/* vnode to attach it to */
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		/* NOTE(review): message lacks a trailing newline. */
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 * Returns 0 with the vnode in *vpp, or an errno with *vpp = NULLVP.
 */
int
bdevvp(dev, vpp)
	dev_t dev;		/* device number; NODEV is rejected */
	struct vnode **vpp;	/* OUT: vnode for the device */
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/*
	 * If an aliased vnode for this device already exists, checkalias
	 * returns it and we discard the one just allocated.
	 * (Assignment inside the condition is intended.)
	 */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).
If such an alias exists, deallocate + * the existing contents and return the aliased vnode. The + * caller is responsible for filling it with its new contents. + */ +struct vnode * +checkalias(nvp, nvp_rdev, mp) + register struct vnode *nvp; + dev_t nvp_rdev; + struct mount *mp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + struct vnode **vpp; + + if (nvp->v_type != VBLK && nvp->v_type != VCHR) + return (NULLVP); + + vpp = &speclisth[SPECHASH(nvp_rdev)]; +loop: + simple_lock(&spechash_slock); + for (vp = *vpp; vp; vp = vp->v_specnext) { + if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) + continue; + /* + * Alias, but not in use, so flush it out. + */ + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + simple_unlock(&spechash_slock); + vgonel(vp, p); + goto loop; + } + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { + simple_unlock(&spechash_slock); + goto loop; + } + break; + } + if (vp == NULL || vp->v_tag != VT_NON) { + MALLOC(nvp->v_specinfo, struct specinfo *, + sizeof(struct specinfo), M_VNODE, M_WAITOK); + nvp->v_rdev = nvp_rdev; + nvp->v_hashchain = vpp; + nvp->v_specnext = *vpp; + nvp->v_specflags = 0; + simple_unlock(&spechash_slock); + *vpp = nvp; + if (vp != NULLVP) { + nvp->v_flag |= VALIASED; + vp->v_flag |= VALIASED; + vput(vp); + } + return (NULLVP); + } + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); + vp->v_op = nvp->v_op; + vp->v_tag = nvp->v_tag; + nvp->v_type = VNON; + insmntque(vp, mp); + return (vp); +} + +/* + * Grab a particular vnode from the free list, increment its + * reference count and lock it. The vnode lock bit is set the + * vnode is being eliminated in vgone. The process is awakened + * when the transition is completed, and an error returned to + * indicate that the vnode is no longer usable (possibly having + * been changed to a new file system type). 
+ */ +int +vget(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; +{ + int error; + + /* + * If the vnode is in the process of being cleaned out for + * another use, we wait for the cleaning to finish and then + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. + */ + if ((flags & LK_INTERLOCK) == 0) + simple_lock(&vp->v_interlock); + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vget", 0); + return (ENOENT); + } + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + vp->v_usecount++; + if (flags & LK_TYPE_MASK) { + if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) + vrele(vp); + return (error); + } + simple_unlock(&vp->v_interlock); + return (0); +} + +/* + * Stubs to use when there is no locking to be done on the underlying object. + * A minimal shared lock is necessary to ensure that the underlying object + * is not revoked while an operation is in progress. So, an active shared + * count is maintained in an auxillary vnode lock structure. + */ +int +vop_nolock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ +#ifdef notyet + /* + * This code cannot be used until all the non-locking filesystems + * (notably NFS) are converted to properly lock and release nodes. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. 
Note that the inactive + * and lookup operations also change their lock state, but this + * cannot be avoided, so these two operations will always need + * to be handled in intermediate layers. + */ + struct vnode *vp = ap->a_vp; + int vnflags, flags = ap->a_flags; + + if (vp->v_vnlock == NULL) { + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), + M_VNODE, M_WAITOK); + lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + } + switch (flags & LK_TYPE_MASK) { + case LK_DRAIN: + vnflags = LK_DRAIN; + break; + case LK_EXCLUSIVE: + case LK_SHARED: + vnflags = LK_SHARED; + break; + case LK_UPGRADE: + case LK_EXCLUPGRADE: + case LK_DOWNGRADE: + return (0); + case LK_RELEASE: + default: + panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); + } + if (flags & LK_INTERLOCK) + vnflags |= LK_INTERLOCK; + return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); +#else /* for now */ + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) + simple_unlock(&ap->a_vp->v_interlock); + return (0); +#endif +} + +/* + * Decrement the active use count. + */ +int +vop_nounlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); +} + +/* + * Return whether or not the node is in use. + */ +int +vop_noislocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockstatus(vp->v_vnlock)); +} + +/* + * Vnode reference. 
+ */ +void +vref(vp) + struct vnode *vp; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount <= 0) + panic("vref used where vget required"); + vp->v_usecount++; + simple_unlock(&vp->v_interlock); +} + +/* + * vput(), just unlock and vrele() + */ +void +vput(vp) + struct vnode *vp; +{ + struct proc *p = curproc; /* XXX */ + +#ifdef DIGANOSTIC + if (vp == NULL) + panic("vput: null vp"); +#endif + simple_lock(&vp->v_interlock); + vp->v_usecount--; + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + VOP_UNLOCK(vp, 0, p); + return; + } +#ifdef DIAGNOSTIC + if (vp->v_usecount < 0 || vp->v_writecount != 0) { + vprint("vput: bad ref count", vp); + panic("vput: ref cnt"); + } +#endif + /* + * insert at tail of LRU list + */ + simple_lock(&vnode_free_list_slock); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + simple_unlock(&vp->v_interlock); + VOP_INACTIVE(vp, p); +} + +/* + * Vnode release. + * If count drops to zero, call inactive routine and return to freelist. + */ +void +vrele(vp) + struct vnode *vp; +{ + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if (vp == NULL) + panic("vrele: null vp"); +#endif + simple_lock(&vp->v_interlock); + vp->v_usecount--; + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + return; + } +#ifdef DIAGNOSTIC + if (vp->v_usecount < 0 || vp->v_writecount != 0) { + vprint("vrele: bad ref count", vp); + panic("vrele: ref cnt"); + } +#endif + /* + * insert at tail of LRU list + */ + simple_lock(&vnode_free_list_slock); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) + VOP_INACTIVE(vp, p); +} + +#ifdef DIAGNOSTIC +/* + * Page or buffer structure gets a reference. 
+ */ +void +vhold(vp) + register struct vnode *vp; +{ + + simple_lock(&vp->v_interlock); + vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); +} + +/* + * Page or buffer structure frees a reference. + */ +void +holdrele(vp) + register struct vnode *vp; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_holdcnt <= 0) + panic("holdrele: holdcnt"); + vp->v_holdcnt--; + simple_unlock(&vp->v_interlock); +} +#endif /* DIAGNOSTIC */ + +/* + * Remove any vnodes in the vnode table belonging to mount point mp. + * + * If MNT_NOFORCE is specified, there should not be any active ones, + * return error if any are found (nb: this is a user error, not a + * system error). If MNT_FORCE is specified, detach any active vnodes + * that are found. + */ +#ifdef DIAGNOSTIC +int busyprt = 0; /* print out busy vnodes */ +struct ctldebug debug1 = { "busyprt", &busyprt }; +#endif + +int +vflush(mp, skipvp, flags) + struct mount *mp; + struct vnode *skipvp; + int flags; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp, *nvp; + int busy = 0; + + simple_lock(&mntvnode_slock); +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) + goto loop; + nvp = vp->v_mntvnodes.le_next; + /* + * Skip over a selected vnode. + */ + if (vp == skipvp) + continue; + + simple_lock(&vp->v_interlock); + /* + * Skip over a vnodes marked VSYSTEM. + */ + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); + continue; + } + /* + * If WRITECLOSE is set, only flush out regular file + * vnodes open for writing. + */ + if ((flags & WRITECLOSE) && + (vp->v_writecount == 0 || vp->v_type != VREG)) { + simple_unlock(&vp->v_interlock); + continue; + } + /* + * With v_usecount == 0, all we need to do is clear + * out the vnode data structures and we are done. + */ + if (vp->v_usecount == 0) { + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); + continue; + } + /* + * If FORCECLOSE is set, forcibly close the vnode. 
+ * For block or character devices, revert to an + * anonymous device. For all other files, just kill them. + */ + if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); + if (vp->v_type != VBLK && vp->v_type != VCHR) { + vgonel(vp, p); + } else { + vclean(vp, 0, p); + vp->v_op = spec_vnodeop_p; + insmntque(vp, (struct mount *)0); + } + simple_lock(&mntvnode_slock); + continue; + } +#ifdef DIAGNOSTIC + if (busyprt) + vprint("vflush: busy vnode", vp); +#endif + simple_unlock(&vp->v_interlock); + busy++; + } + simple_unlock(&mntvnode_slock); + if (busy) + return (EBUSY); + return (0); +} + +/* + * Disassociate the underlying file system from a vnode. + * The vnode interlock is held on entry. + */ +static void +vclean(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; +{ + int active; + + /* + * Check to see if the vnode is in use. + * If so we have to reference it before we clean it out + * so that its count cannot fall to zero and generate a + * race against ourselves to recycle it. + */ + if (active = vp->v_usecount) + vp->v_usecount++; + /* + * Prevent the vnode from being recycled or + * brought into use while we clean it out. + */ + if (vp->v_flag & VXLOCK) + panic("vclean: deadlock"); + vp->v_flag |= VXLOCK; + /* + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. + */ + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); + /* + * Clean out any buffers associated with the vnode. + */ + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); + /* + * If purging an active vnode, it must be closed and + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode. 
+ */ + if (active) { + if (flags & DOCLOSE) + VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); + } + /* + * Reclaim the vnode. + */ + if (VOP_RECLAIM(vp, p)) + panic("vclean: cannot reclaim"); + if (active) + vrele(vp); + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } + + /* + * Done with purge, notify sleepers of the grim news. + */ + vp->v_op = dead_vnodeop_p; + vp->v_tag = VT_NON; + vp->v_flag &= ~VXLOCK; + if (vp->v_flag & VXWANT) { + vp->v_flag &= ~VXWANT; + wakeup((caddr_t)vp); + } +} + +/* + * Eliminate all activity associated with the requested vnode + * and with all vnodes aliased to the requested vnode. + */ +int +vop_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap; +{ + struct vnode *vp, *vq; + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + + vp = ap->a_vp; + simple_lock(&vp->v_interlock); + + if (vp->v_flag & VALIASED) { + /* + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. + */ + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return (0); + } + /* + * Ensure that vp will not be vgone'd while we + * are eliminating its aliases. 
+ */ + vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); + while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || + vq->v_type != vp->v_type || vp == vq) + continue; + simple_unlock(&spechash_slock); + vgone(vq); + break; + } + if (vq == NULLVP) + simple_unlock(&spechash_slock); + } + /* + * Remove the lock so that vgone below will + * really eliminate the vnode after which time + * vgone will awaken any sleepers. + */ + simple_lock(&vp->v_interlock); + vp->v_flag &= ~VXLOCK; + } + vgonel(vp, p); + return (0); +} + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) + simple_unlock(inter_lkp); + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); +} + +/* + * Eliminate all activity associated with a vnode + * in preparation for reuse. + */ +void +vgone(vp) + struct vnode *vp; +{ + struct proc *p = curproc; /* XXX */ + + simple_lock(&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ + struct vnode *vq; + struct vnode *vx; + + /* + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. + */ + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vgone", 0); + return; + } + /* + * Clean out the filesystem specific data. + */ + vclean(vp, DOCLOSE, p); + /* + * Delete from old mount point vnode list, if on one. + */ + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); + /* + * If special device, remove it from special device alias list + * if it is on one. 
+ */ + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); + if (*vp->v_hashchain == vp) { + *vp->v_hashchain = vp->v_specnext; + } else { + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_specnext != vp) + continue; + vq->v_specnext = vp->v_specnext; + break; + } + if (vq == NULL) + panic("missing bdev"); + } + if (vp->v_flag & VALIASED) { + vx = NULL; + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || + vq->v_type != vp->v_type) + continue; + if (vx) + break; + vx = vq; + } + if (vx == NULL) + panic("missing alias"); + if (vq == NULL) + vx->v_flag &= ~VALIASED; + vp->v_flag &= ~VALIASED; + } + simple_unlock(&spechash_slock); + FREE(vp->v_specinfo, M_VNODE); + vp->v_specinfo = NULL; + } + /* + * If it is on the freelist and not already at the head, + * move it to the head of the list. The test of the back + * pointer and the reference count of zero is because + * it will be removed from the free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from the freelist to ensure + * that we do not try to move it here. + */ + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vnode_free_list.tqh_first != vp) { + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + } + simple_unlock(&vnode_free_list_slock); + } + vp->v_type = VBAD; +} + +/* + * Lookup a vnode by device number. 
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	/* Scan the special-device hash chain for (dev, type). */
	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		/*
		 * Found a match; hand it back.  No reference is taken
		 * here -- the caller must vget/vref it if it needs to
		 * hold the vnode beyond this call.
		 */
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	/* Returns 1 if a matching vnode was found, 0 otherwise. */
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	/* Not aliased: the vnode's own use count is the whole story. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	/* Sum the use counts of every alias on the hash chain. */
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			/*
			 * vgone() may sleep, so drop the hash lock and
			 * restart the scan from the top afterwards; the
			 * chain may have changed while we slept.
			 */
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
+ */ +static char *typename[] = + { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; + +void +vprint(label, vp) + char *label; + register struct vnode *vp; +{ + char buf[64]; + + if (label != NULL) + printf("%s: ", label); + printf("type %s, usecount %d, writecount %d, refcount %d,", + typename[vp->v_type], vp->v_usecount, vp->v_writecount, + vp->v_holdcnt); + buf[0] = '\0'; + if (vp->v_flag & VROOT) + strcat(buf, "|VROOT"); + if (vp->v_flag & VTEXT) + strcat(buf, "|VTEXT"); + if (vp->v_flag & VSYSTEM) + strcat(buf, "|VSYSTEM"); + if (vp->v_flag & VXLOCK) + strcat(buf, "|VXLOCK"); + if (vp->v_flag & VXWANT) + strcat(buf, "|VXWANT"); + if (vp->v_flag & VBWAIT) + strcat(buf, "|VBWAIT"); + if (vp->v_flag & VALIASED) + strcat(buf, "|VALIASED"); + if (buf[0] != '\0') + printf(" flags (%s)", &buf[1]); + if (vp->v_data == NULL) { + printf("\n"); + } else { + printf("\n\t"); + VOP_PRINT(vp); + } +} + +#ifdef DEBUG +/* + * List all of the locked vnodes in the system. + * Called when debugging the kernel. + */ +void +printlockedvnodes() +{ + struct proc *p = curproc; /* XXX */ + struct mount *mp, *nmp; + struct vnode *vp; + + printf("Locked vnodes\n"); + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + if (VOP_ISLOCKED(vp)) + vprint((char *)0, vp); + } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); +} +#endif + +/* + * Top level filesystem related information gathering. 
+ */ +int +vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + struct ctldebug *cdp; + struct vfsconf *vfsp; + + /* all sysctl names at this level are at least name and field */ + if (namelen < 2) + return (ENOTDIR); /* overloaded */ + if (name[0] != VFS_GENERIC) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[0]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, + oldp, oldlenp, newp, newlen, p)); + } + switch (name[1]) { + case VFS_MAXTYPENUM: + return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); + case VFS_CONF: + if (namelen < 3) + return (ENOTDIR); /* overloaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[2]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, + sizeof(struct vfsconf))); + } + return (EOPNOTSUPP); +} + +int kinfo_vdebug = 1; +int kinfo_vgetfailed; +#define KINFO_VNODESLOP 10 +/* + * Dump vnode list (via sysctl). + * Copyout address of vnode followed by vnode. 
+ */ +/* ARGSUSED */ +int +sysctl_vnode(where, sizep, p) + char *where; + size_t *sizep; + struct proc *p; +{ + struct mount *mp, *nmp; + struct vnode *nvp, *vp; + char *bp = where, *savebp; + char *ewhere; + int error; + +#define VPTRSZ sizeof (struct vnode *) +#define VNODESZ sizeof (struct vnode) + if (where == NULL) { + *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); + return (0); + } + ewhere = where + *sizep; + + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + savebp = bp; +again: + simple_lock(&mntvnode_slock); + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = nvp) { + /* + * Check that the vp is still associated with + * this filesystem. RACE: could have been + * recycled onto the same filesystem. + */ + if (vp->v_mount != mp) { + simple_unlock(&mntvnode_slock); + if (kinfo_vdebug) + printf("kinfo: vp changed\n"); + bp = savebp; + goto again; + } + nvp = vp->v_mntvnodes.le_next; + if (bp + VPTRSZ + VNODESZ > ewhere) { + simple_unlock(&mntvnode_slock); + *sizep = bp - where; + return (ENOMEM); + } + simple_unlock(&mntvnode_slock); + if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || + (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) + return (error); + bp += VPTRSZ + VNODESZ; + simple_lock(&mntvnode_slock); + } + simple_unlock(&mntvnode_slock); + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); + + *sizep = bp - where; + return (0); +} + +/* + * Check to see if a filesystem is mounted on a block device. 
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	/* The device vnode itself has a filesystem mounted on it. */
	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/*
		 * The device has aliases; any alias with SI_MOUNTEDON
		 * set means the underlying device is busy too.
		 */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	/* Returns 0 if free, EBUSY if something is mounted on the device. */
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/*
		 * Grab the predecessor before dounmount() frees mp and
		 * unlinks it from the mount list.
		 */
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, p);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
+ */ +static int +vfs_hang_addrlist(mp, nep, argp) + struct mount *mp; + struct netexport *nep; + struct export_args *argp; +{ + register struct netcred *np; + register struct radix_node_head *rnh; + register int i; + struct radix_node *rn; + struct sockaddr *saddr, *smask = 0; + struct domain *dom; + int error; + + if (argp->ex_addrlen == 0) { + if (mp->mnt_flag & MNT_DEFEXPORTED) + return (EPERM); + np = &nep->ne_defexported; + np->netc_exflags = argp->ex_flags; + np->netc_anon = argp->ex_anon; + np->netc_anon.cr_ref = 1; + mp->mnt_flag |= MNT_DEFEXPORTED; + return (0); + } + i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; + np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); + bzero((caddr_t)np, i); + saddr = (struct sockaddr *)(np + 1); + if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) + goto out; + if (saddr->sa_len > argp->ex_addrlen) + saddr->sa_len = argp->ex_addrlen; + if (argp->ex_masklen) { + smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); + error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); + if (error) + goto out; + if (smask->sa_len > argp->ex_masklen) + smask->sa_len = argp->ex_masklen; + } + i = saddr->sa_family; + if ((rnh = nep->ne_rtable[i]) == 0) { + /* + * Seems silly to initialize every AF when most are not + * used, do so on demand here + */ + for (dom = domains; dom; dom = dom->dom_next) + if (dom->dom_family == i && dom->dom_rtattach) { + dom->dom_rtattach((void **)&nep->ne_rtable[i], + dom->dom_rtoffset); + break; + } + if ((rnh = nep->ne_rtable[i]) == 0) { + error = ENOBUFS; + goto out; + } + } + rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, + np->netc_rnodes); + if (rn == 0) { + /* + * One of the reasons that rnh_addaddr may fail is that + * the entry already exists. To check for this case, we + * look up the entry to see if it is there. If so, we + * do not need to make a new entry but do return success. 
+ */ + free(np, M_NETADDR); + rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); + if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && + ((struct netcred *)rn)->netc_exflags == argp->ex_flags && + !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, + (caddr_t)&argp->ex_anon, sizeof(struct ucred))) + return (0); + return (EPERM); + } + np->netc_exflags = argp->ex_flags; + np->netc_anon = argp->ex_anon; + np->netc_anon.cr_ref = 1; + return (0); +out: + free(np, M_NETADDR); + return (error); +} + +/* ARGSUSED */ +static int +vfs_free_netcred(rn, w) + struct radix_node *rn; + caddr_t w; +{ + register struct radix_node_head *rnh = (struct radix_node_head *)w; + + (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); + free((caddr_t)rn, M_NETADDR); + return (0); +} + +/* + * Free the net address hash lists that are hanging off the mount points. + */ +static void +vfs_free_addrlist(nep) + struct netexport *nep; +{ + register int i; + register struct radix_node_head *rnh; + + for (i = 0; i <= AF_MAX; i++) + if (rnh = nep->ne_rtable[i]) { + (*rnh->rnh_walktree)(rnh, vfs_free_netcred, + (caddr_t)rnh); + free((caddr_t)rnh, M_RTABLE); + nep->ne_rtable[i] = 0; + } +} + +int +vfs_export(mp, nep, argp) + struct mount *mp; + struct netexport *nep; + struct export_args *argp; +{ + int error; + + if (argp->ex_flags & MNT_DELEXPORT) { + vfs_free_addrlist(nep); + mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); + } + if (argp->ex_flags & MNT_EXPORTED) { + if (error = vfs_hang_addrlist(mp, nep, argp)) + return (error); + mp->mnt_flag |= MNT_EXPORTED; + } + return (0); +} + +struct netcred * +vfs_export_lookup(mp, nep, nam) + register struct mount *mp; + struct netexport *nep; + struct mbuf *nam; +{ + register struct netcred *np; + register struct radix_node_head *rnh; + struct sockaddr *saddr; + + np = NULL; + if (mp->mnt_flag & MNT_EXPORTED) { + /* + * Lookup in the export list first. 
+ */ + if (nam != NULL) { + saddr = mtod(nam, struct sockaddr *); + rnh = nep->ne_rtable[saddr->sa_family]; + if (rnh != NULL) { + np = (struct netcred *) + (*rnh->rnh_matchaddr)((caddr_t)saddr, + rnh); + if (np && np->netc_rnodes->rn_flags & RNF_ROOT) + np = NULL; + } + } + /* + * If no address match, use the default if it exists. + */ + if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) + np = &nep->ne_defexported; + } + return (np); +} diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c new file mode 100644 index 000000000000..0cf7680ec9d4 --- /dev/null +++ b/sys/kern/vfs_syscalls.c @@ -0,0 +1,2417 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/filedesc.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/dirent.h> + +#include <sys/syscallargs.h> + +#include <vm/vm.h> +#include <sys/sysctl.h> + +static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static void checkdirs __P((struct vnode *olddp)); + +/* + * Virtual File System System Calls + */ + +/* + * Mount a file system. 
+ */ +/* ARGSUSED */ +int +mount(p, uap, retval) + struct proc *p; + register struct mount_args /* { + syscallarg(char *) type; + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(caddr_t) data; + } */ *uap; + register_t *retval; +{ + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + int error, flag; + struct vattr va; + u_long fstypenum; + struct nameidata nd; + char fstypename[MFSNAMELEN]; + + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (SCARG(uap, flags) & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + mp = vp->v_mount; + flag = mp->mnt_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. + */ + if ((SCARG(uap, flags) & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + return (EOPNOTSUPP); /* Needs translation */ + } + mp->mnt_flag |= + SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (vfs_busy(mp, LK_NOWAIT, 0, p)) { + vput(vp); + return (EBUSY); + } + VOP_UNLOCK(vp, 0, p); + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. 
+ */ + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || + (va.va_uid != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag)))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); + if (vp->v_type != VDIR) { + vput(vp); + return (ENOTDIR); + } +#ifdef COMPAT_43 + /* + * Historically filesystem types were identified by number. If we + * get an integer for the filesystem type instead of a string, we + * check to see if it matches one of the historic filesystem types. + */ + fstypenum = (u_long)SCARG(uap, type); + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } else +#endif /* COMPAT_43 */ + if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { + vput(vp); + return (error); + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + if (vp->v_mountedhere != NULL) { + vput(vp); + return (EBUSY); + } + + /* + * Allocate and initialize the filesystem. 
+ */ + mp = (struct mount *)malloc((u_long)sizeof(struct mount), + M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + vp->v_mountedhere = mp; + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = p->p_ucred->cr_uid; +update: + /* + * Set the mount level flags. + */ + if (SCARG(uap, flags) & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_flag |= MNT_WANTRDWR; + mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | + MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC); + mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC); + /* + * Mount the filesystem. + */ + error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); + if (mp->mnt_flag & MNT_UPDATE) { + vrele(vp); + if (mp->mnt_flag & MNT_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR); + if (error) + mp->mnt_flag = flag; + vfs_unbusy(mp, p); + return (error); + } + /* + * Put the new filesystem on the mount list after root. 
+ */ + cache_purge(vp); + if (!error) { + simple_lock(&mountlist_slock); + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); + checkdirs(vp); + VOP_UNLOCK(vp, 0, p); + vfs_unbusy(mp, p); + if (error = VFS_START(mp, 0, p)) + vrele(vp); + } else { + mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free((caddr_t)mp, M_MOUNT); + vput(vp); + } + return (error); +} + +/* + * Scan all active processes to see if any of them have a current + * or root directory onto which the new filesystem has just been + * mounted. If so, replace them with the new mount point. + */ +static void +checkdirs(olddp) + struct vnode *olddp; +{ + struct filedesc *fdp; + struct vnode *newdp; + struct proc *p; + + if (olddp->v_usecount == 1) + return; + if (VFS_ROOT(olddp->v_mountedhere, &newdp)) + panic("mount: lost mount"); + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + fdp = p->p_fd; + if (fdp->fd_cdir == olddp) { + vrele(fdp->fd_cdir); + VREF(newdp); + fdp->fd_cdir = newdp; + } + if (fdp->fd_rdir == olddp) { + vrele(fdp->fd_rdir); + VREF(newdp); + fdp->fd_rdir = newdp; + } + } + if (rootvnode == olddp) { + vrele(rootvnode); + VREF(newdp); + rootvnode = newdp; + } + vput(newdp); +} + +/* + * Unmount a file system. + * + * Note: unmount takes a path to the vnode mounted on as argument, + * not special file (as before). + */ +/* ARGSUSED */ +int +unmount(p, uap, retval) + struct proc *p; + register struct unmount_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct mount *mp; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + mp = vp->v_mount; + + /* + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. 
+ */ + if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && + (error = suser(p->p_ucred, &p->p_acflag))) { + vput(vp); + return (error); + } + + /* + * Don't allow unmounting the root file system. + */ + if (mp->mnt_flag & MNT_ROOTFS) { + vput(vp); + return (EINVAL); + } + + /* + * Must be the root of the filesystem + */ + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + vput(vp); + return (dounmount(mp, SCARG(uap, flags), p)); +} + +/* + * Do the actual file system unmount. + */ +int +dounmount(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ + struct vnode *coveredvp; + int error; + + simple_lock(&mountlist_slock); + mp->mnt_flag |= MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); + mp->mnt_flag &=~ MNT_ASYNC; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || + (flags & MNT_FORCE)) + error = VFS_UNMOUNT(mp, flags, p); + simple_lock(&mountlist_slock); + if (error) { + mp->mnt_flag &= ~MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); + return (error); + } + CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; + vrele(coveredvp); + } + mp->mnt_vfc->vfc_refcount--; + if (mp->mnt_vnodelist.lh_first != NULL) + panic("unmount: dangling vnode"); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_flag & MNT_MWAIT) + wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); +} + +/* + * Sync each mounted filesystem. 
+ */ +#ifdef DEBUG +int syncprt = 0; +struct ctldebug debug0 = { "syncprt", &syncprt }; +#endif + +/* ARGSUSED */ +int +sync(p, uap, retval) + struct proc *p; + void *uap; + register_t *retval; +{ + register struct mount *mp, *nmp; + int asyncflag; + + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); +#ifdef DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ + return (0); +} + +/* + * Change filesystem quotas. + */ +/* ARGSUSED */ +int +quotactl(p, uap, retval) + struct proc *p; + register struct quotactl_args /* { + syscallarg(char *) path; + syscallarg(int) cmd; + syscallarg(int) uid; + syscallarg(caddr_t) arg; + } */ *uap; + register_t *retval; +{ + register struct mount *mp; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + mp = nd.ni_vp->v_mount; + vrele(nd.ni_vp); + return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p)); +} + +/* + * Get filesystem statistics. 
+ */ +/* ARGSUSED */ +int +statfs(p, uap, retval) + struct proc *p; + register struct statfs_args /* { + syscallarg(char *) path; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; +{ + register struct mount *mp; + register struct statfs *sp; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + mp = nd.ni_vp->v_mount; + sp = &mp->mnt_stat; + vrele(nd.ni_vp); + if (error = VFS_STATFS(mp, sp, p)) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get filesystem statistics. + */ +/* ARGSUSED */ +int +fstatfs(p, uap, retval) + struct proc *p; + register struct fstatfs_args /* { + syscallarg(int) fd; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; +{ + struct file *fp; + struct mount *mp; + register struct statfs *sp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + mp = ((struct vnode *)fp->f_data)->v_mount; + sp = &mp->mnt_stat; + if (error = VFS_STATFS(mp, sp, p)) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get statistics on all filesystems. 
+ */ +int +getfsstat(p, uap, retval) + struct proc *p; + register struct getfsstat_args /* { + syscallarg(struct statfs *) buf; + syscallarg(long) bufsize; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + register struct mount *mp, *nmp; + register struct statfs *sp; + caddr_t sfsp; + long count, maxcount, error; + + maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); + sfsp = (caddr_t)SCARG(uap, buf); + count = 0; + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if (sfsp && count < maxcount) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT is specified, do not refresh the + * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + */ + if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + (SCARG(uap, flags) & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp))) + return (error); + sfsp += sizeof(*sp); + } + count++; + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); + if (sfsp && count > maxcount) + *retval = maxcount; + else + *retval = count; + return (0); +} + +/* + * Change current working directory to a given file descriptor. 
+ */ +/* ARGSUSED */ +int +fchdir(p, uap, retval) + struct proc *p; + struct fchdir_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + struct vnode *vp, *tdp; + struct mount *mp; + struct file *fp; + int error; + + if (error = getvnode(fdp, SCARG(uap, fd), &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } + if (error) { + vput(vp); + return (error); + } + VOP_UNLOCK(vp, 0, p); + vrele(fdp->fd_cdir); + fdp->fd_cdir = vp; + return (0); +} + +/* + * Change current working directory (``.''). + */ +/* ARGSUSED */ +int +chdir(p, uap, retval) + struct proc *p; + struct chdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) + return (error); + vrele(fdp->fd_cdir); + fdp->fd_cdir = nd.ni_vp; + return (0); +} + +/* + * Change notion of root (``/'') directory. + */ +/* ARGSUSED */ +int +chroot(p, uap, retval) + struct proc *p; + struct chroot_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) + return (error); + if (fdp->fd_rdir != NULL) + vrele(fdp->fd_rdir); + fdp->fd_rdir = nd.ni_vp; + return (0); +} + +/* + * Common routine for chroot and chdir. 
+ */ +static int +change_dir(ndp, p) + register struct nameidata *ndp; + struct proc *p; +{ + struct vnode *vp; + int error; + + if (error = namei(ndp)) + return (error); + vp = ndp->ni_vp; + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + if (error) + vput(vp); + else + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Check permissions, allocate an open file structure, + * and call the device open routine if any. + */ +int +open(p, uap, retval) + struct proc *p; + register struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + register struct vnode *vp; + int flags, cmode; + struct file *nfp; + int type, indx, error; + struct flock lf; + struct nameidata nd; + extern struct fileops vnops; + + if (error = falloc(p, &nfp, &indx)) + return (error); + fp = nfp; + flags = FFLAGS(SCARG(uap, flags)); + cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + if (error = vn_open(&nd, flags, cmode)) { + ffree(fp); + if ((error == ENODEV || error == ENXIO) && + p->p_dupfd >= 0 && /* XXX from fdopen */ + (error = + dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + *retval = indx; + return (0); + } + if (error == ERESTART) + error = EINTR; + fdp->fd_ofiles[indx] = NULL; + return (error); + } + p->p_dupfd = 0; + vp = nd.ni_vp; + fp->f_flag = flags & FMASK; + fp->f_type = DTYPE_VNODE; + fp->f_ops = &vnops; + fp->f_data = (caddr_t)vp; + if (flags & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (flags & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((flags & FNONBLOCK) == 0) + type |= F_WAIT; + VOP_UNLOCK(vp, 0, p); + if (error = VOP_ADVLOCK(vp, 
(caddr_t)fp, F_SETLK, &lf, type)) { + (void) vn_close(vp, fp->f_flag, fp->f_cred, p); + ffree(fp); + fdp->fd_ofiles[indx] = NULL; + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + fp->f_flag |= FHASLOCK; + } + VOP_UNLOCK(vp, 0, p); + *retval = indx; + return (0); +} + +#ifdef COMPAT_43 +/* + * Create a file. + */ +int +compat_43_creat(p, uap, retval) + struct proc *p; + register struct compat_43_creat_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, mode) = SCARG(uap, mode); + SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; + return (open(p, &nuap, retval)); +} +#endif /* COMPAT_43 */ + +/* + * Create a special file. + */ +/* ARGSUSED */ +int +mknod(p, uap, retval) + struct proc *p; + register struct mknod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + syscallarg(int) dev; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + int whiteout; + struct nameidata nd; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp != NULL) + error = EEXIST; + else { + VATTR_NULL(&vattr); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + vattr.va_rdev = SCARG(uap, dev); + whiteout = 0; + + switch (SCARG(uap, mode) & S_IFMT) { + case S_IFMT: /* used by badsect to flag bad sectors */ + vattr.va_type = VBAD; + break; + case S_IFCHR: + vattr.va_type = VCHR; + break; + case S_IFBLK: + vattr.va_type = VBLK; + break; + case S_IFWHT: + whiteout = 1; + break; + default: + error = EINVAL; + break; + } + } + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (whiteout) { + error = 
VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); + if (error) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + } else { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, + &nd.ni_cnd, &vattr); + } + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp) + vrele(vp); + } + return (error); +} + +/* + * Create a named pipe. + */ +/* ARGSUSED */ +int +mkfifo(p, uap, retval) + struct proc *p; + register struct mkfifo_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + int error; + struct nameidata nd; + +#ifndef FIFO + return (EOPNOTSUPP); +#else + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + if (nd.ni_vp != NULL) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + return (EEXIST); + } + VATTR_NULL(&vattr); + vattr.va_type = VFIFO; + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); +#endif /* FIFO */ +} + +/* + * Make a hard file link. 
+ */ +/* ARGSUSED */ +int +link(p, uap, retval) + struct proc *p; + register struct link_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct nameidata nd; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VDIR || + (error = suser(p->p_ucred, &p->p_acflag)) == 0) { + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + nd.ni_dirp = SCARG(uap, link); + if ((error = namei(&nd)) == 0) { + if (nd.ni_vp != NULL) + error = EEXIST; + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, + LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + } + } + } + vrele(vp); + return (error); +} + +/* + * Make a symbolic link. 
+ */ +/* ARGSUSED */ +int +symlink(p, uap, retval) + struct proc *p; + register struct symlink_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + char *path; + int error; + struct nameidata nd; + + MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) + goto out; + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + if (error = namei(&nd)) + goto out; + if (nd.ni_vp) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + error = EEXIST; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); +out: + FREE(path, M_NAMEI); + return (error); +} + +/* + * Delete a whiteout from the filesystem. + */ +/* ARGSUSED */ +int +undelete(p, uap, retval) + struct proc *p; + register struct undelete_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, + SCARG(uap, path), p); + error = namei(&nd); + if (error) + return (error); + + if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + return (EEXIST); + } + + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + return (error); +} + +/* + * Delete a name from the filesystem. 
+ */ +/* ARGSUSED */ +int +unlink(p, uap, retval) + struct proc *p; + struct unlink_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + int error; + struct nameidata nd; + + NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + + if (vp->v_type != VDIR || + (error = suser(p->p_ucred, &p->p_acflag)) == 0) { + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) + error = EBUSY; + else + (void)vnode_pager_uncache(vp); + } + + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); + } + return (error); +} + +/* + * Reposition read/write file offset. 
+ */ +int +lseek(p, uap, retval) + struct proc *p; + register struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; +{ + struct ucred *cred = p->p_ucred; + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + struct vattr vattr; + int error; + + if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) + return (ESPIPE); + switch (SCARG(uap, whence)) { + case L_INCR: + fp->f_offset += SCARG(uap, offset); + break; + case L_XTND: + if (error = + VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p)) + return (error); + fp->f_offset = SCARG(uap, offset) + vattr.va_size; + break; + case L_SET: + fp->f_offset = SCARG(uap, offset); + break; + default: + return (EINVAL); + } + *(off_t *)retval = fp->f_offset; + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Reposition read/write file offset. + */ +int +compat_43_lseek(p, uap, retval) + struct proc *p; + register struct compat_43_lseek_args /* { + syscallarg(int) fd; + syscallarg(long) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; +{ + struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ nuap; + off_t qret; + int error; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, offset) = SCARG(uap, offset); + SCARG(&nuap, whence) = SCARG(uap, whence); + error = lseek(p, &nuap, &qret); + *(long *)retval = qret; + return (error); +} +#endif /* COMPAT_43 */ + +/* + * Check access permissions. 
+ */ +int +access(p, uap, retval) + struct proc *p; + register struct access_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + register struct ucred *cred = p->p_ucred; + register struct vnode *vp; + int error, flags, t_gid, t_uid; + struct nameidata nd; + + t_uid = cred->cr_uid; + t_gid = cred->cr_groups[0]; + cred->cr_uid = p->p_cred->p_ruid; + cred->cr_groups[0] = p->p_cred->p_rgid; + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + goto out1; + vp = nd.ni_vp; + + /* Flags == 0 means only check for existence. */ + if (SCARG(uap, flags)) { + flags = 0; + if (SCARG(uap, flags) & R_OK) + flags |= VREAD; + if (SCARG(uap, flags) & W_OK) + flags |= VWRITE; + if (SCARG(uap, flags) & X_OK) + flags |= VEXEC; + if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) + error = VOP_ACCESS(vp, flags, cred, p); + } + vput(vp); +out1: + cred->cr_uid = t_uid; + cred->cr_groups[0] = t_gid; + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Get file status; this version follows links. + */ +/* ARGSUSED */ +int +compat_43_stat(p, uap, retval) + struct proc *p; + register struct compat_43_stat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; +{ + struct stat sb; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + error = vn_stat(nd.ni_vp, &sb, p); + vput(nd.ni_vp); + if (error) + return (error); + cvtstat(&sb, &osb); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Get file status; this version does not follow links. 
+ */ +/* ARGSUSED */ +int +compat_43_lstat(p, uap, retval) + struct proc *p; + register struct compat_43_lstat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; +{ + struct vnode *vp, *dvp; + struct stat sb, sb1; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + /* + * For symbolic links, always return the attributes of its + * containing directory, except for mode, size, and links. + */ + vp = nd.ni_vp; + dvp = nd.ni_dvp; + if (vp->v_type != VLNK) { + if (dvp == vp) + vrele(dvp); + else + vput(dvp); + error = vn_stat(vp, &sb, p); + vput(vp); + if (error) + return (error); + } else { + error = vn_stat(dvp, &sb, p); + vput(dvp); + if (error) { + vput(vp); + return (error); + } + error = vn_stat(vp, &sb1, p); + vput(vp); + if (error) + return (error); + sb.st_mode &= ~S_IFDIR; + sb.st_mode |= S_IFLNK; + sb.st_nlink = sb1.st_nlink; + sb.st_size = sb1.st_size; + sb.st_blocks = sb1.st_blocks; + } + cvtstat(&sb, &osb); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Convert from an old to a new stat structure. + */ +void +cvtstat(st, ost) + struct stat *st; + struct ostat *ost; +{ + + ost->st_dev = st->st_dev; + ost->st_ino = st->st_ino; + ost->st_mode = st->st_mode; + ost->st_nlink = st->st_nlink; + ost->st_uid = st->st_uid; + ost->st_gid = st->st_gid; + ost->st_rdev = st->st_rdev; + if (st->st_size < (quad_t)1 << 32) + ost->st_size = st->st_size; + else + ost->st_size = -2; + ost->st_atime = st->st_atime; + ost->st_mtime = st->st_mtime; + ost->st_ctime = st->st_ctime; + ost->st_blksize = st->st_blksize; + ost->st_blocks = st->st_blocks; + ost->st_flags = st->st_flags; + ost->st_gen = st->st_gen; +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Get file status; this version follows links. 
+ */ +/* ARGSUSED */ +int +stat(p, uap, retval) + struct proc *p; + register struct stat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; +{ + struct stat sb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + error = vn_stat(nd.ni_vp, &sb, p); + vput(nd.ni_vp); + if (error) + return (error); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); + return (error); +} + +/* + * Get file status; this version does not follow links. + */ +/* ARGSUSED */ +int +lstat(p, uap, retval) + struct proc *p; + register struct lstat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; +{ + int error; + struct vnode *vp, *dvp; + struct stat sb, sb1; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + /* + * For symbolic links, always return the attributes of its containing + * directory, except for mode, size, inode number, and links. + */ + vp = nd.ni_vp; + dvp = nd.ni_dvp; + if (vp->v_type != VLNK) { + if (dvp == vp) + vrele(dvp); + else + vput(dvp); + error = vn_stat(vp, &sb, p); + vput(vp); + if (error) + return (error); + } else { + error = vn_stat(dvp, &sb, p); + vput(dvp); + if (error) { + vput(vp); + return (error); + } + error = vn_stat(vp, &sb1, p); + vput(vp); + if (error) + return (error); + sb.st_mode &= ~S_IFDIR; + sb.st_mode |= S_IFLNK; + sb.st_nlink = sb1.st_nlink; + sb.st_size = sb1.st_size; + sb.st_blocks = sb1.st_blocks; + sb.st_ino = sb1.st_ino; + } + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); + return (error); +} + +/* + * Get configurable pathname variables. 
+ */ +/* ARGSUSED */ +int +pathconf(p, uap, retval) + struct proc *p; + register struct pathconf_args /* { + syscallarg(char *) path; + syscallarg(int) name; + } */ *uap; + register_t *retval; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); + vput(nd.ni_vp); + return (error); +} + +/* + * Return target name of a symbolic link. + */ +/* ARGSUSED */ +int +readlink(p, uap, retval) + struct proc *p; + register struct readlink_args /* { + syscallarg(char *) path; + syscallarg(char *) buf; + syscallarg(int) count; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct iovec aiov; + struct uio auio; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VLNK) + error = EINVAL; + else { + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + error = VOP_READLINK(vp, &auio, p->p_ucred); + } + vput(vp); + *retval = SCARG(uap, count) - auio.uio_resid; + return (error); +} + +/* + * Change flags of a file given a path name. 
+ */ +/* ARGSUSED */ +int +chflags(p, uap, retval) + struct proc *p; + register struct chflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_flags = SCARG(uap, flags); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + vput(vp); + return (error); +} + +/* + * Change flags of a file given a file descriptor. + */ +/* ARGSUSED */ +int +fchflags(p, uap, retval) + struct proc *p; + register struct fchflags_args /* { + syscallarg(int) fd; + syscallarg(int) flags; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_flags = SCARG(uap, flags); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Change mode of a file given path name. 
+ */ +/* ARGSUSED */ +int +chmod(p, uap, retval) + struct proc *p; + register struct chmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + vput(vp); + return (error); +} + +/* + * Change mode of a file given a file descriptor. + */ +/* ARGSUSED */ +int +fchmod(p, uap, retval) + struct proc *p; + register struct fchmod_args /* { + syscallarg(int) fd; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Set ownership given a path name. 
+ */ +/* ARGSUSED */ +int +chown(p, uap, retval) + struct proc *p; + register struct chown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + vput(vp); + return (error); +} + +/* + * Set ownership given a file descriptor. + */ +/* ARGSUSED */ +int +fchown(p, uap, retval) + struct proc *p; + register struct fchown_args /* { + syscallarg(int) fd; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Set the access and modification times of a file. 
+ */ +/* ARGSUSED */ +int +utimes(p, uap, retval) + struct proc *p; + register struct utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct timeval tv[2]; + struct vattr vattr; + int error; + struct nameidata nd; + + VATTR_NULL(&vattr); + if (SCARG(uap, tptr) == NULL) { + microtime(&tv[0]); + tv[1] = tv[0]; + vattr.va_vaflags |= VA_UTIMES_NULL; + } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, + sizeof (tv))) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + vattr.va_atime.ts_sec = tv[0].tv_sec; + vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000; + vattr.va_mtime.ts_sec = tv[1].tv_sec; + vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + vput(vp); + return (error); +} + +/* + * Truncate a file given its path name. + */ +/* ARGSUSED */ +int +truncate(p, uap, retval) + struct proc *p; + register struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0 && + (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } + vput(vp); + return (error); +} + +/* + * Truncate a file given a file descriptor. 
+ */ +/* ARGSUSED */ +int +ftruncate(p, uap, retval) + struct proc *p; + register struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + if ((fp->f_flag & FWRITE) == 0) + return (EINVAL); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); + } + VOP_UNLOCK(vp, 0, p); + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Truncate a file given its path name. + */ +/* ARGSUSED */ +int +compat_43_truncate(p, uap, retval) + struct proc *p; + register struct compat_43_truncate_args /* { + syscallarg(char *) path; + syscallarg(long) length; + } */ *uap; + register_t *retval; +{ + struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, length) = SCARG(uap, length); + return (truncate(p, &nuap, retval)); +} + +/* + * Truncate a file given a file descriptor. + */ +/* ARGSUSED */ +int +compat_43_ftruncate(p, uap, retval) + struct proc *p; + register struct compat_43_ftruncate_args /* { + syscallarg(int) fd; + syscallarg(long) length; + } */ *uap; + register_t *retval; +{ + struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, length) = SCARG(uap, length); + return (ftruncate(p, &nuap, retval)); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Sync an open file. 
+ */ +/* ARGSUSED */ +int +fsync(p, uap, retval) + struct proc *p; + struct fsync_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct file *fp; + int error; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + vp = (struct vnode *)fp->f_data; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Rename files. Source and destination must either both be directories, + * or both not be directories. If target is a directory, it must be empty. + */ +/* ARGSUSED */ +int +rename(p, uap, retval) + struct proc *p; + register struct rename_args /* { + syscallarg(char *) from; + syscallarg(char *) to; + } */ *uap; + register_t *retval; +{ + register struct vnode *tvp, *fvp, *tdvp; + struct nameidata fromnd, tond; + int error; + + NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, + SCARG(uap, from), p); + if (error = namei(&fromnd)) + return (error); + fvp = fromnd.ni_vp; + NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, + UIO_USERSPACE, SCARG(uap, to), p); + if (error = namei(&tond)) { + VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); + vrele(fromnd.ni_dvp); + vrele(fvp); + goto out1; + } + tdvp = tond.ni_dvp; + tvp = tond.ni_vp; + if (tvp != NULL) { + if (fvp->v_type == VDIR && tvp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { + error = EISDIR; + goto out; + } + } + if (fvp == tdvp) + error = EINVAL; + /* + * If source is the same as the destination (that is the + * same inode number with the same name in the same directory), + * then there is nothing to do. 
+ */ + if (fvp == tvp && fromnd.ni_dvp == tdvp && + fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) + error = -1; +out: + if (!error) { + VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); + if (fromnd.ni_dvp != tdvp) + VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (tvp) + VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, + tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + } else { + VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); + vrele(fromnd.ni_dvp); + vrele(fvp); + } + vrele(tond.ni_startdir); + FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); +out1: + if (fromnd.ni_startdir) + vrele(fromnd.ni_startdir); + FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); + if (error == -1) + return (0); + return (error); +} + +/* + * Make a directory file. + */ +/* ARGSUSED */ +int +mkdir(p, uap, retval) + struct proc *p; + register struct mkdir_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp != NULL) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(vp); + return (EEXIST); + } + VATTR_NULL(&vattr); + vattr.va_type = VDIR; + vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + if (!error) + vput(nd.ni_vp); + return (error); +} + +/* + * Remove a directory file. 
+ */ +/* ARGSUSED */ +int +rmdir(p, uap, retval) + struct proc *p; + struct rmdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + int error; + struct nameidata nd; + + NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + /* + * No rmdir "." please. + */ + if (nd.ni_dvp == vp) { + error = EINVAL; + goto out; + } + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) + error = EBUSY; +out: + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); + } + return (error); +} + +#ifdef COMPAT_43 +/* + * Read a block of directory entries in a file system independent format. 
+ */ +int +compat_43_getdirentries(p, uap, retval) + struct proc *p; + register struct compat_43_getdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct file *fp; + struct uio auio, kuio; + struct iovec aiov, kiov; + struct dirent *dp, *edp; + caddr_t dirbuf; + int error, eofflag, readcnt; + long loff; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + if ((fp->f_flag & FREAD) == 0) + return (EBADF); + vp = (struct vnode *)fp->f_data; +unionread: + if (vp->v_type != VDIR) + return (EINVAL); + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + loff = auio.uio_offset = fp->f_offset; +# if (BYTE_ORDER != LITTLE_ENDIAN) + if (vp->v_mount->mnt_maxsymlinklen <= 0) { + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + (int *)0, (u_long *)0); + fp->f_offset = auio.uio_offset; + } else +# endif + { + kuio = auio; + kuio.uio_iov = &kiov; + kuio.uio_segflg = UIO_SYSSPACE; + kiov.iov_len = SCARG(uap, count); + MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); + kiov.iov_base = dirbuf; + error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, + (int *)0, (u_long *)0); + fp->f_offset = kuio.uio_offset; + if (error == 0) { + readcnt = SCARG(uap, count) - kuio.uio_resid; + edp = (struct dirent *)&dirbuf[readcnt]; + for (dp = (struct dirent *)dirbuf; dp < edp; ) { +# if (BYTE_ORDER == LITTLE_ENDIAN) + /* + * The expected low byte of + * dp->d_namlen is our dp->d_type. + * The high MBZ byte of dp->d_namlen + * is our dp->d_namlen. 
+ */ + dp->d_type = dp->d_namlen; + dp->d_namlen = 0; +# else + /* + * The dp->d_type is the high byte + * of the expected dp->d_namlen, + * so must be zero'ed. + */ + dp->d_type = 0; +# endif + if (dp->d_reclen > 0) { + dp = (struct dirent *) + ((char *)dp + dp->d_reclen); + } else { + error = EIO; + break; + } + } + if (dp >= edp) + error = uiomove(dirbuf, readcnt, &auio); + } + FREE(dirbuf, M_TEMP); + } + VOP_UNLOCK(vp, 0, p); + if (error) + return (error); + +#ifdef UNION +{ + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_op == union_vnodeop_p)) { + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(vp, FREAD, fp->f_cred, p); + if (error) + return (error); + vp = lvp; + goto unionread; + } + } +} +#endif /* UNION */ + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_flag & VROOT) && + (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; + return (error); +} +#endif /* COMPAT_43 */ + +/* + * Read a block of directory entries in a file system independent format. 
+ */ +int +getdirentries(p, uap, retval) + struct proc *p; + register struct getdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct file *fp; + struct uio auio; + struct iovec aiov; + long loff; + int error, eofflag; + + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) + return (error); + if ((fp->f_flag & FREAD) == 0) + return (EBADF); + vp = (struct vnode *)fp->f_data; +unionread: + if (vp->v_type != VDIR) + return (EINVAL); + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + loff = auio.uio_offset = fp->f_offset; + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + (int *)0, (u_long *)0); + fp->f_offset = auio.uio_offset; + VOP_UNLOCK(vp, 0, p); + if (error) + return (error); + +#ifdef UNION +{ + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_op == union_vnodeop_p)) { + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(vp, FREAD, fp->f_cred, p); + if (error) + return (error); + vp = lvp; + goto unionread; + } + } +} +#endif /* UNION */ + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_flag & VROOT) && + 
(vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; + return (error); +} + +/* + * Set the mode mask for creation of filesystem nodes. + */ +int +umask(p, uap, retval) + struct proc *p; + struct umask_args /* { + syscallarg(int) newmask; + } */ *uap; + register_t *retval; +{ + register struct filedesc *fdp; + + fdp = p->p_fd; + *retval = fdp->fd_cmask; + fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + return (0); +} + +/* + * Void all references to file by ripping underlying filesystem + * away from vnode. + */ +/* ARGSUSED */ +int +revoke(p, uap, retval) + struct proc *p; + register struct revoke_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) + goto out; + if (p->p_ucred->cr_uid != vattr.va_uid && + (error = suser(p->p_ucred, &p->p_acflag))) + goto out; + if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) + VOP_REVOKE(vp, REVOKEALL); +out: + vrele(vp); + return (error); +} + +/* + * Convert a user file descriptor to a kernel file entry. 
+ */
+int
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	struct file **fpp;
+	int fd;
+{
+	struct file *fp;
+
+	/*
+	 * NOTE(review): the K&R identifier list (fdp, fd, fpp) fixes the
+	 * argument order; the declarations above appear in a different
+	 * order, which is legal but easy to misread.
+	 *
+	 * The u_int cast makes a negative descriptor fail the range
+	 * check as well.
+	 */
+	if ((u_int)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = fp;		/* no extra reference is taken for the caller */
+	return (0);
+}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
new file mode 100644
index 000000000000..3cfc6fd7bca3
--- /dev/null
+++ b/sys/kern/vfs_vnops.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/ioctl.h> +#include <sys/tty.h> + +#include <vm/vm.h> + +struct fileops vnops = + { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile }; + +/* + * Common code for vnode open operations. + * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 
+ */ +vn_open(ndp, fmode, cmode) + register struct nameidata *ndp; + int fmode, cmode; +{ + register struct vnode *vp; + register struct proc *p = ndp->ni_cnd.cn_proc; + register struct ucred *cred = p->p_ucred; + struct vattr vat; + struct vattr *vap = &vat; + int error; + + if (fmode & O_CREAT) { + ndp->ni_cnd.cn_nameiop = CREATE; + ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + if ((fmode & O_EXCL) == 0) + ndp->ni_cnd.cn_flags |= FOLLOW; + if (error = namei(ndp)) + return (error); + if (ndp->ni_vp == NULL) { + VATTR_NULL(vap); + vap->va_type = VREG; + vap->va_mode = cmode; + if (fmode & O_EXCL) + vap->va_vaflags |= VA_EXCLUSIVE; + VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); + if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, + &ndp->ni_cnd, vap)) + return (error); + fmode &= ~O_TRUNC; + vp = ndp->ni_vp; + } else { + VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); + if (ndp->ni_dvp == ndp->ni_vp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + ndp->ni_dvp = NULL; + vp = ndp->ni_vp; + if (fmode & O_EXCL) { + error = EEXIST; + goto bad; + } + fmode &= ~O_CREAT; + } + } else { + ndp->ni_cnd.cn_nameiop = LOOKUP; + ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF; + if (error = namei(ndp)) + return (error); + vp = ndp->ni_vp; + } + if (vp->v_type == VSOCK) { + error = EOPNOTSUPP; + goto bad; + } + if ((fmode & O_CREAT) == 0) { + if (fmode & FREAD) { + if (error = VOP_ACCESS(vp, VREAD, cred, p)) + goto bad; + } + if (fmode & (FWRITE | O_TRUNC)) { + if (vp->v_type == VDIR) { + error = EISDIR; + goto bad; + } + if ((error = vn_writechk(vp)) || + (error = VOP_ACCESS(vp, VWRITE, cred, p))) + goto bad; + } + } + if (fmode & O_TRUNC) { + VOP_UNLOCK(vp, 0, p); /* XXX */ + VOP_LEASE(vp, p, cred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ + VATTR_NULL(vap); + vap->va_size = 0; + if (error = VOP_SETATTR(vp, vap, cred, p)) + goto bad; + } + if (error = VOP_OPEN(vp, fmode, cred, p)) + goto bad; + if (fmode & FWRITE) + vp->v_writecount++; + return (0); +bad: + 
vput(vp); + return (error); +} + +/* + * Check for write permissions on the specified vnode. + * Prototype text segments cannot be written. + */ +vn_writechk(vp) + register struct vnode *vp; +{ + + /* + * If there's shared text associated with + * the vnode, try to free it up once. If + * we fail, we can't allow writing. + */ + if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp)) + return (ETXTBSY); + return (0); +} + +/* + * Vnode close call + */ +vn_close(vp, flags, cred, p) + register struct vnode *vp; + int flags; + struct ucred *cred; + struct proc *p; +{ + int error; + + if (flags & FWRITE) + vp->v_writecount--; + error = VOP_CLOSE(vp, flags, cred, p); + vrele(vp); + return (error); +} + +/* + * Package up an I/O request on a vnode into a uio and do it. + */ +vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) + enum uio_rw rw; + struct vnode *vp; + caddr_t base; + int len; + off_t offset; + enum uio_seg segflg; + int ioflg; + struct ucred *cred; + int *aresid; + struct proc *p; +{ + struct uio auio; + struct iovec aiov; + int error; + + if ((ioflg & IO_NODELOCKED) == 0) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = base; + aiov.iov_len = len; + auio.uio_resid = len; + auio.uio_offset = offset; + auio.uio_segflg = segflg; + auio.uio_rw = rw; + auio.uio_procp = p; + if (rw == UIO_READ) { + error = VOP_READ(vp, &auio, ioflg, cred); + } else { + error = VOP_WRITE(vp, &auio, ioflg, cred); + } + if (aresid) + *aresid = auio.uio_resid; + else + if (auio.uio_resid && error == 0) + error = EIO; + if ((ioflg & IO_NODELOCKED) == 0) + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * File table vnode read routine. 
+ */
+vn_read(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+	struct proc *p = uio->uio_procp;
+	int count, error;
+
+	VOP_LEASE(vp, p, cred, LEASE_READ);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	uio->uio_offset = fp->f_offset;
+	/*
+	 * Snapshot the residual count so the file offset can be
+	 * advanced by the number of bytes actually transferred.
+	 */
+	count = uio->uio_resid;
+	error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
+		cred);
+	fp->f_offset += count - uio->uio_resid;
+	VOP_UNLOCK(vp, 0, p);
+	return (error);
+}
+
+/*
+ * File table vnode write routine.
+ */
+vn_write(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+	struct proc *p = uio->uio_procp;
+	int count, error, ioflag = IO_UNIT;
+
+	/* Build the I/O flags from the open-file flags and mount options. */
+	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+		ioflag |= IO_APPEND;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+	if ((fp->f_flag & O_FSYNC) ||
+	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
+		ioflag |= IO_SYNC;
+	VOP_LEASE(vp, p, cred, LEASE_WRITE);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	uio->uio_offset = fp->f_offset;
+	count = uio->uio_resid;
+	error = VOP_WRITE(vp, uio, ioflag, cred);
+	/*
+	 * For appends, adopt the post-write uio_offset (the filesystem
+	 * positioned the write); otherwise advance by bytes written.
+	 */
+	if (ioflag & IO_APPEND)
+		fp->f_offset = uio->uio_offset;
+	else
+		fp->f_offset += count - uio->uio_resid;
+	VOP_UNLOCK(vp, 0, p);
+	return (error);
+}
+
+/*
+ * File table vnode stat routine. 
+ */ +vn_stat(vp, sb, p) + struct vnode *vp; + register struct stat *sb; + struct proc *p; +{ + struct vattr vattr; + register struct vattr *vap; + int error; + u_short mode; + + vap = &vattr; + error = VOP_GETATTR(vp, vap, p->p_ucred, p); + if (error) + return (error); + /* + * Copy from vattr table + */ + sb->st_dev = vap->va_fsid; + sb->st_ino = vap->va_fileid; + mode = vap->va_mode; + switch (vp->v_type) { + case VREG: + mode |= S_IFREG; + break; + case VDIR: + mode |= S_IFDIR; + break; + case VBLK: + mode |= S_IFBLK; + break; + case VCHR: + mode |= S_IFCHR; + break; + case VLNK: + mode |= S_IFLNK; + break; + case VSOCK: + mode |= S_IFSOCK; + break; + case VFIFO: + mode |= S_IFIFO; + break; + default: + return (EBADF); + }; + sb->st_mode = mode; + sb->st_nlink = vap->va_nlink; + sb->st_uid = vap->va_uid; + sb->st_gid = vap->va_gid; + sb->st_rdev = vap->va_rdev; + sb->st_size = vap->va_size; + sb->st_atimespec = vap->va_atime; + sb->st_mtimespec = vap->va_mtime; + sb->st_ctimespec = vap->va_ctime; + sb->st_blksize = vap->va_blocksize; + sb->st_flags = vap->va_flags; + sb->st_gen = vap->va_gen; + sb->st_blocks = vap->va_bytes / S_BLKSIZE; + return (0); +} + +/* + * File table vnode ioctl routine. + */ +vn_ioctl(fp, com, data, p) + struct file *fp; + u_long com; + caddr_t data; + struct proc *p; +{ + register struct vnode *vp = ((struct vnode *)fp->f_data); + struct vattr vattr; + int error; + + switch (vp->v_type) { + + case VREG: + case VDIR: + if (com == FIONREAD) { + if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) + return (error); + *(int *)data = vattr.va_size - fp->f_offset; + return (0); + } + if (com == FIONBIO || com == FIOASYNC) /* XXX */ + return (0); /* XXX */ + /* fall into ... 
*/ + + default: + return (ENOTTY); + + case VFIFO: + case VCHR: + case VBLK: + error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); + if (error == 0 && com == TIOCSCTTY) { + if (p->p_session->s_ttyvp) + vrele(p->p_session->s_ttyvp); + p->p_session->s_ttyvp = vp; + VREF(vp); + } + return (error); + } +} + +/* + * File table vnode select routine. + */ +vn_select(fp, which, p) + struct file *fp; + int which; + struct proc *p; +{ + + return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag, + fp->f_cred, p)); +} + +/* + * Check that the vnode is still valid, and if so + * acquire requested lock. + */ +int +vn_lock(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; +{ + int error; + + do { + if ((flags & LK_INTERLOCK) == 0) + simple_lock(&vp->v_interlock); + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vn_lock", 0); + error = ENOENT; + } else { + error = VOP_LOCK(vp, flags | LK_INTERLOCK, p); + if (error == 0) + return (error); + } + flags &= ~LK_INTERLOCK; + } while (flags & LK_RETRY); + return (error); +} + +/* + * File table vnode close routine. + */ +vn_closefile(fp, p) + struct file *fp; + struct proc *p; +{ + + return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, + fp->f_cred, p)); +} diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh new file mode 100644 index 000000000000..8b74d83eca95 --- /dev/null +++ b/sys/kern/vnode_if.sh @@ -0,0 +1,344 @@ +#!/bin/sh - +copyright=' +/* + * Copyright (c) 1992, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by the University of +# California, Berkeley and its contributors. +# 4. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + * + * from: NetBSD: vnode_if.sh,v 1.7 1994/08/25 03:04:28 cgd Exp $ + */ +' +SCRIPT_ID='@(#)vnode_if.sh 8.7 (Berkeley) 5/11/95' + +# Script to produce VFS front-end sugar. +# +# usage: vnode_if.sh srcfile +# (where srcfile is currently /sys/kern/vnode_if.src) +# + +if [ $# -ne 1 ] ; then + echo 'usage: vnode_if.sh srcfile' + exit 1 +fi + +# Name of the source file. +src=$1 + +# Names of the created files. +out_c=vnode_if.c +out_h=vnode_if.h + +# Awk program (must support nawk extensions) +# Use "awk" at Berkeley, "nawk" or "gawk" elsewhere. 
+awk=${AWK:-awk} + +# Does this awk have a "toupper" function? (i.e. is it GNU awk) +isgawk=`$awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null` + +# If this awk does not define "toupper" then define our own. +if [ "$isgawk" = TRUE ] ; then + # GNU awk provides it. + toupper= +else + # Provide our own toupper() + toupper=' +function toupper(str) { + _toupper_cmd = "echo "str" |tr a-z A-Z" + _toupper_cmd | getline _toupper_str; + close(_toupper_cmd); + return _toupper_str; +}' +fi + +# +# This is the common part of all awk programs that read $src +# This parses the input for one function into the arrays: +# argdir, argtype, argname, willrele +# and calls "doit()" to generate output for the function. +# +# Input to this parser is pre-processed slightly by sed +# so this awk parser doesn't have to work so hard. The +# changes done by the sed pre-processing step are: +# insert a space beween * and pointer name +# replace semicolons with spaces +# +sed_prep='s:\*\([^\*/]\):\* \1:g +s/;/ /' +awk_parser=' +# Comment line +/^#/ { next; } +# First line of description +/^vop_/ { + name=$1; + argc=0; + next; +} +# Last line of description +/^}/ { + doit(); + next; +} +# Middle lines of description +{ + argdir[argc] = $1; i=2; + if ($2 == "WILLRELE") { + willrele[argc] = 1; + i++; + } else + willrele[argc] = 0; + argtype[argc] = $i; i++; + while (i < NF) { + argtype[argc] = argtype[argc]" "$i; + i++; + } + argname[argc] = $i; + argc++; + next; +} +' + +# This is put after the copyright on each generated file. +warning=" +/* + * Warning: This file is generated automatically. + * (Modifications made here may easily be lost!) + * + * Created by the script: + * ${SCRIPT_ID} + */ +" + +# Get rid of ugly spaces +space_elim='s:\([^/]\*\) :\1:g' + +# +# Redirect stdout to the H file. 
+# +echo "$0: Creating $out_h" 1>&2 +exec > $out_h + +# Begin stuff +echo "$copyright" +echo "$warning" +echo ' +extern struct vnodeop_desc vop_default_desc; +' + +# Body stuff +# This awk program needs toupper() so define it if necessary. +sed -e "$sed_prep" $src | $awk "$toupper"' +function doit() { + # Declare arg struct, descriptor. + printf("\nstruct %s_args {\n", name); + printf("\tstruct vnodeop_desc * a_desc;\n"); + for (i=0; i<argc; i++) { + printf("\t%s a_%s;\n", argtype[i], argname[i]); + } + printf("};\n"); + printf("extern struct vnodeop_desc %s_desc;\n", name); + # Define inline function. + printf("#define %s(", toupper(name)); + for (i=0; i<argc; i++) { + printf("%s", argname[i]); + if (i < (argc-1)) printf(", "); + } + printf(") _%s(", toupper(name)); + for (i=0; i<argc; i++) { + printf("%s", argname[i]); + if (i < (argc-1)) printf(", "); + } + printf(")\n"); + printf("static __inline int _%s(", toupper(name)); + for (i=0; i<argc; i++) { + printf("%s", argname[i]); + if (i < (argc-1)) printf(", "); + } + printf(")\n"); + for (i=0; i<argc; i++) { + printf("\t%s %s;\n", argtype[i], argname[i]); + } + printf("{\n\tstruct %s_args a;\n", name); + printf("\ta.a_desc = VDESC(%s);\n", name); + for (i=0; i<argc; i++) { + printf("\ta.a_%s = %s;\n", argname[i], argname[i]); + } + printf("\treturn (VCALL(%s%s, VOFFSET(%s), &a));\n}\n", + argname[0], arg0special, name); +} +BEGIN { + arg0special=""; +} +END { + printf("\n/* Special cases: */\n#include <sys/buf.h>\n"); + argc=1; + argtype[0]="struct buf *"; + argname[0]="bp"; + arg0special="->b_vp"; + name="vop_strategy"; + doit(); + name="vop_bwrite"; + doit(); +} +'"$awk_parser" | sed -e "$space_elim" + +# End stuff +echo ' +/* End of special cases. */' + + +# +# Redirect stdout to the C file. 
+# +echo "$0: Creating $out_c" 1>&2 +exec > $out_c + +# Begin stuff +echo "$copyright" +echo "$warning" +echo ' +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +struct vnodeop_desc vop_default_desc = { + 0, + "default", + 0, + NULL, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + NULL, +}; +' + +# Body stuff +sed -e "$sed_prep" $src | $awk ' +function do_offset(typematch) { + for (i=0; i<argc; i++) { + if (argtype[i] == typematch) { + printf("\tVOPARG_OFFSETOF(struct %s_args, a_%s),\n", + name, argname[i]); + return i; + }; + }; + print "\tVDESC_NO_OFFSET,"; + return -1; +} + +function doit() { + # Define offsets array + printf("\nint %s_vp_offsets[] = {\n", name); + for (i=0; i<argc; i++) { + if (argtype[i] == "struct vnode *") { + printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n", + name, argname[i]); + } + } + print "\tVDESC_NO_OFFSET"; + print "};"; + # Define F_desc + printf("struct vnodeop_desc %s_desc = {\n", name); + # offset + printf ("\t0,\n"); + # printable name + printf ("\t\"%s\",\n", name); + # flags + printf("\t0"); + vpnum = 0; + for (i=0; i<argc; i++) { + if (willrele[i]) { + if (argdir[i] ~ /OUT/) { + printf(" | VDESC_VPP_WILLRELE"); + } else { + printf(" | VDESC_VP%s_WILLRELE", vpnum); + }; + vpnum++; + } + } + print ","; + # vp offsets + printf ("\t%s_vp_offsets,\n", name); + # vpp (if any) + do_offset("struct vnode **"); + # cred (if any) + do_offset("struct ucred *"); + # proc (if any) + do_offset("struct proc *"); + # componentname + do_offset("struct componentname *"); + # transport layer information + printf ("\tNULL,\n};\n"); +} +END { + printf("\n/* Special cases: */\n"); + argc=1; + argdir[0]="IN"; + argtype[0]="struct buf *"; + argname[0]="bp"; + willrele[0]=0; + name="vop_strategy"; + doit(); + name="vop_bwrite"; + doit(); +} +'"$awk_parser" | sed -e "$space_elim" + +# End stuff +echo ' +/* End of special cases. */' + +# Add the vfs_op_descs array to the C file. 
+# Begin stuff +echo ' +struct vnodeop_desc *vfs_op_descs[] = { + &vop_default_desc, /* MUST BE FIRST */ + &vop_strategy_desc, /* XXX: SPECIAL CASE */ + &vop_bwrite_desc, /* XXX: SPECIAL CASE */ +' + +# Body stuff +sed -e "$sed_prep" $src | $awk ' +function doit() { + printf("\t&%s_desc,\n", name); +} +'"$awk_parser" + +# End stuff +echo ' NULL +}; +' + +exit 0 + +# Local Variables: +# tab-width: 4 +# End: diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src new file mode 100644 index 000000000000..1e32f29abd58 --- /dev/null +++ b/sys/kern/vnode_if.src @@ -0,0 +1,494 @@ +# +# Copyright (c) 1992, 1993 +# The Regents of the University of California. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by the University of +# California, Berkeley and its contributors. +# 4. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.src 8.12 (Berkeley) 5/14/95
+#
+
+#
+# Above each of the vop descriptors is a specification of the locking
+# protocol used by each vop call. The first column is the name of
+# the variable, the remaining three columns are in, out and error
+# respectively. The "in" column defines the lock state on input,
+# the "out" column defines the state on successful return, and the
+# "error" column defines the locking state on error exit.
+#
+# The locking value can take the following values:
+# L: locked.
+# U: unlocked.
+# -: not applicable. vnode does not yet exist (or no longer exists).
+# =: the same on input and output, may be either L or U.
+# X: locked if not nil.
+#
+
+#
+#% lookup dvp L ? ?
+#% lookup vpp - L -
+#
+# XXX - the lookup locking protocol defies simple description and depends
+# on the flags and operation fields in the (cnp) structure. Note
+# especially that *vpp may equal dvp and both may be locked.
+# +vop_lookup { + IN struct vnode *dvp; + INOUT struct vnode **vpp; + IN struct componentname *cnp; +}; + +# +#% create dvp L U U +#% create vpp - L - +# +vop_create { + IN WILLRELE struct vnode *dvp; + OUT struct vnode **vpp; + IN struct componentname *cnp; + IN struct vattr *vap; +}; + +# +#% whiteout dvp L L L +#% whiteout cnp - - - +#% whiteout flag - - - +# +vop_whiteout { + IN WILLRELE struct vnode *dvp; + IN struct componentname *cnp; + IN int flags; +}; + +# +#% mknod dvp L U U +#% mknod vpp - X - +# +vop_mknod { + IN WILLRELE struct vnode *dvp; + OUT WILLRELE struct vnode **vpp; + IN struct componentname *cnp; + IN struct vattr *vap; +}; + +# +#% open vp L L L +# +vop_open { + IN struct vnode *vp; + IN int mode; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% close vp U U U +# +vop_close { + IN struct vnode *vp; + IN int fflag; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% access vp L L L +# +vop_access { + IN struct vnode *vp; + IN int mode; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% getattr vp = = = +# +vop_getattr { + IN struct vnode *vp; + IN struct vattr *vap; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% setattr vp L L L +# +vop_setattr { + IN struct vnode *vp; + IN struct vattr *vap; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% read vp L L L +# +vop_read { + IN struct vnode *vp; + INOUT struct uio *uio; + IN int ioflag; + IN struct ucred *cred; +}; + +# +#% write vp L L L +# +vop_write { + IN struct vnode *vp; + INOUT struct uio *uio; + IN int ioflag; + IN struct ucred *cred; +}; + +# +#% lease vp = = = +# +vop_lease { + IN struct vnode *vp; + IN struct proc *p; + IN struct ucred *cred; + IN int flag; +}; + +# +#% ioctl vp U U U +# +vop_ioctl { + IN struct vnode *vp; + IN u_long command; + IN caddr_t data; + IN int fflag; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% select vp U U U +# +# Needs work? 
(fflags) +# +vop_select { + IN struct vnode *vp; + IN int which; + IN int fflags; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% revoke vp U U U +# +vop_revoke { + IN struct vnode *vp; + IN int flags; +}; + +# +# XXX - not used +# +vop_mmap { + IN struct vnode *vp; + IN int fflags; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% fsync vp L L L +# +vop_fsync { + IN struct vnode *vp; + IN struct ucred *cred; + IN int waitfor; + IN struct proc *p; +}; + +# +# XXX - not used +# Needs work: Is newoff right? What's it mean? +# +vop_seek { + IN struct vnode *vp; + IN off_t oldoff; + IN off_t newoff; + IN struct ucred *cred; +}; + +# +#% remove dvp L U U +#% remove vp L U U +# +vop_remove { + IN WILLRELE struct vnode *dvp; + IN WILLRELE struct vnode *vp; + IN struct componentname *cnp; +}; + +# +#% link vp U U U +#% link tdvp L U U +# +vop_link { + IN WILLRELE struct vnode *vp; + IN struct vnode *tdvp; + IN struct componentname *cnp; +}; + +# +#% rename fdvp U U U +#% rename fvp U U U +#% rename tdvp L U U +#% rename tvp X U U +# +vop_rename { + IN WILLRELE struct vnode *fdvp; + IN WILLRELE struct vnode *fvp; + IN struct componentname *fcnp; + IN WILLRELE struct vnode *tdvp; + IN WILLRELE struct vnode *tvp; + IN struct componentname *tcnp; +}; + +# +#% mkdir dvp L U U +#% mkdir vpp - L - +# +vop_mkdir { + IN WILLRELE struct vnode *dvp; + OUT struct vnode **vpp; + IN struct componentname *cnp; + IN struct vattr *vap; +}; + +# +#% rmdir dvp L U U +#% rmdir vp L U U +# +vop_rmdir { + IN WILLRELE struct vnode *dvp; + IN WILLRELE struct vnode *vp; + IN struct componentname *cnp; +}; + +# +#% symlink dvp L U U +#% symlink vpp - U - +# +# XXX - note that the return vnode has already been VRELE'ed +# by the filesystem layer. To use it you must use vget, +# possibly with a further namei. 
+# +vop_symlink { + IN WILLRELE struct vnode *dvp; + OUT WILLRELE struct vnode **vpp; + IN struct componentname *cnp; + IN struct vattr *vap; + IN char *target; +}; + +# +#% readdir vp L L L +# +vop_readdir { + IN struct vnode *vp; + INOUT struct uio *uio; + IN struct ucred *cred; + INOUT int *eofflag; + OUT int *ncookies; + INOUT u_long **cookies; +}; + +# +#% readlink vp L L L +# +vop_readlink { + IN struct vnode *vp; + INOUT struct uio *uio; + IN struct ucred *cred; +}; + +# +#% abortop dvp = = = +# +vop_abortop { + IN struct vnode *dvp; + IN struct componentname *cnp; +}; + +# +#% inactive vp L U U +# +vop_inactive { + IN struct vnode *vp; + IN struct proc *p; +}; + +# +#% reclaim vp U U U +# +vop_reclaim { + IN struct vnode *vp; + IN struct proc *p; +}; + +# +#% lock vp U L U +# +vop_lock { + IN struct vnode *vp; + IN int flags; + IN struct proc *p; +}; + +# +#% unlock vp L U L +# +vop_unlock { + IN struct vnode *vp; + IN int flags; + IN struct proc *p; +}; + +# +#% bmap vp L L L +#% bmap vpp - U - +# +vop_bmap { + IN struct vnode *vp; + IN daddr_t bn; + OUT struct vnode **vpp; + IN daddr_t *bnp; + OUT int *runp; +}; + +# +# Needs work: no vp? 
+# +#vop_strategy { +# IN struct buf *bp; +#}; + +# +#% print vp = = = +# +vop_print { + IN struct vnode *vp; +}; + +# +#% islocked vp = = = +# +vop_islocked { + IN struct vnode *vp; +}; + +# +#% pathconf vp L L L +# +vop_pathconf { + IN struct vnode *vp; + IN int name; + OUT register_t *retval; +}; + +# +#% advlock vp U U U +# +vop_advlock { + IN struct vnode *vp; + IN caddr_t id; + IN int op; + IN struct flock *fl; + IN int flags; +}; + +# +#% blkatoff vp L L L +# +vop_blkatoff { + IN struct vnode *vp; + IN off_t offset; + OUT char **res; + OUT struct buf **bpp; +}; + +# +#% valloc pvp L L L +# +vop_valloc { + IN struct vnode *pvp; + IN int mode; + IN struct ucred *cred; + OUT struct vnode **vpp; +}; + +# +#% reallocblks vp L L L +# +vop_reallocblks { + IN struct vnode *vp; + IN struct cluster_save *buflist; +}; + +# +#% vfree pvp L L L +# +vop_vfree { + IN struct vnode *pvp; + IN ino_t ino; + IN int mode; +}; + +# +#% truncate vp L L L +# +vop_truncate { + IN struct vnode *vp; + IN off_t length; + IN int flags; + IN struct ucred *cred; + IN struct proc *p; +}; + +# +#% update vp L L L +# +vop_update { + IN struct vnode *vp; + IN struct timeval *access; + IN struct timeval *modify; + IN int waitfor; +}; + +# +# Needs work: no vp? +# +#vop_bwrite { +# IN struct buf *bp; +#}; |
