aboutsummaryrefslogtreecommitdiff
path: root/sys/kern/uipc_usrreq.c
diff options
context:
space:
mode:
authorRodney W. Grimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
committerRodney W. Grimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
commitdf8bae1de4b67ccf57f4afebd4e2bf258c38910d (patch)
tree7b1a8eb5b08af4b9f7bac45ad41398687df2c351 /sys/kern/uipc_usrreq.c
parent59deaec541ae8baaa58daf6c5a2196ea7de180c3 (diff)
downloadsrc-df8bae1de4b67ccf57f4afebd4e2bf258c38910d.tar.gz
src-df8bae1de4b67ccf57f4afebd4e2bf258c38910d.zip
BSD 4.4 Lite Kernel Sources
Notes
Notes: svn path=/head/; revision=1541
Diffstat (limited to 'sys/kern/uipc_usrreq.c')
-rw-r--r--sys/kern/uipc_usrreq.c823
1 files changed, 823 insertions, 0 deletions
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 000000000000..94bf8f744c86
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/unpcb.h>
+#include <sys/un.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mbuf.h>
+
+/*
+ * Unix communications domain.
+ *
+ * TODO:
+ * SEQPACKET, RDM
+ * rethink name space problems
+ * need a proper out-of-band
+ */
+struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
+ino_t unp_ino; /* prototype for fake inode numbers */
+
+/*ARGSUSED*/
+uipc_usrreq(so, req, m, nam, control)
+ struct socket *so;
+ int req;
+ struct mbuf *m, *nam, *control;
+{
+ struct unpcb *unp = sotounpcb(so);
+ register struct socket *so2;
+ register int error = 0;
+ struct proc *p = curproc; /* XXX */
+
+ if (req == PRU_CONTROL)
+ return (EOPNOTSUPP);
+ if (req != PRU_SEND && control && control->m_len) {
+ error = EOPNOTSUPP;
+ goto release;
+ }
+ if (unp == 0 && req != PRU_ATTACH) {
+ error = EINVAL;
+ goto release;
+ }
+ switch (req) {
+
+ case PRU_ATTACH:
+ if (unp) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_attach(so);
+ break;
+
+ case PRU_DETACH:
+ unp_detach(unp);
+ break;
+
+ case PRU_BIND:
+ error = unp_bind(unp, nam, p);
+ break;
+
+ case PRU_LISTEN:
+ if (unp->unp_vnode == 0)
+ error = EINVAL;
+ break;
+
+ case PRU_CONNECT:
+ error = unp_connect(so, nam, p);
+ break;
+
+ case PRU_CONNECT2:
+ error = unp_connect2(so, (struct socket *)nam);
+ break;
+
+ case PRU_DISCONNECT:
+ unp_disconnect(unp);
+ break;
+
+ case PRU_ACCEPT:
+ /*
+ * Pass back name of connected socket,
+ * if it was bound and we are still connected
+ * (our peer may have closed already!).
+ */
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else {
+ nam->m_len = sizeof(sun_noname);
+ *(mtod(nam, struct sockaddr *)) = sun_noname;
+ }
+ break;
+
+ case PRU_SHUTDOWN:
+ socantsendmore(so);
+ unp_shutdown(unp);
+ break;
+
+ case PRU_RCVD:
+ switch (so->so_type) {
+
+ case SOCK_DGRAM:
+ panic("uipc 1");
+ /*NOTREACHED*/
+
+ case SOCK_STREAM:
+#define rcv (&so->so_rcv)
+#define snd (&so2->so_snd)
+ if (unp->unp_conn == 0)
+ break;
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Adjust backpressure on sender
+ * and wakeup any waiting to write.
+ */
+ snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
+ unp->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
+ unp->unp_cc = rcv->sb_cc;
+ sowwakeup(so2);
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 2");
+ }
+ break;
+
+ case PRU_SEND:
+ if (control && (error = unp_internalize(control, p)))
+ break;
+ switch (so->so_type) {
+
+ case SOCK_DGRAM: {
+ struct sockaddr *from;
+
+ if (nam) {
+ if (unp->unp_conn) {
+ error = EISCONN;
+ break;
+ }
+ error = unp_connect(so, nam, p);
+ if (error)
+ break;
+ } else {
+ if (unp->unp_conn == 0) {
+ error = ENOTCONN;
+ break;
+ }
+ }
+ so2 = unp->unp_conn->unp_socket;
+ if (unp->unp_addr)
+ from = mtod(unp->unp_addr, struct sockaddr *);
+ else
+ from = &sun_noname;
+ if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+ sorwakeup(so2);
+ m = 0;
+ control = 0;
+ } else
+ error = ENOBUFS;
+ if (nam)
+ unp_disconnect(unp);
+ break;
+ }
+
+ case SOCK_STREAM:
+#define rcv (&so2->so_rcv)
+#define snd (&so->so_snd)
+ if (so->so_state & SS_CANTSENDMORE) {
+ error = EPIPE;
+ break;
+ }
+ if (unp->unp_conn == 0)
+ panic("uipc 3");
+ so2 = unp->unp_conn->unp_socket;
+ /*
+ * Send to paired receive port, and then reduce
+ * send buffer hiwater marks to maintain backpressure.
+ * Wake up readers.
+ */
+ if (control) {
+ if (sbappendcontrol(rcv, m, control))
+ control = 0;
+ } else
+ sbappend(rcv, m);
+ snd->sb_mbmax -=
+ rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
+ unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
+ snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
+ unp->unp_conn->unp_cc = rcv->sb_cc;
+ sorwakeup(so2);
+ m = 0;
+#undef snd
+#undef rcv
+ break;
+
+ default:
+ panic("uipc 4");
+ }
+ break;
+
+ case PRU_ABORT:
+ unp_drop(unp, ECONNABORTED);
+ break;
+
+ case PRU_SENSE:
+ ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+ if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
+ so2 = unp->unp_conn->unp_socket;
+ ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
+ }
+ ((struct stat *) m)->st_dev = NODEV;
+ if (unp->unp_ino == 0)
+ unp->unp_ino = unp_ino++;
+ ((struct stat *) m)->st_ino = unp->unp_ino;
+ return (0);
+
+ case PRU_RCVOOB:
+ return (EOPNOTSUPP);
+
+ case PRU_SENDOOB:
+ error = EOPNOTSUPP;
+ break;
+
+ case PRU_SOCKADDR:
+ if (unp->unp_addr) {
+ nam->m_len = unp->unp_addr->m_len;
+ bcopy(mtod(unp->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_PEERADDR:
+ if (unp->unp_conn && unp->unp_conn->unp_addr) {
+ nam->m_len = unp->unp_conn->unp_addr->m_len;
+ bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+ mtod(nam, caddr_t), (unsigned)nam->m_len);
+ } else
+ nam->m_len = 0;
+ break;
+
+ case PRU_SLOWTIMO:
+ break;
+
+ default:
+ panic("piusrreq");
+ }
+release:
+ if (control)
+ m_freem(control);
+ if (m)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace. Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#define PIPSIZ 4096
+u_long unpst_sendspace = PIPSIZ;
+u_long unpst_recvspace = PIPSIZ;
+u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+u_long unpdg_recvspace = 4*1024;
+
+int unp_rights; /* file descriptors in flight */
+
+unp_attach(so)
+ struct socket *so;
+{
+ register struct mbuf *m;
+ register struct unpcb *unp;
+ int error;
+
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ switch (so->so_type) {
+
+ case SOCK_STREAM:
+ error = soreserve(so, unpst_sendspace, unpst_recvspace);
+ break;
+
+ case SOCK_DGRAM:
+ error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
+ break;
+
+ default:
+ panic("unp_attach");
+ }
+ if (error)
+ return (error);
+ }
+ m = m_getclr(M_DONTWAIT, MT_PCB);
+ if (m == NULL)
+ return (ENOBUFS);
+ unp = mtod(m, struct unpcb *);
+ so->so_pcb = (caddr_t)unp;
+ unp->unp_socket = so;
+ return (0);
+}
+
+unp_detach(unp)
+ register struct unpcb *unp;
+{
+
+ if (unp->unp_vnode) {
+ unp->unp_vnode->v_socket = 0;
+ vrele(unp->unp_vnode);
+ unp->unp_vnode = 0;
+ }
+ if (unp->unp_conn)
+ unp_disconnect(unp);
+ while (unp->unp_refs)
+ unp_drop(unp->unp_refs, ECONNRESET);
+ soisdisconnected(unp->unp_socket);
+ unp->unp_socket->so_pcb = 0;
+ m_freem(unp->unp_addr);
+ (void) m_free(dtom(unp));
+ if (unp_rights) {
+ /*
+ * Normally the receive buffer is flushed later,
+ * in sofree, but if our receive buffer holds references
+ * to descriptors that are now garbage, we will dispose
+ * of those descriptor references after the garbage collector
+ * gets them (resulting in a "panic: closef: count < 0").
+ */
+ sorflush(unp->unp_socket);
+ unp_gc();
+ }
+}
+
+unp_bind(unp, nam, p)
+ struct unpcb *unp;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
+ soun->sun_path, p);
+ if (unp->unp_vnode != NULL)
+ return (EINVAL);
+ if (nam->m_len == MLEN) {
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EINVAL);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EADDRINUSE);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VSOCK;
+ vattr.va_mode = ACCESSPERMS;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
+ return (error);
+ vp = nd.ni_vp;
+ vp->v_socket = unp->unp_socket;
+ unp->unp_vnode = vp;
+ unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
+ VOP_UNLOCK(vp);
+ return (0);
+}
+
+unp_connect(so, nam, p)
+ struct socket *so;
+ struct mbuf *nam;
+ struct proc *p;
+{
+ register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+ register struct vnode *vp;
+ register struct socket *so2, *so3;
+ struct unpcb *unp2, *unp3;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
+ if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */
+ if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+ return (EMSGSIZE);
+ } else
+ *(mtod(nam, caddr_t) + nam->m_len) = 0;
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VSOCK) {
+ error = ENOTSOCK;
+ goto bad;
+ }
+ if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p))
+ goto bad;
+ so2 = vp->v_socket;
+ if (so2 == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ if (so->so_type != so2->so_type) {
+ error = EPROTOTYPE;
+ goto bad;
+ }
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
+ (so3 = sonewconn(so2, 0)) == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ unp2 = sotounpcb(so2);
+ unp3 = sotounpcb(so3);
+ if (unp2->unp_addr)
+ unp3->unp_addr =
+ m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
+ so2 = so3;
+ }
+ error = unp_connect2(so, so2);
+bad:
+ vput(vp);
+ return (error);
+}
+
+unp_connect2(so, so2)
+ register struct socket *so;
+ register struct socket *so2;
+{
+ register struct unpcb *unp = sotounpcb(so);
+ register struct unpcb *unp2;
+
+ if (so2->so_type != so->so_type)
+ return (EPROTOTYPE);
+ unp2 = sotounpcb(so2);
+ unp->unp_conn = unp2;
+ switch (so->so_type) {
+
+ case SOCK_DGRAM:
+ unp->unp_nextref = unp2->unp_refs;
+ unp2->unp_refs = unp;
+ soisconnected(so);
+ break;
+
+ case SOCK_STREAM:
+ unp2->unp_conn = unp;
+ soisconnected(so);
+ soisconnected(so2);
+ break;
+
+ default:
+ panic("unp_connect2");
+ }
+ return (0);
+}
+
+unp_disconnect(unp)
+ struct unpcb *unp;
+{
+ register struct unpcb *unp2 = unp->unp_conn;
+
+ if (unp2 == 0)
+ return;
+ unp->unp_conn = 0;
+ switch (unp->unp_socket->so_type) {
+
+ case SOCK_DGRAM:
+ if (unp2->unp_refs == unp)
+ unp2->unp_refs = unp->unp_nextref;
+ else {
+ unp2 = unp2->unp_refs;
+ for (;;) {
+ if (unp2 == 0)
+ panic("unp_disconnect");
+ if (unp2->unp_nextref == unp)
+ break;
+ unp2 = unp2->unp_nextref;
+ }
+ unp2->unp_nextref = unp->unp_nextref;
+ }
+ unp->unp_nextref = 0;
+ unp->unp_socket->so_state &= ~SS_ISCONNECTED;
+ break;
+
+ case SOCK_STREAM:
+ soisdisconnected(unp->unp_socket);
+ unp2->unp_conn = 0;
+ soisdisconnected(unp2->unp_socket);
+ break;
+ }
+}
+
+#ifdef notdef
+unp_abort(unp)
+ struct unpcb *unp;
+{
+
+ unp_detach(unp);
+}
+#endif
+
+unp_shutdown(unp)
+ struct unpcb *unp;
+{
+ struct socket *so;
+
+ if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
+ (so = unp->unp_conn->unp_socket))
+ socantrcvmore(so);
+}
+
+unp_drop(unp, errno)
+ struct unpcb *unp;
+ int errno;
+{
+ struct socket *so = unp->unp_socket;
+
+ so->so_error = errno;
+ unp_disconnect(unp);
+ if (so->so_head) {
+ so->so_pcb = (caddr_t) 0;
+ m_freem(unp->unp_addr);
+ (void) m_free(dtom(unp));
+ sofree(so);
+ }
+}
+
+#ifdef notdef
+unp_drain()
+{
+
+}
+#endif
+
+unp_externalize(rights)
+ struct mbuf *rights;
+{
+ struct proc *p = curproc; /* XXX */
+ register int i;
+ register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
+ register struct file **rp = (struct file **)(cm + 1);
+ register struct file *fp;
+ int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
+ int f;
+
+ if (!fdavail(p, newfds)) {
+ for (i = 0; i < newfds; i++) {
+ fp = *rp;
+ unp_discard(fp);
+ *rp++ = 0;
+ }
+ return (EMSGSIZE);
+ }
+ for (i = 0; i < newfds; i++) {
+ if (fdalloc(p, 0, &f))
+ panic("unp_externalize");
+ fp = *rp;
+ p->p_fd->fd_ofiles[f] = fp;
+ fp->f_msgcount--;
+ unp_rights--;
+ *(int *)rp++ = f;
+ }
+ return (0);
+}
+
+unp_internalize(control, p)
+ struct mbuf *control;
+ struct proc *p;
+{
+ struct filedesc *fdp = p->p_fd;
+ register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+ register struct file **rp;
+ register struct file *fp;
+ register int i, fd;
+ int oldfds;
+
+ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_len != control->m_len)
+ return (EINVAL);
+ oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fd = *(int *)rp++;
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ fdp->fd_ofiles[fd] == NULL)
+ return (EBADF);
+ }
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < oldfds; i++) {
+ fp = fdp->fd_ofiles[*(int *)rp];
+ *rp++ = fp;
+ fp->f_count++;
+ fp->f_msgcount++;
+ unp_rights++;
+ }
+ return (0);
+}
+
+int unp_defer, unp_gcing;
+int unp_mark();
+extern struct domain unixdomain;
+
+unp_gc()
+{
+ register struct file *fp, *nextfp;
+ register struct socket *so;
+ struct file **extra_ref, **fpp;
+ int nunref, i;
+
+ if (unp_gcing)
+ return;
+ unp_gcing = 1;
+ unp_defer = 0;
+ for (fp = filehead; fp; fp = fp->f_filef)
+ fp->f_flag &= ~(FMARK|FDEFER);
+ do {
+ for (fp = filehead; fp; fp = fp->f_filef) {
+ if (fp->f_count == 0)
+ continue;
+ if (fp->f_flag & FDEFER) {
+ fp->f_flag &= ~FDEFER;
+ unp_defer--;
+ } else {
+ if (fp->f_flag & FMARK)
+ continue;
+ if (fp->f_count == fp->f_msgcount)
+ continue;
+ fp->f_flag |= FMARK;
+ }
+ if (fp->f_type != DTYPE_SOCKET ||
+ (so = (struct socket *)fp->f_data) == 0)
+ continue;
+ if (so->so_proto->pr_domain != &unixdomain ||
+ (so->so_proto->pr_flags&PR_RIGHTS) == 0)
+ continue;
+#ifdef notdef
+ if (so->so_rcv.sb_flags & SB_LOCK) {
+ /*
+ * This is problematical; it's not clear
+ * we need to wait for the sockbuf to be
+ * unlocked (on a uniprocessor, at least),
+ * and it's also not clear what to do
+ * if sbwait returns an error due to receipt
+ * of a signal. If sbwait does return
+ * an error, we'll go into an infinite
+ * loop. Delete all of this for now.
+ */
+ (void) sbwait(&so->so_rcv);
+ goto restart;
+ }
+#endif
+ unp_scan(so->so_rcv.sb_mb, unp_mark);
+ }
+ } while (unp_defer);
+ /*
+ * We grab an extra reference to each of the file table entries
+ * that are not otherwise accessible and then free the rights
+ * that are stored in messages on them.
+ *
+ * The bug in the orginal code is a little tricky, so I'll describe
+ * what's wrong with it here.
+ *
+ * It is incorrect to simply unp_discard each entry for f_msgcount
+ * times -- consider the case of sockets A and B that contain
+ * references to each other. On a last close of some other socket,
+ * we trigger a gc since the number of outstanding rights (unp_rights)
+ * is non-zero. If during the sweep phase the gc code un_discards,
+ * we end up doing a (full) closef on the descriptor. A closef on A
+ * results in the following chain. Closef calls soo_close, which
+ * calls soclose. Soclose calls first (through the switch
+ * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
+ * returns because the previous instance had set unp_gcing, and
+ * we return all the way back to soclose, which marks the socket
+ * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
+ * to free up the rights that are queued in messages on the socket A,
+ * i.e., the reference on B. The sorflush calls via the dom_dispose
+ * switch unp_dispose, which unp_scans with unp_discard. This second
+ * instance of unp_discard just calls closef on B.
+ *
+ * Well, a similar chain occurs on B, resulting in a sorflush on B,
+ * which results in another closef on A. Unfortunately, A is already
+ * being closed, and the descriptor has already been marked with
+ * SS_NOFDREF, and soclose panics at this point.
+ *
+ * Here, we first take an extra reference to each inaccessible
+ * descriptor. Then, we call sorflush ourself, since we know
+ * it is a Unix domain socket anyhow. After we destroy all the
+ * rights carried in messages, we do a last closef to get rid
+ * of our extra reference. This is the last close, and the
+ * unp_detach etc will shut down the socket.
+ *
+ * 91/09/19, bsy@cs.cmu.edu
+ */
+ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
+ for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
+ nextfp = fp->f_filef;
+ if (fp->f_count == 0)
+ continue;
+ if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
+ *fpp++ = fp;
+ nunref++;
+ fp->f_count++;
+ }
+ }
+ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+ sorflush((struct socket *)(*fpp)->f_data);
+ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+ closef(*fpp);
+ free((caddr_t)extra_ref, M_FILE);
+ unp_gcing = 0;
+}
+
+unp_dispose(m)
+ struct mbuf *m;
+{
+ int unp_discard();
+
+ if (m)
+ unp_scan(m, unp_discard);
+}
+
+unp_scan(m0, op)
+ register struct mbuf *m0;
+ int (*op)();
+{
+ register struct mbuf *m;
+ register struct file **rp;
+ register struct cmsghdr *cm;
+ register int i;
+ int qfds;
+
+ while (m0) {
+ for (m = m0; m; m = m->m_next)
+ if (m->m_type == MT_CONTROL &&
+ m->m_len >= sizeof(*cm)) {
+ cm = mtod(m, struct cmsghdr *);
+ if (cm->cmsg_level != SOL_SOCKET ||
+ cm->cmsg_type != SCM_RIGHTS)
+ continue;
+ qfds = (cm->cmsg_len - sizeof *cm)
+ / sizeof (struct file *);
+ rp = (struct file **)(cm + 1);
+ for (i = 0; i < qfds; i++)
+ (*op)(*rp++);
+ break; /* XXX, but saves time */
+ }
+ m0 = m0->m_act;
+ }
+}
+
+unp_mark(fp)
+ struct file *fp;
+{
+
+ if (fp->f_flag & FMARK)
+ return;
+ unp_defer++;
+ fp->f_flag |= (FMARK|FDEFER);
+}
+
+unp_discard(fp)
+ struct file *fp;
+{
+
+ fp->f_msgcount--;
+ unp_rights--;
+ (void) closef(fp, (struct proc *)NULL);
+}