summaryrefslogtreecommitdiff
path: root/sys/kern/uipc_syscalls.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern/uipc_syscalls.c')
-rw-r--r--sys/kern/uipc_syscalls.c430
1 files changed, 1 insertions, 429 deletions
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 6cc487ad8cdf..a841bfad18c7 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -2,9 +2,6 @@
* Copyright (c) 1982, 1986, 1989, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
- * sendfile(2) and related extensions:
- * Copyright (c) 1998, David Greenman. All rights reserved.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
- * $Id: uipc_syscalls.c,v 1.48 1998/12/03 12:35:47 dg Exp $
+ * $Id: uipc_syscalls.c,v 1.40 1998/06/10 10:30:23 dfr Exp $
*/
#include "opt_compat.h"
@@ -42,7 +39,6 @@
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/kernel.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/filedesc.h>
@@ -55,27 +51,9 @@
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
-#include <sys/vnode.h>
-#include <sys/lock.h>
-#include <sys/mount.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
-#include <vm/vm.h>
-#include <vm/vm_prot.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <machine/limits.h>
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
-static struct sf_buf *sf_buf_alloc(void);
-static void sf_buf_ref(caddr_t addr, u_int size);
-static void sf_buf_free(caddr_t addr, u_int size);
static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
static int recvit __P((struct proc *p, int s, struct msghdr *mp,
@@ -87,11 +65,6 @@ static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
int compat));
-static SLIST_HEAD(, sf_buf) sf_freelist;
-static vm_offset_t sf_base;
-static struct sf_buf *sf_bufs;
-static int sf_buf_alloc_want;
-
/*
* System call interface to the socket abstraction.
*/
@@ -260,8 +233,6 @@ accept1(p, uap, compat)
so->so_state &= ~SS_COMP;
so->so_head = NULL;
- if (head->so_sigio != NULL)
- fsetown(fgetown(head->so_sigio), &so->so_sigio);
fp->f_type = DTYPE_SOCKET;
fp->f_flag = fflag;
@@ -1303,402 +1274,3 @@ getsock(fdp, fdes, fpp)
*fpp = fp;
return (0);
}
-
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- * XXX - The sf_buf functions are currently private to sendfile(2), so have
- * been made static, but may be useful in the future for doing zero-copy in
- * other parts of the networking code.
- */
-static void
-sf_buf_init(void *arg)
-{
- int i;
-
- SLIST_INIT(&sf_freelist);
- sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
- bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
- }
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-static struct sf_buf *
-sf_buf_alloc()
-{
- struct sf_buf *sf;
- int s;
-
- s = splimp();
- while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
- sf_buf_alloc_want = 1;
- tsleep(&sf_freelist, PVM, "sfbufa", 0);
- }
- SLIST_REMOVE_HEAD(&sf_freelist, free_list);
- splx(s);
- sf->refcnt = 1;
- return (sf);
-}
-
-#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
-static void
-sf_buf_ref(caddr_t addr, u_int size)
-{
- struct sf_buf *sf;
-
- sf = dtosf(addr);
- if (sf->refcnt == 0)
- panic("sf_buf_ref: referencing a free sf_buf");
- sf->refcnt++;
-}
-
-/*
- * Lose a reference to an sf_buf. When none left, detach mapped page
- * and release resources back to the system.
- *
- * Must be called at splimp.
- */
-static void
-sf_buf_free(caddr_t addr, u_int size)
-{
- struct sf_buf *sf;
- struct vm_page *m;
- int s;
-
- sf = dtosf(addr);
- if (sf->refcnt == 0)
- panic("sf_buf_free: freeing free sf_buf");
- sf->refcnt--;
- if (sf->refcnt == 0) {
- pmap_qremove((vm_offset_t)addr, 1);
- m = sf->m;
- s = splvm();
- vm_page_unwire(m, 0);
- /*
- * Check for the object going away on us. This can
- * happen since we don't hold a reference to it.
- * If so, we're responsible for freeing the page.
- */
- if (m->wire_count == 0 && m->object == NULL)
- vm_page_free(m);
- splx(s);
- sf->m = NULL;
- SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
- if (sf_buf_alloc_want) {
- sf_buf_alloc_want = 0;
- wakeup(&sf_freelist);
- }
- }
-}
-
-/*
- * sendfile(2).
- * int sendfile(int fd, int s, off_t offset, size_t nbytes,
- * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
- *
- * Send a file specified by 'fd' and starting at 'offset' to a socket
- * specified by 's'. Send only 'nbytes' of the file or until EOF if
- * nbytes == 0. Optionally add a header and/or trailer to the socket
- * output. If specified, write the total number of bytes sent into *sbytes.
- */
-int
-sendfile(struct proc *p, struct sendfile_args *uap)
-{
- struct file *fp;
- struct filedesc *fdp = p->p_fd;
- struct vnode *vp;
- struct vm_object *obj;
- struct socket *so;
- struct mbuf *m;
- struct sf_buf *sf;
- struct vm_page *pg;
- struct writev_args nuap;
- struct sf_hdtr hdtr;
- off_t off, xfsize, sbytes = 0;
- int error = 0, s;
-
- /*
- * Do argument checking. Must be a regular file in, stream
- * type and connected socket out, positive offset.
- */
- if (((u_int)uap->fd) >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
- (fp->f_flag & FREAD) == 0) {
- error = EBADF;
- goto done;
- }
- if (fp->f_type != DTYPE_VNODE) {
- error = EINVAL;
- goto done;
- }
- vp = (struct vnode *)fp->f_data;
- obj = vp->v_object;
- if (vp->v_type != VREG || obj == NULL) {
- error = EINVAL;
- goto done;
- }
- error = getsock(p->p_fd, uap->s, &fp);
- if (error)
- goto done;
- so = (struct socket *)fp->f_data;
- if (so->so_type != SOCK_STREAM) {
- error = EINVAL;
- goto done;
- }
- if ((so->so_state & SS_ISCONNECTED) == 0) {
- error = ENOTCONN;
- goto done;
- }
- if (uap->offset < 0) {
- error = EINVAL;
- goto done;
- }
-
- /*
- * If specified, get the pointer to the sf_hdtr struct for
- * any headers/trailers.
- */
- if (uap->hdtr != NULL) {
- error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
- if (error)
- goto done;
- /*
- * Send any headers. Wimp out and use writev(2).
- */
- if (hdtr.headers != NULL) {
- nuap.fd = uap->s;
- nuap.iovp = hdtr.headers;
- nuap.iovcnt = hdtr.hdr_cnt;
- error = writev(p, &nuap);
- if (error)
- goto done;
- sbytes += p->p_retval[0];
- }
- }
-
- /*
- * Protect against multiple writers to the socket.
- */
- (void) sblock(&so->so_snd, M_WAITOK);
-
- /*
- * Loop through the pages in the file, starting with the requested
- * offset. Get a file page (do I/O if necessary), map the file page
- * into an sf_buf, attach an mbuf header to the sf_buf, and queue
- * it on the socket.
- */
- for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
- vm_pindex_t pindex;
- vm_offset_t pgoff;
-
- pindex = OFF_TO_IDX(off);
-retry_lookup:
- /*
- * Calculate the amount to transfer. Not to exceed a page,
- * the EOF, or the passed in nbytes.
- */
- xfsize = obj->un_pager.vnp.vnp_size - off;
- if (xfsize > PAGE_SIZE)
- xfsize = PAGE_SIZE;
- pgoff = (vm_offset_t)(off & PAGE_MASK);
- if (PAGE_SIZE - pgoff < xfsize)
- xfsize = PAGE_SIZE - pgoff;
- if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
- xfsize = uap->nbytes - sbytes;
- if (xfsize <= 0)
- break;
- /*
- * Optimize the non-blocking case by looking at the socket space
- * before going to the extra work of constituting the sf_buf.
- */
- if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
- if (so->so_state & SS_CANTSENDMORE)
- error = EPIPE;
- else
- error = EAGAIN;
- sbunlock(&so->so_snd);
- goto done;
- }
- /*
- * Attempt to look up the page. If the page doesn't exist or the
- * part we're interested in isn't valid, then read it from disk.
- * If some other part of the kernel has this page (i.e. it's busy),
- * then disk I/O may be occuring on it, so wait and retry.
- */
- pg = vm_page_lookup(obj, pindex);
- if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
- !vm_page_is_valid(pg, pgoff, xfsize))) {
- struct uio auio;
- struct iovec aiov;
- int bsize;
-
- if (pg == NULL) {
- pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
- if (pg == NULL) {
- VM_WAIT;
- goto retry_lookup;
- }
- vm_page_flag_clear(pg, PG_BUSY);
- }
- /*
- * Ensure that our page is still around when the I/O completes.
- */
- vm_page_io_start(pg);
- vm_page_wire(pg);
- /*
- * Get the page from backing store.
- */
- bsize = vp->v_mount->mnt_stat.f_iosize;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- aiov.iov_base = 0;
- aiov.iov_len = MAXBSIZE;
- auio.uio_resid = MAXBSIZE;
- auio.uio_offset = trunc_page(off);
- auio.uio_segflg = UIO_NOCOPY;
- auio.uio_rw = UIO_READ;
- auio.uio_procp = p;
- vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
- error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
- p->p_ucred);
- VOP_UNLOCK(vp, 0, p);
- vm_page_flag_clear(pg, PG_ZERO);
- vm_page_io_finish(pg);
- if (error) {
- vm_page_unwire(pg, 0);
- /*
- * See if anyone else might know about this page.
- * If not and it is not valid, then free it.
- */
- if (pg->wire_count == 0 && pg->valid == 0 &&
- pg->busy == 0 && !(pg->flags & PG_BUSY) &&
- pg->hold_count == 0)
- vm_page_free(pg);
- sbunlock(&so->so_snd);
- goto done;
- }
- } else {
- if ((pg->flags & PG_BUSY) || pg->busy) {
- s = splvm();
- if ((pg->flags & PG_BUSY) || pg->busy) {
- /*
- * Page is busy. Wait and retry.
- */
- vm_page_flag_set(pg, PG_WANTED);
- tsleep(pg, PVM, "sfpbsy", 0);
- splx(s);
- goto retry_lookup;
- }
- splx(s);
- }
- /*
- * Protect from having the page ripped out from beneath us.
- */
- vm_page_wire(pg);
- }
- /*
- * Allocate a kernel virtual page and insert the physical page
- * into it.
- */
- sf = sf_buf_alloc();
- sf->m = pg;
- pmap_qenter(sf->kva, &pg, 1);
- /*
- * Get an mbuf header and set it up as having external storage.
- */
- MGETHDR(m, M_WAIT, MT_DATA);
- m->m_ext.ext_free = sf_buf_free;
- m->m_ext.ext_ref = sf_buf_ref;
- m->m_ext.ext_buf = (void *)sf->kva;
- m->m_ext.ext_size = PAGE_SIZE;
- m->m_data = (char *) sf->kva + pgoff;
- m->m_flags |= M_EXT;
- m->m_pkthdr.len = m->m_len = xfsize;
- /*
- * Add the buffer to the socket buffer chain.
- */
- s = splnet();
-retry_space:
- /*
- * Make sure that the socket is still able to take more data.
- * CANTSENDMORE being true usually means that the connection
- * was closed. so_error is true when an error was sensed after
- * a previous send.
- * The state is checked after the page mapping and buffer
- * allocation above since those operations may block and make
- * any socket checks stale. From this point forward, nothing
- * blocks before the pru_send (or more accurately, any blocking
- * results in a loop back to here to re-check).
- */
- if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
- if (so->so_state & SS_CANTSENDMORE) {
- error = EPIPE;
- } else {
- error = so->so_error;
- so->so_error = 0;
- }
- m_freem(m);
- sbunlock(&so->so_snd);
- splx(s);
- goto done;
- }
- /*
- * Wait for socket space to become available. We do this just
- * after checking the connection state above in order to avoid
- * a race condition with sbwait().
- */
- if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
- if (so->so_state & SS_NBIO) {
- m_freem(m);
- sbunlock(&so->so_snd);
- splx(s);
- error = EAGAIN;
- goto done;
- }
- error = sbwait(&so->so_snd);
- /*
- * An error from sbwait usually indicates that we've
- * been interrupted by a signal. If we've sent anything
- * then return bytes sent, otherwise return the error.
- */
- if (error) {
- m_freem(m);
- sbunlock(&so->so_snd);
- splx(s);
- goto done;
- }
- goto retry_space;
- }
- error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
- splx(s);
- if (error) {
- sbunlock(&so->so_snd);
- goto done;
- }
- }
- sbunlock(&so->so_snd);
-
- /*
- * Send trailers. Wimp out and use writev(2).
- */
- if (uap->hdtr != NULL && hdtr.trailers != NULL) {
- nuap.fd = uap->s;
- nuap.iovp = hdtr.trailers;
- nuap.iovcnt = hdtr.trl_cnt;
- error = writev(p, &nuap);
- if (error)
- goto done;
- sbytes += p->p_retval[0];
- }
-
-done:
- if (uap->sbytes != NULL) {
- copyout(&sbytes, uap->sbytes, sizeof(off_t));
- }
- return (error);
-}