aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGlen Barber <gjb@FreeBSD.org>2013-09-10 19:00:32 +0000
committerGlen Barber <gjb@FreeBSD.org>2013-09-10 19:00:32 +0000
commit6d9936ffafbc75df7b91f4ddcb845d36b19cfd06 (patch)
tree32c7a4f3069e78f4ad55235a4cdd9143bace2108
parente2630701b4342b36d0b5793620536bc5e9436d1b (diff)
downloadsrc-6d9936ffafbc75df7b91f4ddcb845d36b19cfd06.tar.gz
src-6d9936ffafbc75df7b91f4ddcb845d36b19cfd06.zip
Reintegrate r250907 (previously reverted as r254754) plus MFS r254947,
r255443: r250907 (scottl): MFC r248830, r250027, r250409: Several fixes and improvements to sendfile() r254947 (kib): NFS implementation of VOP_READ() sometimes upgrades the vnode lock, which causes drop of the shared lock and sleep for exclusive. As result, busying of the page before the call to vn_rdwr() makes NFS code to wait for vnode lock while page is busy, which contradicts the proper order of vnode lock -> busy. r255443 (des): Fix the length calculation for the final block of a sendfile(2) transmission which could be tricked into rounding up to the nearest page size, leaking up to a page of kernel memory. [13:11] Approved by: re (delphij) Sponsored by: The FreeBSD Foundation
Notes
Notes: svn path=/releng/9.2/; revision=255456
-rw-r--r--sys/kern/uipc_syscalls.c94
1 files changed, 56 insertions, 38 deletions
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 6ea7d3481530..309310a2cb18 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1835,9 +1835,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
struct mbuf *m = NULL;
struct sf_buf *sf;
struct vm_page *pg;
+ struct vattr va;
off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
int error, hdrlen = 0, mnw = 0;
int vfslocked;
+ int bsize;
struct sendfile_sync *sfs = NULL;
/*
@@ -1852,6 +1854,18 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
if (vp->v_type == VREG) {
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ if (uap->nbytes == 0) {
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0) {
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ obj = NULL;
+ goto out;
+ }
+ rem = va.va_size;
+ } else
+ rem = uap->nbytes;
obj = vp->v_object;
if (obj != NULL) {
/*
@@ -1869,7 +1883,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
obj = NULL;
}
}
- }
+ } else
+ bsize = 0; /* silence gcc */
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
if (obj == NULL) {
@@ -1962,11 +1977,20 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* The outer loop checks the state and available space of the socket
* and takes care of the overall progress.
*/
- for (off = uap->offset, rem = uap->nbytes; ; ) {
- struct mbuf *mtail = NULL;
- int loopbytes = 0;
- int space = 0;
- int done = 0;
+ for (off = uap->offset; ; ) {
+ struct mbuf *mtail;
+ int loopbytes;
+ int space;
+ int done;
+
+ if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
+ (uap->nbytes == 0 && va.va_size == fsbytes))
+ break;
+
+ mtail = NULL;
+ loopbytes = 0;
+ space = 0;
+ done = 0;
/*
* Check the socket state for ongoing connection,
@@ -2034,6 +2058,20 @@ retry_space:
*/
space -= hdrlen;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0) {
+ VFS_UNLOCK_GIANT(vfslocked);
+ goto done;
+ }
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0 || off >= va.va_size) {
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ goto done;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+
/*
* Loop and construct maximum sized mbuf chain to be bulk
* dumped into socket buffer.
@@ -2043,25 +2081,19 @@ retry_space:
vm_offset_t pgoff;
struct mbuf *m0;
- VM_OBJECT_LOCK(obj);
/*
* Calculate the amount to transfer.
* Not to exceed a page, the EOF,
* or the passed in nbytes.
*/
pgoff = (vm_offset_t)(off & PAGE_MASK);
- xfsize = omin(PAGE_SIZE - pgoff,
- obj->un_pager.vnp.vnp_size - uap->offset -
- fsbytes - loopbytes);
- if (uap->nbytes)
- rem = (uap->nbytes - fsbytes - loopbytes);
- else
- rem = obj->un_pager.vnp.vnp_size -
- uap->offset - fsbytes - loopbytes;
- xfsize = omin(rem, xfsize);
+ rem = va.va_size - uap->offset;
+ if (uap->nbytes != 0)
+ rem = omin(rem, uap->nbytes);
+ rem -= fsbytes + loopbytes;
+ xfsize = omin(PAGE_SIZE - pgoff, rem);
xfsize = omin(space - loopbytes, xfsize);
if (xfsize <= 0) {
- VM_OBJECT_UNLOCK(obj);
done = 1; /* all data sent */
break;
}
@@ -2071,6 +2103,7 @@ retry_space:
* if not found or wait and loop if busy.
*/
pindex = OFF_TO_IDX(off);
+ VM_OBJECT_LOCK(obj);
pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
@@ -2088,42 +2121,25 @@ retry_space:
else if (uap->flags & SF_NODISKIO)
error = EBUSY;
else {
- int bsize;
ssize_t resid;
- /*
- * Ensure that our page is still around
- * when the I/O completes.
- */
- vm_page_io_start(pg);
VM_OBJECT_UNLOCK(obj);
/*
* Get the page from backing store.
- */
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_lock(vp, LK_SHARED);
- if (error != 0)
- goto after_read;
- bsize = vp->v_mount->mnt_stat.f_iosize;
-
- /*
* XXXMAC: Because we don't have fp->f_cred
* here, we pass in NOCRED. This is probably
* wrong, but is consistent with our original
* implementation.
*/
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
td->td_ucred, NOCRED, &resid, td);
- VOP_UNLOCK(vp, 0);
- after_read:
VFS_UNLOCK_GIANT(vfslocked);
- VM_OBJECT_LOCK(obj);
- vm_page_io_finish(pg);
- if (!error)
- VM_OBJECT_UNLOCK(obj);
+ if (error)
+ VM_OBJECT_LOCK(obj);
mbstat.sf_iocnt++;
}
if (error) {
@@ -2174,7 +2190,7 @@ retry_space:
m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
if (m0 == NULL) {
error = (mnw ? EAGAIN : ENOBUFS);
- sf_buf_mext((void *)sf_buf_kva(sf), sf);
+ sf_buf_mext(NULL, sf);
break;
}
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
@@ -2202,6 +2218,8 @@ retry_space:
}
}
+ VOP_UNLOCK(vp, 0);
+
/* Add the buffer chain to the socket buffer. */
if (m != NULL) {
int mlen, err;