mirror of
https://github.com/opnsense/src.git
synced 2026-06-08 16:22:46 -04:00
Implement sendfile(2) for POSIX shared memory segment file descriptors,
in addition to regular files. Requested by: alc Discussed with: emaste Tested by: pho (previous version) Sponsored by: The FreeBSD Foundation Approved by: re (hrs)
This commit is contained in:
parent
ed03145ec8
commit
227aaa86ed
2 changed files with 251 additions and 127 deletions
|
|
@ -134,7 +134,7 @@ static struct fileops shm_ops = {
|
|||
.fo_close = shm_close,
|
||||
.fo_chmod = shm_chmod,
|
||||
.fo_chown = shm_chown,
|
||||
.fo_sendfile = invfo_sendfile,
|
||||
.fo_sendfile = vn_sendfile,
|
||||
.fo_seek = shm_seek,
|
||||
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
|
||||
};
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
|
|||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/capability.h>
|
||||
#include <sys/condvar.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
|
|
@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
|
|||
#include <sys/file.h>
|
||||
#include <sys/filio.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/protosw.h>
|
||||
|
|
@ -86,7 +88,7 @@ __FBSDID("$FreeBSD$");
|
|||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pageout.h>
|
||||
#include <vm/vm_pager.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_extern.h>
|
||||
|
||||
|
|
@ -1850,8 +1852,6 @@ getsockaddr(namp, uaddr, len)
|
|||
return (error);
|
||||
}
|
||||
|
||||
#include <sys/condvar.h>
|
||||
|
||||
struct sendfile_sync {
|
||||
struct mtx mtx;
|
||||
struct cv cv;
|
||||
|
|
@ -1917,6 +1917,10 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
|
|||
cap_rights_t rights;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* File offset must be positive. If it goes beyond EOF
|
||||
* we send only the header/trailer and no payload data.
|
||||
*/
|
||||
if (uap->offset < 0)
|
||||
return (EINVAL);
|
||||
|
||||
|
|
@ -1978,79 +1982,240 @@ freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
|
|||
}
|
||||
#endif /* COMPAT_FREEBSD4 */
|
||||
|
||||
/*
 * Grab and wire the page of obj that backs file offset off and make sure
 * it holds valid data for xfsize bytes starting at (off & PAGE_MASK).
 *
 * obj    - VM object the page is taken from.
 * vp     - backing vnode, or NULL.  With a vnode the page is filled via
 *          vn_rdwr(); without one it is filled through the pager interface
 *          or zeroed when the pager has no copy.
 * nd     - when non-zero, no I/O is started for a page that is not yet
 *          valid; nd itself is returned as the error instead.
 * off    - byte offset; the page index is OFF_TO_IDX(off).
 * xfsize - number of bytes that must be valid.
 * bsize  - divisor used to build the sequential-access hint for vn_rdwr().
 * td     - thread whose td_ucred is passed to vn_rdwr().
 * res    - out: the wired page; written only when 0 is returned.
 *
 * Returns 0 on success, nd when I/O was required but disallowed, EIO when
 * the pager path fails to produce the page, or the error from vn_rdwr().
 * The object lock is taken and released internally.
 */
static int
|
||||
sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd,
|
||||
off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res)
|
||||
{
|
||||
vm_page_t m;
|
||||
vm_pindex_t pindex;
|
||||
ssize_t resid;
|
||||
int error, readahead, rv;
|
||||
|
||||
pindex = OFF_TO_IDX(off);
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
/*
 * For a vnode-backed object the page is grabbed without busying it;
 * otherwise the default grab flags apply and every exit path below
 * calls vm_page_xunbusy() on the page.
 */
m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY |
|
||||
VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
|
||||
|
||||
/*
 * Check if the page is valid for what we need, otherwise initiate I/O.
 *
 * A non-zero nd argument prevents disk I/O; instead we return to
 * the caller the value it passed in nd.  In particular, if some
 * pages have already been turned into mbufs, nd == EAGAIN so that
 * the main function sends those pages before we come here again
 * and block.
 */
|
||||
if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) {
|
||||
if (vp == NULL)
|
||||
vm_page_xunbusy(m);
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
*res = m;
|
||||
return (0);
|
||||
} else if (nd != 0) {
|
||||
if (vp == NULL)
|
||||
vm_page_xunbusy(m);
|
||||
error = nd;
|
||||
/* Jumps into the error arm below to unwire (and maybe free) m. */
goto free_page;
|
||||
}
|
||||
|
||||
/*
 * Get the page from backing store.
 */
|
||||
error = 0;
|
||||
if (vp != NULL) {
|
||||
/* The object lock is dropped across vn_rdwr() and retaken after. */
VM_OBJECT_WUNLOCK(obj);
|
||||
readahead = sfreadahead * MAXBSIZE;
|
||||
|
||||
/*
 * Use vn_rdwr() instead of the pager interface for
 * the vnode, to allow the read-ahead.
 *
 * XXXMAC: Because we don't have fp->f_cred here, we
 * pass in NOCRED.  This is probably wrong, but is
 * consistent with our original implementation.
 */
|
||||
error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off),
|
||||
UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead /
|
||||
bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td);
|
||||
SFSTAT_INC(sf_iocnt);
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
} else {
|
||||
if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
|
||||
rv = vm_pager_get_pages(obj, &m, 1, 0);
|
||||
SFSTAT_INC(sf_iocnt);
|
||||
/* Look the page up again rather than trusting m after the pager call. */
m = vm_page_lookup(obj, pindex);
|
||||
if (m == NULL)
|
||||
error = EIO;
|
||||
else if (rv != VM_PAGER_OK) {
|
||||
/*
 * NOTE(review): m was grabbed with VM_ALLOC_WIRED and has
 * not been unwired on this path -- confirm that freeing
 * it here is legal.
 */
vm_page_lock(m);
|
||||
vm_page_free(m);
|
||||
vm_page_unlock(m);
|
||||
m = NULL;
|
||||
error = EIO;
|
||||
}
|
||||
} else {
|
||||
/* The pager has no copy of this page: hand out a zeroed, clean page. */
pmap_zero_page(m);
|
||||
m->valid = VM_PAGE_BITS_ALL;
|
||||
m->dirty = 0;
|
||||
}
|
||||
if (m != NULL)
|
||||
vm_page_xunbusy(m);
|
||||
}
|
||||
if (error == 0) {
|
||||
*res = m;
|
||||
} else if (m != NULL) {
|
||||
free_page:
|
||||
vm_page_lock(m);
|
||||
vm_page_unwire(m, 0);
|
||||
|
||||
/*
 * See if anyone else might know about this page.  If
 * not and it is not valid, then free it.
 */
|
||||
if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m))
|
||||
vm_page_free(m);
|
||||
vm_page_unlock(m);
|
||||
}
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
/*
 * NOTE(review): this demands a fully valid page, whereas the early
 * return above accepts a page that is valid only for the requested
 * range -- confirm the stricter check is intended for this path.
 */
KASSERT(error != 0 || (m->wire_count > 0 && m->valid ==
|
||||
VM_PAGE_BITS_ALL),
|
||||
("wrong page state m %p", m));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
|
||||
struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
|
||||
int *bsize)
|
||||
{
|
||||
struct vattr va;
|
||||
vm_object_t obj;
|
||||
struct vnode *vp;
|
||||
struct shmfd *shmfd;
|
||||
int error;
|
||||
|
||||
vp = *vp_res = NULL;
|
||||
obj = NULL;
|
||||
shmfd = *shmfd_res = NULL;
|
||||
*bsize = 0;
|
||||
|
||||
/*
|
||||
* The file descriptor must be a regular file and have a
|
||||
* backing VM object.
|
||||
*/
|
||||
if (fp->f_type == DTYPE_VNODE) {
|
||||
vp = fp->f_vnode;
|
||||
vn_lock(vp, LK_SHARED | LK_RETRY);
|
||||
if (vp->v_type != VREG) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
*bsize = vp->v_mount->mnt_stat.f_iosize;
|
||||
error = VOP_GETATTR(vp, &va, td->td_ucred);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
*obj_size = va.va_size;
|
||||
obj = vp->v_object;
|
||||
if (obj == NULL) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
} else if (fp->f_type == DTYPE_SHM) {
|
||||
shmfd = fp->f_data;
|
||||
obj = shmfd->shm_object;
|
||||
*obj_size = shmfd->shm_size;
|
||||
} else {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
if ((obj->flags & OBJ_DEAD) != 0) {
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
error = EBADF;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Temporarily increase the backing VM object's reference
|
||||
* count so that a forced reclamation of its vnode does not
|
||||
* immediately destroy it.
|
||||
*/
|
||||
vm_object_reference_locked(obj);
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
*obj_res = obj;
|
||||
*vp_res = vp;
|
||||
*shmfd_res = shmfd;
|
||||
|
||||
out:
|
||||
if (vp != NULL)
|
||||
VOP_UNLOCK(vp, 0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
|
||||
struct socket **so)
|
||||
{
|
||||
cap_rights_t rights;
|
||||
int error;
|
||||
|
||||
*sock_fp = NULL;
|
||||
*so = NULL;
|
||||
|
||||
/*
|
||||
* The socket must be a stream socket and connected.
|
||||
*/
|
||||
error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights,
|
||||
CAP_SEND), sock_fp, NULL);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
*so = (*sock_fp)->f_data;
|
||||
if ((*so)->so_type != SOCK_STREAM)
|
||||
return (EINVAL);
|
||||
if (((*so)->so_state & SS_ISCONNECTED) == 0)
|
||||
return (ENOTCONN);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
||||
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
|
||||
int kflags, struct thread *td)
|
||||
{
|
||||
struct vnode *vp = fp->f_vnode;
|
||||
struct file *sock_fp;
|
||||
struct vm_object *obj = NULL;
|
||||
struct socket *so = NULL;
|
||||
struct mbuf *m = NULL;
|
||||
struct vnode *vp;
|
||||
struct vm_object *obj;
|
||||
struct socket *so;
|
||||
struct mbuf *m;
|
||||
struct sf_buf *sf;
|
||||
struct vm_page *pg;
|
||||
struct shmfd *shmfd;
|
||||
struct sendfile_sync *sfs;
|
||||
struct vattr va;
|
||||
struct sendfile_sync *sfs = NULL;
|
||||
cap_rights_t rights;
|
||||
off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
|
||||
int bsize, error, hdrlen = 0, mnw = 0;
|
||||
off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
|
||||
int error, bsize, nd, hdrlen, mnw;
|
||||
bool inflight_called;
|
||||
|
||||
vn_lock(vp, LK_SHARED | LK_RETRY);
|
||||
if (vp->v_type == VREG) {
|
||||
bsize = vp->v_mount->mnt_stat.f_iosize;
|
||||
if (nbytes == 0) {
|
||||
error = VOP_GETATTR(vp, &va, td->td_ucred);
|
||||
if (error != 0) {
|
||||
VOP_UNLOCK(vp, 0);
|
||||
obj = NULL;
|
||||
goto out;
|
||||
}
|
||||
rem = va.va_size;
|
||||
} else
|
||||
rem = nbytes;
|
||||
obj = vp->v_object;
|
||||
if (obj != NULL) {
|
||||
/*
|
||||
* Temporarily increase the backing VM
|
||||
* object's reference count so that a forced
|
||||
* reclamation of its vnode does not
|
||||
* immediately destroy it.
|
||||
*/
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
if ((obj->flags & OBJ_DEAD) == 0) {
|
||||
vm_object_reference_locked(obj);
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
} else {
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
obj = NULL;
|
||||
}
|
||||
}
|
||||
} else
|
||||
bsize = 0; /* silence gcc */
|
||||
VOP_UNLOCK(vp, 0);
|
||||
if (obj == NULL) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
obj = NULL;
|
||||
so = NULL;
|
||||
m = NULL;
|
||||
sfs = NULL;
|
||||
fsbytes = sbytes = 0;
|
||||
hdrlen = mnw = 0;
|
||||
rem = nbytes;
|
||||
inflight_called = false;
|
||||
|
||||
/*
|
||||
* The socket must be a stream socket and connected.
|
||||
* Remember if it a blocking or non-blocking socket.
|
||||
*/
|
||||
error = getsock_cap(td->td_proc->p_fd, sockfd,
|
||||
cap_rights_init(&rights, CAP_SEND), &sock_fp, NULL);
|
||||
error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
if (rem == 0)
|
||||
rem = obj_size;
|
||||
|
||||
error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
so = sock_fp->f_data;
|
||||
if (so->so_type != SOCK_STREAM) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if ((so->so_state & SS_ISCONNECTED) == 0) {
|
||||
error = ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not wait on memory allocations but return ENOMEM for
|
||||
* caller to retry later.
|
||||
|
|
@ -2123,7 +2288,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
|
|||
int done;
|
||||
|
||||
if ((nbytes != 0 && nbytes == fsbytes) ||
|
||||
(nbytes == 0 && va.va_size == fsbytes))
|
||||
(nbytes == 0 && obj_size == fsbytes))
|
||||
break;
|
||||
|
||||
mtail = NULL;
|
||||
|
|
@ -2197,13 +2362,16 @@ retry_space:
|
|||
*/
|
||||
space -= hdrlen;
|
||||
|
||||
error = vn_lock(vp, LK_SHARED);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
error = VOP_GETATTR(vp, &va, td->td_ucred);
|
||||
if (error != 0 || off >= va.va_size) {
|
||||
VOP_UNLOCK(vp, 0);
|
||||
goto done;
|
||||
if (vp != NULL) {
|
||||
error = vn_lock(vp, LK_SHARED);
|
||||
if (error != 0)
|
||||
goto done;
|
||||
error = VOP_GETATTR(vp, &va, td->td_ucred);
|
||||
if (error != 0 || off >= va.va_size) {
|
||||
VOP_UNLOCK(vp, 0);
|
||||
goto done;
|
||||
}
|
||||
obj_size = va.va_size;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -2211,7 +2379,6 @@ retry_space:
|
|||
* dumped into socket buffer.
|
||||
*/
|
||||
while (space > loopbytes) {
|
||||
vm_pindex_t pindex;
|
||||
vm_offset_t pgoff;
|
||||
struct mbuf *m0;
|
||||
|
||||
|
|
@ -2221,7 +2388,7 @@ retry_space:
|
|||
* or the passed in nbytes.
|
||||
*/
|
||||
pgoff = (vm_offset_t)(off & PAGE_MASK);
|
||||
rem = va.va_size - offset;
|
||||
rem = obj_size - offset;
|
||||
if (nbytes != 0)
|
||||
rem = omin(rem, nbytes);
|
||||
rem -= fsbytes + loopbytes;
|
||||
|
|
@ -2236,59 +2403,15 @@ retry_space:
|
|||
* Attempt to look up the page. Allocate
|
||||
* if not found or wait and loop if busy.
|
||||
*/
|
||||
pindex = OFF_TO_IDX(off);
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
|
||||
VM_ALLOC_IGN_SBUSY | VM_ALLOC_NORMAL |
|
||||
VM_ALLOC_WIRED);
|
||||
|
||||
/*
|
||||
* Check if page is valid for what we need,
|
||||
* otherwise initiate I/O.
|
||||
* If we already turned some pages into mbufs,
|
||||
* send them off before we come here again and
|
||||
* block.
|
||||
*/
|
||||
if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
else if (m != NULL)
|
||||
error = EAGAIN; /* send what we already got */
|
||||
else if (flags & SF_NODISKIO)
|
||||
error = EBUSY;
|
||||
else {
|
||||
ssize_t resid;
|
||||
int readahead = sfreadahead * MAXBSIZE;
|
||||
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
|
||||
/*
|
||||
* Get the page from backing store.
|
||||
* XXXMAC: Because we don't have fp->f_cred
|
||||
* here, we pass in NOCRED. This is probably
|
||||
* wrong, but is consistent with our original
|
||||
* implementation.
|
||||
*/
|
||||
error = vn_rdwr(UIO_READ, vp, NULL, readahead,
|
||||
trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
|
||||
IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
|
||||
td->td_ucred, NOCRED, &resid, td);
|
||||
SFSTAT_INC(sf_iocnt);
|
||||
if (error != 0)
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
}
|
||||
if (m != NULL)
|
||||
nd = EAGAIN; /* send what we already got */
|
||||
else if ((flags & SF_NODISKIO) != 0)
|
||||
nd = EBUSY;
|
||||
else
|
||||
nd = 0;
|
||||
error = sendfile_readpage(obj, vp, nd, off,
|
||||
xfsize, bsize, td, &pg);
|
||||
if (error != 0) {
|
||||
vm_page_lock(pg);
|
||||
vm_page_unwire(pg, 0);
|
||||
/*
|
||||
* See if anyone else might know about
|
||||
* this page. If not and it is not valid,
|
||||
* then free it.
|
||||
*/
|
||||
if (pg->wire_count == 0 && pg->valid == 0 &&
|
||||
!vm_page_busied(pg))
|
||||
vm_page_free(pg);
|
||||
vm_page_unlock(pg);
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
if (error == EAGAIN)
|
||||
error = 0; /* not a real error */
|
||||
break;
|
||||
|
|
@ -2358,7 +2481,8 @@ retry_space:
|
|||
}
|
||||
}
|
||||
|
||||
VOP_UNLOCK(vp, 0);
|
||||
if (vp != NULL)
|
||||
VOP_UNLOCK(vp, 0);
|
||||
|
||||
/* Add the buffer chain to the socket buffer. */
|
||||
if (m != NULL) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue