uipc_syscalls.c revision 321020
/*-
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
3160786Sps * 3260786Sps * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 3360786Sps */ 3460786Sps 3560786Sps#include <sys/cdefs.h> 3660786Sps__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_syscalls.c 321020 2017-07-15 17:25:40Z dchagin $"); 3760786Sps 3860786Sps#include "opt_capsicum.h" 3960786Sps#include "opt_inet.h" 4060786Sps#include "opt_inet6.h" 4160786Sps#include "opt_compat.h" 4260786Sps#include "opt_ktrace.h" 4360786Sps 4460786Sps#include <sys/param.h> 4589019Sps#include <sys/systm.h> 4660786Sps#include <sys/capsicum.h> 4760786Sps#include <sys/condvar.h> 4863128Sps#include <sys/kernel.h> 4963128Sps#include <sys/lock.h> 5060786Sps#include <sys/mutex.h> 5160786Sps#include <sys/sysproto.h> 5260786Sps#include <sys/malloc.h> 5360786Sps#include <sys/filedesc.h> 5460786Sps#include <sys/event.h> 5560786Sps#include <sys/proc.h> 5660786Sps#include <sys/fcntl.h> 5760786Sps#include <sys/file.h> 5860786Sps#include <sys/filio.h> 5960786Sps#include <sys/jail.h> 6060786Sps#include <sys/mman.h> 6160786Sps#include <sys/mount.h> 6260786Sps#include <sys/mbuf.h> 6360786Sps#include <sys/protosw.h> 6460786Sps#include <sys/rwlock.h> 6560786Sps#include <sys/sf_buf.h> 6660786Sps#include <sys/sysent.h> 6760786Sps#include <sys/socket.h> 6860786Sps#include <sys/socketvar.h> 6960786Sps#include <sys/signalvar.h> 7060786Sps#include <sys/syscallsubr.h> 7160786Sps#include <sys/sysctl.h> 7260786Sps#include <sys/uio.h> 7360786Sps#include <sys/vnode.h> 7463128Sps#ifdef KTRACE 7560786Sps#include <sys/ktrace.h> 7660786Sps#endif 7760786Sps#ifdef COMPAT_FREEBSD32 7860786Sps#include <compat/freebsd32/freebsd32_util.h> 7960786Sps#endif 8060786Sps 8160786Sps#include <net/vnet.h> 8260786Sps 8360786Sps#include <security/audit/audit.h> 8460786Sps#include <security/mac/mac_framework.h> 8560786Sps 8660786Sps#include <vm/vm.h> 8760786Sps#include <vm/vm_param.h> 8860786Sps#include <vm/vm_object.h> 8960786Sps#include <vm/vm_page.h> 9060786Sps#include <vm/vm_pager.h> 9160786Sps#include <vm/vm_kern.h> 
9260786Sps#include <vm/vm_extern.h> 9360786Sps 9460786Sps/* 9560786Sps * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC 9660786Sps * and SOCK_NONBLOCK. 9760786Sps */ 9860786Sps#define ACCEPT4_INHERIT 0x1 9960786Sps#define ACCEPT4_COMPAT 0x2 10060786Sps 10160786Spsstatic int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 10260786Spsstatic int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 10360786Sps 10460786Spsstatic int accept1(struct thread *td, int s, struct sockaddr *uname, 10563128Sps socklen_t *anamelen, int flags); 10689019Spsstatic int do_sendfile(struct thread *td, struct sendfile_args *uap, 10760786Sps int compat); 10860786Spsstatic int getsockname1(struct thread *td, struct getsockname_args *uap, 10960786Sps int compat); 11060786Spsstatic int getpeername1(struct thread *td, struct getpeername_args *uap, 11189019Sps int compat); 11289019Sps 11389019Spscounter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; 11489019Sps 11589019Sps/* 11689019Sps * sendfile(2)-related variables and associated sysctls 11789019Sps */ 11889019Spsstatic SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, 11989019Sps "sendfile(2) tunables"); 12060786Spsstatic int sfreadahead = 1; 12160786SpsSYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, 12260786Sps &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); 12360786Sps 12460786Sps 12560786Spsstatic void 12660786Spssfstat_init(const void *unused) 12760786Sps{ 12860786Sps 12960786Sps COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), 13060786Sps M_WAITOK); 13160786Sps} 13260786SpsSYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); 13360786Sps 13460786Spsstatic int 13560786Spssfstat_sysctl(SYSCTL_HANDLER_ARGS) 13660786Sps{ 13760786Sps struct sfstat s; 13860786Sps 13960786Sps COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); 14060786Sps if (req->newptr) 14160786Sps COUNTER_ARRAY_ZERO(sfstat, 
sizeof(s) / sizeof(uint64_t)); 14260786Sps return (SYSCTL_OUT(req, &s, sizeof(s))); 14360786Sps} 14460786SpsSYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, 14560786Sps NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); 14660786Sps 14760786Sps/* 14860786Sps * Convert a user file descriptor to a kernel file entry and check if required 14960786Sps * capability rights are present. 15060786Sps * A reference on the file entry is held upon returning. 15160786Sps */ 15260786Spsint 15360786Spsgetsock_cap(struct filedesc *fdp, int fd, cap_rights_t *rightsp, 15460786Sps struct file **fpp, u_int *fflagp) 15560786Sps{ 15660786Sps struct file *fp; 15789019Sps int error; 15889019Sps 15989019Sps error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 16060786Sps if (error != 0) 16160786Sps return (error); 16260786Sps if (fp->f_type != DTYPE_SOCKET) { 16360786Sps fdrop(fp, curthread); 16460786Sps return (ENOTSOCK); 16560786Sps } 16660786Sps if (fflagp != NULL) 16760786Sps *fflagp = fp->f_flag; 16860786Sps *fpp = fp; 16960786Sps return (0); 17060786Sps} 17160786Sps 17260786Sps/* 17360786Sps * System call interface to the socket abstraction. 
17460786Sps */ 17560786Sps#if defined(COMPAT_43) 17660786Sps#define COMPAT_OLDSOCK 17760786Sps#endif 17860786Sps 17960786Spsint 18060786Spssys_socket(td, uap) 18160786Sps struct thread *td; 18260786Sps struct socket_args /* { 18360786Sps int domain; 18460786Sps int type; 18560786Sps int protocol; 18660786Sps } */ *uap; 18760786Sps{ 18860786Sps struct socket *so; 18960786Sps struct file *fp; 19060786Sps int fd, error, type, oflag, fflag; 19160786Sps 19260786Sps AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 19360786Sps 19460786Sps type = uap->type; 19560786Sps oflag = 0; 19660786Sps fflag = 0; 19760786Sps if ((type & SOCK_CLOEXEC) != 0) { 19860786Sps type &= ~SOCK_CLOEXEC; 19960786Sps oflag |= O_CLOEXEC; 20060786Sps } 20160786Sps if ((type & SOCK_NONBLOCK) != 0) { 20260786Sps type &= ~SOCK_NONBLOCK; 20360786Sps fflag |= FNONBLOCK; 20460786Sps } 20560786Sps 20663128Sps#ifdef MAC 20763128Sps error = mac_socket_check_create(td->td_ucred, uap->domain, type, 20863128Sps uap->protocol); 20963128Sps if (error != 0) 21063128Sps return (error); 21163128Sps#endif 21260786Sps error = falloc(td, &fp, &fd, oflag); 21360786Sps if (error != 0) 21460786Sps return (error); 21560786Sps /* An extra reference on `fp' has been held for us by falloc(). 
*/ 21660786Sps error = socreate(uap->domain, &so, type, uap->protocol, 21760786Sps td->td_ucred, td); 21860786Sps if (error != 0) { 21960786Sps fdclose(td, fp, fd); 22060786Sps } else { 22160786Sps finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 22260786Sps if ((fflag & FNONBLOCK) != 0) 22360786Sps (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 22460786Sps td->td_retval[0] = fd; 22560786Sps } 22660786Sps fdrop(fp, td); 22760786Sps return (error); 22860786Sps} 22960786Sps 23060786Sps/* ARGSUSED */ 23160786Spsint 23260786Spssys_bind(td, uap) 23360786Sps struct thread *td; 23460786Sps struct bind_args /* { 23560786Sps int s; 23660786Sps caddr_t name; 23760786Sps int namelen; 23860786Sps } */ *uap; 23960786Sps{ 24060786Sps struct sockaddr *sa; 24160786Sps int error; 24260786Sps 24360786Sps error = getsockaddr(&sa, uap->name, uap->namelen); 24460786Sps if (error == 0) { 24560786Sps error = kern_bind(td, uap->s, sa); 24660786Sps free(sa, M_SONAME); 24760786Sps } 24860786Sps return (error); 24960786Sps} 25060786Sps 25160786Spsstatic int 25260786Spskern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 25360786Sps{ 25460786Sps struct socket *so; 25560786Sps struct file *fp; 25660786Sps cap_rights_t rights; 25760786Sps int error; 25860786Sps 25960786Sps AUDIT_ARG_FD(fd); 26060786Sps AUDIT_ARG_SOCKADDR(td, dirfd, sa); 26160786Sps error = getsock_cap(td->td_proc->p_fd, fd, 26260786Sps cap_rights_init(&rights, CAP_BIND), &fp, NULL); 26360786Sps if (error != 0) 26460786Sps return (error); 26560786Sps so = fp->f_data; 26660786Sps#ifdef KTRACE 26760786Sps if (KTRPOINT(td, KTR_STRUCT)) 26860786Sps ktrsockaddr(sa); 26960786Sps#endif 27060786Sps#ifdef MAC 27160786Sps error = mac_socket_check_bind(td->td_ucred, so, sa); 27260786Sps if (error == 0) { 27360786Sps#endif 27460786Sps if (dirfd == AT_FDCWD) 27560786Sps error = sobind(so, sa, td); 27660786Sps else 27760786Sps error = sobindat(dirfd, so, sa, td); 27860786Sps#ifdef MAC 27960786Sps } 
28060786Sps#endif 28160786Sps fdrop(fp, td); 28260786Sps return (error); 28360786Sps} 28460786Sps 28560786Spsint 28660786Spskern_bind(struct thread *td, int fd, struct sockaddr *sa) 28760786Sps{ 28860786Sps 28960786Sps return (kern_bindat(td, AT_FDCWD, fd, sa)); 29060786Sps} 29160786Sps 29260786Sps/* ARGSUSED */ 29360786Spsint 29460786Spssys_bindat(td, uap) 29560786Sps struct thread *td; 29660786Sps struct bindat_args /* { 29760786Sps int fd; 29860786Sps int s; 29960786Sps caddr_t name; 30060786Sps int namelen; 30160786Sps } */ *uap; 30260786Sps{ 30389019Sps struct sockaddr *sa; 30460786Sps int error; 30589019Sps 30660786Sps error = getsockaddr(&sa, uap->name, uap->namelen); 30760786Sps if (error == 0) { 30860786Sps error = kern_bindat(td, uap->fd, uap->s, sa); 30960786Sps free(sa, M_SONAME); 31060786Sps } 31160786Sps return (error); 31260786Sps} 31360786Sps 31460786Sps/* ARGSUSED */ 31560786Spsint 31660786Spssys_listen(td, uap) 31760786Sps struct thread *td; 31860786Sps struct listen_args /* { 31960786Sps int s; 32060786Sps int backlog; 32160786Sps } */ *uap; 32260786Sps{ 32360786Sps struct socket *so; 32460786Sps struct file *fp; 32560786Sps cap_rights_t rights; 32660786Sps int error; 32760786Sps 32860786Sps AUDIT_ARG_FD(uap->s); 32960786Sps error = getsock_cap(td->td_proc->p_fd, uap->s, 33060786Sps cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); 33160786Sps if (error == 0) { 33260786Sps so = fp->f_data; 33360786Sps#ifdef MAC 33460786Sps error = mac_socket_check_listen(td->td_ucred, so); 33560786Sps if (error == 0) 33660786Sps#endif 33760786Sps error = solisten(so, uap->backlog, td); 33860786Sps fdrop(fp, td); 33960786Sps } 34063128Sps return(error); 34163128Sps} 34263128Sps 34363128Sps/* 34463128Sps * accept1() 34563128Sps */ 34689019Spsstatic int 34789019Spsaccept1(td, s, uname, anamelen, flags) 34889019Sps struct thread *td; 34989019Sps int s; 35089019Sps struct sockaddr *uname; 35189019Sps socklen_t *anamelen; 35260786Sps int flags; 35360786Sps{ 35460786Sps 
struct sockaddr *name; 35560786Sps socklen_t namelen; 35660786Sps struct file *fp; 35760786Sps int error; 35860786Sps 35960786Sps if (uname == NULL) 36060786Sps return (kern_accept4(td, s, NULL, NULL, flags, NULL)); 36160786Sps 36260786Sps error = copyin(anamelen, &namelen, sizeof (namelen)); 36360786Sps if (error != 0) 36460786Sps return (error); 36560786Sps 36660786Sps error = kern_accept4(td, s, &name, &namelen, flags, &fp); 36760786Sps 36860786Sps /* 36960786Sps * return a namelen of zero for older code which might 37060786Sps * ignore the return value from accept. 37160786Sps */ 37260786Sps if (error != 0) { 37360786Sps (void) copyout(&namelen, anamelen, sizeof(*anamelen)); 37460786Sps return (error); 37560786Sps } 37660786Sps 37760786Sps if (error == 0 && uname != NULL) { 37860786Sps#ifdef COMPAT_OLDSOCK 37960786Sps if (flags & ACCEPT4_COMPAT) 38060786Sps ((struct osockaddr *)name)->sa_family = 38160786Sps name->sa_family; 38260786Sps#endif 38360786Sps error = copyout(name, uname, namelen); 38460786Sps } 38560786Sps if (error == 0) 38660786Sps error = copyout(&namelen, anamelen, 38760786Sps sizeof(namelen)); 38860786Sps if (error != 0) 38960786Sps fdclose(td, fp, td->td_retval[0]); 39060786Sps fdrop(fp, td); 39160786Sps free(name, M_SONAME); 39260786Sps return (error); 39360786Sps} 39460786Sps 39560786Spsint 39660786Spskern_accept(struct thread *td, int s, struct sockaddr **name, 39760786Sps socklen_t *namelen, struct file **fp) 39860786Sps{ 39960786Sps return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); 40060786Sps} 40160786Sps 40260786Spsint 40360786Spskern_accept4(struct thread *td, int s, struct sockaddr **name, 40460786Sps socklen_t *namelen, int flags, struct file **fp) 40560786Sps{ 40660786Sps struct filedesc *fdp; 40760786Sps struct file *headfp, *nfp = NULL; 40860786Sps struct sockaddr *sa = NULL; 40960786Sps struct socket *head, *so; 41060786Sps cap_rights_t rights; 41160786Sps u_int fflag; 41260786Sps pid_t pgid; 41360786Sps int 
error, fd, tmp; 41460786Sps 41560786Sps if (name != NULL) 41660786Sps *name = NULL; 41760786Sps 41860786Sps AUDIT_ARG_FD(s); 41960786Sps fdp = td->td_proc->p_fd; 42060786Sps error = getsock_cap(fdp, s, cap_rights_init(&rights, CAP_ACCEPT), 42160786Sps &headfp, &fflag); 42260786Sps if (error != 0) 42360786Sps return (error); 42460786Sps head = headfp->f_data; 42560786Sps if ((head->so_options & SO_ACCEPTCONN) == 0) { 42660786Sps error = EINVAL; 42760786Sps goto done; 42860786Sps } 42960786Sps#ifdef MAC 43060786Sps error = mac_socket_check_accept(td->td_ucred, head); 43160786Sps if (error != 0) 43260786Sps goto done; 43360786Sps#endif 43460786Sps error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); 43560786Sps if (error != 0) 43660786Sps goto done; 43760786Sps ACCEPT_LOCK(); 43860786Sps if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 43960786Sps ACCEPT_UNLOCK(); 44060786Sps error = EWOULDBLOCK; 44160786Sps goto noconnection; 44260786Sps } 44360786Sps while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 44460786Sps if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 44560786Sps head->so_error = ECONNABORTED; 44660786Sps break; 44760786Sps } 44860786Sps error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 44960786Sps "accept", 0); 45060786Sps if (error != 0) { 45189019Sps ACCEPT_UNLOCK(); 45260786Sps goto noconnection; 45360786Sps } 45460786Sps } 45560786Sps if (head->so_error) { 45660786Sps error = head->so_error; 45760786Sps head->so_error = 0; 45860786Sps ACCEPT_UNLOCK(); 45960786Sps goto noconnection; 46060786Sps } 46160786Sps so = TAILQ_FIRST(&head->so_comp); 46260786Sps KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 46360786Sps KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 46460786Sps 46560786Sps /* 46660786Sps * Before changing the flags on the socket, we have to bump the 46760786Sps * reference count. 
Otherwise, if the protocol calls sofree(), 46860786Sps * the socket will be released due to a zero refcount. 46960786Sps */ 47060786Sps SOCK_LOCK(so); /* soref() and so_state update */ 47160786Sps soref(so); /* file descriptor reference */ 472 473 TAILQ_REMOVE(&head->so_comp, so, so_list); 474 head->so_qlen--; 475 if (flags & ACCEPT4_INHERIT) 476 so->so_state |= (head->so_state & SS_NBIO); 477 else 478 so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; 479 so->so_qstate &= ~SQ_COMP; 480 so->so_head = NULL; 481 482 SOCK_UNLOCK(so); 483 ACCEPT_UNLOCK(); 484 485 /* An extra reference on `nfp' has been held for us by falloc(). */ 486 td->td_retval[0] = fd; 487 488 /* connection has been removed from the listen queue */ 489 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 490 491 if (flags & ACCEPT4_INHERIT) { 492 pgid = fgetown(&head->so_sigio); 493 if (pgid != 0) 494 fsetown(pgid, &so->so_sigio); 495 } else { 496 fflag &= ~(FNONBLOCK | FASYNC); 497 if (flags & SOCK_NONBLOCK) 498 fflag |= FNONBLOCK; 499 } 500 501 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 502 /* Sync socket nonblocking/async state with file flags */ 503 tmp = fflag & FNONBLOCK; 504 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 505 tmp = fflag & FASYNC; 506 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 507 sa = 0; 508 error = soaccept(so, &sa); 509 if (error != 0) { 510 /* 511 * return a namelen of zero for older code which might 512 * ignore the return value from accept. 
513 */ 514 if (name) 515 *namelen = 0; 516 goto noconnection; 517 } 518 if (sa == NULL) { 519 if (name) 520 *namelen = 0; 521 goto done; 522 } 523 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 524 if (name) { 525 /* check sa_len before it is destroyed */ 526 if (*namelen > sa->sa_len) 527 *namelen = sa->sa_len; 528#ifdef KTRACE 529 if (KTRPOINT(td, KTR_STRUCT)) 530 ktrsockaddr(sa); 531#endif 532 *name = sa; 533 sa = NULL; 534 } 535noconnection: 536 free(sa, M_SONAME); 537 538 /* 539 * close the new descriptor, assuming someone hasn't ripped it 540 * out from under us. 541 */ 542 if (error != 0) 543 fdclose(td, nfp, fd); 544 545 /* 546 * Release explicitly held references before returning. We return 547 * a reference on nfp to the caller on success if they request it. 548 */ 549done: 550 if (fp != NULL) { 551 if (error == 0) { 552 *fp = nfp; 553 nfp = NULL; 554 } else 555 *fp = NULL; 556 } 557 if (nfp != NULL) 558 fdrop(nfp, td); 559 fdrop(headfp, td); 560 return (error); 561} 562 563int 564sys_accept(td, uap) 565 struct thread *td; 566 struct accept_args *uap; 567{ 568 569 return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); 570} 571 572int 573sys_accept4(td, uap) 574 struct thread *td; 575 struct accept4_args *uap; 576{ 577 578 if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 579 return (EINVAL); 580 581 return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); 582} 583 584#ifdef COMPAT_OLDSOCK 585int 586oaccept(td, uap) 587 struct thread *td; 588 struct accept_args *uap; 589{ 590 591 return (accept1(td, uap->s, uap->name, uap->anamelen, 592 ACCEPT4_INHERIT | ACCEPT4_COMPAT)); 593} 594#endif /* COMPAT_OLDSOCK */ 595 596/* ARGSUSED */ 597int 598sys_connect(td, uap) 599 struct thread *td; 600 struct connect_args /* { 601 int s; 602 caddr_t name; 603 int namelen; 604 } */ *uap; 605{ 606 struct sockaddr *sa; 607 int error; 608 609 error = getsockaddr(&sa, uap->name, uap->namelen); 610 if (error == 0) { 611 error = kern_connect(td, uap->s, sa); 
612 free(sa, M_SONAME); 613 } 614 return (error); 615} 616 617static int 618kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 619{ 620 struct socket *so; 621 struct file *fp; 622 cap_rights_t rights; 623 int error, interrupted = 0; 624 625 AUDIT_ARG_FD(fd); 626 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 627 error = getsock_cap(td->td_proc->p_fd, fd, 628 cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); 629 if (error != 0) 630 return (error); 631 so = fp->f_data; 632 if (so->so_state & SS_ISCONNECTING) { 633 error = EALREADY; 634 goto done1; 635 } 636#ifdef KTRACE 637 if (KTRPOINT(td, KTR_STRUCT)) 638 ktrsockaddr(sa); 639#endif 640#ifdef MAC 641 error = mac_socket_check_connect(td->td_ucred, so, sa); 642 if (error != 0) 643 goto bad; 644#endif 645 if (dirfd == AT_FDCWD) 646 error = soconnect(so, sa, td); 647 else 648 error = soconnectat(dirfd, so, sa, td); 649 if (error != 0) 650 goto bad; 651 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 652 error = EINPROGRESS; 653 goto done1; 654 } 655 SOCK_LOCK(so); 656 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 657 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 658 "connec", 0); 659 if (error != 0) { 660 if (error == EINTR || error == ERESTART) 661 interrupted = 1; 662 break; 663 } 664 } 665 if (error == 0) { 666 error = so->so_error; 667 so->so_error = 0; 668 } 669 SOCK_UNLOCK(so); 670bad: 671 if (!interrupted) 672 so->so_state &= ~SS_ISCONNECTING; 673 if (error == ERESTART) 674 error = EINTR; 675done1: 676 fdrop(fp, td); 677 return (error); 678} 679 680int 681kern_connect(struct thread *td, int fd, struct sockaddr *sa) 682{ 683 684 return (kern_connectat(td, AT_FDCWD, fd, sa)); 685} 686 687/* ARGSUSED */ 688int 689sys_connectat(td, uap) 690 struct thread *td; 691 struct connectat_args /* { 692 int fd; 693 int s; 694 caddr_t name; 695 int namelen; 696 } */ *uap; 697{ 698 struct sockaddr *sa; 699 int error; 700 701 error = getsockaddr(&sa, uap->name, 
uap->namelen); 702 if (error == 0) { 703 error = kern_connectat(td, uap->fd, uap->s, sa); 704 free(sa, M_SONAME); 705 } 706 return (error); 707} 708 709int 710kern_socketpair(struct thread *td, int domain, int type, int protocol, 711 int *rsv) 712{ 713 struct file *fp1, *fp2; 714 struct socket *so1, *so2; 715 int fd, error, oflag, fflag; 716 717 AUDIT_ARG_SOCKET(domain, type, protocol); 718 719 oflag = 0; 720 fflag = 0; 721 if ((type & SOCK_CLOEXEC) != 0) { 722 type &= ~SOCK_CLOEXEC; 723 oflag |= O_CLOEXEC; 724 } 725 if ((type & SOCK_NONBLOCK) != 0) { 726 type &= ~SOCK_NONBLOCK; 727 fflag |= FNONBLOCK; 728 } 729#ifdef MAC 730 /* We might want to have a separate check for socket pairs. */ 731 error = mac_socket_check_create(td->td_ucred, domain, type, 732 protocol); 733 if (error != 0) 734 return (error); 735#endif 736 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 737 if (error != 0) 738 return (error); 739 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 740 if (error != 0) 741 goto free1; 742 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 743 error = falloc(td, &fp1, &fd, oflag); 744 if (error != 0) 745 goto free2; 746 rsv[0] = fd; 747 fp1->f_data = so1; /* so1 already has ref count */ 748 error = falloc(td, &fp2, &fd, oflag); 749 if (error != 0) 750 goto free3; 751 fp2->f_data = so2; /* so2 already has ref count */ 752 rsv[1] = fd; 753 error = soconnect2(so1, so2); 754 if (error != 0) 755 goto free4; 756 if (type == SOCK_DGRAM) { 757 /* 758 * Datagram socket connection is asymmetric. 
759 */ 760 error = soconnect2(so2, so1); 761 if (error != 0) 762 goto free4; 763 } 764 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 765 &socketops); 766 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 767 &socketops); 768 if ((fflag & FNONBLOCK) != 0) { 769 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 770 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 771 } 772 fdrop(fp1, td); 773 fdrop(fp2, td); 774 return (0); 775free4: 776 fdclose(td, fp2, rsv[1]); 777 fdrop(fp2, td); 778free3: 779 fdclose(td, fp1, rsv[0]); 780 fdrop(fp1, td); 781free2: 782 if (so2 != NULL) 783 (void)soclose(so2); 784free1: 785 if (so1 != NULL) 786 (void)soclose(so1); 787 return (error); 788} 789 790int 791sys_socketpair(struct thread *td, struct socketpair_args *uap) 792{ 793 int error, sv[2]; 794 795 error = kern_socketpair(td, uap->domain, uap->type, 796 uap->protocol, sv); 797 if (error != 0) 798 return (error); 799 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 800 if (error != 0) { 801 (void)kern_close(td, sv[0]); 802 (void)kern_close(td, sv[1]); 803 } 804 return (error); 805} 806 807static int 808sendit(td, s, mp, flags) 809 struct thread *td; 810 int s; 811 struct msghdr *mp; 812 int flags; 813{ 814 struct mbuf *control; 815 struct sockaddr *to; 816 int error; 817 818#ifdef CAPABILITY_MODE 819 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 820 return (ECAPMODE); 821#endif 822 823 if (mp->msg_name != NULL) { 824 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 825 if (error != 0) { 826 to = NULL; 827 goto bad; 828 } 829 mp->msg_name = to; 830 } else { 831 to = NULL; 832 } 833 834 if (mp->msg_control) { 835 if (mp->msg_controllen < sizeof(struct cmsghdr) 836#ifdef COMPAT_OLDSOCK 837 && mp->msg_flags != MSG_COMPAT 838#endif 839 ) { 840 error = EINVAL; 841 goto bad; 842 } 843 error = sockargs(&control, mp->msg_control, 844 mp->msg_controllen, MT_CONTROL); 845 if (error != 0) 846 goto bad; 847#ifdef COMPAT_OLDSOCK 848 if 
(mp->msg_flags == MSG_COMPAT) { 849 struct cmsghdr *cm; 850 851 M_PREPEND(control, sizeof(*cm), M_WAITOK); 852 cm = mtod(control, struct cmsghdr *); 853 cm->cmsg_len = control->m_len; 854 cm->cmsg_level = SOL_SOCKET; 855 cm->cmsg_type = SCM_RIGHTS; 856 } 857#endif 858 } else { 859 control = NULL; 860 } 861 862 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 863 864bad: 865 free(to, M_SONAME); 866 return (error); 867} 868 869int 870kern_sendit(td, s, mp, flags, control, segflg) 871 struct thread *td; 872 int s; 873 struct msghdr *mp; 874 int flags; 875 struct mbuf *control; 876 enum uio_seg segflg; 877{ 878 struct file *fp; 879 struct uio auio; 880 struct iovec *iov; 881 struct socket *so; 882 cap_rights_t rights; 883#ifdef KTRACE 884 struct uio *ktruio = NULL; 885#endif 886 ssize_t len; 887 int i, error; 888 889 AUDIT_ARG_FD(s); 890 cap_rights_init(&rights, CAP_SEND); 891 if (mp->msg_name != NULL) { 892 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 893 cap_rights_set(&rights, CAP_CONNECT); 894 } 895 error = getsock_cap(td->td_proc->p_fd, s, &rights, &fp, NULL); 896 if (error != 0) 897 return (error); 898 so = (struct socket *)fp->f_data; 899 900#ifdef KTRACE 901 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 902 ktrsockaddr(mp->msg_name); 903#endif 904#ifdef MAC 905 if (mp->msg_name != NULL) { 906 error = mac_socket_check_connect(td->td_ucred, so, 907 mp->msg_name); 908 if (error != 0) 909 goto bad; 910 } 911 error = mac_socket_check_send(td->td_ucred, so); 912 if (error != 0) 913 goto bad; 914#endif 915 916 auio.uio_iov = mp->msg_iov; 917 auio.uio_iovcnt = mp->msg_iovlen; 918 auio.uio_segflg = segflg; 919 auio.uio_rw = UIO_WRITE; 920 auio.uio_td = td; 921 auio.uio_offset = 0; /* XXX */ 922 auio.uio_resid = 0; 923 iov = mp->msg_iov; 924 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 925 if ((auio.uio_resid += iov->iov_len) < 0) { 926 error = EINVAL; 927 goto bad; 928 } 929 } 930#ifdef KTRACE 931 if (KTRPOINT(td, KTR_GENIO)) 932 ktruio = 
cloneuio(&auio); 933#endif 934 len = auio.uio_resid; 935 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 936 if (error != 0) { 937 if (auio.uio_resid != len && (error == ERESTART || 938 error == EINTR || error == EWOULDBLOCK)) 939 error = 0; 940 /* Generation of SIGPIPE can be controlled per socket */ 941 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 942 !(flags & MSG_NOSIGNAL)) { 943 PROC_LOCK(td->td_proc); 944 tdsignal(td, SIGPIPE); 945 PROC_UNLOCK(td->td_proc); 946 } 947 } 948 if (error == 0) 949 td->td_retval[0] = len - auio.uio_resid; 950#ifdef KTRACE 951 if (ktruio != NULL) { 952 ktruio->uio_resid = td->td_retval[0]; 953 ktrgenio(s, UIO_WRITE, ktruio, error); 954 } 955#endif 956bad: 957 fdrop(fp, td); 958 return (error); 959} 960 961int 962sys_sendto(td, uap) 963 struct thread *td; 964 struct sendto_args /* { 965 int s; 966 caddr_t buf; 967 size_t len; 968 int flags; 969 caddr_t to; 970 int tolen; 971 } */ *uap; 972{ 973 struct msghdr msg; 974 struct iovec aiov; 975 976 msg.msg_name = uap->to; 977 msg.msg_namelen = uap->tolen; 978 msg.msg_iov = &aiov; 979 msg.msg_iovlen = 1; 980 msg.msg_control = 0; 981#ifdef COMPAT_OLDSOCK 982 msg.msg_flags = 0; 983#endif 984 aiov.iov_base = uap->buf; 985 aiov.iov_len = uap->len; 986 return (sendit(td, uap->s, &msg, uap->flags)); 987} 988 989#ifdef COMPAT_OLDSOCK 990int 991osend(td, uap) 992 struct thread *td; 993 struct osend_args /* { 994 int s; 995 caddr_t buf; 996 int len; 997 int flags; 998 } */ *uap; 999{ 1000 struct msghdr msg; 1001 struct iovec aiov; 1002 1003 msg.msg_name = 0; 1004 msg.msg_namelen = 0; 1005 msg.msg_iov = &aiov; 1006 msg.msg_iovlen = 1; 1007 aiov.iov_base = uap->buf; 1008 aiov.iov_len = uap->len; 1009 msg.msg_control = 0; 1010 msg.msg_flags = 0; 1011 return (sendit(td, uap->s, &msg, uap->flags)); 1012} 1013 1014int 1015osendmsg(td, uap) 1016 struct thread *td; 1017 struct osendmsg_args /* { 1018 int s; 1019 caddr_t msg; 1020 int flags; 1021 } */ *uap; 1022{ 1023 struct 
msghdr msg; 1024 struct iovec *iov; 1025 int error; 1026 1027 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1028 if (error != 0) 1029 return (error); 1030 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1031 if (error != 0) 1032 return (error); 1033 msg.msg_iov = iov; 1034 msg.msg_flags = MSG_COMPAT; 1035 error = sendit(td, uap->s, &msg, uap->flags); 1036 free(iov, M_IOV); 1037 return (error); 1038} 1039#endif 1040 1041int 1042sys_sendmsg(td, uap) 1043 struct thread *td; 1044 struct sendmsg_args /* { 1045 int s; 1046 caddr_t msg; 1047 int flags; 1048 } */ *uap; 1049{ 1050 struct msghdr msg; 1051 struct iovec *iov; 1052 int error; 1053 1054 error = copyin(uap->msg, &msg, sizeof (msg)); 1055 if (error != 0) 1056 return (error); 1057 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1058 if (error != 0) 1059 return (error); 1060 msg.msg_iov = iov; 1061#ifdef COMPAT_OLDSOCK 1062 msg.msg_flags = 0; 1063#endif 1064 error = sendit(td, uap->s, &msg, uap->flags); 1065 free(iov, M_IOV); 1066 return (error); 1067} 1068 1069int 1070kern_recvit(td, s, mp, fromseg, controlp) 1071 struct thread *td; 1072 int s; 1073 struct msghdr *mp; 1074 enum uio_seg fromseg; 1075 struct mbuf **controlp; 1076{ 1077 struct uio auio; 1078 struct iovec *iov; 1079 struct mbuf *m, *control = NULL; 1080 caddr_t ctlbuf; 1081 struct file *fp; 1082 struct socket *so; 1083 struct sockaddr *fromsa = NULL; 1084 cap_rights_t rights; 1085#ifdef KTRACE 1086 struct uio *ktruio = NULL; 1087#endif 1088 ssize_t len; 1089 int error, i; 1090 1091 if (controlp != NULL) 1092 *controlp = NULL; 1093 1094 AUDIT_ARG_FD(s); 1095 error = getsock_cap(td->td_proc->p_fd, s, 1096 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 1097 if (error != 0) 1098 return (error); 1099 so = fp->f_data; 1100 1101#ifdef MAC 1102 error = mac_socket_check_receive(td->td_ucred, so); 1103 if (error != 0) { 1104 fdrop(fp, td); 1105 return (error); 1106 } 1107#endif 1108 1109 auio.uio_iov = mp->msg_iov; 
1110 auio.uio_iovcnt = mp->msg_iovlen; 1111 auio.uio_segflg = UIO_USERSPACE; 1112 auio.uio_rw = UIO_READ; 1113 auio.uio_td = td; 1114 auio.uio_offset = 0; /* XXX */ 1115 auio.uio_resid = 0; 1116 iov = mp->msg_iov; 1117 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1118 if ((auio.uio_resid += iov->iov_len) < 0) { 1119 fdrop(fp, td); 1120 return (EINVAL); 1121 } 1122 } 1123#ifdef KTRACE 1124 if (KTRPOINT(td, KTR_GENIO)) 1125 ktruio = cloneuio(&auio); 1126#endif 1127 len = auio.uio_resid; 1128 error = soreceive(so, &fromsa, &auio, NULL, 1129 (mp->msg_control || controlp) ? &control : NULL, 1130 &mp->msg_flags); 1131 if (error != 0) { 1132 if (auio.uio_resid != len && (error == ERESTART || 1133 error == EINTR || error == EWOULDBLOCK)) 1134 error = 0; 1135 } 1136 if (fromsa != NULL) 1137 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1138#ifdef KTRACE 1139 if (ktruio != NULL) { 1140 ktruio->uio_resid = len - auio.uio_resid; 1141 ktrgenio(s, UIO_READ, ktruio, error); 1142 } 1143#endif 1144 if (error != 0) 1145 goto out; 1146 td->td_retval[0] = len - auio.uio_resid; 1147 if (mp->msg_name) { 1148 len = mp->msg_namelen; 1149 if (len <= 0 || fromsa == NULL) 1150 len = 0; 1151 else { 1152 /* save sa_len before it is destroyed by MSG_COMPAT */ 1153 len = MIN(len, fromsa->sa_len); 1154#ifdef COMPAT_OLDSOCK 1155 if (mp->msg_flags & MSG_COMPAT) 1156 ((struct osockaddr *)fromsa)->sa_family = 1157 fromsa->sa_family; 1158#endif 1159 if (fromseg == UIO_USERSPACE) { 1160 error = copyout(fromsa, mp->msg_name, 1161 (unsigned)len); 1162 if (error != 0) 1163 goto out; 1164 } else 1165 bcopy(fromsa, mp->msg_name, len); 1166 } 1167 mp->msg_namelen = len; 1168 } 1169 if (mp->msg_control && controlp == NULL) { 1170#ifdef COMPAT_OLDSOCK 1171 /* 1172 * We assume that old recvmsg calls won't receive access 1173 * rights and other control info, esp. as control info 1174 * is always optional and those options didn't exist in 4.3. 
1175 * If we receive rights, trim the cmsghdr; anything else 1176 * is tossed. 1177 */ 1178 if (control && mp->msg_flags & MSG_COMPAT) { 1179 if (mtod(control, struct cmsghdr *)->cmsg_level != 1180 SOL_SOCKET || 1181 mtod(control, struct cmsghdr *)->cmsg_type != 1182 SCM_RIGHTS) { 1183 mp->msg_controllen = 0; 1184 goto out; 1185 } 1186 control->m_len -= sizeof (struct cmsghdr); 1187 control->m_data += sizeof (struct cmsghdr); 1188 } 1189#endif 1190 len = mp->msg_controllen; 1191 m = control; 1192 mp->msg_controllen = 0; 1193 ctlbuf = mp->msg_control; 1194 1195 while (m && len > 0) { 1196 unsigned int tocopy; 1197 1198 if (len >= m->m_len) 1199 tocopy = m->m_len; 1200 else { 1201 mp->msg_flags |= MSG_CTRUNC; 1202 tocopy = len; 1203 } 1204 1205 if ((error = copyout(mtod(m, caddr_t), 1206 ctlbuf, tocopy)) != 0) 1207 goto out; 1208 1209 ctlbuf += tocopy; 1210 len -= tocopy; 1211 m = m->m_next; 1212 } 1213 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1214 } 1215out: 1216 fdrop(fp, td); 1217#ifdef KTRACE 1218 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1219 ktrsockaddr(fromsa); 1220#endif 1221 free(fromsa, M_SONAME); 1222 1223 if (error == 0 && controlp != NULL) 1224 *controlp = control; 1225 else if (control) 1226 m_freem(control); 1227 1228 return (error); 1229} 1230 1231static int 1232recvit(td, s, mp, namelenp) 1233 struct thread *td; 1234 int s; 1235 struct msghdr *mp; 1236 void *namelenp; 1237{ 1238 int error; 1239 1240 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1241 if (error != 0) 1242 return (error); 1243 if (namelenp != NULL) { 1244 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1245#ifdef COMPAT_OLDSOCK 1246 if (mp->msg_flags & MSG_COMPAT) 1247 error = 0; /* old recvfrom didn't check */ 1248#endif 1249 } 1250 return (error); 1251} 1252 1253int 1254sys_recvfrom(td, uap) 1255 struct thread *td; 1256 struct recvfrom_args /* { 1257 int s; 1258 caddr_t buf; 1259 size_t len; 1260 int flags; 1261 struct sockaddr * __restrict from; 
1262 socklen_t * __restrict fromlenaddr; 1263 } */ *uap; 1264{ 1265 struct msghdr msg; 1266 struct iovec aiov; 1267 int error; 1268 1269 if (uap->fromlenaddr) { 1270 error = copyin(uap->fromlenaddr, 1271 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1272 if (error != 0) 1273 goto done2; 1274 } else { 1275 msg.msg_namelen = 0; 1276 } 1277 msg.msg_name = uap->from; 1278 msg.msg_iov = &aiov; 1279 msg.msg_iovlen = 1; 1280 aiov.iov_base = uap->buf; 1281 aiov.iov_len = uap->len; 1282 msg.msg_control = 0; 1283 msg.msg_flags = uap->flags; 1284 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1285done2: 1286 return (error); 1287} 1288 1289#ifdef COMPAT_OLDSOCK 1290int 1291orecvfrom(td, uap) 1292 struct thread *td; 1293 struct recvfrom_args *uap; 1294{ 1295 1296 uap->flags |= MSG_COMPAT; 1297 return (sys_recvfrom(td, uap)); 1298} 1299#endif 1300 1301#ifdef COMPAT_OLDSOCK 1302int 1303orecv(td, uap) 1304 struct thread *td; 1305 struct orecv_args /* { 1306 int s; 1307 caddr_t buf; 1308 int len; 1309 int flags; 1310 } */ *uap; 1311{ 1312 struct msghdr msg; 1313 struct iovec aiov; 1314 1315 msg.msg_name = 0; 1316 msg.msg_namelen = 0; 1317 msg.msg_iov = &aiov; 1318 msg.msg_iovlen = 1; 1319 aiov.iov_base = uap->buf; 1320 aiov.iov_len = uap->len; 1321 msg.msg_control = 0; 1322 msg.msg_flags = uap->flags; 1323 return (recvit(td, uap->s, &msg, NULL)); 1324} 1325 1326/* 1327 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1328 * overlays the new one, missing only the flags, and with the (old) access 1329 * rights where the control fields are now. 
1330 */ 1331int 1332orecvmsg(td, uap) 1333 struct thread *td; 1334 struct orecvmsg_args /* { 1335 int s; 1336 struct omsghdr *msg; 1337 int flags; 1338 } */ *uap; 1339{ 1340 struct msghdr msg; 1341 struct iovec *iov; 1342 int error; 1343 1344 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1345 if (error != 0) 1346 return (error); 1347 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1348 if (error != 0) 1349 return (error); 1350 msg.msg_flags = uap->flags | MSG_COMPAT; 1351 msg.msg_iov = iov; 1352 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1353 if (msg.msg_controllen && error == 0) 1354 error = copyout(&msg.msg_controllen, 1355 &uap->msg->msg_accrightslen, sizeof (int)); 1356 free(iov, M_IOV); 1357 return (error); 1358} 1359#endif 1360 1361int 1362sys_recvmsg(td, uap) 1363 struct thread *td; 1364 struct recvmsg_args /* { 1365 int s; 1366 struct msghdr *msg; 1367 int flags; 1368 } */ *uap; 1369{ 1370 struct msghdr msg; 1371 struct iovec *uiov, *iov; 1372 int error; 1373 1374 error = copyin(uap->msg, &msg, sizeof (msg)); 1375 if (error != 0) 1376 return (error); 1377 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1378 if (error != 0) 1379 return (error); 1380 msg.msg_flags = uap->flags; 1381#ifdef COMPAT_OLDSOCK 1382 msg.msg_flags &= ~MSG_COMPAT; 1383#endif 1384 uiov = msg.msg_iov; 1385 msg.msg_iov = iov; 1386 error = recvit(td, uap->s, &msg, NULL); 1387 if (error == 0) { 1388 msg.msg_iov = uiov; 1389 error = copyout(&msg, uap->msg, sizeof(msg)); 1390 } 1391 free(iov, M_IOV); 1392 return (error); 1393} 1394 1395/* ARGSUSED */ 1396int 1397sys_shutdown(td, uap) 1398 struct thread *td; 1399 struct shutdown_args /* { 1400 int s; 1401 int how; 1402 } */ *uap; 1403{ 1404 struct socket *so; 1405 struct file *fp; 1406 cap_rights_t rights; 1407 int error; 1408 1409 AUDIT_ARG_FD(uap->s); 1410 error = getsock_cap(td->td_proc->p_fd, uap->s, 1411 cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); 1412 if (error == 0) { 
1413 so = fp->f_data; 1414 error = soshutdown(so, uap->how); 1415 fdrop(fp, td); 1416 } 1417 return (error); 1418} 1419 1420/* ARGSUSED */ 1421int 1422sys_setsockopt(td, uap) 1423 struct thread *td; 1424 struct setsockopt_args /* { 1425 int s; 1426 int level; 1427 int name; 1428 caddr_t val; 1429 int valsize; 1430 } */ *uap; 1431{ 1432 1433 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1434 uap->val, UIO_USERSPACE, uap->valsize)); 1435} 1436 1437int 1438kern_setsockopt(td, s, level, name, val, valseg, valsize) 1439 struct thread *td; 1440 int s; 1441 int level; 1442 int name; 1443 void *val; 1444 enum uio_seg valseg; 1445 socklen_t valsize; 1446{ 1447 struct socket *so; 1448 struct file *fp; 1449 struct sockopt sopt; 1450 cap_rights_t rights; 1451 int error; 1452 1453 if (val == NULL && valsize != 0) 1454 return (EFAULT); 1455 if ((int)valsize < 0) 1456 return (EINVAL); 1457 1458 sopt.sopt_dir = SOPT_SET; 1459 sopt.sopt_level = level; 1460 sopt.sopt_name = name; 1461 sopt.sopt_val = val; 1462 sopt.sopt_valsize = valsize; 1463 switch (valseg) { 1464 case UIO_USERSPACE: 1465 sopt.sopt_td = td; 1466 break; 1467 case UIO_SYSSPACE: 1468 sopt.sopt_td = NULL; 1469 break; 1470 default: 1471 panic("kern_setsockopt called with bad valseg"); 1472 } 1473 1474 AUDIT_ARG_FD(s); 1475 error = getsock_cap(td->td_proc->p_fd, s, 1476 cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); 1477 if (error == 0) { 1478 so = fp->f_data; 1479 error = sosetopt(so, &sopt); 1480 fdrop(fp, td); 1481 } 1482 return(error); 1483} 1484 1485/* ARGSUSED */ 1486int 1487sys_getsockopt(td, uap) 1488 struct thread *td; 1489 struct getsockopt_args /* { 1490 int s; 1491 int level; 1492 int name; 1493 void * __restrict val; 1494 socklen_t * __restrict avalsize; 1495 } */ *uap; 1496{ 1497 socklen_t valsize; 1498 int error; 1499 1500 if (uap->val) { 1501 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1502 if (error != 0) 1503 return (error); 1504 } 1505 1506 error = 
kern_getsockopt(td, uap->s, uap->level, uap->name, 1507 uap->val, UIO_USERSPACE, &valsize); 1508 1509 if (error == 0) 1510 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1511 return (error); 1512} 1513 1514/* 1515 * Kernel version of getsockopt. 1516 * optval can be a userland or userspace. optlen is always a kernel pointer. 1517 */ 1518int 1519kern_getsockopt(td, s, level, name, val, valseg, valsize) 1520 struct thread *td; 1521 int s; 1522 int level; 1523 int name; 1524 void *val; 1525 enum uio_seg valseg; 1526 socklen_t *valsize; 1527{ 1528 struct socket *so; 1529 struct file *fp; 1530 struct sockopt sopt; 1531 cap_rights_t rights; 1532 int error; 1533 1534 if (val == NULL) 1535 *valsize = 0; 1536 if ((int)*valsize < 0) 1537 return (EINVAL); 1538 1539 sopt.sopt_dir = SOPT_GET; 1540 sopt.sopt_level = level; 1541 sopt.sopt_name = name; 1542 sopt.sopt_val = val; 1543 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1544 switch (valseg) { 1545 case UIO_USERSPACE: 1546 sopt.sopt_td = td; 1547 break; 1548 case UIO_SYSSPACE: 1549 sopt.sopt_td = NULL; 1550 break; 1551 default: 1552 panic("kern_getsockopt called with bad valseg"); 1553 } 1554 1555 AUDIT_ARG_FD(s); 1556 error = getsock_cap(td->td_proc->p_fd, s, 1557 cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); 1558 if (error == 0) { 1559 so = fp->f_data; 1560 error = sogetopt(so, &sopt); 1561 *valsize = sopt.sopt_valsize; 1562 fdrop(fp, td); 1563 } 1564 return (error); 1565} 1566 1567/* 1568 * getsockname1() - Get socket name. 
1569 */ 1570/* ARGSUSED */ 1571static int 1572getsockname1(td, uap, compat) 1573 struct thread *td; 1574 struct getsockname_args /* { 1575 int fdes; 1576 struct sockaddr * __restrict asa; 1577 socklen_t * __restrict alen; 1578 } */ *uap; 1579 int compat; 1580{ 1581 struct sockaddr *sa; 1582 socklen_t len; 1583 int error; 1584 1585 error = copyin(uap->alen, &len, sizeof(len)); 1586 if (error != 0) 1587 return (error); 1588 1589 error = kern_getsockname(td, uap->fdes, &sa, &len); 1590 if (error != 0) 1591 return (error); 1592 1593 if (len != 0) { 1594#ifdef COMPAT_OLDSOCK 1595 if (compat) 1596 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1597#endif 1598 error = copyout(sa, uap->asa, (u_int)len); 1599 } 1600 free(sa, M_SONAME); 1601 if (error == 0) 1602 error = copyout(&len, uap->alen, sizeof(len)); 1603 return (error); 1604} 1605 1606int 1607kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1608 socklen_t *alen) 1609{ 1610 struct socket *so; 1611 struct file *fp; 1612 cap_rights_t rights; 1613 socklen_t len; 1614 int error; 1615 1616 AUDIT_ARG_FD(fd); 1617 error = getsock_cap(td->td_proc->p_fd, fd, 1618 cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); 1619 if (error != 0) 1620 return (error); 1621 so = fp->f_data; 1622 *sa = NULL; 1623 CURVNET_SET(so->so_vnet); 1624 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1625 CURVNET_RESTORE(); 1626 if (error != 0) 1627 goto bad; 1628 if (*sa == NULL) 1629 len = 0; 1630 else 1631 len = MIN(*alen, (*sa)->sa_len); 1632 *alen = len; 1633#ifdef KTRACE 1634 if (KTRPOINT(td, KTR_STRUCT)) 1635 ktrsockaddr(*sa); 1636#endif 1637bad: 1638 fdrop(fp, td); 1639 if (error != 0 && *sa != NULL) { 1640 free(*sa, M_SONAME); 1641 *sa = NULL; 1642 } 1643 return (error); 1644} 1645 1646int 1647sys_getsockname(td, uap) 1648 struct thread *td; 1649 struct getsockname_args *uap; 1650{ 1651 1652 return (getsockname1(td, uap, 0)); 1653} 1654 1655#ifdef COMPAT_OLDSOCK 1656int 1657ogetsockname(td, uap) 1658 
struct thread *td; 1659 struct getsockname_args *uap; 1660{ 1661 1662 return (getsockname1(td, uap, 1)); 1663} 1664#endif /* COMPAT_OLDSOCK */ 1665 1666/* 1667 * getpeername1() - Get name of peer for connected socket. 1668 */ 1669/* ARGSUSED */ 1670static int 1671getpeername1(td, uap, compat) 1672 struct thread *td; 1673 struct getpeername_args /* { 1674 int fdes; 1675 struct sockaddr * __restrict asa; 1676 socklen_t * __restrict alen; 1677 } */ *uap; 1678 int compat; 1679{ 1680 struct sockaddr *sa; 1681 socklen_t len; 1682 int error; 1683 1684 error = copyin(uap->alen, &len, sizeof (len)); 1685 if (error != 0) 1686 return (error); 1687 1688 error = kern_getpeername(td, uap->fdes, &sa, &len); 1689 if (error != 0) 1690 return (error); 1691 1692 if (len != 0) { 1693#ifdef COMPAT_OLDSOCK 1694 if (compat) 1695 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1696#endif 1697 error = copyout(sa, uap->asa, (u_int)len); 1698 } 1699 free(sa, M_SONAME); 1700 if (error == 0) 1701 error = copyout(&len, uap->alen, sizeof(len)); 1702 return (error); 1703} 1704 1705int 1706kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1707 socklen_t *alen) 1708{ 1709 struct socket *so; 1710 struct file *fp; 1711 cap_rights_t rights; 1712 socklen_t len; 1713 int error; 1714 1715 AUDIT_ARG_FD(fd); 1716 error = getsock_cap(td->td_proc->p_fd, fd, 1717 cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); 1718 if (error != 0) 1719 return (error); 1720 so = fp->f_data; 1721 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1722 error = ENOTCONN; 1723 goto done; 1724 } 1725 *sa = NULL; 1726 CURVNET_SET(so->so_vnet); 1727 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1728 CURVNET_RESTORE(); 1729 if (error != 0) 1730 goto bad; 1731 if (*sa == NULL) 1732 len = 0; 1733 else 1734 len = MIN(*alen, (*sa)->sa_len); 1735 *alen = len; 1736#ifdef KTRACE 1737 if (KTRPOINT(td, KTR_STRUCT)) 1738 ktrsockaddr(*sa); 1739#endif 1740bad: 1741 if (error != 0 && *sa != NULL) 
{ 1742 free(*sa, M_SONAME); 1743 *sa = NULL; 1744 } 1745done: 1746 fdrop(fp, td); 1747 return (error); 1748} 1749 1750int 1751sys_getpeername(td, uap) 1752 struct thread *td; 1753 struct getpeername_args *uap; 1754{ 1755 1756 return (getpeername1(td, uap, 0)); 1757} 1758 1759#ifdef COMPAT_OLDSOCK 1760int 1761ogetpeername(td, uap) 1762 struct thread *td; 1763 struct ogetpeername_args *uap; 1764{ 1765 1766 /* XXX uap should have type `getpeername_args *' to begin with. */ 1767 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1768} 1769#endif /* COMPAT_OLDSOCK */ 1770 1771int 1772sockargs(mp, buf, buflen, type) 1773 struct mbuf **mp; 1774 caddr_t buf; 1775 int buflen, type; 1776{ 1777 struct sockaddr *sa; 1778 struct mbuf *m; 1779 int error; 1780 1781 if (buflen < 0) 1782 return (EINVAL); 1783 1784 if (buflen > MLEN) { 1785#ifdef COMPAT_OLDSOCK 1786 if (type == MT_SONAME && buflen <= 112) 1787 buflen = MLEN; /* unix domain compat. hack */ 1788 else 1789#endif 1790 if (buflen > MCLBYTES) 1791 return (EINVAL); 1792 } 1793 m = m_get2(buflen, M_WAITOK, type, 0); 1794 m->m_len = buflen; 1795 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1796 if (error != 0) 1797 (void) m_free(m); 1798 else { 1799 *mp = m; 1800 if (type == MT_SONAME) { 1801 sa = mtod(m, struct sockaddr *); 1802 1803#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1804 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1805 sa->sa_family = sa->sa_len; 1806#endif 1807 sa->sa_len = buflen; 1808 } 1809 } 1810 return (error); 1811} 1812 1813int 1814getsockaddr(namp, uaddr, len) 1815 struct sockaddr **namp; 1816 caddr_t uaddr; 1817 size_t len; 1818{ 1819 struct sockaddr *sa; 1820 int error; 1821 1822 if (len > SOCK_MAXADDRLEN) 1823 return (ENAMETOOLONG); 1824 if (len < offsetof(struct sockaddr, sa_data[0])) 1825 return (EINVAL); 1826 sa = malloc(len, M_SONAME, M_WAITOK); 1827 error = copyin(uaddr, sa, len); 1828 if (error != 0) { 1829 free(sa, M_SONAME); 1830 } else { 1831#if 
defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1832 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1833 sa->sa_family = sa->sa_len; 1834#endif 1835 sa->sa_len = len; 1836 *namp = sa; 1837 } 1838 return (error); 1839} 1840 1841struct sendfile_sync { 1842 struct mtx mtx; 1843 struct cv cv; 1844 unsigned count; 1845}; 1846 1847/* 1848 * Detach mapped page and release resources back to the system. 1849 */ 1850int 1851sf_buf_mext(struct mbuf *mb, void *addr, void *args) 1852{ 1853 vm_page_t m; 1854 struct sendfile_sync *sfs; 1855 1856 m = sf_buf_page(args); 1857 sf_buf_free(args); 1858 vm_page_lock(m); 1859 vm_page_unwire(m, 0); 1860 /* 1861 * Check for the object going away on us. This can 1862 * happen since we don't hold a reference to it. 1863 * If so, we're responsible for freeing the page. 1864 */ 1865 if (m->wire_count == 0 && m->object == NULL) 1866 vm_page_free(m); 1867 vm_page_unlock(m); 1868 if (addr == NULL) 1869 return (EXT_FREE_OK); 1870 sfs = addr; 1871 mtx_lock(&sfs->mtx); 1872 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1873 if (--sfs->count == 0) 1874 cv_signal(&sfs->cv); 1875 mtx_unlock(&sfs->mtx); 1876 return (EXT_FREE_OK); 1877} 1878 1879/* 1880 * sendfile(2) 1881 * 1882 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1883 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1884 * 1885 * Send a file specified by 'fd' and starting at 'offset' to a socket 1886 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1887 * 0. Optionally add a header and/or trailer to the socket output. If 1888 * specified, write the total number of bytes sent into *sbytes. 
1889 */ 1890int 1891sys_sendfile(struct thread *td, struct sendfile_args *uap) 1892{ 1893 1894 return (do_sendfile(td, uap, 0)); 1895} 1896 1897static int 1898do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1899{ 1900 struct sf_hdtr hdtr; 1901 struct uio *hdr_uio, *trl_uio; 1902 struct file *fp; 1903 cap_rights_t rights; 1904 int error; 1905 1906 /* 1907 * File offset must be positive. If it goes beyond EOF 1908 * we send only the header/trailer and no payload data. 1909 */ 1910 if (uap->offset < 0) 1911 return (EINVAL); 1912 1913 hdr_uio = trl_uio = NULL; 1914 1915 if (uap->hdtr != NULL) { 1916 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1917 if (error != 0) 1918 goto out; 1919 if (hdtr.headers != NULL) { 1920 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1921 if (error != 0) 1922 goto out; 1923 } 1924 if (hdtr.trailers != NULL) { 1925 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1926 if (error != 0) 1927 goto out; 1928 1929 } 1930 } 1931 1932 AUDIT_ARG_FD(uap->fd); 1933 1934 /* 1935 * sendfile(2) can start at any offset within a file so we require 1936 * CAP_READ+CAP_SEEK = CAP_PREAD. 1937 */ 1938 if ((error = fget_read(td, uap->fd, 1939 cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { 1940 goto out; 1941 } 1942 1943 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, 1944 uap->nbytes, uap->sbytes, uap->flags, compat ? 
SFK_COMPAT : 0, td); 1945 fdrop(fp, td); 1946 1947out: 1948 free(hdr_uio, M_IOV); 1949 free(trl_uio, M_IOV); 1950 return (error); 1951} 1952 1953#ifdef COMPAT_FREEBSD4 1954int 1955freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1956{ 1957 struct sendfile_args args; 1958 1959 args.fd = uap->fd; 1960 args.s = uap->s; 1961 args.offset = uap->offset; 1962 args.nbytes = uap->nbytes; 1963 args.hdtr = uap->hdtr; 1964 args.sbytes = uap->sbytes; 1965 args.flags = uap->flags; 1966 1967 return (do_sendfile(td, &args, 1)); 1968} 1969#endif /* COMPAT_FREEBSD4 */ 1970 1971static int 1972sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, 1973 off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) 1974{ 1975 vm_page_t m; 1976 vm_pindex_t pindex; 1977 ssize_t resid; 1978 int error, readahead, rv; 1979 1980 pindex = OFF_TO_IDX(off); 1981 VM_OBJECT_WLOCK(obj); 1982 m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY | 1983 VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); 1984 1985 /* 1986 * Check if page is valid for what we need, otherwise initiate I/O. 1987 * 1988 * The non-zero nd argument prevents disk I/O, instead we 1989 * return the caller what he specified in nd. In particular, 1990 * if we already turned some pages into mbufs, nd == EAGAIN 1991 * and the main function send them the pages before we come 1992 * here again and block. 1993 */ 1994 if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { 1995 if (vp == NULL) 1996 vm_page_xunbusy(m); 1997 VM_OBJECT_WUNLOCK(obj); 1998 *res = m; 1999 return (0); 2000 } else if (nd != 0) { 2001 if (vp == NULL) 2002 vm_page_xunbusy(m); 2003 error = nd; 2004 goto free_page; 2005 } 2006 2007 /* 2008 * Get the page from backing store. 
2009 */ 2010 error = 0; 2011 if (vp != NULL) { 2012 VM_OBJECT_WUNLOCK(obj); 2013 readahead = sfreadahead * MAXBSIZE; 2014 2015 /* 2016 * Use vn_rdwr() instead of the pager interface for 2017 * the vnode, to allow the read-ahead. 2018 * 2019 * XXXMAC: Because we don't have fp->f_cred here, we 2020 * pass in NOCRED. This is probably wrong, but is 2021 * consistent with our original implementation. 2022 */ 2023 error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), 2024 UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / 2025 bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); 2026 SFSTAT_INC(sf_iocnt); 2027 VM_OBJECT_WLOCK(obj); 2028 } else { 2029 if (vm_pager_has_page(obj, pindex, NULL, NULL)) { 2030 rv = vm_pager_get_pages(obj, &m, 1, 0); 2031 SFSTAT_INC(sf_iocnt); 2032 m = vm_page_lookup(obj, pindex); 2033 if (m == NULL) 2034 error = EIO; 2035 else if (rv != VM_PAGER_OK) { 2036 vm_page_lock(m); 2037 vm_page_free(m); 2038 vm_page_unlock(m); 2039 m = NULL; 2040 error = EIO; 2041 } 2042 } else { 2043 pmap_zero_page(m); 2044 m->valid = VM_PAGE_BITS_ALL; 2045 m->dirty = 0; 2046 } 2047 if (m != NULL) 2048 vm_page_xunbusy(m); 2049 } 2050 if (error == 0) { 2051 *res = m; 2052 } else if (m != NULL) { 2053free_page: 2054 vm_page_lock(m); 2055 vm_page_unwire(m, 0); 2056 2057 /* 2058 * See if anyone else might know about this page. If 2059 * not and it is not valid, then free it. 
2060 */ 2061 if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) 2062 vm_page_free(m); 2063 vm_page_unlock(m); 2064 } 2065 KASSERT(error != 0 || (m->wire_count > 0 && 2066 vm_page_is_valid(m, off & PAGE_MASK, xfsize)), 2067 ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off, 2068 xfsize)); 2069 VM_OBJECT_WUNLOCK(obj); 2070 return (error); 2071} 2072 2073static int 2074sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, 2075 struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, 2076 int *bsize) 2077{ 2078 struct vattr va; 2079 vm_object_t obj; 2080 struct vnode *vp; 2081 struct shmfd *shmfd; 2082 int error; 2083 2084 vp = *vp_res = NULL; 2085 obj = NULL; 2086 shmfd = *shmfd_res = NULL; 2087 *bsize = 0; 2088 2089 /* 2090 * The file descriptor must be a regular file and have a 2091 * backing VM object. 2092 */ 2093 if (fp->f_type == DTYPE_VNODE) { 2094 vp = fp->f_vnode; 2095 vn_lock(vp, LK_SHARED | LK_RETRY); 2096 if (vp->v_type != VREG) { 2097 error = EINVAL; 2098 goto out; 2099 } 2100 *bsize = vp->v_mount->mnt_stat.f_iosize; 2101 error = VOP_GETATTR(vp, &va, td->td_ucred); 2102 if (error != 0) 2103 goto out; 2104 *obj_size = va.va_size; 2105 obj = vp->v_object; 2106 if (obj == NULL) { 2107 error = EINVAL; 2108 goto out; 2109 } 2110 } else if (fp->f_type == DTYPE_SHM) { 2111 error = 0; 2112 shmfd = fp->f_data; 2113 obj = shmfd->shm_object; 2114 *obj_size = shmfd->shm_size; 2115 } else { 2116 error = EINVAL; 2117 goto out; 2118 } 2119 2120 VM_OBJECT_WLOCK(obj); 2121 if ((obj->flags & OBJ_DEAD) != 0) { 2122 VM_OBJECT_WUNLOCK(obj); 2123 error = EBADF; 2124 goto out; 2125 } 2126 2127 /* 2128 * Temporarily increase the backing VM object's reference 2129 * count so that a forced reclamation of its vnode does not 2130 * immediately destroy it. 
2131 */ 2132 vm_object_reference_locked(obj); 2133 VM_OBJECT_WUNLOCK(obj); 2134 *obj_res = obj; 2135 *vp_res = vp; 2136 *shmfd_res = shmfd; 2137 2138out: 2139 if (vp != NULL) 2140 VOP_UNLOCK(vp, 0); 2141 return (error); 2142} 2143 2144static int 2145kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, 2146 struct socket **so) 2147{ 2148 cap_rights_t rights; 2149 int error; 2150 2151 *sock_fp = NULL; 2152 *so = NULL; 2153 2154 /* 2155 * The socket must be a stream socket and connected. 2156 */ 2157 error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights, 2158 CAP_SEND), sock_fp, NULL); 2159 if (error != 0) 2160 return (error); 2161 *so = (*sock_fp)->f_data; 2162 if ((*so)->so_type != SOCK_STREAM) 2163 return (EINVAL); 2164 if (((*so)->so_state & SS_ISCONNECTED) == 0) 2165 return (ENOTCONN); 2166 return (0); 2167} 2168 2169int 2170vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 2171 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 2172 int kflags, struct thread *td) 2173{ 2174 struct file *sock_fp; 2175 struct vnode *vp; 2176 struct vm_object *obj; 2177 struct socket *so; 2178 struct mbuf *m; 2179 struct sf_buf *sf; 2180 struct vm_page *pg; 2181 struct shmfd *shmfd; 2182 struct sendfile_sync *sfs; 2183 struct vattr va; 2184 off_t off, xfsize, fsbytes, sbytes, rem, obj_size; 2185 int error, bsize, nd, hdrlen, mnw; 2186 bool inflight_called; 2187 2188 pg = NULL; 2189 obj = NULL; 2190 so = NULL; 2191 m = NULL; 2192 sfs = NULL; 2193 fsbytes = sbytes = 0; 2194 hdrlen = mnw = 0; 2195 rem = nbytes; 2196 obj_size = 0; 2197 inflight_called = false; 2198 2199 error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); 2200 if (error != 0) 2201 return (error); 2202 if (rem == 0) 2203 rem = obj_size; 2204 2205 error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); 2206 if (error != 0) 2207 goto out; 2208 2209 /* 2210 * Do not wait on memory allocations but return ENOMEM for 2211 * caller to retry 
later. 2212 * XXX: Experimental. 2213 */ 2214 if (flags & SF_MNOWAIT) 2215 mnw = 1; 2216 2217 if (flags & SF_SYNC) { 2218 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 2219 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 2220 cv_init(&sfs->cv, "sendfile"); 2221 } 2222 2223#ifdef MAC 2224 error = mac_socket_check_send(td->td_ucred, so); 2225 if (error != 0) 2226 goto out; 2227#endif 2228 2229 /* If headers are specified copy them into mbufs. */ 2230 if (hdr_uio != NULL) { 2231 hdr_uio->uio_td = td; 2232 hdr_uio->uio_rw = UIO_WRITE; 2233 if (hdr_uio->uio_resid > 0) { 2234 /* 2235 * In FBSD < 5.0 the nbytes to send also included 2236 * the header. If compat is specified subtract the 2237 * header size from nbytes. 2238 */ 2239 if (kflags & SFK_COMPAT) { 2240 if (nbytes > hdr_uio->uio_resid) 2241 nbytes -= hdr_uio->uio_resid; 2242 else 2243 nbytes = 0; 2244 } 2245 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2246 0, 0, 0); 2247 if (m == NULL) { 2248 error = mnw ? EAGAIN : ENOBUFS; 2249 goto out; 2250 } 2251 hdrlen = m_length(m, NULL); 2252 } 2253 } 2254 2255 /* 2256 * Protect against multiple writers to the socket. 2257 * 2258 * XXXRW: Historically this has assumed non-interruptibility, so now 2259 * we implement that, but possibly shouldn't. 2260 */ 2261 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2262 2263 /* 2264 * Loop through the pages of the file, starting with the requested 2265 * offset. Get a file page (do I/O if necessary), map the file page 2266 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2267 * it on the socket. 2268 * This is done in two loops. The inner loop turns as many pages 2269 * as it can, up to available socket buffer space, without blocking 2270 * into mbufs to have it bulk delivered into the socket send buffer. 2271 * The outer loop checks the state and available space of the socket 2272 * and takes care of the overall progress. 
2273 */ 2274 for (off = offset; ; ) { 2275 struct mbuf *mtail; 2276 int loopbytes; 2277 int space; 2278 int done; 2279 2280 if ((nbytes != 0 && nbytes == fsbytes) || 2281 (nbytes == 0 && obj_size == fsbytes)) 2282 break; 2283 2284 mtail = NULL; 2285 loopbytes = 0; 2286 space = 0; 2287 done = 0; 2288 2289 /* 2290 * Check the socket state for ongoing connection, 2291 * no errors and space in socket buffer. 2292 * If space is low allow for the remainder of the 2293 * file to be processed if it fits the socket buffer. 2294 * Otherwise block in waiting for sufficient space 2295 * to proceed, or if the socket is nonblocking, return 2296 * to userland with EAGAIN while reporting how far 2297 * we've come. 2298 * We wait until the socket buffer has significant free 2299 * space to do bulk sends. This makes good use of file 2300 * system read ahead and allows packet segmentation 2301 * offloading hardware to take over lots of work. If 2302 * we were not careful here we would send off only one 2303 * sfbuf at a time. 2304 */ 2305 SOCKBUF_LOCK(&so->so_snd); 2306 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2307 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2308retry_space: 2309 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2310 error = EPIPE; 2311 SOCKBUF_UNLOCK(&so->so_snd); 2312 goto done; 2313 } else if (so->so_error) { 2314 error = so->so_error; 2315 so->so_error = 0; 2316 SOCKBUF_UNLOCK(&so->so_snd); 2317 goto done; 2318 } 2319 space = sbspace(&so->so_snd); 2320 if (space < rem && 2321 (space <= 0 || 2322 space < so->so_snd.sb_lowat)) { 2323 if (so->so_state & SS_NBIO) { 2324 SOCKBUF_UNLOCK(&so->so_snd); 2325 error = EAGAIN; 2326 goto done; 2327 } 2328 /* 2329 * sbwait drops the lock while sleeping. 2330 * When we loop back to retry_space the 2331 * state may have changed and we retest 2332 * for it. 2333 */ 2334 error = sbwait(&so->so_snd); 2335 /* 2336 * An error from sbwait usually indicates that we've 2337 * been interrupted by a signal. 
If we've sent anything 2338 * then return bytes sent, otherwise return the error. 2339 */ 2340 if (error != 0) { 2341 SOCKBUF_UNLOCK(&so->so_snd); 2342 goto done; 2343 } 2344 goto retry_space; 2345 } 2346 SOCKBUF_UNLOCK(&so->so_snd); 2347 2348 /* 2349 * Reduce space in the socket buffer by the size of 2350 * the header mbuf chain. 2351 * hdrlen is set to 0 after the first loop. 2352 */ 2353 space -= hdrlen; 2354 2355 if (vp != NULL) { 2356 error = vn_lock(vp, LK_SHARED); 2357 if (error != 0) 2358 goto done; 2359 error = VOP_GETATTR(vp, &va, td->td_ucred); 2360 if (error != 0 || off >= va.va_size) { 2361 VOP_UNLOCK(vp, 0); 2362 goto done; 2363 } 2364 obj_size = va.va_size; 2365 } 2366 2367 /* 2368 * Loop and construct maximum sized mbuf chain to be bulk 2369 * dumped into socket buffer. 2370 */ 2371 while (space > loopbytes) { 2372 vm_offset_t pgoff; 2373 struct mbuf *m0; 2374 2375 /* 2376 * Calculate the amount to transfer. 2377 * Not to exceed a page, the EOF, 2378 * or the passed in nbytes. 2379 */ 2380 pgoff = (vm_offset_t)(off & PAGE_MASK); 2381 rem = obj_size - offset; 2382 if (nbytes != 0) 2383 rem = omin(rem, nbytes); 2384 rem -= fsbytes + loopbytes; 2385 xfsize = omin(PAGE_SIZE - pgoff, rem); 2386 xfsize = omin(space - loopbytes, xfsize); 2387 if (xfsize <= 0) { 2388 done = 1; /* all data sent */ 2389 break; 2390 } 2391 2392 /* 2393 * Attempt to look up the page. Allocate 2394 * if not found or wait and loop if busy. 2395 */ 2396 if (m != NULL) 2397 nd = EAGAIN; /* send what we already got */ 2398 else if ((flags & SF_NODISKIO) != 0) 2399 nd = EBUSY; 2400 else 2401 nd = 0; 2402 error = sendfile_readpage(obj, vp, nd, off, 2403 xfsize, bsize, td, &pg); 2404 if (error != 0) { 2405 if (error == EAGAIN) 2406 error = 0; /* not a real error */ 2407 break; 2408 } 2409 2410 /* 2411 * Get a sendfile buf. When allocating the 2412 * first buffer for mbuf chain, we usually 2413 * wait as long as necessary, but this wait 2414 * can be interrupted. 
For consequent 2415 * buffers, do not sleep, since several 2416 * threads might exhaust the buffers and then 2417 * deadlock. 2418 */ 2419 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2420 SFB_CATCH); 2421 if (sf == NULL) { 2422 SFSTAT_INC(sf_allocfail); 2423 vm_page_lock(pg); 2424 vm_page_unwire(pg, 0); 2425 KASSERT(pg->object != NULL, 2426 ("%s: object disappeared", __func__)); 2427 vm_page_unlock(pg); 2428 if (m == NULL) 2429 error = (mnw ? EAGAIN : EINTR); 2430 break; 2431 } 2432 2433 /* 2434 * Get an mbuf and set it up as having 2435 * external storage. 2436 */ 2437 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2438 if (m0 == NULL) { 2439 error = (mnw ? EAGAIN : ENOBUFS); 2440 (void)sf_buf_mext(NULL, NULL, sf); 2441 break; 2442 } 2443 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2444 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2445 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2446 error = (mnw ? EAGAIN : ENOBUFS); 2447 (void)sf_buf_mext(NULL, NULL, sf); 2448 m_freem(m0); 2449 break; 2450 } 2451 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2452 m0->m_len = xfsize; 2453 2454 /* Append to mbuf chain. */ 2455 if (mtail != NULL) 2456 mtail->m_next = m0; 2457 else if (m != NULL) 2458 m_last(m)->m_next = m0; 2459 else 2460 m = m0; 2461 mtail = m0; 2462 2463 /* Keep track of bits processed. */ 2464 loopbytes += xfsize; 2465 off += xfsize; 2466 2467 if (sfs != NULL) { 2468 mtx_lock(&sfs->mtx); 2469 sfs->count++; 2470 mtx_unlock(&sfs->mtx); 2471 } 2472 } 2473 2474 if (vp != NULL) 2475 VOP_UNLOCK(vp, 0); 2476 2477 /* Add the buffer chain to the socket buffer. */ 2478 if (m != NULL) { 2479 int mlen, err; 2480 2481 mlen = m_length(m, NULL); 2482 SOCKBUF_LOCK(&so->so_snd); 2483 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2484 error = EPIPE; 2485 SOCKBUF_UNLOCK(&so->so_snd); 2486 goto done; 2487 } 2488 SOCKBUF_UNLOCK(&so->so_snd); 2489 CURVNET_SET(so->so_vnet); 2490 /* Avoid error aliasing. 
*/ 2491 err = (*so->so_proto->pr_usrreqs->pru_send) 2492 (so, 0, m, NULL, NULL, td); 2493 CURVNET_RESTORE(); 2494 if (err == 0) { 2495 /* 2496 * We need two counters to get the 2497 * file offset and nbytes to send 2498 * right: 2499 * - sbytes contains the total amount 2500 * of bytes sent, including headers. 2501 * - fsbytes contains the total amount 2502 * of bytes sent from the file. 2503 */ 2504 sbytes += mlen; 2505 fsbytes += mlen; 2506 if (hdrlen) { 2507 fsbytes -= hdrlen; 2508 hdrlen = 0; 2509 } 2510 } else if (error == 0) 2511 error = err; 2512 m = NULL; /* pru_send always consumes */ 2513 } 2514 2515 /* Quit outer loop on error or when we're done. */ 2516 if (done) 2517 break; 2518 if (error != 0) 2519 goto done; 2520 } 2521 2522 /* 2523 * Send trailers. Wimp out and use writev(2). 2524 */ 2525 if (trl_uio != NULL) { 2526 sbunlock(&so->so_snd); 2527 error = kern_writev(td, sockfd, trl_uio); 2528 if (error == 0) 2529 sbytes += td->td_retval[0]; 2530 goto out; 2531 } 2532 2533done: 2534 sbunlock(&so->so_snd); 2535out: 2536 /* 2537 * If there was no error we have to clear td->td_retval[0] 2538 * because it may have been set by writev. 2539 */ 2540 if (error == 0) { 2541 td->td_retval[0] = 0; 2542 } 2543 if (sent != NULL) { 2544 copyout(&sbytes, sent, sizeof(off_t)); 2545 } 2546 if (obj != NULL) 2547 vm_object_deallocate(obj); 2548 if (so) 2549 fdrop(sock_fp, td); 2550 if (m) 2551 m_freem(m); 2552 2553 if (sfs != NULL) { 2554 mtx_lock(&sfs->mtx); 2555 if (sfs->count != 0) 2556 cv_wait(&sfs->cv, &sfs->mtx); 2557 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2558 cv_destroy(&sfs->cv); 2559 mtx_destroy(&sfs->mtx); 2560 free(sfs, M_TEMP); 2561 } 2562 2563 if (error == ERESTART) 2564 error = EINTR; 2565 2566 return (error); 2567} 2568