1139804Simp/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 4. Neither the name of the University nor the names of its contributors 191541Srgrimes * may be used to endorse or promote products derived from this software 201541Srgrimes * without specific prior written permission. 211541Srgrimes * 221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 251541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 321541Srgrimes * SUCH DAMAGE. 331541Srgrimes * 341541Srgrimes * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 351541Srgrimes */ 361541Srgrimes 37116182Sobrien#include <sys/cdefs.h> 38116182Sobrien__FBSDID("$FreeBSD: stable/10/sys/kern/sys_generic.c 360332 2020-04-26 08:35:32Z hselasky $"); 39116182Sobrien 40224778Srwatson#include "opt_capsicum.h" 41147676Speter#include "opt_compat.h" 4213203Swollman#include "opt_ktrace.h" 4313203Swollman 441541Srgrimes#include <sys/param.h> 451541Srgrimes#include <sys/systm.h> 4612221Sbde#include <sys/sysproto.h> 47280258Srwatson#include <sys/capsicum.h> 481541Srgrimes#include <sys/filedesc.h> 4924206Sbde#include <sys/filio.h> 5024131Sbde#include <sys/fcntl.h> 511541Srgrimes#include <sys/file.h> 52247602Spjd#include <sys/lock.h> 531541Srgrimes#include <sys/proc.h> 543308Sphk#include <sys/signalvar.h> 551541Srgrimes#include <sys/socketvar.h> 561541Srgrimes#include <sys/uio.h> 571541Srgrimes#include <sys/kernel.h> 58175140Sjhb#include <sys/ktr.h> 59114216Skan#include <sys/limits.h> 601541Srgrimes#include <sys/malloc.h> 6129351Speter#include <sys/poll.h> 6272146Speter#include <sys/resourcevar.h> 6370834Swollman#include <sys/selinfo.h> 64126326Sjhb#include <sys/sleepqueue.h> 65102779Siedowse#include <sys/syscallsubr.h> 6655478Speter#include <sys/sysctl.h> 6729351Speter#include <sys/sysent.h> 68124736Sache#include <sys/vnode.h> 6968883Sdillon#include <sys/bio.h> 7068883Sdillon#include <sys/buf.h> 7176564Stanimura#include <sys/condvar.h> 721541Srgrimes#ifdef KTRACE 731541Srgrimes#include <sys/ktrace.h> 741541Srgrimes#endif 751541Srgrimes 76175140Sjhb#include <security/audit/audit.h> 77174647Sjeff 78275212Shselasky/* 79275212Shselasky * The following macro defines how many bytes will be allocated from 80275212Shselasky * the stack instead of memory allocated when passing the IOCTL data 81275212Shselasky * structures from userspace and to the kernel. Some IOCTLs having 82275212Shselasky * small data structures are used very frequently and this small 83275212Shselasky * buffer on the stack gives a significant speedup improvement for 84275212Shselasky * those requests. The value of this define should be greater or equal 85275212Shselasky * to 64 bytes and should also be power of two. The data structure is 86275212Shselasky * currently hard-aligned to a 8-byte boundary on the stack. This 87275212Shselasky * should currently be sufficient for all supported platforms. 88275212Shselasky */ 89275212Shselasky#define SYS_IOCTL_SMALL_SIZE 128 /* bytes */ 90275212Shselasky#define SYS_IOCTL_SMALL_ALIGN 8 /* bytes */ 91275212Shselasky 92231949Skibint iosize_max_clamp = 1; 93232494SkibSYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW, 94232494Skib &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX"); 95257122Skibint devfs_iosize_max_clamp = 1; 96257122SkibSYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW, 97257122Skib &devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices"); 98257122Skib 99232494Skib/* 100232494Skib * Assert that the return value of read(2) and write(2) syscalls fits 101232494Skib * into a register. If not, an architecture will need to provide the 102232494Skib * usermode wrappers to reconstruct the result. 103232494Skib */ 104232494SkibCTASSERT(sizeof(register_t) >= sizeof(size_t)); 105231949Skib 10630354Sphkstatic MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 10730354Sphkstatic MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 10830354SphkMALLOC_DEFINE(M_IOV, "iov", "large iov's"); 10930309Sphk 110211941Skibstatic int pollout(struct thread *, struct pollfd *, struct pollfd *, 111211941Skib u_int); 11291972Salfredstatic int pollscan(struct thread *, struct pollfd *, u_int); 113174647Sjeffstatic int pollrescan(struct thread *); 11491972Salfredstatic int selscan(struct thread *, fd_mask **, fd_mask **, int); 115174647Sjeffstatic int selrescan(struct thread *, fd_mask **, fd_mask **); 116174647Sjeffstatic void selfdalloc(struct thread *, void *); 117174647Sjeffstatic void selfdfree(struct seltd *, struct selfd *); 118147813Sjhbstatic int dofileread(struct thread *, int, struct file *, struct uio *, 119147813Sjhb off_t, int); 120147813Sjhbstatic int dofilewrite(struct thread *, int, struct file *, struct uio *, 121147813Sjhb off_t, int); 122122352Stanimurastatic void doselwakeup(struct selinfo *, int); 123174647Sjeffstatic void seltdinit(struct thread *); 124247801Sdavidestatic int seltdwait(struct thread *, sbintime_t, sbintime_t); 125174647Sjeffstatic void seltdclear(struct thread *); 1263485Sphk 127174647Sjeff/* 128174647Sjeff * One seltd per-thread allocated on demand as needed. 129174647Sjeff * 130174647Sjeff * t - protected by st_mtx 131174647Sjeff * k - Only accessed by curthread or read-only 132174647Sjeff */ 133174647Sjeffstruct seltd { 134174647Sjeff STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */ 135174647Sjeff struct selfd *st_free1; /* (k) free fd for read set. */ 136174647Sjeff struct selfd *st_free2; /* (k) free fd for write set. */ 137174647Sjeff struct mtx st_mtx; /* Protects struct seltd */ 138174647Sjeff struct cv st_wait; /* (t) Wait channel. */ 139174647Sjeff int st_flags; /* (t) SELTD_ flags. */ 140174647Sjeff}; 141174647Sjeff 142174647Sjeff#define SELTD_PENDING 0x0001 /* We have pending events. */ 143174647Sjeff#define SELTD_RESCAN 0x0002 /* Doing a rescan. */ 144174647Sjeff 145174647Sjeff/* 146174647Sjeff * One selfd allocated per-thread per-file-descriptor. 147174647Sjeff * f - protected by sf_mtx 148174647Sjeff */ 149174647Sjeffstruct selfd { 150174647Sjeff STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */ 151174647Sjeff TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */ 152174647Sjeff struct selinfo *sf_si; /* (f) selinfo when linked. */ 153174647Sjeff struct mtx *sf_mtx; /* Pointer to selinfo mtx. */ 154174647Sjeff struct seltd *sf_td; /* (k) owning seltd. */ 155174647Sjeff void *sf_cookie; /* (k) fd or pollfd. */ 156174647Sjeff}; 157174647Sjeff 158174647Sjeffstatic uma_zone_t selfd_zone; 159195259Sjeffstatic struct mtx_pool *mtxpool_select; 160174647Sjeff 16112221Sbde#ifndef _SYS_SYSPROTO_H_ 1621541Srgrimesstruct read_args { 1631541Srgrimes int fd; 16438864Sbde void *buf; 16538864Sbde size_t nbyte; 1661541Srgrimes}; 16712221Sbde#endif 1681549Srgrimesint 169225617Skmacysys_read(td, uap) 17083366Sjulian struct thread *td; 17186341Sdillon struct read_args *uap; 1721541Srgrimes{ 173147813Sjhb struct uio auio; 174147813Sjhb struct iovec aiov; 17568883Sdillon int error; 1761541Srgrimes 177231949Skib if (uap->nbyte > IOSIZE_MAX) 178147813Sjhb return (EINVAL); 179147813Sjhb aiov.iov_base = uap->buf; 180147813Sjhb aiov.iov_len = uap->nbyte; 181147813Sjhb auio.uio_iov = &aiov; 182147813Sjhb auio.uio_iovcnt = 1; 183147813Sjhb auio.uio_resid = uap->nbyte; 184147813Sjhb auio.uio_segflg = UIO_USERSPACE; 185147813Sjhb error = kern_readv(td, uap->fd, &auio); 18668883Sdillon return(error); 1871541Srgrimes} 1881541Srgrimes 1891541Srgrimes/* 190147813Sjhb * Positioned read system call 19145065Salc */ 19245065Salc#ifndef _SYS_SYSPROTO_H_ 19345065Salcstruct pread_args { 19445065Salc int fd; 19545065Salc void *buf; 19645065Salc size_t nbyte; 19745311Sdt int pad; 19845311Sdt off_t offset; 19945065Salc}; 20045065Salc#endif 20145065Salcint 202225617Skmacysys_pread(td, uap) 20383366Sjulian struct thread *td; 20486341Sdillon struct pread_args *uap; 20545065Salc{ 20645065Salc struct uio auio; 20745065Salc struct iovec aiov; 208147813Sjhb int error; 20945065Salc 210231949Skib if (uap->nbyte > IOSIZE_MAX) 211147813Sjhb return (EINVAL); 212147813Sjhb aiov.iov_base = uap->buf; 213147813Sjhb aiov.iov_len = uap->nbyte; 21445065Salc auio.uio_iov = &aiov; 21545065Salc auio.uio_iovcnt = 1; 216147813Sjhb auio.uio_resid = uap->nbyte; 21745065Salc auio.uio_segflg = UIO_USERSPACE; 218147813Sjhb error = kern_preadv(td, uap->fd, &auio, uap->offset); 219147813Sjhb return(error); 22045065Salc} 22145065Salc 222171212Speterint 223171212Speterfreebsd6_pread(td, uap) 224171212Speter struct thread *td; 225171212Speter struct freebsd6_pread_args *uap; 226171212Speter{ 227171212Speter struct pread_args oargs; 228171212Speter 229171212Speter oargs.fd = uap->fd; 230171212Speter oargs.buf = uap->buf; 231171212Speter oargs.nbyte = uap->nbyte; 232171212Speter oargs.offset = uap->offset; 233225617Skmacy return (sys_pread(td, &oargs)); 234171212Speter} 235171212Speter 23645065Salc/* 2371541Srgrimes * Scatter read system call. 2381541Srgrimes */ 23912221Sbde#ifndef _SYS_SYSPROTO_H_ 2401541Srgrimesstruct readv_args { 24112208Sbde int fd; 2421541Srgrimes struct iovec *iovp; 2431541Srgrimes u_int iovcnt; 2441541Srgrimes}; 24512221Sbde#endif 2461549Srgrimesint 247225617Skmacysys_readv(struct thread *td, struct readv_args *uap) 2481541Srgrimes{ 249144445Sjhb struct uio *auio; 250144445Sjhb int error; 251144445Sjhb 252144445Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 253144445Sjhb if (error) 254144445Sjhb return (error); 255144445Sjhb error = kern_readv(td, uap->fd, auio); 256144445Sjhb free(auio, M_IOV); 257144445Sjhb return (error); 258144445Sjhb} 259144445Sjhb 260144445Sjhbint 261144445Sjhbkern_readv(struct thread *td, int fd, struct uio *auio) 262144445Sjhb{ 26386341Sdillon struct file *fp; 264255219Spjd cap_rights_t rights; 26596243Salc int error; 266147813Sjhb 267255219Spjd error = fget_read(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 268147813Sjhb if (error) 269147813Sjhb return (error); 270147813Sjhb error = dofileread(td, fd, fp, auio, (off_t)-1, 0); 271147813Sjhb fdrop(fp, td); 272147813Sjhb return (error); 273147813Sjhb} 274147813Sjhb 275147813Sjhb/* 276147813Sjhb * Scatter positioned read system call. 277147813Sjhb */ 278147813Sjhb#ifndef _SYS_SYSPROTO_H_ 279147813Sjhbstruct preadv_args { 280147813Sjhb int fd; 281147813Sjhb struct iovec *iovp; 282147813Sjhb u_int iovcnt; 283147813Sjhb off_t offset; 284147813Sjhb}; 2851541Srgrimes#endif 286147813Sjhbint 287225617Skmacysys_preadv(struct thread *td, struct preadv_args *uap) 288147813Sjhb{ 289147813Sjhb struct uio *auio; 290147813Sjhb int error; 2911541Srgrimes 292147813Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 293147813Sjhb if (error) 294147813Sjhb return (error); 295147813Sjhb error = kern_preadv(td, uap->fd, auio, uap->offset); 296147813Sjhb free(auio, M_IOV); 297147813Sjhb return (error); 298147813Sjhb} 299147813Sjhb 300147813Sjhbint 301147813Sjhbkern_preadv(td, fd, auio, offset) 302147813Sjhb struct thread *td; 303147813Sjhb int fd; 304147813Sjhb struct uio *auio; 305147813Sjhb off_t offset; 306147813Sjhb{ 307147813Sjhb struct file *fp; 308255219Spjd cap_rights_t rights; 309147813Sjhb int error; 310147813Sjhb 311255219Spjd error = fget_read(td, fd, cap_rights_init(&rights, CAP_PREAD), &fp); 312131897Sphk if (error) 31396243Salc return (error); 314147813Sjhb if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 315147813Sjhb error = ESPIPE; 316315481Smmokhi else if (offset < 0 && 317315481Smmokhi (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) 318147813Sjhb error = EINVAL; 319147813Sjhb else 320147813Sjhb error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); 321147813Sjhb fdrop(fp, td); 322147813Sjhb return (error); 323147813Sjhb} 324147813Sjhb 325147813Sjhb/* 326147813Sjhb * Common code for readv and preadv that reads data in 327147813Sjhb * from a file using the passed in uio, offset, and flags. 328147813Sjhb */ 329147813Sjhbstatic int 330147813Sjhbdofileread(td, fd, fp, auio, offset, flags) 331147813Sjhb struct thread *td; 332147813Sjhb int fd; 333147813Sjhb struct file *fp; 334147813Sjhb struct uio *auio; 335147813Sjhb off_t offset; 336147813Sjhb int flags; 337147813Sjhb{ 338147813Sjhb ssize_t cnt; 339147813Sjhb int error; 340147813Sjhb#ifdef KTRACE 341147813Sjhb struct uio *ktruio = NULL; 342147813Sjhb#endif 343147813Sjhb 344140800Sphk /* Finish zero length reads right here */ 345140800Sphk if (auio->uio_resid == 0) { 346140800Sphk td->td_retval[0] = 0; 347140800Sphk return(0); 348140800Sphk } 349131897Sphk auio->uio_rw = UIO_READ; 350147813Sjhb auio->uio_offset = offset; 351131897Sphk auio->uio_td = td; 3521541Srgrimes#ifdef KTRACE 353131897Sphk if (KTRPOINT(td, KTR_GENIO)) 354131897Sphk ktruio = cloneuio(auio); 3551541Srgrimes#endif 356131897Sphk cnt = auio->uio_resid; 357147813Sjhb if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) { 358131897Sphk if (auio->uio_resid != cnt && (error == ERESTART || 3591541Srgrimes error == EINTR || error == EWOULDBLOCK)) 3601541Srgrimes error = 0; 36168883Sdillon } 362131897Sphk cnt -= auio->uio_resid; 3631541Srgrimes#ifdef KTRACE 364131897Sphk if (ktruio != NULL) { 365131897Sphk ktruio->uio_resid = cnt; 366144445Sjhb ktrgenio(fd, UIO_READ, ktruio, error); 3671541Srgrimes } 3681541Srgrimes#endif 36983366Sjulian td->td_retval[0] = cnt; 3701541Srgrimes return (error); 3711541Srgrimes} 3721541Srgrimes 37312221Sbde#ifndef _SYS_SYSPROTO_H_ 3741541Srgrimesstruct write_args { 3751541Srgrimes int fd; 37638864Sbde const void *buf; 37738864Sbde size_t nbyte; 3781541Srgrimes}; 37912221Sbde#endif 3801549Srgrimesint 381225617Skmacysys_write(td, uap) 38283366Sjulian struct thread *td; 38386341Sdillon struct write_args *uap; 3841541Srgrimes{ 385147813Sjhb struct uio auio; 386147813Sjhb struct iovec aiov; 38768883Sdillon int error; 3881541Srgrimes 389231949Skib if (uap->nbyte > IOSIZE_MAX) 390147813Sjhb return (EINVAL); 391147813Sjhb aiov.iov_base = (void *)(uintptr_t)uap->buf; 392147813Sjhb aiov.iov_len = uap->nbyte; 393147813Sjhb auio.uio_iov = &aiov; 394147813Sjhb auio.uio_iovcnt = 1; 395147813Sjhb auio.uio_resid = uap->nbyte; 396147813Sjhb auio.uio_segflg = UIO_USERSPACE; 397147813Sjhb error = kern_writev(td, uap->fd, &auio); 39868883Sdillon return(error); 3991541Srgrimes} 4001541Srgrimes 4011541Srgrimes/* 402167211Srwatson * Positioned write system call. 40345065Salc */ 40445065Salc#ifndef _SYS_SYSPROTO_H_ 40545065Salcstruct pwrite_args { 40645065Salc int fd; 40745065Salc const void *buf; 40845065Salc size_t nbyte; 40945311Sdt int pad; 41045311Sdt off_t offset; 41145065Salc}; 41245065Salc#endif 41345065Salcint 414225617Skmacysys_pwrite(td, uap) 41583366Sjulian struct thread *td; 41686341Sdillon struct pwrite_args *uap; 41745065Salc{ 41845065Salc struct uio auio; 41945065Salc struct iovec aiov; 420147813Sjhb int error; 42145065Salc 422231949Skib if (uap->nbyte > IOSIZE_MAX) 423147813Sjhb return (EINVAL); 424147813Sjhb aiov.iov_base = (void *)(uintptr_t)uap->buf; 425147813Sjhb aiov.iov_len = uap->nbyte; 42645065Salc auio.uio_iov = &aiov; 42745065Salc auio.uio_iovcnt = 1; 428147813Sjhb auio.uio_resid = uap->nbyte; 42945065Salc auio.uio_segflg = UIO_USERSPACE; 430147813Sjhb error = kern_pwritev(td, uap->fd, &auio, uap->offset); 431147813Sjhb return(error); 43245065Salc} 43345065Salc 434171212Speterint 435171212Speterfreebsd6_pwrite(td, uap) 436171212Speter struct thread *td; 437171212Speter struct freebsd6_pwrite_args *uap; 438171212Speter{ 439171212Speter struct pwrite_args oargs; 440171212Speter 441171212Speter oargs.fd = uap->fd; 442171212Speter oargs.buf = uap->buf; 443171212Speter oargs.nbyte = uap->nbyte; 444171212Speter oargs.offset = uap->offset; 445225617Skmacy return (sys_pwrite(td, &oargs)); 446171212Speter} 447171212Speter 44845065Salc/* 449167211Srwatson * Gather write system call. 4501541Srgrimes */ 45112221Sbde#ifndef _SYS_SYSPROTO_H_ 4521541Srgrimesstruct writev_args { 4531541Srgrimes int fd; 4541541Srgrimes struct iovec *iovp; 4551541Srgrimes u_int iovcnt; 4561541Srgrimes}; 45712221Sbde#endif 4581549Srgrimesint 459225617Skmacysys_writev(struct thread *td, struct writev_args *uap) 4601541Srgrimes{ 461144445Sjhb struct uio *auio; 462144445Sjhb int error; 463144445Sjhb 464144445Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 465144445Sjhb if (error) 466144445Sjhb return (error); 467144445Sjhb error = kern_writev(td, uap->fd, auio); 468144445Sjhb free(auio, M_IOV); 469144445Sjhb return (error); 470144445Sjhb} 471144445Sjhb 472144445Sjhbint 473144445Sjhbkern_writev(struct thread *td, int fd, struct uio *auio) 474144445Sjhb{ 47586341Sdillon struct file *fp; 476255219Spjd cap_rights_t rights; 477131897Sphk int error; 478147813Sjhb 479255219Spjd error = fget_write(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 480147813Sjhb if (error) 481154073Sjhb return (error); 482147813Sjhb error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); 483147813Sjhb fdrop(fp, td); 484147813Sjhb return (error); 485147813Sjhb} 486147813Sjhb 487147813Sjhb/* 488167211Srwatson * Gather positioned write system call. 489147813Sjhb */ 490147813Sjhb#ifndef _SYS_SYSPROTO_H_ 491147813Sjhbstruct pwritev_args { 492147813Sjhb int fd; 493147813Sjhb struct iovec *iovp; 494147813Sjhb u_int iovcnt; 495147813Sjhb off_t offset; 496147813Sjhb}; 497147813Sjhb#endif 498147813Sjhbint 499225617Skmacysys_pwritev(struct thread *td, struct pwritev_args *uap) 500147813Sjhb{ 501147813Sjhb struct uio *auio; 502147813Sjhb int error; 503147813Sjhb 504147813Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 505147813Sjhb if (error) 506147813Sjhb return (error); 507147813Sjhb error = kern_pwritev(td, uap->fd, auio, uap->offset); 508147813Sjhb free(auio, M_IOV); 509147813Sjhb return (error); 510147813Sjhb} 511147813Sjhb 512147813Sjhbint 513147813Sjhbkern_pwritev(td, fd, auio, offset) 514147813Sjhb struct thread *td; 515147813Sjhb struct uio *auio; 516147813Sjhb int fd; 517147813Sjhb off_t offset; 518147813Sjhb{ 519147813Sjhb struct file *fp; 520255219Spjd cap_rights_t rights; 521147813Sjhb int error; 522147813Sjhb 523255219Spjd error = fget_write(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp); 524147813Sjhb if (error) 525154073Sjhb return (error); 526147813Sjhb if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 527147813Sjhb error = ESPIPE; 528315481Smmokhi else if (offset < 0 && 529315481Smmokhi (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) 530147813Sjhb error = EINVAL; 531147813Sjhb else 532147813Sjhb error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); 533147813Sjhb fdrop(fp, td); 534147813Sjhb return (error); 535147813Sjhb} 536147813Sjhb 537147813Sjhb/* 538147813Sjhb * Common code for writev and pwritev that writes data to 539147813Sjhb * a file using the passed in uio, offset, and flags. 540147813Sjhb */ 541147813Sjhbstatic int 542147813Sjhbdofilewrite(td, fd, fp, auio, offset, flags) 543147813Sjhb struct thread *td; 544147813Sjhb int fd; 545147813Sjhb struct file *fp; 546147813Sjhb struct uio *auio; 547147813Sjhb off_t offset; 548147813Sjhb int flags; 549147813Sjhb{ 550147813Sjhb ssize_t cnt; 551147813Sjhb int error; 5521541Srgrimes#ifdef KTRACE 553131897Sphk struct uio *ktruio = NULL; 5541541Srgrimes#endif 5551541Srgrimes 556131897Sphk auio->uio_rw = UIO_WRITE; 557131897Sphk auio->uio_td = td; 558147813Sjhb auio->uio_offset = offset; 5591541Srgrimes#ifdef KTRACE 560131897Sphk if (KTRPOINT(td, KTR_GENIO)) 561131897Sphk ktruio = cloneuio(auio); 5621541Srgrimes#endif 563131897Sphk cnt = auio->uio_resid; 564244643Skib if (fp->f_type == DTYPE_VNODE && 565244643Skib (fp->f_vnread_flags & FDEVFS_VNODE) == 0) 56669733Sdillon bwillwrite(); 567147813Sjhb if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) { 568131897Sphk if (auio->uio_resid != cnt && (error == ERESTART || 5691541Srgrimes error == EINTR || error == EWOULDBLOCK)) 5701541Srgrimes error = 0; 571147813Sjhb /* Socket layer is responsible for issuing SIGPIPE. */ 572167150Sbms if (fp->f_type != DTYPE_SOCKET && error == EPIPE) { 57383366Sjulian PROC_LOCK(td->td_proc); 574209595Sjhb tdsignal(td, SIGPIPE); 57583366Sjulian PROC_UNLOCK(td->td_proc); 57673929Sjhb } 5771541Srgrimes } 578131897Sphk cnt -= auio->uio_resid; 5791541Srgrimes#ifdef KTRACE 580131897Sphk if (ktruio != NULL) { 581131897Sphk ktruio->uio_resid = cnt; 582144445Sjhb ktrgenio(fd, UIO_WRITE, ktruio, error); 5831541Srgrimes } 5841541Srgrimes#endif 58583366Sjulian td->td_retval[0] = cnt; 5861541Srgrimes return (error); 5871541Srgrimes} 5881541Srgrimes 589175140Sjhb/* 590175140Sjhb * Truncate a file given a file descriptor. 591175140Sjhb * 592175140Sjhb * Can't use fget_write() here, since must return EINVAL and not EBADF if the 593175140Sjhb * descriptor isn't writable. 594175140Sjhb */ 595175140Sjhbint 596175140Sjhbkern_ftruncate(td, fd, length) 597175140Sjhb struct thread *td; 598175140Sjhb int fd; 599175140Sjhb off_t length; 600175140Sjhb{ 601175140Sjhb struct file *fp; 602255219Spjd cap_rights_t rights; 603175140Sjhb int error; 604175140Sjhb 605195104Srwatson AUDIT_ARG_FD(fd); 606175140Sjhb if (length < 0) 607175140Sjhb return (EINVAL); 608255219Spjd error = fget(td, fd, cap_rights_init(&rights, CAP_FTRUNCATE), &fp); 609175140Sjhb if (error) 610175140Sjhb return (error); 611195104Srwatson AUDIT_ARG_FILE(td->td_proc, fp); 612175140Sjhb if (!(fp->f_flag & FWRITE)) { 613175140Sjhb fdrop(fp, td); 614175140Sjhb return (EINVAL); 615175140Sjhb } 616175140Sjhb error = fo_truncate(fp, length, td->td_ucred, td); 617175140Sjhb fdrop(fp, td); 618175140Sjhb return (error); 619175140Sjhb} 620175140Sjhb 62112221Sbde#ifndef _SYS_SYSPROTO_H_ 622175140Sjhbstruct ftruncate_args { 623175140Sjhb int fd; 624175140Sjhb int pad; 625175140Sjhb off_t length; 626175140Sjhb}; 627175140Sjhb#endif 628175140Sjhbint 629225617Skmacysys_ftruncate(td, uap) 630175140Sjhb struct thread *td; 631175140Sjhb struct ftruncate_args *uap; 632175140Sjhb{ 633175140Sjhb 634175140Sjhb return (kern_ftruncate(td, uap->fd, uap->length)); 635175140Sjhb} 636175140Sjhb 637175140Sjhb#if defined(COMPAT_43) 638175140Sjhb#ifndef _SYS_SYSPROTO_H_ 639175140Sjhbstruct oftruncate_args { 640175140Sjhb int fd; 641175140Sjhb long length; 642175140Sjhb}; 643175140Sjhb#endif 644175140Sjhbint 645175140Sjhboftruncate(td, uap) 646175140Sjhb struct thread *td; 647175140Sjhb struct oftruncate_args *uap; 648175140Sjhb{ 649175140Sjhb 650175140Sjhb return (kern_ftruncate(td, uap->fd, uap->length)); 651175140Sjhb} 652175140Sjhb#endif /* COMPAT_43 */ 653175140Sjhb 654175140Sjhb#ifndef _SYS_SYSPROTO_H_ 6551541Srgrimesstruct ioctl_args { 6561541Srgrimes int fd; 65738517Sdfr u_long com; 6581541Srgrimes caddr_t data; 6591541Srgrimes}; 66012221Sbde#endif 6611541Srgrimes/* ARGSUSED */ 6621549Srgrimesint 663225617Skmacysys_ioctl(struct thread *td, struct ioctl_args *uap) 6641541Srgrimes{ 665275212Shselasky u_char smalldata[SYS_IOCTL_SMALL_SIZE] __aligned(SYS_IOCTL_SMALL_ALIGN); 666360332Shselasky uint32_t com; 667162711Sru int arg, error; 668137687Sphk u_int size; 669162711Sru caddr_t data; 6701541Srgrimes 671360332Shselasky#ifdef INVARIANTS 672140406Sphk if (uap->com > 0xffffffff) { 673140406Sphk printf( 674140406Sphk "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n", 675173600Sjulian td->td_proc->p_pid, td->td_name, uap->com); 676140406Sphk } 677360332Shselasky#endif 678360332Shselasky com = (uint32_t)uap->com; 6791541Srgrimes 6801541Srgrimes /* 6811541Srgrimes * Interpret high order word to find amount of data to be 6821541Srgrimes * copied to/from the user's address space. 6831541Srgrimes */ 6841541Srgrimes size = IOCPARM_LEN(com); 685137689Sphk if ((size > IOCPARM_MAX) || 686137689Sphk ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) || 687147676Speter#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) 688147676Speter ((com & IOC_OUT) && size == 0) || 689147676Speter#else 690147676Speter ((com & (IOC_IN | IOC_OUT)) && size == 0) || 691147676Speter#endif 692162711Sru ((com & IOC_VOID) && size > 0 && size != sizeof(int))) 69389306Salfred return (ENOTTY); 69468883Sdillon 695137689Sphk if (size > 0) { 696183297Sobrien if (com & IOC_VOID) { 697162711Sru /* Integer argument. */ 698162711Sru arg = (intptr_t)uap->data; 699162711Sru data = (void *)&arg; 700162711Sru size = 0; 701275212Shselasky } else { 702275212Shselasky if (size > SYS_IOCTL_SMALL_SIZE) 703275212Shselasky data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 704275212Shselasky else 705275212Shselasky data = smalldata; 706275212Shselasky } 707162711Sru } else 708137689Sphk data = (void *)&uap->data; 709137689Sphk if (com & IOC_IN) { 710137689Sphk error = copyin(uap->data, data, (u_int)size); 711275212Shselasky if (error != 0) 712275212Shselasky goto out; 713137689Sphk } else if (com & IOC_OUT) { 7141541Srgrimes /* 7151541Srgrimes * Zero the buffer so the user always 7161541Srgrimes * gets back something deterministic. 7171541Srgrimes */ 7181541Srgrimes bzero(data, size); 71968883Sdillon } 7201541Srgrimes 721160192Sjhb error = kern_ioctl(td, uap->fd, com, data); 722160192Sjhb 723160192Sjhb if (error == 0 && (com & IOC_OUT)) 724160192Sjhb error = copyout(data, uap->data, (u_int)size); 725160192Sjhb 726275212Shselaskyout: 727275212Shselasky if (size > SYS_IOCTL_SMALL_SIZE) 728162711Sru free(data, M_IOCTLOPS); 729160192Sjhb return (error); 730160192Sjhb} 731160192Sjhb 732160192Sjhbint 733160192Sjhbkern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) 734160192Sjhb{ 735160192Sjhb struct file *fp; 736160192Sjhb struct filedesc *fdp; 737255219Spjd#ifndef CAPABILITIES 738255219Spjd cap_rights_t rights; 739255219Spjd#endif 740247602Spjd int error, tmp, locked; 741160192Sjhb 742195281Srwatson AUDIT_ARG_FD(fd); 743195281Srwatson AUDIT_ARG_CMD(com); 744247602Spjd 745160192Sjhb fdp = td->td_proc->p_fd; 746247602Spjd 747160192Sjhb switch (com) { 748160192Sjhb case FIONCLEX: 749247602Spjd case FIOCLEX: 750168355Srwatson FILEDESC_XLOCK(fdp); 751247602Spjd locked = LA_XLOCKED; 752247602Spjd break; 753247602Spjd default: 754247602Spjd#ifdef CAPABILITIES 755247602Spjd FILEDESC_SLOCK(fdp); 756247602Spjd locked = LA_SLOCKED; 757247602Spjd#else 758247602Spjd locked = LA_UNLOCKED; 759247602Spjd#endif 760247602Spjd break; 761247602Spjd } 762247602Spjd 763247602Spjd#ifdef CAPABILITIES 764247602Spjd if ((fp = fget_locked(fdp, fd)) == NULL) { 765247602Spjd error = EBADF; 766160192Sjhb goto out; 767247602Spjd } 768247602Spjd if ((error = cap_ioctl_check(fdp, fd, com)) != 0) { 769247602Spjd fp = NULL; /* fhold() was not called yet */ 770247602Spjd goto out; 771247602Spjd } 772247602Spjd fhold(fp); 773247602Spjd if (locked == LA_SLOCKED) { 774247602Spjd FILEDESC_SUNLOCK(fdp); 775247602Spjd locked = LA_UNLOCKED; 776247602Spjd } 777247602Spjd#else 778255219Spjd error = fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp); 779255219Spjd if (error != 0) { 780247602Spjd fp = NULL; 781247602Spjd goto out; 782247602Spjd } 783247602Spjd#endif 784247602Spjd if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 785247602Spjd error = EBADF; 786247602Spjd goto out; 787247602Spjd } 788247602Spjd 789247602Spjd switch (com) { 790247602Spjd case FIONCLEX: 791247602Spjd fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; 792247602Spjd goto out; 793160192Sjhb case FIOCLEX: 794247602Spjd fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; 795160192Sjhb goto out; 796160192Sjhb case FIONBIO: 7973098Sphk if ((tmp = *(int *)data)) 798174988Sjeff atomic_set_int(&fp->f_flag, FNONBLOCK); 7991541Srgrimes else 800174988Sjeff atomic_clear_int(&fp->f_flag, FNONBLOCK); 801137773Sphk data = (void *)&tmp; 802160192Sjhb break; 803160192Sjhb case FIOASYNC: 8043098Sphk if ((tmp = *(int *)data)) 805174988Sjeff atomic_set_int(&fp->f_flag, FASYNC); 8061541Srgrimes else 807174988Sjeff atomic_clear_int(&fp->f_flag, FASYNC); 808137773Sphk data = (void *)&tmp; 809160192Sjhb break; 810137773Sphk } 8111541Srgrimes 812137773Sphk error = fo_ioctl(fp, com, data, td->td_ucred, td); 813160192Sjhbout: 814247602Spjd switch (locked) { 815247602Spjd case LA_XLOCKED: 816247602Spjd FILEDESC_XUNLOCK(fdp); 817247602Spjd break; 818247602Spjd#ifdef CAPABILITIES 819247602Spjd case LA_SLOCKED: 820247602Spjd FILEDESC_SUNLOCK(fdp); 821247602Spjd break; 822247602Spjd#endif 823247602Spjd default: 824247602Spjd FILEDESC_UNLOCK_ASSERT(fdp); 825247602Spjd break; 826247602Spjd } 827247602Spjd if (fp != NULL) 828247602Spjd fdrop(fp, td); 8291541Srgrimes return (error); 8301541Srgrimes} 8311541Srgrimes 832189450Skibint 833189450Skibpoll_no_poll(int events) 834189450Skib{ 835189450Skib /* 836189450Skib * Return true for read/write. If the user asked for something 837189450Skib * special, return POLLNVAL, so that clients have a way of 838189450Skib * determining reliably whether or not the extended 839189450Skib * functionality is present without hard-coding knowledge 840189450Skib * of specific filesystem implementations. 841189450Skib */ 842189450Skib if (events & ~POLLSTANDARD) 843189450Skib return (POLLNVAL); 844189450Skib 845189450Skib return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 846189450Skib} 847189450Skib 848198508Skibint 849225617Skmacysys_pselect(struct thread *td, struct pselect_args *uap) 850198508Skib{ 851198508Skib struct timespec ts; 852198508Skib struct timeval tv, *tvp; 853198508Skib sigset_t set, *uset; 854198508Skib int error; 855198508Skib 856198508Skib if (uap->ts != NULL) { 857198508Skib error = copyin(uap->ts, &ts, sizeof(ts)); 858198508Skib if (error != 0) 859198508Skib return (error); 860198508Skib TIMESPEC_TO_TIMEVAL(&tv, &ts); 861198508Skib tvp = &tv; 862198508Skib } else 863198508Skib tvp = NULL; 864198508Skib if (uap->sm != NULL) { 865198508Skib error = copyin(uap->sm, &set, sizeof(set)); 866198508Skib if (error != 0) 867198508Skib return (error); 868198508Skib uset = &set; 869198508Skib } else 870198508Skib uset = NULL; 871198508Skib return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 872198508Skib uset, NFDBITS)); 873198508Skib} 874198508Skib 875198508Skibint 876198508Skibkern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, 877198508Skib struct timeval *tvp, sigset_t *uset, int abi_nfdbits) 878198508Skib{ 879198508Skib int error; 880198508Skib 881198508Skib if (uset != NULL) { 882198508Skib error = kern_sigprocmask(td, SIG_SETMASK, uset, 883198508Skib &td->td_oldsigmask, 0); 884198508Skib if (error != 0) 885198508Skib return (error); 886198508Skib td->td_pflags |= TDP_OLDMASK; 887198508Skib /* 888198508Skib * Make sure that ast() is called on return to 889198508Skib * usermode and TDP_OLDMASK is cleared, restoring old 890198508Skib * sigmask. 891198508Skib */ 892198508Skib thread_lock(td); 893198508Skib td->td_flags |= TDF_ASTPENDING; 894198508Skib thread_unlock(td); 895198508Skib } 896198508Skib error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits); 897198508Skib return (error); 898198508Skib} 899198508Skib 90012221Sbde#ifndef _SYS_SYSPROTO_H_ 9011541Srgrimesstruct select_args { 90217702Ssmpatel int nd; 9031541Srgrimes fd_set *in, *ou, *ex; 9041541Srgrimes struct timeval *tv; 9051541Srgrimes}; 90612221Sbde#endif 9071549Srgrimesint 908225617Skmacysys_select(struct thread *td, struct select_args *uap) 9091541Srgrimes{ 910102779Siedowse struct timeval tv, *tvp; 911102779Siedowse int error; 912102779Siedowse 913102779Siedowse if (uap->tv != NULL) { 914102779Siedowse error = copyin(uap->tv, &tv, sizeof(tv)); 915102779Siedowse if (error) 916102779Siedowse return (error); 917102779Siedowse tvp = &tv; 918102779Siedowse } else 919102779Siedowse tvp = NULL; 920102779Siedowse 921197049Skib return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 922197049Skib NFDBITS)); 923102779Siedowse} 924102779Siedowse 925227485Skib/* 926227485Skib * In the unlikely case when user specified n greater then the last 927227485Skib * open file descriptor, check that no bits are set after the last 928227485Skib * valid fd. We must return EBADF if any is set. 929227485Skib * 930227485Skib * There are applications that rely on the behaviour. 931227485Skib * 932227485Skib * nd is fd_lastfile + 1. 933227485Skib */ 934227485Skibstatic int 935227485Skibselect_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits) 936227485Skib{ 937227485Skib char *addr, *oaddr; 938227485Skib int b, i, res; 939227485Skib uint8_t bits; 940227485Skib 941227485Skib if (nd >= ndu || fd_in == NULL) 942227485Skib return (0); 943227485Skib 944227485Skib oaddr = NULL; 945227485Skib bits = 0; /* silence gcc */ 946227485Skib for (i = nd; i < ndu; i++) { 947227485Skib b = i / NBBY; 948227485Skib#if BYTE_ORDER == LITTLE_ENDIAN 949227485Skib addr = (char *)fd_in + b; 950227485Skib#else 951227485Skib addr = (char *)fd_in; 952227485Skib if (abi_nfdbits == NFDBITS) { 953227485Skib addr += rounddown(b, sizeof(fd_mask)) + 954227485Skib sizeof(fd_mask) - 1 - b % sizeof(fd_mask); 955227485Skib } else { 956227485Skib addr += rounddown(b, sizeof(uint32_t)) + 957227485Skib sizeof(uint32_t) - 1 - b % sizeof(uint32_t); 958227485Skib } 959227485Skib#endif 960227485Skib if (addr != oaddr) { 961227485Skib res = fubyte(addr); 962227485Skib if (res == -1) 963227485Skib return (EFAULT); 964227485Skib oaddr = addr; 965227485Skib bits = res; 966227485Skib } 967227485Skib if ((bits & (1 << (i % NBBY))) != 0) 968227485Skib return (EBADF); 969227485Skib } 970227485Skib return (0); 971227485Skib} 972227485Skib 973102779Siedowseint 974102779Siedowsekern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, 975197049Skib fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits) 976102779Siedowse{ 97789306Salfred struct filedesc *fdp; 97822945Sbde /* 97922945Sbde * The magic 2048 here is chosen to be just enough for FD_SETSIZE 98022945Sbde * infds with the new FD_SETSIZE of 1024, and more than enough for 98122945Sbde * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 98222945Sbde * of 256. 98322945Sbde */ 98422945Sbde fd_mask s_selbits[howmany(2048, NFDBITS)]; 98589969Salfred fd_mask *ibits[3], *obits[3], *selbits, *sbp; 986247801Sdavide struct timeval rtv; 987247801Sdavide sbintime_t asbt, precision, rsbt; 988197049Skib u_int nbufbytes, ncpbytes, ncpubytes, nfdbits; 989247801Sdavide int error, lf, ndu; 9901541Srgrimes 991102779Siedowse if (nd < 0) 99217713Ssmpatel return (EINVAL); 99389306Salfred fdp = td->td_proc->p_fd; 994227485Skib ndu = nd; 995227485Skib lf = fdp->fd_lastfile; 996227485Skib if (nd > lf + 1) 997227485Skib nd = lf + 1; 99817702Ssmpatel 999227485Skib error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits); 1000227485Skib if (error != 0) 1001227485Skib return (error); 1002227485Skib error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits); 1003227485Skib if (error != 0) 1004227485Skib return (error); 1005227485Skib error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits); 1006227485Skib if (error != 0) 1007227485Skib return (error); 1008227485Skib 100922945Sbde /* 101022945Sbde * Allocate just enough bits for the non-null fd_sets. Use the 101122945Sbde * preallocated auto buffer if possible. 101222945Sbde */ 1013102779Siedowse nfdbits = roundup(nd, NFDBITS); 101422945Sbde ncpbytes = nfdbits / NBBY; 1015197049Skib ncpubytes = roundup(nd, abi_nfdbits) / NBBY; 101622945Sbde nbufbytes = 0; 1017102779Siedowse if (fd_in != NULL) 101822945Sbde nbufbytes += 2 * ncpbytes; 1019102779Siedowse if (fd_ou != NULL) 102022945Sbde nbufbytes += 2 * ncpbytes; 1021102779Siedowse if (fd_ex != NULL) 102222945Sbde nbufbytes += 2 * ncpbytes; 102322945Sbde if (nbufbytes <= sizeof s_selbits) 102422945Sbde selbits = &s_selbits[0]; 102522945Sbde else 1026111119Simp selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 102717702Ssmpatel 102817702Ssmpatel /* 102922945Sbde * Assign pointers into the bit buffers and fetch the input bits. 103022945Sbde * Put the output buffers together so that they can be bzeroed 103122945Sbde * together. 103217702Ssmpatel */ 103322945Sbde sbp = selbits; 10341541Srgrimes#define getbits(name, x) \ 103522945Sbde do { \ 1036205014Snwhitehorn if (name == NULL) { \ 103722945Sbde ibits[x] = NULL; \ 1038205014Snwhitehorn obits[x] = NULL; \ 1039205014Snwhitehorn } else { \ 104022945Sbde ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 104122945Sbde obits[x] = sbp; \ 104222945Sbde sbp += ncpbytes / sizeof *sbp; \ 1043197049Skib error = copyin(name, ibits[x], ncpubytes); \ 104476564Stanimura if (error != 0) \ 1045174647Sjeff goto done; \ 1046197049Skib bzero((char *)ibits[x] + ncpubytes, \ 1047197049Skib ncpbytes - ncpubytes); \ 104822945Sbde } \ 104922945Sbde } while (0) 1050102779Siedowse getbits(fd_in, 0); 1051102779Siedowse getbits(fd_ou, 1); 1052102779Siedowse getbits(fd_ex, 2); 10531541Srgrimes#undef getbits 1054205014Snwhitehorn 1055205014Snwhitehorn#if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__) 1056205014Snwhitehorn /* 1057205014Snwhitehorn * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS, 1058205014Snwhitehorn * we are running under 32-bit emulation. This should be more 1059205014Snwhitehorn * generic. 1060205014Snwhitehorn */ 1061205014Snwhitehorn#define swizzle_fdset(bits) \ 1062205014Snwhitehorn if (abi_nfdbits != NFDBITS && bits != NULL) { \ 1063205014Snwhitehorn int i; \ 1064205014Snwhitehorn for (i = 0; i < ncpbytes / sizeof *sbp; i++) \ 1065205014Snwhitehorn bits[i] = (bits[i] >> 32) | (bits[i] << 32); \ 1066205014Snwhitehorn } 1067205014Snwhitehorn#else 1068205014Snwhitehorn#define swizzle_fdset(bits) 1069205014Snwhitehorn#endif 1070205014Snwhitehorn 1071205014Snwhitehorn /* Make sure the bit order makes it through an ABI transition */ 1072205014Snwhitehorn swizzle_fdset(ibits[0]); 1073205014Snwhitehorn swizzle_fdset(ibits[1]); 1074205014Snwhitehorn swizzle_fdset(ibits[2]); 1075205014Snwhitehorn 107622945Sbde if (nbufbytes != 0) 107722945Sbde bzero(selbits, nbufbytes / 2); 10781541Srgrimes 1079247801Sdavide precision = 0; 1080102779Siedowse if (tvp != NULL) { 1081247801Sdavide rtv = *tvp; 1082247801Sdavide if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1083247801Sdavide rtv.tv_usec >= 1000000) { 10841541Srgrimes error = EINVAL; 1085174647Sjeff goto done; 10861541Srgrimes } 1087248092Smav if (!timevalisset(&rtv)) 1088247898Smav asbt = 0; 1089248092Smav else if (rtv.tv_sec <= INT32_MAX) { 1090247898Smav rsbt = tvtosbt(rtv); 1091247898Smav precision = rsbt; 1092247898Smav precision >>= tc_precexp; 1093247898Smav if (TIMESEL(&asbt, rsbt)) 1094247898Smav asbt += tc_tick_sbt; 1095304894Skib if (asbt <= SBT_MAX - rsbt) 1096248092Smav asbt += rsbt; 1097248092Smav else 1098247898Smav asbt = -1; 1099247898Smav } else 1100247898Smav asbt = -1; 1101247801Sdavide } else 1102247801Sdavide asbt = -1; 1103174647Sjeff seltdinit(td); 1104174647Sjeff /* Iterate until the timeout expires or descriptors become ready. */ 1105174647Sjeff for (;;) { 1106174647Sjeff error = selscan(td, ibits, obits, nd); 1107174647Sjeff if (error || td->td_retval[0] != 0) 1108174647Sjeff break; 1109247801Sdavide error = seltdwait(td, asbt, precision); 1110174647Sjeff if (error) 1111174647Sjeff break; 1112174647Sjeff error = selrescan(td, ibits, obits); 1113174647Sjeff if (error || td->td_retval[0] != 0) 1114174647Sjeff break; 111535029Sphk } 1116174647Sjeff seltdclear(td); 111792252Salfred 11181541Srgrimesdone: 11191541Srgrimes /* select is not restarted after signals... */ 11201541Srgrimes if (error == ERESTART) 11211541Srgrimes error = EINTR; 11221541Srgrimes if (error == EWOULDBLOCK) 11231541Srgrimes error = 0; 1124205014Snwhitehorn 1125205014Snwhitehorn /* swizzle bit order back, if necessary */ 1126205014Snwhitehorn swizzle_fdset(obits[0]); 1127205014Snwhitehorn swizzle_fdset(obits[1]); 1128205014Snwhitehorn swizzle_fdset(obits[2]); 1129205014Snwhitehorn#undef swizzle_fdset 1130205014Snwhitehorn 11311541Srgrimes#define putbits(name, x) \ 1132197049Skib if (name && (error2 = copyout(obits[x], name, ncpubytes))) \ 11331541Srgrimes error = error2; 11341541Srgrimes if (error == 0) { 11351541Srgrimes int error2; 11361541Srgrimes 1137102779Siedowse putbits(fd_in, 0); 1138102779Siedowse putbits(fd_ou, 1); 1139102779Siedowse putbits(fd_ex, 2); 11401541Srgrimes#undef putbits 11411541Srgrimes } 114222945Sbde if (selbits != &s_selbits[0]) 114322945Sbde free(selbits, M_SELECT); 114482752Sdillon 11451541Srgrimes return (error); 11461541Srgrimes} 1147187677Sjeff/* 1148187677Sjeff * Convert a select bit set to poll flags. 1149187682Sjeff * 1150187677Sjeff * The backend always returns POLLHUP/POLLERR if appropriate and we 1151187677Sjeff * return this as a set bit in any set. 1152187677Sjeff */ 1153187677Sjeffstatic int select_flags[3] = { 1154187677Sjeff POLLRDNORM | POLLHUP | POLLERR, 1155187677Sjeff POLLWRNORM | POLLHUP | POLLERR, 1156208374Skib POLLRDBAND | POLLERR 1157187677Sjeff}; 11581541Srgrimes 1159174647Sjeff/* 1160187677Sjeff * Compute the fo_poll flags required for a fd given by the index and 1161187677Sjeff * bit position in the fd_mask array. 1162187677Sjeff */ 1163187677Sjeffstatic __inline int 1164187996Ssepotvinselflags(fd_mask **ibits, int idx, fd_mask bit) 1165187677Sjeff{ 1166187677Sjeff int flags; 1167187677Sjeff int msk; 1168187677Sjeff 1169187677Sjeff flags = 0; 1170187677Sjeff for (msk = 0; msk < 3; msk++) { 1171187677Sjeff if (ibits[msk] == NULL) 1172187677Sjeff continue; 1173187996Ssepotvin if ((ibits[msk][idx] & bit) == 0) 1174187677Sjeff continue; 1175187677Sjeff flags |= select_flags[msk]; 1176187677Sjeff } 1177187677Sjeff return (flags); 1178187677Sjeff} 1179187677Sjeff 1180187677Sjeff/* 1181187677Sjeff * Set the appropriate output bits given a mask of fired events and the 1182187677Sjeff * input bits originally requested. 1183187677Sjeff */ 1184187677Sjeffstatic __inline int 1185187677Sjeffselsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) 1186187677Sjeff{ 1187187677Sjeff int msk; 1188187677Sjeff int n; 1189187677Sjeff 1190187677Sjeff n = 0; 1191187677Sjeff for (msk = 0; msk < 3; msk++) { 1192187677Sjeff if ((events & select_flags[msk]) == 0) 1193187677Sjeff continue; 1194187677Sjeff if (ibits[msk] == NULL) 1195187677Sjeff continue; 1196187677Sjeff if ((ibits[msk][idx] & bit) == 0) 1197187677Sjeff continue; 1198187677Sjeff /* 1199187677Sjeff * XXX Check for a duplicate set. This can occur because a 1200187677Sjeff * socket calls selrecord() twice for each poll() call 1201187677Sjeff * resulting in two selfds per real fd. selrescan() will 1202187677Sjeff * call selsetbits twice as a result. 1203187677Sjeff */ 1204187677Sjeff if ((obits[msk][idx] & bit) != 0) 1205187677Sjeff continue; 1206187677Sjeff obits[msk][idx] |= bit; 1207187677Sjeff n++; 1208187677Sjeff } 1209187677Sjeff 1210187677Sjeff return (n); 1211187677Sjeff} 1212187677Sjeff 1213224778Srwatsonstatic __inline int 1214224778Srwatsongetselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) 1215224778Srwatson{ 1216255219Spjd cap_rights_t rights; 1217224778Srwatson 1218258324Spjd cap_rights_init(&rights, CAP_EVENT); 1219258324Spjd 1220258324Spjd return (fget_unlocked(fdp, fd, &rights, 0, fpp, NULL)); 1221224778Srwatson} 1222224778Srwatson 1223187677Sjeff/* 1224174647Sjeff * Traverse the list of fds attached to this thread's seltd and check for 1225174647Sjeff * completion. 1226174647Sjeff */ 122712819Sphkstatic int 1228174647Sjeffselrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) 1229174647Sjeff{ 1230187677Sjeff struct filedesc *fdp; 1231187677Sjeff struct selinfo *si; 1232174647Sjeff struct seltd *stp; 1233174647Sjeff struct selfd *sfp; 1234174647Sjeff struct selfd *sfn; 1235174647Sjeff struct file *fp; 1236187693Sjeff fd_mask bit; 1237187693Sjeff int fd, ev, n, idx; 1238224778Srwatson int error; 1239174647Sjeff 1240187677Sjeff fdp = td->td_proc->p_fd; 1241174647Sjeff stp = td->td_sel; 1242187677Sjeff n = 0; 1243174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 1244174647Sjeff fd = (int)(uintptr_t)sfp->sf_cookie; 1245174647Sjeff si = sfp->sf_si; 1246174647Sjeff selfdfree(stp, sfp); 1247174647Sjeff /* If the selinfo wasn't cleared the event didn't fire. */ 1248174647Sjeff if (si != NULL) 1249174647Sjeff continue; 1250224778Srwatson error = getselfd_cap(fdp, fd, &fp); 1251224778Srwatson if (error) 1252224778Srwatson return (error); 1253187677Sjeff idx = fd / NFDBITS; 1254187693Sjeff bit = (fd_mask)1 << (fd % NFDBITS); 1255187677Sjeff ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td); 1256192080Sjeff fdrop(fp, td); 1257187677Sjeff if (ev != 0) 1258187677Sjeff n += selsetbits(ibits, obits, idx, bit, ev); 1259174647Sjeff } 1260174647Sjeff stp->st_flags = 0; 1261174647Sjeff td->td_retval[0] = n; 1262174647Sjeff return (0); 1263174647Sjeff} 1264174647Sjeff 1265174647Sjeff/* 1266174647Sjeff * Perform the initial filedescriptor scan and register ourselves with 1267174647Sjeff * each selinfo. 1268174647Sjeff */ 1269174647Sjeffstatic int 127083366Sjulianselscan(td, ibits, obits, nfd) 127183366Sjulian struct thread *td; 127217702Ssmpatel fd_mask **ibits, **obits; 127330994Sphk int nfd; 12741541Srgrimes{ 1275187677Sjeff struct filedesc *fdp; 12761541Srgrimes struct file *fp; 1277187693Sjeff fd_mask bit; 1278187677Sjeff int ev, flags, end, fd; 1279187693Sjeff int n, idx; 1280224778Srwatson int error; 12811541Srgrimes 1282187677Sjeff fdp = td->td_proc->p_fd; 1283187677Sjeff n = 0; 1284187693Sjeff for (idx = 0, fd = 0; fd < nfd; idx++) { 1285187677Sjeff end = imin(fd + NFDBITS, nfd); 1286187677Sjeff for (bit = 1; fd < end; bit <<= 1, fd++) { 1287187677Sjeff /* Compute the list of events we're interested in. */ 1288187677Sjeff flags = selflags(ibits, idx, bit); 1289187677Sjeff if (flags == 0) 1290187677Sjeff continue; 1291224778Srwatson error = getselfd_cap(fdp, fd, &fp); 1292224778Srwatson if (error) 1293224778Srwatson return (error); 1294187677Sjeff selfdalloc(td, (void *)(uintptr_t)fd); 1295187677Sjeff ev = fo_poll(fp, flags, td->td_ucred, td); 1296192080Sjeff fdrop(fp, td); 1297187677Sjeff if (ev != 0) 1298187677Sjeff n += selsetbits(ibits, obits, idx, bit, ev); 12991541Srgrimes } 13001541Srgrimes } 1301187677Sjeff 130283366Sjulian td->td_retval[0] = n; 13031541Srgrimes return (0); 13041541Srgrimes} 13051541Srgrimes 130629351Speterint 1307275986Sdchaginsys_poll(struct thread *td, struct poll_args *uap) 130829351Speter{ 1309275986Sdchagin struct timespec ts, *tsp; 1310275986Sdchagin 1311275986Sdchagin if (uap->timeout != INFTIM) { 1312275986Sdchagin if (uap->timeout < 0) 1313275986Sdchagin return (EINVAL); 1314275986Sdchagin ts.tv_sec = uap->timeout / 1000; 1315275986Sdchagin ts.tv_nsec = (uap->timeout % 1000) * 1000000; 1316275986Sdchagin tsp = &ts; 1317275986Sdchagin } else 1318275986Sdchagin tsp = NULL; 1319275986Sdchagin 1320275986Sdchagin return (kern_poll(td, uap->fds, uap->nfds, tsp, NULL)); 1321275986Sdchagin} 1322275986Sdchagin 1323275986Sdchaginint 1324275986Sdchaginkern_poll(struct thread *td, struct pollfd *fds, u_int nfds, 1325275986Sdchagin struct timespec *tsp, sigset_t *uset) 1326275986Sdchagin{ 1327134404Sandre struct pollfd *bits; 1328134404Sandre struct pollfd smallbits[32]; 1329275986Sdchagin sbintime_t sbt, precision, tmp; 1330275986Sdchagin time_t over; 1331275986Sdchagin struct timespec ts; 1332247801Sdavide int error; 133329351Speter size_t ni; 133429351Speter 1335275986Sdchagin precision = 0; 1336275986Sdchagin if (tsp != NULL) { 1337275986Sdchagin if (tsp->tv_sec < 0) 1338275986Sdchagin return (EINVAL); 1339275986Sdchagin if (tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000) 1340275986Sdchagin return (EINVAL); 1341275986Sdchagin if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 1342275986Sdchagin sbt = 0; 1343275986Sdchagin else { 1344275986Sdchagin ts = *tsp; 1345275986Sdchagin if (ts.tv_sec > INT32_MAX / 2) { 1346275986Sdchagin over = ts.tv_sec - INT32_MAX / 2; 1347275986Sdchagin ts.tv_sec -= over; 1348275986Sdchagin } else 1349275986Sdchagin over = 0; 1350275986Sdchagin tmp = tstosbt(ts); 1351275986Sdchagin precision = tmp; 1352275986Sdchagin precision >>= tc_precexp; 1353275986Sdchagin if (TIMESEL(&sbt, tmp)) 1354275986Sdchagin sbt += tc_tick_sbt; 1355275986Sdchagin sbt += tmp; 1356275986Sdchagin } 1357275986Sdchagin } else 1358275986Sdchagin sbt = -1; 1359275986Sdchagin 1360177368Sjeff if (nfds > maxfilesperproc && nfds > FD_SETSIZE) 1361174647Sjeff return (EINVAL); 136272146Speter ni = nfds * sizeof(struct pollfd); 136329351Speter if (ni > sizeof(smallbits)) 1364111119Simp bits = malloc(ni, M_TEMP, M_WAITOK); 136529351Speter else 136629351Speter bits = smallbits; 1367275986Sdchagin error = copyin(fds, bits, ni); 136829351Speter if (error) 1369174647Sjeff goto done; 1370275986Sdchagin 1371275986Sdchagin if (uset != NULL) { 1372275986Sdchagin error = kern_sigprocmask(td, SIG_SETMASK, uset, 1373275986Sdchagin &td->td_oldsigmask, 0); 1374275986Sdchagin if (error) 1375174647Sjeff goto done; 1376275986Sdchagin td->td_pflags |= TDP_OLDMASK; 1377275986Sdchagin /* 1378275986Sdchagin * Make sure that ast() is called on return to 1379275986Sdchagin * usermode and TDP_OLDMASK is cleared, restoring old 1380275986Sdchagin * sigmask. 1381275986Sdchagin */ 1382275986Sdchagin thread_lock(td); 1383275986Sdchagin td->td_flags |= TDF_ASTPENDING; 1384275986Sdchagin thread_unlock(td); 1385275986Sdchagin } 1386275986Sdchagin 1387174647Sjeff seltdinit(td); 1388174647Sjeff /* Iterate until the timeout expires or descriptors become ready. */ 1389174647Sjeff for (;;) { 1390174647Sjeff error = pollscan(td, bits, nfds); 1391174647Sjeff if (error || td->td_retval[0] != 0) 1392174647Sjeff break; 1393275986Sdchagin error = seltdwait(td, sbt, precision); 1394174647Sjeff if (error) 1395174647Sjeff break; 1396174647Sjeff error = pollrescan(td); 1397174647Sjeff if (error || td->td_retval[0] != 0) 1398174647Sjeff break; 139929351Speter } 1400174647Sjeff seltdclear(td); 140192252Salfred 140229351Speterdone: 140329351Speter /* poll is not restarted after signals... */ 140429351Speter if (error == ERESTART) 140529351Speter error = EINTR; 140629351Speter if (error == EWOULDBLOCK) 140729351Speter error = 0; 140829351Speter if (error == 0) { 1409275986Sdchagin error = pollout(td, bits, fds, nfds); 141029351Speter if (error) 141129351Speter goto out; 141229351Speter } 141329351Speterout: 141429351Speter if (ni > sizeof(smallbits)) 141529351Speter free(bits, M_TEMP); 141629351Speter return (error); 141729351Speter} 141829351Speter 1419275986Sdchaginint 1420275986Sdchaginsys_ppoll(struct thread *td, struct ppoll_args *uap) 1421275986Sdchagin{ 1422275986Sdchagin struct timespec ts, *tsp; 1423275986Sdchagin sigset_t set, *ssp; 1424275986Sdchagin int error; 1425275986Sdchagin 1426275986Sdchagin if (uap->ts != NULL) { 1427275986Sdchagin error = copyin(uap->ts, &ts, sizeof(ts)); 1428275986Sdchagin if (error) 1429275986Sdchagin return (error); 1430275986Sdchagin tsp = &ts; 1431275986Sdchagin } else 1432275986Sdchagin tsp = NULL; 1433275986Sdchagin if (uap->set != NULL) { 1434275986Sdchagin error = copyin(uap->set, &set, sizeof(set)); 1435275986Sdchagin if (error) 1436275986Sdchagin return (error); 1437275986Sdchagin ssp = &set; 1438275986Sdchagin } else 1439275986Sdchagin ssp = NULL; 1440275986Sdchagin /* 1441275986Sdchagin * fds is still a pointer to user space. kern_poll() will 1442275986Sdchagin * take care of copyin that array to the kernel space. 1443275986Sdchagin */ 1444275986Sdchagin 1445275986Sdchagin return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); 1446275986Sdchagin} 1447275986Sdchagin 144829351Speterstatic int 1449174647Sjeffpollrescan(struct thread *td) 1450174647Sjeff{ 1451174647Sjeff struct seltd *stp; 1452174647Sjeff struct selfd *sfp; 1453174647Sjeff struct selfd *sfn; 1454174647Sjeff struct selinfo *si; 1455174647Sjeff struct filedesc *fdp; 1456174647Sjeff struct file *fp; 1457174647Sjeff struct pollfd *fd; 1458255230Ssbruno#ifdef CAPABILITIES 1459255219Spjd cap_rights_t rights; 1460255230Ssbruno#endif 1461174647Sjeff int n; 1462174647Sjeff 1463174647Sjeff n = 0; 1464174647Sjeff fdp = td->td_proc->p_fd; 1465174647Sjeff stp = td->td_sel; 1466174647Sjeff FILEDESC_SLOCK(fdp); 1467174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 1468174647Sjeff fd = (struct pollfd *)sfp->sf_cookie; 1469174647Sjeff si = sfp->sf_si; 1470174647Sjeff selfdfree(stp, sfp); 1471174647Sjeff /* If the selinfo wasn't cleared the event didn't fire. */ 1472174647Sjeff if (si != NULL) 1473174647Sjeff continue; 1474247602Spjd fp = fdp->fd_ofiles[fd->fd].fde_file; 1475224910Sjonathan#ifdef CAPABILITIES 1476247602Spjd if (fp == NULL || 1477255219Spjd cap_check(cap_rights(fdp, fd->fd), 1478258324Spjd cap_rights_init(&rights, CAP_EVENT)) != 0) 1479224910Sjonathan#else 1480247602Spjd if (fp == NULL) 1481224910Sjonathan#endif 1482247602Spjd { 1483174647Sjeff fd->revents = POLLNVAL; 1484174647Sjeff n++; 1485174647Sjeff continue; 1486174647Sjeff } 1487224910Sjonathan 1488174647Sjeff /* 1489174647Sjeff * Note: backend also returns POLLHUP and 1490174647Sjeff * POLLERR if appropriate. 1491174647Sjeff */ 1492174647Sjeff fd->revents = fo_poll(fp, fd->events, td->td_ucred, td); 1493174647Sjeff if (fd->revents != 0) 1494174647Sjeff n++; 1495174647Sjeff } 1496174647Sjeff FILEDESC_SUNLOCK(fdp); 1497174647Sjeff stp->st_flags = 0; 1498174647Sjeff td->td_retval[0] = n; 1499174647Sjeff return (0); 1500174647Sjeff} 1501174647Sjeff 1502174647Sjeff 1503174647Sjeffstatic int 1504211941Skibpollout(td, fds, ufds, nfd) 1505211941Skib struct thread *td; 1506189708Srwatson struct pollfd *fds; 1507189708Srwatson struct pollfd *ufds; 1508189708Srwatson u_int nfd; 1509189708Srwatson{ 1510189708Srwatson int error = 0; 1511189708Srwatson u_int i = 0; 1512211941Skib u_int n = 0; 1513189708Srwatson 1514189708Srwatson for (i = 0; i < nfd; i++) { 1515189708Srwatson error = copyout(&fds->revents, &ufds->revents, 1516189708Srwatson sizeof(ufds->revents)); 1517189708Srwatson if (error) 1518189708Srwatson return (error); 1519211941Skib if (fds->revents != 0) 1520211941Skib n++; 1521189708Srwatson fds++; 1522189708Srwatson ufds++; 1523189708Srwatson } 1524211941Skib td->td_retval[0] = n; 1525189708Srwatson return (0); 1526189708Srwatson} 1527189708Srwatson 1528189708Srwatsonstatic int 152983366Sjulianpollscan(td, fds, nfd) 153083366Sjulian struct thread *td; 153129351Speter struct pollfd *fds; 153273159Sjlemon u_int nfd; 153329351Speter{ 1534174647Sjeff struct filedesc *fdp = td->td_proc->p_fd; 153529351Speter struct file *fp; 1536255230Ssbruno#ifdef CAPABILITIES 1537255219Spjd cap_rights_t rights; 1538255230Ssbruno#endif 1539247602Spjd int i, n = 0; 154029351Speter 1541168355Srwatson FILEDESC_SLOCK(fdp); 154229351Speter for (i = 0; i < nfd; i++, fds++) { 1543268338Smjg if (fds->fd > fdp->fd_lastfile) { 154429351Speter fds->revents = POLLNVAL; 154529351Speter n++; 154641632Sjkh } else if (fds->fd < 0) { 154741632Sjkh fds->revents = 0; 154829351Speter } else { 1549247602Spjd fp = fdp->fd_ofiles[fds->fd].fde_file; 1550224910Sjonathan#ifdef CAPABILITIES 1551247602Spjd if (fp == NULL || 1552247602Spjd cap_check(cap_rights(fdp, fds->fd), 1553258324Spjd cap_rights_init(&rights, CAP_EVENT)) != 0) 1554224910Sjonathan#else 1555247602Spjd if (fp == NULL) 1556224910Sjonathan#endif 1557247602Spjd { 155829351Speter fds->revents = POLLNVAL; 155929351Speter n++; 156029351Speter } else { 156131364Sbde /* 156231364Sbde * Note: backend also returns POLLHUP and 156331364Sbde * POLLERR if appropriate. 156431364Sbde */ 1565174647Sjeff selfdalloc(td, fds); 156651418Sgreen fds->revents = fo_poll(fp, fds->events, 1567101983Srwatson td->td_ucred, td); 1568196460Skib /* 1569196460Skib * POSIX requires POLLOUT to be never 1570196460Skib * set simultaneously with POLLHUP. 1571196460Skib */ 1572196460Skib if ((fds->revents & POLLHUP) != 0) 1573196460Skib fds->revents &= ~POLLOUT; 1574196460Skib 157529351Speter if (fds->revents != 0) 157629351Speter n++; 157729351Speter } 157829351Speter } 157929351Speter } 1580168355Srwatson FILEDESC_SUNLOCK(fdp); 158183366Sjulian td->td_retval[0] = n; 158229351Speter return (0); 158329351Speter} 158429351Speter 158529351Speter/* 158629351Speter * OpenBSD poll system call. 1587167211Srwatson * 158829351Speter * XXX this isn't quite a true representation.. OpenBSD uses select ops. 158929351Speter */ 159029351Speter#ifndef _SYS_SYSPROTO_H_ 159129351Speterstruct openbsd_poll_args { 159229351Speter struct pollfd *fds; 159329351Speter u_int nfds; 159429351Speter int timeout; 159529351Speter}; 159629351Speter#endif 159729351Speterint 1598225617Skmacysys_openbsd_poll(td, uap) 159983366Sjulian register struct thread *td; 160029351Speter register struct openbsd_poll_args *uap; 160129351Speter{ 1602225617Skmacy return (sys_poll(td, (struct poll_args *)uap)); 160329351Speter} 160429351Speter 160592252Salfred/* 1606252356Sdavide * XXX This was created specifically to support netncp and netsmb. This 1607252356Sdavide * allows the caller to specify a socket to wait for events on. It returns 1608252356Sdavide * 0 if any events matched and an error otherwise. There is no way to 1609252356Sdavide * determine which events fired. 1610252356Sdavide */ 1611252356Sdavideint 1612252356Sdavideselsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) 1613252356Sdavide{ 1614252356Sdavide struct timeval rtv; 1615252356Sdavide sbintime_t asbt, precision, rsbt; 1616252356Sdavide int error; 1617252356Sdavide 1618252367Speter precision = 0; /* stupid gcc! */ 1619252356Sdavide if (tvp != NULL) { 1620252356Sdavide rtv = *tvp; 1621252356Sdavide if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1622252356Sdavide rtv.tv_usec >= 1000000) 1623252356Sdavide return (EINVAL); 1624252356Sdavide if (!timevalisset(&rtv)) 1625252356Sdavide asbt = 0; 1626252356Sdavide else if (rtv.tv_sec <= INT32_MAX) { 1627252356Sdavide rsbt = tvtosbt(rtv); 1628252356Sdavide precision = rsbt; 1629252356Sdavide precision >>= tc_precexp; 1630252356Sdavide if (TIMESEL(&asbt, rsbt)) 1631252356Sdavide asbt += tc_tick_sbt; 1632304894Skib if (asbt <= SBT_MAX - rsbt) 1633252356Sdavide asbt += rsbt; 1634252356Sdavide else 1635252356Sdavide asbt = -1; 1636252356Sdavide } else 1637252356Sdavide asbt = -1; 1638252356Sdavide } else 1639252356Sdavide asbt = -1; 1640252356Sdavide seltdinit(td); 1641252356Sdavide /* 1642252356Sdavide * Iterate until the timeout expires or the socket becomes ready. 1643252356Sdavide */ 1644252356Sdavide for (;;) { 1645252356Sdavide selfdalloc(td, NULL); 1646252356Sdavide error = sopoll(so, events, NULL, td); 1647252356Sdavide /* error here is actually the ready events. */ 1648252356Sdavide if (error) 1649252356Sdavide return (0); 1650252356Sdavide error = seltdwait(td, asbt, precision); 1651252356Sdavide if (error) 1652252356Sdavide break; 1653252356Sdavide } 1654252356Sdavide seltdclear(td); 1655252356Sdavide /* XXX Duplicates ncp/smb behavior. */ 1656252356Sdavide if (error == ERESTART) 1657252356Sdavide error = 0; 1658252356Sdavide return (error); 1659252356Sdavide} 1660252356Sdavide 1661252356Sdavide/* 1662174647Sjeff * Preallocate two selfds associated with 'cookie'. Some fo_poll routines 1663174647Sjeff * have two select sets, one for read and another for write. 1664174647Sjeff */ 1665174647Sjeffstatic void 1666174647Sjeffselfdalloc(struct thread *td, void *cookie) 1667174647Sjeff{ 1668174647Sjeff struct seltd *stp; 1669174647Sjeff 1670174647Sjeff stp = td->td_sel; 1671174647Sjeff if (stp->st_free1 == NULL) 1672174647Sjeff stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1673174647Sjeff stp->st_free1->sf_td = stp; 1674174647Sjeff stp->st_free1->sf_cookie = cookie; 1675174647Sjeff if (stp->st_free2 == NULL) 1676174647Sjeff stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1677174647Sjeff stp->st_free2->sf_td = stp; 1678174647Sjeff stp->st_free2->sf_cookie = cookie; 1679174647Sjeff} 1680174647Sjeff 1681174647Sjeffstatic void 1682174647Sjeffselfdfree(struct seltd *stp, struct selfd *sfp) 1683174647Sjeff{ 1684174647Sjeff STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link); 1685174647Sjeff mtx_lock(sfp->sf_mtx); 1686174647Sjeff if (sfp->sf_si) 1687174647Sjeff TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads); 1688174647Sjeff mtx_unlock(sfp->sf_mtx); 1689174647Sjeff uma_zfree(selfd_zone, sfp); 1690174647Sjeff} 1691174647Sjeff 1692225177Sattilio/* Drain the waiters tied to all the selfd belonging the specified selinfo. */ 1693225177Sattiliovoid 1694225177Sattilioseldrain(sip) 1695225177Sattilio struct selinfo *sip; 1696225177Sattilio{ 1697225177Sattilio 1698225177Sattilio /* 1699225177Sattilio * This feature is already provided by doselwakeup(), thus it is 1700225177Sattilio * enough to go for it. 1701225177Sattilio * Eventually, the context, should take care to avoid races 1702225177Sattilio * between thread calling select()/poll() and file descriptor 1703225177Sattilio * detaching, but, again, the races are just the same as 1704225177Sattilio * selwakeup(). 1705225177Sattilio */ 1706225177Sattilio doselwakeup(sip, -1); 1707225177Sattilio} 1708225177Sattilio 1709174647Sjeff/* 17101541Srgrimes * Record a select request. 17111541Srgrimes */ 17121541Srgrimesvoid 17131541Srgrimesselrecord(selector, sip) 171483366Sjulian struct thread *selector; 17151541Srgrimes struct selinfo *sip; 17161541Srgrimes{ 1717174647Sjeff struct selfd *sfp; 1718174647Sjeff struct seltd *stp; 1719174647Sjeff struct mtx *mtxp; 17201541Srgrimes 1721174647Sjeff stp = selector->td_sel; 172292252Salfred /* 1723174647Sjeff * Don't record when doing a rescan. 172492252Salfred */ 1725174647Sjeff if (stp->st_flags & SELTD_RESCAN) 1726174647Sjeff return; 1727174647Sjeff /* 1728174647Sjeff * Grab one of the preallocated descriptors. 1729174647Sjeff */ 1730174647Sjeff sfp = NULL; 1731174647Sjeff if ((sfp = stp->st_free1) != NULL) 1732174647Sjeff stp->st_free1 = NULL; 1733174647Sjeff else if ((sfp = stp->st_free2) != NULL) 1734174647Sjeff stp->st_free2 = NULL; 1735174647Sjeff else 1736174647Sjeff panic("selrecord: No free selfd on selq"); 1737195259Sjeff mtxp = sip->si_mtx; 1738195259Sjeff if (mtxp == NULL) 1739195259Sjeff mtxp = mtx_pool_find(mtxpool_select, sip); 1740174647Sjeff /* 1741174647Sjeff * Initialize the sfp and queue it in the thread. 1742174647Sjeff */ 1743174647Sjeff sfp->sf_si = sip; 1744174647Sjeff sfp->sf_mtx = mtxp; 1745174647Sjeff STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link); 1746174647Sjeff /* 1747174647Sjeff * Now that we've locked the sip, check for initialization. 1748174647Sjeff */ 1749174647Sjeff mtx_lock(mtxp); 1750174647Sjeff if (sip->si_mtx == NULL) { 1751174647Sjeff sip->si_mtx = mtxp; 1752174647Sjeff TAILQ_INIT(&sip->si_tdlist); 175383366Sjulian } 1754174647Sjeff /* 1755174647Sjeff * Add this thread to the list of selfds listening on this selinfo. 1756174647Sjeff */ 1757174647Sjeff TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads); 1758174647Sjeff mtx_unlock(sip->si_mtx); 17591541Srgrimes} 17601541Srgrimes 1761122352Stanimura/* Wake up a selecting thread. */ 1762122352Stanimuravoid 1763122352Stanimuraselwakeup(sip) 1764122352Stanimura struct selinfo *sip; 1765122352Stanimura{ 1766122352Stanimura doselwakeup(sip, -1); 1767122352Stanimura} 1768122352Stanimura 1769122352Stanimura/* Wake up a selecting thread, and set its priority. */ 1770122352Stanimuravoid 1771122352Stanimuraselwakeuppri(sip, pri) 1772122352Stanimura struct selinfo *sip; 1773122352Stanimura int pri; 1774122352Stanimura{ 1775122352Stanimura doselwakeup(sip, pri); 1776122352Stanimura} 1777122352Stanimura 17781541Srgrimes/* 17791541Srgrimes * Do a wakeup when a selectable event occurs. 17801541Srgrimes */ 1781122352Stanimurastatic void 1782122352Stanimuradoselwakeup(sip, pri) 178392252Salfred struct selinfo *sip; 1784122352Stanimura int pri; 17851541Srgrimes{ 1786174647Sjeff struct selfd *sfp; 1787174647Sjeff struct selfd *sfn; 1788174647Sjeff struct seltd *stp; 17891541Srgrimes 1790174647Sjeff /* If it's not initialized there can't be any waiters. */ 1791174647Sjeff if (sip->si_mtx == NULL) 179292252Salfred return; 1793174647Sjeff /* 1794174647Sjeff * Locking the selinfo locks all selfds associated with it. 1795174647Sjeff */ 1796174647Sjeff mtx_lock(sip->si_mtx); 1797174647Sjeff TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) { 1798174647Sjeff /* 1799174647Sjeff * Once we remove this sfp from the list and clear the 1800174647Sjeff * sf_si seltdclear will know to ignore this si. 1801174647Sjeff */ 1802174647Sjeff TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads); 1803174647Sjeff sfp->sf_si = NULL; 1804174647Sjeff stp = sfp->sf_td; 1805174647Sjeff mtx_lock(&stp->st_mtx); 1806174647Sjeff stp->st_flags |= SELTD_PENDING; 1807174647Sjeff cv_broadcastpri(&stp->st_wait, pri); 1808174647Sjeff mtx_unlock(&stp->st_mtx); 18091541Srgrimes } 1810174647Sjeff mtx_unlock(sip->si_mtx); 18111541Srgrimes} 181276564Stanimura 1813174647Sjeffstatic void 1814174647Sjeffseltdinit(struct thread *td) 1815174647Sjeff{ 1816174647Sjeff struct seltd *stp; 181776564Stanimura 1818174647Sjeff if ((stp = td->td_sel) != NULL) 1819174647Sjeff goto out; 1820174647Sjeff td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO); 1821174647Sjeff mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF); 1822174647Sjeff cv_init(&stp->st_wait, "select"); 1823174647Sjeffout: 1824174647Sjeff stp->st_flags = 0; 1825174647Sjeff STAILQ_INIT(&stp->st_selq); 1826174647Sjeff} 1827174647Sjeff 1828174647Sjeffstatic int 1829247801Sdavideseltdwait(struct thread *td, sbintime_t sbt, sbintime_t precision) 1830174647Sjeff{ 1831174647Sjeff struct seltd *stp; 1832174647Sjeff int error; 1833174647Sjeff 1834174647Sjeff stp = td->td_sel; 1835174647Sjeff /* 1836174647Sjeff * An event of interest may occur while we do not hold the seltd 1837174647Sjeff * locked so check the pending flag before we sleep. 1838174647Sjeff */ 1839174647Sjeff mtx_lock(&stp->st_mtx); 1840174647Sjeff /* 1841174647Sjeff * Any further calls to selrecord will be a rescan. 1842174647Sjeff */ 1843174647Sjeff stp->st_flags |= SELTD_RESCAN; 1844174647Sjeff if (stp->st_flags & SELTD_PENDING) { 1845174647Sjeff mtx_unlock(&stp->st_mtx); 1846174647Sjeff return (0); 1847174647Sjeff } 1848247801Sdavide if (sbt == 0) 1849247801Sdavide error = EWOULDBLOCK; 1850247801Sdavide else if (sbt != -1) 1851247801Sdavide error = cv_timedwait_sig_sbt(&stp->st_wait, &stp->st_mtx, 1852247801Sdavide sbt, precision, C_ABSOLUTE); 1853174647Sjeff else 1854174647Sjeff error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); 1855174647Sjeff mtx_unlock(&stp->st_mtx); 1856174647Sjeff 1857174647Sjeff return (error); 1858174647Sjeff} 1859174647Sjeff 1860174647Sjeffvoid 1861174647Sjeffseltdfini(struct thread *td) 1862174647Sjeff{ 1863174647Sjeff struct seltd *stp; 1864174647Sjeff 1865174647Sjeff stp = td->td_sel; 1866174647Sjeff if (stp == NULL) 1867174647Sjeff return; 1868174647Sjeff if (stp->st_free1) 1869174647Sjeff uma_zfree(selfd_zone, stp->st_free1); 1870174647Sjeff if (stp->st_free2) 1871174647Sjeff uma_zfree(selfd_zone, stp->st_free2); 1872174647Sjeff td->td_sel = NULL; 1873174647Sjeff free(stp, M_SELECT); 1874174647Sjeff} 1875174647Sjeff 1876174647Sjeff/* 1877174647Sjeff * Remove the references to the thread from all of the objects we were 1878174647Sjeff * polling. 1879174647Sjeff */ 188076564Stanimurastatic void 1881174647Sjeffseltdclear(struct thread *td) 188276564Stanimura{ 1883174647Sjeff struct seltd *stp; 1884174647Sjeff struct selfd *sfp; 1885174647Sjeff struct selfd *sfn; 1886174647Sjeff 1887174647Sjeff stp = td->td_sel; 1888174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) 1889174647Sjeff selfdfree(stp, sfp); 1890174647Sjeff stp->st_flags = 0; 189176564Stanimura} 1892174647Sjeff 1893174647Sjeffstatic void selectinit(void *); 1894174647SjeffSYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL); 1895174647Sjeffstatic void 1896174647Sjeffselectinit(void *dummy __unused) 1897174647Sjeff{ 1898195259Sjeff 1899174647Sjeff selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL, 1900174647Sjeff NULL, NULL, UMA_ALIGN_PTR, 0); 1901195259Sjeff mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF); 1902174647Sjeff} 1903