1258945Sroberto/* 2280849Scy * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC") 3258945Sroberto * Copyright (C) 1998-2003 Internet Software Consortium. 4258945Sroberto * 5258945Sroberto * Permission to use, copy, modify, and/or distribute this software for any 6258945Sroberto * purpose with or without fee is hereby granted, provided that the above 7258945Sroberto * copyright notice and this permission notice appear in all copies. 8258945Sroberto * 9258945Sroberto * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10258945Sroberto * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11258945Sroberto * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12258945Sroberto * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13258945Sroberto * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14258945Sroberto * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15258945Sroberto * PERFORMANCE OF THIS SOFTWARE. 16258945Sroberto */ 17258945Sroberto 18280849Scy/* $Id$ */ 19258945Sroberto 20258945Sroberto/*! \file */ 21258945Sroberto 22258945Sroberto#include <config.h> 23258945Sroberto 24258945Sroberto#include <sys/param.h> 25258945Sroberto#include <sys/types.h> 26258945Sroberto#include <sys/socket.h> 27258945Sroberto#include <sys/stat.h> 28258945Sroberto#include <sys/time.h> 29258945Sroberto#include <sys/uio.h> 30258945Sroberto 31258945Sroberto#include <errno.h> 32258945Sroberto#include <fcntl.h> 33258945Sroberto#include <stddef.h> 34258945Sroberto#include <stdlib.h> 35258945Sroberto#include <string.h> 36258945Sroberto#include <unistd.h> 37258945Sroberto 38258945Sroberto#include <isc/buffer.h> 39258945Sroberto#include <isc/bufferlist.h> 40258945Sroberto#include <isc/condition.h> 41258945Sroberto#include <isc/formatcheck.h> 42258945Sroberto#include <isc/list.h> 43258945Sroberto#include <isc/log.h> 44258945Sroberto#include <isc/mem.h> 45258945Sroberto#include <isc/msgs.h> 46258945Sroberto#include <isc/mutex.h> 47258945Sroberto#include <isc/net.h> 48258945Sroberto#include <isc/once.h> 49258945Sroberto#include <isc/platform.h> 50258945Sroberto#include <isc/print.h> 51258945Sroberto#include <isc/region.h> 52258945Sroberto#include <isc/socket.h> 53258945Sroberto#include <isc/stats.h> 54258945Sroberto#include <isc/strerror.h> 55258945Sroberto#include <isc/task.h> 56258945Sroberto#include <isc/thread.h> 57258945Sroberto#include <isc/util.h> 58258945Sroberto#include <isc/xml.h> 59258945Sroberto 60258945Sroberto#ifdef ISC_PLATFORM_HAVESYSUNH 61258945Sroberto#include <sys/un.h> 62258945Sroberto#endif 63258945Sroberto#ifdef ISC_PLATFORM_HAVEKQUEUE 64258945Sroberto#include <sys/event.h> 65258945Sroberto#endif 66258945Sroberto#ifdef ISC_PLATFORM_HAVEEPOLL 67258945Sroberto#include <sys/epoll.h> 68258945Sroberto#endif 69258945Sroberto#ifdef ISC_PLATFORM_HAVEDEVPOLL 70280849Scy#if defined(HAVE_SYS_DEVPOLL_H) 71258945Sroberto#include <sys/devpoll.h> 72280849Scy#elif defined(HAVE_DEVPOLL_H) 73280849Scy#include <devpoll.h> 74258945Sroberto#endif 75280849Scy#endif 76258945Sroberto 77258945Sroberto#include "errno2result.h" 78258945Sroberto 79280849Scy/* See task.c about the following definition: */ 80280849Scy#ifdef BIND9 81280849Scy#ifdef ISC_PLATFORM_USETHREADS 82280849Scy#define USE_WATCHER_THREAD 83280849Scy#else 84280849Scy#define USE_SHARED_MANAGER 85280849Scy#endif /* ISC_PLATFORM_USETHREADS */ 86280849Scy#endif /* BIND9 */ 87280849Scy 88280849Scy#ifndef USE_WATCHER_THREAD 89258945Sroberto#include "socket_p.h" 90280849Scy#include "../task_p.h" 91280849Scy#endif /* USE_WATCHER_THREAD */ 92258945Sroberto 93258945Sroberto#if defined(SO_BSDCOMPAT) && defined(__linux__) 94258945Sroberto#include <sys/utsname.h> 95258945Sroberto#endif 96258945Sroberto 97258945Sroberto/*% 98258945Sroberto * Choose the most preferable multiplex method. 99258945Sroberto */ 100258945Sroberto#ifdef ISC_PLATFORM_HAVEKQUEUE 101258945Sroberto#define USE_KQUEUE 102258945Sroberto#elif defined (ISC_PLATFORM_HAVEEPOLL) 103258945Sroberto#define USE_EPOLL 104258945Sroberto#elif defined (ISC_PLATFORM_HAVEDEVPOLL) 105258945Sroberto#define USE_DEVPOLL 106258945Srobertotypedef struct { 107258945Sroberto unsigned int want_read : 1, 108258945Sroberto want_write : 1; 109258945Sroberto} pollinfo_t; 110258945Sroberto#else 111258945Sroberto#define USE_SELECT 112258945Sroberto#endif /* ISC_PLATFORM_HAVEKQUEUE */ 113258945Sroberto 114280849Scy#ifndef USE_WATCHER_THREAD 115258945Sroberto#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) 116258945Srobertostruct isc_socketwait { 117258945Sroberto int nevents; 118258945Sroberto}; 119258945Sroberto#elif defined (USE_SELECT) 120258945Srobertostruct isc_socketwait { 121258945Sroberto fd_set *readset; 122258945Sroberto fd_set *writeset; 123258945Sroberto int nfds; 124258945Sroberto int maxfd; 125258945Sroberto}; 126258945Sroberto#endif /* USE_KQUEUE */ 127280849Scy#endif /* !USE_WATCHER_THREAD */ 128258945Sroberto 129258945Sroberto/*% 130258945Sroberto * Maximum number of allowable open sockets. This is also the maximum 131258945Sroberto * allowable socket file descriptor. 132258945Sroberto * 133258945Sroberto * Care should be taken before modifying this value for select(): 134258945Sroberto * The API standard doesn't ensure select() accept more than (the system default 135258945Sroberto * of) FD_SETSIZE descriptors, and the default size should in fact be fine in 136258945Sroberto * the vast majority of cases. This constant should therefore be increased only 137258945Sroberto * when absolutely necessary and possible, i.e., the server is exhausting all 138258945Sroberto * available file descriptors (up to FD_SETSIZE) and the select() function 139258945Sroberto * and FD_xxx macros support larger values than FD_SETSIZE (which may not 140258945Sroberto * always by true, but we keep using some of them to ensure as much 141258945Sroberto * portability as possible). Note also that overall server performance 142258945Sroberto * may be rather worsened with a larger value of this constant due to 143258945Sroberto * inherent scalability problems of select(). 144258945Sroberto * 145258945Sroberto * As a special note, this value shouldn't have to be touched if 146258945Sroberto * this is a build for an authoritative only DNS server. 147258945Sroberto */ 148258945Sroberto#ifndef ISC_SOCKET_MAXSOCKETS 149258945Sroberto#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) 150258945Sroberto#define ISC_SOCKET_MAXSOCKETS 4096 151258945Sroberto#elif defined(USE_SELECT) 152258945Sroberto#define ISC_SOCKET_MAXSOCKETS FD_SETSIZE 153258945Sroberto#endif /* USE_KQUEUE... */ 154258945Sroberto#endif /* ISC_SOCKET_MAXSOCKETS */ 155258945Sroberto 156258945Sroberto#ifdef USE_SELECT 157258945Sroberto/*% 158258945Sroberto * Mac OS X needs a special definition to support larger values in select(). 159258945Sroberto * We always define this because a larger value can be specified run-time. 160258945Sroberto */ 161258945Sroberto#ifdef __APPLE__ 162258945Sroberto#define _DARWIN_UNLIMITED_SELECT 163258945Sroberto#endif /* __APPLE__ */ 164258945Sroberto#endif /* USE_SELECT */ 165258945Sroberto 166258945Sroberto#ifdef ISC_SOCKET_USE_POLLWATCH 167258945Sroberto/*% 168258945Sroberto * If this macro is defined, enable workaround for a Solaris /dev/poll kernel 169258945Sroberto * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for 170258945Sroberto * some of the specified FD. The idea is based on the observation that it's 171258945Sroberto * likely for a busy server to keep receiving packets. It specifically works 172258945Sroberto * as follows: the socket watcher is first initialized with the state of 173258945Sroberto * "poll_idle". While it's in the idle state it keeps sleeping until a socket 174258945Sroberto * event occurs. When it wakes up for a socket I/O event, it moves to the 175258945Sroberto * poll_active state, and sets the poll timeout to a short period 176258945Sroberto * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the 177258945Sroberto * watcher goes to the poll_checking state with the same timeout period. 178258945Sroberto * In this state, the watcher tries to detect whether this is a break 179258945Sroberto * during intermittent events or the kernel bug is triggered. If the next 180258945Sroberto * polling reports an event within the short period, the previous timeout is 181258945Sroberto * likely to be a kernel bug, and so the watcher goes back to the active state. 182258945Sroberto * Otherwise, it moves to the idle state again. 183258945Sroberto * 184258945Sroberto * It's not clear whether this is a thread-related bug, but since we've only 185258945Sroberto * seen this with threads, this workaround is used only when enabling threads. 186258945Sroberto */ 187258945Sroberto 188258945Srobertotypedef enum { poll_idle, poll_active, poll_checking } pollstate_t; 189258945Sroberto 190258945Sroberto#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT 191258945Sroberto#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 192258945Sroberto#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ 193258945Sroberto#endif /* ISC_SOCKET_USE_POLLWATCH */ 194258945Sroberto 195258945Sroberto/*% 196258945Sroberto * Size of per-FD lock buckets. 197258945Sroberto */ 198258945Sroberto#ifdef ISC_PLATFORM_USETHREADS 199258945Sroberto#define FDLOCK_COUNT 1024 200258945Sroberto#define FDLOCK_ID(fd) ((fd) % FDLOCK_COUNT) 201258945Sroberto#else 202258945Sroberto#define FDLOCK_COUNT 1 203258945Sroberto#define FDLOCK_ID(fd) 0 204258945Sroberto#endif /* ISC_PLATFORM_USETHREADS */ 205258945Sroberto 206258945Sroberto/*% 207258945Sroberto * Maximum number of events communicated with the kernel. There should normally 208258945Sroberto * be no need for having a large number. 209258945Sroberto */ 210258945Sroberto#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) 211258945Sroberto#ifndef ISC_SOCKET_MAXEVENTS 212258945Sroberto#define ISC_SOCKET_MAXEVENTS 64 213258945Sroberto#endif 214258945Sroberto#endif 215258945Sroberto 216258945Sroberto/*% 217258945Sroberto * Some systems define the socket length argument as an int, some as size_t, 218258945Sroberto * some as socklen_t. This is here so it can be easily changed if needed. 219258945Sroberto */ 220258945Sroberto#ifndef ISC_SOCKADDR_LEN_T 221258945Sroberto#define ISC_SOCKADDR_LEN_T unsigned int 222258945Sroberto#endif 223258945Sroberto 224258945Sroberto/*% 225258945Sroberto * Define what the possible "soft" errors can be. These are non-fatal returns 226258945Sroberto * of various network related functions, like recv() and so on. 227258945Sroberto * 228258945Sroberto * For some reason, BSDI (and perhaps others) will sometimes return <0 229258945Sroberto * from recv() but will have errno==0. This is broken, but we have to 230258945Sroberto * work around it here. 231258945Sroberto */ 232258945Sroberto#define SOFT_ERROR(e) ((e) == EAGAIN || \ 233258945Sroberto (e) == EWOULDBLOCK || \ 234258945Sroberto (e) == EINTR || \ 235258945Sroberto (e) == 0) 236258945Sroberto 237258945Sroberto#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) 238258945Sroberto 239258945Sroberto/*!< 240258945Sroberto * DLVL(90) -- Function entry/exit and other tracing. 241258945Sroberto * DLVL(70) -- Socket "correctness" -- including returning of events, etc. 242258945Sroberto * DLVL(60) -- Socket data send/receive 243258945Sroberto * DLVL(50) -- Event tracing, including receiving/sending completion events. 244258945Sroberto * DLVL(20) -- Socket creation/destruction. 245258945Sroberto */ 246258945Sroberto#define TRACE_LEVEL 90 247258945Sroberto#define CORRECTNESS_LEVEL 70 248258945Sroberto#define IOEVENT_LEVEL 60 249258945Sroberto#define EVENT_LEVEL 50 250258945Sroberto#define CREATION_LEVEL 20 251258945Sroberto 252258945Sroberto#define TRACE DLVL(TRACE_LEVEL) 253258945Sroberto#define CORRECTNESS DLVL(CORRECTNESS_LEVEL) 254258945Sroberto#define IOEVENT DLVL(IOEVENT_LEVEL) 255258945Sroberto#define EVENT DLVL(EVENT_LEVEL) 256258945Sroberto#define CREATION DLVL(CREATION_LEVEL) 257258945Sroberto 258258945Srobertotypedef isc_event_t intev_t; 259258945Sroberto 260258945Sroberto#define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') 261280849Scy#define VALID_SOCKET(s) ISC_MAGIC_VALID(s, SOCKET_MAGIC) 262258945Sroberto 263258945Sroberto/*! 264258945Sroberto * IPv6 control information. If the socket is an IPv6 socket we want 265258945Sroberto * to collect the destination address and interface so the client can 266258945Sroberto * set them on outgoing packets. 267258945Sroberto */ 268258945Sroberto#ifdef ISC_PLATFORM_HAVEIN6PKTINFO 269258945Sroberto#ifndef USE_CMSG 270258945Sroberto#define USE_CMSG 1 271258945Sroberto#endif 272258945Sroberto#endif 273258945Sroberto 274258945Sroberto/*% 275258945Sroberto * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have 276258945Sroberto * a setsockopt() like interface to request timestamps, and if the OS 277258945Sroberto * doesn't do it for us, call gettimeofday() on every UDP receive? 278258945Sroberto */ 279258945Sroberto#ifdef SO_TIMESTAMP 280258945Sroberto#ifndef USE_CMSG 281258945Sroberto#define USE_CMSG 1 282258945Sroberto#endif 283258945Sroberto#endif 284258945Sroberto 285258945Sroberto/*% 286258945Sroberto * The size to raise the receive buffer to (from BIND 8). 287258945Sroberto */ 288258945Sroberto#define RCVBUFSIZE (32*1024) 289258945Sroberto 290258945Sroberto/*% 291258945Sroberto * The number of times a send operation is repeated if the result is EINTR. 292258945Sroberto */ 293258945Sroberto#define NRETRIES 10 294258945Sroberto 295280849Scytypedef struct isc__socket isc__socket_t; 296280849Scytypedef struct isc__socketmgr isc__socketmgr_t; 297280849Scy 298280849Scy#define NEWCONNSOCK(ev) ((isc__socket_t *)(ev)->newsocket) 299280849Scy 300280849Scystruct isc__socket { 301258945Sroberto /* Not locked. */ 302280849Scy isc_socket_t common; 303280849Scy isc__socketmgr_t *manager; 304258945Sroberto isc_mutex_t lock; 305258945Sroberto isc_sockettype_t type; 306258945Sroberto const isc_statscounter_t *statsindex; 307258945Sroberto 308258945Sroberto /* Locked by socket lock. */ 309280849Scy ISC_LINK(isc__socket_t) link; 310258945Sroberto unsigned int references; 311258945Sroberto int fd; 312258945Sroberto int pf; 313258945Sroberto char name[16]; 314258945Sroberto void * tag; 315258945Sroberto 316258945Sroberto ISC_LIST(isc_socketevent_t) send_list; 317258945Sroberto ISC_LIST(isc_socketevent_t) recv_list; 318258945Sroberto ISC_LIST(isc_socket_newconnev_t) accept_list; 319258945Sroberto isc_socket_connev_t *connect_ev; 320258945Sroberto 321258945Sroberto /* 322258945Sroberto * Internal events. Posted when a descriptor is readable or 323258945Sroberto * writable. These are statically allocated and never freed. 324258945Sroberto * They will be set to non-purgable before use. 325258945Sroberto */ 326258945Sroberto intev_t readable_ev; 327258945Sroberto intev_t writable_ev; 328258945Sroberto 329258945Sroberto isc_sockaddr_t peer_address; /* remote address */ 330258945Sroberto 331258945Sroberto unsigned int pending_recv : 1, 332258945Sroberto pending_send : 1, 333258945Sroberto pending_accept : 1, 334258945Sroberto listener : 1, /* listener socket */ 335258945Sroberto connected : 1, 336258945Sroberto connecting : 1, /* connect pending */ 337280849Scy bound : 1, /* bound to local addr */ 338280849Scy dupped : 1; 339258945Sroberto 340258945Sroberto#ifdef ISC_NET_RECVOVERFLOW 341258945Sroberto unsigned char overflow; /* used for MSG_TRUNC fake */ 342258945Sroberto#endif 343258945Sroberto 344258945Sroberto char *recvcmsgbuf; 345258945Sroberto ISC_SOCKADDR_LEN_T recvcmsgbuflen; 346258945Sroberto char *sendcmsgbuf; 347258945Sroberto ISC_SOCKADDR_LEN_T sendcmsgbuflen; 348258945Sroberto 349258945Sroberto void *fdwatcharg; 350258945Sroberto isc_sockfdwatch_t fdwatchcb; 351258945Sroberto int fdwatchflags; 352258945Sroberto isc_task_t *fdwatchtask; 353258945Sroberto}; 354258945Sroberto 355258945Sroberto#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') 356258945Sroberto#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) 357258945Sroberto 358280849Scystruct isc__socketmgr { 359258945Sroberto /* Not locked. */ 360280849Scy isc_socketmgr_t common; 361258945Sroberto isc_mem_t *mctx; 362258945Sroberto isc_mutex_t lock; 363258945Sroberto isc_mutex_t *fdlock; 364258945Sroberto isc_stats_t *stats; 365258945Sroberto#ifdef USE_KQUEUE 366258945Sroberto int kqueue_fd; 367258945Sroberto int nevents; 368258945Sroberto struct kevent *events; 369258945Sroberto#endif /* USE_KQUEUE */ 370258945Sroberto#ifdef USE_EPOLL 371258945Sroberto int epoll_fd; 372258945Sroberto int nevents; 373258945Sroberto struct epoll_event *events; 374258945Sroberto#endif /* USE_EPOLL */ 375258945Sroberto#ifdef USE_DEVPOLL 376258945Sroberto int devpoll_fd; 377258945Sroberto int nevents; 378258945Sroberto struct pollfd *events; 379258945Sroberto#endif /* USE_DEVPOLL */ 380258945Sroberto#ifdef USE_SELECT 381258945Sroberto int fd_bufsize; 382258945Sroberto#endif /* USE_SELECT */ 383258945Sroberto unsigned int maxsocks; 384258945Sroberto#ifdef ISC_PLATFORM_USETHREADS 385258945Sroberto int pipe_fds[2]; 386258945Sroberto#endif 387258945Sroberto 388258945Sroberto /* Locked by fdlock. */ 389280849Scy isc__socket_t **fds; 390258945Sroberto int *fdstate; 391258945Sroberto#ifdef USE_DEVPOLL 392258945Sroberto pollinfo_t *fdpollinfo; 393258945Sroberto#endif 394258945Sroberto 395258945Sroberto /* Locked by manager lock. */ 396280849Scy ISC_LIST(isc__socket_t) socklist; 397258945Sroberto#ifdef USE_SELECT 398258945Sroberto fd_set *read_fds; 399258945Sroberto fd_set *read_fds_copy; 400258945Sroberto fd_set *write_fds; 401258945Sroberto fd_set *write_fds_copy; 402258945Sroberto int maxfd; 403258945Sroberto#endif /* USE_SELECT */ 404258945Sroberto int reserved; /* unlocked */ 405280849Scy#ifdef USE_WATCHER_THREAD 406258945Sroberto isc_thread_t watcher; 407258945Sroberto isc_condition_t shutdown_ok; 408280849Scy#else /* USE_WATCHER_THREAD */ 409258945Sroberto unsigned int refs; 410280849Scy#endif /* USE_WATCHER_THREAD */ 411280849Scy int maxudp; 412258945Sroberto}; 413258945Sroberto 414280849Scy#ifdef USE_SHARED_MANAGER 415280849Scystatic isc__socketmgr_t *socketmgr = NULL; 416280849Scy#endif /* USE_SHARED_MANAGER */ 417258945Sroberto 418258945Sroberto#define CLOSED 0 /* this one must be zero */ 419258945Sroberto#define MANAGED 1 420258945Sroberto#define CLOSE_PENDING 2 421258945Sroberto 422258945Sroberto/* 423258945Sroberto * send() and recv() iovec counts 424258945Sroberto */ 425258945Sroberto#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) 426258945Sroberto#ifdef ISC_NET_RECVOVERFLOW 427258945Sroberto# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1) 428258945Sroberto#else 429258945Sroberto# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) 430258945Sroberto#endif 431258945Sroberto 432280849Scystatic isc_result_t socket_create(isc_socketmgr_t *manager0, int pf, 433280849Scy isc_sockettype_t type, 434280849Scy isc_socket_t **socketp, 435280849Scy isc_socket_t *dup_socket); 436280849Scystatic void send_recvdone_event(isc__socket_t *, isc_socketevent_t **); 437280849Scystatic void send_senddone_event(isc__socket_t *, isc_socketevent_t **); 438280849Scystatic void free_socket(isc__socket_t **); 439280849Scystatic isc_result_t allocate_socket(isc__socketmgr_t *, isc_sockettype_t, 440280849Scy isc__socket_t **); 441280849Scystatic void destroy(isc__socket_t **); 442258945Srobertostatic void internal_accept(isc_task_t *, isc_event_t *); 443258945Srobertostatic void internal_connect(isc_task_t *, isc_event_t *); 444258945Srobertostatic void internal_recv(isc_task_t *, isc_event_t *); 445258945Srobertostatic void internal_send(isc_task_t *, isc_event_t *); 446258945Srobertostatic void internal_fdwatch_write(isc_task_t *, isc_event_t *); 447258945Srobertostatic void internal_fdwatch_read(isc_task_t *, isc_event_t *); 448280849Scystatic void process_cmsg(isc__socket_t *, struct msghdr *, isc_socketevent_t *); 449280849Scystatic void build_msghdr_send(isc__socket_t *, isc_socketevent_t *, 450258945Sroberto struct msghdr *, struct iovec *, size_t *); 451280849Scystatic void build_msghdr_recv(isc__socket_t *, isc_socketevent_t *, 452258945Sroberto struct msghdr *, struct iovec *, size_t *); 453280849Scy#ifdef USE_WATCHER_THREAD 454280849Scystatic isc_boolean_t process_ctlfd(isc__socketmgr_t *manager); 455258945Sroberto#endif 456258945Sroberto 457280849Scy/*% 458280849Scy * The following can be either static or public, depending on build environment. 459280849Scy */ 460280849Scy 461280849Scy#ifdef BIND9 462280849Scy#define ISC_SOCKETFUNC_SCOPE 463280849Scy#else 464280849Scy#define ISC_SOCKETFUNC_SCOPE static 465280849Scy#endif 466280849Scy 467280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 468280849Scyisc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 469280849Scy isc_socket_t **socketp); 470280849ScyISC_SOCKETFUNC_SCOPE void 471280849Scyisc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp); 472280849ScyISC_SOCKETFUNC_SCOPE void 473280849Scyisc__socket_detach(isc_socket_t **socketp); 474280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 475280849Scyisc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp); 476280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 477280849Scyisc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, 478280849Scy unsigned int maxsocks); 479280849ScyISC_SOCKETFUNC_SCOPE void 480280849Scyisc__socketmgr_destroy(isc_socketmgr_t **managerp); 481280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 482280849Scyisc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, 483280849Scy unsigned int minimum, isc_task_t *task, 484280849Scy isc_taskaction_t action, const void *arg); 485280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 486280849Scyisc__socket_recv(isc_socket_t *sock, isc_region_t *region, 487280849Scy unsigned int minimum, isc_task_t *task, 488280849Scy isc_taskaction_t action, const void *arg); 489280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 490280849Scyisc__socket_recv2(isc_socket_t *sock, isc_region_t *region, 491280849Scy unsigned int minimum, isc_task_t *task, 492280849Scy isc_socketevent_t *event, unsigned int flags); 493280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 494280849Scyisc__socket_send(isc_socket_t *sock, isc_region_t *region, 495280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg); 496280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 497280849Scyisc__socket_sendto(isc_socket_t *sock, isc_region_t *region, 498280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg, 499280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); 500280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 501280849Scyisc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, 502280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg); 503280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 504280849Scyisc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, 505280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg, 506280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); 507280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 508280849Scyisc__socket_sendto2(isc_socket_t *sock, isc_region_t *region, 509280849Scy isc_task_t *task, 510280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 511280849Scy isc_socketevent_t *event, unsigned int flags); 512280849ScyISC_SOCKETFUNC_SCOPE void 513280849Scyisc__socket_cleanunix(isc_sockaddr_t *sockaddr, isc_boolean_t active); 514280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 515280849Scyisc__socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm, 516280849Scy isc_uint32_t owner, isc_uint32_t group); 517280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 518280849Scyisc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, 519280849Scy unsigned int options); 520280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 521280849Scyisc__socket_filter(isc_socket_t *sock, const char *filter); 522280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 523280849Scyisc__socket_listen(isc_socket_t *sock, unsigned int backlog); 524280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 525280849Scyisc__socket_accept(isc_socket_t *sock, 526280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg); 527280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 528280849Scyisc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, 529280849Scy isc_task_t *task, isc_taskaction_t action, 530280849Scy const void *arg); 531280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 532280849Scyisc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp); 533280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 534280849Scyisc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp); 535280849ScyISC_SOCKETFUNC_SCOPE void 536280849Scyisc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how); 537280849ScyISC_SOCKETFUNC_SCOPE isc_sockettype_t 538280849Scyisc__socket_gettype(isc_socket_t *sock); 539280849ScyISC_SOCKETFUNC_SCOPE isc_boolean_t 540280849Scyisc__socket_isbound(isc_socket_t *sock); 541280849ScyISC_SOCKETFUNC_SCOPE void 542280849Scyisc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes); 543280849Scy#if defined(HAVE_LIBXML2) && defined(BIND9) 544280849ScyISC_SOCKETFUNC_SCOPE void 545280849Scyisc__socketmgr_renderxml(isc_socketmgr_t *mgr0, xmlTextWriterPtr writer); 546280849Scy#endif 547280849Scy 548280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 549280849Scyisc__socket_fdwatchcreate(isc_socketmgr_t *manager, int fd, int flags, 550280849Scy isc_sockfdwatch_t callback, void *cbarg, 551280849Scy isc_task_t *task, isc_socket_t **socketp); 552280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 553280849Scyisc__socket_fdwatchpoke(isc_socket_t *sock, int flags); 554280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 555280849Scyisc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp); 556280849ScyISC_SOCKETFUNC_SCOPE int 557280849Scyisc__socket_getfd(isc_socket_t *sock); 558280849Scy 559280849Scystatic struct { 560280849Scy isc_socketmethods_t methods; 561280849Scy 562280849Scy /*% 563280849Scy * The following are defined just for avoiding unused static functions. 564280849Scy */ 565280849Scy#ifndef BIND9 566280849Scy void *recvv, *send, *sendv, *sendto2, *cleanunix, *permunix, *filter, 567280849Scy *listen, *accept, *getpeername, *isbound; 568280849Scy#endif 569280849Scy} socketmethods = { 570280849Scy { 571280849Scy isc__socket_attach, 572280849Scy isc__socket_detach, 573280849Scy isc__socket_bind, 574280849Scy isc__socket_sendto, 575280849Scy isc__socket_connect, 576280849Scy isc__socket_recv, 577280849Scy isc__socket_cancel, 578280849Scy isc__socket_getsockname, 579280849Scy isc__socket_gettype, 580280849Scy isc__socket_ipv6only, 581280849Scy isc__socket_fdwatchpoke, 582280849Scy isc__socket_dup, 583280849Scy isc__socket_getfd 584280849Scy } 585280849Scy#ifndef BIND9 586280849Scy , 587280849Scy (void *)isc__socket_recvv, (void *)isc__socket_send, 588280849Scy (void *)isc__socket_sendv, (void *)isc__socket_sendto2, 589280849Scy (void *)isc__socket_cleanunix, (void *)isc__socket_permunix, 590280849Scy (void *)isc__socket_filter, (void *)isc__socket_listen, 591280849Scy (void *)isc__socket_accept, (void *)isc__socket_getpeername, 592280849Scy (void *)isc__socket_isbound 593280849Scy#endif 594280849Scy}; 595280849Scy 596280849Scystatic isc_socketmgrmethods_t socketmgrmethods = { 597280849Scy isc__socketmgr_destroy, 598280849Scy isc__socket_create, 599280849Scy isc__socket_fdwatchcreate 600280849Scy}; 601280849Scy 602258945Sroberto#define SELECT_POKE_SHUTDOWN (-1) 603258945Sroberto#define SELECT_POKE_NOTHING (-2) 604258945Sroberto#define SELECT_POKE_READ (-3) 605258945Sroberto#define SELECT_POKE_ACCEPT (-3) /*%< Same as _READ */ 606258945Sroberto#define SELECT_POKE_WRITE (-4) 607258945Sroberto#define SELECT_POKE_CONNECT (-4) /*%< Same as _WRITE */ 608258945Sroberto#define SELECT_POKE_CLOSE (-5) 609258945Sroberto 610258945Sroberto#define SOCK_DEAD(s) ((s)->references == 0) 611258945Sroberto 612258945Sroberto/*% 613258945Sroberto * Shortcut index arrays to get access to statistics counters. 614258945Sroberto */ 615258945Srobertoenum { 616258945Sroberto STATID_OPEN = 0, 617258945Sroberto STATID_OPENFAIL = 1, 618258945Sroberto STATID_CLOSE = 2, 619258945Sroberto STATID_BINDFAIL = 3, 620258945Sroberto STATID_CONNECTFAIL = 4, 621258945Sroberto STATID_CONNECT = 5, 622258945Sroberto STATID_ACCEPTFAIL = 6, 623258945Sroberto STATID_ACCEPT = 7, 624258945Sroberto STATID_SENDFAIL = 8, 625258945Sroberto STATID_RECVFAIL = 9 626258945Sroberto}; 627258945Srobertostatic const isc_statscounter_t upd4statsindex[] = { 628258945Sroberto isc_sockstatscounter_udp4open, 629258945Sroberto isc_sockstatscounter_udp4openfail, 630258945Sroberto isc_sockstatscounter_udp4close, 631258945Sroberto isc_sockstatscounter_udp4bindfail, 632258945Sroberto isc_sockstatscounter_udp4connectfail, 633258945Sroberto isc_sockstatscounter_udp4connect, 634258945Sroberto -1, 635258945Sroberto -1, 636258945Sroberto isc_sockstatscounter_udp4sendfail, 637258945Sroberto isc_sockstatscounter_udp4recvfail 638258945Sroberto}; 639258945Srobertostatic const isc_statscounter_t upd6statsindex[] = { 640258945Sroberto isc_sockstatscounter_udp6open, 641258945Sroberto isc_sockstatscounter_udp6openfail, 642258945Sroberto isc_sockstatscounter_udp6close, 643258945Sroberto isc_sockstatscounter_udp6bindfail, 644258945Sroberto isc_sockstatscounter_udp6connectfail, 645258945Sroberto isc_sockstatscounter_udp6connect, 646258945Sroberto -1, 647258945Sroberto -1, 648258945Sroberto isc_sockstatscounter_udp6sendfail, 649258945Sroberto isc_sockstatscounter_udp6recvfail 650258945Sroberto}; 651258945Srobertostatic const isc_statscounter_t tcp4statsindex[] = { 652258945Sroberto isc_sockstatscounter_tcp4open, 653258945Sroberto isc_sockstatscounter_tcp4openfail, 654258945Sroberto isc_sockstatscounter_tcp4close, 655258945Sroberto isc_sockstatscounter_tcp4bindfail, 656258945Sroberto isc_sockstatscounter_tcp4connectfail, 657258945Sroberto isc_sockstatscounter_tcp4connect, 658258945Sroberto isc_sockstatscounter_tcp4acceptfail, 659258945Sroberto isc_sockstatscounter_tcp4accept, 660258945Sroberto isc_sockstatscounter_tcp4sendfail, 661258945Sroberto isc_sockstatscounter_tcp4recvfail 662258945Sroberto}; 663258945Srobertostatic const isc_statscounter_t tcp6statsindex[] = { 664258945Sroberto isc_sockstatscounter_tcp6open, 665258945Sroberto isc_sockstatscounter_tcp6openfail, 666258945Sroberto isc_sockstatscounter_tcp6close, 667258945Sroberto isc_sockstatscounter_tcp6bindfail, 668258945Sroberto isc_sockstatscounter_tcp6connectfail, 669258945Sroberto isc_sockstatscounter_tcp6connect, 670258945Sroberto isc_sockstatscounter_tcp6acceptfail, 671258945Sroberto isc_sockstatscounter_tcp6accept, 672258945Sroberto isc_sockstatscounter_tcp6sendfail, 673258945Sroberto isc_sockstatscounter_tcp6recvfail 674258945Sroberto}; 675258945Srobertostatic const isc_statscounter_t unixstatsindex[] = { 676258945Sroberto isc_sockstatscounter_unixopen, 677258945Sroberto isc_sockstatscounter_unixopenfail, 678258945Sroberto isc_sockstatscounter_unixclose, 679258945Sroberto isc_sockstatscounter_unixbindfail, 680258945Sroberto isc_sockstatscounter_unixconnectfail, 681258945Sroberto isc_sockstatscounter_unixconnect, 682258945Sroberto isc_sockstatscounter_unixacceptfail, 683258945Sroberto isc_sockstatscounter_unixaccept, 684258945Sroberto isc_sockstatscounter_unixsendfail, 685258945Sroberto isc_sockstatscounter_unixrecvfail 686258945Sroberto}; 687258945Srobertostatic const isc_statscounter_t fdwatchstatsindex[] = { 688258945Sroberto -1, 689258945Sroberto -1, 690258945Sroberto isc_sockstatscounter_fdwatchclose, 691258945Sroberto isc_sockstatscounter_fdwatchbindfail, 692258945Sroberto isc_sockstatscounter_fdwatchconnectfail, 693258945Sroberto isc_sockstatscounter_fdwatchconnect, 694258945Sroberto -1, 695258945Sroberto -1, 696258945Sroberto isc_sockstatscounter_fdwatchsendfail, 697258945Sroberto isc_sockstatscounter_fdwatchrecvfail 698258945Sroberto}; 699258945Sroberto 700280849Scy#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) || \ 701280849Scy defined(USE_WATCHER_THREAD) 702258945Srobertostatic void 703280849Scymanager_log(isc__socketmgr_t *sockmgr, 704258945Sroberto isc_logcategory_t *category, isc_logmodule_t *module, int level, 705258945Sroberto const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6); 706258945Srobertostatic void 707280849Scymanager_log(isc__socketmgr_t *sockmgr, 708258945Sroberto isc_logcategory_t *category, isc_logmodule_t *module, int level, 709258945Sroberto const char *fmt, ...) 710258945Sroberto{ 711258945Sroberto char msgbuf[2048]; 712258945Sroberto va_list ap; 713258945Sroberto 714258945Sroberto if (! isc_log_wouldlog(isc_lctx, level)) 715258945Sroberto return; 716258945Sroberto 717258945Sroberto va_start(ap, fmt); 718258945Sroberto vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 719258945Sroberto va_end(ap); 720258945Sroberto 721258945Sroberto isc_log_write(isc_lctx, category, module, level, 722258945Sroberto "sockmgr %p: %s", sockmgr, msgbuf); 723258945Sroberto} 724280849Scy#endif 725258945Sroberto 726258945Srobertostatic void 727280849Scysocket_log(isc__socket_t *sock, isc_sockaddr_t *address, 728258945Sroberto isc_logcategory_t *category, isc_logmodule_t *module, int level, 729258945Sroberto isc_msgcat_t *msgcat, int msgset, int message, 730258945Sroberto const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10); 731258945Srobertostatic void 732280849Scysocket_log(isc__socket_t *sock, isc_sockaddr_t *address, 733258945Sroberto isc_logcategory_t *category, isc_logmodule_t *module, int level, 734258945Sroberto isc_msgcat_t *msgcat, int msgset, int message, 735258945Sroberto const char *fmt, ...) 736258945Sroberto{ 737258945Sroberto char msgbuf[2048]; 738258945Sroberto char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 739258945Sroberto va_list ap; 740258945Sroberto 741258945Sroberto if (! isc_log_wouldlog(isc_lctx, level)) 742258945Sroberto return; 743258945Sroberto 744258945Sroberto va_start(ap, fmt); 745258945Sroberto vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 746258945Sroberto va_end(ap); 747258945Sroberto 748258945Sroberto if (address == NULL) { 749258945Sroberto isc_log_iwrite(isc_lctx, category, module, level, 750258945Sroberto msgcat, msgset, message, 751258945Sroberto "socket %p: %s", sock, msgbuf); 752258945Sroberto } else { 753258945Sroberto isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 754258945Sroberto isc_log_iwrite(isc_lctx, category, module, level, 755258945Sroberto msgcat, msgset, message, 756258945Sroberto "socket %p %s: %s", sock, peerbuf, msgbuf); 757258945Sroberto } 758258945Sroberto} 759258945Sroberto 760258945Sroberto#if defined(_AIX) && defined(ISC_NET_BSD44MSGHDR) && \ 761258945Sroberto defined(USE_CMSG) && defined(IPV6_RECVPKTINFO) 762258945Sroberto/* 763258945Sroberto * AIX has a kernel bug where IPV6_RECVPKTINFO gets cleared by 764258945Sroberto * setting IPV6_V6ONLY. 765258945Sroberto */ 766258945Srobertostatic void 767280849ScyFIX_IPV6_RECVPKTINFO(isc__socket_t *sock) 768258945Sroberto{ 769258945Sroberto char strbuf[ISC_STRERRORSIZE]; 770258945Sroberto int on = 1; 771258945Sroberto 772258945Sroberto if (sock->pf != AF_INET6 || sock->type != isc_sockettype_udp) 773258945Sroberto return; 774258945Sroberto 775258945Sroberto if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 776258945Sroberto (void *)&on, sizeof(on)) < 0) { 777258945Sroberto 778280849Scy isc__strerror(errno, strbuf, sizeof(strbuf)); 779258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 780258945Sroberto "setsockopt(%d, IPV6_RECVPKTINFO) " 781258945Sroberto "%s: %s", sock->fd, 782258945Sroberto isc_msgcat_get(isc_msgcat, 783258945Sroberto ISC_MSGSET_GENERAL, 784258945Sroberto ISC_MSG_FAILED, 785258945Sroberto "failed"), 786258945Sroberto strbuf); 787258945Sroberto } 788258945Sroberto} 789258945Sroberto#else 790258945Sroberto#define FIX_IPV6_RECVPKTINFO(sock) (void)0 791258945Sroberto#endif 792258945Sroberto 793258945Sroberto/*% 794258945Sroberto * Increment socket-related statistics counters. 795258945Sroberto */ 796258945Srobertostatic inline void 797258945Srobertoinc_stats(isc_stats_t *stats, isc_statscounter_t counterid) { 798258945Sroberto REQUIRE(counterid != -1); 799258945Sroberto 800258945Sroberto if (stats != NULL) 801258945Sroberto isc_stats_increment(stats, counterid); 802258945Sroberto} 803258945Sroberto 804258945Srobertostatic inline isc_result_t 805280849Scywatch_fd(isc__socketmgr_t *manager, int fd, int msg) { 806258945Sroberto isc_result_t result = ISC_R_SUCCESS; 807258945Sroberto 808258945Sroberto#ifdef USE_KQUEUE 809258945Sroberto struct kevent evchange; 810258945Sroberto 811258945Sroberto memset(&evchange, 0, sizeof(evchange)); 812258945Sroberto if (msg == SELECT_POKE_READ) 813258945Sroberto evchange.filter = EVFILT_READ; 814258945Sroberto else 815258945Sroberto evchange.filter = EVFILT_WRITE; 816258945Sroberto evchange.flags = EV_ADD; 817258945Sroberto evchange.ident = fd; 818258945Sroberto if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) 819258945Sroberto result = isc__errno2result(errno); 820258945Sroberto 821258945Sroberto return (result); 822258945Sroberto#elif defined(USE_EPOLL) 823258945Sroberto struct epoll_event event; 824258945Sroberto 825258945Sroberto if (msg == SELECT_POKE_READ) 826258945Sroberto event.events = EPOLLIN; 827258945Sroberto else 828258945Sroberto event.events = EPOLLOUT; 829280849Scy memset(&event.data, 0, sizeof(event.data)); 830258945Sroberto event.data.fd = fd; 831258945Sroberto if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1 && 832258945Sroberto errno != EEXIST) { 833258945Sroberto result = isc__errno2result(errno); 834258945Sroberto } 835258945Sroberto 836258945Sroberto return (result); 837258945Sroberto#elif defined(USE_DEVPOLL) 838258945Sroberto struct pollfd pfd; 839258945Sroberto int lockid = FDLOCK_ID(fd); 840258945Sroberto 841258945Sroberto memset(&pfd, 0, sizeof(pfd)); 842258945Sroberto if (msg == SELECT_POKE_READ) 843258945Sroberto pfd.events = POLLIN; 844258945Sroberto else 845258945Sroberto pfd.events = POLLOUT; 846258945Sroberto pfd.fd = fd; 847258945Sroberto pfd.revents = 0; 848258945Sroberto LOCK(&manager->fdlock[lockid]); 849258945Sroberto if (write(manager->devpoll_fd, &pfd, sizeof(pfd)) == -1) 850258945Sroberto result = isc__errno2result(errno); 851258945Sroberto else { 852258945Sroberto if (msg == SELECT_POKE_READ) 853258945Sroberto manager->fdpollinfo[fd].want_read = 1; 854258945Sroberto else 855258945Sroberto manager->fdpollinfo[fd].want_write = 1; 856258945Sroberto } 857258945Sroberto UNLOCK(&manager->fdlock[lockid]); 858258945Sroberto 859258945Sroberto return (result); 860258945Sroberto#elif defined(USE_SELECT) 861258945Sroberto LOCK(&manager->lock); 862258945Sroberto if (msg == SELECT_POKE_READ) 863258945Sroberto FD_SET(fd, manager->read_fds); 864258945Sroberto if (msg == SELECT_POKE_WRITE) 865258945Sroberto FD_SET(fd, manager->write_fds); 866258945Sroberto UNLOCK(&manager->lock); 867258945Sroberto 868258945Sroberto return (result); 869258945Sroberto#endif 870258945Sroberto} 871258945Sroberto 872258945Srobertostatic inline isc_result_t 873280849Scyunwatch_fd(isc__socketmgr_t *manager, int fd, int msg) { 874258945Sroberto isc_result_t result = ISC_R_SUCCESS; 875258945Sroberto 876258945Sroberto#ifdef USE_KQUEUE 877258945Sroberto struct kevent evchange; 878258945Sroberto 879258945Sroberto memset(&evchange, 0, sizeof(evchange)); 880258945Sroberto if (msg == SELECT_POKE_READ) 881258945Sroberto evchange.filter = EVFILT_READ; 882258945Sroberto else 883258945Sroberto evchange.filter = EVFILT_WRITE; 884258945Sroberto evchange.flags = EV_DELETE; 885258945Sroberto evchange.ident = fd; 886258945Sroberto if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) 887258945Sroberto result = isc__errno2result(errno); 888258945Sroberto 889258945Sroberto return (result); 890258945Sroberto#elif defined(USE_EPOLL) 891258945Sroberto struct epoll_event event; 892258945Sroberto 893258945Sroberto if (msg == SELECT_POKE_READ) 894258945Sroberto event.events = EPOLLIN; 895258945Sroberto else 896258945Sroberto event.events = EPOLLOUT; 897280849Scy memset(&event.data, 0, sizeof(event.data)); 898258945Sroberto event.data.fd = fd; 899258945Sroberto if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_DEL, fd, &event) == -1 && 900258945Sroberto errno != ENOENT) { 901258945Sroberto char strbuf[ISC_STRERRORSIZE]; 902258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 903258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 904258945Sroberto "epoll_ctl(DEL), %d: %s", fd, strbuf); 905258945Sroberto result = ISC_R_UNEXPECTED; 906258945Sroberto } 907258945Sroberto return (result); 908258945Sroberto#elif defined(USE_DEVPOLL) 909258945Sroberto struct pollfd pfds[2]; 910258945Sroberto size_t writelen = sizeof(pfds[0]); 911258945Sroberto int lockid = FDLOCK_ID(fd); 912258945Sroberto 913258945Sroberto memset(pfds, 0, sizeof(pfds)); 914258945Sroberto pfds[0].events = POLLREMOVE; 915258945Sroberto pfds[0].fd = fd; 916258945Sroberto 917258945Sroberto /* 918258945Sroberto * Canceling read or write polling via /dev/poll is tricky. Since it 919258945Sroberto * only provides a way of canceling per FD, we may need to re-poll the 920258945Sroberto * socket for the other operation. 921258945Sroberto */ 922258945Sroberto LOCK(&manager->fdlock[lockid]); 923258945Sroberto if (msg == SELECT_POKE_READ && 924258945Sroberto manager->fdpollinfo[fd].want_write == 1) { 925258945Sroberto pfds[1].events = POLLOUT; 926258945Sroberto pfds[1].fd = fd; 927258945Sroberto writelen += sizeof(pfds[1]); 928258945Sroberto } 929258945Sroberto if (msg == SELECT_POKE_WRITE && 930258945Sroberto manager->fdpollinfo[fd].want_read == 1) { 931258945Sroberto pfds[1].events = POLLIN; 932258945Sroberto pfds[1].fd = fd; 933258945Sroberto writelen += sizeof(pfds[1]); 934258945Sroberto } 935258945Sroberto 936258945Sroberto if (write(manager->devpoll_fd, pfds, writelen) == -1) 937258945Sroberto result = isc__errno2result(errno); 938258945Sroberto else { 939258945Sroberto if (msg == SELECT_POKE_READ) 940258945Sroberto manager->fdpollinfo[fd].want_read = 0; 941258945Sroberto else 942258945Sroberto manager->fdpollinfo[fd].want_write = 0; 943258945Sroberto } 944258945Sroberto UNLOCK(&manager->fdlock[lockid]); 945258945Sroberto 946258945Sroberto return (result); 947258945Sroberto#elif defined(USE_SELECT) 948258945Sroberto LOCK(&manager->lock); 949258945Sroberto if (msg == SELECT_POKE_READ) 950258945Sroberto FD_CLR(fd, manager->read_fds); 951258945Sroberto else if (msg == SELECT_POKE_WRITE) 952258945Sroberto FD_CLR(fd, manager->write_fds); 953258945Sroberto UNLOCK(&manager->lock); 954258945Sroberto 955258945Sroberto return (result); 956258945Sroberto#endif 957258945Sroberto} 958258945Sroberto 959258945Srobertostatic void 960280849Scywakeup_socket(isc__socketmgr_t *manager, int fd, int msg) { 961258945Sroberto isc_result_t result; 962258945Sroberto int lockid = FDLOCK_ID(fd); 963258945Sroberto 964258945Sroberto /* 965258945Sroberto * This is a wakeup on a socket. If the socket is not in the 966258945Sroberto * process of being closed, start watching it for either reads 967258945Sroberto * or writes. 968258945Sroberto */ 969258945Sroberto 970258945Sroberto INSIST(fd >= 0 && fd < (int)manager->maxsocks); 971258945Sroberto 972258945Sroberto if (msg == SELECT_POKE_CLOSE) { 973258945Sroberto /* No one should be updating fdstate, so no need to lock it */ 974258945Sroberto INSIST(manager->fdstate[fd] == CLOSE_PENDING); 975258945Sroberto manager->fdstate[fd] = CLOSED; 976258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 977258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 978258945Sroberto (void)close(fd); 979258945Sroberto return; 980258945Sroberto } 981258945Sroberto 982258945Sroberto LOCK(&manager->fdlock[lockid]); 983258945Sroberto if (manager->fdstate[fd] == CLOSE_PENDING) { 984258945Sroberto UNLOCK(&manager->fdlock[lockid]); 985258945Sroberto 986258945Sroberto /* 987258945Sroberto * We accept (and ignore) any error from unwatch_fd() as we are 988258945Sroberto * closing the socket, hoping it doesn't leave dangling state in 989258945Sroberto * the kernel. 990258945Sroberto * Note that unwatch_fd() must be called after releasing the 991258945Sroberto * fdlock; otherwise it could cause deadlock due to a lock order 992258945Sroberto * reversal. 993258945Sroberto */ 994258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 995258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 996258945Sroberto return; 997258945Sroberto } 998258945Sroberto if (manager->fdstate[fd] != MANAGED) { 999258945Sroberto UNLOCK(&manager->fdlock[lockid]); 1000258945Sroberto return; 1001258945Sroberto } 1002258945Sroberto UNLOCK(&manager->fdlock[lockid]); 1003258945Sroberto 1004258945Sroberto /* 1005258945Sroberto * Set requested bit. 1006258945Sroberto */ 1007258945Sroberto result = watch_fd(manager, fd, msg); 1008258945Sroberto if (result != ISC_R_SUCCESS) { 1009258945Sroberto /* 1010258945Sroberto * XXXJT: what should we do? Ignoring the failure of watching 1011258945Sroberto * a socket will make the application dysfunctional, but there 1012258945Sroberto * seems to be no reasonable recovery process. 1013258945Sroberto */ 1014258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 1015258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 1016258945Sroberto "failed to start watching FD (%d): %s", 1017258945Sroberto fd, isc_result_totext(result)); 1018258945Sroberto } 1019258945Sroberto} 1020258945Sroberto 1021280849Scy#ifdef USE_WATCHER_THREAD 1022258945Sroberto/* 1023258945Sroberto * Poke the select loop when there is something for us to do. 1024258945Sroberto * The write is required (by POSIX) to complete. That is, we 1025258945Sroberto * will not get partial writes. 1026258945Sroberto */ 1027258945Srobertostatic void 1028280849Scyselect_poke(isc__socketmgr_t *mgr, int fd, int msg) { 1029258945Sroberto int cc; 1030258945Sroberto int buf[2]; 1031258945Sroberto char strbuf[ISC_STRERRORSIZE]; 1032258945Sroberto 1033258945Sroberto buf[0] = fd; 1034258945Sroberto buf[1] = msg; 1035258945Sroberto 1036258945Sroberto do { 1037258945Sroberto cc = write(mgr->pipe_fds[1], buf, sizeof(buf)); 1038258945Sroberto#ifdef ENOSR 1039258945Sroberto /* 1040258945Sroberto * Treat ENOSR as EAGAIN but loop slowly as it is 1041258945Sroberto * unlikely to clear fast. 1042258945Sroberto */ 1043258945Sroberto if (cc < 0 && errno == ENOSR) { 1044258945Sroberto sleep(1); 1045258945Sroberto errno = EAGAIN; 1046258945Sroberto } 1047258945Sroberto#endif 1048258945Sroberto } while (cc < 0 && SOFT_ERROR(errno)); 1049258945Sroberto 1050258945Sroberto if (cc < 0) { 1051258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 1052258945Sroberto FATAL_ERROR(__FILE__, __LINE__, 1053258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 1054258945Sroberto ISC_MSG_WRITEFAILED, 1055258945Sroberto "write() failed " 1056258945Sroberto "during watcher poke: %s"), 1057258945Sroberto strbuf); 1058258945Sroberto } 1059258945Sroberto 1060258945Sroberto INSIST(cc == sizeof(buf)); 1061258945Sroberto} 1062258945Sroberto 1063258945Sroberto/* 1064258945Sroberto * Read a message on the internal fd. 1065258945Sroberto */ 1066258945Srobertostatic void 1067280849Scyselect_readmsg(isc__socketmgr_t *mgr, int *fd, int *msg) { 1068258945Sroberto int buf[2]; 1069258945Sroberto int cc; 1070258945Sroberto char strbuf[ISC_STRERRORSIZE]; 1071258945Sroberto 1072258945Sroberto cc = read(mgr->pipe_fds[0], buf, sizeof(buf)); 1073258945Sroberto if (cc < 0) { 1074258945Sroberto *msg = SELECT_POKE_NOTHING; 1075258945Sroberto *fd = -1; /* Silence compiler. */ 1076258945Sroberto if (SOFT_ERROR(errno)) 1077258945Sroberto return; 1078258945Sroberto 1079258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 1080258945Sroberto FATAL_ERROR(__FILE__, __LINE__, 1081258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 1082258945Sroberto ISC_MSG_READFAILED, 1083258945Sroberto "read() failed " 1084258945Sroberto "during watcher poke: %s"), 1085258945Sroberto strbuf); 1086258945Sroberto 1087258945Sroberto return; 1088258945Sroberto } 1089258945Sroberto INSIST(cc == sizeof(buf)); 1090258945Sroberto 1091258945Sroberto *fd = buf[0]; 1092258945Sroberto *msg = buf[1]; 1093258945Sroberto} 1094280849Scy#else /* USE_WATCHER_THREAD */ 1095258945Sroberto/* 1096258945Sroberto * Update the state of the socketmgr when something changes. 1097258945Sroberto */ 1098258945Srobertostatic void 1099280849Scyselect_poke(isc__socketmgr_t *manager, int fd, int msg) { 1100258945Sroberto if (msg == SELECT_POKE_SHUTDOWN) 1101258945Sroberto return; 1102258945Sroberto else if (fd >= 0) 1103258945Sroberto wakeup_socket(manager, fd, msg); 1104258945Sroberto return; 1105258945Sroberto} 1106280849Scy#endif /* USE_WATCHER_THREAD */ 1107258945Sroberto 1108258945Sroberto/* 1109258945Sroberto * Make a fd non-blocking. 1110258945Sroberto */ 1111258945Srobertostatic isc_result_t 1112258945Srobertomake_nonblock(int fd) { 1113258945Sroberto int ret; 1114258945Sroberto int flags; 1115258945Sroberto char strbuf[ISC_STRERRORSIZE]; 1116258945Sroberto#ifdef USE_FIONBIO_IOCTL 1117258945Sroberto int on = 1; 1118258945Sroberto 1119258945Sroberto ret = ioctl(fd, FIONBIO, (char *)&on); 1120258945Sroberto#else 1121258945Sroberto flags = fcntl(fd, F_GETFL, 0); 1122258945Sroberto flags |= PORT_NONBLOCK; 1123258945Sroberto ret = fcntl(fd, F_SETFL, flags); 1124258945Sroberto#endif 1125258945Sroberto 1126258945Sroberto if (ret == -1) { 1127258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 1128258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 1129258945Sroberto#ifdef USE_FIONBIO_IOCTL 1130258945Sroberto "ioctl(%d, FIONBIO, &on): %s", fd, 1131258945Sroberto#else 1132258945Sroberto "fcntl(%d, F_SETFL, %d): %s", fd, flags, 1133258945Sroberto#endif 1134258945Sroberto strbuf); 1135258945Sroberto 1136258945Sroberto return (ISC_R_UNEXPECTED); 1137258945Sroberto } 1138258945Sroberto 1139258945Sroberto return (ISC_R_SUCCESS); 1140258945Sroberto} 1141258945Sroberto 1142258945Sroberto#ifdef USE_CMSG 1143258945Sroberto/* 1144258945Sroberto * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. 1145258945Sroberto * In order to ensure as much portability as possible, we provide wrapper 1146258945Sroberto * functions of these macros. 1147258945Sroberto * Note that cmsg_space() could run slow on OSes that do not have 1148258945Sroberto * CMSG_SPACE. 1149258945Sroberto */ 1150258945Srobertostatic inline ISC_SOCKADDR_LEN_T 1151258945Srobertocmsg_len(ISC_SOCKADDR_LEN_T len) { 1152258945Sroberto#ifdef CMSG_LEN 1153258945Sroberto return (CMSG_LEN(len)); 1154258945Sroberto#else 1155258945Sroberto ISC_SOCKADDR_LEN_T hdrlen; 1156258945Sroberto 1157258945Sroberto /* 1158258945Sroberto * Cast NULL so that any pointer arithmetic performed by CMSG_DATA 1159258945Sroberto * is correct. 1160258945Sroberto */ 1161258945Sroberto hdrlen = (ISC_SOCKADDR_LEN_T)CMSG_DATA(((struct cmsghdr *)NULL)); 1162258945Sroberto return (hdrlen + len); 1163258945Sroberto#endif 1164258945Sroberto} 1165258945Sroberto 1166258945Srobertostatic inline ISC_SOCKADDR_LEN_T 1167258945Srobertocmsg_space(ISC_SOCKADDR_LEN_T len) { 1168258945Sroberto#ifdef CMSG_SPACE 1169258945Sroberto return (CMSG_SPACE(len)); 1170258945Sroberto#else 1171258945Sroberto struct msghdr msg; 1172258945Sroberto struct cmsghdr *cmsgp; 1173258945Sroberto /* 1174258945Sroberto * XXX: The buffer length is an ad-hoc value, but should be enough 1175258945Sroberto * in a practical sense. 1176258945Sroberto */ 1177258945Sroberto char dummybuf[sizeof(struct cmsghdr) + 1024]; 1178258945Sroberto 1179258945Sroberto memset(&msg, 0, sizeof(msg)); 1180258945Sroberto msg.msg_control = dummybuf; 1181258945Sroberto msg.msg_controllen = sizeof(dummybuf); 1182258945Sroberto 1183258945Sroberto cmsgp = (struct cmsghdr *)dummybuf; 1184258945Sroberto cmsgp->cmsg_len = cmsg_len(len); 1185258945Sroberto 1186258945Sroberto cmsgp = CMSG_NXTHDR(&msg, cmsgp); 1187258945Sroberto if (cmsgp != NULL) 1188258945Sroberto return ((char *)cmsgp - (char *)msg.msg_control); 1189258945Sroberto else 1190258945Sroberto return (0); 1191258945Sroberto#endif 1192258945Sroberto} 1193258945Sroberto#endif /* USE_CMSG */ 1194258945Sroberto 1195258945Sroberto/* 1196258945Sroberto * Process control messages received on a socket. 1197258945Sroberto */ 1198258945Srobertostatic void 1199280849Scyprocess_cmsg(isc__socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { 1200258945Sroberto#ifdef USE_CMSG 1201258945Sroberto struct cmsghdr *cmsgp; 1202258945Sroberto#ifdef ISC_PLATFORM_HAVEIN6PKTINFO 1203258945Sroberto struct in6_pktinfo *pktinfop; 1204258945Sroberto#endif 1205258945Sroberto#ifdef SO_TIMESTAMP 1206258945Sroberto struct timeval *timevalp; 1207258945Sroberto#endif 1208258945Sroberto#endif 1209258945Sroberto 1210258945Sroberto /* 1211258945Sroberto * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. 1212258945Sroberto * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. 1213258945Sroberto * They are all here, outside of the CPP tests, because it is 1214258945Sroberto * more consistent with the usual ISC coding style. 1215258945Sroberto */ 1216258945Sroberto UNUSED(sock); 1217258945Sroberto UNUSED(msg); 1218258945Sroberto UNUSED(dev); 1219258945Sroberto 1220258945Sroberto#ifdef ISC_NET_BSD44MSGHDR 1221258945Sroberto 1222258945Sroberto#ifdef MSG_TRUNC 1223258945Sroberto if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC) 1224258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; 1225258945Sroberto#endif 1226258945Sroberto 1227258945Sroberto#ifdef MSG_CTRUNC 1228258945Sroberto if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC) 1229258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; 1230258945Sroberto#endif 1231258945Sroberto 1232258945Sroberto#ifndef USE_CMSG 1233258945Sroberto return; 1234258945Sroberto#else 1235258945Sroberto if (msg->msg_controllen == 0U || msg->msg_control == NULL) 1236258945Sroberto return; 1237258945Sroberto 1238258945Sroberto#ifdef SO_TIMESTAMP 1239258945Sroberto timevalp = NULL; 1240258945Sroberto#endif 1241258945Sroberto#ifdef ISC_PLATFORM_HAVEIN6PKTINFO 1242258945Sroberto pktinfop = NULL; 1243258945Sroberto#endif 1244258945Sroberto 1245258945Sroberto cmsgp = CMSG_FIRSTHDR(msg); 1246258945Sroberto while (cmsgp != NULL) { 1247258945Sroberto socket_log(sock, NULL, TRACE, 1248258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PROCESSCMSG, 1249258945Sroberto "processing cmsg %p", cmsgp); 1250258945Sroberto 1251258945Sroberto#ifdef ISC_PLATFORM_HAVEIN6PKTINFO 1252258945Sroberto if (cmsgp->cmsg_level == IPPROTO_IPV6 1253258945Sroberto && cmsgp->cmsg_type == IPV6_PKTINFO) { 1254258945Sroberto 1255258945Sroberto pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 1256258945Sroberto memcpy(&dev->pktinfo, pktinfop, 1257258945Sroberto sizeof(struct in6_pktinfo)); 1258258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 1259258945Sroberto socket_log(sock, NULL, TRACE, 1260258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 1261258945Sroberto ISC_MSG_IFRECEIVED, 1262258945Sroberto "interface received on ifindex %u", 1263258945Sroberto dev->pktinfo.ipi6_ifindex); 1264258945Sroberto if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) 1265258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; 1266258945Sroberto goto next; 1267258945Sroberto } 1268258945Sroberto#endif 1269258945Sroberto 1270258945Sroberto#ifdef SO_TIMESTAMP 1271258945Sroberto if (cmsgp->cmsg_level == SOL_SOCKET 1272258945Sroberto && cmsgp->cmsg_type == SCM_TIMESTAMP) { 1273258945Sroberto timevalp = (struct timeval *)CMSG_DATA(cmsgp); 1274258945Sroberto dev->timestamp.seconds = timevalp->tv_sec; 1275258945Sroberto dev->timestamp.nanoseconds = timevalp->tv_usec * 1000; 1276258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; 1277258945Sroberto goto next; 1278258945Sroberto } 1279258945Sroberto#endif 1280258945Sroberto 1281258945Sroberto next: 1282258945Sroberto cmsgp = CMSG_NXTHDR(msg, cmsgp); 1283258945Sroberto } 1284258945Sroberto#endif /* USE_CMSG */ 1285258945Sroberto 1286258945Sroberto#endif /* ISC_NET_BSD44MSGHDR */ 1287258945Sroberto} 1288258945Sroberto 1289258945Sroberto/* 1290258945Sroberto * Construct an iov array and attach it to the msghdr passed in. This is 1291258945Sroberto * the SEND constructor, which will use the used region of the buffer 1292258945Sroberto * (if using a buffer list) or will use the internal region (if a single 1293258945Sroberto * buffer I/O is requested). 1294258945Sroberto * 1295258945Sroberto * Nothing can be NULL, and the done event must list at least one buffer 1296258945Sroberto * on the buffer linked list for this function to be meaningful. 1297258945Sroberto * 1298258945Sroberto * If write_countp != NULL, *write_countp will hold the number of bytes 1299258945Sroberto * this transaction can send. 1300258945Sroberto */ 1301258945Srobertostatic void 1302280849Scybuild_msghdr_send(isc__socket_t *sock, isc_socketevent_t *dev, 1303258945Sroberto struct msghdr *msg, struct iovec *iov, size_t *write_countp) 1304258945Sroberto{ 1305258945Sroberto unsigned int iovcount; 1306258945Sroberto isc_buffer_t *buffer; 1307258945Sroberto isc_region_t used; 1308258945Sroberto size_t write_count; 1309258945Sroberto size_t skip_count; 1310258945Sroberto 1311258945Sroberto memset(msg, 0, sizeof(*msg)); 1312258945Sroberto 1313258945Sroberto if (!sock->connected) { 1314258945Sroberto msg->msg_name = (void *)&dev->address.type.sa; 1315258945Sroberto msg->msg_namelen = dev->address.length; 1316258945Sroberto } else { 1317258945Sroberto msg->msg_name = NULL; 1318258945Sroberto msg->msg_namelen = 0; 1319258945Sroberto } 1320258945Sroberto 1321258945Sroberto buffer = ISC_LIST_HEAD(dev->bufferlist); 1322258945Sroberto write_count = 0; 1323258945Sroberto iovcount = 0; 1324258945Sroberto 1325258945Sroberto /* 1326258945Sroberto * Single buffer I/O? Skip what we've done so far in this region. 1327258945Sroberto */ 1328258945Sroberto if (buffer == NULL) { 1329258945Sroberto write_count = dev->region.length - dev->n; 1330258945Sroberto iov[0].iov_base = (void *)(dev->region.base + dev->n); 1331258945Sroberto iov[0].iov_len = write_count; 1332258945Sroberto iovcount = 1; 1333258945Sroberto 1334258945Sroberto goto config; 1335258945Sroberto } 1336258945Sroberto 1337258945Sroberto /* 1338258945Sroberto * Multibuffer I/O. 1339258945Sroberto * Skip the data in the buffer list that we have already written. 1340258945Sroberto */ 1341258945Sroberto skip_count = dev->n; 1342258945Sroberto while (buffer != NULL) { 1343258945Sroberto REQUIRE(ISC_BUFFER_VALID(buffer)); 1344258945Sroberto if (skip_count < isc_buffer_usedlength(buffer)) 1345258945Sroberto break; 1346258945Sroberto skip_count -= isc_buffer_usedlength(buffer); 1347258945Sroberto buffer = ISC_LIST_NEXT(buffer, link); 1348258945Sroberto } 1349258945Sroberto 1350258945Sroberto while (buffer != NULL) { 1351258945Sroberto INSIST(iovcount < MAXSCATTERGATHER_SEND); 1352258945Sroberto 1353258945Sroberto isc_buffer_usedregion(buffer, &used); 1354258945Sroberto 1355258945Sroberto if (used.length > 0) { 1356258945Sroberto iov[iovcount].iov_base = (void *)(used.base 1357258945Sroberto + skip_count); 1358258945Sroberto iov[iovcount].iov_len = used.length - skip_count; 1359258945Sroberto write_count += (used.length - skip_count); 1360258945Sroberto skip_count = 0; 1361258945Sroberto iovcount++; 1362258945Sroberto } 1363258945Sroberto buffer = ISC_LIST_NEXT(buffer, link); 1364258945Sroberto } 1365258945Sroberto 1366258945Sroberto INSIST(skip_count == 0U); 1367258945Sroberto 1368258945Sroberto config: 1369258945Sroberto msg->msg_iov = iov; 1370258945Sroberto msg->msg_iovlen = iovcount; 1371258945Sroberto 1372258945Sroberto#ifdef ISC_NET_BSD44MSGHDR 1373258945Sroberto msg->msg_control = NULL; 1374258945Sroberto msg->msg_controllen = 0; 1375258945Sroberto msg->msg_flags = 0; 1376258945Sroberto#if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO) 1377258945Sroberto if ((sock->type == isc_sockettype_udp) 1378258945Sroberto && ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) { 1379280849Scy#if defined(IPV6_USE_MIN_MTU) 1380280849Scy int use_min_mtu = 1; /* -1, 0, 1 */ 1381280849Scy#endif 1382258945Sroberto struct cmsghdr *cmsgp; 1383258945Sroberto struct in6_pktinfo *pktinfop; 1384258945Sroberto 1385258945Sroberto socket_log(sock, NULL, TRACE, 1386258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_SENDTODATA, 1387258945Sroberto "sendto pktinfo data, ifindex %u", 1388258945Sroberto dev->pktinfo.ipi6_ifindex); 1389258945Sroberto 1390258945Sroberto msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo)); 1391258945Sroberto INSIST(msg->msg_controllen <= sock->sendcmsgbuflen); 1392258945Sroberto msg->msg_control = (void *)sock->sendcmsgbuf; 1393258945Sroberto 1394258945Sroberto cmsgp = (struct cmsghdr *)sock->sendcmsgbuf; 1395258945Sroberto cmsgp->cmsg_level = IPPROTO_IPV6; 1396258945Sroberto cmsgp->cmsg_type = IPV6_PKTINFO; 1397258945Sroberto cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo)); 1398258945Sroberto pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); 1399258945Sroberto memcpy(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo)); 1400280849Scy#if defined(IPV6_USE_MIN_MTU) 1401280849Scy /* 1402280849Scy * Set IPV6_USE_MIN_MTU as a per packet option as FreeBSD 1403280849Scy * ignores setsockopt(IPV6_USE_MIN_MTU) when IPV6_PKTINFO 1404280849Scy * is used. 1405280849Scy */ 1406280849Scy cmsgp = (struct cmsghdr *)(sock->sendcmsgbuf + 1407280849Scy msg->msg_controllen); 1408280849Scy msg->msg_controllen += cmsg_space(sizeof(use_min_mtu)); 1409280849Scy INSIST(msg->msg_controllen <= sock->sendcmsgbuflen); 1410280849Scy 1411280849Scy cmsgp->cmsg_level = IPPROTO_IPV6; 1412280849Scy cmsgp->cmsg_type = IPV6_USE_MIN_MTU; 1413280849Scy cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu)); 1414280849Scy memcpy(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu)); 1415280849Scy#endif 1416258945Sroberto } 1417258945Sroberto#endif /* USE_CMSG && ISC_PLATFORM_HAVEIPV6 */ 1418258945Sroberto#else /* ISC_NET_BSD44MSGHDR */ 1419258945Sroberto msg->msg_accrights = NULL; 1420258945Sroberto msg->msg_accrightslen = 0; 1421258945Sroberto#endif /* ISC_NET_BSD44MSGHDR */ 1422258945Sroberto 1423258945Sroberto if (write_countp != NULL) 1424258945Sroberto *write_countp = write_count; 1425258945Sroberto} 1426258945Sroberto 1427258945Sroberto/* 1428258945Sroberto * Construct an iov array and attach it to the msghdr passed in. This is 1429258945Sroberto * the RECV constructor, which will use the available region of the buffer 1430258945Sroberto * (if using a buffer list) or will use the internal region (if a single 1431258945Sroberto * buffer I/O is requested). 1432258945Sroberto * 1433258945Sroberto * Nothing can be NULL, and the done event must list at least one buffer 1434258945Sroberto * on the buffer linked list for this function to be meaningful. 1435258945Sroberto * 1436258945Sroberto * If read_countp != NULL, *read_countp will hold the number of bytes 1437258945Sroberto * this transaction can receive. 1438258945Sroberto */ 1439258945Srobertostatic void 1440280849Scybuild_msghdr_recv(isc__socket_t *sock, isc_socketevent_t *dev, 1441258945Sroberto struct msghdr *msg, struct iovec *iov, size_t *read_countp) 1442258945Sroberto{ 1443258945Sroberto unsigned int iovcount; 1444258945Sroberto isc_buffer_t *buffer; 1445258945Sroberto isc_region_t available; 1446258945Sroberto size_t read_count; 1447258945Sroberto 1448258945Sroberto memset(msg, 0, sizeof(struct msghdr)); 1449258945Sroberto 1450258945Sroberto if (sock->type == isc_sockettype_udp) { 1451258945Sroberto memset(&dev->address, 0, sizeof(dev->address)); 1452258945Sroberto#ifdef BROKEN_RECVMSG 1453258945Sroberto if (sock->pf == AF_INET) { 1454258945Sroberto msg->msg_name = (void *)&dev->address.type.sin; 1455258945Sroberto msg->msg_namelen = sizeof(dev->address.type.sin6); 1456258945Sroberto } else if (sock->pf == AF_INET6) { 1457258945Sroberto msg->msg_name = (void *)&dev->address.type.sin6; 1458258945Sroberto msg->msg_namelen = sizeof(dev->address.type.sin6); 1459258945Sroberto#ifdef ISC_PLATFORM_HAVESYSUNH 1460258945Sroberto } else if (sock->pf == AF_UNIX) { 1461258945Sroberto msg->msg_name = (void *)&dev->address.type.sunix; 1462258945Sroberto msg->msg_namelen = sizeof(dev->address.type.sunix); 1463258945Sroberto#endif 1464258945Sroberto } else { 1465258945Sroberto msg->msg_name = (void *)&dev->address.type.sa; 1466258945Sroberto msg->msg_namelen = sizeof(dev->address.type); 1467258945Sroberto } 1468258945Sroberto#else 1469258945Sroberto msg->msg_name = (void *)&dev->address.type.sa; 1470258945Sroberto msg->msg_namelen = sizeof(dev->address.type); 1471258945Sroberto#endif 1472258945Sroberto#ifdef ISC_NET_RECVOVERFLOW 1473258945Sroberto /* If needed, steal one iovec for overflow detection. */ 1474258945Sroberto maxiov--; 1475258945Sroberto#endif 1476258945Sroberto } else { /* TCP */ 1477258945Sroberto msg->msg_name = NULL; 1478258945Sroberto msg->msg_namelen = 0; 1479258945Sroberto dev->address = sock->peer_address; 1480258945Sroberto } 1481258945Sroberto 1482258945Sroberto buffer = ISC_LIST_HEAD(dev->bufferlist); 1483258945Sroberto read_count = 0; 1484258945Sroberto 1485258945Sroberto /* 1486258945Sroberto * Single buffer I/O? Skip what we've done so far in this region. 1487258945Sroberto */ 1488258945Sroberto if (buffer == NULL) { 1489258945Sroberto read_count = dev->region.length - dev->n; 1490258945Sroberto iov[0].iov_base = (void *)(dev->region.base + dev->n); 1491258945Sroberto iov[0].iov_len = read_count; 1492258945Sroberto iovcount = 1; 1493258945Sroberto 1494258945Sroberto goto config; 1495258945Sroberto } 1496258945Sroberto 1497258945Sroberto /* 1498258945Sroberto * Multibuffer I/O. 1499258945Sroberto * Skip empty buffers. 1500258945Sroberto */ 1501258945Sroberto while (buffer != NULL) { 1502258945Sroberto REQUIRE(ISC_BUFFER_VALID(buffer)); 1503258945Sroberto if (isc_buffer_availablelength(buffer) != 0) 1504258945Sroberto break; 1505258945Sroberto buffer = ISC_LIST_NEXT(buffer, link); 1506258945Sroberto } 1507258945Sroberto 1508258945Sroberto iovcount = 0; 1509258945Sroberto while (buffer != NULL) { 1510258945Sroberto INSIST(iovcount < MAXSCATTERGATHER_RECV); 1511258945Sroberto 1512258945Sroberto isc_buffer_availableregion(buffer, &available); 1513258945Sroberto 1514258945Sroberto if (available.length > 0) { 1515258945Sroberto iov[iovcount].iov_base = (void *)(available.base); 1516258945Sroberto iov[iovcount].iov_len = available.length; 1517258945Sroberto read_count += available.length; 1518258945Sroberto iovcount++; 1519258945Sroberto } 1520258945Sroberto buffer = ISC_LIST_NEXT(buffer, link); 1521258945Sroberto } 1522258945Sroberto 1523258945Sroberto config: 1524258945Sroberto 1525258945Sroberto /* 1526258945Sroberto * If needed, set up to receive that one extra byte. Note that 1527258945Sroberto * we know there is at least one iov left, since we stole it 1528258945Sroberto * at the top of this function. 1529258945Sroberto */ 1530258945Sroberto#ifdef ISC_NET_RECVOVERFLOW 1531258945Sroberto if (sock->type == isc_sockettype_udp) { 1532258945Sroberto iov[iovcount].iov_base = (void *)(&sock->overflow); 1533258945Sroberto iov[iovcount].iov_len = 1; 1534258945Sroberto iovcount++; 1535258945Sroberto } 1536258945Sroberto#endif 1537258945Sroberto 1538258945Sroberto msg->msg_iov = iov; 1539258945Sroberto msg->msg_iovlen = iovcount; 1540258945Sroberto 1541258945Sroberto#ifdef ISC_NET_BSD44MSGHDR 1542258945Sroberto msg->msg_control = NULL; 1543258945Sroberto msg->msg_controllen = 0; 1544258945Sroberto msg->msg_flags = 0; 1545258945Sroberto#if defined(USE_CMSG) 1546258945Sroberto if (sock->type == isc_sockettype_udp) { 1547258945Sroberto msg->msg_control = sock->recvcmsgbuf; 1548258945Sroberto msg->msg_controllen = sock->recvcmsgbuflen; 1549258945Sroberto } 1550258945Sroberto#endif /* USE_CMSG */ 1551258945Sroberto#else /* ISC_NET_BSD44MSGHDR */ 1552258945Sroberto msg->msg_accrights = NULL; 1553258945Sroberto msg->msg_accrightslen = 0; 1554258945Sroberto#endif /* ISC_NET_BSD44MSGHDR */ 1555258945Sroberto 1556258945Sroberto if (read_countp != NULL) 1557258945Sroberto *read_countp = read_count; 1558258945Sroberto} 1559258945Sroberto 1560258945Srobertostatic void 1561280849Scyset_dev_address(isc_sockaddr_t *address, isc__socket_t *sock, 1562258945Sroberto isc_socketevent_t *dev) 1563258945Sroberto{ 1564258945Sroberto if (sock->type == isc_sockettype_udp) { 1565258945Sroberto if (address != NULL) 1566258945Sroberto dev->address = *address; 1567258945Sroberto else 1568258945Sroberto dev->address = sock->peer_address; 1569258945Sroberto } else if (sock->type == isc_sockettype_tcp) { 1570258945Sroberto INSIST(address == NULL); 1571258945Sroberto dev->address = sock->peer_address; 1572258945Sroberto } 1573258945Sroberto} 1574258945Sroberto 1575258945Srobertostatic void 1576258945Srobertodestroy_socketevent(isc_event_t *event) { 1577258945Sroberto isc_socketevent_t *ev = (isc_socketevent_t *)event; 1578258945Sroberto 1579258945Sroberto INSIST(ISC_LIST_EMPTY(ev->bufferlist)); 1580258945Sroberto 1581258945Sroberto (ev->destroy)(event); 1582258945Sroberto} 1583258945Sroberto 1584258945Srobertostatic isc_socketevent_t * 1585280849Scyallocate_socketevent(isc__socket_t *sock, isc_eventtype_t eventtype, 1586258945Sroberto isc_taskaction_t action, const void *arg) 1587258945Sroberto{ 1588258945Sroberto isc_socketevent_t *ev; 1589258945Sroberto 1590258945Sroberto ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx, 1591258945Sroberto sock, eventtype, 1592258945Sroberto action, arg, 1593258945Sroberto sizeof(*ev)); 1594258945Sroberto 1595258945Sroberto if (ev == NULL) 1596258945Sroberto return (NULL); 1597258945Sroberto 1598280849Scy ev->result = ISC_R_UNSET; 1599258945Sroberto ISC_LINK_INIT(ev, ev_link); 1600258945Sroberto ISC_LIST_INIT(ev->bufferlist); 1601258945Sroberto ev->region.base = NULL; 1602258945Sroberto ev->n = 0; 1603258945Sroberto ev->offset = 0; 1604258945Sroberto ev->attributes = 0; 1605258945Sroberto ev->destroy = ev->ev_destroy; 1606258945Sroberto ev->ev_destroy = destroy_socketevent; 1607258945Sroberto 1608258945Sroberto return (ev); 1609258945Sroberto} 1610258945Sroberto 1611258945Sroberto#if defined(ISC_SOCKET_DEBUG) 1612258945Srobertostatic void 1613258945Srobertodump_msg(struct msghdr *msg) { 1614258945Sroberto unsigned int i; 1615258945Sroberto 1616258945Sroberto printf("MSGHDR %p\n", msg); 1617258945Sroberto printf("\tname %p, namelen %ld\n", msg->msg_name, 1618258945Sroberto (long) msg->msg_namelen); 1619258945Sroberto printf("\tiov %p, iovlen %ld\n", msg->msg_iov, 1620258945Sroberto (long) msg->msg_iovlen); 1621258945Sroberto for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) 1622258945Sroberto printf("\t\t%d\tbase %p, len %ld\n", i, 1623258945Sroberto msg->msg_iov[i].iov_base, 1624258945Sroberto (long) msg->msg_iov[i].iov_len); 1625258945Sroberto#ifdef ISC_NET_BSD44MSGHDR 1626258945Sroberto printf("\tcontrol %p, controllen %ld\n", msg->msg_control, 1627258945Sroberto (long) msg->msg_controllen); 1628258945Sroberto#endif 1629258945Sroberto} 1630258945Sroberto#endif 1631258945Sroberto 1632258945Sroberto#define DOIO_SUCCESS 0 /* i/o ok, event sent */ 1633258945Sroberto#define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ 1634258945Sroberto#define DOIO_HARD 2 /* i/o error, event sent */ 1635258945Sroberto#define DOIO_EOF 3 /* EOF, no event sent */ 1636258945Sroberto 1637258945Srobertostatic int 1638280849Scydoio_recv(isc__socket_t *sock, isc_socketevent_t *dev) { 1639258945Sroberto int cc; 1640258945Sroberto struct iovec iov[MAXSCATTERGATHER_RECV]; 1641258945Sroberto size_t read_count; 1642258945Sroberto size_t actual_count; 1643258945Sroberto struct msghdr msghdr; 1644258945Sroberto isc_buffer_t *buffer; 1645258945Sroberto int recv_errno; 1646258945Sroberto char strbuf[ISC_STRERRORSIZE]; 1647258945Sroberto 1648258945Sroberto build_msghdr_recv(sock, dev, &msghdr, iov, &read_count); 1649258945Sroberto 1650258945Sroberto#if defined(ISC_SOCKET_DEBUG) 1651258945Sroberto dump_msg(&msghdr); 1652258945Sroberto#endif 1653258945Sroberto 1654258945Sroberto cc = recvmsg(sock->fd, &msghdr, 0); 1655258945Sroberto recv_errno = errno; 1656258945Sroberto 1657258945Sroberto#if defined(ISC_SOCKET_DEBUG) 1658258945Sroberto dump_msg(&msghdr); 1659258945Sroberto#endif 1660258945Sroberto 1661258945Sroberto if (cc < 0) { 1662258945Sroberto if (SOFT_ERROR(recv_errno)) 1663258945Sroberto return (DOIO_SOFT); 1664258945Sroberto 1665258945Sroberto if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1666258945Sroberto isc__strerror(recv_errno, strbuf, sizeof(strbuf)); 1667258945Sroberto socket_log(sock, NULL, IOEVENT, 1668258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 1669258945Sroberto ISC_MSG_DOIORECV, 1670258945Sroberto "doio_recv: recvmsg(%d) %d bytes, err %d/%s", 1671258945Sroberto sock->fd, cc, recv_errno, strbuf); 1672258945Sroberto } 1673258945Sroberto 1674258945Sroberto#define SOFT_OR_HARD(_system, _isc) \ 1675258945Sroberto if (recv_errno == _system) { \ 1676258945Sroberto if (sock->connected) { \ 1677258945Sroberto dev->result = _isc; \ 1678258945Sroberto inc_stats(sock->manager->stats, \ 1679258945Sroberto sock->statsindex[STATID_RECVFAIL]); \ 1680258945Sroberto return (DOIO_HARD); \ 1681258945Sroberto } \ 1682258945Sroberto return (DOIO_SOFT); \ 1683258945Sroberto } 1684258945Sroberto#define ALWAYS_HARD(_system, _isc) \ 1685258945Sroberto if (recv_errno == _system) { \ 1686258945Sroberto dev->result = _isc; \ 1687258945Sroberto inc_stats(sock->manager->stats, \ 1688258945Sroberto sock->statsindex[STATID_RECVFAIL]); \ 1689258945Sroberto return (DOIO_HARD); \ 1690258945Sroberto } 1691258945Sroberto 1692258945Sroberto SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 1693258945Sroberto SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH); 1694258945Sroberto SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 1695258945Sroberto SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN); 1696258945Sroberto /* HPUX 11.11 can return EADDRNOTAVAIL. */ 1697258945Sroberto SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 1698258945Sroberto ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 1699258945Sroberto /* 1700258945Sroberto * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6 1701258945Sroberto * errors. 1702258945Sroberto */ 1703258945Sroberto#ifdef EPROTO 1704258945Sroberto SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); 1705258945Sroberto#endif 1706258945Sroberto SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); 1707258945Sroberto 1708258945Sroberto#undef SOFT_OR_HARD 1709258945Sroberto#undef ALWAYS_HARD 1710258945Sroberto 1711258945Sroberto dev->result = isc__errno2result(recv_errno); 1712258945Sroberto inc_stats(sock->manager->stats, 1713258945Sroberto sock->statsindex[STATID_RECVFAIL]); 1714258945Sroberto return (DOIO_HARD); 1715258945Sroberto } 1716258945Sroberto 1717258945Sroberto /* 1718280849Scy * On TCP and UNIX sockets, zero length reads indicate EOF, 1719280849Scy * while on UDP sockets, zero length reads are perfectly valid, 1720280849Scy * although strange. 1721258945Sroberto */ 1722280849Scy switch (sock->type) { 1723280849Scy case isc_sockettype_tcp: 1724280849Scy case isc_sockettype_unix: 1725280849Scy if (cc == 0) 1726280849Scy return (DOIO_EOF); 1727280849Scy break; 1728280849Scy case isc_sockettype_udp: 1729280849Scy break; 1730280849Scy case isc_sockettype_fdwatch: 1731280849Scy default: 1732280849Scy INSIST(0); 1733280849Scy } 1734258945Sroberto 1735258945Sroberto if (sock->type == isc_sockettype_udp) { 1736258945Sroberto dev->address.length = msghdr.msg_namelen; 1737258945Sroberto if (isc_sockaddr_getport(&dev->address) == 0) { 1738258945Sroberto if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1739258945Sroberto socket_log(sock, &dev->address, IOEVENT, 1740258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 1741258945Sroberto ISC_MSG_ZEROPORT, 1742258945Sroberto "dropping source port zero packet"); 1743258945Sroberto } 1744258945Sroberto return (DOIO_SOFT); 1745258945Sroberto } 1746280849Scy /* 1747280849Scy * Simulate a firewall blocking UDP responses bigger than 1748280849Scy * 512 bytes. 1749280849Scy */ 1750280849Scy if (sock->manager->maxudp != 0 && cc > sock->manager->maxudp) 1751280849Scy return (DOIO_SOFT); 1752258945Sroberto } 1753258945Sroberto 1754258945Sroberto socket_log(sock, &dev->address, IOEVENT, 1755258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PKTRECV, 1756258945Sroberto "packet received correctly"); 1757258945Sroberto 1758258945Sroberto /* 1759258945Sroberto * Overflow bit detection. If we received MORE bytes than we should, 1760258945Sroberto * this indicates an overflow situation. Set the flag in the 1761258945Sroberto * dev entry and adjust how much we read by one. 1762258945Sroberto */ 1763258945Sroberto#ifdef ISC_NET_RECVOVERFLOW 1764258945Sroberto if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) { 1765258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_TRUNC; 1766258945Sroberto cc--; 1767258945Sroberto } 1768258945Sroberto#endif 1769258945Sroberto 1770258945Sroberto /* 1771258945Sroberto * If there are control messages attached, run through them and pull 1772258945Sroberto * out the interesting bits. 1773258945Sroberto */ 1774258945Sroberto if (sock->type == isc_sockettype_udp) 1775258945Sroberto process_cmsg(sock, &msghdr, dev); 1776258945Sroberto 1777258945Sroberto /* 1778258945Sroberto * update the buffers (if any) and the i/o count 1779258945Sroberto */ 1780258945Sroberto dev->n += cc; 1781258945Sroberto actual_count = cc; 1782258945Sroberto buffer = ISC_LIST_HEAD(dev->bufferlist); 1783258945Sroberto while (buffer != NULL && actual_count > 0U) { 1784258945Sroberto REQUIRE(ISC_BUFFER_VALID(buffer)); 1785258945Sroberto if (isc_buffer_availablelength(buffer) <= actual_count) { 1786258945Sroberto actual_count -= isc_buffer_availablelength(buffer); 1787258945Sroberto isc_buffer_add(buffer, 1788258945Sroberto isc_buffer_availablelength(buffer)); 1789258945Sroberto } else { 1790258945Sroberto isc_buffer_add(buffer, actual_count); 1791258945Sroberto actual_count = 0; 1792280849Scy POST(actual_count); 1793258945Sroberto break; 1794258945Sroberto } 1795258945Sroberto buffer = ISC_LIST_NEXT(buffer, link); 1796258945Sroberto if (buffer == NULL) { 1797258945Sroberto INSIST(actual_count == 0U); 1798258945Sroberto } 1799258945Sroberto } 1800258945Sroberto 1801258945Sroberto /* 1802258945Sroberto * If we read less than we expected, update counters, 1803258945Sroberto * and let the upper layer poke the descriptor. 1804258945Sroberto */ 1805258945Sroberto if (((size_t)cc != read_count) && (dev->n < dev->minimum)) 1806258945Sroberto return (DOIO_SOFT); 1807258945Sroberto 1808258945Sroberto /* 1809258945Sroberto * Full reads are posted, or partials if partials are ok. 1810258945Sroberto */ 1811258945Sroberto dev->result = ISC_R_SUCCESS; 1812258945Sroberto return (DOIO_SUCCESS); 1813258945Sroberto} 1814258945Sroberto 1815258945Sroberto/* 1816258945Sroberto * Returns: 1817258945Sroberto * DOIO_SUCCESS The operation succeeded. dev->result contains 1818258945Sroberto * ISC_R_SUCCESS. 1819258945Sroberto * 1820258945Sroberto * DOIO_HARD A hard or unexpected I/O error was encountered. 1821258945Sroberto * dev->result contains the appropriate error. 1822258945Sroberto * 1823258945Sroberto * DOIO_SOFT A soft I/O error was encountered. No senddone 1824258945Sroberto * event was sent. The operation should be retried. 1825258945Sroberto * 1826258945Sroberto * No other return values are possible. 1827258945Sroberto */ 1828258945Srobertostatic int 1829280849Scydoio_send(isc__socket_t *sock, isc_socketevent_t *dev) { 1830258945Sroberto int cc; 1831258945Sroberto struct iovec iov[MAXSCATTERGATHER_SEND]; 1832258945Sroberto size_t write_count; 1833258945Sroberto struct msghdr msghdr; 1834258945Sroberto char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1835258945Sroberto int attempts = 0; 1836258945Sroberto int send_errno; 1837258945Sroberto char strbuf[ISC_STRERRORSIZE]; 1838258945Sroberto 1839258945Sroberto build_msghdr_send(sock, dev, &msghdr, iov, &write_count); 1840258945Sroberto 1841258945Sroberto resend: 1842258945Sroberto cc = sendmsg(sock->fd, &msghdr, 0); 1843258945Sroberto send_errno = errno; 1844258945Sroberto 1845258945Sroberto /* 1846258945Sroberto * Check for error or block condition. 1847258945Sroberto */ 1848258945Sroberto if (cc < 0) { 1849258945Sroberto if (send_errno == EINTR && ++attempts < NRETRIES) 1850258945Sroberto goto resend; 1851258945Sroberto 1852258945Sroberto if (SOFT_ERROR(send_errno)) 1853258945Sroberto return (DOIO_SOFT); 1854258945Sroberto 1855258945Sroberto#define SOFT_OR_HARD(_system, _isc) \ 1856258945Sroberto if (send_errno == _system) { \ 1857258945Sroberto if (sock->connected) { \ 1858258945Sroberto dev->result = _isc; \ 1859258945Sroberto inc_stats(sock->manager->stats, \ 1860258945Sroberto sock->statsindex[STATID_SENDFAIL]); \ 1861258945Sroberto return (DOIO_HARD); \ 1862258945Sroberto } \ 1863258945Sroberto return (DOIO_SOFT); \ 1864258945Sroberto } 1865258945Sroberto#define ALWAYS_HARD(_system, _isc) \ 1866258945Sroberto if (send_errno == _system) { \ 1867258945Sroberto dev->result = _isc; \ 1868258945Sroberto inc_stats(sock->manager->stats, \ 1869258945Sroberto sock->statsindex[STATID_SENDFAIL]); \ 1870258945Sroberto return (DOIO_HARD); \ 1871258945Sroberto } 1872258945Sroberto 1873258945Sroberto SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED); 1874258945Sroberto ALWAYS_HARD(EACCES, ISC_R_NOPERM); 1875258945Sroberto ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 1876258945Sroberto ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 1877258945Sroberto ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH); 1878258945Sroberto#ifdef EHOSTDOWN 1879258945Sroberto ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH); 1880258945Sroberto#endif 1881258945Sroberto ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH); 1882258945Sroberto ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); 1883258945Sroberto ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH); 1884258945Sroberto ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED); 1885258945Sroberto ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET); 1886258945Sroberto 1887258945Sroberto#undef SOFT_OR_HARD 1888258945Sroberto#undef ALWAYS_HARD 1889258945Sroberto 1890258945Sroberto /* 1891258945Sroberto * The other error types depend on whether or not the 1892258945Sroberto * socket is UDP or TCP. If it is UDP, some errors 1893258945Sroberto * that we expect to be fatal under TCP are merely 1894258945Sroberto * annoying, and are really soft errors. 1895258945Sroberto * 1896258945Sroberto * However, these soft errors are still returned as 1897258945Sroberto * a status. 1898258945Sroberto */ 1899258945Sroberto isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1900258945Sroberto isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1901258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s", 1902258945Sroberto addrbuf, strbuf); 1903258945Sroberto dev->result = isc__errno2result(send_errno); 1904258945Sroberto inc_stats(sock->manager->stats, 1905258945Sroberto sock->statsindex[STATID_SENDFAIL]); 1906258945Sroberto return (DOIO_HARD); 1907258945Sroberto } 1908258945Sroberto 1909258945Sroberto if (cc == 0) { 1910258945Sroberto inc_stats(sock->manager->stats, 1911258945Sroberto sock->statsindex[STATID_SENDFAIL]); 1912258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 1913258945Sroberto "doio_send: send() %s 0", 1914258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 1915258945Sroberto ISC_MSG_RETURNED, "returned")); 1916258945Sroberto } 1917258945Sroberto 1918258945Sroberto /* 1919258945Sroberto * If we write less than we expected, update counters, poke. 1920258945Sroberto */ 1921258945Sroberto dev->n += cc; 1922258945Sroberto if ((size_t)cc != write_count) 1923258945Sroberto return (DOIO_SOFT); 1924258945Sroberto 1925258945Sroberto /* 1926258945Sroberto * Exactly what we wanted to write. We're done with this 1927258945Sroberto * entry. Post its completion event. 1928258945Sroberto */ 1929258945Sroberto dev->result = ISC_R_SUCCESS; 1930258945Sroberto return (DOIO_SUCCESS); 1931258945Sroberto} 1932258945Sroberto 1933258945Sroberto/* 1934258945Sroberto * Kill. 1935258945Sroberto * 1936258945Sroberto * Caller must ensure that the socket is not locked and no external 1937258945Sroberto * references exist. 1938258945Sroberto */ 1939258945Srobertostatic void 1940280849Scyclosesocket(isc__socketmgr_t *manager, isc__socket_t *sock, int fd) { 1941258945Sroberto isc_sockettype_t type = sock->type; 1942258945Sroberto int lockid = FDLOCK_ID(fd); 1943258945Sroberto 1944258945Sroberto /* 1945258945Sroberto * No one has this socket open, so the watcher doesn't have to be 1946258945Sroberto * poked, and the socket doesn't have to be locked. 1947258945Sroberto */ 1948258945Sroberto LOCK(&manager->fdlock[lockid]); 1949258945Sroberto manager->fds[fd] = NULL; 1950258945Sroberto if (type == isc_sockettype_fdwatch) 1951258945Sroberto manager->fdstate[fd] = CLOSED; 1952258945Sroberto else 1953258945Sroberto manager->fdstate[fd] = CLOSE_PENDING; 1954258945Sroberto UNLOCK(&manager->fdlock[lockid]); 1955258945Sroberto if (type == isc_sockettype_fdwatch) { 1956258945Sroberto /* 1957258945Sroberto * The caller may close the socket once this function returns, 1958258945Sroberto * and `fd' may be reassigned for a new socket. So we do 1959258945Sroberto * unwatch_fd() here, rather than defer it via select_poke(). 1960258945Sroberto * Note: this may complicate data protection among threads and 1961258945Sroberto * may reduce performance due to additional locks. One way to 1962258945Sroberto * solve this would be to dup() the watched descriptor, but we 1963258945Sroberto * take a simpler approach at this moment. 1964258945Sroberto */ 1965258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 1966258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 1967258945Sroberto } else 1968258945Sroberto select_poke(manager, fd, SELECT_POKE_CLOSE); 1969258945Sroberto 1970258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_CLOSE]); 1971258945Sroberto 1972258945Sroberto /* 1973258945Sroberto * update manager->maxfd here (XXX: this should be implemented more 1974258945Sroberto * efficiently) 1975258945Sroberto */ 1976258945Sroberto#ifdef USE_SELECT 1977258945Sroberto LOCK(&manager->lock); 1978258945Sroberto if (manager->maxfd == fd) { 1979258945Sroberto int i; 1980258945Sroberto 1981258945Sroberto manager->maxfd = 0; 1982258945Sroberto for (i = fd - 1; i >= 0; i--) { 1983258945Sroberto lockid = FDLOCK_ID(i); 1984258945Sroberto 1985258945Sroberto LOCK(&manager->fdlock[lockid]); 1986258945Sroberto if (manager->fdstate[i] == MANAGED) { 1987258945Sroberto manager->maxfd = i; 1988258945Sroberto UNLOCK(&manager->fdlock[lockid]); 1989258945Sroberto break; 1990258945Sroberto } 1991258945Sroberto UNLOCK(&manager->fdlock[lockid]); 1992258945Sroberto } 1993258945Sroberto#ifdef ISC_PLATFORM_USETHREADS 1994258945Sroberto if (manager->maxfd < manager->pipe_fds[0]) 1995258945Sroberto manager->maxfd = manager->pipe_fds[0]; 1996258945Sroberto#endif 1997258945Sroberto } 1998258945Sroberto UNLOCK(&manager->lock); 1999258945Sroberto#endif /* USE_SELECT */ 2000258945Sroberto} 2001258945Sroberto 2002258945Srobertostatic void 2003280849Scydestroy(isc__socket_t **sockp) { 2004258945Sroberto int fd; 2005280849Scy isc__socket_t *sock = *sockp; 2006280849Scy isc__socketmgr_t *manager = sock->manager; 2007258945Sroberto 2008258945Sroberto socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 2009258945Sroberto ISC_MSG_DESTROYING, "destroying"); 2010258945Sroberto 2011258945Sroberto INSIST(ISC_LIST_EMPTY(sock->accept_list)); 2012258945Sroberto INSIST(ISC_LIST_EMPTY(sock->recv_list)); 2013258945Sroberto INSIST(ISC_LIST_EMPTY(sock->send_list)); 2014258945Sroberto INSIST(sock->connect_ev == NULL); 2015258945Sroberto REQUIRE(sock->fd == -1 || sock->fd < (int)manager->maxsocks); 2016258945Sroberto 2017258945Sroberto if (sock->fd >= 0) { 2018258945Sroberto fd = sock->fd; 2019258945Sroberto sock->fd = -1; 2020258945Sroberto closesocket(manager, sock, fd); 2021258945Sroberto } 2022258945Sroberto 2023258945Sroberto LOCK(&manager->lock); 2024258945Sroberto 2025258945Sroberto ISC_LIST_UNLINK(manager->socklist, sock, link); 2026258945Sroberto 2027280849Scy#ifdef USE_WATCHER_THREAD 2028258945Sroberto if (ISC_LIST_EMPTY(manager->socklist)) 2029258945Sroberto SIGNAL(&manager->shutdown_ok); 2030280849Scy#endif /* USE_WATCHER_THREAD */ 2031258945Sroberto 2032280849Scy /* can't unlock manager as its memory context is still used */ 2033280849Scy free_socket(sockp); 2034280849Scy 2035258945Sroberto UNLOCK(&manager->lock); 2036258945Sroberto} 2037258945Sroberto 2038258945Srobertostatic isc_result_t 2039280849Scyallocate_socket(isc__socketmgr_t *manager, isc_sockettype_t type, 2040280849Scy isc__socket_t **socketp) 2041258945Sroberto{ 2042280849Scy isc__socket_t *sock; 2043258945Sroberto isc_result_t result; 2044258945Sroberto ISC_SOCKADDR_LEN_T cmsgbuflen; 2045258945Sroberto 2046258945Sroberto sock = isc_mem_get(manager->mctx, sizeof(*sock)); 2047258945Sroberto 2048258945Sroberto if (sock == NULL) 2049258945Sroberto return (ISC_R_NOMEMORY); 2050258945Sroberto 2051280849Scy sock->common.magic = 0; 2052280849Scy sock->common.impmagic = 0; 2053258945Sroberto sock->references = 0; 2054258945Sroberto 2055258945Sroberto sock->manager = manager; 2056258945Sroberto sock->type = type; 2057258945Sroberto sock->fd = -1; 2058280849Scy sock->dupped = 0; 2059258945Sroberto sock->statsindex = NULL; 2060258945Sroberto 2061258945Sroberto ISC_LINK_INIT(sock, link); 2062258945Sroberto 2063258945Sroberto sock->recvcmsgbuf = NULL; 2064258945Sroberto sock->sendcmsgbuf = NULL; 2065258945Sroberto 2066258945Sroberto /* 2067258945Sroberto * set up cmsg buffers 2068258945Sroberto */ 2069258945Sroberto cmsgbuflen = 0; 2070258945Sroberto#if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO) 2071280849Scy cmsgbuflen += cmsg_space(sizeof(struct in6_pktinfo)); 2072258945Sroberto#endif 2073258945Sroberto#if defined(USE_CMSG) && defined(SO_TIMESTAMP) 2074258945Sroberto cmsgbuflen += cmsg_space(sizeof(struct timeval)); 2075258945Sroberto#endif 2076258945Sroberto sock->recvcmsgbuflen = cmsgbuflen; 2077258945Sroberto if (sock->recvcmsgbuflen != 0U) { 2078258945Sroberto sock->recvcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen); 2079280849Scy if (sock->recvcmsgbuf == NULL) { 2080280849Scy result = ISC_R_NOMEMORY; 2081258945Sroberto goto error; 2082280849Scy } 2083258945Sroberto } 2084258945Sroberto 2085258945Sroberto cmsgbuflen = 0; 2086258945Sroberto#if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO) 2087280849Scy cmsgbuflen += cmsg_space(sizeof(struct in6_pktinfo)); 2088280849Scy#if defined(IPV6_USE_MIN_MTU) 2089280849Scy /* 2090280849Scy * Provide space for working around FreeBSD's broken IPV6_USE_MIN_MTU 2091280849Scy * support. 2092280849Scy */ 2093280849Scy cmsgbuflen += cmsg_space(sizeof(int)); 2094258945Sroberto#endif 2095280849Scy#endif 2096258945Sroberto sock->sendcmsgbuflen = cmsgbuflen; 2097258945Sroberto if (sock->sendcmsgbuflen != 0U) { 2098258945Sroberto sock->sendcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen); 2099280849Scy if (sock->sendcmsgbuf == NULL) { 2100280849Scy result = ISC_R_NOMEMORY; 2101258945Sroberto goto error; 2102280849Scy } 2103258945Sroberto } 2104258945Sroberto 2105258945Sroberto memset(sock->name, 0, sizeof(sock->name)); 2106258945Sroberto sock->tag = NULL; 2107258945Sroberto 2108258945Sroberto /* 2109258945Sroberto * set up list of readers and writers to be initially empty 2110258945Sroberto */ 2111258945Sroberto ISC_LIST_INIT(sock->recv_list); 2112258945Sroberto ISC_LIST_INIT(sock->send_list); 2113258945Sroberto ISC_LIST_INIT(sock->accept_list); 2114258945Sroberto sock->connect_ev = NULL; 2115258945Sroberto sock->pending_recv = 0; 2116258945Sroberto sock->pending_send = 0; 2117258945Sroberto sock->pending_accept = 0; 2118258945Sroberto sock->listener = 0; 2119258945Sroberto sock->connected = 0; 2120258945Sroberto sock->connecting = 0; 2121258945Sroberto sock->bound = 0; 2122258945Sroberto 2123258945Sroberto /* 2124258945Sroberto * initialize the lock 2125258945Sroberto */ 2126258945Sroberto result = isc_mutex_init(&sock->lock); 2127258945Sroberto if (result != ISC_R_SUCCESS) { 2128280849Scy sock->common.magic = 0; 2129280849Scy sock->common.impmagic = 0; 2130258945Sroberto goto error; 2131258945Sroberto } 2132258945Sroberto 2133258945Sroberto /* 2134258945Sroberto * Initialize readable and writable events 2135258945Sroberto */ 2136258945Sroberto ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t), 2137258945Sroberto ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR, 2138258945Sroberto NULL, sock, sock, NULL, NULL); 2139258945Sroberto ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t), 2140258945Sroberto ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW, 2141258945Sroberto NULL, sock, sock, NULL, NULL); 2142258945Sroberto 2143280849Scy sock->common.magic = ISCAPI_SOCKET_MAGIC; 2144280849Scy sock->common.impmagic = SOCKET_MAGIC; 2145258945Sroberto *socketp = sock; 2146258945Sroberto 2147258945Sroberto return (ISC_R_SUCCESS); 2148258945Sroberto 2149258945Sroberto error: 2150258945Sroberto if (sock->recvcmsgbuf != NULL) 2151258945Sroberto isc_mem_put(manager->mctx, sock->recvcmsgbuf, 2152258945Sroberto sock->recvcmsgbuflen); 2153258945Sroberto if (sock->sendcmsgbuf != NULL) 2154258945Sroberto isc_mem_put(manager->mctx, sock->sendcmsgbuf, 2155258945Sroberto sock->sendcmsgbuflen); 2156258945Sroberto isc_mem_put(manager->mctx, sock, sizeof(*sock)); 2157258945Sroberto 2158258945Sroberto return (result); 2159258945Sroberto} 2160258945Sroberto 2161258945Sroberto/* 2162258945Sroberto * This event requires that the various lists be empty, that the reference 2163258945Sroberto * count be 1, and that the magic number is valid. The other socket bits, 2164258945Sroberto * like the lock, must be initialized as well. The fd associated must be 2165258945Sroberto * marked as closed, by setting it to -1 on close, or this routine will 2166258945Sroberto * also close the socket. 2167258945Sroberto */ 2168258945Srobertostatic void 2169280849Scyfree_socket(isc__socket_t **socketp) { 2170280849Scy isc__socket_t *sock = *socketp; 2171258945Sroberto 2172258945Sroberto INSIST(sock->references == 0); 2173258945Sroberto INSIST(VALID_SOCKET(sock)); 2174258945Sroberto INSIST(!sock->connecting); 2175258945Sroberto INSIST(!sock->pending_recv); 2176258945Sroberto INSIST(!sock->pending_send); 2177258945Sroberto INSIST(!sock->pending_accept); 2178258945Sroberto INSIST(ISC_LIST_EMPTY(sock->recv_list)); 2179258945Sroberto INSIST(ISC_LIST_EMPTY(sock->send_list)); 2180258945Sroberto INSIST(ISC_LIST_EMPTY(sock->accept_list)); 2181258945Sroberto INSIST(!ISC_LINK_LINKED(sock, link)); 2182258945Sroberto 2183258945Sroberto if (sock->recvcmsgbuf != NULL) 2184258945Sroberto isc_mem_put(sock->manager->mctx, sock->recvcmsgbuf, 2185258945Sroberto sock->recvcmsgbuflen); 2186258945Sroberto if (sock->sendcmsgbuf != NULL) 2187258945Sroberto isc_mem_put(sock->manager->mctx, sock->sendcmsgbuf, 2188258945Sroberto sock->sendcmsgbuflen); 2189258945Sroberto 2190280849Scy sock->common.magic = 0; 2191280849Scy sock->common.impmagic = 0; 2192258945Sroberto 2193258945Sroberto DESTROYLOCK(&sock->lock); 2194258945Sroberto 2195258945Sroberto isc_mem_put(sock->manager->mctx, sock, sizeof(*sock)); 2196258945Sroberto 2197258945Sroberto *socketp = NULL; 2198258945Sroberto} 2199258945Sroberto 2200258945Sroberto#ifdef SO_BSDCOMPAT 2201258945Sroberto/* 2202258945Sroberto * This really should not be necessary to do. Having to workout 2203258945Sroberto * which kernel version we are on at run time so that we don't cause 2204258945Sroberto * the kernel to issue a warning about us using a deprecated socket option. 2205258945Sroberto * Such warnings should *never* be on by default in production kernels. 2206258945Sroberto * 2207258945Sroberto * We can't do this a build time because executables are moved between 2208258945Sroberto * machines and hence kernels. 2209258945Sroberto * 2210258945Sroberto * We can't just not set SO_BSDCOMAT because some kernels require it. 2211258945Sroberto */ 2212258945Sroberto 2213258945Srobertostatic isc_once_t bsdcompat_once = ISC_ONCE_INIT; 2214258945Srobertoisc_boolean_t bsdcompat = ISC_TRUE; 2215258945Sroberto 2216258945Srobertostatic void 2217258945Srobertoclear_bsdcompat(void) { 2218258945Sroberto#ifdef __linux__ 2219258945Sroberto struct utsname buf; 2220258945Sroberto char *endp; 2221258945Sroberto long int major; 2222258945Sroberto long int minor; 2223258945Sroberto 2224258945Sroberto uname(&buf); /* Can only fail if buf is bad in Linux. */ 2225258945Sroberto 2226258945Sroberto /* Paranoia in parsing can be increased, but we trust uname(). */ 2227258945Sroberto major = strtol(buf.release, &endp, 10); 2228258945Sroberto if (*endp == '.') { 2229258945Sroberto minor = strtol(endp+1, &endp, 10); 2230258945Sroberto if ((major > 2) || ((major == 2) && (minor >= 4))) { 2231258945Sroberto bsdcompat = ISC_FALSE; 2232258945Sroberto } 2233258945Sroberto } 2234258945Sroberto#endif /* __linux __ */ 2235258945Sroberto} 2236258945Sroberto#endif 2237258945Sroberto 2238258945Srobertostatic isc_result_t 2239280849Scyopensocket(isc__socketmgr_t *manager, isc__socket_t *sock, 2240280849Scy isc__socket_t *dup_socket) 2241280849Scy{ 2242280849Scy isc_result_t result; 2243258945Sroberto char strbuf[ISC_STRERRORSIZE]; 2244258945Sroberto const char *err = "socket"; 2245258945Sroberto int tries = 0; 2246258945Sroberto#if defined(USE_CMSG) || defined(SO_BSDCOMPAT) 2247258945Sroberto int on = 1; 2248258945Sroberto#endif 2249258945Sroberto#if defined(SO_RCVBUF) 2250258945Sroberto ISC_SOCKADDR_LEN_T optlen; 2251258945Sroberto int size; 2252258945Sroberto#endif 2253258945Sroberto 2254258945Sroberto again: 2255280849Scy if (dup_socket == NULL) { 2256280849Scy switch (sock->type) { 2257280849Scy case isc_sockettype_udp: 2258280849Scy sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); 2259280849Scy break; 2260280849Scy case isc_sockettype_tcp: 2261280849Scy sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 2262280849Scy break; 2263280849Scy case isc_sockettype_unix: 2264280849Scy sock->fd = socket(sock->pf, SOCK_STREAM, 0); 2265280849Scy break; 2266280849Scy case isc_sockettype_fdwatch: 2267280849Scy /* 2268280849Scy * We should not be called for isc_sockettype_fdwatch 2269280849Scy * sockets. 2270280849Scy */ 2271280849Scy INSIST(0); 2272280849Scy break; 2273280849Scy } 2274280849Scy } else { 2275280849Scy sock->fd = dup(dup_socket->fd); 2276280849Scy sock->dupped = 1; 2277280849Scy sock->bound = dup_socket->bound; 2278258945Sroberto } 2279258945Sroberto if (sock->fd == -1 && errno == EINTR && tries++ < 42) 2280258945Sroberto goto again; 2281258945Sroberto 2282258945Sroberto#ifdef F_DUPFD 2283258945Sroberto /* 2284258945Sroberto * Leave a space for stdio and TCP to work in. 2285258945Sroberto */ 2286258945Sroberto if (manager->reserved != 0 && sock->type == isc_sockettype_udp && 2287258945Sroberto sock->fd >= 0 && sock->fd < manager->reserved) { 2288258945Sroberto int new, tmp; 2289258945Sroberto new = fcntl(sock->fd, F_DUPFD, manager->reserved); 2290258945Sroberto tmp = errno; 2291258945Sroberto (void)close(sock->fd); 2292258945Sroberto errno = tmp; 2293258945Sroberto sock->fd = new; 2294258945Sroberto err = "isc_socket_create: fcntl/reserved"; 2295258945Sroberto } else if (sock->fd >= 0 && sock->fd < 20) { 2296258945Sroberto int new, tmp; 2297258945Sroberto new = fcntl(sock->fd, F_DUPFD, 20); 2298258945Sroberto tmp = errno; 2299258945Sroberto (void)close(sock->fd); 2300258945Sroberto errno = tmp; 2301258945Sroberto sock->fd = new; 2302258945Sroberto err = "isc_socket_create: fcntl"; 2303258945Sroberto } 2304258945Sroberto#endif 2305258945Sroberto 2306258945Sroberto if (sock->fd >= (int)manager->maxsocks) { 2307258945Sroberto (void)close(sock->fd); 2308258945Sroberto isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, 2309258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 2310258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 2311258945Sroberto ISC_MSG_TOOMANYFDS, 2312258945Sroberto "socket: file descriptor exceeds limit (%d/%u)", 2313258945Sroberto sock->fd, manager->maxsocks); 2314258945Sroberto return (ISC_R_NORESOURCES); 2315258945Sroberto } 2316258945Sroberto 2317258945Sroberto if (sock->fd < 0) { 2318258945Sroberto switch (errno) { 2319258945Sroberto case EMFILE: 2320258945Sroberto case ENFILE: 2321258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2322258945Sroberto isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, 2323258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 2324258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 2325258945Sroberto ISC_MSG_TOOMANYFDS, 2326258945Sroberto "%s: %s", err, strbuf); 2327258945Sroberto /* fallthrough */ 2328258945Sroberto case ENOBUFS: 2329258945Sroberto return (ISC_R_NORESOURCES); 2330258945Sroberto 2331258945Sroberto case EPROTONOSUPPORT: 2332258945Sroberto case EPFNOSUPPORT: 2333258945Sroberto case EAFNOSUPPORT: 2334258945Sroberto /* 2335258945Sroberto * Linux 2.2 (and maybe others) return EINVAL instead of 2336258945Sroberto * EAFNOSUPPORT. 2337258945Sroberto */ 2338258945Sroberto case EINVAL: 2339258945Sroberto return (ISC_R_FAMILYNOSUPPORT); 2340258945Sroberto 2341258945Sroberto default: 2342258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2343258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2344258945Sroberto "%s() %s: %s", err, 2345258945Sroberto isc_msgcat_get(isc_msgcat, 2346258945Sroberto ISC_MSGSET_GENERAL, 2347258945Sroberto ISC_MSG_FAILED, 2348258945Sroberto "failed"), 2349258945Sroberto strbuf); 2350258945Sroberto return (ISC_R_UNEXPECTED); 2351258945Sroberto } 2352258945Sroberto } 2353258945Sroberto 2354280849Scy if (dup_socket != NULL) 2355280849Scy goto setup_done; 2356280849Scy 2357280849Scy result = make_nonblock(sock->fd); 2358280849Scy if (result != ISC_R_SUCCESS) { 2359258945Sroberto (void)close(sock->fd); 2360280849Scy return (result); 2361258945Sroberto } 2362258945Sroberto 2363258945Sroberto#ifdef SO_BSDCOMPAT 2364258945Sroberto RUNTIME_CHECK(isc_once_do(&bsdcompat_once, 2365258945Sroberto clear_bsdcompat) == ISC_R_SUCCESS); 2366258945Sroberto if (sock->type != isc_sockettype_unix && bsdcompat && 2367258945Sroberto setsockopt(sock->fd, SOL_SOCKET, SO_BSDCOMPAT, 2368258945Sroberto (void *)&on, sizeof(on)) < 0) { 2369258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2370258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2371258945Sroberto "setsockopt(%d, SO_BSDCOMPAT) %s: %s", 2372258945Sroberto sock->fd, 2373258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2374258945Sroberto ISC_MSG_FAILED, "failed"), 2375258945Sroberto strbuf); 2376258945Sroberto /* Press on... */ 2377258945Sroberto } 2378258945Sroberto#endif 2379258945Sroberto 2380258945Sroberto#ifdef SO_NOSIGPIPE 2381258945Sroberto if (setsockopt(sock->fd, SOL_SOCKET, SO_NOSIGPIPE, 2382258945Sroberto (void *)&on, sizeof(on)) < 0) { 2383258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2384258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2385258945Sroberto "setsockopt(%d, SO_NOSIGPIPE) %s: %s", 2386258945Sroberto sock->fd, 2387258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2388258945Sroberto ISC_MSG_FAILED, "failed"), 2389258945Sroberto strbuf); 2390258945Sroberto /* Press on... */ 2391258945Sroberto } 2392258945Sroberto#endif 2393258945Sroberto 2394258945Sroberto#if defined(USE_CMSG) || defined(SO_RCVBUF) 2395258945Sroberto if (sock->type == isc_sockettype_udp) { 2396258945Sroberto 2397258945Sroberto#if defined(USE_CMSG) 2398258945Sroberto#if defined(SO_TIMESTAMP) 2399258945Sroberto if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, 2400258945Sroberto (void *)&on, sizeof(on)) < 0 2401258945Sroberto && errno != ENOPROTOOPT) { 2402258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2403258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2404258945Sroberto "setsockopt(%d, SO_TIMESTAMP) %s: %s", 2405258945Sroberto sock->fd, 2406258945Sroberto isc_msgcat_get(isc_msgcat, 2407258945Sroberto ISC_MSGSET_GENERAL, 2408258945Sroberto ISC_MSG_FAILED, 2409258945Sroberto "failed"), 2410258945Sroberto strbuf); 2411258945Sroberto /* Press on... */ 2412258945Sroberto } 2413258945Sroberto#endif /* SO_TIMESTAMP */ 2414258945Sroberto 2415258945Sroberto#if defined(ISC_PLATFORM_HAVEIPV6) 2416258945Sroberto if (sock->pf == AF_INET6 && sock->recvcmsgbuflen == 0U) { 2417258945Sroberto /* 2418258945Sroberto * Warn explicitly because this anomaly can be hidden 2419258945Sroberto * in usual operation (and unexpectedly appear later). 2420258945Sroberto */ 2421258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2422258945Sroberto "No buffer available to receive " 2423258945Sroberto "IPv6 destination"); 2424258945Sroberto } 2425258945Sroberto#ifdef ISC_PLATFORM_HAVEIN6PKTINFO 2426258945Sroberto#ifdef IPV6_RECVPKTINFO 2427258945Sroberto /* RFC 3542 */ 2428258945Sroberto if ((sock->pf == AF_INET6) 2429258945Sroberto && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 2430258945Sroberto (void *)&on, sizeof(on)) < 0)) { 2431258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2432258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2433258945Sroberto "setsockopt(%d, IPV6_RECVPKTINFO) " 2434258945Sroberto "%s: %s", sock->fd, 2435258945Sroberto isc_msgcat_get(isc_msgcat, 2436258945Sroberto ISC_MSGSET_GENERAL, 2437258945Sroberto ISC_MSG_FAILED, 2438258945Sroberto "failed"), 2439258945Sroberto strbuf); 2440258945Sroberto } 2441258945Sroberto#else 2442258945Sroberto /* RFC 2292 */ 2443258945Sroberto if ((sock->pf == AF_INET6) 2444258945Sroberto && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, 2445258945Sroberto (void *)&on, sizeof(on)) < 0)) { 2446258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2447258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2448258945Sroberto "setsockopt(%d, IPV6_PKTINFO) %s: %s", 2449258945Sroberto sock->fd, 2450258945Sroberto isc_msgcat_get(isc_msgcat, 2451258945Sroberto ISC_MSGSET_GENERAL, 2452258945Sroberto ISC_MSG_FAILED, 2453258945Sroberto "failed"), 2454258945Sroberto strbuf); 2455258945Sroberto } 2456258945Sroberto#endif /* IPV6_RECVPKTINFO */ 2457258945Sroberto#endif /* ISC_PLATFORM_HAVEIN6PKTINFO */ 2458258945Sroberto#ifdef IPV6_USE_MIN_MTU /* RFC 3542, not too common yet*/ 2459258945Sroberto /* use minimum MTU */ 2460280849Scy if (sock->pf == AF_INET6 && 2461280849Scy setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 2462280849Scy (void *)&on, sizeof(on)) < 0) { 2463280849Scy isc__strerror(errno, strbuf, sizeof(strbuf)); 2464280849Scy UNEXPECTED_ERROR(__FILE__, __LINE__, 2465280849Scy "setsockopt(%d, IPV6_USE_MIN_MTU) " 2466280849Scy "%s: %s", sock->fd, 2467280849Scy isc_msgcat_get(isc_msgcat, 2468280849Scy ISC_MSGSET_GENERAL, 2469280849Scy ISC_MSG_FAILED, 2470280849Scy "failed"), 2471280849Scy strbuf); 2472280849Scy } 2473280849Scy#endif 2474280849Scy#if defined(IPV6_MTU) 2475280849Scy /* 2476280849Scy * Use minimum MTU on IPv6 sockets. 2477280849Scy */ 2478258945Sroberto if (sock->pf == AF_INET6) { 2479280849Scy int mtu = 1280; 2480280849Scy (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_MTU, 2481280849Scy &mtu, sizeof(mtu)); 2482280849Scy } 2483280849Scy#endif 2484280849Scy#if defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DONT) 2485280849Scy /* 2486280849Scy * Turn off Path MTU discovery on IPv6/UDP sockets. 2487280849Scy */ 2488280849Scy if (sock->pf == AF_INET6) { 2489280849Scy int action = IPV6_PMTUDISC_DONT; 2490258945Sroberto (void)setsockopt(sock->fd, IPPROTO_IPV6, 2491280849Scy IPV6_MTU_DISCOVER, &action, 2492280849Scy sizeof(action)); 2493258945Sroberto } 2494258945Sroberto#endif 2495258945Sroberto#endif /* ISC_PLATFORM_HAVEIPV6 */ 2496258945Sroberto#endif /* defined(USE_CMSG) */ 2497258945Sroberto 2498258945Sroberto#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 2499258945Sroberto /* 2500258945Sroberto * Turn off Path MTU discovery on IPv4/UDP sockets. 2501258945Sroberto */ 2502258945Sroberto if (sock->pf == AF_INET) { 2503258945Sroberto int action = IP_PMTUDISC_DONT; 2504258945Sroberto (void)setsockopt(sock->fd, IPPROTO_IP, IP_MTU_DISCOVER, 2505258945Sroberto &action, sizeof(action)); 2506258945Sroberto } 2507258945Sroberto#endif 2508258945Sroberto#if defined(IP_DONTFRAG) 2509258945Sroberto /* 2510258945Sroberto * Turn off Path MTU discovery on IPv4/UDP sockets. 2511258945Sroberto */ 2512258945Sroberto if (sock->pf == AF_INET) { 2513258945Sroberto int off = 0; 2514258945Sroberto (void)setsockopt(sock->fd, IPPROTO_IP, IP_DONTFRAG, 2515258945Sroberto &off, sizeof(off)); 2516258945Sroberto } 2517258945Sroberto#endif 2518258945Sroberto 2519258945Sroberto#if defined(SO_RCVBUF) 2520258945Sroberto optlen = sizeof(size); 2521258945Sroberto if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 2522258945Sroberto (void *)&size, &optlen) >= 0 && 2523258945Sroberto size < RCVBUFSIZE) { 2524258945Sroberto size = RCVBUFSIZE; 2525258945Sroberto if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 2526258945Sroberto (void *)&size, sizeof(size)) == -1) { 2527258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 2528258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 2529258945Sroberto "setsockopt(%d, SO_RCVBUF, %d) %s: %s", 2530258945Sroberto sock->fd, size, 2531258945Sroberto isc_msgcat_get(isc_msgcat, 2532258945Sroberto ISC_MSGSET_GENERAL, 2533258945Sroberto ISC_MSG_FAILED, 2534258945Sroberto "failed"), 2535258945Sroberto strbuf); 2536258945Sroberto } 2537258945Sroberto } 2538258945Sroberto#endif 2539258945Sroberto } 2540258945Sroberto#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ 2541258945Sroberto 2542280849Scysetup_done: 2543258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_OPEN]); 2544258945Sroberto 2545258945Sroberto return (ISC_R_SUCCESS); 2546258945Sroberto} 2547258945Sroberto 2548280849Scy/* 2549280849Scy * Create a 'type' socket or duplicate an existing socket, managed 2550280849Scy * by 'manager'. Events will be posted to 'task' and when dispatched 2551280849Scy * 'action' will be called with 'arg' as the arg value. The new 2552280849Scy * socket is returned in 'socketp'. 2553258945Sroberto */ 2554280849Scystatic isc_result_t 2555280849Scysocket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 2556280849Scy isc_socket_t **socketp, isc_socket_t *dup_socket) 2557258945Sroberto{ 2558280849Scy isc__socket_t *sock = NULL; 2559280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2560258945Sroberto isc_result_t result; 2561258945Sroberto int lockid; 2562258945Sroberto 2563258945Sroberto REQUIRE(VALID_MANAGER(manager)); 2564258945Sroberto REQUIRE(socketp != NULL && *socketp == NULL); 2565258945Sroberto REQUIRE(type != isc_sockettype_fdwatch); 2566258945Sroberto 2567258945Sroberto result = allocate_socket(manager, type, &sock); 2568258945Sroberto if (result != ISC_R_SUCCESS) 2569258945Sroberto return (result); 2570258945Sroberto 2571258945Sroberto switch (sock->type) { 2572258945Sroberto case isc_sockettype_udp: 2573258945Sroberto sock->statsindex = 2574258945Sroberto (pf == AF_INET) ? upd4statsindex : upd6statsindex; 2575258945Sroberto break; 2576258945Sroberto case isc_sockettype_tcp: 2577258945Sroberto sock->statsindex = 2578258945Sroberto (pf == AF_INET) ? tcp4statsindex : tcp6statsindex; 2579258945Sroberto break; 2580258945Sroberto case isc_sockettype_unix: 2581258945Sroberto sock->statsindex = unixstatsindex; 2582258945Sroberto break; 2583258945Sroberto default: 2584258945Sroberto INSIST(0); 2585258945Sroberto } 2586258945Sroberto 2587258945Sroberto sock->pf = pf; 2588280849Scy 2589280849Scy result = opensocket(manager, sock, (isc__socket_t *)dup_socket); 2590258945Sroberto if (result != ISC_R_SUCCESS) { 2591258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_OPENFAIL]); 2592258945Sroberto free_socket(&sock); 2593258945Sroberto return (result); 2594258945Sroberto } 2595258945Sroberto 2596280849Scy sock->common.methods = (isc_socketmethods_t *)&socketmethods; 2597258945Sroberto sock->references = 1; 2598280849Scy *socketp = (isc_socket_t *)sock; 2599258945Sroberto 2600258945Sroberto /* 2601258945Sroberto * Note we don't have to lock the socket like we normally would because 2602258945Sroberto * there are no external references to it yet. 2603258945Sroberto */ 2604258945Sroberto 2605258945Sroberto lockid = FDLOCK_ID(sock->fd); 2606258945Sroberto LOCK(&manager->fdlock[lockid]); 2607258945Sroberto manager->fds[sock->fd] = sock; 2608258945Sroberto manager->fdstate[sock->fd] = MANAGED; 2609258945Sroberto#ifdef USE_DEVPOLL 2610258945Sroberto INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && 2611258945Sroberto sock->manager->fdpollinfo[sock->fd].want_write == 0); 2612258945Sroberto#endif 2613258945Sroberto UNLOCK(&manager->fdlock[lockid]); 2614258945Sroberto 2615258945Sroberto LOCK(&manager->lock); 2616258945Sroberto ISC_LIST_APPEND(manager->socklist, sock, link); 2617258945Sroberto#ifdef USE_SELECT 2618258945Sroberto if (manager->maxfd < sock->fd) 2619258945Sroberto manager->maxfd = sock->fd; 2620258945Sroberto#endif 2621258945Sroberto UNLOCK(&manager->lock); 2622258945Sroberto 2623258945Sroberto socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 2624280849Scy ISC_MSG_CREATED, dup_socket == NULL ? "dupped" : "created"); 2625258945Sroberto 2626258945Sroberto return (ISC_R_SUCCESS); 2627258945Sroberto} 2628258945Sroberto 2629280849Scy/*% 2630280849Scy * Create a new 'type' socket managed by 'manager'. Events 2631280849Scy * will be posted to 'task' and when dispatched 'action' will be 2632280849Scy * called with 'arg' as the arg value. The new socket is returned 2633280849Scy * in 'socketp'. 2634280849Scy */ 2635280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2636280849Scyisc__socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type, 2637280849Scy isc_socket_t **socketp) 2638280849Scy{ 2639280849Scy return (socket_create(manager0, pf, type, socketp, NULL)); 2640280849Scy} 2641280849Scy 2642280849Scy/*% 2643280849Scy * Duplicate an existing socket. The new socket is returned 2644280849Scy * in 'socketp'. 2645280849Scy */ 2646280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2647280849Scyisc__socket_dup(isc_socket_t *sock0, isc_socket_t **socketp) { 2648280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 2649280849Scy 2650280849Scy REQUIRE(VALID_SOCKET(sock)); 2651280849Scy REQUIRE(socketp != NULL && *socketp == NULL); 2652280849Scy 2653280849Scy return (socket_create((isc_socketmgr_t *) sock->manager, 2654280849Scy sock->pf, sock->type, socketp, 2655280849Scy sock0)); 2656280849Scy} 2657280849Scy 2658280849Scy#ifdef BIND9 2659280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2660280849Scyisc__socket_open(isc_socket_t *sock0) { 2661258945Sroberto isc_result_t result; 2662280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 2663258945Sroberto 2664258945Sroberto REQUIRE(VALID_SOCKET(sock)); 2665258945Sroberto 2666258945Sroberto LOCK(&sock->lock); 2667258945Sroberto REQUIRE(sock->references == 1); 2668258945Sroberto REQUIRE(sock->type != isc_sockettype_fdwatch); 2669258945Sroberto UNLOCK(&sock->lock); 2670258945Sroberto /* 2671258945Sroberto * We don't need to retain the lock hereafter, since no one else has 2672258945Sroberto * this socket. 2673258945Sroberto */ 2674258945Sroberto REQUIRE(sock->fd == -1); 2675258945Sroberto 2676280849Scy result = opensocket(sock->manager, sock, NULL); 2677258945Sroberto if (result != ISC_R_SUCCESS) 2678258945Sroberto sock->fd = -1; 2679258945Sroberto 2680258945Sroberto if (result == ISC_R_SUCCESS) { 2681258945Sroberto int lockid = FDLOCK_ID(sock->fd); 2682258945Sroberto 2683258945Sroberto LOCK(&sock->manager->fdlock[lockid]); 2684258945Sroberto sock->manager->fds[sock->fd] = sock; 2685258945Sroberto sock->manager->fdstate[sock->fd] = MANAGED; 2686258945Sroberto#ifdef USE_DEVPOLL 2687258945Sroberto INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && 2688258945Sroberto sock->manager->fdpollinfo[sock->fd].want_write == 0); 2689258945Sroberto#endif 2690258945Sroberto UNLOCK(&sock->manager->fdlock[lockid]); 2691258945Sroberto 2692258945Sroberto#ifdef USE_SELECT 2693258945Sroberto LOCK(&sock->manager->lock); 2694258945Sroberto if (sock->manager->maxfd < sock->fd) 2695258945Sroberto sock->manager->maxfd = sock->fd; 2696258945Sroberto UNLOCK(&sock->manager->lock); 2697258945Sroberto#endif 2698258945Sroberto } 2699258945Sroberto 2700258945Sroberto return (result); 2701258945Sroberto} 2702280849Scy#endif /* BIND9 */ 2703258945Sroberto 2704258945Sroberto/* 2705258945Sroberto * Create a new 'type' socket managed by 'manager'. Events 2706258945Sroberto * will be posted to 'task' and when dispatched 'action' will be 2707258945Sroberto * called with 'arg' as the arg value. The new socket is returned 2708258945Sroberto * in 'socketp'. 2709258945Sroberto */ 2710280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2711280849Scyisc__socket_fdwatchcreate(isc_socketmgr_t *manager0, int fd, int flags, 2712280849Scy isc_sockfdwatch_t callback, void *cbarg, 2713280849Scy isc_task_t *task, isc_socket_t **socketp) 2714258945Sroberto{ 2715280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 2716280849Scy isc__socket_t *sock = NULL; 2717258945Sroberto isc_result_t result; 2718258945Sroberto int lockid; 2719258945Sroberto 2720258945Sroberto REQUIRE(VALID_MANAGER(manager)); 2721258945Sroberto REQUIRE(socketp != NULL && *socketp == NULL); 2722258945Sroberto 2723258945Sroberto result = allocate_socket(manager, isc_sockettype_fdwatch, &sock); 2724258945Sroberto if (result != ISC_R_SUCCESS) 2725258945Sroberto return (result); 2726258945Sroberto 2727258945Sroberto sock->fd = fd; 2728258945Sroberto sock->fdwatcharg = cbarg; 2729258945Sroberto sock->fdwatchcb = callback; 2730258945Sroberto sock->fdwatchflags = flags; 2731258945Sroberto sock->fdwatchtask = task; 2732258945Sroberto sock->statsindex = fdwatchstatsindex; 2733258945Sroberto 2734280849Scy sock->common.methods = (isc_socketmethods_t *)&socketmethods; 2735258945Sroberto sock->references = 1; 2736280849Scy *socketp = (isc_socket_t *)sock; 2737258945Sroberto 2738258945Sroberto /* 2739258945Sroberto * Note we don't have to lock the socket like we normally would because 2740258945Sroberto * there are no external references to it yet. 2741258945Sroberto */ 2742258945Sroberto 2743258945Sroberto lockid = FDLOCK_ID(sock->fd); 2744258945Sroberto LOCK(&manager->fdlock[lockid]); 2745258945Sroberto manager->fds[sock->fd] = sock; 2746258945Sroberto manager->fdstate[sock->fd] = MANAGED; 2747258945Sroberto UNLOCK(&manager->fdlock[lockid]); 2748258945Sroberto 2749258945Sroberto LOCK(&manager->lock); 2750258945Sroberto ISC_LIST_APPEND(manager->socklist, sock, link); 2751258945Sroberto#ifdef USE_SELECT 2752258945Sroberto if (manager->maxfd < sock->fd) 2753258945Sroberto manager->maxfd = sock->fd; 2754258945Sroberto#endif 2755258945Sroberto UNLOCK(&manager->lock); 2756258945Sroberto 2757258945Sroberto if (flags & ISC_SOCKFDWATCH_READ) 2758258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_READ); 2759258945Sroberto if (flags & ISC_SOCKFDWATCH_WRITE) 2760258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); 2761258945Sroberto 2762258945Sroberto socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 2763258945Sroberto ISC_MSG_CREATED, "fdwatch-created"); 2764258945Sroberto 2765258945Sroberto return (ISC_R_SUCCESS); 2766258945Sroberto} 2767258945Sroberto 2768258945Sroberto/* 2769280849Scy * Indicate to the manager that it should watch the socket again. 2770280849Scy * This can be used to restart watching if the previous event handler 2771280849Scy * didn't indicate there was more data to be processed. Primarily 2772280849Scy * it is for writing but could be used for reading if desired 2773280849Scy */ 2774280849Scy 2775280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2776280849Scyisc__socket_fdwatchpoke(isc_socket_t *sock0, int flags) 2777280849Scy{ 2778280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 2779280849Scy 2780280849Scy REQUIRE(VALID_SOCKET(sock)); 2781280849Scy 2782280849Scy /* 2783280849Scy * We check both flags first to allow us to get the lock 2784280849Scy * once but only if we need it. 2785280849Scy */ 2786280849Scy 2787280849Scy if ((flags & (ISC_SOCKFDWATCH_READ | ISC_SOCKFDWATCH_WRITE)) != 0) { 2788280849Scy LOCK(&sock->lock); 2789280849Scy if (((flags & ISC_SOCKFDWATCH_READ) != 0) && 2790280849Scy !sock->pending_recv) 2791280849Scy select_poke(sock->manager, sock->fd, 2792280849Scy SELECT_POKE_READ); 2793280849Scy if (((flags & ISC_SOCKFDWATCH_WRITE) != 0) && 2794280849Scy !sock->pending_send) 2795280849Scy select_poke(sock->manager, sock->fd, 2796280849Scy SELECT_POKE_WRITE); 2797280849Scy UNLOCK(&sock->lock); 2798280849Scy } 2799280849Scy 2800280849Scy socket_log(sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET, 2801280849Scy ISC_MSG_POKED, "fdwatch-poked flags: %d", flags); 2802280849Scy 2803280849Scy return (ISC_R_SUCCESS); 2804280849Scy} 2805280849Scy 2806280849Scy/* 2807258945Sroberto * Attach to a socket. Caller must explicitly detach when it is done. 2808258945Sroberto */ 2809280849ScyISC_SOCKETFUNC_SCOPE void 2810280849Scyisc__socket_attach(isc_socket_t *sock0, isc_socket_t **socketp) { 2811280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 2812280849Scy 2813258945Sroberto REQUIRE(VALID_SOCKET(sock)); 2814258945Sroberto REQUIRE(socketp != NULL && *socketp == NULL); 2815258945Sroberto 2816258945Sroberto LOCK(&sock->lock); 2817258945Sroberto sock->references++; 2818258945Sroberto UNLOCK(&sock->lock); 2819258945Sroberto 2820280849Scy *socketp = (isc_socket_t *)sock; 2821258945Sroberto} 2822258945Sroberto 2823258945Sroberto/* 2824258945Sroberto * Dereference a socket. If this is the last reference to it, clean things 2825258945Sroberto * up by destroying the socket. 2826258945Sroberto */ 2827280849ScyISC_SOCKETFUNC_SCOPE void 2828280849Scyisc__socket_detach(isc_socket_t **socketp) { 2829280849Scy isc__socket_t *sock; 2830258945Sroberto isc_boolean_t kill_socket = ISC_FALSE; 2831258945Sroberto 2832258945Sroberto REQUIRE(socketp != NULL); 2833280849Scy sock = (isc__socket_t *)*socketp; 2834258945Sroberto REQUIRE(VALID_SOCKET(sock)); 2835258945Sroberto 2836258945Sroberto LOCK(&sock->lock); 2837258945Sroberto REQUIRE(sock->references > 0); 2838258945Sroberto sock->references--; 2839258945Sroberto if (sock->references == 0) 2840258945Sroberto kill_socket = ISC_TRUE; 2841258945Sroberto UNLOCK(&sock->lock); 2842258945Sroberto 2843258945Sroberto if (kill_socket) 2844258945Sroberto destroy(&sock); 2845258945Sroberto 2846258945Sroberto *socketp = NULL; 2847258945Sroberto} 2848258945Sroberto 2849280849Scy#ifdef BIND9 2850280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 2851280849Scyisc__socket_close(isc_socket_t *sock0) { 2852280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 2853258945Sroberto int fd; 2854280849Scy isc__socketmgr_t *manager; 2855258945Sroberto 2856280849Scy fflush(stdout); 2857258945Sroberto REQUIRE(VALID_SOCKET(sock)); 2858258945Sroberto 2859258945Sroberto LOCK(&sock->lock); 2860258945Sroberto 2861258945Sroberto REQUIRE(sock->references == 1); 2862258945Sroberto REQUIRE(sock->type != isc_sockettype_fdwatch); 2863258945Sroberto REQUIRE(sock->fd >= 0 && sock->fd < (int)sock->manager->maxsocks); 2864258945Sroberto 2865258945Sroberto INSIST(!sock->connecting); 2866258945Sroberto INSIST(!sock->pending_recv); 2867258945Sroberto INSIST(!sock->pending_send); 2868258945Sroberto INSIST(!sock->pending_accept); 2869258945Sroberto INSIST(ISC_LIST_EMPTY(sock->recv_list)); 2870258945Sroberto INSIST(ISC_LIST_EMPTY(sock->send_list)); 2871258945Sroberto INSIST(ISC_LIST_EMPTY(sock->accept_list)); 2872258945Sroberto INSIST(sock->connect_ev == NULL); 2873258945Sroberto 2874258945Sroberto manager = sock->manager; 2875258945Sroberto fd = sock->fd; 2876258945Sroberto sock->fd = -1; 2877280849Scy sock->dupped = 0; 2878258945Sroberto memset(sock->name, 0, sizeof(sock->name)); 2879258945Sroberto sock->tag = NULL; 2880258945Sroberto sock->listener = 0; 2881258945Sroberto sock->connected = 0; 2882258945Sroberto sock->connecting = 0; 2883258945Sroberto sock->bound = 0; 2884258945Sroberto isc_sockaddr_any(&sock->peer_address); 2885258945Sroberto 2886258945Sroberto UNLOCK(&sock->lock); 2887258945Sroberto 2888258945Sroberto closesocket(manager, sock, fd); 2889258945Sroberto 2890258945Sroberto return (ISC_R_SUCCESS); 2891258945Sroberto} 2892280849Scy#endif /* BIND9 */ 2893258945Sroberto 2894258945Sroberto/* 2895258945Sroberto * I/O is possible on a given socket. Schedule an event to this task that 2896258945Sroberto * will call an internal function to do the I/O. This will charge the 2897258945Sroberto * task with the I/O operation and let our select loop handler get back 2898258945Sroberto * to doing something real as fast as possible. 2899258945Sroberto * 2900258945Sroberto * The socket and manager must be locked before calling this function. 2901258945Sroberto */ 2902258945Srobertostatic void 2903280849Scydispatch_recv(isc__socket_t *sock) { 2904258945Sroberto intev_t *iev; 2905258945Sroberto isc_socketevent_t *ev; 2906258945Sroberto isc_task_t *sender; 2907258945Sroberto 2908258945Sroberto INSIST(!sock->pending_recv); 2909258945Sroberto 2910258945Sroberto if (sock->type != isc_sockettype_fdwatch) { 2911258945Sroberto ev = ISC_LIST_HEAD(sock->recv_list); 2912258945Sroberto if (ev == NULL) 2913258945Sroberto return; 2914258945Sroberto socket_log(sock, NULL, EVENT, NULL, 0, 0, 2915258945Sroberto "dispatch_recv: event %p -> task %p", 2916258945Sroberto ev, ev->ev_sender); 2917258945Sroberto sender = ev->ev_sender; 2918258945Sroberto } else { 2919258945Sroberto sender = sock->fdwatchtask; 2920258945Sroberto } 2921258945Sroberto 2922258945Sroberto sock->pending_recv = 1; 2923258945Sroberto iev = &sock->readable_ev; 2924258945Sroberto 2925258945Sroberto sock->references++; 2926258945Sroberto iev->ev_sender = sock; 2927258945Sroberto if (sock->type == isc_sockettype_fdwatch) 2928258945Sroberto iev->ev_action = internal_fdwatch_read; 2929258945Sroberto else 2930258945Sroberto iev->ev_action = internal_recv; 2931258945Sroberto iev->ev_arg = sock; 2932258945Sroberto 2933258945Sroberto isc_task_send(sender, (isc_event_t **)&iev); 2934258945Sroberto} 2935258945Sroberto 2936258945Srobertostatic void 2937280849Scydispatch_send(isc__socket_t *sock) { 2938258945Sroberto intev_t *iev; 2939258945Sroberto isc_socketevent_t *ev; 2940258945Sroberto isc_task_t *sender; 2941258945Sroberto 2942258945Sroberto INSIST(!sock->pending_send); 2943258945Sroberto 2944258945Sroberto if (sock->type != isc_sockettype_fdwatch) { 2945258945Sroberto ev = ISC_LIST_HEAD(sock->send_list); 2946258945Sroberto if (ev == NULL) 2947258945Sroberto return; 2948258945Sroberto socket_log(sock, NULL, EVENT, NULL, 0, 0, 2949258945Sroberto "dispatch_send: event %p -> task %p", 2950258945Sroberto ev, ev->ev_sender); 2951258945Sroberto sender = ev->ev_sender; 2952258945Sroberto } else { 2953258945Sroberto sender = sock->fdwatchtask; 2954258945Sroberto } 2955258945Sroberto 2956258945Sroberto sock->pending_send = 1; 2957258945Sroberto iev = &sock->writable_ev; 2958258945Sroberto 2959258945Sroberto sock->references++; 2960258945Sroberto iev->ev_sender = sock; 2961258945Sroberto if (sock->type == isc_sockettype_fdwatch) 2962258945Sroberto iev->ev_action = internal_fdwatch_write; 2963258945Sroberto else 2964258945Sroberto iev->ev_action = internal_send; 2965258945Sroberto iev->ev_arg = sock; 2966258945Sroberto 2967258945Sroberto isc_task_send(sender, (isc_event_t **)&iev); 2968258945Sroberto} 2969258945Sroberto 2970258945Sroberto/* 2971258945Sroberto * Dispatch an internal accept event. 2972258945Sroberto */ 2973258945Srobertostatic void 2974280849Scydispatch_accept(isc__socket_t *sock) { 2975258945Sroberto intev_t *iev; 2976258945Sroberto isc_socket_newconnev_t *ev; 2977258945Sroberto 2978258945Sroberto INSIST(!sock->pending_accept); 2979258945Sroberto 2980258945Sroberto /* 2981258945Sroberto * Are there any done events left, or were they all canceled 2982258945Sroberto * before the manager got the socket lock? 2983258945Sroberto */ 2984258945Sroberto ev = ISC_LIST_HEAD(sock->accept_list); 2985258945Sroberto if (ev == NULL) 2986258945Sroberto return; 2987258945Sroberto 2988258945Sroberto sock->pending_accept = 1; 2989258945Sroberto iev = &sock->readable_ev; 2990258945Sroberto 2991258945Sroberto sock->references++; /* keep socket around for this internal event */ 2992258945Sroberto iev->ev_sender = sock; 2993258945Sroberto iev->ev_action = internal_accept; 2994258945Sroberto iev->ev_arg = sock; 2995258945Sroberto 2996258945Sroberto isc_task_send(ev->ev_sender, (isc_event_t **)&iev); 2997258945Sroberto} 2998258945Sroberto 2999258945Srobertostatic void 3000280849Scydispatch_connect(isc__socket_t *sock) { 3001258945Sroberto intev_t *iev; 3002258945Sroberto isc_socket_connev_t *ev; 3003258945Sroberto 3004258945Sroberto iev = &sock->writable_ev; 3005258945Sroberto 3006258945Sroberto ev = sock->connect_ev; 3007258945Sroberto INSIST(ev != NULL); /* XXX */ 3008258945Sroberto 3009258945Sroberto INSIST(sock->connecting); 3010258945Sroberto 3011258945Sroberto sock->references++; /* keep socket around for this internal event */ 3012258945Sroberto iev->ev_sender = sock; 3013258945Sroberto iev->ev_action = internal_connect; 3014258945Sroberto iev->ev_arg = sock; 3015258945Sroberto 3016258945Sroberto isc_task_send(ev->ev_sender, (isc_event_t **)&iev); 3017258945Sroberto} 3018258945Sroberto 3019258945Sroberto/* 3020258945Sroberto * Dequeue an item off the given socket's read queue, set the result code 3021258945Sroberto * in the done event to the one provided, and send it to the task it was 3022258945Sroberto * destined for. 3023258945Sroberto * 3024258945Sroberto * If the event to be sent is on a list, remove it before sending. If 3025258945Sroberto * asked to, send and detach from the socket as well. 3026258945Sroberto * 3027258945Sroberto * Caller must have the socket locked if the event is attached to the socket. 3028258945Sroberto */ 3029258945Srobertostatic void 3030280849Scysend_recvdone_event(isc__socket_t *sock, isc_socketevent_t **dev) { 3031258945Sroberto isc_task_t *task; 3032258945Sroberto 3033258945Sroberto task = (*dev)->ev_sender; 3034258945Sroberto 3035258945Sroberto (*dev)->ev_sender = sock; 3036258945Sroberto 3037258945Sroberto if (ISC_LINK_LINKED(*dev, ev_link)) 3038258945Sroberto ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); 3039258945Sroberto 3040258945Sroberto if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 3041258945Sroberto == ISC_SOCKEVENTATTR_ATTACHED) 3042258945Sroberto isc_task_sendanddetach(&task, (isc_event_t **)dev); 3043258945Sroberto else 3044258945Sroberto isc_task_send(task, (isc_event_t **)dev); 3045258945Sroberto} 3046258945Sroberto 3047258945Sroberto/* 3048258945Sroberto * See comments for send_recvdone_event() above. 3049258945Sroberto * 3050258945Sroberto * Caller must have the socket locked if the event is attached to the socket. 3051258945Sroberto */ 3052258945Srobertostatic void 3053280849Scysend_senddone_event(isc__socket_t *sock, isc_socketevent_t **dev) { 3054258945Sroberto isc_task_t *task; 3055258945Sroberto 3056258945Sroberto INSIST(dev != NULL && *dev != NULL); 3057258945Sroberto 3058258945Sroberto task = (*dev)->ev_sender; 3059258945Sroberto (*dev)->ev_sender = sock; 3060258945Sroberto 3061258945Sroberto if (ISC_LINK_LINKED(*dev, ev_link)) 3062258945Sroberto ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); 3063258945Sroberto 3064258945Sroberto if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 3065258945Sroberto == ISC_SOCKEVENTATTR_ATTACHED) 3066258945Sroberto isc_task_sendanddetach(&task, (isc_event_t **)dev); 3067258945Sroberto else 3068258945Sroberto isc_task_send(task, (isc_event_t **)dev); 3069258945Sroberto} 3070258945Sroberto 3071258945Sroberto/* 3072258945Sroberto * Call accept() on a socket, to get the new file descriptor. The listen 3073258945Sroberto * socket is used as a prototype to create a new isc_socket_t. The new 3074258945Sroberto * socket has one outstanding reference. The task receiving the event 3075258945Sroberto * will be detached from just after the event is delivered. 3076258945Sroberto * 3077258945Sroberto * On entry to this function, the event delivered is the internal 3078258945Sroberto * readable event, and the first item on the accept_list should be 3079258945Sroberto * the done event we want to send. If the list is empty, this is a no-op, 3080258945Sroberto * so just unlock and return. 3081258945Sroberto */ 3082258945Srobertostatic void 3083258945Srobertointernal_accept(isc_task_t *me, isc_event_t *ev) { 3084280849Scy isc__socket_t *sock; 3085280849Scy isc__socketmgr_t *manager; 3086258945Sroberto isc_socket_newconnev_t *dev; 3087258945Sroberto isc_task_t *task; 3088258945Sroberto ISC_SOCKADDR_LEN_T addrlen; 3089258945Sroberto int fd; 3090258945Sroberto isc_result_t result = ISC_R_SUCCESS; 3091258945Sroberto char strbuf[ISC_STRERRORSIZE]; 3092258945Sroberto const char *err = "accept"; 3093258945Sroberto 3094258945Sroberto UNUSED(me); 3095258945Sroberto 3096258945Sroberto sock = ev->ev_sender; 3097258945Sroberto INSIST(VALID_SOCKET(sock)); 3098258945Sroberto 3099258945Sroberto LOCK(&sock->lock); 3100258945Sroberto socket_log(sock, NULL, TRACE, 3101258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 3102258945Sroberto "internal_accept called, locked socket"); 3103258945Sroberto 3104258945Sroberto manager = sock->manager; 3105258945Sroberto INSIST(VALID_MANAGER(manager)); 3106258945Sroberto 3107258945Sroberto INSIST(sock->listener); 3108258945Sroberto INSIST(sock->pending_accept == 1); 3109258945Sroberto sock->pending_accept = 0; 3110258945Sroberto 3111258945Sroberto INSIST(sock->references > 0); 3112258945Sroberto sock->references--; /* the internal event is done with this socket */ 3113258945Sroberto if (sock->references == 0) { 3114258945Sroberto UNLOCK(&sock->lock); 3115258945Sroberto destroy(&sock); 3116258945Sroberto return; 3117258945Sroberto } 3118258945Sroberto 3119258945Sroberto /* 3120258945Sroberto * Get the first item off the accept list. 3121258945Sroberto * If it is empty, unlock the socket and return. 3122258945Sroberto */ 3123258945Sroberto dev = ISC_LIST_HEAD(sock->accept_list); 3124258945Sroberto if (dev == NULL) { 3125258945Sroberto UNLOCK(&sock->lock); 3126258945Sroberto return; 3127258945Sroberto } 3128258945Sroberto 3129258945Sroberto /* 3130258945Sroberto * Try to accept the new connection. If the accept fails with 3131258945Sroberto * EAGAIN or EINTR, simply poke the watcher to watch this socket 3132258945Sroberto * again. Also ignore ECONNRESET, which has been reported to 3133258945Sroberto * be spuriously returned on Linux 2.2.19 although it is not 3134258945Sroberto * a documented error for accept(). ECONNABORTED has been 3135258945Sroberto * reported for Solaris 8. The rest are thrown in not because 3136258945Sroberto * we have seen them but because they are ignored by other 3137258945Sroberto * daemons such as BIND 8 and Apache. 3138258945Sroberto */ 3139258945Sroberto 3140280849Scy addrlen = sizeof(NEWCONNSOCK(dev)->peer_address.type); 3141280849Scy memset(&NEWCONNSOCK(dev)->peer_address.type, 0, addrlen); 3142280849Scy fd = accept(sock->fd, &NEWCONNSOCK(dev)->peer_address.type.sa, 3143258945Sroberto (void *)&addrlen); 3144258945Sroberto 3145258945Sroberto#ifdef F_DUPFD 3146258945Sroberto /* 3147258945Sroberto * Leave a space for stdio to work in. 3148258945Sroberto */ 3149258945Sroberto if (fd >= 0 && fd < 20) { 3150258945Sroberto int new, tmp; 3151258945Sroberto new = fcntl(fd, F_DUPFD, 20); 3152258945Sroberto tmp = errno; 3153258945Sroberto (void)close(fd); 3154258945Sroberto errno = tmp; 3155258945Sroberto fd = new; 3156258945Sroberto err = "accept/fcntl"; 3157258945Sroberto } 3158258945Sroberto#endif 3159258945Sroberto 3160258945Sroberto if (fd < 0) { 3161258945Sroberto if (SOFT_ERROR(errno)) 3162258945Sroberto goto soft_error; 3163258945Sroberto switch (errno) { 3164258945Sroberto case ENFILE: 3165258945Sroberto case EMFILE: 3166258945Sroberto isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, 3167258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 3168258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 3169258945Sroberto ISC_MSG_TOOMANYFDS, 3170258945Sroberto "%s: too many open file descriptors", 3171258945Sroberto err); 3172258945Sroberto goto soft_error; 3173258945Sroberto 3174258945Sroberto case ENOBUFS: 3175258945Sroberto case ENOMEM: 3176258945Sroberto case ECONNRESET: 3177258945Sroberto case ECONNABORTED: 3178258945Sroberto case EHOSTUNREACH: 3179258945Sroberto case EHOSTDOWN: 3180258945Sroberto case ENETUNREACH: 3181258945Sroberto case ENETDOWN: 3182258945Sroberto case ECONNREFUSED: 3183258945Sroberto#ifdef EPROTO 3184258945Sroberto case EPROTO: 3185258945Sroberto#endif 3186258945Sroberto#ifdef ENONET 3187258945Sroberto case ENONET: 3188258945Sroberto#endif 3189258945Sroberto goto soft_error; 3190258945Sroberto default: 3191258945Sroberto break; 3192258945Sroberto } 3193258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 3194258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 3195258945Sroberto "internal_accept: %s() %s: %s", err, 3196258945Sroberto isc_msgcat_get(isc_msgcat, 3197258945Sroberto ISC_MSGSET_GENERAL, 3198258945Sroberto ISC_MSG_FAILED, 3199258945Sroberto "failed"), 3200258945Sroberto strbuf); 3201258945Sroberto fd = -1; 3202258945Sroberto result = ISC_R_UNEXPECTED; 3203258945Sroberto } else { 3204258945Sroberto if (addrlen == 0U) { 3205258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 3206258945Sroberto "internal_accept(): " 3207258945Sroberto "accept() failed to return " 3208258945Sroberto "remote address"); 3209258945Sroberto 3210258945Sroberto (void)close(fd); 3211258945Sroberto goto soft_error; 3212280849Scy } else if (NEWCONNSOCK(dev)->peer_address.type.sa.sa_family != 3213258945Sroberto sock->pf) 3214258945Sroberto { 3215258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 3216258945Sroberto "internal_accept(): " 3217258945Sroberto "accept() returned peer address " 3218258945Sroberto "family %u (expected %u)", 3219280849Scy NEWCONNSOCK(dev)->peer_address. 3220258945Sroberto type.sa.sa_family, 3221258945Sroberto sock->pf); 3222258945Sroberto (void)close(fd); 3223258945Sroberto goto soft_error; 3224258945Sroberto } else if (fd >= (int)manager->maxsocks) { 3225258945Sroberto isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, 3226258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 3227258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, 3228258945Sroberto ISC_MSG_TOOMANYFDS, 3229258945Sroberto "accept: " 3230258945Sroberto "file descriptor exceeds limit (%d/%u)", 3231258945Sroberto fd, manager->maxsocks); 3232258945Sroberto (void)close(fd); 3233258945Sroberto goto soft_error; 3234258945Sroberto } 3235258945Sroberto } 3236258945Sroberto 3237258945Sroberto if (fd != -1) { 3238280849Scy NEWCONNSOCK(dev)->peer_address.length = addrlen; 3239280849Scy NEWCONNSOCK(dev)->pf = sock->pf; 3240258945Sroberto } 3241258945Sroberto 3242258945Sroberto /* 3243258945Sroberto * Pull off the done event. 3244258945Sroberto */ 3245258945Sroberto ISC_LIST_UNLINK(sock->accept_list, dev, ev_link); 3246258945Sroberto 3247258945Sroberto /* 3248258945Sroberto * Poke watcher if there are more pending accepts. 3249258945Sroberto */ 3250258945Sroberto if (!ISC_LIST_EMPTY(sock->accept_list)) 3251258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT); 3252258945Sroberto 3253258945Sroberto UNLOCK(&sock->lock); 3254258945Sroberto 3255280849Scy if (fd != -1) { 3256280849Scy result = make_nonblock(fd); 3257280849Scy if (result != ISC_R_SUCCESS) { 3258280849Scy (void)close(fd); 3259280849Scy fd = -1; 3260280849Scy } 3261258945Sroberto } 3262258945Sroberto 3263258945Sroberto /* 3264258945Sroberto * -1 means the new socket didn't happen. 3265258945Sroberto */ 3266258945Sroberto if (fd != -1) { 3267258945Sroberto int lockid = FDLOCK_ID(fd); 3268258945Sroberto 3269258945Sroberto LOCK(&manager->fdlock[lockid]); 3270280849Scy manager->fds[fd] = NEWCONNSOCK(dev); 3271258945Sroberto manager->fdstate[fd] = MANAGED; 3272258945Sroberto UNLOCK(&manager->fdlock[lockid]); 3273258945Sroberto 3274258945Sroberto LOCK(&manager->lock); 3275280849Scy ISC_LIST_APPEND(manager->socklist, NEWCONNSOCK(dev), link); 3276258945Sroberto 3277280849Scy NEWCONNSOCK(dev)->fd = fd; 3278280849Scy NEWCONNSOCK(dev)->bound = 1; 3279280849Scy NEWCONNSOCK(dev)->connected = 1; 3280258945Sroberto 3281258945Sroberto /* 3282258945Sroberto * Save away the remote address 3283258945Sroberto */ 3284280849Scy dev->address = NEWCONNSOCK(dev)->peer_address; 3285258945Sroberto 3286258945Sroberto#ifdef USE_SELECT 3287258945Sroberto if (manager->maxfd < fd) 3288258945Sroberto manager->maxfd = fd; 3289258945Sroberto#endif 3290258945Sroberto 3291280849Scy socket_log(sock, &NEWCONNSOCK(dev)->peer_address, CREATION, 3292258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 3293258945Sroberto "accepted connection, new socket %p", 3294258945Sroberto dev->newsocket); 3295258945Sroberto 3296258945Sroberto UNLOCK(&manager->lock); 3297258945Sroberto 3298258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_ACCEPT]); 3299258945Sroberto } else { 3300258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); 3301280849Scy NEWCONNSOCK(dev)->references--; 3302280849Scy free_socket((isc__socket_t **)&dev->newsocket); 3303258945Sroberto } 3304258945Sroberto 3305258945Sroberto /* 3306258945Sroberto * Fill in the done event details and send it off. 3307258945Sroberto */ 3308258945Sroberto dev->result = result; 3309258945Sroberto task = dev->ev_sender; 3310258945Sroberto dev->ev_sender = sock; 3311258945Sroberto 3312258945Sroberto isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev)); 3313258945Sroberto return; 3314258945Sroberto 3315258945Sroberto soft_error: 3316258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT); 3317258945Sroberto UNLOCK(&sock->lock); 3318258945Sroberto 3319258945Sroberto inc_stats(manager->stats, sock->statsindex[STATID_ACCEPTFAIL]); 3320258945Sroberto return; 3321258945Sroberto} 3322258945Sroberto 3323258945Srobertostatic void 3324258945Srobertointernal_recv(isc_task_t *me, isc_event_t *ev) { 3325258945Sroberto isc_socketevent_t *dev; 3326280849Scy isc__socket_t *sock; 3327258945Sroberto 3328258945Sroberto INSIST(ev->ev_type == ISC_SOCKEVENT_INTR); 3329258945Sroberto 3330258945Sroberto sock = ev->ev_sender; 3331258945Sroberto INSIST(VALID_SOCKET(sock)); 3332258945Sroberto 3333258945Sroberto LOCK(&sock->lock); 3334258945Sroberto socket_log(sock, NULL, IOEVENT, 3335258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV, 3336258945Sroberto "internal_recv: task %p got event %p", me, ev); 3337258945Sroberto 3338258945Sroberto INSIST(sock->pending_recv == 1); 3339258945Sroberto sock->pending_recv = 0; 3340258945Sroberto 3341258945Sroberto INSIST(sock->references > 0); 3342258945Sroberto sock->references--; /* the internal event is done with this socket */ 3343258945Sroberto if (sock->references == 0) { 3344258945Sroberto UNLOCK(&sock->lock); 3345258945Sroberto destroy(&sock); 3346258945Sroberto return; 3347258945Sroberto } 3348258945Sroberto 3349258945Sroberto /* 3350258945Sroberto * Try to do as much I/O as possible on this socket. There are no 3351258945Sroberto * limits here, currently. 3352258945Sroberto */ 3353258945Sroberto dev = ISC_LIST_HEAD(sock->recv_list); 3354258945Sroberto while (dev != NULL) { 3355258945Sroberto switch (doio_recv(sock, dev)) { 3356258945Sroberto case DOIO_SOFT: 3357258945Sroberto goto poke; 3358258945Sroberto 3359258945Sroberto case DOIO_EOF: 3360258945Sroberto /* 3361258945Sroberto * read of 0 means the remote end was closed. 3362258945Sroberto * Run through the event queue and dispatch all 3363258945Sroberto * the events with an EOF result code. 3364258945Sroberto */ 3365258945Sroberto do { 3366258945Sroberto dev->result = ISC_R_EOF; 3367258945Sroberto send_recvdone_event(sock, &dev); 3368258945Sroberto dev = ISC_LIST_HEAD(sock->recv_list); 3369258945Sroberto } while (dev != NULL); 3370258945Sroberto goto poke; 3371258945Sroberto 3372258945Sroberto case DOIO_SUCCESS: 3373258945Sroberto case DOIO_HARD: 3374258945Sroberto send_recvdone_event(sock, &dev); 3375258945Sroberto break; 3376258945Sroberto } 3377258945Sroberto 3378258945Sroberto dev = ISC_LIST_HEAD(sock->recv_list); 3379258945Sroberto } 3380258945Sroberto 3381258945Sroberto poke: 3382258945Sroberto if (!ISC_LIST_EMPTY(sock->recv_list)) 3383258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_READ); 3384258945Sroberto 3385258945Sroberto UNLOCK(&sock->lock); 3386258945Sroberto} 3387258945Sroberto 3388258945Srobertostatic void 3389258945Srobertointernal_send(isc_task_t *me, isc_event_t *ev) { 3390258945Sroberto isc_socketevent_t *dev; 3391280849Scy isc__socket_t *sock; 3392258945Sroberto 3393258945Sroberto INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 3394258945Sroberto 3395258945Sroberto /* 3396258945Sroberto * Find out what socket this is and lock it. 3397258945Sroberto */ 3398280849Scy sock = (isc__socket_t *)ev->ev_sender; 3399258945Sroberto INSIST(VALID_SOCKET(sock)); 3400258945Sroberto 3401258945Sroberto LOCK(&sock->lock); 3402258945Sroberto socket_log(sock, NULL, IOEVENT, 3403258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND, 3404258945Sroberto "internal_send: task %p got event %p", me, ev); 3405258945Sroberto 3406258945Sroberto INSIST(sock->pending_send == 1); 3407258945Sroberto sock->pending_send = 0; 3408258945Sroberto 3409258945Sroberto INSIST(sock->references > 0); 3410258945Sroberto sock->references--; /* the internal event is done with this socket */ 3411258945Sroberto if (sock->references == 0) { 3412258945Sroberto UNLOCK(&sock->lock); 3413258945Sroberto destroy(&sock); 3414258945Sroberto return; 3415258945Sroberto } 3416258945Sroberto 3417258945Sroberto /* 3418258945Sroberto * Try to do as much I/O as possible on this socket. There are no 3419258945Sroberto * limits here, currently. 3420258945Sroberto */ 3421258945Sroberto dev = ISC_LIST_HEAD(sock->send_list); 3422258945Sroberto while (dev != NULL) { 3423258945Sroberto switch (doio_send(sock, dev)) { 3424258945Sroberto case DOIO_SOFT: 3425258945Sroberto goto poke; 3426258945Sroberto 3427258945Sroberto case DOIO_HARD: 3428258945Sroberto case DOIO_SUCCESS: 3429258945Sroberto send_senddone_event(sock, &dev); 3430258945Sroberto break; 3431258945Sroberto } 3432258945Sroberto 3433258945Sroberto dev = ISC_LIST_HEAD(sock->send_list); 3434258945Sroberto } 3435258945Sroberto 3436258945Sroberto poke: 3437258945Sroberto if (!ISC_LIST_EMPTY(sock->send_list)) 3438258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); 3439258945Sroberto 3440258945Sroberto UNLOCK(&sock->lock); 3441258945Sroberto} 3442258945Sroberto 3443258945Srobertostatic void 3444258945Srobertointernal_fdwatch_write(isc_task_t *me, isc_event_t *ev) { 3445280849Scy isc__socket_t *sock; 3446258945Sroberto int more_data; 3447258945Sroberto 3448258945Sroberto INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 3449258945Sroberto 3450258945Sroberto /* 3451258945Sroberto * Find out what socket this is and lock it. 3452258945Sroberto */ 3453280849Scy sock = (isc__socket_t *)ev->ev_sender; 3454258945Sroberto INSIST(VALID_SOCKET(sock)); 3455258945Sroberto 3456258945Sroberto LOCK(&sock->lock); 3457258945Sroberto socket_log(sock, NULL, IOEVENT, 3458258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND, 3459258945Sroberto "internal_fdwatch_write: task %p got event %p", me, ev); 3460258945Sroberto 3461258945Sroberto INSIST(sock->pending_send == 1); 3462258945Sroberto 3463258945Sroberto UNLOCK(&sock->lock); 3464280849Scy more_data = (sock->fdwatchcb)(me, (isc_socket_t *)sock, 3465280849Scy sock->fdwatcharg, ISC_SOCKFDWATCH_WRITE); 3466258945Sroberto LOCK(&sock->lock); 3467258945Sroberto 3468258945Sroberto sock->pending_send = 0; 3469258945Sroberto 3470258945Sroberto INSIST(sock->references > 0); 3471258945Sroberto sock->references--; /* the internal event is done with this socket */ 3472258945Sroberto if (sock->references == 0) { 3473258945Sroberto UNLOCK(&sock->lock); 3474258945Sroberto destroy(&sock); 3475258945Sroberto return; 3476258945Sroberto } 3477258945Sroberto 3478258945Sroberto if (more_data) 3479258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); 3480258945Sroberto 3481258945Sroberto UNLOCK(&sock->lock); 3482258945Sroberto} 3483258945Sroberto 3484258945Srobertostatic void 3485258945Srobertointernal_fdwatch_read(isc_task_t *me, isc_event_t *ev) { 3486280849Scy isc__socket_t *sock; 3487258945Sroberto int more_data; 3488258945Sroberto 3489258945Sroberto INSIST(ev->ev_type == ISC_SOCKEVENT_INTR); 3490258945Sroberto 3491258945Sroberto /* 3492258945Sroberto * Find out what socket this is and lock it. 3493258945Sroberto */ 3494280849Scy sock = (isc__socket_t *)ev->ev_sender; 3495258945Sroberto INSIST(VALID_SOCKET(sock)); 3496258945Sroberto 3497258945Sroberto LOCK(&sock->lock); 3498258945Sroberto socket_log(sock, NULL, IOEVENT, 3499258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV, 3500258945Sroberto "internal_fdwatch_read: task %p got event %p", me, ev); 3501258945Sroberto 3502258945Sroberto INSIST(sock->pending_recv == 1); 3503258945Sroberto 3504258945Sroberto UNLOCK(&sock->lock); 3505280849Scy more_data = (sock->fdwatchcb)(me, (isc_socket_t *)sock, 3506280849Scy sock->fdwatcharg, ISC_SOCKFDWATCH_READ); 3507258945Sroberto LOCK(&sock->lock); 3508258945Sroberto 3509258945Sroberto sock->pending_recv = 0; 3510258945Sroberto 3511258945Sroberto INSIST(sock->references > 0); 3512258945Sroberto sock->references--; /* the internal event is done with this socket */ 3513258945Sroberto if (sock->references == 0) { 3514258945Sroberto UNLOCK(&sock->lock); 3515258945Sroberto destroy(&sock); 3516258945Sroberto return; 3517258945Sroberto } 3518258945Sroberto 3519258945Sroberto if (more_data) 3520258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_READ); 3521258945Sroberto 3522258945Sroberto UNLOCK(&sock->lock); 3523258945Sroberto} 3524258945Sroberto 3525258945Sroberto/* 3526258945Sroberto * Process read/writes on each fd here. Avoid locking 3527258945Sroberto * and unlocking twice if both reads and writes are possible. 3528258945Sroberto */ 3529258945Srobertostatic void 3530280849Scyprocess_fd(isc__socketmgr_t *manager, int fd, isc_boolean_t readable, 3531258945Sroberto isc_boolean_t writeable) 3532258945Sroberto{ 3533280849Scy isc__socket_t *sock; 3534258945Sroberto isc_boolean_t unlock_sock; 3535258945Sroberto isc_boolean_t unwatch_read = ISC_FALSE, unwatch_write = ISC_FALSE; 3536258945Sroberto int lockid = FDLOCK_ID(fd); 3537258945Sroberto 3538258945Sroberto /* 3539258945Sroberto * If the socket is going to be closed, don't do more I/O. 3540258945Sroberto */ 3541258945Sroberto LOCK(&manager->fdlock[lockid]); 3542258945Sroberto if (manager->fdstate[fd] == CLOSE_PENDING) { 3543258945Sroberto UNLOCK(&manager->fdlock[lockid]); 3544258945Sroberto 3545258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 3546258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 3547258945Sroberto return; 3548258945Sroberto } 3549258945Sroberto 3550258945Sroberto sock = manager->fds[fd]; 3551258945Sroberto unlock_sock = ISC_FALSE; 3552258945Sroberto if (readable) { 3553258945Sroberto if (sock == NULL) { 3554258945Sroberto unwatch_read = ISC_TRUE; 3555258945Sroberto goto check_write; 3556258945Sroberto } 3557258945Sroberto unlock_sock = ISC_TRUE; 3558258945Sroberto LOCK(&sock->lock); 3559258945Sroberto if (!SOCK_DEAD(sock)) { 3560258945Sroberto if (sock->listener) 3561258945Sroberto dispatch_accept(sock); 3562258945Sroberto else 3563258945Sroberto dispatch_recv(sock); 3564258945Sroberto } 3565258945Sroberto unwatch_read = ISC_TRUE; 3566258945Sroberto } 3567258945Srobertocheck_write: 3568258945Sroberto if (writeable) { 3569258945Sroberto if (sock == NULL) { 3570258945Sroberto unwatch_write = ISC_TRUE; 3571258945Sroberto goto unlock_fd; 3572258945Sroberto } 3573258945Sroberto if (!unlock_sock) { 3574258945Sroberto unlock_sock = ISC_TRUE; 3575258945Sroberto LOCK(&sock->lock); 3576258945Sroberto } 3577258945Sroberto if (!SOCK_DEAD(sock)) { 3578258945Sroberto if (sock->connecting) 3579258945Sroberto dispatch_connect(sock); 3580258945Sroberto else 3581258945Sroberto dispatch_send(sock); 3582258945Sroberto } 3583258945Sroberto unwatch_write = ISC_TRUE; 3584258945Sroberto } 3585258945Sroberto if (unlock_sock) 3586258945Sroberto UNLOCK(&sock->lock); 3587258945Sroberto 3588258945Sroberto unlock_fd: 3589258945Sroberto UNLOCK(&manager->fdlock[lockid]); 3590258945Sroberto if (unwatch_read) 3591258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_READ); 3592258945Sroberto if (unwatch_write) 3593258945Sroberto (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); 3594258945Sroberto 3595258945Sroberto} 3596258945Sroberto 3597258945Sroberto#ifdef USE_KQUEUE 3598258945Srobertostatic isc_boolean_t 3599280849Scyprocess_fds(isc__socketmgr_t *manager, struct kevent *events, int nevents) { 3600258945Sroberto int i; 3601258945Sroberto isc_boolean_t readable, writable; 3602258945Sroberto isc_boolean_t done = ISC_FALSE; 3603280849Scy#ifdef USE_WATCHER_THREAD 3604258945Sroberto isc_boolean_t have_ctlevent = ISC_FALSE; 3605258945Sroberto#endif 3606258945Sroberto 3607258945Sroberto if (nevents == manager->nevents) { 3608258945Sroberto /* 3609258945Sroberto * This is not an error, but something unexpected. If this 3610258945Sroberto * happens, it may indicate the need for increasing 3611258945Sroberto * ISC_SOCKET_MAXEVENTS. 3612258945Sroberto */ 3613258945Sroberto manager_log(manager, ISC_LOGCATEGORY_GENERAL, 3614258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, 3615258945Sroberto "maximum number of FD events (%d) received", 3616258945Sroberto nevents); 3617258945Sroberto } 3618258945Sroberto 3619258945Sroberto for (i = 0; i < nevents; i++) { 3620258945Sroberto REQUIRE(events[i].ident < manager->maxsocks); 3621280849Scy#ifdef USE_WATCHER_THREAD 3622258945Sroberto if (events[i].ident == (uintptr_t)manager->pipe_fds[0]) { 3623258945Sroberto have_ctlevent = ISC_TRUE; 3624258945Sroberto continue; 3625258945Sroberto } 3626258945Sroberto#endif 3627258945Sroberto readable = ISC_TF(events[i].filter == EVFILT_READ); 3628258945Sroberto writable = ISC_TF(events[i].filter == EVFILT_WRITE); 3629258945Sroberto process_fd(manager, events[i].ident, readable, writable); 3630258945Sroberto } 3631258945Sroberto 3632280849Scy#ifdef USE_WATCHER_THREAD 3633258945Sroberto if (have_ctlevent) 3634258945Sroberto done = process_ctlfd(manager); 3635258945Sroberto#endif 3636258945Sroberto 3637258945Sroberto return (done); 3638258945Sroberto} 3639258945Sroberto#elif defined(USE_EPOLL) 3640258945Srobertostatic isc_boolean_t 3641280849Scyprocess_fds(isc__socketmgr_t *manager, struct epoll_event *events, int nevents) 3642280849Scy{ 3643258945Sroberto int i; 3644258945Sroberto isc_boolean_t done = ISC_FALSE; 3645280849Scy#ifdef USE_WATCHER_THREAD 3646258945Sroberto isc_boolean_t have_ctlevent = ISC_FALSE; 3647258945Sroberto#endif 3648258945Sroberto 3649258945Sroberto if (nevents == manager->nevents) { 3650258945Sroberto manager_log(manager, ISC_LOGCATEGORY_GENERAL, 3651258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, 3652258945Sroberto "maximum number of FD events (%d) received", 3653258945Sroberto nevents); 3654258945Sroberto } 3655258945Sroberto 3656258945Sroberto for (i = 0; i < nevents; i++) { 3657258945Sroberto REQUIRE(events[i].data.fd < (int)manager->maxsocks); 3658280849Scy#ifdef USE_WATCHER_THREAD 3659258945Sroberto if (events[i].data.fd == manager->pipe_fds[0]) { 3660258945Sroberto have_ctlevent = ISC_TRUE; 3661258945Sroberto continue; 3662258945Sroberto } 3663258945Sroberto#endif 3664258945Sroberto if ((events[i].events & EPOLLERR) != 0 || 3665258945Sroberto (events[i].events & EPOLLHUP) != 0) { 3666258945Sroberto /* 3667258945Sroberto * epoll does not set IN/OUT bits on an erroneous 3668258945Sroberto * condition, so we need to try both anyway. This is a 3669258945Sroberto * bit inefficient, but should be okay for such rare 3670258945Sroberto * events. Note also that the read or write attempt 3671258945Sroberto * won't block because we use non-blocking sockets. 3672258945Sroberto */ 3673258945Sroberto events[i].events |= (EPOLLIN | EPOLLOUT); 3674258945Sroberto } 3675258945Sroberto process_fd(manager, events[i].data.fd, 3676258945Sroberto (events[i].events & EPOLLIN) != 0, 3677258945Sroberto (events[i].events & EPOLLOUT) != 0); 3678258945Sroberto } 3679258945Sroberto 3680280849Scy#ifdef USE_WATCHER_THREAD 3681258945Sroberto if (have_ctlevent) 3682258945Sroberto done = process_ctlfd(manager); 3683258945Sroberto#endif 3684258945Sroberto 3685258945Sroberto return (done); 3686258945Sroberto} 3687258945Sroberto#elif defined(USE_DEVPOLL) 3688258945Srobertostatic isc_boolean_t 3689280849Scyprocess_fds(isc__socketmgr_t *manager, struct pollfd *events, int nevents) { 3690258945Sroberto int i; 3691258945Sroberto isc_boolean_t done = ISC_FALSE; 3692280849Scy#ifdef USE_WATCHER_THREAD 3693258945Sroberto isc_boolean_t have_ctlevent = ISC_FALSE; 3694258945Sroberto#endif 3695258945Sroberto 3696258945Sroberto if (nevents == manager->nevents) { 3697258945Sroberto manager_log(manager, ISC_LOGCATEGORY_GENERAL, 3698258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, 3699258945Sroberto "maximum number of FD events (%d) received", 3700258945Sroberto nevents); 3701258945Sroberto } 3702258945Sroberto 3703258945Sroberto for (i = 0; i < nevents; i++) { 3704258945Sroberto REQUIRE(events[i].fd < (int)manager->maxsocks); 3705280849Scy#ifdef USE_WATCHER_THREAD 3706258945Sroberto if (events[i].fd == manager->pipe_fds[0]) { 3707258945Sroberto have_ctlevent = ISC_TRUE; 3708258945Sroberto continue; 3709258945Sroberto } 3710258945Sroberto#endif 3711258945Sroberto process_fd(manager, events[i].fd, 3712258945Sroberto (events[i].events & POLLIN) != 0, 3713258945Sroberto (events[i].events & POLLOUT) != 0); 3714258945Sroberto } 3715258945Sroberto 3716280849Scy#ifdef USE_WATCHER_THREAD 3717258945Sroberto if (have_ctlevent) 3718258945Sroberto done = process_ctlfd(manager); 3719258945Sroberto#endif 3720258945Sroberto 3721258945Sroberto return (done); 3722258945Sroberto} 3723258945Sroberto#elif defined(USE_SELECT) 3724258945Srobertostatic void 3725280849Scyprocess_fds(isc__socketmgr_t *manager, int maxfd, fd_set *readfds, 3726280849Scy fd_set *writefds) 3727258945Sroberto{ 3728258945Sroberto int i; 3729258945Sroberto 3730258945Sroberto REQUIRE(maxfd <= (int)manager->maxsocks); 3731258945Sroberto 3732258945Sroberto for (i = 0; i < maxfd; i++) { 3733280849Scy#ifdef USE_WATCHER_THREAD 3734258945Sroberto if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1]) 3735258945Sroberto continue; 3736280849Scy#endif /* USE_WATCHER_THREAD */ 3737258945Sroberto process_fd(manager, i, FD_ISSET(i, readfds), 3738258945Sroberto FD_ISSET(i, writefds)); 3739258945Sroberto } 3740258945Sroberto} 3741258945Sroberto#endif 3742258945Sroberto 3743280849Scy#ifdef USE_WATCHER_THREAD 3744258945Srobertostatic isc_boolean_t 3745280849Scyprocess_ctlfd(isc__socketmgr_t *manager) { 3746258945Sroberto int msg, fd; 3747258945Sroberto 3748258945Sroberto for (;;) { 3749258945Sroberto select_readmsg(manager, &fd, &msg); 3750258945Sroberto 3751258945Sroberto manager_log(manager, IOEVENT, 3752258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 3753258945Sroberto ISC_MSG_WATCHERMSG, 3754258945Sroberto "watcher got message %d " 3755258945Sroberto "for socket %d"), msg, fd); 3756258945Sroberto 3757258945Sroberto /* 3758258945Sroberto * Nothing to read? 3759258945Sroberto */ 3760258945Sroberto if (msg == SELECT_POKE_NOTHING) 3761258945Sroberto break; 3762258945Sroberto 3763258945Sroberto /* 3764258945Sroberto * Handle shutdown message. We really should 3765258945Sroberto * jump out of this loop right away, but 3766258945Sroberto * it doesn't matter if we have to do a little 3767258945Sroberto * more work first. 3768258945Sroberto */ 3769258945Sroberto if (msg == SELECT_POKE_SHUTDOWN) 3770258945Sroberto return (ISC_TRUE); 3771258945Sroberto 3772258945Sroberto /* 3773258945Sroberto * This is a wakeup on a socket. Look 3774258945Sroberto * at the event queue for both read and write, 3775258945Sroberto * and decide if we need to watch on it now 3776258945Sroberto * or not. 3777258945Sroberto */ 3778258945Sroberto wakeup_socket(manager, fd, msg); 3779258945Sroberto } 3780258945Sroberto 3781258945Sroberto return (ISC_FALSE); 3782258945Sroberto} 3783258945Sroberto 3784258945Sroberto/* 3785258945Sroberto * This is the thread that will loop forever, always in a select or poll 3786258945Sroberto * call. 3787258945Sroberto * 3788258945Sroberto * When select returns something to do, track down what thread gets to do 3789258945Sroberto * this I/O and post the event to it. 3790258945Sroberto */ 3791258945Srobertostatic isc_threadresult_t 3792258945Srobertowatcher(void *uap) { 3793280849Scy isc__socketmgr_t *manager = uap; 3794258945Sroberto isc_boolean_t done; 3795258945Sroberto int cc; 3796258945Sroberto#ifdef USE_KQUEUE 3797258945Sroberto const char *fnname = "kevent()"; 3798258945Sroberto#elif defined (USE_EPOLL) 3799258945Sroberto const char *fnname = "epoll_wait()"; 3800258945Sroberto#elif defined(USE_DEVPOLL) 3801258945Sroberto const char *fnname = "ioctl(DP_POLL)"; 3802258945Sroberto struct dvpoll dvp; 3803258945Sroberto#elif defined (USE_SELECT) 3804258945Sroberto const char *fnname = "select()"; 3805258945Sroberto int maxfd; 3806280849Scy int ctlfd; 3807258945Sroberto#endif 3808258945Sroberto char strbuf[ISC_STRERRORSIZE]; 3809258945Sroberto#ifdef ISC_SOCKET_USE_POLLWATCH 3810258945Sroberto pollstate_t pollstate = poll_idle; 3811258945Sroberto#endif 3812258945Sroberto 3813280849Scy#if defined (USE_SELECT) 3814258945Sroberto /* 3815258945Sroberto * Get the control fd here. This will never change. 3816258945Sroberto */ 3817258945Sroberto ctlfd = manager->pipe_fds[0]; 3818280849Scy#endif 3819258945Sroberto done = ISC_FALSE; 3820258945Sroberto while (!done) { 3821258945Sroberto do { 3822258945Sroberto#ifdef USE_KQUEUE 3823258945Sroberto cc = kevent(manager->kqueue_fd, NULL, 0, 3824258945Sroberto manager->events, manager->nevents, NULL); 3825258945Sroberto#elif defined(USE_EPOLL) 3826258945Sroberto cc = epoll_wait(manager->epoll_fd, manager->events, 3827258945Sroberto manager->nevents, -1); 3828258945Sroberto#elif defined(USE_DEVPOLL) 3829258945Sroberto dvp.dp_fds = manager->events; 3830258945Sroberto dvp.dp_nfds = manager->nevents; 3831258945Sroberto#ifndef ISC_SOCKET_USE_POLLWATCH 3832258945Sroberto dvp.dp_timeout = -1; 3833258945Sroberto#else 3834258945Sroberto if (pollstate == poll_idle) 3835258945Sroberto dvp.dp_timeout = -1; 3836258945Sroberto else 3837258945Sroberto dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT; 3838258945Sroberto#endif /* ISC_SOCKET_USE_POLLWATCH */ 3839258945Sroberto cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp); 3840258945Sroberto#elif defined(USE_SELECT) 3841258945Sroberto LOCK(&manager->lock); 3842258945Sroberto memcpy(manager->read_fds_copy, manager->read_fds, 3843258945Sroberto manager->fd_bufsize); 3844258945Sroberto memcpy(manager->write_fds_copy, manager->write_fds, 3845258945Sroberto manager->fd_bufsize); 3846258945Sroberto maxfd = manager->maxfd + 1; 3847258945Sroberto UNLOCK(&manager->lock); 3848258945Sroberto 3849258945Sroberto cc = select(maxfd, manager->read_fds_copy, 3850258945Sroberto manager->write_fds_copy, NULL, NULL); 3851258945Sroberto#endif /* USE_KQUEUE */ 3852258945Sroberto 3853258945Sroberto if (cc < 0 && !SOFT_ERROR(errno)) { 3854258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 3855258945Sroberto FATAL_ERROR(__FILE__, __LINE__, 3856258945Sroberto "%s %s: %s", fnname, 3857258945Sroberto isc_msgcat_get(isc_msgcat, 3858258945Sroberto ISC_MSGSET_GENERAL, 3859258945Sroberto ISC_MSG_FAILED, 3860258945Sroberto "failed"), strbuf); 3861258945Sroberto } 3862258945Sroberto 3863258945Sroberto#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) 3864258945Sroberto if (cc == 0) { 3865258945Sroberto if (pollstate == poll_active) 3866258945Sroberto pollstate = poll_checking; 3867258945Sroberto else if (pollstate == poll_checking) 3868258945Sroberto pollstate = poll_idle; 3869258945Sroberto } else if (cc > 0) { 3870258945Sroberto if (pollstate == poll_checking) { 3871258945Sroberto /* 3872258945Sroberto * XXX: We'd like to use a more 3873258945Sroberto * verbose log level as it's actually an 3874258945Sroberto * unexpected event, but the kernel bug 3875258945Sroberto * reportedly happens pretty frequently 3876258945Sroberto * (and it can also be a false positive) 3877258945Sroberto * so it would be just too noisy. 3878258945Sroberto */ 3879258945Sroberto manager_log(manager, 3880258945Sroberto ISC_LOGCATEGORY_GENERAL, 3881258945Sroberto ISC_LOGMODULE_SOCKET, 3882258945Sroberto ISC_LOG_DEBUG(1), 3883258945Sroberto "unexpected POLL timeout"); 3884258945Sroberto } 3885258945Sroberto pollstate = poll_active; 3886258945Sroberto } 3887258945Sroberto#endif 3888258945Sroberto } while (cc < 0); 3889258945Sroberto 3890258945Sroberto#if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL) 3891258945Sroberto done = process_fds(manager, manager->events, cc); 3892258945Sroberto#elif defined(USE_SELECT) 3893258945Sroberto process_fds(manager, maxfd, manager->read_fds_copy, 3894258945Sroberto manager->write_fds_copy); 3895258945Sroberto 3896258945Sroberto /* 3897258945Sroberto * Process reads on internal, control fd. 3898258945Sroberto */ 3899258945Sroberto if (FD_ISSET(ctlfd, manager->read_fds_copy)) 3900258945Sroberto done = process_ctlfd(manager); 3901258945Sroberto#endif 3902258945Sroberto } 3903258945Sroberto 3904258945Sroberto manager_log(manager, TRACE, "%s", 3905258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 3906258945Sroberto ISC_MSG_EXITING, "watcher exiting")); 3907258945Sroberto 3908258945Sroberto return ((isc_threadresult_t)0); 3909258945Sroberto} 3910280849Scy#endif /* USE_WATCHER_THREAD */ 3911258945Sroberto 3912280849Scy#ifdef BIND9 3913280849ScyISC_SOCKETFUNC_SCOPE void 3914280849Scyisc__socketmgr_setreserved(isc_socketmgr_t *manager0, isc_uint32_t reserved) { 3915280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 3916258945Sroberto 3917258945Sroberto REQUIRE(VALID_MANAGER(manager)); 3918258945Sroberto 3919258945Sroberto manager->reserved = reserved; 3920258945Sroberto} 3921258945Sroberto 3922280849ScyISC_SOCKETFUNC_SCOPE void 3923280849Scyisc___socketmgr_maxudp(isc_socketmgr_t *manager0, int maxudp) { 3924280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 3925280849Scy 3926280849Scy REQUIRE(VALID_MANAGER(manager)); 3927280849Scy 3928280849Scy manager->maxudp = maxudp; 3929280849Scy} 3930280849Scy#endif /* BIND9 */ 3931280849Scy 3932258945Sroberto/* 3933258945Sroberto * Create a new socket manager. 3934258945Sroberto */ 3935258945Sroberto 3936258945Srobertostatic isc_result_t 3937280849Scysetup_watcher(isc_mem_t *mctx, isc__socketmgr_t *manager) { 3938258945Sroberto isc_result_t result; 3939258945Sroberto#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) 3940258945Sroberto char strbuf[ISC_STRERRORSIZE]; 3941258945Sroberto#endif 3942258945Sroberto 3943258945Sroberto#ifdef USE_KQUEUE 3944258945Sroberto manager->nevents = ISC_SOCKET_MAXEVENTS; 3945258945Sroberto manager->events = isc_mem_get(mctx, sizeof(struct kevent) * 3946258945Sroberto manager->nevents); 3947258945Sroberto if (manager->events == NULL) 3948258945Sroberto return (ISC_R_NOMEMORY); 3949258945Sroberto manager->kqueue_fd = kqueue(); 3950258945Sroberto if (manager->kqueue_fd == -1) { 3951258945Sroberto result = isc__errno2result(errno); 3952258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 3953258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 3954258945Sroberto "kqueue %s: %s", 3955258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 3956258945Sroberto ISC_MSG_FAILED, "failed"), 3957258945Sroberto strbuf); 3958258945Sroberto isc_mem_put(mctx, manager->events, 3959258945Sroberto sizeof(struct kevent) * manager->nevents); 3960258945Sroberto return (result); 3961258945Sroberto } 3962258945Sroberto 3963280849Scy#ifdef USE_WATCHER_THREAD 3964258945Sroberto result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); 3965258945Sroberto if (result != ISC_R_SUCCESS) { 3966258945Sroberto close(manager->kqueue_fd); 3967258945Sroberto isc_mem_put(mctx, manager->events, 3968258945Sroberto sizeof(struct kevent) * manager->nevents); 3969258945Sroberto return (result); 3970258945Sroberto } 3971280849Scy#endif /* USE_WATCHER_THREAD */ 3972258945Sroberto#elif defined(USE_EPOLL) 3973258945Sroberto manager->nevents = ISC_SOCKET_MAXEVENTS; 3974258945Sroberto manager->events = isc_mem_get(mctx, sizeof(struct epoll_event) * 3975258945Sroberto manager->nevents); 3976258945Sroberto if (manager->events == NULL) 3977258945Sroberto return (ISC_R_NOMEMORY); 3978258945Sroberto manager->epoll_fd = epoll_create(manager->nevents); 3979258945Sroberto if (manager->epoll_fd == -1) { 3980258945Sroberto result = isc__errno2result(errno); 3981258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 3982258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 3983258945Sroberto "epoll_create %s: %s", 3984258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 3985258945Sroberto ISC_MSG_FAILED, "failed"), 3986258945Sroberto strbuf); 3987258945Sroberto isc_mem_put(mctx, manager->events, 3988258945Sroberto sizeof(struct epoll_event) * manager->nevents); 3989258945Sroberto return (result); 3990258945Sroberto } 3991280849Scy#ifdef USE_WATCHER_THREAD 3992258945Sroberto result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); 3993258945Sroberto if (result != ISC_R_SUCCESS) { 3994258945Sroberto close(manager->epoll_fd); 3995258945Sroberto isc_mem_put(mctx, manager->events, 3996258945Sroberto sizeof(struct epoll_event) * manager->nevents); 3997258945Sroberto return (result); 3998258945Sroberto } 3999280849Scy#endif /* USE_WATCHER_THREAD */ 4000258945Sroberto#elif defined(USE_DEVPOLL) 4001258945Sroberto /* 4002258945Sroberto * XXXJT: /dev/poll seems to reject large numbers of events, 4003258945Sroberto * so we should be careful about redefining ISC_SOCKET_MAXEVENTS. 4004258945Sroberto */ 4005258945Sroberto manager->nevents = ISC_SOCKET_MAXEVENTS; 4006258945Sroberto manager->events = isc_mem_get(mctx, sizeof(struct pollfd) * 4007258945Sroberto manager->nevents); 4008258945Sroberto if (manager->events == NULL) 4009258945Sroberto return (ISC_R_NOMEMORY); 4010258945Sroberto /* 4011258945Sroberto * Note: fdpollinfo should be able to support all possible FDs, so 4012258945Sroberto * it must have maxsocks entries (not nevents). 4013258945Sroberto */ 4014258945Sroberto manager->fdpollinfo = isc_mem_get(mctx, sizeof(pollinfo_t) * 4015258945Sroberto manager->maxsocks); 4016258945Sroberto if (manager->fdpollinfo == NULL) { 4017258945Sroberto isc_mem_put(mctx, manager->events, 4018280849Scy sizeof(struct pollfd) * manager->nevents); 4019258945Sroberto return (ISC_R_NOMEMORY); 4020258945Sroberto } 4021258945Sroberto memset(manager->fdpollinfo, 0, sizeof(pollinfo_t) * manager->maxsocks); 4022258945Sroberto manager->devpoll_fd = open("/dev/poll", O_RDWR); 4023258945Sroberto if (manager->devpoll_fd == -1) { 4024258945Sroberto result = isc__errno2result(errno); 4025258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4026258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4027258945Sroberto "open(/dev/poll) %s: %s", 4028258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4029258945Sroberto ISC_MSG_FAILED, "failed"), 4030258945Sroberto strbuf); 4031258945Sroberto isc_mem_put(mctx, manager->events, 4032258945Sroberto sizeof(struct pollfd) * manager->nevents); 4033258945Sroberto isc_mem_put(mctx, manager->fdpollinfo, 4034258945Sroberto sizeof(pollinfo_t) * manager->maxsocks); 4035258945Sroberto return (result); 4036258945Sroberto } 4037280849Scy#ifdef USE_WATCHER_THREAD 4038258945Sroberto result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); 4039258945Sroberto if (result != ISC_R_SUCCESS) { 4040258945Sroberto close(manager->devpoll_fd); 4041258945Sroberto isc_mem_put(mctx, manager->events, 4042258945Sroberto sizeof(struct pollfd) * manager->nevents); 4043258945Sroberto isc_mem_put(mctx, manager->fdpollinfo, 4044258945Sroberto sizeof(pollinfo_t) * manager->maxsocks); 4045258945Sroberto return (result); 4046258945Sroberto } 4047280849Scy#endif /* USE_WATCHER_THREAD */ 4048258945Sroberto#elif defined(USE_SELECT) 4049258945Sroberto UNUSED(result); 4050258945Sroberto 4051258945Sroberto#if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE 4052258945Sroberto /* 4053258945Sroberto * Note: this code should also cover the case of MAXSOCKETS <= 4054258945Sroberto * FD_SETSIZE, but we separate the cases to avoid possible portability 4055258945Sroberto * issues regarding howmany() and the actual representation of fd_set. 4056258945Sroberto */ 4057258945Sroberto manager->fd_bufsize = howmany(manager->maxsocks, NFDBITS) * 4058258945Sroberto sizeof(fd_mask); 4059258945Sroberto#else 4060258945Sroberto manager->fd_bufsize = sizeof(fd_set); 4061258945Sroberto#endif 4062258945Sroberto 4063258945Sroberto manager->read_fds = NULL; 4064258945Sroberto manager->read_fds_copy = NULL; 4065258945Sroberto manager->write_fds = NULL; 4066258945Sroberto manager->write_fds_copy = NULL; 4067258945Sroberto 4068258945Sroberto manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize); 4069258945Sroberto if (manager->read_fds != NULL) 4070258945Sroberto manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); 4071258945Sroberto if (manager->read_fds_copy != NULL) 4072258945Sroberto manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize); 4073258945Sroberto if (manager->write_fds != NULL) { 4074258945Sroberto manager->write_fds_copy = isc_mem_get(mctx, 4075258945Sroberto manager->fd_bufsize); 4076258945Sroberto } 4077258945Sroberto if (manager->write_fds_copy == NULL) { 4078258945Sroberto if (manager->write_fds != NULL) { 4079258945Sroberto isc_mem_put(mctx, manager->write_fds, 4080258945Sroberto manager->fd_bufsize); 4081258945Sroberto } 4082258945Sroberto if (manager->read_fds_copy != NULL) { 4083258945Sroberto isc_mem_put(mctx, manager->read_fds_copy, 4084258945Sroberto manager->fd_bufsize); 4085258945Sroberto } 4086258945Sroberto if (manager->read_fds != NULL) { 4087258945Sroberto isc_mem_put(mctx, manager->read_fds, 4088258945Sroberto manager->fd_bufsize); 4089258945Sroberto } 4090258945Sroberto return (ISC_R_NOMEMORY); 4091258945Sroberto } 4092258945Sroberto memset(manager->read_fds, 0, manager->fd_bufsize); 4093258945Sroberto memset(manager->write_fds, 0, manager->fd_bufsize); 4094258945Sroberto 4095280849Scy#ifdef USE_WATCHER_THREAD 4096258945Sroberto (void)watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); 4097258945Sroberto manager->maxfd = manager->pipe_fds[0]; 4098280849Scy#else /* USE_WATCHER_THREAD */ 4099258945Sroberto manager->maxfd = 0; 4100280849Scy#endif /* USE_WATCHER_THREAD */ 4101258945Sroberto#endif /* USE_KQUEUE */ 4102258945Sroberto 4103258945Sroberto return (ISC_R_SUCCESS); 4104258945Sroberto} 4105258945Sroberto 4106258945Srobertostatic void 4107280849Scycleanup_watcher(isc_mem_t *mctx, isc__socketmgr_t *manager) { 4108280849Scy#ifdef USE_WATCHER_THREAD 4109258945Sroberto isc_result_t result; 4110258945Sroberto 4111258945Sroberto result = unwatch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); 4112258945Sroberto if (result != ISC_R_SUCCESS) { 4113258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4114258945Sroberto "epoll_ctl(DEL) %s", 4115258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4116258945Sroberto ISC_MSG_FAILED, "failed")); 4117258945Sroberto } 4118280849Scy#endif /* USE_WATCHER_THREAD */ 4119258945Sroberto 4120258945Sroberto#ifdef USE_KQUEUE 4121258945Sroberto close(manager->kqueue_fd); 4122258945Sroberto isc_mem_put(mctx, manager->events, 4123258945Sroberto sizeof(struct kevent) * manager->nevents); 4124258945Sroberto#elif defined(USE_EPOLL) 4125258945Sroberto close(manager->epoll_fd); 4126258945Sroberto isc_mem_put(mctx, manager->events, 4127258945Sroberto sizeof(struct epoll_event) * manager->nevents); 4128258945Sroberto#elif defined(USE_DEVPOLL) 4129258945Sroberto close(manager->devpoll_fd); 4130258945Sroberto isc_mem_put(mctx, manager->events, 4131258945Sroberto sizeof(struct pollfd) * manager->nevents); 4132258945Sroberto isc_mem_put(mctx, manager->fdpollinfo, 4133258945Sroberto sizeof(pollinfo_t) * manager->maxsocks); 4134258945Sroberto#elif defined(USE_SELECT) 4135258945Sroberto if (manager->read_fds != NULL) 4136258945Sroberto isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize); 4137258945Sroberto if (manager->read_fds_copy != NULL) 4138258945Sroberto isc_mem_put(mctx, manager->read_fds_copy, manager->fd_bufsize); 4139258945Sroberto if (manager->write_fds != NULL) 4140258945Sroberto isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize); 4141258945Sroberto if (manager->write_fds_copy != NULL) 4142258945Sroberto isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize); 4143258945Sroberto#endif /* USE_KQUEUE */ 4144258945Sroberto} 4145258945Sroberto 4146280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4147280849Scyisc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { 4148280849Scy return (isc__socketmgr_create2(mctx, managerp, 0)); 4149258945Sroberto} 4150258945Sroberto 4151280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4152280849Scyisc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, 4153280849Scy unsigned int maxsocks) 4154258945Sroberto{ 4155258945Sroberto int i; 4156280849Scy isc__socketmgr_t *manager; 4157280849Scy#ifdef USE_WATCHER_THREAD 4158258945Sroberto char strbuf[ISC_STRERRORSIZE]; 4159258945Sroberto#endif 4160258945Sroberto isc_result_t result; 4161258945Sroberto 4162258945Sroberto REQUIRE(managerp != NULL && *managerp == NULL); 4163258945Sroberto 4164280849Scy#ifdef USE_SHARED_MANAGER 4165258945Sroberto if (socketmgr != NULL) { 4166258945Sroberto /* Don't allow maxsocks to be updated */ 4167258945Sroberto if (maxsocks > 0 && socketmgr->maxsocks != maxsocks) 4168258945Sroberto return (ISC_R_EXISTS); 4169258945Sroberto 4170258945Sroberto socketmgr->refs++; 4171280849Scy *managerp = (isc_socketmgr_t *)socketmgr; 4172258945Sroberto return (ISC_R_SUCCESS); 4173258945Sroberto } 4174280849Scy#endif /* USE_SHARED_MANAGER */ 4175258945Sroberto 4176258945Sroberto if (maxsocks == 0) 4177258945Sroberto maxsocks = ISC_SOCKET_MAXSOCKETS; 4178258945Sroberto 4179258945Sroberto manager = isc_mem_get(mctx, sizeof(*manager)); 4180258945Sroberto if (manager == NULL) 4181258945Sroberto return (ISC_R_NOMEMORY); 4182258945Sroberto 4183258945Sroberto /* zero-clear so that necessary cleanup on failure will be easy */ 4184258945Sroberto memset(manager, 0, sizeof(*manager)); 4185258945Sroberto manager->maxsocks = maxsocks; 4186258945Sroberto manager->reserved = 0; 4187280849Scy manager->maxudp = 0; 4188258945Sroberto manager->fds = isc_mem_get(mctx, 4189280849Scy manager->maxsocks * sizeof(isc__socket_t *)); 4190258945Sroberto if (manager->fds == NULL) { 4191258945Sroberto result = ISC_R_NOMEMORY; 4192258945Sroberto goto free_manager; 4193258945Sroberto } 4194258945Sroberto manager->fdstate = isc_mem_get(mctx, manager->maxsocks * sizeof(int)); 4195258945Sroberto if (manager->fdstate == NULL) { 4196258945Sroberto result = ISC_R_NOMEMORY; 4197258945Sroberto goto free_manager; 4198258945Sroberto } 4199258945Sroberto manager->stats = NULL; 4200258945Sroberto 4201280849Scy manager->common.methods = &socketmgrmethods; 4202280849Scy manager->common.magic = ISCAPI_SOCKETMGR_MAGIC; 4203280849Scy manager->common.impmagic = SOCKET_MANAGER_MAGIC; 4204258945Sroberto manager->mctx = NULL; 4205258945Sroberto memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *)); 4206258945Sroberto ISC_LIST_INIT(manager->socklist); 4207258945Sroberto result = isc_mutex_init(&manager->lock); 4208258945Sroberto if (result != ISC_R_SUCCESS) 4209258945Sroberto goto free_manager; 4210258945Sroberto manager->fdlock = isc_mem_get(mctx, FDLOCK_COUNT * sizeof(isc_mutex_t)); 4211258945Sroberto if (manager->fdlock == NULL) { 4212258945Sroberto result = ISC_R_NOMEMORY; 4213258945Sroberto goto cleanup_lock; 4214258945Sroberto } 4215258945Sroberto for (i = 0; i < FDLOCK_COUNT; i++) { 4216258945Sroberto result = isc_mutex_init(&manager->fdlock[i]); 4217258945Sroberto if (result != ISC_R_SUCCESS) { 4218258945Sroberto while (--i >= 0) 4219258945Sroberto DESTROYLOCK(&manager->fdlock[i]); 4220258945Sroberto isc_mem_put(mctx, manager->fdlock, 4221258945Sroberto FDLOCK_COUNT * sizeof(isc_mutex_t)); 4222258945Sroberto manager->fdlock = NULL; 4223258945Sroberto goto cleanup_lock; 4224258945Sroberto } 4225258945Sroberto } 4226258945Sroberto 4227280849Scy#ifdef USE_WATCHER_THREAD 4228258945Sroberto if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) { 4229258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4230258945Sroberto "isc_condition_init() %s", 4231258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4232258945Sroberto ISC_MSG_FAILED, "failed")); 4233258945Sroberto result = ISC_R_UNEXPECTED; 4234258945Sroberto goto cleanup_lock; 4235258945Sroberto } 4236258945Sroberto 4237258945Sroberto /* 4238258945Sroberto * Create the special fds that will be used to wake up the 4239258945Sroberto * select/poll loop when something internal needs to be done. 4240258945Sroberto */ 4241258945Sroberto if (pipe(manager->pipe_fds) != 0) { 4242258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4243258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4244258945Sroberto "pipe() %s: %s", 4245258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4246258945Sroberto ISC_MSG_FAILED, "failed"), 4247258945Sroberto strbuf); 4248258945Sroberto result = ISC_R_UNEXPECTED; 4249258945Sroberto goto cleanup_condition; 4250258945Sroberto } 4251258945Sroberto 4252258945Sroberto RUNTIME_CHECK(make_nonblock(manager->pipe_fds[0]) == ISC_R_SUCCESS); 4253258945Sroberto#if 0 4254258945Sroberto RUNTIME_CHECK(make_nonblock(manager->pipe_fds[1]) == ISC_R_SUCCESS); 4255258945Sroberto#endif 4256280849Scy#endif /* USE_WATCHER_THREAD */ 4257280849Scy 4258280849Scy#ifdef USE_SHARED_MANAGER 4259258945Sroberto manager->refs = 1; 4260280849Scy#endif /* USE_SHARED_MANAGER */ 4261258945Sroberto 4262258945Sroberto /* 4263258945Sroberto * Set up initial state for the select loop 4264258945Sroberto */ 4265258945Sroberto result = setup_watcher(mctx, manager); 4266258945Sroberto if (result != ISC_R_SUCCESS) 4267258945Sroberto goto cleanup; 4268258945Sroberto memset(manager->fdstate, 0, manager->maxsocks * sizeof(int)); 4269280849Scy#ifdef USE_WATCHER_THREAD 4270258945Sroberto /* 4271258945Sroberto * Start up the select/poll thread. 4272258945Sroberto */ 4273258945Sroberto if (isc_thread_create(watcher, manager, &manager->watcher) != 4274258945Sroberto ISC_R_SUCCESS) { 4275258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4276258945Sroberto "isc_thread_create() %s", 4277258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4278258945Sroberto ISC_MSG_FAILED, "failed")); 4279258945Sroberto cleanup_watcher(mctx, manager); 4280258945Sroberto result = ISC_R_UNEXPECTED; 4281258945Sroberto goto cleanup; 4282258945Sroberto } 4283280849Scy#endif /* USE_WATCHER_THREAD */ 4284258945Sroberto isc_mem_attach(mctx, &manager->mctx); 4285258945Sroberto 4286280849Scy#ifdef USE_SHARED_MANAGER 4287258945Sroberto socketmgr = manager; 4288280849Scy#endif /* USE_SHARED_MANAGER */ 4289280849Scy *managerp = (isc_socketmgr_t *)manager; 4290258945Sroberto 4291258945Sroberto return (ISC_R_SUCCESS); 4292258945Sroberto 4293258945Srobertocleanup: 4294280849Scy#ifdef USE_WATCHER_THREAD 4295258945Sroberto (void)close(manager->pipe_fds[0]); 4296258945Sroberto (void)close(manager->pipe_fds[1]); 4297280849Scy#endif /* USE_WATCHER_THREAD */ 4298258945Sroberto 4299280849Scy#ifdef USE_WATCHER_THREAD 4300258945Srobertocleanup_condition: 4301258945Sroberto (void)isc_condition_destroy(&manager->shutdown_ok); 4302280849Scy#endif /* USE_WATCHER_THREAD */ 4303258945Sroberto 4304258945Sroberto 4305258945Srobertocleanup_lock: 4306258945Sroberto if (manager->fdlock != NULL) { 4307258945Sroberto for (i = 0; i < FDLOCK_COUNT; i++) 4308258945Sroberto DESTROYLOCK(&manager->fdlock[i]); 4309258945Sroberto } 4310258945Sroberto DESTROYLOCK(&manager->lock); 4311258945Sroberto 4312258945Srobertofree_manager: 4313258945Sroberto if (manager->fdlock != NULL) { 4314258945Sroberto isc_mem_put(mctx, manager->fdlock, 4315258945Sroberto FDLOCK_COUNT * sizeof(isc_mutex_t)); 4316258945Sroberto } 4317258945Sroberto if (manager->fdstate != NULL) { 4318258945Sroberto isc_mem_put(mctx, manager->fdstate, 4319258945Sroberto manager->maxsocks * sizeof(int)); 4320258945Sroberto } 4321258945Sroberto if (manager->fds != NULL) { 4322258945Sroberto isc_mem_put(mctx, manager->fds, 4323258945Sroberto manager->maxsocks * sizeof(isc_socket_t *)); 4324258945Sroberto } 4325258945Sroberto isc_mem_put(mctx, manager, sizeof(*manager)); 4326258945Sroberto 4327258945Sroberto return (result); 4328258945Sroberto} 4329258945Sroberto 4330280849Scy#ifdef BIND9 4331258945Srobertoisc_result_t 4332280849Scyisc__socketmgr_getmaxsockets(isc_socketmgr_t *manager0, unsigned int *nsockp) { 4333280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 4334258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4335258945Sroberto REQUIRE(nsockp != NULL); 4336258945Sroberto 4337258945Sroberto *nsockp = manager->maxsocks; 4338258945Sroberto 4339258945Sroberto return (ISC_R_SUCCESS); 4340258945Sroberto} 4341258945Sroberto 4342258945Srobertovoid 4343280849Scyisc__socketmgr_setstats(isc_socketmgr_t *manager0, isc_stats_t *stats) { 4344280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 4345280849Scy 4346258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4347258945Sroberto REQUIRE(ISC_LIST_EMPTY(manager->socklist)); 4348258945Sroberto REQUIRE(manager->stats == NULL); 4349258945Sroberto REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max); 4350258945Sroberto 4351258945Sroberto isc_stats_attach(stats, &manager->stats); 4352258945Sroberto} 4353280849Scy#endif 4354258945Sroberto 4355280849ScyISC_SOCKETFUNC_SCOPE void 4356280849Scyisc__socketmgr_destroy(isc_socketmgr_t **managerp) { 4357280849Scy isc__socketmgr_t *manager; 4358258945Sroberto int i; 4359258945Sroberto isc_mem_t *mctx; 4360258945Sroberto 4361258945Sroberto /* 4362258945Sroberto * Destroy a socket manager. 4363258945Sroberto */ 4364258945Sroberto 4365258945Sroberto REQUIRE(managerp != NULL); 4366280849Scy manager = (isc__socketmgr_t *)*managerp; 4367258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4368258945Sroberto 4369280849Scy#ifdef USE_SHARED_MANAGER 4370280849Scy manager->refs--; 4371280849Scy if (manager->refs > 0) { 4372258945Sroberto *managerp = NULL; 4373258945Sroberto return; 4374258945Sroberto } 4375280849Scy socketmgr = NULL; 4376280849Scy#endif /* USE_SHARED_MANAGER */ 4377258945Sroberto 4378258945Sroberto LOCK(&manager->lock); 4379258945Sroberto 4380258945Sroberto /* 4381258945Sroberto * Wait for all sockets to be destroyed. 4382258945Sroberto */ 4383258945Sroberto while (!ISC_LIST_EMPTY(manager->socklist)) { 4384280849Scy#ifdef USE_WATCHER_THREAD 4385258945Sroberto manager_log(manager, CREATION, "%s", 4386258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 4387258945Sroberto ISC_MSG_SOCKETSREMAIN, 4388258945Sroberto "sockets exist")); 4389258945Sroberto WAIT(&manager->shutdown_ok, &manager->lock); 4390280849Scy#else /* USE_WATCHER_THREAD */ 4391280849Scy UNLOCK(&manager->lock); 4392280849Scy isc__taskmgr_dispatch(NULL); 4393280849Scy LOCK(&manager->lock); 4394280849Scy#endif /* USE_WATCHER_THREAD */ 4395258945Sroberto } 4396258945Sroberto 4397258945Sroberto UNLOCK(&manager->lock); 4398258945Sroberto 4399258945Sroberto /* 4400258945Sroberto * Here, poke our select/poll thread. Do this by closing the write 4401258945Sroberto * half of the pipe, which will send EOF to the read half. 4402258945Sroberto * This is currently a no-op in the non-threaded case. 4403258945Sroberto */ 4404258945Sroberto select_poke(manager, 0, SELECT_POKE_SHUTDOWN); 4405258945Sroberto 4406280849Scy#ifdef USE_WATCHER_THREAD 4407258945Sroberto /* 4408258945Sroberto * Wait for thread to exit. 4409258945Sroberto */ 4410258945Sroberto if (isc_thread_join(manager->watcher, NULL) != ISC_R_SUCCESS) 4411258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 4412258945Sroberto "isc_thread_join() %s", 4413258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 4414258945Sroberto ISC_MSG_FAILED, "failed")); 4415280849Scy#endif /* USE_WATCHER_THREAD */ 4416258945Sroberto 4417258945Sroberto /* 4418258945Sroberto * Clean up. 4419258945Sroberto */ 4420258945Sroberto cleanup_watcher(manager->mctx, manager); 4421258945Sroberto 4422280849Scy#ifdef USE_WATCHER_THREAD 4423258945Sroberto (void)close(manager->pipe_fds[0]); 4424258945Sroberto (void)close(manager->pipe_fds[1]); 4425258945Sroberto (void)isc_condition_destroy(&manager->shutdown_ok); 4426280849Scy#endif /* USE_WATCHER_THREAD */ 4427258945Sroberto 4428258945Sroberto for (i = 0; i < (int)manager->maxsocks; i++) 4429258945Sroberto if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */ 4430258945Sroberto (void)close(i); 4431258945Sroberto 4432258945Sroberto isc_mem_put(manager->mctx, manager->fds, 4433280849Scy manager->maxsocks * sizeof(isc__socket_t *)); 4434258945Sroberto isc_mem_put(manager->mctx, manager->fdstate, 4435258945Sroberto manager->maxsocks * sizeof(int)); 4436258945Sroberto 4437258945Sroberto if (manager->stats != NULL) 4438258945Sroberto isc_stats_detach(&manager->stats); 4439258945Sroberto 4440258945Sroberto if (manager->fdlock != NULL) { 4441258945Sroberto for (i = 0; i < FDLOCK_COUNT; i++) 4442258945Sroberto DESTROYLOCK(&manager->fdlock[i]); 4443258945Sroberto isc_mem_put(manager->mctx, manager->fdlock, 4444258945Sroberto FDLOCK_COUNT * sizeof(isc_mutex_t)); 4445258945Sroberto } 4446258945Sroberto DESTROYLOCK(&manager->lock); 4447280849Scy manager->common.magic = 0; 4448280849Scy manager->common.impmagic = 0; 4449258945Sroberto mctx= manager->mctx; 4450258945Sroberto isc_mem_put(mctx, manager, sizeof(*manager)); 4451258945Sroberto 4452258945Sroberto isc_mem_detach(&mctx); 4453258945Sroberto 4454258945Sroberto *managerp = NULL; 4455280849Scy 4456280849Scy#ifdef USE_SHARED_MANAGER 4457280849Scy socketmgr = NULL; 4458280849Scy#endif 4459258945Sroberto} 4460258945Sroberto 4461258945Srobertostatic isc_result_t 4462280849Scysocket_recv(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 4463258945Sroberto unsigned int flags) 4464258945Sroberto{ 4465258945Sroberto int io_state; 4466258945Sroberto isc_boolean_t have_lock = ISC_FALSE; 4467258945Sroberto isc_task_t *ntask = NULL; 4468258945Sroberto isc_result_t result = ISC_R_SUCCESS; 4469258945Sroberto 4470258945Sroberto dev->ev_sender = task; 4471258945Sroberto 4472258945Sroberto if (sock->type == isc_sockettype_udp) { 4473258945Sroberto io_state = doio_recv(sock, dev); 4474258945Sroberto } else { 4475258945Sroberto LOCK(&sock->lock); 4476258945Sroberto have_lock = ISC_TRUE; 4477258945Sroberto 4478258945Sroberto if (ISC_LIST_EMPTY(sock->recv_list)) 4479258945Sroberto io_state = doio_recv(sock, dev); 4480258945Sroberto else 4481258945Sroberto io_state = DOIO_SOFT; 4482258945Sroberto } 4483258945Sroberto 4484258945Sroberto switch (io_state) { 4485258945Sroberto case DOIO_SOFT: 4486258945Sroberto /* 4487258945Sroberto * We couldn't read all or part of the request right now, so 4488258945Sroberto * queue it. 4489258945Sroberto * 4490258945Sroberto * Attach to socket and to task 4491258945Sroberto */ 4492258945Sroberto isc_task_attach(task, &ntask); 4493258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 4494258945Sroberto 4495258945Sroberto if (!have_lock) { 4496258945Sroberto LOCK(&sock->lock); 4497258945Sroberto have_lock = ISC_TRUE; 4498258945Sroberto } 4499258945Sroberto 4500258945Sroberto /* 4501258945Sroberto * Enqueue the request. If the socket was previously not being 4502258945Sroberto * watched, poke the watcher to start paying attention to it. 4503258945Sroberto */ 4504258945Sroberto if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv) 4505258945Sroberto select_poke(sock->manager, sock->fd, SELECT_POKE_READ); 4506258945Sroberto ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); 4507258945Sroberto 4508258945Sroberto socket_log(sock, NULL, EVENT, NULL, 0, 0, 4509258945Sroberto "socket_recv: event %p -> task %p", 4510258945Sroberto dev, ntask); 4511258945Sroberto 4512258945Sroberto if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 4513258945Sroberto result = ISC_R_INPROGRESS; 4514258945Sroberto break; 4515258945Sroberto 4516258945Sroberto case DOIO_EOF: 4517258945Sroberto dev->result = ISC_R_EOF; 4518258945Sroberto /* fallthrough */ 4519258945Sroberto 4520258945Sroberto case DOIO_HARD: 4521258945Sroberto case DOIO_SUCCESS: 4522258945Sroberto if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) 4523258945Sroberto send_recvdone_event(sock, &dev); 4524258945Sroberto break; 4525258945Sroberto } 4526258945Sroberto 4527258945Sroberto if (have_lock) 4528258945Sroberto UNLOCK(&sock->lock); 4529258945Sroberto 4530258945Sroberto return (result); 4531258945Sroberto} 4532258945Sroberto 4533280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4534280849Scyisc__socket_recvv(isc_socket_t *sock0, isc_bufferlist_t *buflist, 4535280849Scy unsigned int minimum, isc_task_t *task, 4536280849Scy isc_taskaction_t action, const void *arg) 4537258945Sroberto{ 4538280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4539258945Sroberto isc_socketevent_t *dev; 4540280849Scy isc__socketmgr_t *manager; 4541258945Sroberto unsigned int iocount; 4542258945Sroberto isc_buffer_t *buffer; 4543258945Sroberto 4544258945Sroberto REQUIRE(VALID_SOCKET(sock)); 4545258945Sroberto REQUIRE(buflist != NULL); 4546258945Sroberto REQUIRE(!ISC_LIST_EMPTY(*buflist)); 4547258945Sroberto REQUIRE(task != NULL); 4548258945Sroberto REQUIRE(action != NULL); 4549258945Sroberto 4550258945Sroberto manager = sock->manager; 4551258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4552258945Sroberto 4553258945Sroberto iocount = isc_bufferlist_availablecount(buflist); 4554258945Sroberto REQUIRE(iocount > 0); 4555258945Sroberto 4556258945Sroberto INSIST(sock->bound); 4557258945Sroberto 4558258945Sroberto dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 4559280849Scy if (dev == NULL) 4560258945Sroberto return (ISC_R_NOMEMORY); 4561258945Sroberto 4562258945Sroberto /* 4563258945Sroberto * UDP sockets are always partial read 4564258945Sroberto */ 4565258945Sroberto if (sock->type == isc_sockettype_udp) 4566258945Sroberto dev->minimum = 1; 4567258945Sroberto else { 4568258945Sroberto if (minimum == 0) 4569258945Sroberto dev->minimum = iocount; 4570258945Sroberto else 4571258945Sroberto dev->minimum = minimum; 4572258945Sroberto } 4573258945Sroberto 4574258945Sroberto /* 4575258945Sroberto * Move each buffer from the passed in list to our internal one. 4576258945Sroberto */ 4577258945Sroberto buffer = ISC_LIST_HEAD(*buflist); 4578258945Sroberto while (buffer != NULL) { 4579258945Sroberto ISC_LIST_DEQUEUE(*buflist, buffer, link); 4580258945Sroberto ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 4581258945Sroberto buffer = ISC_LIST_HEAD(*buflist); 4582258945Sroberto } 4583258945Sroberto 4584258945Sroberto return (socket_recv(sock, dev, task, 0)); 4585258945Sroberto} 4586258945Sroberto 4587280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4588280849Scyisc__socket_recv(isc_socket_t *sock0, isc_region_t *region, 4589280849Scy unsigned int minimum, isc_task_t *task, 4590280849Scy isc_taskaction_t action, const void *arg) 4591258945Sroberto{ 4592280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4593258945Sroberto isc_socketevent_t *dev; 4594280849Scy isc__socketmgr_t *manager; 4595258945Sroberto 4596258945Sroberto REQUIRE(VALID_SOCKET(sock)); 4597258945Sroberto REQUIRE(action != NULL); 4598258945Sroberto 4599258945Sroberto manager = sock->manager; 4600258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4601258945Sroberto 4602258945Sroberto INSIST(sock->bound); 4603258945Sroberto 4604258945Sroberto dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 4605258945Sroberto if (dev == NULL) 4606258945Sroberto return (ISC_R_NOMEMORY); 4607258945Sroberto 4608280849Scy return (isc__socket_recv2(sock0, region, minimum, task, dev, 0)); 4609258945Sroberto} 4610258945Sroberto 4611280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4612280849Scyisc__socket_recv2(isc_socket_t *sock0, isc_region_t *region, 4613280849Scy unsigned int minimum, isc_task_t *task, 4614280849Scy isc_socketevent_t *event, unsigned int flags) 4615258945Sroberto{ 4616280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4617280849Scy 4618258945Sroberto event->ev_sender = sock; 4619280849Scy event->result = ISC_R_UNSET; 4620258945Sroberto ISC_LIST_INIT(event->bufferlist); 4621258945Sroberto event->region = *region; 4622258945Sroberto event->n = 0; 4623258945Sroberto event->offset = 0; 4624258945Sroberto event->attributes = 0; 4625258945Sroberto 4626258945Sroberto /* 4627258945Sroberto * UDP sockets are always partial read. 4628258945Sroberto */ 4629258945Sroberto if (sock->type == isc_sockettype_udp) 4630258945Sroberto event->minimum = 1; 4631258945Sroberto else { 4632258945Sroberto if (minimum == 0) 4633258945Sroberto event->minimum = region->length; 4634258945Sroberto else 4635258945Sroberto event->minimum = minimum; 4636258945Sroberto } 4637258945Sroberto 4638258945Sroberto return (socket_recv(sock, event, task, flags)); 4639258945Sroberto} 4640258945Sroberto 4641258945Srobertostatic isc_result_t 4642280849Scysocket_send(isc__socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 4643258945Sroberto isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 4644258945Sroberto unsigned int flags) 4645258945Sroberto{ 4646258945Sroberto int io_state; 4647258945Sroberto isc_boolean_t have_lock = ISC_FALSE; 4648258945Sroberto isc_task_t *ntask = NULL; 4649258945Sroberto isc_result_t result = ISC_R_SUCCESS; 4650258945Sroberto 4651258945Sroberto dev->ev_sender = task; 4652258945Sroberto 4653258945Sroberto set_dev_address(address, sock, dev); 4654258945Sroberto if (pktinfo != NULL) { 4655258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 4656258945Sroberto dev->pktinfo = *pktinfo; 4657258945Sroberto 4658258945Sroberto if (!isc_sockaddr_issitelocal(&dev->address) && 4659258945Sroberto !isc_sockaddr_islinklocal(&dev->address)) { 4660258945Sroberto socket_log(sock, NULL, TRACE, isc_msgcat, 4661258945Sroberto ISC_MSGSET_SOCKET, ISC_MSG_PKTINFOPROVIDED, 4662258945Sroberto "pktinfo structure provided, ifindex %u " 4663258945Sroberto "(set to 0)", pktinfo->ipi6_ifindex); 4664258945Sroberto 4665258945Sroberto /* 4666258945Sroberto * Set the pktinfo index to 0 here, to let the 4667258945Sroberto * kernel decide what interface it should send on. 4668258945Sroberto */ 4669258945Sroberto dev->pktinfo.ipi6_ifindex = 0; 4670258945Sroberto } 4671258945Sroberto } 4672258945Sroberto 4673258945Sroberto if (sock->type == isc_sockettype_udp) 4674258945Sroberto io_state = doio_send(sock, dev); 4675258945Sroberto else { 4676258945Sroberto LOCK(&sock->lock); 4677258945Sroberto have_lock = ISC_TRUE; 4678258945Sroberto 4679258945Sroberto if (ISC_LIST_EMPTY(sock->send_list)) 4680258945Sroberto io_state = doio_send(sock, dev); 4681258945Sroberto else 4682258945Sroberto io_state = DOIO_SOFT; 4683258945Sroberto } 4684258945Sroberto 4685258945Sroberto switch (io_state) { 4686258945Sroberto case DOIO_SOFT: 4687258945Sroberto /* 4688258945Sroberto * We couldn't send all or part of the request right now, so 4689258945Sroberto * queue it unless ISC_SOCKFLAG_NORETRY is set. 4690258945Sroberto */ 4691258945Sroberto if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { 4692258945Sroberto isc_task_attach(task, &ntask); 4693258945Sroberto dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 4694258945Sroberto 4695258945Sroberto if (!have_lock) { 4696258945Sroberto LOCK(&sock->lock); 4697258945Sroberto have_lock = ISC_TRUE; 4698258945Sroberto } 4699258945Sroberto 4700258945Sroberto /* 4701258945Sroberto * Enqueue the request. If the socket was previously 4702258945Sroberto * not being watched, poke the watcher to start 4703258945Sroberto * paying attention to it. 4704258945Sroberto */ 4705258945Sroberto if (ISC_LIST_EMPTY(sock->send_list) && 4706258945Sroberto !sock->pending_send) 4707258945Sroberto select_poke(sock->manager, sock->fd, 4708258945Sroberto SELECT_POKE_WRITE); 4709258945Sroberto ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); 4710258945Sroberto 4711258945Sroberto socket_log(sock, NULL, EVENT, NULL, 0, 0, 4712258945Sroberto "socket_send: event %p -> task %p", 4713258945Sroberto dev, ntask); 4714258945Sroberto 4715258945Sroberto if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 4716258945Sroberto result = ISC_R_INPROGRESS; 4717258945Sroberto break; 4718258945Sroberto } 4719258945Sroberto 4720258945Sroberto case DOIO_HARD: 4721258945Sroberto case DOIO_SUCCESS: 4722258945Sroberto if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0) 4723258945Sroberto send_senddone_event(sock, &dev); 4724258945Sroberto break; 4725258945Sroberto } 4726258945Sroberto 4727258945Sroberto if (have_lock) 4728258945Sroberto UNLOCK(&sock->lock); 4729258945Sroberto 4730258945Sroberto return (result); 4731258945Sroberto} 4732258945Sroberto 4733280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4734280849Scyisc__socket_send(isc_socket_t *sock, isc_region_t *region, 4735280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg) 4736258945Sroberto{ 4737258945Sroberto /* 4738258945Sroberto * REQUIRE() checking is performed in isc_socket_sendto(). 4739258945Sroberto */ 4740280849Scy return (isc__socket_sendto(sock, region, task, action, arg, NULL, 4741280849Scy NULL)); 4742258945Sroberto} 4743258945Sroberto 4744280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4745280849Scyisc__socket_sendto(isc_socket_t *sock0, isc_region_t *region, 4746280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg, 4747280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 4748258945Sroberto{ 4749280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4750258945Sroberto isc_socketevent_t *dev; 4751280849Scy isc__socketmgr_t *manager; 4752258945Sroberto 4753258945Sroberto REQUIRE(VALID_SOCKET(sock)); 4754258945Sroberto REQUIRE(region != NULL); 4755258945Sroberto REQUIRE(task != NULL); 4756258945Sroberto REQUIRE(action != NULL); 4757258945Sroberto 4758258945Sroberto manager = sock->manager; 4759258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4760258945Sroberto 4761258945Sroberto INSIST(sock->bound); 4762258945Sroberto 4763258945Sroberto dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 4764280849Scy if (dev == NULL) 4765258945Sroberto return (ISC_R_NOMEMORY); 4766258945Sroberto 4767258945Sroberto dev->region = *region; 4768258945Sroberto 4769258945Sroberto return (socket_send(sock, dev, task, address, pktinfo, 0)); 4770258945Sroberto} 4771258945Sroberto 4772280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4773280849Scyisc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, 4774280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg) 4775258945Sroberto{ 4776280849Scy return (isc__socket_sendtov(sock, buflist, task, action, arg, NULL, 4777280849Scy NULL)); 4778258945Sroberto} 4779258945Sroberto 4780280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4781280849Scyisc__socket_sendtov(isc_socket_t *sock0, isc_bufferlist_t *buflist, 4782280849Scy isc_task_t *task, isc_taskaction_t action, const void *arg, 4783280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 4784258945Sroberto{ 4785280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4786258945Sroberto isc_socketevent_t *dev; 4787280849Scy isc__socketmgr_t *manager; 4788258945Sroberto unsigned int iocount; 4789258945Sroberto isc_buffer_t *buffer; 4790258945Sroberto 4791258945Sroberto REQUIRE(VALID_SOCKET(sock)); 4792258945Sroberto REQUIRE(buflist != NULL); 4793258945Sroberto REQUIRE(!ISC_LIST_EMPTY(*buflist)); 4794258945Sroberto REQUIRE(task != NULL); 4795258945Sroberto REQUIRE(action != NULL); 4796258945Sroberto 4797258945Sroberto manager = sock->manager; 4798258945Sroberto REQUIRE(VALID_MANAGER(manager)); 4799258945Sroberto 4800258945Sroberto iocount = isc_bufferlist_usedcount(buflist); 4801258945Sroberto REQUIRE(iocount > 0); 4802258945Sroberto 4803258945Sroberto dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 4804280849Scy if (dev == NULL) 4805258945Sroberto return (ISC_R_NOMEMORY); 4806258945Sroberto 4807258945Sroberto /* 4808258945Sroberto * Move each buffer from the passed in list to our internal one. 4809258945Sroberto */ 4810258945Sroberto buffer = ISC_LIST_HEAD(*buflist); 4811258945Sroberto while (buffer != NULL) { 4812258945Sroberto ISC_LIST_DEQUEUE(*buflist, buffer, link); 4813258945Sroberto ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 4814258945Sroberto buffer = ISC_LIST_HEAD(*buflist); 4815258945Sroberto } 4816258945Sroberto 4817258945Sroberto return (socket_send(sock, dev, task, address, pktinfo, 0)); 4818258945Sroberto} 4819258945Sroberto 4820280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4821280849Scyisc__socket_sendto2(isc_socket_t *sock0, isc_region_t *region, 4822280849Scy isc_task_t *task, 4823280849Scy isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 4824280849Scy isc_socketevent_t *event, unsigned int flags) 4825258945Sroberto{ 4826280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 4827280849Scy 4828280849Scy REQUIRE(VALID_SOCKET(sock)); 4829258945Sroberto REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0); 4830258945Sroberto if ((flags & ISC_SOCKFLAG_NORETRY) != 0) 4831258945Sroberto REQUIRE(sock->type == isc_sockettype_udp); 4832258945Sroberto event->ev_sender = sock; 4833280849Scy event->result = ISC_R_UNSET; 4834258945Sroberto ISC_LIST_INIT(event->bufferlist); 4835258945Sroberto event->region = *region; 4836258945Sroberto event->n = 0; 4837258945Sroberto event->offset = 0; 4838258945Sroberto event->attributes = 0; 4839258945Sroberto 4840258945Sroberto return (socket_send(sock, event, task, address, pktinfo, flags)); 4841258945Sroberto} 4842258945Sroberto 4843280849ScyISC_SOCKETFUNC_SCOPE void 4844280849Scyisc__socket_cleanunix(isc_sockaddr_t *sockaddr, isc_boolean_t active) { 4845258945Sroberto#ifdef ISC_PLATFORM_HAVESYSUNH 4846258945Sroberto int s; 4847258945Sroberto struct stat sb; 4848258945Sroberto char strbuf[ISC_STRERRORSIZE]; 4849258945Sroberto 4850258945Sroberto if (sockaddr->type.sa.sa_family != AF_UNIX) 4851258945Sroberto return; 4852258945Sroberto 4853258945Sroberto#ifndef S_ISSOCK 4854258945Sroberto#if defined(S_IFMT) && defined(S_IFSOCK) 4855258945Sroberto#define S_ISSOCK(mode) ((mode & S_IFMT)==S_IFSOCK) 4856258945Sroberto#elif defined(_S_IFMT) && defined(S_IFSOCK) 4857258945Sroberto#define S_ISSOCK(mode) ((mode & _S_IFMT)==S_IFSOCK) 4858258945Sroberto#endif 4859258945Sroberto#endif 4860258945Sroberto 4861258945Sroberto#ifndef S_ISFIFO 4862258945Sroberto#if defined(S_IFMT) && defined(S_IFIFO) 4863258945Sroberto#define S_ISFIFO(mode) ((mode & S_IFMT)==S_IFIFO) 4864258945Sroberto#elif defined(_S_IFMT) && defined(S_IFIFO) 4865258945Sroberto#define S_ISFIFO(mode) ((mode & _S_IFMT)==S_IFIFO) 4866258945Sroberto#endif 4867258945Sroberto#endif 4868258945Sroberto 4869258945Sroberto#if !defined(S_ISFIFO) && !defined(S_ISSOCK) 4870258945Sroberto#error You need to define S_ISFIFO and S_ISSOCK as appropriate for your platform. See <sys/stat.h>. 4871258945Sroberto#endif 4872258945Sroberto 4873258945Sroberto#ifndef S_ISFIFO 4874258945Sroberto#define S_ISFIFO(mode) 0 4875258945Sroberto#endif 4876258945Sroberto 4877258945Sroberto#ifndef S_ISSOCK 4878258945Sroberto#define S_ISSOCK(mode) 0 4879258945Sroberto#endif 4880258945Sroberto 4881258945Sroberto if (active) { 4882258945Sroberto if (stat(sockaddr->type.sunix.sun_path, &sb) < 0) { 4883258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4884258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4885258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 4886258945Sroberto "isc_socket_cleanunix: stat(%s): %s", 4887258945Sroberto sockaddr->type.sunix.sun_path, strbuf); 4888258945Sroberto return; 4889258945Sroberto } 4890258945Sroberto if (!(S_ISSOCK(sb.st_mode) || S_ISFIFO(sb.st_mode))) { 4891258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4892258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 4893258945Sroberto "isc_socket_cleanunix: %s: not a socket", 4894258945Sroberto sockaddr->type.sunix.sun_path); 4895258945Sroberto return; 4896258945Sroberto } 4897258945Sroberto if (unlink(sockaddr->type.sunix.sun_path) < 0) { 4898258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4899258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4900258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 4901258945Sroberto "isc_socket_cleanunix: unlink(%s): %s", 4902258945Sroberto sockaddr->type.sunix.sun_path, strbuf); 4903258945Sroberto } 4904258945Sroberto return; 4905258945Sroberto } 4906258945Sroberto 4907258945Sroberto s = socket(AF_UNIX, SOCK_STREAM, 0); 4908258945Sroberto if (s < 0) { 4909258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4910258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4911258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, 4912258945Sroberto "isc_socket_cleanunix: socket(%s): %s", 4913258945Sroberto sockaddr->type.sunix.sun_path, strbuf); 4914258945Sroberto return; 4915258945Sroberto } 4916258945Sroberto 4917258945Sroberto if (stat(sockaddr->type.sunix.sun_path, &sb) < 0) { 4918258945Sroberto switch (errno) { 4919258945Sroberto case ENOENT: /* We exited cleanly last time */ 4920258945Sroberto break; 4921258945Sroberto default: 4922258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4923258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4924258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, 4925258945Sroberto "isc_socket_cleanunix: stat(%s): %s", 4926258945Sroberto sockaddr->type.sunix.sun_path, strbuf); 4927258945Sroberto break; 4928258945Sroberto } 4929258945Sroberto goto cleanup; 4930258945Sroberto } 4931258945Sroberto 4932258945Sroberto if (!(S_ISSOCK(sb.st_mode) || S_ISFIFO(sb.st_mode))) { 4933258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4934258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, 4935258945Sroberto "isc_socket_cleanunix: %s: not a socket", 4936258945Sroberto sockaddr->type.sunix.sun_path); 4937258945Sroberto goto cleanup; 4938258945Sroberto } 4939258945Sroberto 4940258945Sroberto if (connect(s, (struct sockaddr *)&sockaddr->type.sunix, 4941258945Sroberto sizeof(sockaddr->type.sunix)) < 0) { 4942258945Sroberto switch (errno) { 4943258945Sroberto case ECONNREFUSED: 4944258945Sroberto case ECONNRESET: 4945258945Sroberto if (unlink(sockaddr->type.sunix.sun_path) < 0) { 4946258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4947258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4948258945Sroberto ISC_LOGMODULE_SOCKET, 4949258945Sroberto ISC_LOG_WARNING, 4950258945Sroberto "isc_socket_cleanunix: " 4951258945Sroberto "unlink(%s): %s", 4952258945Sroberto sockaddr->type.sunix.sun_path, 4953258945Sroberto strbuf); 4954258945Sroberto } 4955258945Sroberto break; 4956258945Sroberto default: 4957258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 4958258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 4959258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_WARNING, 4960258945Sroberto "isc_socket_cleanunix: connect(%s): %s", 4961258945Sroberto sockaddr->type.sunix.sun_path, strbuf); 4962258945Sroberto break; 4963258945Sroberto } 4964258945Sroberto } 4965258945Sroberto cleanup: 4966258945Sroberto close(s); 4967258945Sroberto#else 4968258945Sroberto UNUSED(sockaddr); 4969258945Sroberto UNUSED(active); 4970258945Sroberto#endif 4971258945Sroberto} 4972258945Sroberto 4973280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 4974280849Scyisc__socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm, 4975258945Sroberto isc_uint32_t owner, isc_uint32_t group) 4976258945Sroberto{ 4977258945Sroberto#ifdef ISC_PLATFORM_HAVESYSUNH 4978258945Sroberto isc_result_t result = ISC_R_SUCCESS; 4979258945Sroberto char strbuf[ISC_STRERRORSIZE]; 4980258945Sroberto char path[sizeof(sockaddr->type.sunix.sun_path)]; 4981258945Sroberto#ifdef NEED_SECURE_DIRECTORY 4982258945Sroberto char *slash; 4983258945Sroberto#endif 4984258945Sroberto 4985258945Sroberto REQUIRE(sockaddr->type.sa.sa_family == AF_UNIX); 4986258945Sroberto INSIST(strlen(sockaddr->type.sunix.sun_path) < sizeof(path)); 4987258945Sroberto strcpy(path, sockaddr->type.sunix.sun_path); 4988258945Sroberto 4989258945Sroberto#ifdef NEED_SECURE_DIRECTORY 4990258945Sroberto slash = strrchr(path, '/'); 4991258945Sroberto if (slash != NULL) { 4992258945Sroberto if (slash != path) 4993258945Sroberto *slash = '\0'; 4994258945Sroberto else 4995258945Sroberto strcpy(path, "/"); 4996258945Sroberto } else 4997258945Sroberto strcpy(path, "."); 4998258945Sroberto#endif 4999258945Sroberto 5000258945Sroberto if (chmod(path, perm) < 0) { 5001258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5002258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 5003258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 5004258945Sroberto "isc_socket_permunix: chmod(%s, %d): %s", 5005258945Sroberto path, perm, strbuf); 5006258945Sroberto result = ISC_R_FAILURE; 5007258945Sroberto } 5008258945Sroberto if (chown(path, owner, group) < 0) { 5009258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5010258945Sroberto isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, 5011258945Sroberto ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 5012258945Sroberto "isc_socket_permunix: chown(%s, %d, %d): %s", 5013258945Sroberto path, owner, group, 5014258945Sroberto strbuf); 5015258945Sroberto result = ISC_R_FAILURE; 5016258945Sroberto } 5017258945Sroberto return (result); 5018258945Sroberto#else 5019258945Sroberto UNUSED(sockaddr); 5020258945Sroberto UNUSED(perm); 5021258945Sroberto UNUSED(owner); 5022258945Sroberto UNUSED(group); 5023258945Sroberto return (ISC_R_NOTIMPLEMENTED); 5024258945Sroberto#endif 5025258945Sroberto} 5026258945Sroberto 5027280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5028280849Scyisc__socket_bind(isc_socket_t *sock0, isc_sockaddr_t *sockaddr, 5029280849Scy unsigned int options) { 5030280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5031258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5032258945Sroberto int on = 1; 5033258945Sroberto 5034280849Scy REQUIRE(VALID_SOCKET(sock)); 5035280849Scy 5036258945Sroberto LOCK(&sock->lock); 5037258945Sroberto 5038258945Sroberto INSIST(!sock->bound); 5039280849Scy INSIST(!sock->dupped); 5040258945Sroberto 5041258945Sroberto if (sock->pf != sockaddr->type.sa.sa_family) { 5042258945Sroberto UNLOCK(&sock->lock); 5043258945Sroberto return (ISC_R_FAMILYMISMATCH); 5044258945Sroberto } 5045280849Scy 5046258945Sroberto /* 5047258945Sroberto * Only set SO_REUSEADDR when we want a specific port. 5048258945Sroberto */ 5049258945Sroberto#ifdef AF_UNIX 5050258945Sroberto if (sock->pf == AF_UNIX) 5051258945Sroberto goto bind_socket; 5052258945Sroberto#endif 5053258945Sroberto if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && 5054258945Sroberto isc_sockaddr_getport(sockaddr) != (in_port_t)0 && 5055258945Sroberto setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, 5056258945Sroberto sizeof(on)) < 0) { 5057258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 5058258945Sroberto "setsockopt(%d) %s", sock->fd, 5059258945Sroberto isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 5060258945Sroberto ISC_MSG_FAILED, "failed")); 5061258945Sroberto /* Press on... */ 5062258945Sroberto } 5063258945Sroberto#ifdef AF_UNIX 5064258945Sroberto bind_socket: 5065258945Sroberto#endif 5066258945Sroberto if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { 5067258945Sroberto inc_stats(sock->manager->stats, 5068258945Sroberto sock->statsindex[STATID_BINDFAIL]); 5069258945Sroberto 5070258945Sroberto UNLOCK(&sock->lock); 5071258945Sroberto switch (errno) { 5072258945Sroberto case EACCES: 5073258945Sroberto return (ISC_R_NOPERM); 5074258945Sroberto case EADDRNOTAVAIL: 5075258945Sroberto return (ISC_R_ADDRNOTAVAIL); 5076258945Sroberto case EADDRINUSE: 5077258945Sroberto return (ISC_R_ADDRINUSE); 5078258945Sroberto case EINVAL: 5079258945Sroberto return (ISC_R_BOUND); 5080258945Sroberto default: 5081258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5082258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", 5083258945Sroberto strbuf); 5084258945Sroberto return (ISC_R_UNEXPECTED); 5085258945Sroberto } 5086258945Sroberto } 5087258945Sroberto 5088258945Sroberto socket_log(sock, sockaddr, TRACE, 5089258945Sroberto isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound"); 5090258945Sroberto sock->bound = 1; 5091258945Sroberto 5092258945Sroberto UNLOCK(&sock->lock); 5093258945Sroberto return (ISC_R_SUCCESS); 5094258945Sroberto} 5095258945Sroberto 5096280849Scy/* 5097280849Scy * Enable this only for specific OS versions, and only when they have repaired 5098280849Scy * their problems with it. Until then, this is is broken and needs to be 5099280849Scy * diabled by default. See RT22589 for details. 5100280849Scy */ 5101280849Scy#undef ENABLE_ACCEPTFILTER 5102280849Scy 5103280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5104280849Scyisc__socket_filter(isc_socket_t *sock0, const char *filter) { 5105280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5106280849Scy#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) 5107258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5108258945Sroberto struct accept_filter_arg afa; 5109258945Sroberto#else 5110258945Sroberto UNUSED(sock); 5111258945Sroberto UNUSED(filter); 5112258945Sroberto#endif 5113258945Sroberto 5114258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5115258945Sroberto 5116280849Scy#if defined(SO_ACCEPTFILTER) && defined(ENABLE_ACCEPTFILTER) 5117258945Sroberto bzero(&afa, sizeof(afa)); 5118258945Sroberto strncpy(afa.af_name, filter, sizeof(afa.af_name)); 5119258945Sroberto if (setsockopt(sock->fd, SOL_SOCKET, SO_ACCEPTFILTER, 5120258945Sroberto &afa, sizeof(afa)) == -1) { 5121258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5122258945Sroberto socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 5123258945Sroberto ISC_MSG_FILTER, "setsockopt(SO_ACCEPTFILTER): %s", 5124258945Sroberto strbuf); 5125258945Sroberto return (ISC_R_FAILURE); 5126258945Sroberto } 5127258945Sroberto return (ISC_R_SUCCESS); 5128258945Sroberto#else 5129258945Sroberto return (ISC_R_NOTIMPLEMENTED); 5130258945Sroberto#endif 5131258945Sroberto} 5132258945Sroberto 5133258945Sroberto/* 5134258945Sroberto * Set up to listen on a given socket. We do this by creating an internal 5135258945Sroberto * event that will be dispatched when the socket has read activity. The 5136258945Sroberto * watcher will send the internal event to the task when there is a new 5137258945Sroberto * connection. 5138258945Sroberto * 5139258945Sroberto * Unlike in read, we don't preallocate a done event here. Every time there 5140258945Sroberto * is a new connection we'll have to allocate a new one anyway, so we might 5141258945Sroberto * as well keep things simple rather than having to track them. 5142258945Sroberto */ 5143280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5144280849Scyisc__socket_listen(isc_socket_t *sock0, unsigned int backlog) { 5145280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5146258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5147258945Sroberto 5148258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5149258945Sroberto 5150258945Sroberto LOCK(&sock->lock); 5151258945Sroberto 5152258945Sroberto REQUIRE(!sock->listener); 5153258945Sroberto REQUIRE(sock->bound); 5154258945Sroberto REQUIRE(sock->type == isc_sockettype_tcp || 5155258945Sroberto sock->type == isc_sockettype_unix); 5156258945Sroberto 5157258945Sroberto if (backlog == 0) 5158258945Sroberto backlog = SOMAXCONN; 5159258945Sroberto 5160258945Sroberto if (listen(sock->fd, (int)backlog) < 0) { 5161258945Sroberto UNLOCK(&sock->lock); 5162258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5163258945Sroberto 5164258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); 5165258945Sroberto 5166258945Sroberto return (ISC_R_UNEXPECTED); 5167258945Sroberto } 5168258945Sroberto 5169258945Sroberto sock->listener = 1; 5170258945Sroberto 5171258945Sroberto UNLOCK(&sock->lock); 5172258945Sroberto return (ISC_R_SUCCESS); 5173258945Sroberto} 5174258945Sroberto 5175258945Sroberto/* 5176258945Sroberto * This should try to do aggressive accept() XXXMLG 5177258945Sroberto */ 5178280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5179280849Scyisc__socket_accept(isc_socket_t *sock0, 5180258945Sroberto isc_task_t *task, isc_taskaction_t action, const void *arg) 5181258945Sroberto{ 5182280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5183258945Sroberto isc_socket_newconnev_t *dev; 5184280849Scy isc__socketmgr_t *manager; 5185258945Sroberto isc_task_t *ntask = NULL; 5186280849Scy isc__socket_t *nsock; 5187258945Sroberto isc_result_t result; 5188258945Sroberto isc_boolean_t do_poke = ISC_FALSE; 5189258945Sroberto 5190258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5191258945Sroberto manager = sock->manager; 5192258945Sroberto REQUIRE(VALID_MANAGER(manager)); 5193258945Sroberto 5194258945Sroberto LOCK(&sock->lock); 5195258945Sroberto 5196258945Sroberto REQUIRE(sock->listener); 5197258945Sroberto 5198258945Sroberto /* 5199258945Sroberto * Sender field is overloaded here with the task we will be sending 5200258945Sroberto * this event to. Just before the actual event is delivered the 5201258945Sroberto * actual ev_sender will be touched up to be the socket. 5202258945Sroberto */ 5203258945Sroberto dev = (isc_socket_newconnev_t *) 5204258945Sroberto isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN, 5205258945Sroberto action, arg, sizeof(*dev)); 5206258945Sroberto if (dev == NULL) { 5207258945Sroberto UNLOCK(&sock->lock); 5208258945Sroberto return (ISC_R_NOMEMORY); 5209258945Sroberto } 5210258945Sroberto ISC_LINK_INIT(dev, ev_link); 5211258945Sroberto 5212258945Sroberto result = allocate_socket(manager, sock->type, &nsock); 5213258945Sroberto if (result != ISC_R_SUCCESS) { 5214258945Sroberto isc_event_free(ISC_EVENT_PTR(&dev)); 5215258945Sroberto UNLOCK(&sock->lock); 5216258945Sroberto return (result); 5217258945Sroberto } 5218258945Sroberto 5219258945Sroberto /* 5220258945Sroberto * Attach to socket and to task. 5221258945Sroberto */ 5222258945Sroberto isc_task_attach(task, &ntask); 5223280849Scy if (isc_task_exiting(ntask)) { 5224280849Scy free_socket(&nsock); 5225280849Scy isc_task_detach(&ntask); 5226280849Scy isc_event_free(ISC_EVENT_PTR(&dev)); 5227280849Scy UNLOCK(&sock->lock); 5228280849Scy return (ISC_R_SHUTTINGDOWN); 5229280849Scy } 5230258945Sroberto nsock->references++; 5231258945Sroberto nsock->statsindex = sock->statsindex; 5232258945Sroberto 5233258945Sroberto dev->ev_sender = ntask; 5234280849Scy dev->newsocket = (isc_socket_t *)nsock; 5235258945Sroberto 5236258945Sroberto /* 5237258945Sroberto * Poke watcher here. We still have the socket locked, so there 5238258945Sroberto * is no race condition. We will keep the lock for such a short 5239258945Sroberto * bit of time waking it up now or later won't matter all that much. 5240258945Sroberto */ 5241258945Sroberto if (ISC_LIST_EMPTY(sock->accept_list)) 5242258945Sroberto do_poke = ISC_TRUE; 5243258945Sroberto 5244258945Sroberto ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link); 5245258945Sroberto 5246258945Sroberto if (do_poke) 5247258945Sroberto select_poke(manager, sock->fd, SELECT_POKE_ACCEPT); 5248258945Sroberto 5249258945Sroberto UNLOCK(&sock->lock); 5250258945Sroberto return (ISC_R_SUCCESS); 5251258945Sroberto} 5252258945Sroberto 5253280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5254280849Scyisc__socket_connect(isc_socket_t *sock0, isc_sockaddr_t *addr, 5255258945Sroberto isc_task_t *task, isc_taskaction_t action, const void *arg) 5256258945Sroberto{ 5257280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5258258945Sroberto isc_socket_connev_t *dev; 5259258945Sroberto isc_task_t *ntask = NULL; 5260280849Scy isc__socketmgr_t *manager; 5261258945Sroberto int cc; 5262258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5263280849Scy char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 5264258945Sroberto 5265258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5266258945Sroberto REQUIRE(addr != NULL); 5267258945Sroberto REQUIRE(task != NULL); 5268258945Sroberto REQUIRE(action != NULL); 5269258945Sroberto 5270258945Sroberto manager = sock->manager; 5271258945Sroberto REQUIRE(VALID_MANAGER(manager)); 5272258945Sroberto REQUIRE(addr != NULL); 5273258945Sroberto 5274258945Sroberto if (isc_sockaddr_ismulticast(addr)) 5275258945Sroberto return (ISC_R_MULTICAST); 5276258945Sroberto 5277258945Sroberto LOCK(&sock->lock); 5278258945Sroberto 5279258945Sroberto REQUIRE(!sock->connecting); 5280258945Sroberto 5281258945Sroberto dev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, 5282258945Sroberto ISC_SOCKEVENT_CONNECT, 5283258945Sroberto action, arg, 5284258945Sroberto sizeof(*dev)); 5285258945Sroberto if (dev == NULL) { 5286258945Sroberto UNLOCK(&sock->lock); 5287258945Sroberto return (ISC_R_NOMEMORY); 5288258945Sroberto } 5289258945Sroberto ISC_LINK_INIT(dev, ev_link); 5290258945Sroberto 5291258945Sroberto /* 5292258945Sroberto * Try to do the connect right away, as there can be only one 5293258945Sroberto * outstanding, and it might happen to complete. 5294258945Sroberto */ 5295258945Sroberto sock->peer_address = *addr; 5296258945Sroberto cc = connect(sock->fd, &addr->type.sa, addr->length); 5297258945Sroberto if (cc < 0) { 5298258945Sroberto /* 5299258945Sroberto * HP-UX "fails" to connect a UDP socket and sets errno to 5300258945Sroberto * EINPROGRESS if it's non-blocking. We'd rather regard this as 5301258945Sroberto * a success and let the user detect it if it's really an error 5302258945Sroberto * at the time of sending a packet on the socket. 5303258945Sroberto */ 5304258945Sroberto if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { 5305258945Sroberto cc = 0; 5306258945Sroberto goto success; 5307258945Sroberto } 5308258945Sroberto if (SOFT_ERROR(errno) || errno == EINPROGRESS) 5309258945Sroberto goto queue; 5310258945Sroberto 5311258945Sroberto switch (errno) { 5312258945Sroberto#define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit; 5313258945Sroberto ERROR_MATCH(EACCES, ISC_R_NOPERM); 5314258945Sroberto ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 5315258945Sroberto ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 5316258945Sroberto ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); 5317258945Sroberto ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); 5318258945Sroberto#ifdef EHOSTDOWN 5319258945Sroberto ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); 5320258945Sroberto#endif 5321258945Sroberto ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); 5322258945Sroberto ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); 5323258945Sroberto ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); 5324258945Sroberto ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); 5325258945Sroberto ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); 5326258945Sroberto#undef ERROR_MATCH 5327258945Sroberto } 5328258945Sroberto 5329258945Sroberto sock->connected = 0; 5330258945Sroberto 5331258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5332280849Scy isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf)); 5333280849Scy UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s", 5334280849Scy addrbuf, errno, strbuf); 5335258945Sroberto 5336258945Sroberto UNLOCK(&sock->lock); 5337258945Sroberto inc_stats(sock->manager->stats, 5338258945Sroberto sock->statsindex[STATID_CONNECTFAIL]); 5339258945Sroberto isc_event_free(ISC_EVENT_PTR(&dev)); 5340258945Sroberto return (ISC_R_UNEXPECTED); 5341258945Sroberto 5342258945Sroberto err_exit: 5343258945Sroberto sock->connected = 0; 5344258945Sroberto isc_task_send(task, ISC_EVENT_PTR(&dev)); 5345258945Sroberto 5346258945Sroberto UNLOCK(&sock->lock); 5347258945Sroberto inc_stats(sock->manager->stats, 5348258945Sroberto sock->statsindex[STATID_CONNECTFAIL]); 5349258945Sroberto return (ISC_R_SUCCESS); 5350258945Sroberto } 5351258945Sroberto 5352258945Sroberto /* 5353258945Sroberto * If connect completed, fire off the done event. 5354258945Sroberto */ 5355258945Sroberto success: 5356258945Sroberto if (cc == 0) { 5357258945Sroberto sock->connected = 1; 5358258945Sroberto sock->bound = 1; 5359258945Sroberto dev->result = ISC_R_SUCCESS; 5360258945Sroberto isc_task_send(task, ISC_EVENT_PTR(&dev)); 5361258945Sroberto 5362258945Sroberto UNLOCK(&sock->lock); 5363258945Sroberto 5364258945Sroberto inc_stats(sock->manager->stats, 5365258945Sroberto sock->statsindex[STATID_CONNECT]); 5366258945Sroberto 5367258945Sroberto return (ISC_R_SUCCESS); 5368258945Sroberto } 5369258945Sroberto 5370258945Sroberto queue: 5371258945Sroberto 5372258945Sroberto /* 5373258945Sroberto * Attach to task. 5374258945Sroberto */ 5375258945Sroberto isc_task_attach(task, &ntask); 5376258945Sroberto 5377258945Sroberto sock->connecting = 1; 5378258945Sroberto 5379258945Sroberto dev->ev_sender = ntask; 5380258945Sroberto 5381258945Sroberto /* 5382258945Sroberto * Poke watcher here. We still have the socket locked, so there 5383258945Sroberto * is no race condition. We will keep the lock for such a short 5384258945Sroberto * bit of time waking it up now or later won't matter all that much. 5385258945Sroberto */ 5386258945Sroberto if (sock->connect_ev == NULL) 5387258945Sroberto select_poke(manager, sock->fd, SELECT_POKE_CONNECT); 5388258945Sroberto 5389258945Sroberto sock->connect_ev = dev; 5390258945Sroberto 5391258945Sroberto UNLOCK(&sock->lock); 5392258945Sroberto return (ISC_R_SUCCESS); 5393258945Sroberto} 5394258945Sroberto 5395258945Sroberto/* 5396258945Sroberto * Called when a socket with a pending connect() finishes. 5397258945Sroberto */ 5398258945Srobertostatic void 5399258945Srobertointernal_connect(isc_task_t *me, isc_event_t *ev) { 5400280849Scy isc__socket_t *sock; 5401258945Sroberto isc_socket_connev_t *dev; 5402258945Sroberto isc_task_t *task; 5403258945Sroberto int cc; 5404258945Sroberto ISC_SOCKADDR_LEN_T optlen; 5405258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5406258945Sroberto char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 5407258945Sroberto 5408258945Sroberto UNUSED(me); 5409258945Sroberto INSIST(ev->ev_type == ISC_SOCKEVENT_INTW); 5410258945Sroberto 5411258945Sroberto sock = ev->ev_sender; 5412258945Sroberto INSIST(VALID_SOCKET(sock)); 5413258945Sroberto 5414258945Sroberto LOCK(&sock->lock); 5415258945Sroberto 5416258945Sroberto /* 5417258945Sroberto * When the internal event was sent the reference count was bumped 5418258945Sroberto * to keep the socket around for us. Decrement the count here. 5419258945Sroberto */ 5420258945Sroberto INSIST(sock->references > 0); 5421258945Sroberto sock->references--; 5422258945Sroberto if (sock->references == 0) { 5423258945Sroberto UNLOCK(&sock->lock); 5424258945Sroberto destroy(&sock); 5425258945Sroberto return; 5426258945Sroberto } 5427258945Sroberto 5428258945Sroberto /* 5429258945Sroberto * Has this event been canceled? 5430258945Sroberto */ 5431258945Sroberto dev = sock->connect_ev; 5432258945Sroberto if (dev == NULL) { 5433258945Sroberto INSIST(!sock->connecting); 5434258945Sroberto UNLOCK(&sock->lock); 5435258945Sroberto return; 5436258945Sroberto } 5437258945Sroberto 5438258945Sroberto INSIST(sock->connecting); 5439258945Sroberto sock->connecting = 0; 5440258945Sroberto 5441258945Sroberto /* 5442258945Sroberto * Get any possible error status here. 5443258945Sroberto */ 5444258945Sroberto optlen = sizeof(cc); 5445258945Sroberto if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, 5446258945Sroberto (void *)&cc, (void *)&optlen) < 0) 5447258945Sroberto cc = errno; 5448258945Sroberto else 5449258945Sroberto errno = cc; 5450258945Sroberto 5451258945Sroberto if (errno != 0) { 5452258945Sroberto /* 5453258945Sroberto * If the error is EAGAIN, just re-select on this 5454258945Sroberto * fd and pretend nothing strange happened. 5455258945Sroberto */ 5456258945Sroberto if (SOFT_ERROR(errno) || errno == EINPROGRESS) { 5457258945Sroberto sock->connecting = 1; 5458258945Sroberto select_poke(sock->manager, sock->fd, 5459258945Sroberto SELECT_POKE_CONNECT); 5460258945Sroberto UNLOCK(&sock->lock); 5461258945Sroberto 5462258945Sroberto return; 5463258945Sroberto } 5464258945Sroberto 5465258945Sroberto inc_stats(sock->manager->stats, 5466258945Sroberto sock->statsindex[STATID_CONNECTFAIL]); 5467258945Sroberto 5468258945Sroberto /* 5469258945Sroberto * Translate other errors into ISC_R_* flavors. 5470258945Sroberto */ 5471258945Sroberto switch (errno) { 5472258945Sroberto#define ERROR_MATCH(a, b) case a: dev->result = b; break; 5473258945Sroberto ERROR_MATCH(EACCES, ISC_R_NOPERM); 5474258945Sroberto ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 5475258945Sroberto ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 5476258945Sroberto ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED); 5477258945Sroberto ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH); 5478258945Sroberto#ifdef EHOSTDOWN 5479258945Sroberto ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH); 5480258945Sroberto#endif 5481258945Sroberto ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH); 5482258945Sroberto ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES); 5483258945Sroberto ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH); 5484258945Sroberto ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED); 5485258945Sroberto ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT); 5486258945Sroberto ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET); 5487258945Sroberto#undef ERROR_MATCH 5488258945Sroberto default: 5489258945Sroberto dev->result = ISC_R_UNEXPECTED; 5490258945Sroberto isc_sockaddr_format(&sock->peer_address, peerbuf, 5491258945Sroberto sizeof(peerbuf)); 5492258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5493258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 5494258945Sroberto "internal_connect: connect(%s) %s", 5495258945Sroberto peerbuf, strbuf); 5496258945Sroberto } 5497258945Sroberto } else { 5498258945Sroberto inc_stats(sock->manager->stats, 5499258945Sroberto sock->statsindex[STATID_CONNECT]); 5500258945Sroberto dev->result = ISC_R_SUCCESS; 5501258945Sroberto sock->connected = 1; 5502258945Sroberto sock->bound = 1; 5503258945Sroberto } 5504258945Sroberto 5505258945Sroberto sock->connect_ev = NULL; 5506258945Sroberto 5507258945Sroberto UNLOCK(&sock->lock); 5508258945Sroberto 5509258945Sroberto task = dev->ev_sender; 5510258945Sroberto dev->ev_sender = sock; 5511258945Sroberto isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev)); 5512258945Sroberto} 5513258945Sroberto 5514280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5515280849Scyisc__socket_getpeername(isc_socket_t *sock0, isc_sockaddr_t *addressp) { 5516280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5517258945Sroberto isc_result_t result; 5518258945Sroberto 5519258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5520258945Sroberto REQUIRE(addressp != NULL); 5521258945Sroberto 5522258945Sroberto LOCK(&sock->lock); 5523258945Sroberto 5524258945Sroberto if (sock->connected) { 5525258945Sroberto *addressp = sock->peer_address; 5526258945Sroberto result = ISC_R_SUCCESS; 5527258945Sroberto } else { 5528258945Sroberto result = ISC_R_NOTCONNECTED; 5529258945Sroberto } 5530258945Sroberto 5531258945Sroberto UNLOCK(&sock->lock); 5532258945Sroberto 5533258945Sroberto return (result); 5534258945Sroberto} 5535258945Sroberto 5536280849ScyISC_SOCKETFUNC_SCOPE isc_result_t 5537280849Scyisc__socket_getsockname(isc_socket_t *sock0, isc_sockaddr_t *addressp) { 5538280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5539258945Sroberto ISC_SOCKADDR_LEN_T len; 5540258945Sroberto isc_result_t result; 5541258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5542258945Sroberto 5543258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5544258945Sroberto REQUIRE(addressp != NULL); 5545258945Sroberto 5546258945Sroberto LOCK(&sock->lock); 5547258945Sroberto 5548258945Sroberto if (!sock->bound) { 5549258945Sroberto result = ISC_R_NOTBOUND; 5550258945Sroberto goto out; 5551258945Sroberto } 5552258945Sroberto 5553258945Sroberto result = ISC_R_SUCCESS; 5554258945Sroberto 5555258945Sroberto len = sizeof(addressp->type); 5556258945Sroberto if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { 5557258945Sroberto isc__strerror(errno, strbuf, sizeof(strbuf)); 5558258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", 5559258945Sroberto strbuf); 5560258945Sroberto result = ISC_R_UNEXPECTED; 5561258945Sroberto goto out; 5562258945Sroberto } 5563258945Sroberto addressp->length = (unsigned int)len; 5564258945Sroberto 5565258945Sroberto out: 5566258945Sroberto UNLOCK(&sock->lock); 5567258945Sroberto 5568258945Sroberto return (result); 5569258945Sroberto} 5570258945Sroberto 5571258945Sroberto/* 5572258945Sroberto * Run through the list of events on this socket, and cancel the ones 5573258945Sroberto * queued for task "task" of type "how". "how" is a bitmask. 5574258945Sroberto */ 5575280849ScyISC_SOCKETFUNC_SCOPE void 5576280849Scyisc__socket_cancel(isc_socket_t *sock0, isc_task_t *task, unsigned int how) { 5577280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5578258945Sroberto 5579258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5580258945Sroberto 5581258945Sroberto /* 5582258945Sroberto * Quick exit if there is nothing to do. Don't even bother locking 5583258945Sroberto * in this case. 5584258945Sroberto */ 5585258945Sroberto if (how == 0) 5586258945Sroberto return; 5587258945Sroberto 5588258945Sroberto LOCK(&sock->lock); 5589258945Sroberto 5590258945Sroberto /* 5591258945Sroberto * All of these do the same thing, more or less. 5592258945Sroberto * Each will: 5593258945Sroberto * o If the internal event is marked as "posted" try to 5594258945Sroberto * remove it from the task's queue. If this fails, mark it 5595258945Sroberto * as canceled instead, and let the task clean it up later. 5596258945Sroberto * o For each I/O request for that task of that type, post 5597258945Sroberto * its done event with status of "ISC_R_CANCELED". 5598258945Sroberto * o Reset any state needed. 5599258945Sroberto */ 5600258945Sroberto if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) 5601258945Sroberto && !ISC_LIST_EMPTY(sock->recv_list)) { 5602258945Sroberto isc_socketevent_t *dev; 5603258945Sroberto isc_socketevent_t *next; 5604258945Sroberto isc_task_t *current_task; 5605258945Sroberto 5606258945Sroberto dev = ISC_LIST_HEAD(sock->recv_list); 5607258945Sroberto 5608258945Sroberto while (dev != NULL) { 5609258945Sroberto current_task = dev->ev_sender; 5610258945Sroberto next = ISC_LIST_NEXT(dev, ev_link); 5611258945Sroberto 5612258945Sroberto if ((task == NULL) || (task == current_task)) { 5613258945Sroberto dev->result = ISC_R_CANCELED; 5614258945Sroberto send_recvdone_event(sock, &dev); 5615258945Sroberto } 5616258945Sroberto dev = next; 5617258945Sroberto } 5618258945Sroberto } 5619258945Sroberto 5620258945Sroberto if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) 5621258945Sroberto && !ISC_LIST_EMPTY(sock->send_list)) { 5622258945Sroberto isc_socketevent_t *dev; 5623258945Sroberto isc_socketevent_t *next; 5624258945Sroberto isc_task_t *current_task; 5625258945Sroberto 5626258945Sroberto dev = ISC_LIST_HEAD(sock->send_list); 5627258945Sroberto 5628258945Sroberto while (dev != NULL) { 5629258945Sroberto current_task = dev->ev_sender; 5630258945Sroberto next = ISC_LIST_NEXT(dev, ev_link); 5631258945Sroberto 5632258945Sroberto if ((task == NULL) || (task == current_task)) { 5633258945Sroberto dev->result = ISC_R_CANCELED; 5634258945Sroberto send_senddone_event(sock, &dev); 5635258945Sroberto } 5636258945Sroberto dev = next; 5637258945Sroberto } 5638258945Sroberto } 5639258945Sroberto 5640258945Sroberto if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT) 5641258945Sroberto && !ISC_LIST_EMPTY(sock->accept_list)) { 5642258945Sroberto isc_socket_newconnev_t *dev; 5643258945Sroberto isc_socket_newconnev_t *next; 5644258945Sroberto isc_task_t *current_task; 5645258945Sroberto 5646258945Sroberto dev = ISC_LIST_HEAD(sock->accept_list); 5647258945Sroberto while (dev != NULL) { 5648258945Sroberto current_task = dev->ev_sender; 5649258945Sroberto next = ISC_LIST_NEXT(dev, ev_link); 5650258945Sroberto 5651258945Sroberto if ((task == NULL) || (task == current_task)) { 5652258945Sroberto 5653258945Sroberto ISC_LIST_UNLINK(sock->accept_list, dev, 5654258945Sroberto ev_link); 5655258945Sroberto 5656280849Scy NEWCONNSOCK(dev)->references--; 5657280849Scy free_socket((isc__socket_t **)&dev->newsocket); 5658258945Sroberto 5659258945Sroberto dev->result = ISC_R_CANCELED; 5660258945Sroberto dev->ev_sender = sock; 5661258945Sroberto isc_task_sendanddetach(¤t_task, 5662258945Sroberto ISC_EVENT_PTR(&dev)); 5663258945Sroberto } 5664258945Sroberto 5665258945Sroberto dev = next; 5666258945Sroberto } 5667258945Sroberto } 5668258945Sroberto 5669258945Sroberto /* 5670258945Sroberto * Connecting is not a list. 5671258945Sroberto */ 5672258945Sroberto if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 5673258945Sroberto && sock->connect_ev != NULL) { 5674258945Sroberto isc_socket_connev_t *dev; 5675258945Sroberto isc_task_t *current_task; 5676258945Sroberto 5677258945Sroberto INSIST(sock->connecting); 5678258945Sroberto sock->connecting = 0; 5679258945Sroberto 5680258945Sroberto dev = sock->connect_ev; 5681258945Sroberto current_task = dev->ev_sender; 5682258945Sroberto 5683258945Sroberto if ((task == NULL) || (task == current_task)) { 5684258945Sroberto sock->connect_ev = NULL; 5685258945Sroberto 5686258945Sroberto dev->result = ISC_R_CANCELED; 5687258945Sroberto dev->ev_sender = sock; 5688258945Sroberto isc_task_sendanddetach(¤t_task, 5689258945Sroberto ISC_EVENT_PTR(&dev)); 5690258945Sroberto } 5691258945Sroberto } 5692258945Sroberto 5693258945Sroberto UNLOCK(&sock->lock); 5694258945Sroberto} 5695258945Sroberto 5696280849ScyISC_SOCKETFUNC_SCOPE isc_sockettype_t 5697280849Scyisc__socket_gettype(isc_socket_t *sock0) { 5698280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5699280849Scy 5700258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5701258945Sroberto 5702258945Sroberto return (sock->type); 5703258945Sroberto} 5704258945Sroberto 5705280849ScyISC_SOCKETFUNC_SCOPE isc_boolean_t 5706280849Scyisc__socket_isbound(isc_socket_t *sock0) { 5707280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5708258945Sroberto isc_boolean_t val; 5709258945Sroberto 5710280849Scy REQUIRE(VALID_SOCKET(sock)); 5711280849Scy 5712258945Sroberto LOCK(&sock->lock); 5713258945Sroberto val = ((sock->bound) ? ISC_TRUE : ISC_FALSE); 5714258945Sroberto UNLOCK(&sock->lock); 5715258945Sroberto 5716258945Sroberto return (val); 5717258945Sroberto} 5718258945Sroberto 5719280849ScyISC_SOCKETFUNC_SCOPE void 5720280849Scyisc__socket_ipv6only(isc_socket_t *sock0, isc_boolean_t yes) { 5721280849Scy isc__socket_t *sock = (isc__socket_t *)sock0; 5722258945Sroberto#if defined(IPV6_V6ONLY) 5723258945Sroberto int onoff = yes ? 1 : 0; 5724258945Sroberto#else 5725258945Sroberto UNUSED(yes); 5726258945Sroberto UNUSED(sock); 5727258945Sroberto#endif 5728258945Sroberto 5729258945Sroberto REQUIRE(VALID_SOCKET(sock)); 5730280849Scy INSIST(!sock->dupped); 5731258945Sroberto 5732258945Sroberto#ifdef IPV6_V6ONLY 5733258945Sroberto if (sock->pf == AF_INET6) { 5734258945Sroberto if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, 5735258945Sroberto (void *)&onoff, sizeof(int)) < 0) { 5736258945Sroberto char strbuf[ISC_STRERRORSIZE]; 5737280849Scy isc__strerror(errno, strbuf, sizeof(strbuf)); 5738258945Sroberto UNEXPECTED_ERROR(__FILE__, __LINE__, 5739258945Sroberto "setsockopt(%d, IPV6_V6ONLY) " 5740258945Sroberto "%s: %s", sock->fd, 5741258945Sroberto isc_msgcat_get(isc_msgcat, 5742258945Sroberto ISC_MSGSET_GENERAL, 5743258945Sroberto ISC_MSG_FAILED, 5744258945Sroberto "failed"), 5745258945Sroberto strbuf); 5746258945Sroberto } 5747258945Sroberto } 5748258945Sroberto FIX_IPV6_RECVPKTINFO(sock); /* AIX */ 5749258945Sroberto#endif 5750258945Sroberto} 5751258945Sroberto 5752280849Scy#ifndef USE_WATCHER_THREAD 5753280849Scy/* 5754280849Scy * In our assumed scenario, we can simply use a single static object. 5755280849Scy * XXX: this is not true if the application uses multiple threads with 5756280849Scy * 'multi-context' mode. Fixing this is a future TODO item. 5757280849Scy */ 5758258945Srobertostatic isc_socketwait_t swait_private; 5759258945Sroberto 5760258945Srobertoint 5761280849Scyisc__socketmgr_waitevents(isc_socketmgr_t *manager0, struct timeval *tvp, 5762280849Scy isc_socketwait_t **swaitp) 5763280849Scy{ 5764280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 5765280849Scy 5766280849Scy 5767258945Sroberto int n; 5768258945Sroberto#ifdef USE_KQUEUE 5769258945Sroberto struct timespec ts, *tsp; 5770258945Sroberto#endif 5771258945Sroberto#ifdef USE_EPOLL 5772258945Sroberto int timeout; 5773258945Sroberto#endif 5774258945Sroberto#ifdef USE_DEVPOLL 5775258945Sroberto struct dvpoll dvp; 5776258945Sroberto#endif 5777258945Sroberto 5778258945Sroberto REQUIRE(swaitp != NULL && *swaitp == NULL); 5779258945Sroberto 5780280849Scy#ifdef USE_SHARED_MANAGER 5781280849Scy if (manager == NULL) 5782280849Scy manager = socketmgr; 5783280849Scy#endif 5784280849Scy if (manager == NULL) 5785258945Sroberto return (0); 5786258945Sroberto 5787258945Sroberto#ifdef USE_KQUEUE 5788258945Sroberto if (tvp != NULL) { 5789258945Sroberto ts.tv_sec = tvp->tv_sec; 5790258945Sroberto ts.tv_nsec = tvp->tv_usec * 1000; 5791258945Sroberto tsp = &ts; 5792258945Sroberto } else 5793258945Sroberto tsp = NULL; 5794280849Scy swait_private.nevents = kevent(manager->kqueue_fd, NULL, 0, 5795280849Scy manager->events, manager->nevents, 5796258945Sroberto tsp); 5797258945Sroberto n = swait_private.nevents; 5798258945Sroberto#elif defined(USE_EPOLL) 5799258945Sroberto if (tvp != NULL) 5800258945Sroberto timeout = tvp->tv_sec * 1000 + (tvp->tv_usec + 999) / 1000; 5801258945Sroberto else 5802258945Sroberto timeout = -1; 5803280849Scy swait_private.nevents = epoll_wait(manager->epoll_fd, 5804280849Scy manager->events, 5805280849Scy manager->nevents, timeout); 5806258945Sroberto n = swait_private.nevents; 5807258945Sroberto#elif defined(USE_DEVPOLL) 5808280849Scy dvp.dp_fds = manager->events; 5809280849Scy dvp.dp_nfds = manager->nevents; 5810258945Sroberto if (tvp != NULL) { 5811258945Sroberto dvp.dp_timeout = tvp->tv_sec * 1000 + 5812258945Sroberto (tvp->tv_usec + 999) / 1000; 5813258945Sroberto } else 5814258945Sroberto dvp.dp_timeout = -1; 5815280849Scy swait_private.nevents = ioctl(manager->devpoll_fd, DP_POLL, &dvp); 5816258945Sroberto n = swait_private.nevents; 5817258945Sroberto#elif defined(USE_SELECT) 5818280849Scy memcpy(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize); 5819280849Scy memcpy(manager->write_fds_copy, manager->write_fds, 5820280849Scy manager->fd_bufsize); 5821258945Sroberto 5822280849Scy swait_private.readset = manager->read_fds_copy; 5823280849Scy swait_private.writeset = manager->write_fds_copy; 5824280849Scy swait_private.maxfd = manager->maxfd + 1; 5825258945Sroberto 5826258945Sroberto n = select(swait_private.maxfd, swait_private.readset, 5827258945Sroberto swait_private.writeset, NULL, tvp); 5828258945Sroberto#endif 5829258945Sroberto 5830258945Sroberto *swaitp = &swait_private; 5831258945Sroberto return (n); 5832258945Sroberto} 5833258945Sroberto 5834258945Srobertoisc_result_t 5835280849Scyisc__socketmgr_dispatch(isc_socketmgr_t *manager0, isc_socketwait_t *swait) { 5836280849Scy isc__socketmgr_t *manager = (isc__socketmgr_t *)manager0; 5837280849Scy 5838258945Sroberto REQUIRE(swait == &swait_private); 5839258945Sroberto 5840280849Scy#ifdef USE_SHARED_MANAGER 5841280849Scy if (manager == NULL) 5842280849Scy manager = socketmgr; 5843280849Scy#endif 5844280849Scy if (manager == NULL) 5845258945Sroberto return (ISC_R_NOTFOUND); 5846258945Sroberto 5847258945Sroberto#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) 5848280849Scy (void)process_fds(manager, manager->events, swait->nevents); 5849258945Sroberto return (ISC_R_SUCCESS); 5850258945Sroberto#elif defined(USE_SELECT) 5851280849Scy process_fds(manager, swait->maxfd, swait->readset, swait->writeset); 5852258945Sroberto return (ISC_R_SUCCESS); 5853258945Sroberto#endif 5854258945Sroberto} 5855280849Scy#endif /* USE_WATCHER_THREAD */ 5856258945Sroberto 5857280849Scy#ifdef BIND9 5858258945Srobertovoid 5859280849Scyisc__socket_setname(isc_socket_t *socket0, const char *name, void *tag) { 5860280849Scy isc__socket_t *socket = (isc__socket_t *)socket0; 5861258945Sroberto 5862258945Sroberto /* 5863258945Sroberto * Name 'socket'. 5864258945Sroberto */ 5865258945Sroberto 5866258945Sroberto REQUIRE(VALID_SOCKET(socket)); 5867258945Sroberto 5868258945Sroberto LOCK(&socket->lock); 5869258945Sroberto memset(socket->name, 0, sizeof(socket->name)); 5870258945Sroberto strncpy(socket->name, name, sizeof(socket->name) - 1); 5871258945Sroberto socket->tag = tag; 5872258945Sroberto UNLOCK(&socket->lock); 5873258945Sroberto} 5874258945Sroberto 5875280849ScyISC_SOCKETFUNC_SCOPE const char * 5876280849Scyisc__socket_getname(isc_socket_t *socket0) { 5877280849Scy isc__socket_t *socket = (isc__socket_t *)socket0; 5878280849Scy 5879258945Sroberto return (socket->name); 5880258945Sroberto} 5881258945Sroberto 5882258945Srobertovoid * 5883280849Scyisc__socket_gettag(isc_socket_t *socket0) { 5884280849Scy isc__socket_t *socket = (isc__socket_t *)socket0; 5885280849Scy 5886258945Sroberto return (socket->tag); 5887258945Sroberto} 5888280849Scy#endif /* BIND9 */ 5889258945Sroberto 5890280849Scy#ifdef USE_SOCKETIMPREGISTER 5891280849Scyisc_result_t 5892280849Scyisc__socket_register() { 5893280849Scy return (isc_socket_register(isc__socketmgr_create)); 5894280849Scy} 5895280849Scy#endif 5896258945Sroberto 5897280849ScyISC_SOCKETFUNC_SCOPE int 5898280849Scyisc__socket_getfd(isc_socket_t *socket0) { 5899280849Scy isc__socket_t *socket = (isc__socket_t *)socket0; 5900280849Scy 5901280849Scy return ((short) socket->fd); 5902280849Scy} 5903280849Scy 5904280849Scy#if defined(HAVE_LIBXML2) && defined(BIND9) 5905280849Scy 5906258945Srobertostatic const char * 5907258945Sroberto_socktype(isc_sockettype_t type) 5908258945Sroberto{ 5909258945Sroberto if (type == isc_sockettype_udp) 5910258945Sroberto return ("udp"); 5911258945Sroberto else if (type == isc_sockettype_tcp) 5912258945Sroberto return ("tcp"); 5913258945Sroberto else if (type == isc_sockettype_unix) 5914258945Sroberto return ("unix"); 5915258945Sroberto else if (type == isc_sockettype_fdwatch) 5916258945Sroberto return ("fdwatch"); 5917258945Sroberto else 5918258945Sroberto return ("not-initialized"); 5919258945Sroberto} 5920258945Sroberto 5921280849ScyISC_SOCKETFUNC_SCOPE void 5922280849Scyisc_socketmgr_renderxml(isc_socketmgr_t *mgr0, xmlTextWriterPtr writer) { 5923280849Scy isc__socketmgr_t *mgr = (isc__socketmgr_t *)mgr0; 5924280849Scy isc__socket_t *sock; 5925258945Sroberto char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 5926258945Sroberto isc_sockaddr_t addr; 5927258945Sroberto ISC_SOCKADDR_LEN_T len; 5928258945Sroberto 5929258945Sroberto LOCK(&mgr->lock); 5930258945Sroberto 5931280849Scy#ifdef USE_SHARED_MANAGER 5932258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "references"); 5933258945Sroberto xmlTextWriterWriteFormatString(writer, "%d", mgr->refs); 5934258945Sroberto xmlTextWriterEndElement(writer); 5935280849Scy#endif /* USE_SHARED_MANAGER */ 5936258945Sroberto 5937258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets"); 5938258945Sroberto sock = ISC_LIST_HEAD(mgr->socklist); 5939258945Sroberto while (sock != NULL) { 5940258945Sroberto LOCK(&sock->lock); 5941258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket"); 5942258945Sroberto 5943258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "id"); 5944258945Sroberto xmlTextWriterWriteFormatString(writer, "%p", sock); 5945258945Sroberto xmlTextWriterEndElement(writer); 5946258945Sroberto 5947258945Sroberto if (sock->name[0] != 0) { 5948258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "name"); 5949258945Sroberto xmlTextWriterWriteFormatString(writer, "%s", 5950258945Sroberto sock->name); 5951258945Sroberto xmlTextWriterEndElement(writer); /* name */ 5952258945Sroberto } 5953258945Sroberto 5954258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "references"); 5955258945Sroberto xmlTextWriterWriteFormatString(writer, "%d", sock->references); 5956258945Sroberto xmlTextWriterEndElement(writer); 5957258945Sroberto 5958258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type", 5959258945Sroberto ISC_XMLCHAR _socktype(sock->type)); 5960258945Sroberto 5961258945Sroberto if (sock->connected) { 5962258945Sroberto isc_sockaddr_format(&sock->peer_address, peerbuf, 5963258945Sroberto sizeof(peerbuf)); 5964258945Sroberto xmlTextWriterWriteElement(writer, 5965258945Sroberto ISC_XMLCHAR "peer-address", 5966258945Sroberto ISC_XMLCHAR peerbuf); 5967258945Sroberto } 5968258945Sroberto 5969258945Sroberto len = sizeof(addr); 5970258945Sroberto if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { 5971258945Sroberto isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); 5972258945Sroberto xmlTextWriterWriteElement(writer, 5973258945Sroberto ISC_XMLCHAR "local-address", 5974258945Sroberto ISC_XMLCHAR peerbuf); 5975258945Sroberto } 5976258945Sroberto 5977258945Sroberto xmlTextWriterStartElement(writer, ISC_XMLCHAR "states"); 5978258945Sroberto if (sock->pending_recv) 5979258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5980258945Sroberto ISC_XMLCHAR "pending-receive"); 5981258945Sroberto if (sock->pending_send) 5982258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5983258945Sroberto ISC_XMLCHAR "pending-send"); 5984258945Sroberto if (sock->pending_accept) 5985258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5986258945Sroberto ISC_XMLCHAR "pending_accept"); 5987258945Sroberto if (sock->listener) 5988258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5989258945Sroberto ISC_XMLCHAR "listener"); 5990258945Sroberto if (sock->connected) 5991258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5992258945Sroberto ISC_XMLCHAR "connected"); 5993258945Sroberto if (sock->connecting) 5994258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5995258945Sroberto ISC_XMLCHAR "connecting"); 5996258945Sroberto if (sock->bound) 5997258945Sroberto xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state", 5998258945Sroberto ISC_XMLCHAR "bound"); 5999258945Sroberto 6000258945Sroberto xmlTextWriterEndElement(writer); /* states */ 6001258945Sroberto 6002258945Sroberto xmlTextWriterEndElement(writer); /* socket */ 6003258945Sroberto 6004258945Sroberto UNLOCK(&sock->lock); 6005258945Sroberto sock = ISC_LIST_NEXT(sock, link); 6006258945Sroberto } 6007258945Sroberto xmlTextWriterEndElement(writer); /* sockets */ 6008258945Sroberto 6009258945Sroberto UNLOCK(&mgr->lock); 6010258945Sroberto} 6011258945Sroberto#endif /* HAVE_LIBXML2 */ 6012