nfs_nfsiod.c revision 212506
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_nfsiod.c 212506 2010-09-12 19:06:08Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/namei.h>
#include <sys/unistd.h>
#include <sys/kthread.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/xdr_subs.h>
#include <nfs/nfsproto.h>
6983651Speter#include <nfsclient/nfs.h> 7083651Speter#include <nfsclient/nfsm_subs.h> 7183651Speter#include <nfsclient/nfsmount.h> 7283651Speter#include <nfsclient/nfsnode.h> 73210455Srmacklem#include <nfs/nfs_lock.h> 741541Srgrimes 75151897Srwatsonstatic MALLOC_DEFINE(M_NFSSVC, "nfsclient_srvsock", "Nfs server structure"); 7630309Sphk 7783651Speterstatic void nfssvc_iod(void *); 781541Srgrimes 79212506Skibstruct nfsiod_str { 80212506Skib STAILQ_ENTRY(nfsiod_str) ni_links; 81212506Skib int *ni_inst; 82212506Skib int ni_iod; 83212506Skib int ni_error; 84212506Skib int ni_done; 85212506Skib}; 86212506Skibstatic STAILQ_HEAD(, nfsiod_str) nfsiodhead = 87212506Skib STAILQ_HEAD_INITIALIZER(nfsiodhead); 88212506Skib 891541Srgrimesstatic int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; 9012457Sbde 9144112SdfrSYSCTL_DECL(_vfs_nfs); 9244112Sdfr 9389407Speter/* Maximum number of seconds a nfsiod kthread will sleep before exiting */ 9489407Speterstatic unsigned int nfs_iodmaxidle = 120; 95184561StrhodesSYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0, 96184561Strhodes "Max number of seconds an nfsiod kthread will sleep before exiting"); 9789407Speter 9889407Speter/* Maximum number of nfsiod kthreads */ 99128111Speadarunsigned int nfs_iodmax = 20; 10089407Speter 10189324Speter/* Minimum number of nfsiod kthreads to keep as spares */ 102158903Srwatsonstatic unsigned int nfs_iodmin = 0; 10389324Speter 10489407Speterstatic int 10589407Spetersysctl_iodmin(SYSCTL_HANDLER_ARGS) 10689407Speter{ 10789407Speter int error, i; 10889407Speter int newmin; 10989324Speter 11089407Speter newmin = nfs_iodmin; 11189407Speter error = sysctl_handle_int(oidp, &newmin, 0, req); 11289407Speter if (error || (req->newptr == NULL)) 11389407Speter return (error); 114158739Smohans mtx_lock(&nfs_iod_mtx); 115158739Smohans if (newmin > nfs_iodmax) { 116158739Smohans error = EINVAL; 117158739Smohans goto out; 118158739Smohans } 11989407Speter nfs_iodmin = newmin; 12089407Speter if 
(nfs_numasync >= nfs_iodmin) 121158739Smohans goto out; 12289407Speter /* 12389407Speter * If the current number of nfsiod is lower 12489407Speter * than the new minimum, create some more. 12589407Speter */ 12689407Speter for (i = nfs_iodmin - nfs_numasync; i > 0; i--) 127203072Srmacklem nfs_nfsiodnew(0); 128158739Smohansout: 129158739Smohans mtx_unlock(&nfs_iod_mtx); 13089407Speter return (0); 13189407Speter} 13289407SpeterSYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0, 133184561Strhodes sizeof (nfs_iodmin), sysctl_iodmin, "IU", 134184561Strhodes "Min number of nfsiod kthreads to keep as spares"); 13589407Speter 13689407Speter 13789407Speterstatic int 13889407Spetersysctl_iodmax(SYSCTL_HANDLER_ARGS) 13989407Speter{ 14089407Speter int error, i; 14189407Speter int iod, newmax; 14289407Speter 14389407Speter newmax = nfs_iodmax; 14489407Speter error = sysctl_handle_int(oidp, &newmax, 0, req); 14589407Speter if (error || (req->newptr == NULL)) 14689407Speter return (error); 14789407Speter if (newmax > NFS_MAXASYNCDAEMON) 14889407Speter return (EINVAL); 149158739Smohans mtx_lock(&nfs_iod_mtx); 15089407Speter nfs_iodmax = newmax; 15189407Speter if (nfs_numasync <= nfs_iodmax) 152158739Smohans goto out; 15389407Speter /* 15489407Speter * If there are some asleep nfsiods that should 15589407Speter * exit, wakeup() them so that they check nfs_iodmax 15689407Speter * and exit. Those who are active will exit as 15789407Speter * soon as they finish I/O. 
15889407Speter */ 15989407Speter iod = nfs_numasync - 1; 16089407Speter for (i = 0; i < nfs_numasync - nfs_iodmax; i++) { 161203072Srmacklem if (nfs_iodwant[iod] == NFSIOD_AVAILABLE) 162111748Sdes wakeup(&nfs_iodwant[iod]); 16389407Speter iod--; 16489407Speter } 165158739Smohansout: 166158739Smohans mtx_unlock(&nfs_iod_mtx); 16789407Speter return (0); 16889407Speter} 16989407SpeterSYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0, 170184561Strhodes sizeof (nfs_iodmax), sysctl_iodmax, "IU", 171184561Strhodes "Max number of nfsiod kthreads"); 17289407Speter 173212506Skibvoid 174212506Skibnfs_nfsiodnew_tq(__unused void *arg, int pending) 175212506Skib{ 176212506Skib struct nfsiod_str *nip; 177212506Skib 178212506Skib mtx_lock(&nfs_iod_mtx); 179212506Skib while ((nip = STAILQ_FIRST(&nfsiodhead)) != NULL) { 180212506Skib STAILQ_REMOVE_HEAD(&nfsiodhead, ni_links); 181212506Skib mtx_unlock(&nfs_iod_mtx); 182212506Skib nip->ni_error = kproc_create(nfssvc_iod, nip->ni_inst, NULL, 183212506Skib RFHIGHPID, 0, "nfsiod %d", nip->ni_iod); 184212506Skib nip->ni_done = 1; 185212506Skib mtx_lock(&nfs_iod_mtx); 186212506Skib wakeup(nip); 187212506Skib } 188212506Skib mtx_unlock(&nfs_iod_mtx); 189212506Skib} 190212506Skib 19189324Speterint 192203072Srmacklemnfs_nfsiodnew(int set_iodwant) 19389324Speter{ 19489324Speter int error, i; 19589324Speter int newiod; 196212506Skib struct nfsiod_str *nip; 19789324Speter 19889407Speter if (nfs_numasync >= nfs_iodmax) 19989407Speter return (-1); 20089324Speter newiod = -1; 20189407Speter for (i = 0; i < nfs_iodmax; i++) 20289324Speter if (nfs_asyncdaemon[i] == 0) { 20389324Speter nfs_asyncdaemon[i]++; 20489324Speter newiod = i; 20589324Speter break; 20689324Speter } 20789324Speter if (newiod == -1) 20889324Speter return (-1); 209203072Srmacklem if (set_iodwant > 0) 210203072Srmacklem nfs_iodwant[i] = NFSIOD_CREATED_FOR_NFS_ASYNCIO; 211158739Smohans mtx_unlock(&nfs_iod_mtx); 212212506Skib nip = malloc(sizeof(*nip), M_TEMP, 
M_WAITOK | M_ZERO); 213212506Skib nip->ni_inst = nfs_asyncdaemon + i; 214212506Skib nip->ni_iod = newiod; 215158739Smohans mtx_lock(&nfs_iod_mtx); 216212506Skib STAILQ_INSERT_TAIL(&nfsiodhead, nip, ni_links); 217212506Skib taskqueue_enqueue(taskqueue_thread, &nfs_nfsiodnew_task); 218212506Skib while (!nip->ni_done) 219212506Skib mtx_sleep(nip, &nfs_iod_mtx, 0, "niwt", 0); 220212506Skib error = nip->ni_error; 221212506Skib free(nip, M_TEMP); 222203072Srmacklem if (error) { 223203072Srmacklem if (set_iodwant > 0) 224203072Srmacklem nfs_iodwant[i] = NFSIOD_NOT_AVAILABLE; 22589324Speter return (-1); 226203072Srmacklem } 22789324Speter nfs_numasync++; 22889324Speter return (newiod); 22989324Speter} 23089324Speter 23183651Speterstatic void 23283651Speternfsiod_setup(void *dummy) 2331541Srgrimes{ 23483651Speter int i; 2351541Srgrimes int error; 2361541Srgrimes 23789324Speter TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); 238158739Smohans mtx_lock(&nfs_iod_mtx); 23989324Speter /* Silently limit the start number of nfsiod's */ 24089324Speter if (nfs_iodmin > NFS_MAXASYNCDAEMON) 24189324Speter nfs_iodmin = NFS_MAXASYNCDAEMON; 24289324Speter 24389324Speter for (i = 0; i < nfs_iodmin; i++) { 244203072Srmacklem error = nfs_nfsiodnew(0); 24589324Speter if (error == -1) 24689324Speter panic("nfsiod_setup: nfs_nfsiodnew failed"); 2471541Srgrimes } 248158739Smohans mtx_unlock(&nfs_iod_mtx); 2491541Srgrimes} 25083651SpeterSYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); 2511541Srgrimes 25283651Speterstatic int nfs_defect = 0; 253184561StrhodesSYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, 254184561Strhodes "Allow nfsiods to migrate serving different mounts"); 2551541Srgrimes 25644246Speter/* 2571541Srgrimes * Asynchronous I/O daemons for client nfs. 2581541Srgrimes * They do read-ahead and write-behind operations on the block I/O cache. 25989324Speter * Returns if we hit the timeout defined by the iodmaxidle sysctl. 
2601541Srgrimes */ 26183651Speterstatic void 26289324Speternfssvc_iod(void *instance) 2631541Srgrimes{ 26483651Speter struct buf *bp; 26519449Sdfr struct nfsmount *nmp; 26689324Speter int myiod, timo; 26731016Sphk int error = 0; 2681541Srgrimes 269158739Smohans mtx_lock(&nfs_iod_mtx); 27089324Speter myiod = (int *)instance - nfs_asyncdaemon; 2711541Srgrimes /* 27289324Speter * Main loop 2731541Srgrimes */ 2741541Srgrimes for (;;) { 275172324Smohans while (((nmp = nfs_iodmount[myiod]) == NULL) 276172324Smohans || !TAILQ_FIRST(&nmp->nm_bufq)) { 27789407Speter if (myiod >= nfs_iodmax) 27889407Speter goto finish; 27919449Sdfr if (nmp) 28089324Speter nmp->nm_bufqiods--; 281203072Srmacklem if (nfs_iodwant[myiod] == NFSIOD_NOT_AVAILABLE) 282203072Srmacklem nfs_iodwant[myiod] = NFSIOD_AVAILABLE; 28319449Sdfr nfs_iodmount[myiod] = NULL; 28489324Speter /* 28589324Speter * Always keep at least nfs_iodmin kthreads. 28689324Speter */ 28789324Speter timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz; 288158739Smohans error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH, 289117152Sphk "-", timo); 290172324Smohans if (error) { 291172324Smohans nmp = nfs_iodmount[myiod]; 292172324Smohans /* 293172324Smohans * Rechecking the nm_bufq closes a rare race where the 294172324Smohans * nfsiod is woken up at the exact time the idle timeout 295172324Smohans * fires 296172324Smohans */ 297172324Smohans if (nmp && TAILQ_FIRST(&nmp->nm_bufq)) 298172324Smohans error = 0; 299172324Smohans break; 300172324Smohans } 3019336Sdfr } 30289324Speter if (error) 30389324Speter break; 30483651Speter while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { 305158905Smohans int giant_locked = 0; 306158905Smohans 3079336Sdfr /* Take one off the front of the list */ 30819449Sdfr TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); 30919449Sdfr nmp->nm_bufqlen--; 31055431Sdillon if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) { 31189324Speter nmp->nm_bufqwant = 0; 31219449Sdfr wakeup(&nmp->nm_bufq); 
31319449Sdfr } 314158739Smohans mtx_unlock(&nfs_iod_mtx); 315158905Smohans if (NFS_ISV4(bp->b_vp)) { 316158905Smohans giant_locked = 1; 317158905Smohans mtx_lock(&Giant); 318158905Smohans } 319138899Sps if (bp->b_flags & B_DIRECT) { 320138899Sps KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set")); 321138899Sps (void)nfs_doio_directwrite(bp); 322138899Sps } else { 323138899Sps if (bp->b_iocmd == BIO_READ) 324138899Sps (void) nfs_doio(bp->b_vp, bp, bp->b_rcred, NULL); 325138899Sps else 326138899Sps (void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL); 327138899Sps } 328158905Smohans if (giant_locked) 329158905Smohans mtx_unlock(&Giant); 330158739Smohans mtx_lock(&nfs_iod_mtx); 33119449Sdfr /* 33219449Sdfr * If there are more than one iod on this mount, then defect 33319449Sdfr * so that the iods can be shared out fairly between the mounts 33419449Sdfr */ 33519449Sdfr if (nfs_defect && nmp->nm_bufqiods > 1) { 33619449Sdfr NFS_DPF(ASYNCIO, 33719449Sdfr ("nfssvc_iod: iod %d defecting from mount %p\n", 33819449Sdfr myiod, nmp)); 33919449Sdfr nfs_iodmount[myiod] = NULL; 34019449Sdfr nmp->nm_bufqiods--; 34119449Sdfr break; 34219449Sdfr } 3439336Sdfr } 3441541Srgrimes } 34589407Speterfinish: 34689324Speter nfs_asyncdaemon[myiod] = 0; 34789324Speter if (nmp) 34889324Speter nmp->nm_bufqiods--; 349203072Srmacklem nfs_iodwant[myiod] = NFSIOD_NOT_AVAILABLE; 35089324Speter nfs_iodmount[myiod] = NULL; 351128111Speadar /* Someone may be waiting for the last nfsiod to terminate. */ 352128111Speadar if (--nfs_numasync == 0) 353128111Speadar wakeup(&nfs_numasync); 354158739Smohans mtx_unlock(&nfs_iod_mtx); 35589407Speter if ((error == 0) || (error == EWOULDBLOCK)) 356172836Sjulian kproc_exit(0); 35789324Speter /* Abnormal termination */ 358172836Sjulian kproc_exit(1); 3591541Srgrimes} 360