nfs_nfsiod.c revision 9336
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
37 * $Id: nfs_syscalls.c,v 1.6 1995/05/30 08:12:45 rgrimes Exp $
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/file.h>
44#include <sys/stat.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/proc.h>
48#include <sys/uio.h>
49#include <sys/malloc.h>
50#include <sys/buf.h>
51#include <sys/mbuf.h>
52#include <sys/socket.h>
53#include <sys/socketvar.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/namei.h>
57#include <sys/syslog.h>
58
59#include <netinet/in.h>
60#include <netinet/tcp.h>
61#ifdef ISO
62#include <netiso/iso.h>
63#endif
64#include <nfs/xdr_subs.h>
65#include <nfs/rpcv2.h>
66#include <nfs/nfsproto.h>
67#include <nfs/nfs.h>
68#include <nfs/nfsm_subs.h>
69#include <nfs/nfsrvcache.h>
70#include <nfs/nfsmount.h>
71#include <nfs/nfsnode.h>
72#include <nfs/nqnfs.h>
73#include <nfs/nfsrtt.h>
74
75void	nfsrv_zapsock	__P((struct nfssvc_sock *));
76
77/* Global defs. */
78extern int (*nfsrv3_procs[NFS_NPROCS])();
79extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
80extern int nfs_numasync;
81extern time_t nqnfsstarttime;
82extern int nqsrv_writeslack;
83extern int nfsrtton;
84extern struct nfsstats nfsstats;
85extern int nfsrvw_procrastinate;
86struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
87int nuidhash_max = NFS_MAXUIDHASH;
88static int nfs_numnfsd = 0;
89int nfsd_waiting = 0;
90static int notstarted = 1;
91static int modify_flag = 0;
92static struct nfsdrt nfsdrt;
93void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
94static void nfsd_rt();
95void nfsrv_slpderef();
96
97#define	TRUE	1
98#define	FALSE	0
99
100static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
101/*
102 * NFS server system calls
103 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
104 */
105
106/*
107 * Get file handle system call
108 */
109struct getfh_args {
110	char	*fname;
111	fhandle_t *fhp;
112};
113int
114getfh(p, uap, retval)
115	struct proc *p;
116	register struct getfh_args *uap;
117	int *retval;
118{
119	register struct vnode *vp;
120	fhandle_t fh;
121	int error;
122	struct nameidata nd;
123
124	/*
125	 * Must be super user
126	 */
127	error = suser(p->p_ucred, &p->p_acflag);
128	if(error)
129		return (error);
130	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
131	error = namei(&nd);
132	if (error)
133		return (error);
134	vp = nd.ni_vp;
135	bzero((caddr_t)&fh, sizeof(fh));
136	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
137	error = VFS_VPTOFH(vp, &fh.fh_fid);
138	vput(vp);
139	if (error)
140		return (error);
141	error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
142	return (error);
143}
144
145/*
146 * Nfs server psuedo system call for the nfsd's
147 * Based on the flag value it either:
148 * - adds a socket to the selection list
149 * - remains in the kernel as an nfsd
150 * - remains in the kernel as an nfsiod
151 */
152struct nfssvc_args {
153	int flag;
154	caddr_t argp;
155};
156int
157nfssvc(p, uap, retval)
158	struct proc *p;
159	register struct nfssvc_args *uap;
160	int *retval;
161{
162	struct nameidata nd;
163	struct file *fp;
164	struct mbuf *nam;
165	struct nfsd_args nfsdarg;
166	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
167	struct nfsd_cargs ncd;
168	struct nfsd *nfsd;
169	struct nfssvc_sock *slp;
170	struct nfsuid *nuidp;
171	struct nfsmount *nmp;
172	int error;
173
174	/*
175	 * Must be super user
176	 */
177	error = suser(p->p_ucred, &p->p_acflag);
178	if(error)
179		return (error);
180	while (nfssvc_sockhead_flag & SLP_INIT) {
181		 nfssvc_sockhead_flag |= SLP_WANTINIT;
182		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
183	}
184	if (uap->flag & NFSSVC_BIOD)
185		error = nfssvc_iod(p);
186	else if (uap->flag & NFSSVC_MNTD) {
187		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
188		if (error)
189			return (error);
190		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
191			ncd.ncd_dirp, p);
192		error = namei(&nd);
193		if (error)
194			return (error);
195		if ((nd.ni_vp->v_flag & VROOT) == 0)
196			error = EINVAL;
197		nmp = VFSTONFS(nd.ni_vp->v_mount);
198		vput(nd.ni_vp);
199		if (error)
200			return (error);
201		if ((nmp->nm_flag & NFSMNT_MNTD) &&
202			(uap->flag & NFSSVC_GOTAUTH) == 0)
203			return (0);
204		nmp->nm_flag |= NFSMNT_MNTD;
205		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
206			uap->argp, p);
207	} else if (uap->flag & NFSSVC_ADDSOCK) {
208		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
209		if (error)
210			return (error);
211		error = getsock(p->p_fd, nfsdarg.sock, &fp);
212		if (error)
213			return (error);
214		/*
215		 * Get the client address for connected sockets.
216		 */
217		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
218			nam = (struct mbuf *)0;
219		else {
220			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
221				MT_SONAME);
222			if (error)
223				return (error);
224		}
225		error = nfssvc_addsock(fp, nam);
226	} else {
227		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
228		if (error)
229			return (error);
230		if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
231			(nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
232			slp = nfsd->nfsd_slp;
233
234			/*
235			 * First check to see if another nfsd has already
236			 * added this credential.
237			 */
238			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
239			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
240				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
241				    (!nfsd->nfsd_nd->nd_nam2 ||
242				     netaddr_match(NU_NETFAM(nuidp),
243				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
244					break;
245			}
246			if (nuidp) {
247			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
248			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
249			} else {
250			    /*
251			     * Nope, so we will.
252			     */
253			    if (slp->ns_numuids < nuidhash_max) {
254				slp->ns_numuids++;
255				nuidp = (struct nfsuid *)
256				   malloc(sizeof (struct nfsuid), M_NFSUID,
257					M_WAITOK);
258			    } else
259				nuidp = (struct nfsuid *)0;
260			    if ((slp->ns_flag & SLP_VALID) == 0) {
261				if (nuidp)
262				    free((caddr_t)nuidp, M_NFSUID);
263			    } else {
264				if (nuidp == (struct nfsuid *)0) {
265				    nuidp = slp->ns_uidlruhead.tqh_first;
266				    LIST_REMOVE(nuidp, nu_hash);
267				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
268					nu_lru);
269				    if (nuidp->nu_flag & NU_NAM)
270					m_freem(nuidp->nu_nam);
271			        }
272				nuidp->nu_flag = 0;
273				nuidp->nu_cr = nsd->nsd_cr;
274				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
275				    nuidp->nu_cr.cr_ngroups = NGROUPS;
276				nuidp->nu_cr.cr_ref = 1;
277				nuidp->nu_timestamp = nsd->nsd_timestamp;
278				nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl;
279				/*
280				 * and save the session key in nu_key.
281				 */
282				bcopy(nsd->nsd_key, nuidp->nu_key,
283				    sizeof (nsd->nsd_key));
284				if (nfsd->nfsd_nd->nd_nam2) {
285				    struct sockaddr_in *saddr;
286
287				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
288					 struct sockaddr_in *);
289				    switch (saddr->sin_family) {
290				    case AF_INET:
291					nuidp->nu_flag |= NU_INETADDR;
292					nuidp->nu_inetaddr =
293					     saddr->sin_addr.s_addr;
294					break;
295				    case AF_ISO:
296				    default:
297					nuidp->nu_flag |= NU_NAM;
298					nuidp->nu_nam = m_copym(
299					    nfsd->nfsd_nd->nd_nam2, 0,
300					     M_COPYALL, M_WAIT);
301					break;
302				    };
303				}
304				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
305					nu_lru);
306				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
307					nuidp, nu_hash);
308				nfsrv_setcred(&nuidp->nu_cr,
309				    &nfsd->nfsd_nd->nd_cr);
310				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
311			    }
312			}
313		}
314		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
315			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
316		error = nfssvc_nfsd(nsd, uap->argp, p);
317	}
318	if (error == EINTR || error == ERESTART)
319		error = 0;
320	return (error);
321}
322
323/*
324 * Adds a socket to the list for servicing by nfsds.
325 */
326int
327nfssvc_addsock(fp, mynam)
328	struct file *fp;
329	struct mbuf *mynam;
330{
331	register struct mbuf *m;
332	register int siz;
333	register struct nfssvc_sock *slp;
334	register struct socket *so;
335	struct nfssvc_sock *tslp;
336	int error, s;
337
338	so = (struct socket *)fp->f_data;
339	tslp = (struct nfssvc_sock *)0;
340	/*
341	 * Add it to the list, as required.
342	 */
343	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
344		tslp = nfs_udpsock;
345		if (tslp->ns_flag & SLP_VALID) {
346			m_freem(mynam);
347			return (EPERM);
348		}
349#ifdef ISO
350	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
351		tslp = nfs_cltpsock;
352		if (tslp->ns_flag & SLP_VALID) {
353			m_freem(mynam);
354			return (EPERM);
355		}
356#endif /* ISO */
357	}
358	if (so->so_type == SOCK_STREAM)
359		siz = NFS_MAXPACKET + sizeof (u_long);
360	else
361		siz = NFS_MAXPACKET;
362	error = soreserve(so, siz, siz);
363	if (error) {
364		m_freem(mynam);
365		return (error);
366	}
367
368	/*
369	 * Set protocol specific options { for now TCP only } and
370	 * reserve some space. For datagram sockets, this can get called
371	 * repeatedly for the same socket, but that isn't harmful.
372	 */
373	if (so->so_type == SOCK_STREAM) {
374		MGET(m, M_WAIT, MT_SOOPTS);
375		*mtod(m, int *) = 1;
376		m->m_len = sizeof(int);
377		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
378	}
379	if (so->so_proto->pr_domain->dom_family == AF_INET &&
380	    so->so_proto->pr_protocol == IPPROTO_TCP) {
381		MGET(m, M_WAIT, MT_SOOPTS);
382		*mtod(m, int *) = 1;
383		m->m_len = sizeof(int);
384		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
385	}
386	so->so_rcv.sb_flags &= ~SB_NOINTR;
387	so->so_rcv.sb_timeo = 0;
388	so->so_snd.sb_flags &= ~SB_NOINTR;
389	so->so_snd.sb_timeo = 0;
390	if (tslp)
391		slp = tslp;
392	else {
393		slp = (struct nfssvc_sock *)
394			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
395		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
396		TAILQ_INIT(&slp->ns_uidlruhead);
397		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
398	}
399	slp->ns_so = so;
400	slp->ns_nam = mynam;
401	fp->f_count++;
402	slp->ns_fp = fp;
403	s = splnet();
404	so->so_upcallarg = (caddr_t)slp;
405	so->so_upcall = nfsrv_rcv;
406	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
407	nfsrv_wakenfsd(slp);
408	splx(s);
409	return (0);
410}
411
412/*
413 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
414 * until it is killed by a signal.
415 */
416int
417nfssvc_nfsd(nsd, argp, p)
418	struct nfsd_srvargs *nsd;
419	caddr_t argp;
420	struct proc *p;
421{
422	register struct mbuf *m;
423	register int siz;
424	register struct nfssvc_sock *slp;
425	register struct socket *so;
426	register int *solockp;
427	struct nfsd *nfsd = nsd->nsd_nfsd;
428	struct nfsrv_descript *nd = NULL;
429	struct mbuf *mreq;
430	struct nfsuid *uidp;
431	int error = 0, cacherep, s, sotype, writes_todo;
432	u_quad_t cur_usec;
433
434#ifndef nolint
435	cacherep = RC_DOIT;
436	writes_todo = 0;
437#endif
438	s = splnet();
439	if (nfsd == (struct nfsd *)0) {
440		nsd->nsd_nfsd = nfsd = (struct nfsd *)
441			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
442		bzero((caddr_t)nfsd, sizeof (struct nfsd));
443		nfsd->nfsd_procp = p;
444		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
445		nfs_numnfsd++;
446	}
447	/*
448	 * Loop getting rpc requests until SIGKILL.
449	 */
450	for (;;) {
451		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
452			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
453			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
454				nfsd->nfsd_flag |= NFSD_WAITING;
455				nfsd_waiting++;
456				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
457				    "nfsd", 0);
458				nfsd_waiting--;
459				if (error)
460					goto done;
461			}
462			if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
463			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
464				for (slp = nfssvc_sockhead.tqh_first; slp != 0;
465				    slp = slp->ns_chain.tqe_next) {
466				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
467					== (SLP_VALID | SLP_DOREC)) {
468					    slp->ns_flag &= ~SLP_DOREC;
469					    slp->ns_sref++;
470					    nfsd->nfsd_slp = slp;
471					    break;
472				    }
473				}
474				if (slp == 0)
475					nfsd_head_flag &= ~NFSD_CHECKSLP;
476			}
477			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
478				continue;
479			if (slp->ns_flag & SLP_VALID) {
480				if (slp->ns_flag & SLP_DISCONN)
481					nfsrv_zapsock(slp);
482				else if (slp->ns_flag & SLP_NEEDQ) {
483					slp->ns_flag &= ~SLP_NEEDQ;
484					(void) nfs_sndlock(&slp->ns_solock,
485						(struct nfsreq *)0);
486					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
487						M_WAIT);
488					nfs_sndunlock(&slp->ns_solock);
489				}
490				error = nfsrv_dorec(slp, nfsd, &nd);
491				cur_usec = (u_quad_t)time.tv_sec * 1000000 +
492					(u_quad_t)time.tv_usec;
493				if (error && slp->ns_tq.lh_first &&
494				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
495					error = 0;
496					cacherep = RC_DOIT;
497					writes_todo = 1;
498				} else
499					writes_todo = 0;
500				nfsd->nfsd_flag |= NFSD_REQINPROG;
501			}
502		} else {
503			error = 0;
504			slp = nfsd->nfsd_slp;
505		}
506		if (error || (slp->ns_flag & SLP_VALID) == 0) {
507			if (nd) {
508				free((caddr_t)nd, M_NFSRVDESC);
509				nd = NULL;
510			}
511			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
512			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
513			nfsrv_slpderef(slp);
514			continue;
515		}
516		splx(s);
517		so = slp->ns_so;
518		sotype = so->so_type;
519		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
520			solockp = &slp->ns_solock;
521		else
522			solockp = (int *)0;
523		if (nd) {
524		    nd->nd_starttime = time;
525		    if (nd->nd_nam2)
526			nd->nd_nam = nd->nd_nam2;
527		    else
528			nd->nd_nam = slp->ns_nam;
529
530		    /*
531		     * Check to see if authorization is needed.
532		     */
533		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
534			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
535			nsd->nsd_haddr = mtod(nd->nd_nam,
536			    struct sockaddr_in *)->sin_addr.s_addr;
537			nsd->nsd_authlen = nfsd->nfsd_authlen;
538			nsd->nsd_verflen = nfsd->nfsd_verflen;
539			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
540				nfsd->nfsd_authlen) &&
541			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
542				nfsd->nfsd_verflen) &&
543			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
544			    return (ENEEDAUTH);
545			cacherep = RC_DROPIT;
546		    } else
547			cacherep = nfsrv_getcache(nd, slp, &mreq);
548
549		    /*
550		     * Check for just starting up for NQNFS and send
551		     * fake "try again later" replies to the NQNFS clients.
552		     */
553		    if (notstarted && nqnfsstarttime <= time.tv_sec) {
554			if (modify_flag) {
555				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
556				modify_flag = 0;
557			} else
558				notstarted = 0;
559		    }
560		    if (notstarted) {
561			if ((nd->nd_flag & ND_NQNFS) == 0)
562				cacherep = RC_DROPIT;
563			else if (nd->nd_procnum != NFSPROC_WRITE) {
564				nd->nd_procnum = NFSPROC_NOOP;
565				nd->nd_repstat = NQNFS_TRYLATER;
566				cacherep = RC_DOIT;
567			} else
568				modify_flag = 1;
569		    } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
570			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
571			nd->nd_procnum = NFSPROC_NOOP;
572			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
573			cacherep = RC_DOIT;
574		    }
575		}
576
577		/*
578		 * Loop to get all the write rpc relies that have been
579		 * gathered together.
580		 */
581		do {
582		    switch (cacherep) {
583		    case RC_DOIT:
584			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
585			    nfsrvw_procrastinate > 0 && !notstarted))
586			    error = nfsrv_writegather(&nd, slp,
587				nfsd->nfsd_procp, &mreq);
588			else
589			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
590				slp, nfsd->nfsd_procp, &mreq);
591			if (mreq == NULL)
592				break;
593			if (error) {
594				if (nd->nd_procnum != NQNFSPROC_VACATED)
595					nfsstats.srv_errs++;
596				nfsrv_updatecache(nd, FALSE, mreq);
597				if (nd->nd_nam2)
598					m_freem(nd->nd_nam2);
599				break;
600			}
601			nfsstats.srvrpccnt[nd->nd_procnum]++;
602			nfsrv_updatecache(nd, TRUE, mreq);
603			nd->nd_mrep = (struct mbuf *)0;
604		    case RC_REPLY:
605			m = mreq;
606			siz = 0;
607			while (m) {
608				siz += m->m_len;
609				m = m->m_next;
610			}
611			if (siz <= 0 || siz > NFS_MAXPACKET) {
612				printf("mbuf siz=%d\n",siz);
613				panic("Bad nfs svc reply");
614			}
615			m = mreq;
616			m->m_pkthdr.len = siz;
617			m->m_pkthdr.rcvif = (struct ifnet *)0;
618			/*
619			 * For stream protocols, prepend a Sun RPC
620			 * Record Mark.
621			 */
622			if (sotype == SOCK_STREAM) {
623				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
624				*mtod(m, u_long *) = htonl(0x80000000 | siz);
625			}
626			if (solockp)
627				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
628			if (slp->ns_flag & SLP_VALID)
629			    error = nfs_send(so, nd->nd_nam2, m, NULL);
630			else {
631			    error = EPIPE;
632			    m_freem(m);
633			}
634			if (nfsrtton)
635				nfsd_rt(sotype, nd, cacherep);
636			if (nd->nd_nam2)
637				MFREE(nd->nd_nam2, m);
638			if (nd->nd_mrep)
639				m_freem(nd->nd_mrep);
640			if (error == EPIPE)
641				nfsrv_zapsock(slp);
642			if (solockp)
643				nfs_sndunlock(solockp);
644			if (error == EINTR || error == ERESTART) {
645				free((caddr_t)nd, M_NFSRVDESC);
646				nfsrv_slpderef(slp);
647				s = splnet();
648				goto done;
649			}
650			break;
651		    case RC_DROPIT:
652			if (nfsrtton)
653				nfsd_rt(sotype, nd, cacherep);
654			m_freem(nd->nd_mrep);
655			m_freem(nd->nd_nam2);
656			break;
657		    };
658		    if (nd) {
659			FREE((caddr_t)nd, M_NFSRVDESC);
660			nd = NULL;
661		    }
662
663		    /*
664		     * Check to see if there are outstanding writes that
665		     * need to be serviced.
666		     */
667		    cur_usec = (u_quad_t)time.tv_sec * 1000000 +
668			(u_quad_t)time.tv_usec;
669		    s = splsoftclock();
670		    if (slp->ns_tq.lh_first &&
671			slp->ns_tq.lh_first->nd_time <= cur_usec) {
672			cacherep = RC_DOIT;
673			writes_todo = 1;
674		    } else
675			writes_todo = 0;
676		    splx(s);
677		} while (writes_todo);
678		s = splnet();
679		if (nfsrv_dorec(slp, nfsd, &nd)) {
680			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
681			nfsd->nfsd_slp = NULL;
682			nfsrv_slpderef(slp);
683		}
684	}
685done:
686	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
687	splx(s);
688	free((caddr_t)nfsd, M_NFSD);
689	nsd->nsd_nfsd = (struct nfsd *)0;
690	if (--nfs_numnfsd == 0)
691		nfsrv_init(TRUE);	/* Reinitialize everything */
692	return (error);
693}
694
695/*
696 * Asynchronous I/O daemons for client nfs.
697 * They do read-ahead and write-behind operations on the block I/O cache.
698 * Never returns unless it fails or gets killed.
699 */
700int
701nfssvc_iod(p)
702	struct proc *p;
703{
704	register struct buf *bp, *nbp;
705	register int i, myiod;
706	struct vnode *vp;
707	int error = 0, s;
708
709	/*
710	 * Assign my position or return error if too many already running
711	 */
712	myiod = -1;
713	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
714		if (nfs_asyncdaemon[i] == 0) {
715			nfs_asyncdaemon[i]++;
716			myiod = i;
717			break;
718		}
719	if (myiod == -1)
720		return (EBUSY);
721	nfs_numasync++;
722	/*
723	 * Just loop around doin our stuff until SIGKILL
724	 */
725	for (;;) {
726	    while (nfs_bufq.tqh_first == NULL && error == 0) {
727		nfs_iodwant[myiod] = p;
728		error = tsleep((caddr_t)&nfs_iodwant[myiod],
729			PWAIT | PCATCH, "nfsidl", 0);
730	    }
731	    while ((bp = nfs_bufq.tqh_first) != NULL) {
732		/* Take one off the front of the list */
733		TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
734		if (bp->b_flags & B_READ)
735		    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
736		else do {
737		    /*
738		     * Look for a delayed write for the same vnode, so I can do
739		     * it now. We must grab it before calling nfs_doio() to
740		     * avoid any risk of the vnode getting vclean()'d while
741		     * we are doing the write rpc.
742		     */
743		    vp = bp->b_vp;
744		    s = splbio();
745		    for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
746			nbp = nbp->b_vnbufs.le_next) {
747			if ((nbp->b_flags &
748			    (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
749			    continue;
750			bremfree(nbp);
751			vfs_busy_pages(nbp, 1);
752			nbp->b_flags |= (B_BUSY|B_ASYNC);
753			break;
754		    }
755		    splx(s);
756		    /*
757		     * For the delayed write, do the first part of nfs_bwrite()
758		     * up to, but not including nfs_strategy().
759		     */
760		    if (nbp) {
761			nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
762			reassignbuf(nbp, nbp->b_vp);
763			nbp->b_vp->v_numoutput++;
764		    }
765		    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
766		} while (bp = nbp);
767	    }
768	    if (error) {
769		nfs_asyncdaemon[myiod] = 0;
770		nfs_numasync--;
771		return (error);
772	    }
773	}
774}
775
776/*
777 * Shut down a socket associated with an nfssvc_sock structure.
778 * Should be called with the send lock set, if required.
779 * The trick here is to increment the sref at the start, so that the nfsds
780 * will stop using it and clear ns_flag at the end so that it will not be
781 * reassigned during cleanup.
782 */
783void
784nfsrv_zapsock(slp)
785	register struct nfssvc_sock *slp;
786{
787	register struct nfsuid *nuidp, *nnuidp;
788	register struct nfsrv_descript *nwp, *nnwp;
789	struct socket *so;
790	struct file *fp;
791	struct mbuf *m;
792	int s;
793
794	slp->ns_flag &= ~SLP_ALLFLAGS;
795	fp = slp->ns_fp;
796	if (fp) {
797		slp->ns_fp = (struct file *)0;
798		so = slp->ns_so;
799		so->so_upcall = NULL;
800		soshutdown(so, 2);
801		closef(fp, (struct proc *)0);
802		if (slp->ns_nam)
803			MFREE(slp->ns_nam, m);
804		m_freem(slp->ns_raw);
805		m_freem(slp->ns_rec);
806		for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
807		    nuidp = nnuidp) {
808			nnuidp = nuidp->nu_lru.tqe_next;
809			LIST_REMOVE(nuidp, nu_hash);
810			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
811			if (nuidp->nu_flag & NU_NAM)
812				m_freem(nuidp->nu_nam);
813			free((caddr_t)nuidp, M_NFSUID);
814		}
815		s = splsoftclock();
816		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
817			nnwp = nwp->nd_tq.le_next;
818			LIST_REMOVE(nwp, nd_tq);
819			free((caddr_t)nwp, M_NFSRVDESC);
820		}
821		LIST_INIT(&slp->ns_tq);
822		splx(s);
823	}
824}
825
826/*
827 * Get an authorization string for the uid by having the mount_nfs sitting
828 * on this mount point porpous out of the kernel and do it.
829 */
830int
831nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
832	register struct nfsmount *nmp;
833	struct nfsreq *rep;
834	struct ucred *cred;
835	char **auth_str;
836	int *auth_len;
837	char *verf_str;
838	int *verf_len;
839	NFSKERBKEY_T key;		/* return session key */
840{
841	int error = 0;
842
843	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
844		nmp->nm_flag |= NFSMNT_WANTAUTH;
845		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
846			"nfsauth1", 2 * hz);
847		error = nfs_sigintr(nmp, rep, rep->r_procp);
848		if (error) {
849			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
850			return (error);
851		}
852	}
853	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
854	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
855	nmp->nm_authlen = RPCAUTH_MAXSIZ;
856	nmp->nm_verfstr = verf_str;
857	nmp->nm_verflen = *verf_len;
858	nmp->nm_authuid = cred->cr_uid;
859	wakeup((caddr_t)&nmp->nm_authstr);
860
861	/*
862	 * And wait for mount_nfs to do its stuff.
863	 */
864	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
865		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
866			"nfsauth2", 2 * hz);
867		error = nfs_sigintr(nmp, rep, rep->r_procp);
868	}
869	if (nmp->nm_flag & NFSMNT_AUTHERR) {
870		nmp->nm_flag &= ~NFSMNT_AUTHERR;
871		error = EAUTH;
872	}
873	if (error)
874		free((caddr_t)*auth_str, M_TEMP);
875	else {
876		*auth_len = nmp->nm_authlen;
877		*verf_len = nmp->nm_verflen;
878		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
879	}
880	nmp->nm_flag &= ~NFSMNT_HASAUTH;
881	nmp->nm_flag |= NFSMNT_WAITAUTH;
882	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
883		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
884		wakeup((caddr_t)&nmp->nm_authtype);
885	}
886	return (error);
887}
888
889/*
890 * Get a nickname authenticator and verifier.
891 */
892int
893nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
894	struct nfsmount *nmp;
895	struct ucred *cred;
896	char **auth_str;
897	int *auth_len;
898	char *verf_str;
899	int verf_len;
900{
901	register struct nfsuid *nuidp;
902	register u_long *nickp, *verfp;
903	struct timeval ktvin, ktvout;
904	NFSKERBKEYSCHED_T keys;	/* stores key schedule */
905
906#ifdef DIAGNOSTIC
907	if (verf_len < (4 * NFSX_UNSIGNED))
908		panic("nfs_getnickauth verf too small");
909#endif
910	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
911	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
912		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
913			break;
914	}
915	if (!nuidp || nuidp->nu_expire < time.tv_sec)
916		return (EACCES);
917
918	/*
919	 * Move to the end of the lru list (end of lru == most recently used).
920	 */
921	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
922	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
923
924	nickp = (u_long *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
925	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
926	*nickp = txdr_unsigned(nuidp->nu_nickname);
927	*auth_str = (char *)nickp;
928	*auth_len = 2 * NFSX_UNSIGNED;
929
930	/*
931	 * Now we must encrypt the verifier and package it up.
932	 */
933	verfp = (u_long *)verf_str;
934	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
935	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
936	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
937	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
938		nuidp->nu_timestamp = time;
939	else
940		nuidp->nu_timestamp.tv_usec++;
941	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
942	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
943
944	/*
945	 * Now encrypt the timestamp verifier in ecb mode using the session
946	 * key.
947	 */
948#ifdef NFSKERB
949	XXX
950#endif
951
952	*verfp++ = ktvout.tv_sec;
953	*verfp++ = ktvout.tv_usec;
954	*verfp = 0;
955	return (0);
956}
957
958/*
959 * Save the current nickname in a hash list entry on the mount point.
960 */
961int
962nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
963	register struct nfsmount *nmp;
964	struct ucred *cred;
965	int len;
966	NFSKERBKEY_T key;
967	struct mbuf **mdp;
968	char **dposp;
969	struct mbuf *mrep;
970{
971	register struct nfsuid *nuidp;
972	register u_long *tl;
973	register long t1;
974	struct mbuf *md = *mdp;
975	struct timeval ktvin, ktvout;
976	u_long nick;
977	NFSKERBKEYSCHED_T keys;
978	char *dpos = *dposp, *cp2;
979	int deltasec, error = 0;
980
981	if (len == (3 * NFSX_UNSIGNED)) {
982		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
983		ktvin.tv_sec = *tl++;
984		ktvin.tv_usec = *tl++;
985		nick = fxdr_unsigned(u_long, *tl);
986
987		/*
988		 * Decrypt the timestamp in ecb mode.
989		 */
990#ifdef NFSKERB
991		XXX
992#endif
993		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
994		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
995		deltasec = time.tv_sec - ktvout.tv_sec;
996		if (deltasec < 0)
997			deltasec = -deltasec;
998		/*
999		 * If ok, add it to the hash list for the mount point.
1000		 */
1001		if (deltasec <= NFS_KERBCLOCKSKEW) {
1002			if (nmp->nm_numuids < nuidhash_max) {
1003				nmp->nm_numuids++;
1004				nuidp = (struct nfsuid *)
1005				   malloc(sizeof (struct nfsuid), M_NFSUID,
1006					M_WAITOK);
1007			} else {
1008				nuidp = nmp->nm_uidlruhead.tqh_first;
1009				LIST_REMOVE(nuidp, nu_hash);
1010				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1011					nu_lru);
1012			}
1013			nuidp->nu_flag = 0;
1014			nuidp->nu_cr.cr_uid = cred->cr_uid;
1015			nuidp->nu_expire = time.tv_sec + NFS_KERBTTL;
1016			nuidp->nu_timestamp = ktvout;
1017			nuidp->nu_nickname = nick;
1018			bcopy(key, nuidp->nu_key, sizeof (key));
1019			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1020				nu_lru);
1021			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1022				nuidp, nu_hash);
1023		}
1024	} else
1025		nfsm_adv(nfsm_rndup(len));
1026nfsmout:
1027	*mdp = md;
1028	*dposp = dpos;
1029	return (error);
1030}
1031
1032/*
1033 * Derefence a server socket structure. If it has no more references and
1034 * is no longer valid, you can throw it away.
1035 */
1036void
1037nfsrv_slpderef(slp)
1038	register struct nfssvc_sock *slp;
1039{
1040	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
1041		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1042		free((caddr_t)slp, M_NFSSVC);
1043	}
1044}
1045
1046/*
1047 * Initialize the data structures for the server.
1048 * Handshake with any new nfsds starting up to avoid any chance of
1049 * corruption.
1050 */
1051void
1052nfsrv_init(terminating)
1053	int terminating;
1054{
1055	register struct nfssvc_sock *slp, *nslp;
1056
1057	if (nfssvc_sockhead_flag & SLP_INIT)
1058		panic("nfsd init");
1059	nfssvc_sockhead_flag |= SLP_INIT;
1060	if (terminating) {
1061		for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
1062			nslp = slp->ns_chain.tqe_next;
1063			if (slp->ns_flag & SLP_VALID)
1064				nfsrv_zapsock(slp);
1065			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1066			free((caddr_t)slp, M_NFSSVC);
1067		}
1068		nfsrv_cleancache();	/* And clear out server cache */
1069	}
1070
1071	TAILQ_INIT(&nfssvc_sockhead);
1072	nfssvc_sockhead_flag &= ~SLP_INIT;
1073	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
1074		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
1075		wakeup((caddr_t)&nfssvc_sockhead);
1076	}
1077
1078	TAILQ_INIT(&nfsd_head);
1079	nfsd_head_flag &= ~NFSD_CHECKSLP;
1080
1081	nfs_udpsock = (struct nfssvc_sock *)
1082	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1083	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
1084	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
1085	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
1086
1087	nfs_cltpsock = (struct nfssvc_sock *)
1088	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1089	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
1090	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
1091	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
1092}
1093
1094/*
1095 * Add entries to the server monitor log.
1096 */
1097static void
1098nfsd_rt(sotype, nd, cacherep)
1099	int sotype;
1100	register struct nfsrv_descript *nd;
1101	int cacherep;
1102{
1103	register struct drt *rt;
1104
1105	rt = &nfsdrt.drt[nfsdrt.pos];
1106	if (cacherep == RC_DOIT)
1107		rt->flag = 0;
1108	else if (cacherep == RC_REPLY)
1109		rt->flag = DRT_CACHEREPLY;
1110	else
1111		rt->flag = DRT_CACHEDROP;
1112	if (sotype == SOCK_STREAM)
1113		rt->flag |= DRT_TCP;
1114	if (nd->nd_flag & ND_NQNFS)
1115		rt->flag |= DRT_NQNFS;
1116	else if (nd->nd_flag & ND_NFSV3)
1117		rt->flag |= DRT_NFSV3;
1118	rt->proc = nd->nd_procnum;
1119	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
1120	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
1121	else
1122	    rt->ipadr = INADDR_ANY;
1123	rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
1124		(time.tv_usec - nd->nd_starttime.tv_usec);
1125	rt->tstamp = time;
1126	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
1127}
1128