nfs_srvkrpc.c revision 280258
1249259Sdim/*-
2249259Sdim * Copyright (c) 1989, 1993
3353358Sdim *	The Regents of the University of California.  All rights reserved.
4353358Sdim *
5353358Sdim * This code is derived from software contributed to Berkeley by
6249259Sdim * Rick Macklem at The University of Guelph.
7249259Sdim *
8249259Sdim * Redistribution and use in source and binary forms, with or without
9249259Sdim * modification, are permitted provided that the following conditions
10249259Sdim * are met:
11249259Sdim * 1. Redistributions of source code must retain the above copyright
12249259Sdim *    notice, this list of conditions and the following disclaimer.
13249259Sdim * 2. Redistributions in binary form must reproduce the above copyright
14249259Sdim *    notice, this list of conditions and the following disclaimer in the
15249259Sdim *    documentation and/or other materials provided with the distribution.
16249259Sdim * 4. Neither the name of the University nor the names of its contributors
17249259Sdim *    may be used to endorse or promote products derived from this software
18249259Sdim *    without specific prior written permission.
19249259Sdim *
20249259Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21249259Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22314564Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23249259Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24249259Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25321369Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26276479Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27249259Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28314564Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29314564Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30314564Sdim * SUCH DAMAGE.
31249259Sdim *
32249259Sdim *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
33249259Sdim */
34314564Sdim
35249259Sdim#include <sys/cdefs.h>
36314564Sdim__FBSDID("$FreeBSD: stable/10/sys/nfsserver/nfs_srvkrpc.c 280258 2015-03-19 13:37:36Z rwatson $");
37296417Sdim
38314564Sdim#include "opt_inet6.h"
39314564Sdim#include "opt_kgssapi.h"
40249259Sdim
41276479Sdim#include <sys/param.h>
42296417Sdim#include <sys/capsicum.h>
43249259Sdim#include <sys/systm.h>
44321369Sdim#include <sys/sysproto.h>
45249259Sdim#include <sys/kernel.h>
46249259Sdim#include <sys/sysctl.h>
47249259Sdim#include <sys/file.h>
48249259Sdim#include <sys/filedesc.h>
49249259Sdim#include <sys/jail.h>
50314564Sdim#include <sys/vnode.h>
51249259Sdim#include <sys/malloc.h>
52249259Sdim#include <sys/mount.h>
53249259Sdim#include <sys/priv.h>
54249259Sdim#include <sys/proc.h>
55276479Sdim#include <sys/bio.h>
56249259Sdim#include <sys/buf.h>
57249259Sdim#include <sys/mbuf.h>
58249259Sdim#include <sys/socket.h>
59249259Sdim#include <sys/socketvar.h>
60249259Sdim#include <sys/domain.h>
61249259Sdim#include <sys/protosw.h>
62276479Sdim#include <sys/namei.h>
63249259Sdim#include <sys/fcntl.h>
64249259Sdim#include <sys/lockf.h>
65314564Sdim#include <sys/eventhandler.h>
66314564Sdim
67249259Sdim#include <netinet/in.h>
68321369Sdim#include <netinet/tcp.h>
69309124Sdim#ifdef INET6
70249259Sdim#include <net/if.h>
71249259Sdim#include <netinet6/in6_var.h>
72314564Sdim#endif
73314564Sdim
74314564Sdim#include <rpc/rpc.h>
75314564Sdim#include <rpc/rpcsec_gss.h>
76314564Sdim#include <rpc/replay.h>
77341825Sdim
78341825Sdim#include <nfs/xdr_subs.h>
79341825Sdim#include <nfs/nfsproto.h>
80341825Sdim#include <nfs/nfs_fha.h>
81341825Sdim#include <nfsserver/nfs.h>
82341825Sdim#include <nfsserver/nfsm_subs.h>
83341825Sdim#include <nfsserver/nfsrvcache.h>
84341825Sdim#include <nfsserver/nfs_fha_old.h>
85341825Sdim
86341825Sdim#include <security/mac/mac_framework.h>
87249259Sdim
/* malloc(9) type that is private to this file. */
static MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure");

/* malloc(9) types with external linkage. */
MALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor");
MALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure");

/* Boolean constants; nfsrv_init() is called with TRUE when an nfsd exits. */
#define	TRUE	1
#define	FALSE	0

SYSCTL_DECL(_vfs_nfsrv);

/* RPC service pool; created by nfsrv_init() and run by nfssvc_nfsd(). */
SVCPOOL		*nfsrv_pool;
int		nfsd_waiting = 0;
/*
 * Nonzero while an nfsd is inside nfssvc_nfsd() servicing requests;
 * nfssvc_nfsd() changes it with NFSD_LOCK held.
 */
int		nfsrv_numnfsd = 0;
struct callout	nfsrv_callout;
/* Tag of the nmbclusters_change handler registered by nfsrv_init(). */
static eventhandler_tag nfsrv_nmbclusters_tag;

/*
 * When nonzero, nfssvc_program() rejects every request other than the
 * NULL procedure whose source port is >= IPPORT_RESERVED.
 */
static int	nfs_privport = 0;
SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
    &nfs_privport, 0,
    "Only allow clients using a privileged port");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
    &nfsrvw_procrastinate, 0,
    "Delay value for write gathering");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
    &nfsrvw_procrastinate_v3, 0,
    "Delay in seconds for NFSv3 write gathering");

/* Helpers invoked from nfssvc_nfsserver(). */
static int	nfssvc_addsock(struct file *, struct thread *);
static int	nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *);

/* Used by nfssvc_addsock() as the size passed to soreserve(). */
extern u_long sb_max_adj;
119249259Sdim
/*
 * Request handlers, indexed by procedure number.  nfssvc_program() picks
 * the entry for each request: NFSv3 procedure numbers are used directly,
 * NFSv2 ones are first translated through nfsrv_nfsv3_procid[].  The
 * order of the entries therefore must not change.
 */
int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
    struct nfssvc_sock *slp, struct mbuf **mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop
};
146249259Sdim
147249259Sdim/*
148249259Sdim * NFS server system calls
149249259Sdim */
150249259Sdim/*
151249259Sdim * This is now called from nfssvc() in nfs/nfs_nfssvc.c.
152249259Sdim */
153249259Sdim
154249259Sdim/*
155249259Sdim * Nfs server psuedo system call for the nfsd's
156249259Sdim * Based on the flag value it either:
157249259Sdim * - adds a socket to the selection list
158249259Sdim * - remains in the kernel as an nfsd
159249259Sdim * - remains in the kernel as an nfsiod
160249259Sdim * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
161249259Sdim * and that mountd provides
162249259Sdim *  - sockaddr with no IPv4-mapped addresses
163249259Sdim *  - mask for both INET and INET6 families if there is IPv4-mapped overlap
164249259Sdim */
165321369Sdimint
166249259Sdimnfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap)
167249259Sdim{
168249259Sdim	struct file *fp;
169249259Sdim	struct nfsd_addsock_args addsockarg;
170321369Sdim	struct nfsd_nfsd_args nfsdarg;
171249259Sdim	cap_rights_t rights;
172249259Sdim	int error;
173249259Sdim
174249259Sdim	if (uap->flag & NFSSVC_ADDSOCK) {
175321369Sdim		error = copyin(uap->argp, (caddr_t)&addsockarg,
176249259Sdim		    sizeof(addsockarg));
177309124Sdim		if (error)
178309124Sdim			return (error);
179309124Sdim		error = fget(td, addsockarg.sock,
180309124Sdim		    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
181314564Sdim		if (error)
182314564Sdim			return (error);
183314564Sdim		if (fp->f_type != DTYPE_SOCKET) {
184314564Sdim			fdrop(fp, td);
185314564Sdim			return (error);	/* XXXRW: Should be EINVAL? */
186314564Sdim		}
187321369Sdim		error = nfssvc_addsock(fp, td);
188321369Sdim		fdrop(fp, td);
189321369Sdim	} else if (uap->flag & NFSSVC_OLDNFSD)
190321369Sdim		error = nfssvc_nfsd(td, NULL);
191321369Sdim	else if (uap->flag & NFSSVC_NFSD) {
192321369Sdim		if (!uap->argp)
193321369Sdim			return (EINVAL);
194321369Sdim		error = copyin(uap->argp, (caddr_t)&nfsdarg,
195321369Sdim		    sizeof(nfsdarg));
196321369Sdim		if (error)
197321369Sdim			return (error);
198321369Sdim		error = nfssvc_nfsd(td, &nfsdarg);
199321369Sdim	} else
200321369Sdim		error = ENXIO;
201321369Sdim	return (error);
202321369Sdim}
203321369Sdim
204321369Sdim/*
205321369Sdim * Generate the rpc reply header
206321369Sdim * siz arg. is used to decide if adding a cluster is worthwhile
207321369Sdim */
208321369Sdimstruct mbuf *
209321369Sdimnfs_rephead(int siz, struct nfsrv_descript *nd, int err,
210321369Sdim    struct mbuf **mbp, caddr_t *bposp)
211321369Sdim{
212321369Sdim	u_int32_t *tl;
213321369Sdim	struct mbuf *mreq;
214321369Sdim	caddr_t bpos;
215321369Sdim	struct mbuf *mb;
216321369Sdim
217321369Sdim	if (err == EBADRPC)
218321369Sdim		return (NULL);
219321369Sdim
220321369Sdim	nd->nd_repstat = err;
221321369Sdim	if (err && (nd->nd_flag & ND_NFSV3) == 0)	/* XXX recheck */
222321369Sdim		siz = 0;
223321369Sdim
224321369Sdim	MGET(mreq, M_WAITOK, MT_DATA);
225321369Sdim
226321369Sdim	/*
227321369Sdim	 * If this is a big reply, use a cluster
228321369Sdim	 */
229321369Sdim	mreq->m_len = 0;
230321369Sdim	if (siz >= MINCLSIZE) {
231321369Sdim		MCLGET(mreq, M_WAITOK);
232321369Sdim	}
233321369Sdim	mb = mreq;
234321369Sdim	bpos = mtod(mb, caddr_t);
235321369Sdim
236321369Sdim	if (err != NFSERR_RETVOID) {
237321369Sdim		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
238321369Sdim		if (err)
239321369Sdim			*tl = txdr_unsigned(nfsrv_errmap(nd, err));
240321369Sdim		else
241321369Sdim			*tl = 0;
242321369Sdim	}
243321369Sdim
244321369Sdim	*mbp = mb;
245321369Sdim	*bposp = bpos;
246360784Sdim	if (err != 0 && err != NFSERR_RETVOID)
247321369Sdim		nfsrvstats.srvrpc_errs++;
248321369Sdim
249321369Sdim	return (mreq);
250249259Sdim}
251321369Sdim
252249259Sdimstatic void
253249259Sdimnfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
254249259Sdim{
255249259Sdim	rpcproc_t procnum;
256249259Sdim	int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp,
257249259Sdim	    struct mbuf **mreqp);
258249259Sdim	int flag;
259249259Sdim	struct nfsrv_descript nd;
260249259Sdim	struct mbuf *mreq, *mrep;
261249259Sdim	int error;
262249259Sdim
263314564Sdim	if (rqst->rq_vers == NFS_VER2) {
264249259Sdim		if (rqst->rq_proc > NFSV2PROC_STATFS) {
265314564Sdim			svcerr_noproc(rqst);
266			svc_freereq(rqst);
267			return;
268		}
269		procnum = nfsrv_nfsv3_procid[rqst->rq_proc];
270		flag = 0;
271	} else {
272		if (rqst->rq_proc >= NFS_NPROCS) {
273			svcerr_noproc(rqst);
274			svc_freereq(rqst);
275			return;
276		}
277		procnum = rqst->rq_proc;
278		flag = ND_NFSV3;
279	}
280	proc = nfsrv3_procs[procnum];
281
282	mreq = mrep = NULL;
283	mreq = rqst->rq_args;
284	rqst->rq_args = NULL;
285	(void)nfs_realign(&mreq, M_WAITOK);
286
287	/*
288	 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
289	 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
290	 * mounts.
291	 */
292	memset(&nd, 0, sizeof(nd));
293	nd.nd_md = nd.nd_mrep = mreq;
294	nd.nd_dpos = mtod(mreq, caddr_t);
295	nd.nd_nam = svc_getrpccaller(rqst);
296	nd.nd_nam2 = rqst->rq_addr;
297	nd.nd_procnum = procnum;
298	nd.nd_cr = NULL;
299	nd.nd_flag = flag;
300
301	if (nfs_privport) {
302		/* Check if source port is privileged */
303		u_short port;
304		struct sockaddr *nam = nd.nd_nam;
305		struct sockaddr_in *sin;
306
307		sin = (struct sockaddr_in *)nam;
308		/*
309		 * INET/INET6 - same code:
310		 *    sin_port and sin6_port are at same offset
311		 */
312		port = ntohs(sin->sin_port);
313		if (port >= IPPORT_RESERVED &&
314		    nd.nd_procnum != NFSPROC_NULL) {
315#ifdef INET6
316			char b6[INET6_ADDRSTRLEN];
317#if defined(KLD_MODULE)
318			/* Do not use ip6_sprintf: the nfs module should work without INET6. */
319#define ip6_sprintf(buf, a)						\
320			(sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x",	\
321			    (a)->s6_addr16[0], (a)->s6_addr16[1],	\
322			    (a)->s6_addr16[2], (a)->s6_addr16[3],	\
323			    (a)->s6_addr16[4], (a)->s6_addr16[5],	\
324			    (a)->s6_addr16[6], (a)->s6_addr16[7]),	\
325			    (buf))
326#endif
327#endif
328			printf("NFS request from unprivileged port (%s:%d)\n",
329#ifdef INET6
330			    sin->sin_family == AF_INET6 ?
331			    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
332#if defined(KLD_MODULE)
333#undef ip6_sprintf
334#endif
335#endif
336			    inet_ntoa(sin->sin_addr), port);
337			m_freem(mreq);
338			svcerr_weakauth(rqst);
339			svc_freereq(rqst);
340			return;
341		}
342	}
343
344	if (proc != nfsrv_null) {
345		if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) {
346			m_freem(mreq);
347			svcerr_weakauth(rqst);
348			svc_freereq(rqst);
349			return;
350		}
351#ifdef MAC
352		mac_cred_associate_nfsd(nd.nd_cr);
353#endif
354	}
355	nfsrvstats.srvrpccnt[nd.nd_procnum]++;
356
357	error = proc(&nd, NULL, &mrep);
358
359	if (nd.nd_cr)
360		crfree(nd.nd_cr);
361
362	if (mrep == NULL) {
363		svcerr_decode(rqst);
364		svc_freereq(rqst);
365		return;
366	}
367	if (error && error != NFSERR_RETVOID) {
368		svcerr_systemerr(rqst);
369		svc_freereq(rqst);
370		return;
371	}
372	if (nd.nd_repstat & NFSERR_AUTHERR) {
373		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
374		m_freem(mrep);
375	} else {
376		if (!svc_sendreply_mbuf(rqst, mrep))
377			svcerr_systemerr(rqst);
378	}
379	svc_freereq(rqst);
380}
381
382/*
383 * Adds a socket to the list for servicing by nfsds.
384 */
385static int
386nfssvc_addsock(struct file *fp, struct thread *td)
387{
388	int siz;
389	struct socket *so;
390	int error;
391	SVCXPRT *xprt;
392
393	so = fp->f_data;
394
395	siz = sb_max_adj;
396	error = soreserve(so, siz, siz);
397	if (error)
398		return (error);
399
400	/*
401	 * Steal the socket from userland so that it doesn't close
402	 * unexpectedly.
403	 */
404	if (so->so_type == SOCK_DGRAM)
405		xprt = svc_dg_create(nfsrv_pool, so, 0, 0);
406	else
407		xprt = svc_vc_create(nfsrv_pool, so, 0, 0);
408	if (xprt) {
409		fp->f_ops = &badfileops;
410		fp->f_data = NULL;
411		svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL);
412		svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL);
413		SVC_RELEASE(xprt);
414	}
415
416	return (0);
417}
418
419/*
420 * Called by nfssvc() for nfsds.  Just loops around servicing rpc requests
421 * until it is killed by a signal.
422 */
423static int
424nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
425{
426	char principal[128];
427	int error;
428
429	if (args) {
430		error = copyinstr(args->principal, principal,
431		    sizeof(principal), NULL);
432		if (error)
433			return (error);
434	} else {
435		memcpy(principal, "nfs@", 4);
436		getcredhostname(td->td_ucred, principal + 4,
437		    sizeof(principal) - 4);
438	}
439
440	/*
441	 * Only the first nfsd actually does any work.  The RPC code
442	 * adds threads to it as needed.  Any extra processes offered
443	 * by nfsd just exit.  If nfsd is new enough, it will call us
444	 * once with a structure that specifies how many threads to
445	 * use.
446	 */
447	NFSD_LOCK();
448	if (nfsrv_numnfsd == 0) {
449		nfsrv_numnfsd++;
450
451		NFSD_UNLOCK();
452
453		rpc_gss_set_svc_name_call(principal, "kerberosv5",
454		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER2);
455		rpc_gss_set_svc_name_call(principal, "kerberosv5",
456		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER3);
457
458		if (args) {
459			nfsrv_pool->sp_minthreads = args->minthreads;
460			nfsrv_pool->sp_maxthreads = args->maxthreads;
461		} else {
462			nfsrv_pool->sp_minthreads = 4;
463			nfsrv_pool->sp_maxthreads = 4;
464		}
465
466		svc_run(nfsrv_pool);
467
468		rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
469		rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);
470
471		NFSD_LOCK();
472		nfsrv_numnfsd--;
473		nfsrv_init(TRUE);
474	}
475	NFSD_UNLOCK();
476
477	return (0);
478}
479
480/*
481 * Size the NFS server's duplicate request cache at 1/2 the
482 * nmbclusters, floating within a (64, 2048) range.  This is to
483 * prevent all mbuf clusters being tied up in the NFS dupreq
484 * cache for small values of nmbclusters.
485 */
486static size_t
487nfsrv_replay_size(void)
488{
489	size_t replaysiz;
490
491	replaysiz = nmbclusters / 2;
492	if (replaysiz > NFSRVCACHE_MAX_SIZE)
493		replaysiz = NFSRVCACHE_MAX_SIZE;
494	if (replaysiz < NFSRVCACHE_MIN_SIZE)
495		replaysiz = NFSRVCACHE_MIN_SIZE;
496	replaysiz *= MCLBYTES;
497
498	return (replaysiz);
499}
500
501/*
502 * Called when nmbclusters changes - we resize the replay cache
503 * accordingly.
504 */
505static void
506nfsrv_nmbclusters_change(void *tag)
507{
508
509	if (nfsrv_pool)
510		replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size());
511}
512
513/*
514 * Initialize the data structures for the server.
515 * Handshake with any new nfsds starting up to avoid any chance of
516 * corruption.
517 */
518void
519nfsrv_init(int terminating)
520{
521
522	NFSD_LOCK_ASSERT();
523
524	if (terminating) {
525		NFSD_UNLOCK();
526		EVENTHANDLER_DEREGISTER(nmbclusters_change,
527		    nfsrv_nmbclusters_tag);
528		svcpool_destroy(nfsrv_pool);
529		nfsrv_pool = NULL;
530		NFSD_LOCK();
531	} else
532		nfs_pub.np_valid = 0;
533
534	NFSD_UNLOCK();
535
536	nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv));
537	nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size());
538	nfsrv_pool->sp_assign = fhaold_assign;
539	nfsrv_pool->sp_done = fha_nd_complete;
540	nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change,
541	    nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST);
542
543	NFSD_LOCK();
544}
545