1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD$");
37
38#include "opt_inet6.h"
39#include "opt_kgssapi.h"
40
41#include <sys/param.h>
42#include <sys/capsicum.h>
43#include <sys/systm.h>
44#include <sys/sysproto.h>
45#include <sys/kernel.h>
46#include <sys/sysctl.h>
47#include <sys/file.h>
48#include <sys/filedesc.h>
49#include <sys/jail.h>
50#include <sys/vnode.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/priv.h>
54#include <sys/proc.h>
55#include <sys/bio.h>
56#include <sys/buf.h>
57#include <sys/mbuf.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/domain.h>
61#include <sys/protosw.h>
62#include <sys/namei.h>
63#include <sys/fcntl.h>
64#include <sys/lockf.h>
65#include <sys/eventhandler.h>
66
67#include <netinet/in.h>
68#include <netinet/tcp.h>
69#ifdef INET6
70#include <net/if.h>
71#include <netinet6/in6_var.h>
72#endif
73
74#include <rpc/rpc.h>
75#include <rpc/rpcsec_gss.h>
76#include <rpc/replay.h>
77
78#include <nfs/xdr_subs.h>
79#include <nfs/nfsproto.h>
80#include <nfs/nfs_fha.h>
81#include <nfsserver/nfs.h>
82#include <nfsserver/nfsm_subs.h>
83#include <nfsserver/nfsrvcache.h>
84#include <nfsserver/nfs_fha_old.h>
85
86#include <security/mac/mac_framework.h>
87
88static MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure");
89
90MALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor");
91MALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure");
92
93#define	TRUE	1
94#define	FALSE	0
95
96SYSCTL_DECL(_vfs_nfsrv);
97
98SVCPOOL		*nfsrv_pool;
99int		nfsd_waiting = 0;
100int		nfsrv_numnfsd = 0;
101struct callout	nfsrv_callout;
102static eventhandler_tag nfsrv_nmbclusters_tag;
103
104static int	nfs_privport = 0;
105SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
106    &nfs_privport, 0,
107    "Only allow clients using a privileged port");
108SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
109    &nfsrvw_procrastinate, 0,
110    "Delay value for write gathering");
111SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
112    &nfsrvw_procrastinate_v3, 0,
113    "Delay in seconds for NFSv3 write gathering");
114
115static int	nfssvc_addsock(struct file *, struct thread *);
116static int	nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *);
117
118extern u_long sb_max_adj;
119
120int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
121    struct nfssvc_sock *slp, struct mbuf **mreqp) = {
122	nfsrv_null,
123	nfsrv_getattr,
124	nfsrv_setattr,
125	nfsrv_lookup,
126	nfsrv3_access,
127	nfsrv_readlink,
128	nfsrv_read,
129	nfsrv_write,
130	nfsrv_create,
131	nfsrv_mkdir,
132	nfsrv_symlink,
133	nfsrv_mknod,
134	nfsrv_remove,
135	nfsrv_rmdir,
136	nfsrv_rename,
137	nfsrv_link,
138	nfsrv_readdir,
139	nfsrv_readdirplus,
140	nfsrv_statfs,
141	nfsrv_fsinfo,
142	nfsrv_pathconf,
143	nfsrv_commit,
144	nfsrv_noop
145};
146
147/*
148 * NFS server system calls
149 */
150/*
151 * This is now called from nfssvc() in nfs/nfs_nfssvc.c.
152 */
153
154/*
155 * Nfs server psuedo system call for the nfsd's
156 * Based on the flag value it either:
157 * - adds a socket to the selection list
158 * - remains in the kernel as an nfsd
159 * - remains in the kernel as an nfsiod
160 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
161 * and that mountd provides
162 *  - sockaddr with no IPv4-mapped addresses
163 *  - mask for both INET and INET6 families if there is IPv4-mapped overlap
164 */
165int
166nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap)
167{
168	struct file *fp;
169	struct nfsd_addsock_args addsockarg;
170	struct nfsd_nfsd_args nfsdarg;
171	cap_rights_t rights;
172	int error;
173
174	if (uap->flag & NFSSVC_ADDSOCK) {
175		error = copyin(uap->argp, (caddr_t)&addsockarg,
176		    sizeof(addsockarg));
177		if (error)
178			return (error);
179		error = fget(td, addsockarg.sock,
180		    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
181		if (error)
182			return (error);
183		if (fp->f_type != DTYPE_SOCKET) {
184			fdrop(fp, td);
185			return (error);	/* XXXRW: Should be EINVAL? */
186		}
187		error = nfssvc_addsock(fp, td);
188		fdrop(fp, td);
189	} else if (uap->flag & NFSSVC_OLDNFSD)
190		error = nfssvc_nfsd(td, NULL);
191	else if (uap->flag & NFSSVC_NFSD) {
192		if (!uap->argp)
193			return (EINVAL);
194		error = copyin(uap->argp, (caddr_t)&nfsdarg,
195		    sizeof(nfsdarg));
196		if (error)
197			return (error);
198		error = nfssvc_nfsd(td, &nfsdarg);
199	} else
200		error = ENXIO;
201	return (error);
202}
203
204/*
205 * Generate the rpc reply header
206 * siz arg. is used to decide if adding a cluster is worthwhile
207 */
208struct mbuf *
209nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
210    struct mbuf **mbp, caddr_t *bposp)
211{
212	u_int32_t *tl;
213	struct mbuf *mreq;
214	caddr_t bpos;
215	struct mbuf *mb;
216
217	if (err == EBADRPC)
218		return (NULL);
219
220	nd->nd_repstat = err;
221	if (err && (nd->nd_flag & ND_NFSV3) == 0)	/* XXX recheck */
222		siz = 0;
223
224	MGET(mreq, M_WAITOK, MT_DATA);
225
226	/*
227	 * If this is a big reply, use a cluster
228	 */
229	mreq->m_len = 0;
230	if (siz >= MINCLSIZE) {
231		MCLGET(mreq, M_WAITOK);
232	}
233	mb = mreq;
234	bpos = mtod(mb, caddr_t);
235
236	if (err != NFSERR_RETVOID) {
237		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
238		if (err)
239			*tl = txdr_unsigned(nfsrv_errmap(nd, err));
240		else
241			*tl = 0;
242	}
243
244	*mbp = mb;
245	*bposp = bpos;
246	if (err != 0 && err != NFSERR_RETVOID)
247		nfsrvstats.srvrpc_errs++;
248
249	return (mreq);
250}
251
252static void
253nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
254{
255	rpcproc_t procnum;
256	int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp,
257	    struct mbuf **mreqp);
258	int flag;
259	struct nfsrv_descript nd;
260	struct mbuf *mreq, *mrep;
261	int error;
262
263	if (rqst->rq_vers == NFS_VER2) {
264		if (rqst->rq_proc > NFSV2PROC_STATFS) {
265			svcerr_noproc(rqst);
266			svc_freereq(rqst);
267			return;
268		}
269		procnum = nfsrv_nfsv3_procid[rqst->rq_proc];
270		flag = 0;
271	} else {
272		if (rqst->rq_proc >= NFS_NPROCS) {
273			svcerr_noproc(rqst);
274			svc_freereq(rqst);
275			return;
276		}
277		procnum = rqst->rq_proc;
278		flag = ND_NFSV3;
279	}
280	proc = nfsrv3_procs[procnum];
281
282	mreq = mrep = NULL;
283	mreq = rqst->rq_args;
284	rqst->rq_args = NULL;
285	(void)nfs_realign(&mreq, M_WAITOK);
286
287	/*
288	 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
289	 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
290	 * mounts.
291	 */
292	memset(&nd, 0, sizeof(nd));
293	nd.nd_md = nd.nd_mrep = mreq;
294	nd.nd_dpos = mtod(mreq, caddr_t);
295	nd.nd_nam = svc_getrpccaller(rqst);
296	nd.nd_nam2 = rqst->rq_addr;
297	nd.nd_procnum = procnum;
298	nd.nd_cr = NULL;
299	nd.nd_flag = flag;
300
301	if (nfs_privport) {
302		/* Check if source port is privileged */
303		u_short port;
304		struct sockaddr *nam = nd.nd_nam;
305		struct sockaddr_in *sin;
306
307		sin = (struct sockaddr_in *)nam;
308		/*
309		 * INET/INET6 - same code:
310		 *    sin_port and sin6_port are at same offset
311		 */
312		port = ntohs(sin->sin_port);
313		if (port >= IPPORT_RESERVED &&
314		    nd.nd_procnum != NFSPROC_NULL) {
315#ifdef INET6
316			char b6[INET6_ADDRSTRLEN];
317#if defined(KLD_MODULE)
318			/* Do not use ip6_sprintf: the nfs module should work without INET6. */
319#define ip6_sprintf(buf, a)						\
320			(sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x",	\
321			    (a)->s6_addr16[0], (a)->s6_addr16[1],	\
322			    (a)->s6_addr16[2], (a)->s6_addr16[3],	\
323			    (a)->s6_addr16[4], (a)->s6_addr16[5],	\
324			    (a)->s6_addr16[6], (a)->s6_addr16[7]),	\
325			    (buf))
326#endif
327#endif
328			printf("NFS request from unprivileged port (%s:%d)\n",
329#ifdef INET6
330			    sin->sin_family == AF_INET6 ?
331			    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
332#if defined(KLD_MODULE)
333#undef ip6_sprintf
334#endif
335#endif
336			    inet_ntoa(sin->sin_addr), port);
337			m_freem(mreq);
338			svcerr_weakauth(rqst);
339			svc_freereq(rqst);
340			return;
341		}
342	}
343
344	if (proc != nfsrv_null) {
345		if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) {
346			m_freem(mreq);
347			svcerr_weakauth(rqst);
348			svc_freereq(rqst);
349			return;
350		}
351#ifdef MAC
352		mac_cred_associate_nfsd(nd.nd_cr);
353#endif
354	}
355	nfsrvstats.srvrpccnt[nd.nd_procnum]++;
356
357	error = proc(&nd, NULL, &mrep);
358
359	if (nd.nd_cr)
360		crfree(nd.nd_cr);
361
362	if (mrep == NULL) {
363		svcerr_decode(rqst);
364		svc_freereq(rqst);
365		return;
366	}
367	if (error && error != NFSERR_RETVOID) {
368		svcerr_systemerr(rqst);
369		svc_freereq(rqst);
370		return;
371	}
372	if (nd.nd_repstat & NFSERR_AUTHERR) {
373		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
374		m_freem(mrep);
375	} else {
376		if (!svc_sendreply_mbuf(rqst, mrep))
377			svcerr_systemerr(rqst);
378	}
379	svc_freereq(rqst);
380}
381
382/*
383 * Adds a socket to the list for servicing by nfsds.
384 */
385static int
386nfssvc_addsock(struct file *fp, struct thread *td)
387{
388	int siz;
389	struct socket *so;
390	int error;
391	SVCXPRT *xprt;
392
393	so = fp->f_data;
394
395	siz = sb_max_adj;
396	error = soreserve(so, siz, siz);
397	if (error)
398		return (error);
399
400	/*
401	 * Steal the socket from userland so that it doesn't close
402	 * unexpectedly.
403	 */
404	if (so->so_type == SOCK_DGRAM)
405		xprt = svc_dg_create(nfsrv_pool, so, 0, 0);
406	else
407		xprt = svc_vc_create(nfsrv_pool, so, 0, 0);
408	if (xprt) {
409		fp->f_ops = &badfileops;
410		fp->f_data = NULL;
411		svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL);
412		svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL);
413		SVC_RELEASE(xprt);
414	}
415
416	return (0);
417}
418
419/*
420 * Called by nfssvc() for nfsds.  Just loops around servicing rpc requests
421 * until it is killed by a signal.
422 */
423static int
424nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
425{
426	char principal[128];
427	int error;
428
429	if (args) {
430		error = copyinstr(args->principal, principal,
431		    sizeof(principal), NULL);
432		if (error)
433			return (error);
434	} else {
435		memcpy(principal, "nfs@", 4);
436		getcredhostname(td->td_ucred, principal + 4,
437		    sizeof(principal) - 4);
438	}
439
440	/*
441	 * Only the first nfsd actually does any work.  The RPC code
442	 * adds threads to it as needed.  Any extra processes offered
443	 * by nfsd just exit.  If nfsd is new enough, it will call us
444	 * once with a structure that specifies how many threads to
445	 * use.
446	 */
447	NFSD_LOCK();
448	if (nfsrv_numnfsd == 0) {
449		nfsrv_numnfsd++;
450
451		NFSD_UNLOCK();
452
453		rpc_gss_set_svc_name_call(principal, "kerberosv5",
454		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER2);
455		rpc_gss_set_svc_name_call(principal, "kerberosv5",
456		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER3);
457
458		if (args) {
459			nfsrv_pool->sp_minthreads = args->minthreads;
460			nfsrv_pool->sp_maxthreads = args->maxthreads;
461		} else {
462			nfsrv_pool->sp_minthreads = 4;
463			nfsrv_pool->sp_maxthreads = 4;
464		}
465
466		svc_run(nfsrv_pool);
467
468		rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
469		rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);
470
471		NFSD_LOCK();
472		nfsrv_numnfsd--;
473		nfsrv_init(TRUE);
474	}
475	NFSD_UNLOCK();
476
477	return (0);
478}
479
480/*
481 * Size the NFS server's duplicate request cache at 1/2 the
482 * nmbclusters, floating within a (64, 2048) range.  This is to
483 * prevent all mbuf clusters being tied up in the NFS dupreq
484 * cache for small values of nmbclusters.
485 */
486static size_t
487nfsrv_replay_size(void)
488{
489	size_t replaysiz;
490
491	replaysiz = nmbclusters / 2;
492	if (replaysiz > NFSRVCACHE_MAX_SIZE)
493		replaysiz = NFSRVCACHE_MAX_SIZE;
494	if (replaysiz < NFSRVCACHE_MIN_SIZE)
495		replaysiz = NFSRVCACHE_MIN_SIZE;
496	replaysiz *= MCLBYTES;
497
498	return (replaysiz);
499}
500
501/*
502 * Called when nmbclusters changes - we resize the replay cache
503 * accordingly.
504 */
505static void
506nfsrv_nmbclusters_change(void *tag)
507{
508
509	if (nfsrv_pool)
510		replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size());
511}
512
513/*
514 * Initialize the data structures for the server.
515 * Handshake with any new nfsds starting up to avoid any chance of
516 * corruption.
517 */
518void
519nfsrv_init(int terminating)
520{
521
522	NFSD_LOCK_ASSERT();
523
524	if (terminating) {
525		NFSD_UNLOCK();
526		EVENTHANDLER_DEREGISTER(nmbclusters_change,
527		    nfsrv_nmbclusters_tag);
528		svcpool_destroy(nfsrv_pool);
529		nfsrv_pool = NULL;
530		NFSD_LOCK();
531	} else
532		nfs_pub.np_valid = 0;
533
534	NFSD_UNLOCK();
535
536	nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv));
537	nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size());
538	nfsrv_pool->sp_assign = fhaold_assign;
539	nfsrv_pool->sp_done = fha_nd_complete;
540	nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change,
541	    nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST);
542
543	NFSD_LOCK();
544}
545