1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD$"); 37 38#include "opt_inet6.h" 39#include "opt_kgssapi.h" 40 41#include <sys/param.h> 42#include <sys/capsicum.h> 43#include <sys/systm.h> 44#include <sys/sysproto.h> 45#include <sys/kernel.h> 46#include <sys/sysctl.h> 47#include <sys/file.h> 48#include <sys/filedesc.h> 49#include <sys/jail.h> 50#include <sys/vnode.h> 51#include <sys/malloc.h> 52#include <sys/mount.h> 53#include <sys/priv.h> 54#include <sys/proc.h> 55#include <sys/bio.h> 56#include <sys/buf.h> 57#include <sys/mbuf.h> 58#include <sys/socket.h> 59#include <sys/socketvar.h> 60#include <sys/domain.h> 61#include <sys/protosw.h> 62#include <sys/namei.h> 63#include <sys/fcntl.h> 64#include <sys/lockf.h> 65#include <sys/eventhandler.h> 66 67#include <netinet/in.h> 68#include <netinet/tcp.h> 69#ifdef INET6 70#include <net/if.h> 71#include <netinet6/in6_var.h> 72#endif 73 74#include <rpc/rpc.h> 75#include <rpc/rpcsec_gss.h> 76#include <rpc/replay.h> 77 78#include <nfs/xdr_subs.h> 79#include <nfs/nfsproto.h> 80#include <nfs/nfs_fha.h> 81#include <nfsserver/nfs.h> 82#include <nfsserver/nfsm_subs.h> 83#include <nfsserver/nfsrvcache.h> 84#include <nfsserver/nfs_fha_old.h> 85 86#include <security/mac/mac_framework.h> 87 88static MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure"); 89 90MALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor"); 91MALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure"); 92 93#define TRUE 1 94#define FALSE 0 95 96SYSCTL_DECL(_vfs_nfsrv); 97 98SVCPOOL *nfsrv_pool; 99int nfsd_waiting = 0; 100int nfsrv_numnfsd = 0; 101struct callout nfsrv_callout; 102static eventhandler_tag nfsrv_nmbclusters_tag; 103 104static int nfs_privport = 0; 105SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, 106 &nfs_privport, 0, 107 "Only allow clients using a privileged port"); 108SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW, 109 &nfsrvw_procrastinate, 0, 110 "Delay value for write gathering"); 111SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, 112 &nfsrvw_procrastinate_v3, 0, 113 "Delay in seconds for NFSv3 write gathering"); 114 115static int nfssvc_addsock(struct file *, struct thread *); 116static int nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *); 117 118extern u_long sb_max_adj; 119 120int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 121 struct nfssvc_sock *slp, struct mbuf **mreqp) = { 122 nfsrv_null, 123 nfsrv_getattr, 124 nfsrv_setattr, 125 nfsrv_lookup, 126 nfsrv3_access, 127 nfsrv_readlink, 128 nfsrv_read, 129 nfsrv_write, 130 nfsrv_create, 131 nfsrv_mkdir, 132 nfsrv_symlink, 133 nfsrv_mknod, 134 nfsrv_remove, 135 nfsrv_rmdir, 136 nfsrv_rename, 137 nfsrv_link, 138 nfsrv_readdir, 139 nfsrv_readdirplus, 140 nfsrv_statfs, 141 nfsrv_fsinfo, 142 nfsrv_pathconf, 143 nfsrv_commit, 144 nfsrv_noop 145}; 146 147/* 148 * NFS server system calls 149 */ 150/* 151 * This is now called from nfssvc() in nfs/nfs_nfssvc.c. 152 */ 153 154/* 155 * Nfs server psuedo system call for the nfsd's 156 * Based on the flag value it either: 157 * - adds a socket to the selection list 158 * - remains in the kernel as an nfsd 159 * - remains in the kernel as an nfsiod 160 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets 161 * and that mountd provides 162 * - sockaddr with no IPv4-mapped addresses 163 * - mask for both INET and INET6 families if there is IPv4-mapped overlap 164 */ 165int 166nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap) 167{ 168 struct file *fp; 169 struct nfsd_addsock_args addsockarg; 170 struct nfsd_nfsd_args nfsdarg; 171 cap_rights_t rights; 172 int error; 173 174 if (uap->flag & NFSSVC_ADDSOCK) { 175 error = copyin(uap->argp, (caddr_t)&addsockarg, 176 sizeof(addsockarg)); 177 if (error) 178 return (error); 179 error = fget(td, addsockarg.sock, 180 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 181 if (error) 182 return (error); 183 if (fp->f_type != DTYPE_SOCKET) { 184 fdrop(fp, td); 185 return (error); /* XXXRW: Should be EINVAL? */ 186 } 187 error = nfssvc_addsock(fp, td); 188 fdrop(fp, td); 189 } else if (uap->flag & NFSSVC_OLDNFSD) 190 error = nfssvc_nfsd(td, NULL); 191 else if (uap->flag & NFSSVC_NFSD) { 192 if (!uap->argp) 193 return (EINVAL); 194 error = copyin(uap->argp, (caddr_t)&nfsdarg, 195 sizeof(nfsdarg)); 196 if (error) 197 return (error); 198 error = nfssvc_nfsd(td, &nfsdarg); 199 } else 200 error = ENXIO; 201 return (error); 202} 203 204/* 205 * Generate the rpc reply header 206 * siz arg. is used to decide if adding a cluster is worthwhile 207 */ 208struct mbuf * 209nfs_rephead(int siz, struct nfsrv_descript *nd, int err, 210 struct mbuf **mbp, caddr_t *bposp) 211{ 212 u_int32_t *tl; 213 struct mbuf *mreq; 214 caddr_t bpos; 215 struct mbuf *mb; 216 217 if (err == EBADRPC) 218 return (NULL); 219 220 nd->nd_repstat = err; 221 if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */ 222 siz = 0; 223 224 MGET(mreq, M_WAITOK, MT_DATA); 225 226 /* 227 * If this is a big reply, use a cluster 228 */ 229 mreq->m_len = 0; 230 if (siz >= MINCLSIZE) { 231 MCLGET(mreq, M_WAITOK); 232 } 233 mb = mreq; 234 bpos = mtod(mb, caddr_t); 235 236 if (err != NFSERR_RETVOID) { 237 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 238 if (err) 239 *tl = txdr_unsigned(nfsrv_errmap(nd, err)); 240 else 241 *tl = 0; 242 } 243 244 *mbp = mb; 245 *bposp = bpos; 246 if (err != 0 && err != NFSERR_RETVOID) 247 nfsrvstats.srvrpc_errs++; 248 249 return (mreq); 250} 251 252static void 253nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) 254{ 255 rpcproc_t procnum; 256 int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp, 257 struct mbuf **mreqp); 258 int flag; 259 struct nfsrv_descript nd; 260 struct mbuf *mreq, *mrep; 261 int error; 262 263 if (rqst->rq_vers == NFS_VER2) { 264 if (rqst->rq_proc > NFSV2PROC_STATFS) { 265 svcerr_noproc(rqst); 266 svc_freereq(rqst); 267 return; 268 } 269 procnum = nfsrv_nfsv3_procid[rqst->rq_proc]; 270 flag = 0; 271 } else { 272 if (rqst->rq_proc >= NFS_NPROCS) { 273 svcerr_noproc(rqst); 274 svc_freereq(rqst); 275 return; 276 } 277 procnum = rqst->rq_proc; 278 flag = ND_NFSV3; 279 } 280 proc = nfsrv3_procs[procnum]; 281 282 mreq = mrep = NULL; 283 mreq = rqst->rq_args; 284 rqst->rq_args = NULL; 285 (void)nfs_realign(&mreq, M_WAITOK); 286 287 /* 288 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 - 289 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP 290 * mounts. 291 */ 292 memset(&nd, 0, sizeof(nd)); 293 nd.nd_md = nd.nd_mrep = mreq; 294 nd.nd_dpos = mtod(mreq, caddr_t); 295 nd.nd_nam = svc_getrpccaller(rqst); 296 nd.nd_nam2 = rqst->rq_addr; 297 nd.nd_procnum = procnum; 298 nd.nd_cr = NULL; 299 nd.nd_flag = flag; 300 301 if (nfs_privport) { 302 /* Check if source port is privileged */ 303 u_short port; 304 struct sockaddr *nam = nd.nd_nam; 305 struct sockaddr_in *sin; 306 307 sin = (struct sockaddr_in *)nam; 308 /* 309 * INET/INET6 - same code: 310 * sin_port and sin6_port are at same offset 311 */ 312 port = ntohs(sin->sin_port); 313 if (port >= IPPORT_RESERVED && 314 nd.nd_procnum != NFSPROC_NULL) { 315#ifdef INET6 316 char b6[INET6_ADDRSTRLEN]; 317#if defined(KLD_MODULE) 318 /* Do not use ip6_sprintf: the nfs module should work without INET6. */ 319#define ip6_sprintf(buf, a) \ 320 (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \ 321 (a)->s6_addr16[0], (a)->s6_addr16[1], \ 322 (a)->s6_addr16[2], (a)->s6_addr16[3], \ 323 (a)->s6_addr16[4], (a)->s6_addr16[5], \ 324 (a)->s6_addr16[6], (a)->s6_addr16[7]), \ 325 (buf)) 326#endif 327#endif 328 printf("NFS request from unprivileged port (%s:%d)\n", 329#ifdef INET6 330 sin->sin_family == AF_INET6 ? 331 ip6_sprintf(b6, &satosin6(sin)->sin6_addr) : 332#if defined(KLD_MODULE) 333#undef ip6_sprintf 334#endif 335#endif 336 inet_ntoa(sin->sin_addr), port); 337 m_freem(mreq); 338 svcerr_weakauth(rqst); 339 svc_freereq(rqst); 340 return; 341 } 342 } 343 344 if (proc != nfsrv_null) { 345 if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) { 346 m_freem(mreq); 347 svcerr_weakauth(rqst); 348 svc_freereq(rqst); 349 return; 350 } 351#ifdef MAC 352 mac_cred_associate_nfsd(nd.nd_cr); 353#endif 354 } 355 nfsrvstats.srvrpccnt[nd.nd_procnum]++; 356 357 error = proc(&nd, NULL, &mrep); 358 359 if (nd.nd_cr) 360 crfree(nd.nd_cr); 361 362 if (mrep == NULL) { 363 svcerr_decode(rqst); 364 svc_freereq(rqst); 365 return; 366 } 367 if (error && error != NFSERR_RETVOID) { 368 svcerr_systemerr(rqst); 369 svc_freereq(rqst); 370 return; 371 } 372 if (nd.nd_repstat & NFSERR_AUTHERR) { 373 svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); 374 m_freem(mrep); 375 } else { 376 if (!svc_sendreply_mbuf(rqst, mrep)) 377 svcerr_systemerr(rqst); 378 } 379 svc_freereq(rqst); 380} 381 382/* 383 * Adds a socket to the list for servicing by nfsds. 384 */ 385static int 386nfssvc_addsock(struct file *fp, struct thread *td) 387{ 388 int siz; 389 struct socket *so; 390 int error; 391 SVCXPRT *xprt; 392 393 so = fp->f_data; 394 395 siz = sb_max_adj; 396 error = soreserve(so, siz, siz); 397 if (error) 398 return (error); 399 400 /* 401 * Steal the socket from userland so that it doesn't close 402 * unexpectedly. 403 */ 404 if (so->so_type == SOCK_DGRAM) 405 xprt = svc_dg_create(nfsrv_pool, so, 0, 0); 406 else 407 xprt = svc_vc_create(nfsrv_pool, so, 0, 0); 408 if (xprt) { 409 fp->f_ops = &badfileops; 410 fp->f_data = NULL; 411 svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); 412 svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); 413 SVC_RELEASE(xprt); 414 } 415 416 return (0); 417} 418 419/* 420 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests 421 * until it is killed by a signal. 422 */ 423static int 424nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) 425{ 426 char principal[128]; 427 int error; 428 429 if (args) { 430 error = copyinstr(args->principal, principal, 431 sizeof(principal), NULL); 432 if (error) 433 return (error); 434 } else { 435 memcpy(principal, "nfs@", 4); 436 getcredhostname(td->td_ucred, principal + 4, 437 sizeof(principal) - 4); 438 } 439 440 /* 441 * Only the first nfsd actually does any work. The RPC code 442 * adds threads to it as needed. Any extra processes offered 443 * by nfsd just exit. If nfsd is new enough, it will call us 444 * once with a structure that specifies how many threads to 445 * use. 446 */ 447 NFSD_LOCK(); 448 if (nfsrv_numnfsd == 0) { 449 nfsrv_numnfsd++; 450 451 NFSD_UNLOCK(); 452 453 rpc_gss_set_svc_name_call(principal, "kerberosv5", 454 GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); 455 rpc_gss_set_svc_name_call(principal, "kerberosv5", 456 GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); 457 458 if (args) { 459 nfsrv_pool->sp_minthreads = args->minthreads; 460 nfsrv_pool->sp_maxthreads = args->maxthreads; 461 } else { 462 nfsrv_pool->sp_minthreads = 4; 463 nfsrv_pool->sp_maxthreads = 4; 464 } 465 466 svc_run(nfsrv_pool); 467 468 rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); 469 rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); 470 471 NFSD_LOCK(); 472 nfsrv_numnfsd--; 473 nfsrv_init(TRUE); 474 } 475 NFSD_UNLOCK(); 476 477 return (0); 478} 479 480/* 481 * Size the NFS server's duplicate request cache at 1/2 the 482 * nmbclusters, floating within a (64, 2048) range. This is to 483 * prevent all mbuf clusters being tied up in the NFS dupreq 484 * cache for small values of nmbclusters. 485 */ 486static size_t 487nfsrv_replay_size(void) 488{ 489 size_t replaysiz; 490 491 replaysiz = nmbclusters / 2; 492 if (replaysiz > NFSRVCACHE_MAX_SIZE) 493 replaysiz = NFSRVCACHE_MAX_SIZE; 494 if (replaysiz < NFSRVCACHE_MIN_SIZE) 495 replaysiz = NFSRVCACHE_MIN_SIZE; 496 replaysiz *= MCLBYTES; 497 498 return (replaysiz); 499} 500 501/* 502 * Called when nmbclusters changes - we resize the replay cache 503 * accordingly. 504 */ 505static void 506nfsrv_nmbclusters_change(void *tag) 507{ 508 509 if (nfsrv_pool) 510 replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size()); 511} 512 513/* 514 * Initialize the data structures for the server. 515 * Handshake with any new nfsds starting up to avoid any chance of 516 * corruption. 517 */ 518void 519nfsrv_init(int terminating) 520{ 521 522 NFSD_LOCK_ASSERT(); 523 524 if (terminating) { 525 NFSD_UNLOCK(); 526 EVENTHANDLER_DEREGISTER(nmbclusters_change, 527 nfsrv_nmbclusters_tag); 528 svcpool_destroy(nfsrv_pool); 529 nfsrv_pool = NULL; 530 NFSD_LOCK(); 531 } else 532 nfs_pub.np_valid = 0; 533 534 NFSD_UNLOCK(); 535 536 nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv)); 537 nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size()); 538 nfsrv_pool->sp_assign = fhaold_assign; 539 nfsrv_pool->sp_done = fha_nd_complete; 540 nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change, 541 nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST); 542 543 NFSD_LOCK(); 544} 545