nfs_srvkrpc.c revision 280258
1249259Sdim/*- 2249259Sdim * Copyright (c) 1989, 1993 3353358Sdim * The Regents of the University of California. All rights reserved. 4353358Sdim * 5353358Sdim * This code is derived from software contributed to Berkeley by 6249259Sdim * Rick Macklem at The University of Guelph. 7249259Sdim * 8249259Sdim * Redistribution and use in source and binary forms, with or without 9249259Sdim * modification, are permitted provided that the following conditions 10249259Sdim * are met: 11249259Sdim * 1. Redistributions of source code must retain the above copyright 12249259Sdim * notice, this list of conditions and the following disclaimer. 13249259Sdim * 2. Redistributions in binary form must reproduce the above copyright 14249259Sdim * notice, this list of conditions and the following disclaimer in the 15249259Sdim * documentation and/or other materials provided with the distribution. 16249259Sdim * 4. Neither the name of the University nor the names of its contributors 17249259Sdim * may be used to endorse or promote products derived from this software 18249259Sdim * without specific prior written permission. 19249259Sdim * 20249259Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21249259Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22314564Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23249259Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24249259Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25321369Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26276479Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27249259Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28314564Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29314564Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30314564Sdim * SUCH DAMAGE. 31249259Sdim * 32249259Sdim * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 33249259Sdim */ 34314564Sdim 35249259Sdim#include <sys/cdefs.h> 36314564Sdim__FBSDID("$FreeBSD: stable/10/sys/nfsserver/nfs_srvkrpc.c 280258 2015-03-19 13:37:36Z rwatson $"); 37296417Sdim 38314564Sdim#include "opt_inet6.h" 39314564Sdim#include "opt_kgssapi.h" 40249259Sdim 41276479Sdim#include <sys/param.h> 42296417Sdim#include <sys/capsicum.h> 43249259Sdim#include <sys/systm.h> 44321369Sdim#include <sys/sysproto.h> 45249259Sdim#include <sys/kernel.h> 46249259Sdim#include <sys/sysctl.h> 47249259Sdim#include <sys/file.h> 48249259Sdim#include <sys/filedesc.h> 49249259Sdim#include <sys/jail.h> 50314564Sdim#include <sys/vnode.h> 51249259Sdim#include <sys/malloc.h> 52249259Sdim#include <sys/mount.h> 53249259Sdim#include <sys/priv.h> 54249259Sdim#include <sys/proc.h> 55276479Sdim#include <sys/bio.h> 56249259Sdim#include <sys/buf.h> 57249259Sdim#include <sys/mbuf.h> 58249259Sdim#include <sys/socket.h> 59249259Sdim#include <sys/socketvar.h> 60249259Sdim#include <sys/domain.h> 61249259Sdim#include <sys/protosw.h> 62276479Sdim#include <sys/namei.h> 63249259Sdim#include <sys/fcntl.h> 64249259Sdim#include <sys/lockf.h> 65314564Sdim#include <sys/eventhandler.h> 66314564Sdim 67249259Sdim#include <netinet/in.h> 68321369Sdim#include <netinet/tcp.h> 69309124Sdim#ifdef INET6 70249259Sdim#include <net/if.h> 71249259Sdim#include <netinet6/in6_var.h> 72314564Sdim#endif 73314564Sdim 74314564Sdim#include <rpc/rpc.h> 75314564Sdim#include <rpc/rpcsec_gss.h> 76314564Sdim#include <rpc/replay.h> 77341825Sdim 78341825Sdim#include <nfs/xdr_subs.h> 79341825Sdim#include <nfs/nfsproto.h> 80341825Sdim#include <nfs/nfs_fha.h> 81341825Sdim#include <nfsserver/nfs.h> 82341825Sdim#include <nfsserver/nfsm_subs.h> 83341825Sdim#include <nfsserver/nfsrvcache.h> 84341825Sdim#include <nfsserver/nfs_fha_old.h> 85341825Sdim 86341825Sdim#include <security/mac/mac_framework.h> 87249259Sdim 88249259Sdimstatic MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure"); 89249259Sdim 90261991SdimMALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor"); 91249259SdimMALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure"); 92249259Sdim 93249259Sdim#define TRUE 1 94249259Sdim#define FALSE 0 95249259Sdim 96249259SdimSYSCTL_DECL(_vfs_nfsrv); 97249259Sdim 98249259SdimSVCPOOL *nfsrv_pool; 99249259Sdimint nfsd_waiting = 0; 100249259Sdimint nfsrv_numnfsd = 0; 101249259Sdimstruct callout nfsrv_callout; 102249259Sdimstatic eventhandler_tag nfsrv_nmbclusters_tag; 103249259Sdim 104249259Sdimstatic int nfs_privport = 0; 105249259SdimSYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, 106249259Sdim &nfs_privport, 0, 107249259Sdim "Only allow clients using a privileged port"); 108249259SdimSYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW, 109249259Sdim &nfsrvw_procrastinate, 0, 110249259Sdim "Delay value for write gathering"); 111249259SdimSYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, 112309124Sdim &nfsrvw_procrastinate_v3, 0, 113309124Sdim "Delay in seconds for NFSv3 write gathering"); 114309124Sdim 115249259Sdimstatic int nfssvc_addsock(struct file *, struct thread *); 116249259Sdimstatic int nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *); 117249259Sdim 118249259Sdimextern u_long sb_max_adj; 119249259Sdim 120249259Sdimint32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 121249259Sdim struct nfssvc_sock *slp, struct mbuf **mreqp) = { 122249259Sdim nfsrv_null, 123296417Sdim nfsrv_getattr, 124296417Sdim nfsrv_setattr, 125296417Sdim nfsrv_lookup, 126296417Sdim nfsrv3_access, 127296417Sdim nfsrv_readlink, 128296417Sdim nfsrv_read, 129296417Sdim nfsrv_write, 130249259Sdim nfsrv_create, 131249259Sdim nfsrv_mkdir, 132249259Sdim nfsrv_symlink, 133249259Sdim nfsrv_mknod, 134249259Sdim nfsrv_remove, 135249259Sdim nfsrv_rmdir, 136249259Sdim nfsrv_rename, 137249259Sdim nfsrv_link, 138249259Sdim nfsrv_readdir, 139249259Sdim nfsrv_readdirplus, 140249259Sdim nfsrv_statfs, 141249259Sdim nfsrv_fsinfo, 142249259Sdim nfsrv_pathconf, 143249259Sdim nfsrv_commit, 144249259Sdim nfsrv_noop 145249259Sdim}; 146249259Sdim 147249259Sdim/* 148249259Sdim * NFS server system calls 149249259Sdim */ 150249259Sdim/* 151249259Sdim * This is now called from nfssvc() in nfs/nfs_nfssvc.c. 152249259Sdim */ 153249259Sdim 154249259Sdim/* 155249259Sdim * Nfs server psuedo system call for the nfsd's 156249259Sdim * Based on the flag value it either: 157249259Sdim * - adds a socket to the selection list 158249259Sdim * - remains in the kernel as an nfsd 159249259Sdim * - remains in the kernel as an nfsiod 160249259Sdim * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets 161249259Sdim * and that mountd provides 162249259Sdim * - sockaddr with no IPv4-mapped addresses 163249259Sdim * - mask for both INET and INET6 families if there is IPv4-mapped overlap 164249259Sdim */ 165321369Sdimint 166249259Sdimnfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap) 167249259Sdim{ 168249259Sdim struct file *fp; 169249259Sdim struct nfsd_addsock_args addsockarg; 170321369Sdim struct nfsd_nfsd_args nfsdarg; 171249259Sdim cap_rights_t rights; 172249259Sdim int error; 173249259Sdim 174249259Sdim if (uap->flag & NFSSVC_ADDSOCK) { 175321369Sdim error = copyin(uap->argp, (caddr_t)&addsockarg, 176249259Sdim sizeof(addsockarg)); 177309124Sdim if (error) 178309124Sdim return (error); 179309124Sdim error = fget(td, addsockarg.sock, 180309124Sdim cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 181314564Sdim if (error) 182314564Sdim return (error); 183314564Sdim if (fp->f_type != DTYPE_SOCKET) { 184314564Sdim fdrop(fp, td); 185314564Sdim return (error); /* XXXRW: Should be EINVAL? */ 186314564Sdim } 187321369Sdim error = nfssvc_addsock(fp, td); 188321369Sdim fdrop(fp, td); 189321369Sdim } else if (uap->flag & NFSSVC_OLDNFSD) 190321369Sdim error = nfssvc_nfsd(td, NULL); 191321369Sdim else if (uap->flag & NFSSVC_NFSD) { 192321369Sdim if (!uap->argp) 193321369Sdim return (EINVAL); 194321369Sdim error = copyin(uap->argp, (caddr_t)&nfsdarg, 195321369Sdim sizeof(nfsdarg)); 196321369Sdim if (error) 197321369Sdim return (error); 198321369Sdim error = nfssvc_nfsd(td, &nfsdarg); 199321369Sdim } else 200321369Sdim error = ENXIO; 201321369Sdim return (error); 202321369Sdim} 203321369Sdim 204321369Sdim/* 205321369Sdim * Generate the rpc reply header 206321369Sdim * siz arg. is used to decide if adding a cluster is worthwhile 207321369Sdim */ 208321369Sdimstruct mbuf * 209321369Sdimnfs_rephead(int siz, struct nfsrv_descript *nd, int err, 210321369Sdim struct mbuf **mbp, caddr_t *bposp) 211321369Sdim{ 212321369Sdim u_int32_t *tl; 213321369Sdim struct mbuf *mreq; 214321369Sdim caddr_t bpos; 215321369Sdim struct mbuf *mb; 216321369Sdim 217321369Sdim if (err == EBADRPC) 218321369Sdim return (NULL); 219321369Sdim 220321369Sdim nd->nd_repstat = err; 221321369Sdim if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */ 222321369Sdim siz = 0; 223321369Sdim 224321369Sdim MGET(mreq, M_WAITOK, MT_DATA); 225321369Sdim 226321369Sdim /* 227321369Sdim * If this is a big reply, use a cluster 228321369Sdim */ 229321369Sdim mreq->m_len = 0; 230321369Sdim if (siz >= MINCLSIZE) { 231321369Sdim MCLGET(mreq, M_WAITOK); 232321369Sdim } 233321369Sdim mb = mreq; 234321369Sdim bpos = mtod(mb, caddr_t); 235321369Sdim 236321369Sdim if (err != NFSERR_RETVOID) { 237321369Sdim tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 238321369Sdim if (err) 239321369Sdim *tl = txdr_unsigned(nfsrv_errmap(nd, err)); 240321369Sdim else 241321369Sdim *tl = 0; 242321369Sdim } 243321369Sdim 244321369Sdim *mbp = mb; 245321369Sdim *bposp = bpos; 246360784Sdim if (err != 0 && err != NFSERR_RETVOID) 247321369Sdim nfsrvstats.srvrpc_errs++; 248321369Sdim 249321369Sdim return (mreq); 250249259Sdim} 251321369Sdim 252249259Sdimstatic void 253249259Sdimnfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) 254249259Sdim{ 255249259Sdim rpcproc_t procnum; 256249259Sdim int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp, 257249259Sdim struct mbuf **mreqp); 258249259Sdim int flag; 259249259Sdim struct nfsrv_descript nd; 260249259Sdim struct mbuf *mreq, *mrep; 261249259Sdim int error; 262249259Sdim 263314564Sdim if (rqst->rq_vers == NFS_VER2) { 264249259Sdim if (rqst->rq_proc > NFSV2PROC_STATFS) { 265314564Sdim svcerr_noproc(rqst); 266 svc_freereq(rqst); 267 return; 268 } 269 procnum = nfsrv_nfsv3_procid[rqst->rq_proc]; 270 flag = 0; 271 } else { 272 if (rqst->rq_proc >= NFS_NPROCS) { 273 svcerr_noproc(rqst); 274 svc_freereq(rqst); 275 return; 276 } 277 procnum = rqst->rq_proc; 278 flag = ND_NFSV3; 279 } 280 proc = nfsrv3_procs[procnum]; 281 282 mreq = mrep = NULL; 283 mreq = rqst->rq_args; 284 rqst->rq_args = NULL; 285 (void)nfs_realign(&mreq, M_WAITOK); 286 287 /* 288 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 - 289 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP 290 * mounts. 291 */ 292 memset(&nd, 0, sizeof(nd)); 293 nd.nd_md = nd.nd_mrep = mreq; 294 nd.nd_dpos = mtod(mreq, caddr_t); 295 nd.nd_nam = svc_getrpccaller(rqst); 296 nd.nd_nam2 = rqst->rq_addr; 297 nd.nd_procnum = procnum; 298 nd.nd_cr = NULL; 299 nd.nd_flag = flag; 300 301 if (nfs_privport) { 302 /* Check if source port is privileged */ 303 u_short port; 304 struct sockaddr *nam = nd.nd_nam; 305 struct sockaddr_in *sin; 306 307 sin = (struct sockaddr_in *)nam; 308 /* 309 * INET/INET6 - same code: 310 * sin_port and sin6_port are at same offset 311 */ 312 port = ntohs(sin->sin_port); 313 if (port >= IPPORT_RESERVED && 314 nd.nd_procnum != NFSPROC_NULL) { 315#ifdef INET6 316 char b6[INET6_ADDRSTRLEN]; 317#if defined(KLD_MODULE) 318 /* Do not use ip6_sprintf: the nfs module should work without INET6. */ 319#define ip6_sprintf(buf, a) \ 320 (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \ 321 (a)->s6_addr16[0], (a)->s6_addr16[1], \ 322 (a)->s6_addr16[2], (a)->s6_addr16[3], \ 323 (a)->s6_addr16[4], (a)->s6_addr16[5], \ 324 (a)->s6_addr16[6], (a)->s6_addr16[7]), \ 325 (buf)) 326#endif 327#endif 328 printf("NFS request from unprivileged port (%s:%d)\n", 329#ifdef INET6 330 sin->sin_family == AF_INET6 ? 331 ip6_sprintf(b6, &satosin6(sin)->sin6_addr) : 332#if defined(KLD_MODULE) 333#undef ip6_sprintf 334#endif 335#endif 336 inet_ntoa(sin->sin_addr), port); 337 m_freem(mreq); 338 svcerr_weakauth(rqst); 339 svc_freereq(rqst); 340 return; 341 } 342 } 343 344 if (proc != nfsrv_null) { 345 if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) { 346 m_freem(mreq); 347 svcerr_weakauth(rqst); 348 svc_freereq(rqst); 349 return; 350 } 351#ifdef MAC 352 mac_cred_associate_nfsd(nd.nd_cr); 353#endif 354 } 355 nfsrvstats.srvrpccnt[nd.nd_procnum]++; 356 357 error = proc(&nd, NULL, &mrep); 358 359 if (nd.nd_cr) 360 crfree(nd.nd_cr); 361 362 if (mrep == NULL) { 363 svcerr_decode(rqst); 364 svc_freereq(rqst); 365 return; 366 } 367 if (error && error != NFSERR_RETVOID) { 368 svcerr_systemerr(rqst); 369 svc_freereq(rqst); 370 return; 371 } 372 if (nd.nd_repstat & NFSERR_AUTHERR) { 373 svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); 374 m_freem(mrep); 375 } else { 376 if (!svc_sendreply_mbuf(rqst, mrep)) 377 svcerr_systemerr(rqst); 378 } 379 svc_freereq(rqst); 380} 381 382/* 383 * Adds a socket to the list for servicing by nfsds. 384 */ 385static int 386nfssvc_addsock(struct file *fp, struct thread *td) 387{ 388 int siz; 389 struct socket *so; 390 int error; 391 SVCXPRT *xprt; 392 393 so = fp->f_data; 394 395 siz = sb_max_adj; 396 error = soreserve(so, siz, siz); 397 if (error) 398 return (error); 399 400 /* 401 * Steal the socket from userland so that it doesn't close 402 * unexpectedly. 403 */ 404 if (so->so_type == SOCK_DGRAM) 405 xprt = svc_dg_create(nfsrv_pool, so, 0, 0); 406 else 407 xprt = svc_vc_create(nfsrv_pool, so, 0, 0); 408 if (xprt) { 409 fp->f_ops = &badfileops; 410 fp->f_data = NULL; 411 svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); 412 svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); 413 SVC_RELEASE(xprt); 414 } 415 416 return (0); 417} 418 419/* 420 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests 421 * until it is killed by a signal. 422 */ 423static int 424nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) 425{ 426 char principal[128]; 427 int error; 428 429 if (args) { 430 error = copyinstr(args->principal, principal, 431 sizeof(principal), NULL); 432 if (error) 433 return (error); 434 } else { 435 memcpy(principal, "nfs@", 4); 436 getcredhostname(td->td_ucred, principal + 4, 437 sizeof(principal) - 4); 438 } 439 440 /* 441 * Only the first nfsd actually does any work. The RPC code 442 * adds threads to it as needed. Any extra processes offered 443 * by nfsd just exit. If nfsd is new enough, it will call us 444 * once with a structure that specifies how many threads to 445 * use. 446 */ 447 NFSD_LOCK(); 448 if (nfsrv_numnfsd == 0) { 449 nfsrv_numnfsd++; 450 451 NFSD_UNLOCK(); 452 453 rpc_gss_set_svc_name_call(principal, "kerberosv5", 454 GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); 455 rpc_gss_set_svc_name_call(principal, "kerberosv5", 456 GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); 457 458 if (args) { 459 nfsrv_pool->sp_minthreads = args->minthreads; 460 nfsrv_pool->sp_maxthreads = args->maxthreads; 461 } else { 462 nfsrv_pool->sp_minthreads = 4; 463 nfsrv_pool->sp_maxthreads = 4; 464 } 465 466 svc_run(nfsrv_pool); 467 468 rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); 469 rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); 470 471 NFSD_LOCK(); 472 nfsrv_numnfsd--; 473 nfsrv_init(TRUE); 474 } 475 NFSD_UNLOCK(); 476 477 return (0); 478} 479 480/* 481 * Size the NFS server's duplicate request cache at 1/2 the 482 * nmbclusters, floating within a (64, 2048) range. This is to 483 * prevent all mbuf clusters being tied up in the NFS dupreq 484 * cache for small values of nmbclusters. 485 */ 486static size_t 487nfsrv_replay_size(void) 488{ 489 size_t replaysiz; 490 491 replaysiz = nmbclusters / 2; 492 if (replaysiz > NFSRVCACHE_MAX_SIZE) 493 replaysiz = NFSRVCACHE_MAX_SIZE; 494 if (replaysiz < NFSRVCACHE_MIN_SIZE) 495 replaysiz = NFSRVCACHE_MIN_SIZE; 496 replaysiz *= MCLBYTES; 497 498 return (replaysiz); 499} 500 501/* 502 * Called when nmbclusters changes - we resize the replay cache 503 * accordingly. 504 */ 505static void 506nfsrv_nmbclusters_change(void *tag) 507{ 508 509 if (nfsrv_pool) 510 replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size()); 511} 512 513/* 514 * Initialize the data structures for the server. 515 * Handshake with any new nfsds starting up to avoid any chance of 516 * corruption. 517 */ 518void 519nfsrv_init(int terminating) 520{ 521 522 NFSD_LOCK_ASSERT(); 523 524 if (terminating) { 525 NFSD_UNLOCK(); 526 EVENTHANDLER_DEREGISTER(nmbclusters_change, 527 nfsrv_nmbclusters_tag); 528 svcpool_destroy(nfsrv_pool); 529 nfsrv_pool = NULL; 530 NFSD_LOCK(); 531 } else 532 nfs_pub.np_valid = 0; 533 534 NFSD_UNLOCK(); 535 536 nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv)); 537 nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size()); 538 nfsrv_pool->sp_assign = fhaold_assign; 539 nfsrv_pool->sp_done = fha_nd_complete; 540 nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change, 541 nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST); 542 543 NFSD_LOCK(); 544} 545