nlm_advlock.c revision 180025
1/*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/nlm/nlm_advlock.c 180025 2008-06-26 10:21:54Z dfr $"); 30 31#include <sys/param.h> 32#include <sys/fcntl.h> 33#include <sys/kernel.h> 34#include <sys/limits.h> 35#include <sys/lock.h> 36#include <sys/lockf.h> 37#include <sys/malloc.h> 38#include <sys/mount.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/syslog.h> 42#include <sys/systm.h> 43#include <sys/unistd.h> 44#include <sys/vnode.h> 45 46#include <rpc/rpcclnt.h> 47#include <nfs/nfsproto.h> 48#include <nfsclient/nfs.h> 49#include <nfsclient/nfsnode.h> 50#include <nfsclient/nfsmount.h> 51 52#include <nlm/nlm_prot.h> 53#include <nlm/nlm.h> 54 55/* 56 * We need to keep track of the svid values used for F_FLOCK locks. 57 */ 58struct nlm_file_svid { 59 int ns_refs; /* thread count + 1 if active */ 60 int ns_svid; /* on-the-wire SVID for this file */ 61 struct ucred *ns_ucred; /* creds to use for lock recovery */ 62 void *ns_id; /* local struct file pointer */ 63 bool_t ns_active; /* TRUE if we own a lock */ 64 LIST_ENTRY(nlm_file_svid) ns_link; 65}; 66LIST_HEAD(nlm_file_svid_list, nlm_file_svid); 67 68#define NLM_SVID_HASH_SIZE 256 69struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE]; 70 71struct mtx nlm_svid_lock; 72static struct unrhdr *nlm_svid_allocator; 73static volatile u_int nlm_xid = 1; 74 75static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, 76 rpcvers_t vers, struct timeval *timo, int retries, 77 struct vnode *vp, int op, struct flock *fl, int flags, 78 int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim); 79static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, 80 rpcvers_t vers, struct timeval *timo, int retries, 81 struct vnode *vp, int op, struct flock *fl, int flags, 82 int svid, size_t fhlen, void *fh, off_t size); 83static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, 84 rpcvers_t vers, struct timeval *timo, int retries, 85 struct vnode *vp, int op, struct flock *fl, int flags, 86 int svid, size_t fhlen, void *fh, off_t size); 87static int nlm_map_status(nlm4_stats stat); 88static struct nlm_file_svid *nlm_find_svid(void *id); 89static void nlm_free_svid(struct nlm_file_svid *nf); 90static int nlm_init_lock(struct flock *fl, int flags, int svid, 91 rpcvers_t vers, size_t fhlen, void *fh, off_t size, 92 struct nlm4_lock *lock, char oh_space[32]); 93 94static void 95nlm_client_init(void *dummy) 96{ 97 int i; 98 99 mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF); 100 nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock); 101 for (i = 0; i < NLM_SVID_HASH_SIZE; i++) 102 LIST_INIT(&nlm_file_svids[i]); 103} 104SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL); 105 106static int 107nlm_msg(struct thread *td, const char *server, const char *msg, int error) 108{ 109 struct proc *p; 110 111 p = td ? td->td_proc : NULL; 112 if (error) { 113 tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 114 msg, error); 115 } else { 116 tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 117 } 118 return (0); 119} 120 121struct nlm_feedback_arg { 122 bool_t nf_printed; 123 struct nfsmount *nf_nmp; 124}; 125 126static void 127nlm_down(struct nlm_feedback_arg *nf, struct thread *td, 128 const char *msg, int error) 129{ 130 struct nfsmount *nmp = nf->nf_nmp; 131 132 if (nmp == NULL) 133 return; 134 mtx_lock(&nmp->nm_mtx); 135 if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 136 nmp->nm_state |= NFSSTA_LOCKTIMEO; 137 mtx_unlock(&nmp->nm_mtx); 138 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 139 VQ_NOTRESPLOCK, 0); 140 } else { 141 mtx_unlock(&nmp->nm_mtx); 142 } 143 144 nf->nf_printed = TRUE; 145 nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 146} 147 148static void 149nlm_up(struct nlm_feedback_arg *nf, struct thread *td, 150 const char *msg) 151{ 152 struct nfsmount *nmp = nf->nf_nmp; 153 154 if (!nf->nf_printed) 155 return; 156 157 nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 158 159 mtx_lock(&nmp->nm_mtx); 160 if (nmp->nm_state & NFSSTA_LOCKTIMEO) { 161 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 162 mtx_unlock(&nmp->nm_mtx); 163 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 164 VQ_NOTRESPLOCK, 1); 165 } else { 166 mtx_unlock(&nmp->nm_mtx); 167 } 168} 169 170static void 171nlm_feedback(int type, int proc, void *arg) 172{ 173 struct thread *td = curthread; 174 struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg; 175 176 switch (type) { 177 case FEEDBACK_REXMIT2: 178 case FEEDBACK_RECONNECT: 179 nlm_down(nf, td, "lockd not responding", 0); 180 break; 181 182 case FEEDBACK_OK: 183 nlm_up(nf, td, "lockd is alive again"); 184 break; 185 } 186} 187 188/* 189 * nlm_advlock -- 190 * NFS advisory byte-level locks. 191 */ 192static int 193nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, 194 int flags, bool_t reclaim, bool_t unlock_vp) 195{ 196 struct thread *td = curthread; 197 struct nfsmount *nmp; 198 struct nfsnode *np; 199 off_t size; 200 size_t fhlen; 201 union nfsfh fh; 202 struct sockaddr *sa; 203 struct sockaddr_storage ss; 204 char servername[MNAMELEN]; 205 struct timeval timo; 206 int retries; 207 rpcvers_t vers; 208 struct nlm_host *host; 209 struct rpc_callextra ext; 210 struct nlm_feedback_arg nf; 211 AUTH *auth; 212 struct ucred *cred; 213 struct nlm_file_svid *ns; 214 int svid; 215 int error; 216 217 ASSERT_VOP_LOCKED(vp, "nlm_advlock_1"); 218 219 /* 220 * Push any pending writes to the server and flush our cache 221 * so that if we are contending with another machine for a 222 * file, we get whatever they wrote and vice-versa. 223 */ 224 if (op == F_SETLK || op == F_UNLCK) 225 nfs_vinvalbuf(vp, V_SAVE, td, 1); 226 227 np = VTONFS(vp); 228 nmp = VFSTONFS(vp->v_mount); 229 size = np->n_size; 230 sa = nmp->nm_nam; 231 memcpy(&ss, sa, sa->sa_len); 232 sa = (struct sockaddr *) &ss; 233 strcpy(servername, nmp->nm_hostname); 234 fhlen = np->n_fhsize; 235 memcpy(&fh.fh_bytes, np->n_fhp, fhlen); 236 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 237 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); 238 if (NFS_ISV3(vp)) 239 vers = NLM_VERS4; 240 else 241 vers = NLM_VERS; 242 243 if (nmp->nm_flag & NFSMNT_SOFT) 244 retries = nmp->nm_retry; 245 else 246 retries = INT_MAX; 247 248 if (unlock_vp) 249 VOP_UNLOCK(vp, 0); 250 251 /* 252 * We need to switch to mount-point creds so that we can send 253 * packets from a privileged port. 254 */ 255 cred = td->td_ucred; 256 td->td_ucred = vp->v_mount->mnt_cred; 257 258 host = nlm_find_host_by_name(servername, sa, vers); 259 auth = authunix_create(cred); 260 memset(&ext, 0, sizeof(ext)); 261 262 nf.nf_printed = FALSE; 263 nf.nf_nmp = nmp; 264 ext.rc_auth = auth; 265 266 ext.rc_feedback = nlm_feedback; 267 ext.rc_feedback_arg = &nf; 268 269 ns = NULL; 270 if (flags & F_FLOCK) { 271 ns = nlm_find_svid(id); 272 KASSERT(fl->l_start == 0 && fl->l_len == 0, 273 ("F_FLOCK lock requests must be whole-file locks")); 274 if (!ns->ns_ucred) { 275 /* 276 * Remember the creds used for locking in case 277 * we need to recover the lock later. 278 */ 279 ns->ns_ucred = crdup(cred); 280 } 281 svid = ns->ns_svid; 282 } else if (flags & F_REMOTE) { 283 /* 284 * If we are recovering after a server restart or 285 * trashing locks on a force unmount, use the same 286 * svid as last time. 287 */ 288 svid = fl->l_pid; 289 } else { 290 svid = ((struct proc *) id)->p_pid; 291 } 292 293 switch(op) { 294 case F_SETLK: 295 if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT) 296 && fl->l_type == F_WRLCK) { 297 /* 298 * The semantics for flock(2) require that any 299 * shared lock on the file must be released 300 * before an exclusive lock is granted. The 301 * local locking code interprets this by 302 * unlocking the file before sleeping on a 303 * blocked exclusive lock request. We 304 * approximate this by first attempting 305 * non-blocking and if that fails, we unlock 306 * the file and block. 307 */ 308 error = nlm_setlock(host, &ext, vers, &timo, retries, 309 vp, F_SETLK, fl, flags & ~F_WAIT, 310 svid, fhlen, &fh.fh_bytes, size, reclaim); 311 if (error == EAGAIN) { 312 fl->l_type = F_UNLCK; 313 error = nlm_clearlock(host, &ext, vers, &timo, 314 retries, vp, F_UNLCK, fl, flags, 315 svid, fhlen, &fh.fh_bytes, size); 316 fl->l_type = F_WRLCK; 317 if (!error) { 318 mtx_lock(&nlm_svid_lock); 319 if (ns->ns_active) { 320 ns->ns_refs--; 321 ns->ns_active = FALSE; 322 } 323 mtx_unlock(&nlm_svid_lock); 324 flags |= F_WAIT; 325 error = nlm_setlock(host, &ext, vers, 326 &timo, retries, vp, F_SETLK, fl, 327 flags, svid, fhlen, &fh.fh_bytes, 328 size, reclaim); 329 } 330 } 331 } else { 332 error = nlm_setlock(host, &ext, vers, &timo, retries, 333 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, 334 size, reclaim); 335 } 336 if (!error && ns) { 337 mtx_lock(&nlm_svid_lock); 338 if (!ns->ns_active) { 339 /* 340 * Add one to the reference count to 341 * hold onto the SVID for the lifetime 342 * of the lock. Note that since 343 * F_FLOCK only supports whole-file 344 * locks, there can only be one active 345 * lock for this SVID. 346 */ 347 ns->ns_refs++; 348 ns->ns_active = TRUE; 349 } 350 mtx_unlock(&nlm_svid_lock); 351 } 352 break; 353 354 case F_UNLCK: 355 error = nlm_clearlock(host, &ext, vers, &timo, retries, 356 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); 357 if (!error && ns) { 358 mtx_lock(&nlm_svid_lock); 359 if (ns->ns_active) { 360 ns->ns_refs--; 361 ns->ns_active = FALSE; 362 } 363 mtx_unlock(&nlm_svid_lock); 364 } 365 break; 366 367 case F_GETLK: 368 error = nlm_getlock(host, &ext, vers, &timo, retries, 369 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); 370 break; 371 372 default: 373 error = EINVAL; 374 break; 375 } 376 377 if (ns) 378 nlm_free_svid(ns); 379 380 td->td_ucred = cred; 381 AUTH_DESTROY(auth); 382 383 nlm_host_release(host); 384 385 return (error); 386} 387 388int 389nlm_advlock(struct vop_advlock_args *ap) 390{ 391 392 return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl, 393 ap->a_flags, FALSE, TRUE)); 394} 395 396/* 397 * Set the creds of td to the creds of the given lock's owner. The new 398 * creds reference count will be incremented via crhold. The caller is 399 * responsible for calling crfree and restoring td's original creds. 400 */ 401static void 402nlm_set_creds_for_lock(struct thread *td, struct flock *fl) 403{ 404 int i; 405 struct nlm_file_svid *ns; 406 struct proc *p; 407 struct ucred *cred; 408 409 cred = NULL; 410 if (fl->l_pid > PID_MAX) { 411 /* 412 * If this was originally a F_FLOCK-style lock, we 413 * recorded the creds used when it was originally 414 * locked in the nlm_file_svid structure. 415 */ 416 mtx_lock(&nlm_svid_lock); 417 for (i = 0; i < NLM_SVID_HASH_SIZE; i++) { 418 for (ns = LIST_FIRST(&nlm_file_svids[i]); ns; 419 ns = LIST_NEXT(ns, ns_link)) { 420 if (ns->ns_svid == fl->l_pid) { 421 cred = crhold(ns->ns_ucred); 422 break; 423 } 424 } 425 } 426 mtx_unlock(&nlm_svid_lock); 427 } else { 428 /* 429 * This lock is owned by a process. Get a reference to 430 * the process creds. 431 */ 432 p = pfind(fl->l_pid); 433 if (p) { 434 cred = crhold(p->p_ucred); 435 PROC_UNLOCK(p); 436 } 437 } 438 439 /* 440 * If we can't find a cred, fall back on the recovery 441 * thread's cred. 442 */ 443 if (!cred) { 444 cred = crhold(td->td_ucred); 445 } 446 447 td->td_ucred = cred; 448} 449 450static int 451nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg) 452{ 453 struct flock newfl; 454 struct thread *td = curthread; 455 struct ucred *oldcred; 456 int error; 457 458 newfl = *fl; 459 newfl.l_type = F_UNLCK; 460 461 oldcred = td->td_ucred; 462 nlm_set_creds_for_lock(td, &newfl); 463 464 error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE, 465 FALSE, FALSE); 466 467 crfree(td->td_ucred); 468 td->td_ucred = oldcred; 469 470 return (error); 471} 472 473int 474nlm_reclaim(struct vop_reclaim_args *ap) 475{ 476 477 nlm_cancel_wait(ap->a_vp); 478 lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL); 479 return (0); 480} 481 482struct nlm_recovery_context { 483 struct nlm_host *nr_host; /* host we are recovering */ 484 int nr_state; /* remote NSM state for recovery */ 485}; 486 487static int 488nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg) 489{ 490 struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg; 491 struct thread *td = curthread; 492 struct ucred *oldcred; 493 int state, error; 494 495 /* 496 * If the remote NSM state changes during recovery, the host 497 * must have rebooted a second time. In that case, we must 498 * restart the recovery. 499 */ 500 state = nlm_host_get_state(nr->nr_host); 501 if (nr->nr_state != state) 502 return (ERESTART); 503 504 error = vn_lock(vp, LK_SHARED); 505 if (error) 506 return (error); 507 508 oldcred = td->td_ucred; 509 nlm_set_creds_for_lock(td, fl); 510 511 error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE, 512 TRUE, TRUE); 513 514 crfree(td->td_ucred); 515 td->td_ucred = oldcred; 516 517 return (error); 518} 519 520void 521nlm_client_recovery(struct nlm_host *host) 522{ 523 struct nlm_recovery_context nr; 524 int sysid, error; 525 526 sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host); 527 do { 528 nr.nr_host = host; 529 nr.nr_state = nlm_host_get_state(host); 530 error = lf_iteratelocks_sysid(sysid, 531 nlm_client_recover_lock, &nr); 532 } while (error == ERESTART); 533} 534 535static void 536nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) 537{ 538 539 dst->caller_name = src->caller_name; 540 dst->fh = src->fh; 541 dst->oh = src->oh; 542 dst->svid = src->svid; 543 dst->l_offset = src->l_offset; 544 dst->l_len = src->l_len; 545} 546 547static void 548nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src) 549{ 550 551 dst->exclusive = src->exclusive; 552 dst->svid = src->svid; 553 dst->oh = src->oh; 554 dst->l_offset = src->l_offset; 555 dst->l_len = src->l_len; 556} 557 558static void 559nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src) 560{ 561 dst->cookie = src->cookie; 562 dst->stat.stat = (enum nlm4_stats) src->stat.stat; 563} 564 565static enum clnt_stat 566nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client, 567 struct rpc_callextra *ext, struct timeval timo) 568{ 569 if (vers == NLM_VERS4) { 570 return nlm4_test_4(args, res, client, ext, timo); 571 } else { 572 nlm_testargs args1; 573 nlm_testres res1; 574 enum clnt_stat stat; 575 576 args1.cookie = args->cookie; 577 args1.exclusive = args->exclusive; 578 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 579 memset(&res1, 0, sizeof(res1)); 580 581 stat = nlm_test_1(&args1, &res1, client, ext, timo); 582 583 if (stat == RPC_SUCCESS) { 584 res->cookie = res1.cookie; 585 res->stat.stat = (enum nlm4_stats) res1.stat.stat; 586 if (res1.stat.stat == nlm_denied) 587 nlm_convert_to_nlm4_holder( 588 &res->stat.nlm4_testrply_u.holder, 589 &res1.stat.nlm_testrply_u.holder); 590 } 591 592 return (stat); 593 } 594} 595 596static enum clnt_stat 597nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client, 598 struct rpc_callextra *ext, struct timeval timo) 599{ 600 if (vers == NLM_VERS4) { 601 return nlm4_lock_4(args, res, client, ext, timo); 602 } else { 603 nlm_lockargs args1; 604 nlm_res res1; 605 enum clnt_stat stat; 606 607 args1.cookie = args->cookie; 608 args1.block = args->block; 609 args1.exclusive = args->exclusive; 610 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 611 args1.reclaim = args->reclaim; 612 args1.state = args->state; 613 memset(&res1, 0, sizeof(res1)); 614 615 stat = nlm_lock_1(&args1, &res1, client, ext, timo); 616 617 if (stat == RPC_SUCCESS) { 618 nlm_convert_to_nlm4_res(res, &res1); 619 } 620 621 return (stat); 622 } 623} 624 625static enum clnt_stat 626nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client, 627 struct rpc_callextra *ext, struct timeval timo) 628{ 629 if (vers == NLM_VERS4) { 630 return nlm4_cancel_4(args, res, client, ext, timo); 631 } else { 632 nlm_cancargs args1; 633 nlm_res res1; 634 enum clnt_stat stat; 635 636 args1.cookie = args->cookie; 637 args1.block = args->block; 638 args1.exclusive = args->exclusive; 639 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 640 memset(&res1, 0, sizeof(res1)); 641 642 stat = nlm_cancel_1(&args1, &res1, client, ext, timo); 643 644 if (stat == RPC_SUCCESS) { 645 nlm_convert_to_nlm4_res(res, &res1); 646 } 647 648 return (stat); 649 } 650} 651 652static enum clnt_stat 653nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client, 654 struct rpc_callextra *ext, struct timeval timo) 655{ 656 if (vers == NLM_VERS4) { 657 return nlm4_unlock_4(args, res, client, ext, timo); 658 } else { 659 nlm_unlockargs args1; 660 nlm_res res1; 661 enum clnt_stat stat; 662 663 args1.cookie = args->cookie; 664 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 665 memset(&res1, 0, sizeof(res1)); 666 667 stat = nlm_unlock_1(&args1, &res1, client, ext, timo); 668 669 if (stat == RPC_SUCCESS) { 670 nlm_convert_to_nlm4_res(res, &res1); 671 } 672 673 return (stat); 674 } 675} 676 677/* 678 * Called after a lock request (set or clear) succeeded. We record the 679 * details in the local lock manager. Note that since the remote 680 * server has granted the lock, we can be sure that it doesn't 681 * conflict with any other locks we have in the local lock manager. 682 * 683 * Since it is possible that host may also make NLM client requests to 684 * our NLM server, we use a different sysid value to record our own 685 * client locks. 686 * 687 * Note that since it is possible for us to receive replies from the 688 * server in a different order than the locks were granted (e.g. if 689 * many local threads are contending for the same lock), we must use a 690 * blocking operation when registering with the local lock manager. 691 * We expect that any actual wait will be rare and short hence we 692 * ignore signals for this. 693 */ 694static void 695nlm_record_lock(struct vnode *vp, int op, struct flock *fl, 696 int svid, int sysid, off_t size) 697{ 698 struct vop_advlockasync_args a; 699 struct flock newfl; 700 int error; 701 702 a.a_vp = vp; 703 a.a_id = NULL; 704 a.a_op = op; 705 a.a_fl = &newfl; 706 a.a_flags = F_REMOTE|F_WAIT|F_NOINTR; 707 a.a_task = NULL; 708 a.a_cookiep = NULL; 709 newfl.l_start = fl->l_start; 710 newfl.l_len = fl->l_len; 711 newfl.l_type = fl->l_type; 712 newfl.l_whence = fl->l_whence; 713 newfl.l_pid = svid; 714 newfl.l_sysid = NLM_SYSID_CLIENT | sysid; 715 716 error = lf_advlockasync(&a, &vp->v_lockf, size); 717 KASSERT(error == 0, ("Failed to register NFS lock locally - error=%d", 718 error)); 719} 720 721static int 722nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, 723 rpcvers_t vers, struct timeval *timo, int retries, 724 struct vnode *vp, int op, struct flock *fl, int flags, 725 int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim) 726{ 727 struct nlm4_lockargs args; 728 char oh_space[32]; 729 struct nlm4_res res; 730 u_int xid; 731 CLIENT *client; 732 enum clnt_stat stat; 733 int retry, block, exclusive; 734 void *wait_handle = NULL; 735 int error; 736 737 memset(&args, 0, sizeof(args)); 738 memset(&res, 0, sizeof(res)); 739 740 block = (flags & F_WAIT) ? TRUE : FALSE; 741 exclusive = (fl->l_type == F_WRLCK); 742 743 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 744 &args.alock, oh_space); 745 if (error) 746 return (error); 747 args.block = block; 748 args.exclusive = exclusive; 749 args.reclaim = reclaim; 750 args.state = nlm_nsm_state; 751 752 retry = 5*hz; 753 for (;;) { 754 client = nlm_host_get_rpc(host); 755 if (!client) 756 return (ENOLCK); /* XXX retry? */ 757 758 if (block) 759 wait_handle = nlm_register_wait_lock(&args.alock, vp); 760 761 xid = atomic_fetchadd_int(&nlm_xid, 1); 762 args.cookie.n_len = sizeof(xid); 763 args.cookie.n_bytes = (char*) &xid; 764 765 stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo); 766 767 CLNT_RELEASE(client); 768 769 if (stat != RPC_SUCCESS) { 770 if (block) 771 nlm_deregister_wait_lock(wait_handle); 772 if (retries) { 773 retries--; 774 continue; 775 } 776 return (EINVAL); 777 } 778 779 /* 780 * Free res.cookie. 781 */ 782 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 783 784 if (block && res.stat.stat != nlm4_blocked) 785 nlm_deregister_wait_lock(wait_handle); 786 787 if (res.stat.stat == nlm4_denied_grace_period) { 788 /* 789 * The server has recently rebooted and is 790 * giving old clients a change to reclaim 791 * their locks. Wait for a few seconds and try 792 * again. 793 */ 794 error = tsleep(&args, PCATCH, "nlmgrace", retry); 795 if (error && error != EWOULDBLOCK) 796 return (error); 797 retry = 2*retry; 798 if (retry > 30*hz) 799 retry = 30*hz; 800 continue; 801 } 802 803 if (block && res.stat.stat == nlm4_blocked) { 804 /* 805 * The server should call us back with a 806 * granted message when the lock succeeds. In 807 * order to deal with broken servers, lost 808 * granted messages and server reboots, we 809 * will also re-try every few seconds. 810 */ 811 error = nlm_wait_lock(wait_handle, retry); 812 if (error == EWOULDBLOCK) { 813 retry = 2*retry; 814 if (retry > 30*hz) 815 retry = 30*hz; 816 continue; 817 } 818 if (error) { 819 /* 820 * We need to call the server to 821 * cancel our lock request. 822 */ 823 nlm4_cancargs cancel; 824 825 memset(&cancel, 0, sizeof(cancel)); 826 827 xid = atomic_fetchadd_int(&nlm_xid, 1); 828 cancel.cookie.n_len = sizeof(xid); 829 cancel.cookie.n_bytes = (char*) &xid; 830 cancel.block = block; 831 cancel.exclusive = exclusive; 832 cancel.alock = args.alock; 833 834 do { 835 client = nlm_host_get_rpc(host); 836 if (!client) 837 /* XXX retry? */ 838 return (ENOLCK); 839 840 stat = nlm_cancel_rpc(vers, &cancel, 841 &res, client, ext, *timo); 842 843 CLNT_RELEASE(client); 844 845 if (stat != RPC_SUCCESS) { 846 /* 847 * We need to cope 848 * with temporary 849 * network partitions 850 * as well as server 851 * reboots. This means 852 * we have to keep 853 * trying to cancel 854 * until the server 855 * wakes up again. 856 */ 857 pause("nlmcancel", 10*hz); 858 } 859 } while (stat != RPC_SUCCESS); 860 861 /* 862 * Free res.cookie. 863 */ 864 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 865 866 switch (res.stat.stat) { 867 case nlm_denied: 868 /* 869 * There was nothing 870 * to cancel. We are 871 * going to go ahead 872 * and assume we got 873 * the lock. 874 */ 875 error = 0; 876 break; 877 878 case nlm4_denied_grace_period: 879 /* 880 * The server has 881 * recently rebooted - 882 * treat this as a 883 * successful 884 * cancellation. 885 */ 886 break; 887 888 case nlm4_granted: 889 /* 890 * We managed to 891 * cancel. 892 */ 893 break; 894 895 default: 896 /* 897 * Broken server 898 * implementation - 899 * can't really do 900 * anything here. 901 */ 902 break; 903 } 904 905 } 906 } else { 907 error = nlm_map_status(res.stat.stat); 908 } 909 910 if (!error && !reclaim) { 911 nlm_record_lock(vp, op, fl, args.alock.svid, 912 nlm_host_get_sysid(host), size); 913 nlm_host_monitor(host, 0); 914 } 915 916 return (error); 917 } 918} 919 920static int 921nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, 922 rpcvers_t vers, struct timeval *timo, int retries, 923 struct vnode *vp, int op, struct flock *fl, int flags, 924 int svid, size_t fhlen, void *fh, off_t size) 925{ 926 struct nlm4_unlockargs args; 927 char oh_space[32]; 928 struct nlm4_res res; 929 u_int xid; 930 CLIENT *client; 931 enum clnt_stat stat; 932 int error; 933 934 memset(&args, 0, sizeof(args)); 935 memset(&res, 0, sizeof(res)); 936 937 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 938 &args.alock, oh_space); 939 if (error) 940 return (error); 941 942 for (;;) { 943 client = nlm_host_get_rpc(host); 944 if (!client) 945 return (ENOLCK); /* XXX retry? */ 946 947 xid = atomic_fetchadd_int(&nlm_xid, 1); 948 args.cookie.n_len = sizeof(xid); 949 args.cookie.n_bytes = (char*) &xid; 950 951 stat = nlm_unlock_rpc(vers, &args, &res, client, ext, *timo); 952 953 CLNT_RELEASE(client); 954 955 if (stat != RPC_SUCCESS) { 956 if (retries) { 957 retries--; 958 continue; 959 } 960 return (EINVAL); 961 } 962 963 /* 964 * Free res.cookie. 965 */ 966 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 967 968 if (res.stat.stat == nlm4_denied_grace_period) { 969 /* 970 * The server has recently rebooted and is 971 * giving old clients a change to reclaim 972 * their locks. Wait for a few seconds and try 973 * again. 974 */ 975 error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); 976 if (error && error != EWOULDBLOCK) 977 return (error); 978 continue; 979 } 980 981 /* 982 * If we are being called via nlm_reclaim (which will 983 * use the F_REMOTE flag), don't record the lock 984 * operation in the local lock manager since the vnode 985 * is going away. 986 */ 987 if (!(flags & F_REMOTE)) 988 nlm_record_lock(vp, op, fl, args.alock.svid, 989 nlm_host_get_sysid(host), size); 990 991 return (0); 992 } 993} 994 995static int 996nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, 997 rpcvers_t vers, struct timeval *timo, int retries, 998 struct vnode *vp, int op, struct flock *fl, int flags, 999 int svid, size_t fhlen, void *fh, off_t size) 1000{ 1001 struct nlm4_testargs args; 1002 char oh_space[32]; 1003 struct nlm4_testres res; 1004 u_int xid; 1005 CLIENT *client; 1006 enum clnt_stat stat; 1007 int exclusive; 1008 int error; 1009 1010 KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK")); 1011 1012 memset(&args, 0, sizeof(args)); 1013 memset(&res, 0, sizeof(res)); 1014 1015 exclusive = (fl->l_type == F_WRLCK); 1016 1017 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 1018 &args.alock, oh_space); 1019 if (error) 1020 return (error); 1021 args.exclusive = exclusive; 1022 1023 for (;;) { 1024 client = nlm_host_get_rpc(host); 1025 if (!client) 1026 return (ENOLCK); /* XXX retry? */ 1027 1028 xid = atomic_fetchadd_int(&nlm_xid, 1); 1029 args.cookie.n_len = sizeof(xid); 1030 args.cookie.n_bytes = (char*) &xid; 1031 1032 stat = nlm_test_rpc(vers, &args, &res, client, ext, *timo); 1033 1034 CLNT_RELEASE(client); 1035 1036 if (stat != RPC_SUCCESS) { 1037 if (retries) { 1038 retries--; 1039 continue; 1040 } 1041 return (EINVAL); 1042 } 1043 1044 if (res.stat.stat == nlm4_denied_grace_period) { 1045 /* 1046 * The server has recently rebooted and is 1047 * giving old clients a change to reclaim 1048 * their locks. Wait for a few seconds and try 1049 * again. 1050 */ 1051 xdr_free((xdrproc_t) xdr_nlm4_testres, &res); 1052 error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); 1053 if (error && error != EWOULDBLOCK) 1054 return (error); 1055 continue; 1056 } 1057 1058 if (res.stat.stat == nlm4_denied) { 1059 struct nlm4_holder *h = 1060 &res.stat.nlm4_testrply_u.holder; 1061 fl->l_start = h->l_offset; 1062 fl->l_len = h->l_len; 1063 fl->l_pid = h->svid; 1064 if (h->exclusive) 1065 fl->l_type = F_WRLCK; 1066 else 1067 fl->l_type = F_RDLCK; 1068 fl->l_whence = SEEK_SET; 1069 fl->l_sysid = 0; 1070 } else { 1071 fl->l_type = F_UNLCK; 1072 } 1073 1074 xdr_free((xdrproc_t) xdr_nlm4_testres, &res); 1075 1076 return (0); 1077 } 1078} 1079 1080static int 1081nlm_map_status(nlm4_stats stat) 1082{ 1083 switch (stat) { 1084 case nlm4_granted: 1085 return (0); 1086 1087 case nlm4_denied: 1088 return (EAGAIN); 1089 1090 case nlm4_denied_nolocks: 1091 return (ENOLCK); 1092 1093 case nlm4_deadlck: 1094 return (EDEADLK); 1095 1096 case nlm4_rofs: 1097 return (EROFS); 1098 1099 case nlm4_stale_fh: 1100 return (ESTALE); 1101 1102 case nlm4_fbig: 1103 return (EFBIG); 1104 1105 case nlm4_failed: 1106 return (EACCES); 1107 1108 default: 1109 return (EINVAL); 1110 } 1111} 1112 1113static struct nlm_file_svid * 1114nlm_find_svid(void *id) 1115{ 1116 struct nlm_file_svid *ns, *newns; 1117 int h; 1118 1119 h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE; 1120 1121 mtx_lock(&nlm_svid_lock); 1122 LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { 1123 if (ns->ns_id == id) { 1124 ns->ns_refs++; 1125 break; 1126 } 1127 } 1128 mtx_unlock(&nlm_svid_lock); 1129 if (!ns) { 1130 int svid = alloc_unr(nlm_svid_allocator); 1131 newns = malloc(sizeof(struct nlm_file_svid), M_NLM, 1132 M_WAITOK); 1133 newns->ns_refs = 1; 1134 newns->ns_id = id; 1135 newns->ns_svid = svid; 1136 newns->ns_ucred = NULL; 1137 newns->ns_active = FALSE; 1138 1139 /* 1140 * We need to check for a race with some other 1141 * thread allocating a svid for this file. 1142 */ 1143 mtx_lock(&nlm_svid_lock); 1144 LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { 1145 if (ns->ns_id == id) { 1146 ns->ns_refs++; 1147 break; 1148 } 1149 } 1150 if (ns) { 1151 mtx_unlock(&nlm_svid_lock); 1152 free_unr(nlm_svid_allocator, newns->ns_svid); 1153 free(newns, M_NLM); 1154 } else { 1155 LIST_INSERT_HEAD(&nlm_file_svids[h], newns, 1156 ns_link); 1157 ns = newns; 1158 mtx_unlock(&nlm_svid_lock); 1159 } 1160 } 1161 1162 return (ns); 1163} 1164 1165static void 1166nlm_free_svid(struct nlm_file_svid *ns) 1167{ 1168 1169 mtx_lock(&nlm_svid_lock); 1170 ns->ns_refs--; 1171 if (!ns->ns_refs) { 1172 KASSERT(!ns->ns_active, ("Freeing active SVID")); 1173 LIST_REMOVE(ns, ns_link); 1174 mtx_unlock(&nlm_svid_lock); 1175 free_unr(nlm_svid_allocator, ns->ns_svid); 1176 if (ns->ns_ucred) 1177 crfree(ns->ns_ucred); 1178 free(ns, M_NLM); 1179 } else { 1180 mtx_unlock(&nlm_svid_lock); 1181 } 1182} 1183 1184static int 1185nlm_init_lock(struct flock *fl, int flags, int svid, 1186 rpcvers_t vers, size_t fhlen, void *fh, off_t size, 1187 struct nlm4_lock *lock, char oh_space[32]) 1188{ 1189 size_t oh_len; 1190 off_t start, len; 1191 1192 if (fl->l_whence == SEEK_END) { 1193 if (size > OFF_MAX 1194 || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) 1195 return (EOVERFLOW); 1196 start = size + fl->l_start; 1197 } else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) { 1198 start = fl->l_start; 1199 } else { 1200 return (EINVAL); 1201 } 1202 if (start < 0) 1203 return (EINVAL); 1204 if (fl->l_len < 0) { 1205 len = -fl->l_len; 1206 start -= len; 1207 if (start < 0) 1208 return (EINVAL); 1209 } else { 1210 len = fl->l_len; 1211 } 1212 1213 if (vers == NLM_VERS) { 1214 /* 1215 * Enforce range limits on V1 locks 1216 */ 1217 if (start > 0xffffffffLL || len > 0xffffffffLL) 1218 return (EOVERFLOW); 1219 } 1220 1221 snprintf(oh_space, 32, "%d@%s", svid, hostname); 1222 oh_len = strlen(oh_space); 1223 1224 memset(lock, 0, sizeof(*lock)); 1225 lock->caller_name = hostname; 1226 lock->fh.n_len = fhlen; 1227 lock->fh.n_bytes = fh; 1228 lock->oh.n_len = oh_len; 1229 lock->oh.n_bytes = oh_space; 1230 lock->svid = svid; 1231 lock->l_offset = start; 1232 lock->l_len = len; 1233 1234 return (0); 1235} 1236