nfs_lock.c revision 82204
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27300113Sscottl * 28300113Sscottl * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp 29300113Sscottl * $FreeBSD: head/sys/nfsclient/nfs_lock.c 82204 2001-08-23 16:13:59Z ache $ 30300113Sscottl */ 31300113Sscottl 32300113Sscottl#include <sys/param.h> 33300113Sscottl#include <sys/systm.h> 34300113Sscottl#include <sys/fcntl.h> 35300113Sscottl#include <sys/kernel.h> /* for hz */ 36300113Sscottl#include <sys/lock.h> 37300113Sscottl#include <sys/malloc.h> 38300113Sscottl#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ 39300113Sscottl#include <sys/mbuf.h> 40300113Sscottl#include <sys/mount.h> 41300113Sscottl#include <sys/namei.h> 42300113Sscottl#include <sys/proc.h> 43300113Sscottl#include <sys/resourcevar.h> 44300113Sscottl#include <sys/socket.h> 45300113Sscottl#include <sys/socket.h> 46300113Sscottl#include <sys/unistd.h> 47300113Sscottl#include <sys/vnode.h> 48300113Sscottl 49300113Sscottl#include <machine/limits.h> 50300113Sscottl 51300113Sscottl#include <net/if.h> 52300113Sscottl 53300113Sscottl#include <nfs/rpcv2.h> 54300113Sscottl#include <nfs/nfsproto.h> 55300113Sscottl#include <nfs/nfs.h> 56300113Sscottl#include <nfs/nfsmount.h> 57300113Sscottl#include <nfs/nfsnode.h> 58300113Sscottl#include <nfs/nfs_lock.h> 59300113Sscottl#include <nfs/nlminfo.h> 60300113Sscottl 61300113Sscottl#define NFSOWNER_1ST_LEVEL_START 1 /* initial entries */ 62300113Sscottl#define NFSOWNER_2ND_LEVEL 256 /* some power of 2 */ 63304704Sshurd 64300113Sscottl#define NFSOWNER(tbl, i) \ 65300113Sscottl (tbl)[(i) / NFSOWNER_2ND_LEVEL][(i) % NFSOWNER_2ND_LEVEL] 66304704Sshurd 67300113Sscottl/* 68300113Sscottl * XXX 69300113Sscottl * We have to let the process know if the call succeeded. I'm using an extra 70300113Sscottl * field in the p_nlminfo field in the proc structure, as it is already for 71300113Sscottl * lockd stuff. 72300113Sscottl */ 73300113Sscottl 74300113Sscottl/* 75300113Sscottl * nfs_advlock -- 76300113Sscottl * NFS advisory byte-level locks. 
77300113Sscottl */ 78300113Sscottlint 79300113Sscottlnfs_dolock(ap) 80300113Sscottl struct vop_advlock_args /* { 81300113Sscottl struct vnode *a_vp; 82300113Sscottl caddr_t a_id; 83300113Sscottl int a_op; 84300113Sscottl struct flock *a_fl; 85300113Sscottl int a_flags; 86300113Sscottl } */ *ap; 87300113Sscottl{ 88300113Sscottl LOCKD_MSG msg; 89300113Sscottl struct nameidata nd; 90300113Sscottl struct proc *p; 91300113Sscottl uid_t saved_uid; 92300113Sscottl struct vnode *vp, *wvp; 93300113Sscottl int error, error1; 94300113Sscottl struct flock *fl; 95300113Sscottl int fmode, ioflg; 96300113Sscottl 97300113Sscottl p = curproc; 98300113Sscottl vp = ap->a_vp; 99300113Sscottl fl = ap->a_fl; 100300113Sscottl 101300113Sscottl /* 102300113Sscottl * the NLM protocol doesn't allow the server to return an error 103300113Sscottl * on ranges, so we do it. 104300113Sscottl */ 105300113Sscottl if (fl->l_whence != SEEK_END) { 106300113Sscottl if (fl->l_whence != SEEK_CUR || fl->l_whence != SEEK_SET || 107300113Sscottl fl->l_start < 0 || 108300113Sscottl (fl->l_len < 0 && 109300113Sscottl (fl->l_start == 0 || fl->l_start + fl->l_len < 0))) 110300113Sscottl return (EINVAL); 111300113Sscottl if (fl->l_len > 0 && 112300113Sscottl (fl->l_len - 1 > OFF_MAX - fl->l_start)) 113300113Sscottl return (EOVERFLOW); 114361055Serj } 115361055Serj 116361055Serj /* 117361055Serj * Fill in the information structure. 118361055Serj */ 119361055Serj msg.lm_version = LOCKD_MSG_VERSION; 120300113Sscottl msg.lm_msg_ident.pid = p->p_pid; 121300113Sscottl /* 122300113Sscottl * if there is no nfsowner table yet, allocate one. 
123300113Sscottl */ 124300113Sscottl if (p->p_nlminfo == NULL) { 125300113Sscottl MALLOC(p->p_nlminfo, struct nlminfo *, 126300113Sscottl sizeof(struct nlminfo), M_LOCKF, M_WAITOK | M_ZERO); 127300113Sscottl p->p_nlminfo->pid_start = p->p_stats->p_start; 128300113Sscottl } 129300113Sscottl msg.lm_msg_ident.pid_start = p->p_nlminfo->pid_start; 130300113Sscottl msg.lm_msg_ident.msg_seq = ++(p->p_nlminfo->msg_seq); 131300113Sscottl 132300113Sscottl msg.lm_fl = *fl; 133300113Sscottl msg.lm_wait = ap->a_flags & F_WAIT; 134300113Sscottl msg.lm_getlk = ap->a_op == F_GETLK; 135300113Sscottl /* 136300113Sscottl * XXX -- I think this is wrong for anything other AF_INET. 137300113Sscottl */ 138300113Sscottl msg.lm_addr = *(VFSTONFS(vp->v_mount)->nm_nam); 139300113Sscottl msg.lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH; 140300113Sscottl bcopy(VTONFS(vp)->n_fhp, msg.lm_fh, msg.lm_fh_len); 141300113Sscottl msg.lm_nfsv3 = NFS_ISV3(vp); 142300113Sscottl msg.lm_cred = *(p->p_ucred); 143300113Sscottl 144300113Sscottl /* 145300113Sscottl * Open the lock fifo. If for any reason we don't find the fifo, it 146300113Sscottl * means that the lock daemon isn't running. Translate any missing 147300113Sscottl * file error message for the user, otherwise the application will 148300113Sscottl * complain that the user's file is missing, which isn't the case. 149300113Sscottl * Note that we use proc0's cred, so the fifo is opened as root. 150300113Sscottl */ 151300113Sscottl NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, _PATH_LCKFIFO, p); 152300113Sscottl 153300113Sscottl /* 154300113Sscottl * XXX Hack to temporarily allow this process (regardless of it's creds) 155300113Sscottl * to open the fifo we need to write to. vn_open() really should 156300113Sscottl * take a ucred (and once it does, this code should be fixed to use 157300113Sscottl * proc0's ucred. 
158300113Sscottl */ 159300113Sscottl saved_uid = p->p_ucred->cr_uid; 160300113Sscottl p->p_ucred->cr_uid = 0; /* temporarly run the vn_open as root */ 161300113Sscottl 162300113Sscottl fmode = FFLAGS(O_WRONLY); 163300113Sscottl error = vn_open(&nd, &fmode, 0); 164300113Sscottl p->p_ucred->cr_uid = saved_uid; 165300113Sscottl if (error != 0) { 166300113Sscottl return (error == ENOENT ? EOPNOTSUPP : error); 167300113Sscottl } 168300113Sscottl wvp = nd.ni_vp; 169300113Sscottl VOP_UNLOCK(wvp, 0, p); /* vn_open leaves it locked */ 170300113Sscottl 171300113Sscottl 172300113Sscottl ioflg = IO_UNIT; 173300113Sscottl for (;;) { 174300113Sscottl VOP_LEASE(wvp, p, proc0.p_ucred, LEASE_WRITE); 175300113Sscottl 176300113Sscottl error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0, 177300113Sscottl UIO_SYSSPACE, ioflg, proc0.p_ucred, NULL, p); 178300113Sscottl 179300113Sscottl if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) { 180300113Sscottl break; 181300113Sscottl } 182300113Sscottl /* 183300113Sscottl * If we're locking a file, wait for an answer. Unlocks succeed 184300113Sscottl * immediately. 185300113Sscottl */ 186300113Sscottl if (fl->l_type == F_UNLCK) 187300113Sscottl /* 188300113Sscottl * XXX this isn't exactly correct. The client side 189300113Sscottl * needs to continue sending it's unlock until 190300113Sscottl * it gets a responce back. 191300113Sscottl */ 192300113Sscottl break; 193300113Sscottl 194300113Sscottl /* 195300113Sscottl * retry after 20 seconds if we haven't gotten a responce yet. 196300113Sscottl * This number was picked out of thin air... but is longer 197300113Sscottl * then even a reasonably loaded system should take (at least 198300113Sscottl * on a local network). XXX Probably should use a back-off 199300113Sscottl * scheme. 
200300113Sscottl */ 201300113Sscottl if ((error = tsleep((void *)p->p_nlminfo, 202300113Sscottl PCATCH | PUSER, "lockd", 20*hz)) != 0) { 203300113Sscottl if (error == EWOULDBLOCK) { 204333338Sshurd /* 205300113Sscottl * We timed out, so we rewrite the request 206300113Sscottl * to the fifo, but only if it isn't already 207300113Sscottl * full. 208300113Sscottl */ 209333338Sshurd ioflg |= IO_NDELAY; 210333338Sshurd continue; 211333338Sshurd } 212333338Sshurd 213333338Sshurd break; 214300113Sscottl } 215300113Sscottl 216300113Sscottl if (msg.lm_getlk && p->p_nlminfo->retcode == 0) { 217300113Sscottl if (p->p_nlminfo->set_getlk_pid) { 218300113Sscottl fl->l_pid = p->p_nlminfo->getlk_pid; 219300113Sscottl } else { 220300113Sscottl fl->l_type = F_UNLCK; 221300113Sscottl } 222300113Sscottl } 223300113Sscottl error = p->p_nlminfo->retcode; 224300113Sscottl break; 225300113Sscottl } 226300113Sscottl 227300113Sscottl if ((error1 = vn_close(wvp, FWRITE, proc0.p_ucred, p)) && error == 0) 228300113Sscottl return (error1); 229300113Sscottl 230300113Sscottl return (error); 231300113Sscottl} 232300113Sscottl 233300113Sscottl/* 234300113Sscottl * nfslockdans -- 235300113Sscottl * NFS advisory byte-level locks answer from the lock daemon. 236300113Sscottl */ 237300113Sscottlint 238300113Sscottlnfslockdans(p, ansp) 239300113Sscottl struct proc *p; 240300113Sscottl struct lockd_ans *ansp; 241300113Sscottl{ 242300113Sscottl int error; 243333338Sshurd 244300113Sscottl /* Let root, or someone who once was root (lockd generally 245300113Sscottl * switches to the daemon uid once it is done setting up) make 246300113Sscottl * this call. 247300113Sscottl * 248300113Sscottl * XXX This authorization check is probably not right. 
249300113Sscottl */ 250300113Sscottl if ((error = suser(p)) != 0 && p->p_ucred->cr_svuid != 0) 251300113Sscottl return (error); 252300113Sscottl 253300113Sscottl /* the version should match, or we're out of sync */ 254300113Sscottl if (ansp->la_vers != LOCKD_ANS_VERSION) 255300113Sscottl return (EINVAL); 256300113Sscottl 257300113Sscottl /* Find the process, set its return errno and wake it up. */ 258300113Sscottl if ((p = pfind(ansp->la_msg_ident.pid)) == NULL) 259300113Sscottl return (ESRCH); 260300113Sscottl 261300113Sscottl /* verify the pid hasn't been reused (if we can), and it isn't waiting 262300113Sscottl * for an answer from a more recent request. We return an EPIPE if 263300113Sscottl * the match fails, because we've already used ESRCH above, and this 264300113Sscottl * is sort of like writing on a pipe after the reader has closed it. 265300113Sscottl */ 266300113Sscottl if (p->p_nlminfo == NULL || 267300113Sscottl ((ansp->la_msg_ident.msg_seq != -1) && 268300113Sscottl (timevalcmp(&p->p_nlminfo->pid_start, 269300113Sscottl &ansp->la_msg_ident.pid_start, !=) || 270300113Sscottl p->p_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq))) { 271300113Sscottl PROC_UNLOCK(p); 272300113Sscottl return (EPIPE); 273300113Sscottl } 274300113Sscottl 275300113Sscottl p->p_nlminfo->retcode = ansp->la_errno; 276300113Sscottl p->p_nlminfo->set_getlk_pid = ansp->la_set_getlk_pid; 277300113Sscottl p->p_nlminfo->getlk_pid = ansp->la_getlk_pid; 278300113Sscottl 279300113Sscottl (void)wakeup((void *)p->p_nlminfo); 280300113Sscottl 281300113Sscottl PROC_UNLOCK(p); 282300113Sscottl return (0); 283300113Sscottl} 284300113Sscottl