nfs_lock.c revision 216931
175631Salfred/*-
275631Salfred * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
375631Salfred *
475631Salfred * Redistribution and use in source and binary forms, with or without
575631Salfred * modification, are permitted provided that the following conditions
675631Salfred * are met:
775631Salfred * 1. Redistributions of source code must retain the above copyright
875631Salfred *    notice, this list of conditions and the following disclaimer.
975631Salfred * 2. Redistributions in binary form must reproduce the above copyright
1075631Salfred *    notice, this list of conditions and the following disclaimer in the
1175631Salfred *    documentation and/or other materials provided with the distribution.
1275631Salfred * 3. Berkeley Software Design Inc's name may not be used to endorse or
1375631Salfred *    promote products derived from this software without specific prior
1475631Salfred *    written permission.
1575631Salfred *
1675631Salfred * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1775631Salfred * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1875631Salfred * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1975631Salfred * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2075631Salfred * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2175631Salfred * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2275631Salfred * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2375631Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2475631Salfred * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2575631Salfred * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2675631Salfred * SUCH DAMAGE.
2775631Salfred *
2875631Salfred *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
2975631Salfred */
3075631Salfred
3183651Speter#include <sys/cdefs.h>
3283651Speter__FBSDID("$FreeBSD: head/sys/nfs/nfs_lock.c 216931 2011-01-03 20:37:31Z rmacklem $");
3383651Speter
3475631Salfred#include <sys/param.h>
3575631Salfred#include <sys/systm.h>
36138430Sphk#include <sys/conf.h>
3775631Salfred#include <sys/fcntl.h>
3876166Smarkm#include <sys/kernel.h>		/* for hz */
39114216Skan#include <sys/limits.h>
4076166Smarkm#include <sys/lock.h>
4175631Salfred#include <sys/malloc.h>
4276166Smarkm#include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
4375631Salfred#include <sys/mbuf.h>
4475631Salfred#include <sys/mount.h>
4575631Salfred#include <sys/namei.h>
46168931Srwatson#include <sys/priv.h>
4775631Salfred#include <sys/proc.h>
4876166Smarkm#include <sys/resourcevar.h>
4975631Salfred#include <sys/socket.h>
5076166Smarkm#include <sys/socket.h>
5175631Salfred#include <sys/unistd.h>
5275631Salfred#include <sys/vnode.h>
5375631Salfred
5475631Salfred#include <net/if.h>
5575631Salfred
5675631Salfred#include <nfs/nfsproto.h>
57210455Srmacklem#include <nfs/nfs_lock.h>
5883651Speter#include <nfsclient/nfs.h>
5983651Speter#include <nfsclient/nfsmount.h>
6083651Speter#include <nfsclient/nfsnode.h>
6183651Speter#include <nfsclient/nlminfo.h>
6275631Salfred
63151695Sglebiusextern void (*nlminfo_release_p)(struct proc *p);
64151695Sglebius
65214048Srmacklemvop_advlock_t	*nfs_advlock_p = nfs_dolock;
66214048Srmacklemvop_reclaim_t	*nfs_reclaim_p = NULL;
67214048Srmacklem
68151897SrwatsonMALLOC_DEFINE(M_NFSLOCK, "nfsclient_lock", "NFS lock request");
69151897SrwatsonMALLOC_DEFINE(M_NLMINFO, "nfsclient_nlminfo", "NFS lock process structure");
70138430Sphk
71138430Sphkstatic int nfslockdans(struct thread *td, struct lockd_ans *ansp);
72151695Sglebiusstatic void nlminfo_release(struct proc *p);
7375631Salfred/*
74138430Sphk * --------------------------------------------------------------------
75138430Sphk * A miniature device driver which the userland uses to talk to us.
76138430Sphk *
77138430Sphk */
78138430Sphk
79138430Sphkstatic struct cdev *nfslock_dev;
80138430Sphkstatic struct mtx nfslock_mtx;
81138430Sphkstatic int nfslock_isopen;
82138430Sphkstatic TAILQ_HEAD(,__lock_msg)	nfslock_list;
83138430Sphk
84138430Sphkstatic int
85138430Sphknfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
86138430Sphk{
87138430Sphk	int error;
88138430Sphk
89168931Srwatson	error = priv_check(td, PRIV_NFS_LOCKD);
90168931Srwatson	if (error)
91168931Srwatson		return (error);
92168931Srwatson
93138430Sphk	mtx_lock(&nfslock_mtx);
94138430Sphk	if (!nfslock_isopen) {
95138430Sphk		error = 0;
96138430Sphk		nfslock_isopen = 1;
97138430Sphk	} else {
98138430Sphk		error = EOPNOTSUPP;
99138430Sphk	}
100138430Sphk	mtx_unlock(&nfslock_mtx);
101138430Sphk
102138430Sphk	return (error);
103138430Sphk}
104138430Sphk
105138430Sphkstatic int
106138430Sphknfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
107138430Sphk{
108138430Sphk	struct __lock_msg *lm;
109138430Sphk
110138430Sphk	mtx_lock(&nfslock_mtx);
111138430Sphk	nfslock_isopen = 0;
112138430Sphk	while (!TAILQ_EMPTY(&nfslock_list)) {
113138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
114138430Sphk		/* XXX: answer request */
115138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
116138430Sphk		free(lm, M_NFSLOCK);
117138430Sphk	}
118138430Sphk	mtx_unlock(&nfslock_mtx);
119138430Sphk	return (0);
120138430Sphk}
121138430Sphk
122138430Sphkstatic int
123138430Sphknfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
124138430Sphk{
125138430Sphk	int error;
126138430Sphk	struct __lock_msg *lm;
127138430Sphk
128138430Sphk	if (uio->uio_resid != sizeof *lm)
129138430Sphk		return (EOPNOTSUPP);
130138430Sphk	lm = NULL;
131138430Sphk	error = 0;
132138430Sphk	mtx_lock(&nfslock_mtx);
133138430Sphk	while (TAILQ_EMPTY(&nfslock_list)) {
134138430Sphk		error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
135138430Sphk		    "nfslockd", 0);
136138430Sphk		if (error)
137138430Sphk			break;
138138430Sphk	}
139138430Sphk	if (!error) {
140138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
141138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
142138430Sphk	}
143138430Sphk	mtx_unlock(&nfslock_mtx);
144138430Sphk	if (!error) {
145138430Sphk		error = uiomove(lm, sizeof *lm, uio);
146138430Sphk		free(lm, M_NFSLOCK);
147138430Sphk	}
148138430Sphk	return (error);
149138430Sphk}
150138430Sphk
151138430Sphkstatic int
152138430Sphknfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
153138430Sphk{
154138430Sphk	struct lockd_ans la;
155138430Sphk	int error;
156138430Sphk
157138430Sphk	if (uio->uio_resid != sizeof la)
158138430Sphk		return (EOPNOTSUPP);
159138430Sphk	error = uiomove(&la, sizeof la, uio);
160138430Sphk	if (!error)
161138430Sphk		error = nfslockdans(curthread, &la);
162138430Sphk	return (error);
163138430Sphk}
164138430Sphk
165138430Sphkstatic int
166138430Sphknfslock_send(struct __lock_msg *lm)
167138430Sphk{
168138430Sphk	struct __lock_msg *lm2;
169138430Sphk	int error;
170138430Sphk
171138430Sphk	error = 0;
172138430Sphk	lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK);
173138430Sphk	mtx_lock(&nfslock_mtx);
174138430Sphk	if (nfslock_isopen) {
175138430Sphk		memcpy(lm2, lm, sizeof *lm2);
176138430Sphk		TAILQ_INSERT_TAIL(&nfslock_list, lm2, lm_link);
177138430Sphk		wakeup(&nfslock_list);
178138430Sphk	} else {
179138430Sphk		error = EOPNOTSUPP;
180138430Sphk	}
181138430Sphk	mtx_unlock(&nfslock_mtx);
182138430Sphk	if (error)
183138430Sphk		free(lm2, M_NFSLOCK);
184138430Sphk	return (error);
185138430Sphk}
186138430Sphk
187138430Sphkstatic struct cdevsw nfslock_cdevsw = {
188138430Sphk	.d_version =	D_VERSION,
189138430Sphk	.d_open =	nfslock_open,
190138430Sphk	.d_close =	nfslock_close,
191138430Sphk	.d_read =	nfslock_read,
192138430Sphk	.d_write =	nfslock_write,
193138430Sphk	.d_name =	"nfslock"
194138430Sphk};
195138430Sphk
196138430Sphkstatic int
197138430Sphknfslock_modevent(module_t mod __unused, int type, void *data __unused)
198138430Sphk{
199138430Sphk
200138430Sphk	switch (type) {
201138430Sphk	case MOD_LOAD:
202138430Sphk		if (bootverbose)
203138430Sphk			printf("nfslock: pseudo-device\n");
204138430Sphk		mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
205138430Sphk		TAILQ_INIT(&nfslock_list);
206151695Sglebius		nlminfo_release_p = nlminfo_release;
207138430Sphk		nfslock_dev = make_dev(&nfslock_cdevsw, 0,
208138430Sphk		    UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV);
209138430Sphk		return (0);
210138430Sphk	default:
211138430Sphk		return (EOPNOTSUPP);
212138430Sphk	}
213138430Sphk}
214138430Sphk
215138430SphkDEV_MODULE(nfslock, nfslock_modevent, NULL);
216138430SphkMODULE_VERSION(nfslock, 1);
217138430Sphk
218138430Sphk
219138430Sphk/*
22075631Salfred * XXX
22175631Salfred * We have to let the process know if the call succeeded.  I'm using an extra
22283651Speter * field in the p_nlminfo field in the proc structure, as it is already for
22375631Salfred * lockd stuff.
22475631Salfred */
22575631Salfred
22675631Salfred/*
22775631Salfred * nfs_advlock --
22875631Salfred *      NFS advisory byte-level locks.
229178243Skib *
230178243Skib * The vnode shall be (shared) locked on the entry, it is
231178243Skib * unconditionally unlocked after.
23275631Salfred */
23375631Salfredint
23483651Speternfs_dolock(struct vop_advlock_args *ap)
23575631Salfred{
23675631Salfred	LOCKD_MSG msg;
23783366Sjulian	struct thread *td;
238138430Sphk	struct vnode *vp;
239138430Sphk	int error;
24075631Salfred	struct flock *fl;
24183366Sjulian	struct proc *p;
242214048Srmacklem	struct nfsmount *nmp;
24375631Salfred
24483366Sjulian	td = curthread;
24583366Sjulian	p = td->td_proc;
24683366Sjulian
24775631Salfred	vp = ap->a_vp;
24875631Salfred	fl = ap->a_fl;
249214048Srmacklem	nmp = VFSTONFS(vp->v_mount);
25075631Salfred
251178243Skib	ASSERT_VOP_LOCKED(vp, "nfs_dolock");
252178243Skib
253214048Srmacklem	nmp->nm_getinfo(vp, msg.lm_fh, &msg.lm_fh_len, &msg.lm_addr,
254216931Srmacklem	    &msg.lm_nfsv3, NULL, NULL);
255178243Skib	VOP_UNLOCK(vp, 0);
256178243Skib
25775631Salfred	/*
25875631Salfred	 * the NLM protocol doesn't allow the server to return an error
25982174Sache	 * on ranges, so we do it.
26075631Salfred	 */
26182194Sache	if (fl->l_whence != SEEK_END) {
26282213Sache		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
26382204Sache		    fl->l_start < 0 ||
26482204Sache		    (fl->l_len < 0 &&
26582204Sache		     (fl->l_start == 0 || fl->l_start + fl->l_len < 0)))
26682194Sache			return (EINVAL);
26782204Sache		if (fl->l_len > 0 &&
26882204Sache			 (fl->l_len - 1 > OFF_MAX - fl->l_start))
26982194Sache			return (EOVERFLOW);
27082194Sache	}
27175631Salfred
27275631Salfred	/*
27375631Salfred	 * Fill in the information structure.
27475631Salfred	 */
27575631Salfred	msg.lm_version = LOCKD_MSG_VERSION;
27675631Salfred	msg.lm_msg_ident.pid = p->p_pid;
277178243Skib
278178243Skib	mtx_lock(&Giant);
27975631Salfred	/*
28075631Salfred	 * if there is no nfsowner table yet, allocate one.
28175631Salfred	 */
28275631Salfred	if (p->p_nlminfo == NULL) {
283184214Sdes		p->p_nlminfo = malloc(sizeof(struct nlminfo),
284184214Sdes		    M_NLMINFO, M_WAITOK | M_ZERO);
28575631Salfred		p->p_nlminfo->pid_start = p->p_stats->p_start;
286114434Sdes		timevaladd(&p->p_nlminfo->pid_start, &boottime);
28775631Salfred	}
28875631Salfred	msg.lm_msg_ident.pid_start = p->p_nlminfo->pid_start;
28975631Salfred	msg.lm_msg_ident.msg_seq = ++(p->p_nlminfo->msg_seq);
29075631Salfred
29175631Salfred	msg.lm_fl = *fl;
29275631Salfred	msg.lm_wait = ap->a_flags & F_WAIT;
29375631Salfred	msg.lm_getlk = ap->a_op == F_GETLK;
294101947Salfred	cru2x(td->td_ucred, &msg.lm_cred);
29575631Salfred
29675631Salfred	for (;;) {
297138430Sphk		error = nfslock_send(&msg);
298138430Sphk		if (error)
299178243Skib			goto out;
30075631Salfred
301138430Sphk		/* Unlocks succeed immediately.  */
30275631Salfred		if (fl->l_type == F_UNLCK)
303178243Skib			goto out;
30475631Salfred
30575631Salfred		/*
306161371Sthomas		 * Retry after 20 seconds if we haven't gotten a response yet.
30775631Salfred		 * This number was picked out of thin air... but is longer
30875631Salfred		 * then even a reasonably loaded system should take (at least
30983651Speter		 * on a local network).  XXX Probably should use a back-off
31075631Salfred		 * scheme.
311116185Srwatson		 *
312116185Srwatson		 * XXX: No PCATCH here since we currently have no useful
313116185Srwatson		 * way to signal to the userland rpc.lockd that the request
314116185Srwatson		 * has been aborted.  Once the rpc.lockd implementation
315116185Srwatson		 * can handle aborts, and we report them properly,
316116185Srwatson		 * PCATCH can be put back.  In the mean time, if we did
317116185Srwatson		 * permit aborting, the lock attempt would "get lost"
318116185Srwatson		 * and the lock would get stuck in the locked state.
31975631Salfred		 */
320115415Srwatson		error = tsleep(p->p_nlminfo, PUSER, "lockd", 20*hz);
321107104Salfred		if (error != 0) {
32275631Salfred			if (error == EWOULDBLOCK) {
32375631Salfred				/*
32475631Salfred				 * We timed out, so we rewrite the request
325154316Srwatson				 * to the fifo.
32675631Salfred				 */
32775631Salfred				continue;
32875631Salfred			}
32975631Salfred
33075631Salfred			break;
33175631Salfred		}
33275631Salfred
33375631Salfred		if (msg.lm_getlk && p->p_nlminfo->retcode == 0) {
33475631Salfred			if (p->p_nlminfo->set_getlk_pid) {
335177633Sdfr				fl->l_sysid = 0; /* XXX */
33675631Salfred				fl->l_pid = p->p_nlminfo->getlk_pid;
33775631Salfred			} else {
33875631Salfred				fl->l_type = F_UNLCK;
33975631Salfred			}
34075631Salfred		}
34175631Salfred		error = p->p_nlminfo->retcode;
34275631Salfred		break;
34375631Salfred	}
344178243Skib out:
345178243Skib	mtx_unlock(&Giant);
346138430Sphk	return (error);
34775631Salfred}
34875631Salfred
34975631Salfred/*
35075631Salfred * nfslockdans --
35175631Salfred *      NFS advisory byte-level locks answer from the lock daemon.
35275631Salfred */
353138430Sphkstatic int
35486363Srwatsonnfslockdans(struct thread *td, struct lockd_ans *ansp)
35575631Salfred{
35686363Srwatson	struct proc *targetp;
35775631Salfred
35875631Salfred	/* the version should match, or we're out of sync */
35975631Salfred	if (ansp->la_vers != LOCKD_ANS_VERSION)
36075631Salfred		return (EINVAL);
36175631Salfred
36275631Salfred	/* Find the process, set its return errno and wake it up. */
36386363Srwatson	if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL)
36475631Salfred		return (ESRCH);
36575631Salfred
36683651Speter	/* verify the pid hasn't been reused (if we can), and it isn't waiting
36775631Salfred	 * for an answer from a more recent request.  We return an EPIPE if
36875631Salfred	 * the match fails, because we've already used ESRCH above, and this
36975631Salfred	 * is sort of like writing on a pipe after the reader has closed it.
37075631Salfred	 */
37186363Srwatson	if (targetp->p_nlminfo == NULL ||
37275631Salfred	    ((ansp->la_msg_ident.msg_seq != -1) &&
37386363Srwatson	      (timevalcmp(&targetp->p_nlminfo->pid_start,
37475631Salfred			&ansp->la_msg_ident.pid_start, !=) ||
37586363Srwatson	       targetp->p_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq))) {
37686363Srwatson		PROC_UNLOCK(targetp);
37775631Salfred		return (EPIPE);
37877563Sjake	}
37975631Salfred
38086363Srwatson	targetp->p_nlminfo->retcode = ansp->la_errno;
38186363Srwatson	targetp->p_nlminfo->set_getlk_pid = ansp->la_set_getlk_pid;
38286363Srwatson	targetp->p_nlminfo->getlk_pid = ansp->la_getlk_pid;
38375631Salfred
384107104Salfred	wakeup(targetp->p_nlminfo);
38575631Salfred
38686363Srwatson	PROC_UNLOCK(targetp);
38775631Salfred	return (0);
38875631Salfred}
389138430Sphk
390151695Sglebius/*
391151695Sglebius * Free nlminfo attached to process.
392151695Sglebius */
393151695Sglebiusvoid
394151695Sglebiusnlminfo_release(struct proc *p)
395151695Sglebius{
396151695Sglebius	free(p->p_nlminfo, M_NLMINFO);
397151695Sglebius	p->p_nlminfo = NULL;
398151695Sglebius}
399