175631Salfred/*-
275631Salfred * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
375631Salfred *
475631Salfred * Redistribution and use in source and binary forms, with or without
575631Salfred * modification, are permitted provided that the following conditions
675631Salfred * are met:
775631Salfred * 1. Redistributions of source code must retain the above copyright
875631Salfred *    notice, this list of conditions and the following disclaimer.
975631Salfred * 2. Redistributions in binary form must reproduce the above copyright
1075631Salfred *    notice, this list of conditions and the following disclaimer in the
1175631Salfred *    documentation and/or other materials provided with the distribution.
1275631Salfred * 3. Berkeley Software Design Inc's name may not be used to endorse or
1375631Salfred *    promote products derived from this software without specific prior
1475631Salfred *    written permission.
1575631Salfred *
1675631Salfred * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1775631Salfred * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1875631Salfred * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1975631Salfred * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2075631Salfred * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2175631Salfred * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2275631Salfred * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2375631Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2475631Salfred * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2575631Salfred * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2675631Salfred * SUCH DAMAGE.
2775631Salfred *
2875631Salfred *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
2975631Salfred */
3075631Salfred
3183651Speter#include <sys/cdefs.h>
3283651Speter__FBSDID("$FreeBSD$");
3383651Speter
3475631Salfred#include <sys/param.h>
3575631Salfred#include <sys/systm.h>
36138430Sphk#include <sys/conf.h>
3775631Salfred#include <sys/fcntl.h>
3876166Smarkm#include <sys/kernel.h>		/* for hz */
39114216Skan#include <sys/limits.h>
4076166Smarkm#include <sys/lock.h>
4175631Salfred#include <sys/malloc.h>
4276166Smarkm#include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
4375631Salfred#include <sys/mbuf.h>
4475631Salfred#include <sys/mount.h>
4575631Salfred#include <sys/namei.h>
46168931Srwatson#include <sys/priv.h>
4775631Salfred#include <sys/proc.h>
4876166Smarkm#include <sys/resourcevar.h>
4975631Salfred#include <sys/socket.h>
5076166Smarkm#include <sys/socket.h>
5175631Salfred#include <sys/unistd.h>
5275631Salfred#include <sys/vnode.h>
5375631Salfred
5475631Salfred#include <net/if.h>
5575631Salfred
5675631Salfred#include <nfs/nfsproto.h>
57210455Srmacklem#include <nfs/nfs_lock.h>
5883651Speter#include <nfsclient/nfs.h>
5983651Speter#include <nfsclient/nfsmount.h>
6083651Speter#include <nfsclient/nfsnode.h>
6183651Speter#include <nfsclient/nlminfo.h>
6275631Salfred
63151695Sglebiusextern void (*nlminfo_release_p)(struct proc *p);
64151695Sglebius
65214048Srmacklemvop_advlock_t	*nfs_advlock_p = nfs_dolock;
66214048Srmacklemvop_reclaim_t	*nfs_reclaim_p = NULL;
67214048Srmacklem
68227293Sedstatic MALLOC_DEFINE(M_NFSLOCK, "nfsclient_lock", "NFS lock request");
69227293Sedstatic MALLOC_DEFINE(M_NLMINFO, "nfsclient_nlminfo",
70227293Sed    "NFS lock process structure");
71138430Sphk
72138430Sphkstatic int nfslockdans(struct thread *td, struct lockd_ans *ansp);
73151695Sglebiusstatic void nlminfo_release(struct proc *p);
7475631Salfred/*
75138430Sphk * --------------------------------------------------------------------
76138430Sphk * A miniature device driver which the userland uses to talk to us.
77138430Sphk *
78138430Sphk */
79138430Sphk
80138430Sphkstatic struct cdev *nfslock_dev;
81138430Sphkstatic struct mtx nfslock_mtx;
82138430Sphkstatic int nfslock_isopen;
83138430Sphkstatic TAILQ_HEAD(,__lock_msg)	nfslock_list;
84138430Sphk
85138430Sphkstatic int
86138430Sphknfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
87138430Sphk{
88138430Sphk	int error;
89138430Sphk
90168931Srwatson	error = priv_check(td, PRIV_NFS_LOCKD);
91168931Srwatson	if (error)
92168931Srwatson		return (error);
93168931Srwatson
94138430Sphk	mtx_lock(&nfslock_mtx);
95138430Sphk	if (!nfslock_isopen) {
96138430Sphk		error = 0;
97138430Sphk		nfslock_isopen = 1;
98138430Sphk	} else {
99138430Sphk		error = EOPNOTSUPP;
100138430Sphk	}
101138430Sphk	mtx_unlock(&nfslock_mtx);
102138430Sphk
103138430Sphk	return (error);
104138430Sphk}
105138430Sphk
106138430Sphkstatic int
107138430Sphknfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
108138430Sphk{
109138430Sphk	struct __lock_msg *lm;
110138430Sphk
111138430Sphk	mtx_lock(&nfslock_mtx);
112138430Sphk	nfslock_isopen = 0;
113138430Sphk	while (!TAILQ_EMPTY(&nfslock_list)) {
114138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
115138430Sphk		/* XXX: answer request */
116138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
117138430Sphk		free(lm, M_NFSLOCK);
118138430Sphk	}
119138430Sphk	mtx_unlock(&nfslock_mtx);
120138430Sphk	return (0);
121138430Sphk}
122138430Sphk
123138430Sphkstatic int
124138430Sphknfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
125138430Sphk{
126138430Sphk	int error;
127138430Sphk	struct __lock_msg *lm;
128138430Sphk
129138430Sphk	if (uio->uio_resid != sizeof *lm)
130138430Sphk		return (EOPNOTSUPP);
131138430Sphk	lm = NULL;
132138430Sphk	error = 0;
133138430Sphk	mtx_lock(&nfslock_mtx);
134138430Sphk	while (TAILQ_EMPTY(&nfslock_list)) {
135138430Sphk		error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
136138430Sphk		    "nfslockd", 0);
137138430Sphk		if (error)
138138430Sphk			break;
139138430Sphk	}
140138430Sphk	if (!error) {
141138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
142138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
143138430Sphk	}
144138430Sphk	mtx_unlock(&nfslock_mtx);
145138430Sphk	if (!error) {
146138430Sphk		error = uiomove(lm, sizeof *lm, uio);
147138430Sphk		free(lm, M_NFSLOCK);
148138430Sphk	}
149138430Sphk	return (error);
150138430Sphk}
151138430Sphk
152138430Sphkstatic int
153138430Sphknfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
154138430Sphk{
155138430Sphk	struct lockd_ans la;
156138430Sphk	int error;
157138430Sphk
158138430Sphk	if (uio->uio_resid != sizeof la)
159138430Sphk		return (EOPNOTSUPP);
160138430Sphk	error = uiomove(&la, sizeof la, uio);
161138430Sphk	if (!error)
162138430Sphk		error = nfslockdans(curthread, &la);
163138430Sphk	return (error);
164138430Sphk}
165138430Sphk
166138430Sphkstatic int
167138430Sphknfslock_send(struct __lock_msg *lm)
168138430Sphk{
169138430Sphk	struct __lock_msg *lm2;
170138430Sphk	int error;
171138430Sphk
172138430Sphk	error = 0;
173138430Sphk	lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK);
174138430Sphk	mtx_lock(&nfslock_mtx);
175138430Sphk	if (nfslock_isopen) {
176138430Sphk		memcpy(lm2, lm, sizeof *lm2);
177138430Sphk		TAILQ_INSERT_TAIL(&nfslock_list, lm2, lm_link);
178138430Sphk		wakeup(&nfslock_list);
179138430Sphk	} else {
180138430Sphk		error = EOPNOTSUPP;
181138430Sphk	}
182138430Sphk	mtx_unlock(&nfslock_mtx);
183138430Sphk	if (error)
184138430Sphk		free(lm2, M_NFSLOCK);
185138430Sphk	return (error);
186138430Sphk}
187138430Sphk
188138430Sphkstatic struct cdevsw nfslock_cdevsw = {
189138430Sphk	.d_version =	D_VERSION,
190138430Sphk	.d_open =	nfslock_open,
191138430Sphk	.d_close =	nfslock_close,
192138430Sphk	.d_read =	nfslock_read,
193138430Sphk	.d_write =	nfslock_write,
194138430Sphk	.d_name =	"nfslock"
195138430Sphk};
196138430Sphk
197138430Sphkstatic int
198138430Sphknfslock_modevent(module_t mod __unused, int type, void *data __unused)
199138430Sphk{
200138430Sphk
201138430Sphk	switch (type) {
202138430Sphk	case MOD_LOAD:
203138430Sphk		if (bootverbose)
204138430Sphk			printf("nfslock: pseudo-device\n");
205138430Sphk		mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
206138430Sphk		TAILQ_INIT(&nfslock_list);
207151695Sglebius		nlminfo_release_p = nlminfo_release;
208138430Sphk		nfslock_dev = make_dev(&nfslock_cdevsw, 0,
209138430Sphk		    UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV);
210138430Sphk		return (0);
211138430Sphk	default:
212138430Sphk		return (EOPNOTSUPP);
213138430Sphk	}
214138430Sphk}
215138430Sphk
216138430SphkDEV_MODULE(nfslock, nfslock_modevent, NULL);
217138430SphkMODULE_VERSION(nfslock, 1);
218138430Sphk
219138430Sphk
/*
 * XXX
 * We have to let the process know if the call succeeded.  The result is
 * passed back through an extra field in the p_nlminfo structure hanging
 * off the proc structure, since that structure already exists for lockd
 * bookkeeping.
 */
22675631Salfred
22775631Salfred/*
22875631Salfred * nfs_advlock --
22975631Salfred *      NFS advisory byte-level locks.
230178243Skib *
231178243Skib * The vnode shall be (shared) locked on the entry, it is
232178243Skib * unconditionally unlocked after.
23375631Salfred */
23475631Salfredint
23583651Speternfs_dolock(struct vop_advlock_args *ap)
23675631Salfred{
23775631Salfred	LOCKD_MSG msg;
23883366Sjulian	struct thread *td;
239138430Sphk	struct vnode *vp;
240138430Sphk	int error;
24175631Salfred	struct flock *fl;
24283366Sjulian	struct proc *p;
243214048Srmacklem	struct nfsmount *nmp;
24475631Salfred
24583366Sjulian	td = curthread;
24683366Sjulian	p = td->td_proc;
24783366Sjulian
24875631Salfred	vp = ap->a_vp;
24975631Salfred	fl = ap->a_fl;
250214048Srmacklem	nmp = VFSTONFS(vp->v_mount);
25175631Salfred
252178243Skib	ASSERT_VOP_LOCKED(vp, "nfs_dolock");
253178243Skib
254214048Srmacklem	nmp->nm_getinfo(vp, msg.lm_fh, &msg.lm_fh_len, &msg.lm_addr,
255216931Srmacklem	    &msg.lm_nfsv3, NULL, NULL);
256178243Skib	VOP_UNLOCK(vp, 0);
257178243Skib
25875631Salfred	/*
25975631Salfred	 * the NLM protocol doesn't allow the server to return an error
26082174Sache	 * on ranges, so we do it.
26175631Salfred	 */
26282194Sache	if (fl->l_whence != SEEK_END) {
26382213Sache		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
26482204Sache		    fl->l_start < 0 ||
26582204Sache		    (fl->l_len < 0 &&
26682204Sache		     (fl->l_start == 0 || fl->l_start + fl->l_len < 0)))
26782194Sache			return (EINVAL);
26882204Sache		if (fl->l_len > 0 &&
26982204Sache			 (fl->l_len - 1 > OFF_MAX - fl->l_start))
27082194Sache			return (EOVERFLOW);
27182194Sache	}
27275631Salfred
27375631Salfred	/*
27475631Salfred	 * Fill in the information structure.
27575631Salfred	 */
27675631Salfred	msg.lm_version = LOCKD_MSG_VERSION;
27775631Salfred	msg.lm_msg_ident.pid = p->p_pid;
278178243Skib
279178243Skib	mtx_lock(&Giant);
28075631Salfred	/*
28175631Salfred	 * if there is no nfsowner table yet, allocate one.
28275631Salfred	 */
28375631Salfred	if (p->p_nlminfo == NULL) {
284184214Sdes		p->p_nlminfo = malloc(sizeof(struct nlminfo),
285184214Sdes		    M_NLMINFO, M_WAITOK | M_ZERO);
28675631Salfred		p->p_nlminfo->pid_start = p->p_stats->p_start;
287114434Sdes		timevaladd(&p->p_nlminfo->pid_start, &boottime);
28875631Salfred	}
28975631Salfred	msg.lm_msg_ident.pid_start = p->p_nlminfo->pid_start;
29075631Salfred	msg.lm_msg_ident.msg_seq = ++(p->p_nlminfo->msg_seq);
29175631Salfred
29275631Salfred	msg.lm_fl = *fl;
29375631Salfred	msg.lm_wait = ap->a_flags & F_WAIT;
29475631Salfred	msg.lm_getlk = ap->a_op == F_GETLK;
295101947Salfred	cru2x(td->td_ucred, &msg.lm_cred);
29675631Salfred
29775631Salfred	for (;;) {
298138430Sphk		error = nfslock_send(&msg);
299138430Sphk		if (error)
300178243Skib			goto out;
30175631Salfred
302138430Sphk		/* Unlocks succeed immediately.  */
30375631Salfred		if (fl->l_type == F_UNLCK)
304178243Skib			goto out;
30575631Salfred
30675631Salfred		/*
307161371Sthomas		 * Retry after 20 seconds if we haven't gotten a response yet.
30875631Salfred		 * This number was picked out of thin air... but is longer
30975631Salfred		 * then even a reasonably loaded system should take (at least
31083651Speter		 * on a local network).  XXX Probably should use a back-off
31175631Salfred		 * scheme.
312116185Srwatson		 *
313116185Srwatson		 * XXX: No PCATCH here since we currently have no useful
314116185Srwatson		 * way to signal to the userland rpc.lockd that the request
315116185Srwatson		 * has been aborted.  Once the rpc.lockd implementation
316116185Srwatson		 * can handle aborts, and we report them properly,
317116185Srwatson		 * PCATCH can be put back.  In the mean time, if we did
318116185Srwatson		 * permit aborting, the lock attempt would "get lost"
319116185Srwatson		 * and the lock would get stuck in the locked state.
32075631Salfred		 */
321115415Srwatson		error = tsleep(p->p_nlminfo, PUSER, "lockd", 20*hz);
322107104Salfred		if (error != 0) {
32375631Salfred			if (error == EWOULDBLOCK) {
32475631Salfred				/*
32575631Salfred				 * We timed out, so we rewrite the request
326154316Srwatson				 * to the fifo.
32775631Salfred				 */
32875631Salfred				continue;
32975631Salfred			}
33075631Salfred
33175631Salfred			break;
33275631Salfred		}
33375631Salfred
33475631Salfred		if (msg.lm_getlk && p->p_nlminfo->retcode == 0) {
33575631Salfred			if (p->p_nlminfo->set_getlk_pid) {
336177633Sdfr				fl->l_sysid = 0; /* XXX */
33775631Salfred				fl->l_pid = p->p_nlminfo->getlk_pid;
33875631Salfred			} else {
33975631Salfred				fl->l_type = F_UNLCK;
34075631Salfred			}
34175631Salfred		}
34275631Salfred		error = p->p_nlminfo->retcode;
34375631Salfred		break;
34475631Salfred	}
345178243Skib out:
346178243Skib	mtx_unlock(&Giant);
347138430Sphk	return (error);
34875631Salfred}
34975631Salfred
35075631Salfred/*
35175631Salfred * nfslockdans --
35275631Salfred *      NFS advisory byte-level locks answer from the lock daemon.
35375631Salfred */
354138430Sphkstatic int
35586363Srwatsonnfslockdans(struct thread *td, struct lockd_ans *ansp)
35675631Salfred{
35786363Srwatson	struct proc *targetp;
35875631Salfred
35975631Salfred	/* the version should match, or we're out of sync */
36075631Salfred	if (ansp->la_vers != LOCKD_ANS_VERSION)
36175631Salfred		return (EINVAL);
36275631Salfred
36375631Salfred	/* Find the process, set its return errno and wake it up. */
36486363Srwatson	if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL)
36575631Salfred		return (ESRCH);
36675631Salfred
36783651Speter	/* verify the pid hasn't been reused (if we can), and it isn't waiting
36875631Salfred	 * for an answer from a more recent request.  We return an EPIPE if
36975631Salfred	 * the match fails, because we've already used ESRCH above, and this
37075631Salfred	 * is sort of like writing on a pipe after the reader has closed it.
37175631Salfred	 */
37286363Srwatson	if (targetp->p_nlminfo == NULL ||
37375631Salfred	    ((ansp->la_msg_ident.msg_seq != -1) &&
37486363Srwatson	      (timevalcmp(&targetp->p_nlminfo->pid_start,
37575631Salfred			&ansp->la_msg_ident.pid_start, !=) ||
37686363Srwatson	       targetp->p_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq))) {
37786363Srwatson		PROC_UNLOCK(targetp);
37875631Salfred		return (EPIPE);
37977563Sjake	}
38075631Salfred
38186363Srwatson	targetp->p_nlminfo->retcode = ansp->la_errno;
38286363Srwatson	targetp->p_nlminfo->set_getlk_pid = ansp->la_set_getlk_pid;
38386363Srwatson	targetp->p_nlminfo->getlk_pid = ansp->la_getlk_pid;
38475631Salfred
385107104Salfred	wakeup(targetp->p_nlminfo);
38675631Salfred
38786363Srwatson	PROC_UNLOCK(targetp);
38875631Salfred	return (0);
38975631Salfred}
390138430Sphk
391151695Sglebius/*
392151695Sglebius * Free nlminfo attached to process.
393151695Sglebius */
394151695Sglebiusvoid
395151695Sglebiusnlminfo_release(struct proc *p)
396151695Sglebius{
397151695Sglebius	free(p->p_nlminfo, M_NLMINFO);
398151695Sglebius	p->p_nlminfo = NULL;
399151695Sglebius}
400