/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

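/*
 * Each pending or granted request is described by an rl_q_entry.  An
 * entry records the byte range [rl_q_start, rl_q_end) (end exclusive)
 * and, in rl_q_flags, the requested mode (RL_LOCK_READ or
 * RL_LOCK_WRITE) plus RL_LOCK_GRANTED once the request has been
 * granted.  Entries sit on the lock's rl_waiters queue in request
 * order, except that granted write entries are moved to the head of
 * the queue by rangelock_calc_block().
 */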
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Two lock requests are compatible if their ranges do not overlap, or
 * if both requests are for read.  This helper checks only the overlap
 * part; the callers never compare two read requests against each
 * other.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}
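
/*
 * For example (illustrative only): a read of [0, 100) and a read of
 * [50, 150) are compatible even though they overlap, while a write of
 * [0, 100) conflicts with any request touching that range, such as a
 * read of [99, 200).  A request for [100, 200) does not overlap
 * [0, 100) because the end offset is exclusive.
 */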

/*
 * Recalculate lock->rl_currdep after an unlock or a new enqueue: walk
 * the queue starting at rl_currdep, grant every request that does not
 * conflict with the requests ahead of it, and stop at the first request
 * that is still blocked, which becomes the new rl_currdep.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* A write must not overlap with any granted lock. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}
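
/*
 * Illustrative scenario: if the queue holds a granted write for
 * [0, 100) at its head, followed by a waiting read for [50, 60) and a
 * waiting read for [200, 300), the scan above stops at the read for
 * [50, 60) because it overlaps the granted write.  Neither read is
 * granted until the write is released, which keeps requests served in
 * FIFO order.
 */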

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk, bool do_calc_block)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);

	if (!do_calc_block) {
		/*
		 * This is the case where rangelock_enqueue() has been called
		 * with trylock == true and just inserted this entry in the
		 * queue.
		 * If rl_currdep is this entry, rl_currdep needs to
		 * be set to the next entry in the rl_waiters list.
		 * However, since this entry is the last entry in the
		 * list, the next entry is NULL.
		 */
		if (lock->rl_currdep == entry) {
			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
			    ("rangelock_enqueue: next entry not NULL"));
			lock->rl_currdep = NULL;
		}
	} else
		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	if (do_calc_block)
		rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk, true);
}

/*
 * Unlock a sub-range of a granted lock: the locked range is truncated
 * to [start, end) and the remainder of the original range is released.
 * Returns NULL if the whole range was unlocked, otherwise the cookie
 * for the remaining locked range.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk, true);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}

/*
 * Add the lock request to the queue of pending requests for the
 * rangelock.  Sleep until the request can be granted, unless
 * trylock == true.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk, bool trylock)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	/*
	 * If rl_currdep == NULL, there is no entry waiting for a conflicting
	 * range to be resolved, so set rl_currdep to this entry.  If there is
	 * no conflicting entry for this entry, rl_currdep will be set back to
	 * NULL by rangelock_calc_block().
	 */
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
		if (trylock) {
			/*
			 * For this case, the range is not actually locked
			 * yet, but removal from the list requires the same
			 * steps, except for not doing a rangelock_calc_block()
			 * call, since rangelock_calc_block() was called above.
			 */
			rangelock_unlock_locked(lock, entry, ilk, false);
			return (NULL);
		}
		msleep(entry, ilk, 0, "range", 0);
	}
	mtx_unlock(ilk);
	return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
}

void *
rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
}

void *
rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
}

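/*
 * Minimal usage sketch (illustrative only; the structure and function
 * below are hypothetical).  A consumer embeds a rangelock together with
 * an interlock mutex in an object of its own, initializes them with
 * mtx_init() and rangelock_init(), and passes the interlock to every
 * rangelock call; the interlock must not be held when calling in.
 */
#if 0
struct obj {
	struct mtx	 o_interlock;
	struct rangelock o_rangelock;
};

static void
obj_write_range(struct obj *o, off_t start, off_t end)
{
	void *cookie;

	/* Block until [start, end) can be write-locked. */
	cookie = rangelock_wlock(&o->o_rangelock, start, end,
	    &o->o_interlock);
	/* ... modify the byte range [start, end) ... */
	rangelock_unlock(&o->o_rangelock, cookie, &o->o_interlock);
}
#endif
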
#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	struct rl_q_entry *entry;
	int flags;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;
	switch (what) {
	case RCA_LOCKED:
		if ((flags & RL_LOCK_GRANTED) == 0)
			panic("rangelock not held @ %s:%d\n", file, line);
		break;
	case RCA_RLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_READ))
			panic("rangelock not rlocked @ %s:%d\n", file, line);
		break;
	case RCA_WLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
			panic("rangelock not wlocked @ %s:%d\n", file, line);
		break;
	default:
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
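
/*
 * Callers normally reach this through an assertion wrapper declared in
 * sys/rangelock.h (assumed here to be rangelock_cookie_assert()), for
 * example asserting RCA_WLOCKED on a cookie before modifying the
 * protected range.
 */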
#endif	/* INVARIANT_SUPPORT */