1/*-
2 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include "opt_ktrace.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/kernel.h>
39#include <sys/ktr.h>
40#include <sys/condvar.h>
41#include <sys/sched.h>
42#include <sys/signalvar.h>
43#include <sys/sleepqueue.h>
44#include <sys/resourcevar.h>
45#ifdef KTRACE
46#include <sys/uio.h>
47#include <sys/ktrace.h>
48#endif
49
/*
 * Saturation limit for the cv_waiters counter.  The counter is only an exact
 * count while it is below this value; once it has reached the limit,
 * cv_signal has to look at the sleep queue itself to find out whether any
 * thread is still waiting.
 */
#define	CV_WAITERS_BOUND	INT_MAX

/*
 * Account for one more waiter on cvp.  The counter is left pinned at
 * CV_WAITERS_BOUND rather than being incremented past it.
 */
#define	CV_WAITERS_INC(cvp) do {					\
	if (CV_WAITERS_BOUND > (cvp)->cv_waiters)			\
		(cvp)->cv_waiters += 1;					\
} while (0)
60
/*
 * Sanity checks shared by the cv_wait* entry points.  Asserts, in order,
 * that the calling thread pointer is non-NULL, that the thread is running,
 * and that both the condition variable and the interlock are non-NULL.
 */
#define	CV_ASSERT(cv, lk, thr) do {					\
	KASSERT((thr) != NULL, ("%s: td NULL", __func__));		\
	KASSERT(TD_IS_RUNNING(thr), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cv) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((lk) != NULL, ("%s: lock NULL", __func__));		\
} while (0)
70
71/*
72 * Initialize a condition variable.  Must be called before use.
73 */
74void
75cv_init(struct cv *cvp, const char *desc)
76{
77
78	cvp->cv_description = desc;
79	cvp->cv_waiters = 0;
80}
81
/*
 * Tear down a condition variable.  A destroyed condition variable has to be
 * passed to cv_init again before it may be re-used.
 *
 * No state is released here; with INVARIANTS the function only asserts that
 * no sleep queue is still associated with the condition variable, and
 * without INVARIANTS it does nothing.
 */
void
cv_destroy(struct cv *cvp)
{
#ifdef INVARIANTS
	struct sleepqueue *pending;

	/* The lookup is done with the sleep queue locked. */
	sleepq_lock(cvp);
	pending = sleepq_lookup(cvp);
	sleepq_release(cvp);
	KASSERT(pending == NULL,
	    ("%s: associated sleep queue non-empty", __func__));
#endif
}
98
99/*
100 * Wait on a condition variable.  The current thread is placed on the condition
101 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
102 * condition variable will resume the thread.  The mutex is released before
103 * sleeping and will be held on return.  It is recommended that the mutex be
104 * held when cv_signal or cv_broadcast are called.
105 */
106void
107_cv_wait(struct cv *cvp, struct lock_object *lock)
108{
109	WITNESS_SAVE_DECL(lock_witness);
110	struct lock_class *class;
111	struct thread *td;
112	uintptr_t lock_state;
113
114	td = curthread;
115	lock_state = 0;
116#ifdef KTRACE
117	if (KTRPOINT(td, KTR_CSW))
118		ktrcsw(1, 0, cv_wmesg(cvp));
119#endif
120	CV_ASSERT(cvp, lock, td);
121	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
122	    "Waiting on \"%s\"", cvp->cv_description);
123	class = LOCK_CLASS(lock);
124
125	if (cold || panicstr) {
126		/*
127		 * During autoconfiguration, just give interrupts
128		 * a chance, then just return.  Don't run any other
129		 * thread or panic below, in case this is the idle
130		 * process and already asleep.
131		 */
132		return;
133	}
134
135	sleepq_lock(cvp);
136
137	CV_WAITERS_INC(cvp);
138	if (lock == &Giant.lock_object)
139		mtx_assert(&Giant, MA_OWNED);
140	DROP_GIANT();
141
142	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
143	if (lock != &Giant.lock_object) {
144		if (class->lc_flags & LC_SLEEPABLE)
145			sleepq_release(cvp);
146		WITNESS_SAVE(lock, lock_witness);
147		lock_state = class->lc_unlock(lock);
148		if (class->lc_flags & LC_SLEEPABLE)
149			sleepq_lock(cvp);
150	}
151	sleepq_wait(cvp, 0);
152
153#ifdef KTRACE
154	if (KTRPOINT(td, KTR_CSW))
155		ktrcsw(0, 0, cv_wmesg(cvp));
156#endif
157	PICKUP_GIANT();
158	if (lock != &Giant.lock_object) {
159		class->lc_lock(lock, lock_state);
160		WITNESS_RESTORE(lock, lock_witness);
161	}
162}
163
164/*
165 * Wait on a condition variable.  This function differs from cv_wait by
166 * not acquiring the mutex after condition variable was signaled.
167 */
168void
169_cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
170{
171	struct lock_class *class;
172	struct thread *td;
173
174	td = curthread;
175#ifdef KTRACE
176	if (KTRPOINT(td, KTR_CSW))
177		ktrcsw(1, 0, cv_wmesg(cvp));
178#endif
179	CV_ASSERT(cvp, lock, td);
180	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
181	    "Waiting on \"%s\"", cvp->cv_description);
182	KASSERT(lock != &Giant.lock_object,
183	    ("cv_wait_unlock cannot be used with Giant"));
184	class = LOCK_CLASS(lock);
185
186	if (cold || panicstr) {
187		/*
188		 * During autoconfiguration, just give interrupts
189		 * a chance, then just return.  Don't run any other
190		 * thread or panic below, in case this is the idle
191		 * process and already asleep.
192		 */
193		class->lc_unlock(lock);
194		return;
195	}
196
197	sleepq_lock(cvp);
198
199	CV_WAITERS_INC(cvp);
200	DROP_GIANT();
201
202	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
203	if (class->lc_flags & LC_SLEEPABLE)
204		sleepq_release(cvp);
205	class->lc_unlock(lock);
206	if (class->lc_flags & LC_SLEEPABLE)
207		sleepq_lock(cvp);
208	sleepq_wait(cvp, 0);
209
210#ifdef KTRACE
211	if (KTRPOINT(td, KTR_CSW))
212		ktrcsw(0, 0, cv_wmesg(cvp));
213#endif
214	PICKUP_GIANT();
215}
216
217/*
218 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
219 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
220 * a signal was caught.  If ERESTART is returned the system call should be
221 * restarted if possible.
222 */
223int
224_cv_wait_sig(struct cv *cvp, struct lock_object *lock)
225{
226	WITNESS_SAVE_DECL(lock_witness);
227	struct lock_class *class;
228	struct thread *td;
229	uintptr_t lock_state;
230	int rval;
231
232	td = curthread;
233	lock_state = 0;
234#ifdef KTRACE
235	if (KTRPOINT(td, KTR_CSW))
236		ktrcsw(1, 0, cv_wmesg(cvp));
237#endif
238	CV_ASSERT(cvp, lock, td);
239	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
240	    "Waiting on \"%s\"", cvp->cv_description);
241	class = LOCK_CLASS(lock);
242
243	if (cold || panicstr) {
244		/*
245		 * After a panic, or during autoconfiguration, just give
246		 * interrupts a chance, then just return; don't run any other
247		 * procs or panic below, in case this is the idle process and
248		 * already asleep.
249		 */
250		return (0);
251	}
252
253	sleepq_lock(cvp);
254
255	CV_WAITERS_INC(cvp);
256	if (lock == &Giant.lock_object)
257		mtx_assert(&Giant, MA_OWNED);
258	DROP_GIANT();
259
260	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
261	    SLEEPQ_INTERRUPTIBLE, 0);
262	if (lock != &Giant.lock_object) {
263		if (class->lc_flags & LC_SLEEPABLE)
264			sleepq_release(cvp);
265		WITNESS_SAVE(lock, lock_witness);
266		lock_state = class->lc_unlock(lock);
267		if (class->lc_flags & LC_SLEEPABLE)
268			sleepq_lock(cvp);
269	}
270	rval = sleepq_wait_sig(cvp, 0);
271
272#ifdef KTRACE
273	if (KTRPOINT(td, KTR_CSW))
274		ktrcsw(0, 0, cv_wmesg(cvp));
275#endif
276	PICKUP_GIANT();
277	if (lock != &Giant.lock_object) {
278		class->lc_lock(lock, lock_state);
279		WITNESS_RESTORE(lock, lock_witness);
280	}
281
282	return (rval);
283}
284
285/*
286 * Wait on a condition variable for (at most) the value specified in sbt
287 * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast,
288 * EWOULDBLOCK if the timeout expires.
289 */
290int
291_cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt,
292    sbintime_t pr, int flags)
293{
294	WITNESS_SAVE_DECL(lock_witness);
295	struct lock_class *class;
296	struct thread *td;
297	int lock_state, rval;
298
299	td = curthread;
300	lock_state = 0;
301#ifdef KTRACE
302	if (KTRPOINT(td, KTR_CSW))
303		ktrcsw(1, 0, cv_wmesg(cvp));
304#endif
305	CV_ASSERT(cvp, lock, td);
306	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
307	    "Waiting on \"%s\"", cvp->cv_description);
308	class = LOCK_CLASS(lock);
309
310	if (cold || panicstr) {
311		/*
312		 * After a panic, or during autoconfiguration, just give
313		 * interrupts a chance, then just return; don't run any other
314		 * thread or panic below, in case this is the idle process and
315		 * already asleep.
316		 */
317		return 0;
318	}
319
320	sleepq_lock(cvp);
321
322	CV_WAITERS_INC(cvp);
323	if (lock == &Giant.lock_object)
324		mtx_assert(&Giant, MA_OWNED);
325	DROP_GIANT();
326
327	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
328	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
329	if (lock != &Giant.lock_object) {
330		if (class->lc_flags & LC_SLEEPABLE)
331			sleepq_release(cvp);
332		WITNESS_SAVE(lock, lock_witness);
333		lock_state = class->lc_unlock(lock);
334		if (class->lc_flags & LC_SLEEPABLE)
335			sleepq_lock(cvp);
336	}
337	rval = sleepq_timedwait(cvp, 0);
338
339#ifdef KTRACE
340	if (KTRPOINT(td, KTR_CSW))
341		ktrcsw(0, 0, cv_wmesg(cvp));
342#endif
343	PICKUP_GIANT();
344	if (lock != &Giant.lock_object) {
345		class->lc_lock(lock, lock_state);
346		WITNESS_RESTORE(lock, lock_witness);
347	}
348
349	return (rval);
350}
351
352/*
353 * Wait on a condition variable for (at most) the value specified in sbt
354 * argument, allowing interruption by signals.
355 * Returns 0 if the thread was resumed by cv_signal or cv_broadcast,
356 * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal
357 * was caught.
358 */
359int
360_cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
361    sbintime_t sbt, sbintime_t pr, int flags)
362{
363	WITNESS_SAVE_DECL(lock_witness);
364	struct lock_class *class;
365	struct thread *td;
366	int lock_state, rval;
367
368	td = curthread;
369	lock_state = 0;
370#ifdef KTRACE
371	if (KTRPOINT(td, KTR_CSW))
372		ktrcsw(1, 0, cv_wmesg(cvp));
373#endif
374	CV_ASSERT(cvp, lock, td);
375	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
376	    "Waiting on \"%s\"", cvp->cv_description);
377	class = LOCK_CLASS(lock);
378
379	if (cold || panicstr) {
380		/*
381		 * After a panic, or during autoconfiguration, just give
382		 * interrupts a chance, then just return; don't run any other
383		 * thread or panic below, in case this is the idle process and
384		 * already asleep.
385		 */
386		return 0;
387	}
388
389	sleepq_lock(cvp);
390
391	CV_WAITERS_INC(cvp);
392	if (lock == &Giant.lock_object)
393		mtx_assert(&Giant, MA_OWNED);
394	DROP_GIANT();
395
396	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
397	    SLEEPQ_INTERRUPTIBLE, 0);
398	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
399	if (lock != &Giant.lock_object) {
400		if (class->lc_flags & LC_SLEEPABLE)
401			sleepq_release(cvp);
402		WITNESS_SAVE(lock, lock_witness);
403		lock_state = class->lc_unlock(lock);
404		if (class->lc_flags & LC_SLEEPABLE)
405			sleepq_lock(cvp);
406	}
407	rval = sleepq_timedwait_sig(cvp, 0);
408
409#ifdef KTRACE
410	if (KTRPOINT(td, KTR_CSW))
411		ktrcsw(0, 0, cv_wmesg(cvp));
412#endif
413	PICKUP_GIANT();
414	if (lock != &Giant.lock_object) {
415		class->lc_lock(lock, lock_state);
416		WITNESS_RESTORE(lock, lock_witness);
417	}
418
419	return (rval);
420}
421
422/*
423 * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
424 * the swapper if the process is not in memory, so that it can bring the
425 * sleeping process in.  Note that this may also result in additional threads
426 * being made runnable.  Should be called with the same mutex as was passed to
427 * cv_wait held.
428 */
429void
430cv_signal(struct cv *cvp)
431{
432	int wakeup_swapper;
433
434	wakeup_swapper = 0;
435	sleepq_lock(cvp);
436	if (cvp->cv_waiters > 0) {
437		if (cvp->cv_waiters == CV_WAITERS_BOUND &&
438		    sleepq_lookup(cvp) == NULL) {
439			cvp->cv_waiters = 0;
440		} else {
441			if (cvp->cv_waiters < CV_WAITERS_BOUND)
442				cvp->cv_waiters--;
443			wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0,
444			    0);
445		}
446	}
447	sleepq_release(cvp);
448	if (wakeup_swapper)
449		kick_proc0();
450}
451
452/*
453 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
454 * Should be called with the same mutex as was passed to cv_wait held.
455 */
456void
457cv_broadcastpri(struct cv *cvp, int pri)
458{
459	int wakeup_swapper;
460
461	/*
462	 * XXX sleepq_broadcast pri argument changed from -1 meaning
463	 * no pri to 0 meaning no pri.
464	 */
465	wakeup_swapper = 0;
466	if (pri == -1)
467		pri = 0;
468	sleepq_lock(cvp);
469	if (cvp->cv_waiters > 0) {
470		cvp->cv_waiters = 0;
471		wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
472	}
473	sleepq_release(cvp);
474	if (wakeup_swapper)
475		kick_proc0();
476}
477