rwlock.c revision 290001
1/*
2 * Copyright (C) 2004, 2005, 2007, 2009, 2011, 2012  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2001, 2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id$ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <stddef.h>
25
26#include <isc/atomic.h>
27#include <isc/magic.h>
28#include <isc/msgs.h>
29#include <isc/platform.h>
30#include <isc/rwlock.h>
31#include <isc/util.h>
32
/*
 * Magic value ('R','W','L','k') stored in rwl->magic by isc_rwlock_init()
 * once the lock is fully set up and cleared again by isc_rwlock_destroy().
 * VALID_RWLOCK() is the check the REQUIRE()s in this file use before
 * touching a lock.
 */
#define RWLOCK_MAGIC		ISC_MAGIC('R', 'W', 'L', 'k')
#define VALID_RWLOCK(rwl)	ISC_MAGIC_VALID(rwl, RWLOCK_MAGIC)
35
36#ifdef ISC_PLATFORM_USETHREADS
37
/*
 * Quota values isc_rwlock_init() substitutes when the caller passes 0.
 * Both can be overridden by defining the macro before this point (for
 * example on the compiler command line).
 */
#if !defined(RWLOCK_DEFAULT_READ_QUOTA)
#define RWLOCK_DEFAULT_READ_QUOTA 4
#endif

#if !defined(RWLOCK_DEFAULT_WRITE_QUOTA)
#define RWLOCK_DEFAULT_WRITE_QUOTA 4
#endif
45
46#ifdef ISC_RWLOCK_TRACE
47#include <stdio.h>		/* Required for fprintf/stderr. */
48#include <isc/thread.h>		/* Required for isc_thread_self(). */
49
50static void
51print_lock(const char *operation, isc_rwlock_t *rwl, isc_rwlocktype_t type) {
52	fprintf(stderr,
53		isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
54			       ISC_MSG_PRINTLOCK,
55			       "rwlock %p thread %lu %s(%s): %s, %u active, "
56			       "%u granted, %u rwaiting, %u wwaiting\n"),
57		rwl, isc_thread_self(), operation,
58		(type == isc_rwlocktype_read ?
59		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
60				ISC_MSG_READ, "read") :
61		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
62				ISC_MSG_WRITE, "write")),
63		(rwl->type == isc_rwlocktype_read ?
64		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
65				ISC_MSG_READING, "reading") :
66		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
67				ISC_MSG_WRITING, "writing")),
68		rwl->active, rwl->granted, rwl->readers_waiting,
69		rwl->writers_waiting);
70}
71#endif
72
73isc_result_t
74isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
75		unsigned int write_quota)
76{
77	isc_result_t result;
78
79	REQUIRE(rwl != NULL);
80
81	/*
82	 * In case there's trouble initializing, we zero magic now.  If all
83	 * goes well, we'll set it to RWLOCK_MAGIC.
84	 */
85	rwl->magic = 0;
86
87#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
88	rwl->write_requests = 0;
89	rwl->write_completions = 0;
90	rwl->cnt_and_flag = 0;
91	rwl->readers_waiting = 0;
92	rwl->write_granted = 0;
93	if (read_quota != 0) {
94		UNEXPECTED_ERROR(__FILE__, __LINE__,
95				 "read quota is not supported");
96	}
97	if (write_quota == 0)
98		write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
99	rwl->write_quota = write_quota;
100#else
101	rwl->type = isc_rwlocktype_read;
102	rwl->original = isc_rwlocktype_none;
103	rwl->active = 0;
104	rwl->granted = 0;
105	rwl->readers_waiting = 0;
106	rwl->writers_waiting = 0;
107	if (read_quota == 0)
108		read_quota = RWLOCK_DEFAULT_READ_QUOTA;
109	rwl->read_quota = read_quota;
110	if (write_quota == 0)
111		write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
112	rwl->write_quota = write_quota;
113#endif
114
115	result = isc_mutex_init(&rwl->lock);
116	if (result != ISC_R_SUCCESS)
117		return (result);
118
119	result = isc_condition_init(&rwl->readable);
120	if (result != ISC_R_SUCCESS) {
121		UNEXPECTED_ERROR(__FILE__, __LINE__,
122				 "isc_condition_init(readable) %s: %s",
123				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
124						ISC_MSG_FAILED, "failed"),
125				 isc_result_totext(result));
126		result = ISC_R_UNEXPECTED;
127		goto destroy_lock;
128	}
129	result = isc_condition_init(&rwl->writeable);
130	if (result != ISC_R_SUCCESS) {
131		UNEXPECTED_ERROR(__FILE__, __LINE__,
132				 "isc_condition_init(writeable) %s: %s",
133				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
134						ISC_MSG_FAILED, "failed"),
135				 isc_result_totext(result));
136		result = ISC_R_UNEXPECTED;
137		goto destroy_rcond;
138	}
139
140	rwl->magic = RWLOCK_MAGIC;
141
142	return (ISC_R_SUCCESS);
143
144  destroy_rcond:
145	(void)isc_condition_destroy(&rwl->readable);
146  destroy_lock:
147	DESTROYLOCK(&rwl->lock);
148
149	return (result);
150}
151
152void
153isc_rwlock_destroy(isc_rwlock_t *rwl) {
154	REQUIRE(VALID_RWLOCK(rwl));
155
156#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
157	REQUIRE(rwl->write_requests == rwl->write_completions &&
158		rwl->cnt_and_flag == 0 && rwl->readers_waiting == 0);
159#else
160	LOCK(&rwl->lock);
161	REQUIRE(rwl->active == 0 &&
162		rwl->readers_waiting == 0 &&
163		rwl->writers_waiting == 0);
164	UNLOCK(&rwl->lock);
165#endif
166
167	rwl->magic = 0;
168	(void)isc_condition_destroy(&rwl->readable);
169	(void)isc_condition_destroy(&rwl->writeable);
170	DESTROYLOCK(&rwl->lock);
171}
172
173#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
174
175/*
176 * When some architecture-dependent atomic operations are available,
177 * rwlock can be more efficient than the generic algorithm defined below.
178 * The basic algorithm is described in the following URL:
179 *   http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html
180 *
181 * The key is to use the following integer variables modified atomically:
182 *   write_requests, write_completions, and cnt_and_flag.
183 *
184 * write_requests and write_completions act as a waiting queue for writers
185 * in order to ensure the FIFO order.  Both variables begin with the initial
186 * value of 0.  When a new writer tries to get a write lock, it increments
187 * write_requests and gets the previous value of the variable as a "ticket".
188 * When write_completions reaches the ticket number, the new writer can start
189 * writing.  When the writer completes its work, it increments
190 * write_completions so that another new writer can start working.  If the
191 * write_requests is not equal to write_completions, it means a writer is now
192 * working or waiting.  In this case, new readers cannot start reading, or
193 * in other words, this algorithm basically prefers writers.
194 *
195 * cnt_and_flag is a "lock" shared by all readers and writers.  This integer
196 * variable is a kind of structure with two members: writer_flag (1 bit) and
197 * reader_count (31 bits).  The writer_flag shows whether a writer is working,
198 * and the reader_count shows the number of readers currently working or almost
199 * ready for working.  A writer who has the current "ticket" tries to get the
200 * lock by exclusively setting the writer_flag to 1, provided that the whole
201 * 32-bit is 0 (meaning no readers or writers working).  On the other hand,
202 * a new reader tries to increment the "reader_count" field provided that
203 * the writer_flag is 0 (meaning there is no writer working).
204 *
205 * If some of the above operations fail, the reader or the writer sleeps
206 * until the related condition changes.  When a working reader or writer
207 * completes its work, some readers or writers are sleeping, and the condition
208 * that suspended the reader or writer has changed, it wakes up the sleeping
209 * readers or writers.
210 *
211 * As already noted, this algorithm basically prefers writers.  In order to
212 * prevent readers from starving, however, the algorithm also introduces the
213 * "writer quota" (Q).  When Q consecutive writers have completed their work,
214 * suspending readers, the last writer will wake up the readers, even if a new
215 * writer is waiting.
216 *
217 * Implementation specific note: due to the combination of atomic operations
218 * and a mutex lock, ordering between the atomic operation and locks can be
219 * very sensitive in some cases.  In particular, it is generally very important
220 * to check the atomic variable that requires a reader or writer to sleep after
221 * locking the mutex and before actually sleeping; otherwise, it could be very
222 * likely to cause a deadlock.  For example, assume "var" is a variable
223 * atomically modified, then the corresponding code would be:
224 *	if (var == need_sleep) {
225 *		LOCK(lock);
226 *		if (var == need_sleep)
227 *			WAIT(cond, lock);
228 *		UNLOCK(lock);
229 *	}
230 * The second check is important, since "var" is protected by the atomic
231 * operation, not by the mutex, and can be changed just before sleeping.
232 * (The first "if" could be omitted, but this is also important in order to
233 * make the code efficient by avoiding the use of the mutex unless it is
234 * really necessary.)
235 */
236
/*
 * Layout of cnt_and_flag: bit 0 is the "a writer holds the lock" flag and
 * the remaining bits count readers, so one reader corresponds to adding
 * READER_INCR.
 */
#define WRITER_ACTIVE	0x1
#define READER_INCR	0x2
239
240isc_result_t
241isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
242	isc_int32_t cntflag;
243
244	REQUIRE(VALID_RWLOCK(rwl));
245
246#ifdef ISC_RWLOCK_TRACE
247	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
248				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
249#endif
250
251	if (type == isc_rwlocktype_read) {
252		if (rwl->write_requests != rwl->write_completions) {
253			/* there is a waiting or active writer */
254			LOCK(&rwl->lock);
255			if (rwl->write_requests != rwl->write_completions) {
256				rwl->readers_waiting++;
257				WAIT(&rwl->readable, &rwl->lock);
258				rwl->readers_waiting--;
259			}
260			UNLOCK(&rwl->lock);
261		}
262
263		cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
264		POST(cntflag);
265		while (1) {
266			if ((rwl->cnt_and_flag & WRITER_ACTIVE) == 0)
267				break;
268
269			/* A writer is still working */
270			LOCK(&rwl->lock);
271			rwl->readers_waiting++;
272			if ((rwl->cnt_and_flag & WRITER_ACTIVE) != 0)
273				WAIT(&rwl->readable, &rwl->lock);
274			rwl->readers_waiting--;
275			UNLOCK(&rwl->lock);
276
277			/*
278			 * Typically, the reader should be able to get a lock
279			 * at this stage:
280			 *   (1) there should have been no pending writer when
281			 *       the reader was trying to increment the
282			 *       counter; otherwise, the writer should be in
283			 *       the waiting queue, preventing the reader from
284			 *       proceeding to this point.
285			 *   (2) once the reader increments the counter, no
286			 *       more writer can get a lock.
287			 * Still, it is possible another writer can work at
288			 * this point, e.g. in the following scenario:
289			 *   A previous writer unlocks the writer lock.
290			 *   This reader proceeds to point (1).
291			 *   A new writer appears, and gets a new lock before
292			 *   the reader increments the counter.
293			 *   The reader then increments the counter.
294			 *   The previous writer notices there is a waiting
295			 *   reader who is almost ready, and wakes it up.
296			 * So, the reader needs to confirm whether it can now
297			 * read explicitly (thus we loop).  Note that this is
298			 * not an infinite process, since the reader has
299			 * incremented the counter at this point.
300			 */
301		}
302
303		/*
304		 * If we are temporarily preferred to writers due to the writer
305		 * quota, reset the condition (race among readers doesn't
306		 * matter).
307		 */
308		rwl->write_granted = 0;
309	} else {
310		isc_int32_t prev_writer;
311
312		/* enter the waiting queue, and wait for our turn */
313		prev_writer = isc_atomic_xadd(&rwl->write_requests, 1);
314		while (rwl->write_completions != prev_writer) {
315			LOCK(&rwl->lock);
316			if (rwl->write_completions != prev_writer) {
317				WAIT(&rwl->writeable, &rwl->lock);
318				UNLOCK(&rwl->lock);
319				continue;
320			}
321			UNLOCK(&rwl->lock);
322			break;
323		}
324
325		while (1) {
326			cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
327						     WRITER_ACTIVE);
328			if (cntflag == 0)
329				break;
330
331			/* Another active reader or writer is working. */
332			LOCK(&rwl->lock);
333			if (rwl->cnt_and_flag != 0)
334				WAIT(&rwl->writeable, &rwl->lock);
335			UNLOCK(&rwl->lock);
336		}
337
338		INSIST((rwl->cnt_and_flag & WRITER_ACTIVE) != 0);
339		rwl->write_granted++;
340	}
341
342#ifdef ISC_RWLOCK_TRACE
343	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
344				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
345#endif
346
347	return (ISC_R_SUCCESS);
348}
349
350isc_result_t
351isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
352	isc_int32_t cntflag;
353
354	REQUIRE(VALID_RWLOCK(rwl));
355
356#ifdef ISC_RWLOCK_TRACE
357	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
358				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
359#endif
360
361	if (type == isc_rwlocktype_read) {
362		/* If a writer is waiting or working, we fail. */
363		if (rwl->write_requests != rwl->write_completions)
364			return (ISC_R_LOCKBUSY);
365
366		/* Otherwise, be ready for reading. */
367		cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
368		if ((cntflag & WRITER_ACTIVE) != 0) {
369			/*
370			 * A writer is working.  We lose, and cancel the read
371			 * request.
372			 */
373			cntflag = isc_atomic_xadd(&rwl->cnt_and_flag,
374						  -READER_INCR);
375			/*
376			 * If no other readers are waiting and we've suspended
377			 * new writers in this short period, wake them up.
378			 */
379			if (cntflag == READER_INCR &&
380			    rwl->write_completions != rwl->write_requests) {
381				LOCK(&rwl->lock);
382				BROADCAST(&rwl->writeable);
383				UNLOCK(&rwl->lock);
384			}
385
386			return (ISC_R_LOCKBUSY);
387		}
388	} else {
389		/* Try locking without entering the waiting queue. */
390		cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
391					     WRITER_ACTIVE);
392		if (cntflag != 0)
393			return (ISC_R_LOCKBUSY);
394
395		/*
396		 * XXXJT: jump into the queue, possibly breaking the writer
397		 * order.
398		 */
399		(void)isc_atomic_xadd(&rwl->write_completions, -1);
400
401		rwl->write_granted++;
402	}
403
404#ifdef ISC_RWLOCK_TRACE
405	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
406				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
407#endif
408
409	return (ISC_R_SUCCESS);
410}
411
412isc_result_t
413isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
414	isc_int32_t prevcnt;
415
416	REQUIRE(VALID_RWLOCK(rwl));
417
418	/* Try to acquire write access. */
419	prevcnt = isc_atomic_cmpxchg(&rwl->cnt_and_flag,
420				     READER_INCR, WRITER_ACTIVE);
421	/*
422	 * There must have been no writer, and there must have been at least
423	 * one reader.
424	 */
425	INSIST((prevcnt & WRITER_ACTIVE) == 0 &&
426	       (prevcnt & ~WRITER_ACTIVE) != 0);
427
428	if (prevcnt == READER_INCR) {
429		/*
430		 * We are the only reader and have been upgraded.
431		 * Now jump into the head of the writer waiting queue.
432		 */
433		(void)isc_atomic_xadd(&rwl->write_completions, -1);
434	} else
435		return (ISC_R_LOCKBUSY);
436
437	return (ISC_R_SUCCESS);
438
439}
440
441void
442isc_rwlock_downgrade(isc_rwlock_t *rwl) {
443	isc_int32_t prev_readers;
444
445	REQUIRE(VALID_RWLOCK(rwl));
446
447	/* Become an active reader. */
448	prev_readers = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
449	/* We must have been a writer. */
450	INSIST((prev_readers & WRITER_ACTIVE) != 0);
451
452	/* Complete write */
453	(void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
454	(void)isc_atomic_xadd(&rwl->write_completions, 1);
455
456	/* Resume other readers */
457	LOCK(&rwl->lock);
458	if (rwl->readers_waiting > 0)
459		BROADCAST(&rwl->readable);
460	UNLOCK(&rwl->lock);
461}
462
463isc_result_t
464isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
465	isc_int32_t prev_cnt;
466
467	REQUIRE(VALID_RWLOCK(rwl));
468
469#ifdef ISC_RWLOCK_TRACE
470	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
471				  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
472#endif
473
474	if (type == isc_rwlocktype_read) {
475		prev_cnt = isc_atomic_xadd(&rwl->cnt_and_flag, -READER_INCR);
476
477		/*
478		 * If we're the last reader and any writers are waiting, wake
479		 * them up.  We need to wake up all of them to ensure the
480		 * FIFO order.
481		 */
482		if (prev_cnt == READER_INCR &&
483		    rwl->write_completions != rwl->write_requests) {
484			LOCK(&rwl->lock);
485			BROADCAST(&rwl->writeable);
486			UNLOCK(&rwl->lock);
487		}
488	} else {
489		isc_boolean_t wakeup_writers = ISC_TRUE;
490
491		/*
492		 * Reset the flag, and (implicitly) tell other writers
493		 * we are done.
494		 */
495		(void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
496		(void)isc_atomic_xadd(&rwl->write_completions, 1);
497
498		if (rwl->write_granted >= rwl->write_quota ||
499		    rwl->write_requests == rwl->write_completions ||
500		    (rwl->cnt_and_flag & ~WRITER_ACTIVE) != 0) {
501			/*
502			 * We have passed the write quota, no writer is
503			 * waiting, or some readers are almost ready, pending
504			 * possible writers.  Note that the last case can
505			 * happen even if write_requests != write_completions
506			 * (which means a new writer in the queue), so we need
507			 * to catch the case explicitly.
508			 */
509			LOCK(&rwl->lock);
510			if (rwl->readers_waiting > 0) {
511				wakeup_writers = ISC_FALSE;
512				BROADCAST(&rwl->readable);
513			}
514			UNLOCK(&rwl->lock);
515		}
516
517		if (rwl->write_requests != rwl->write_completions &&
518		    wakeup_writers) {
519			LOCK(&rwl->lock);
520			BROADCAST(&rwl->writeable);
521			UNLOCK(&rwl->lock);
522		}
523	}
524
525#ifdef ISC_RWLOCK_TRACE
526	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
527				  ISC_MSG_POSTUNLOCK, "postunlock"),
528		   rwl, type);
529#endif
530
531	return (ISC_R_SUCCESS);
532}
533
534#else /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */
535
536static isc_result_t
537doit(isc_rwlock_t *rwl, isc_rwlocktype_t type, isc_boolean_t nonblock) {
538	isc_boolean_t skip = ISC_FALSE;
539	isc_boolean_t done = ISC_FALSE;
540	isc_result_t result = ISC_R_SUCCESS;
541
542	REQUIRE(VALID_RWLOCK(rwl));
543
544	LOCK(&rwl->lock);
545
546#ifdef ISC_RWLOCK_TRACE
547	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
548				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
549#endif
550
551	if (type == isc_rwlocktype_read) {
552		if (rwl->readers_waiting != 0)
553			skip = ISC_TRUE;
554		while (!done) {
555			if (!skip &&
556			    ((rwl->active == 0 ||
557			      (rwl->type == isc_rwlocktype_read &&
558			       (rwl->writers_waiting == 0 ||
559				rwl->granted < rwl->read_quota)))))
560			{
561				rwl->type = isc_rwlocktype_read;
562				rwl->active++;
563				rwl->granted++;
564				done = ISC_TRUE;
565			} else if (nonblock) {
566				result = ISC_R_LOCKBUSY;
567				done = ISC_TRUE;
568			} else {
569				skip = ISC_FALSE;
570				rwl->readers_waiting++;
571				WAIT(&rwl->readable, &rwl->lock);
572				rwl->readers_waiting--;
573			}
574		}
575	} else {
576		if (rwl->writers_waiting != 0)
577			skip = ISC_TRUE;
578		while (!done) {
579			if (!skip && rwl->active == 0) {
580				rwl->type = isc_rwlocktype_write;
581				rwl->active = 1;
582				rwl->granted++;
583				done = ISC_TRUE;
584			} else if (nonblock) {
585				result = ISC_R_LOCKBUSY;
586				done = ISC_TRUE;
587			} else {
588				skip = ISC_FALSE;
589				rwl->writers_waiting++;
590				WAIT(&rwl->writeable, &rwl->lock);
591				rwl->writers_waiting--;
592			}
593		}
594	}
595
596#ifdef ISC_RWLOCK_TRACE
597	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
598				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
599#endif
600
601	UNLOCK(&rwl->lock);
602
603	return (result);
604}
605
606isc_result_t
607isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
608	return (doit(rwl, type, ISC_FALSE));
609}
610
611isc_result_t
612isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
613	return (doit(rwl, type, ISC_TRUE));
614}
615
616isc_result_t
617isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
618	isc_result_t result = ISC_R_SUCCESS;
619
620	REQUIRE(VALID_RWLOCK(rwl));
621	LOCK(&rwl->lock);
622	REQUIRE(rwl->type == isc_rwlocktype_read);
623	REQUIRE(rwl->active != 0);
624
625	/* If we are the only reader then succeed. */
626	if (rwl->active == 1) {
627		rwl->original = (rwl->original == isc_rwlocktype_none) ?
628				isc_rwlocktype_read : isc_rwlocktype_none;
629		rwl->type = isc_rwlocktype_write;
630	} else
631		result = ISC_R_LOCKBUSY;
632
633	UNLOCK(&rwl->lock);
634	return (result);
635}
636
637void
638isc_rwlock_downgrade(isc_rwlock_t *rwl) {
639
640	REQUIRE(VALID_RWLOCK(rwl));
641	LOCK(&rwl->lock);
642	REQUIRE(rwl->type == isc_rwlocktype_write);
643	REQUIRE(rwl->active == 1);
644
645	rwl->type = isc_rwlocktype_read;
646	rwl->original = (rwl->original == isc_rwlocktype_none) ?
647			isc_rwlocktype_write : isc_rwlocktype_none;
648	/*
649	 * Resume processing any read request that were blocked when
650	 * we upgraded.
651	 */
652	if (rwl->original == isc_rwlocktype_none &&
653	    (rwl->writers_waiting == 0 || rwl->granted < rwl->read_quota) &&
654	    rwl->readers_waiting > 0)
655		BROADCAST(&rwl->readable);
656
657	UNLOCK(&rwl->lock);
658}
659
660isc_result_t
661isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
662
663	REQUIRE(VALID_RWLOCK(rwl));
664	LOCK(&rwl->lock);
665	REQUIRE(rwl->type == type);
666
667	UNUSED(type);
668
669#ifdef ISC_RWLOCK_TRACE
670	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
671				  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
672#endif
673
674	INSIST(rwl->active > 0);
675	rwl->active--;
676	if (rwl->active == 0) {
677		if (rwl->original != isc_rwlocktype_none) {
678			rwl->type = rwl->original;
679			rwl->original = isc_rwlocktype_none;
680		}
681		if (rwl->type == isc_rwlocktype_read) {
682			rwl->granted = 0;
683			if (rwl->writers_waiting > 0) {
684				rwl->type = isc_rwlocktype_write;
685				SIGNAL(&rwl->writeable);
686			} else if (rwl->readers_waiting > 0) {
687				/* Does this case ever happen? */
688				BROADCAST(&rwl->readable);
689			}
690		} else {
691			if (rwl->readers_waiting > 0) {
692				if (rwl->writers_waiting > 0 &&
693				    rwl->granted < rwl->write_quota) {
694					SIGNAL(&rwl->writeable);
695				} else {
696					rwl->granted = 0;
697					rwl->type = isc_rwlocktype_read;
698					BROADCAST(&rwl->readable);
699				}
700			} else if (rwl->writers_waiting > 0) {
701				rwl->granted = 0;
702				SIGNAL(&rwl->writeable);
703			} else {
704				rwl->granted = 0;
705			}
706		}
707	}
708	INSIST(rwl->original == isc_rwlocktype_none);
709
710#ifdef ISC_RWLOCK_TRACE
711	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
712				  ISC_MSG_POSTUNLOCK, "postunlock"),
713		   rwl, type);
714#endif
715
716	UNLOCK(&rwl->lock);
717
718	return (ISC_R_SUCCESS);
719}
720
721#endif /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */
722#else /* ISC_PLATFORM_USETHREADS */
723
724isc_result_t
725isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
726		unsigned int write_quota)
727{
728	REQUIRE(rwl != NULL);
729
730	UNUSED(read_quota);
731	UNUSED(write_quota);
732
733	rwl->type = isc_rwlocktype_read;
734	rwl->active = 0;
735	rwl->magic = RWLOCK_MAGIC;
736
737	return (ISC_R_SUCCESS);
738}
739
740isc_result_t
741isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
742	REQUIRE(VALID_RWLOCK(rwl));
743
744	if (type == isc_rwlocktype_read) {
745		if (rwl->type != isc_rwlocktype_read && rwl->active != 0)
746			return (ISC_R_LOCKBUSY);
747		rwl->type = isc_rwlocktype_read;
748		rwl->active++;
749	} else {
750		if (rwl->active != 0)
751			return (ISC_R_LOCKBUSY);
752		rwl->type = isc_rwlocktype_write;
753		rwl->active = 1;
754	}
755	return (ISC_R_SUCCESS);
756}
757
758isc_result_t
759isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
760	return (isc_rwlock_lock(rwl, type));
761}
762
763isc_result_t
764isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
765	isc_result_t result = ISC_R_SUCCESS;
766
767	REQUIRE(VALID_RWLOCK(rwl));
768	REQUIRE(rwl->type == isc_rwlocktype_read);
769	REQUIRE(rwl->active != 0);
770
771	/* If we are the only reader then succeed. */
772	if (rwl->active == 1)
773		rwl->type = isc_rwlocktype_write;
774	else
775		result = ISC_R_LOCKBUSY;
776	return (result);
777}
778
779void
780isc_rwlock_downgrade(isc_rwlock_t *rwl) {
781
782	REQUIRE(VALID_RWLOCK(rwl));
783	REQUIRE(rwl->type == isc_rwlocktype_write);
784	REQUIRE(rwl->active == 1);
785
786	rwl->type = isc_rwlocktype_read;
787}
788
789isc_result_t
790isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
791	REQUIRE(VALID_RWLOCK(rwl));
792	REQUIRE(rwl->type == type);
793
794	UNUSED(type);
795
796	INSIST(rwl->active > 0);
797	rwl->active--;
798
799	return (ISC_R_SUCCESS);
800}
801
802void
803isc_rwlock_destroy(isc_rwlock_t *rwl) {
804	REQUIRE(rwl != NULL);
805	REQUIRE(rwl->active == 0);
806	rwl->magic = 0;
807}
808
809#endif /* ISC_PLATFORM_USETHREADS */
810