1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29/*
30 *	pthread_support.c
31 */
32
33#if PSYNCH
34
35#include <sys/param.h>
36#include <sys/queue.h>
37#include <sys/resourcevar.h>
38#include <sys/proc_internal.h>
39#include <sys/kauth.h>
40#include <sys/systm.h>
41#include <sys/timeb.h>
42#include <sys/times.h>
43#include <sys/time.h>
44#include <sys/acct.h>
45#include <sys/kernel.h>
46#include <sys/wait.h>
47#include <sys/signalvar.h>
48#include <sys/syslog.h>
49#include <sys/stat.h>
50#include <sys/lock.h>
51#include <sys/kdebug.h>
52#include <sys/sysproto.h>
53#include <sys/pthread_internal.h>
54#include <sys/vm.h>
55#include <sys/user.h>
56
57#include <mach/mach_types.h>
58#include <mach/vm_prot.h>
59#include <mach/semaphore.h>
60#include <mach/sync_policy.h>
61#include <mach/task.h>
62#include <kern/kern_types.h>
63#include <kern/task.h>
64#include <kern/clock.h>
65#include <mach/kern_return.h>
66#include <kern/thread.h>
67#include <kern/sched_prim.h>
68#include <kern/thread_call.h>
69#include <kern/kalloc.h>
70#include <kern/zalloc.h>
71#include <kern/sched_prim.h>
72#include <kern/processor.h>
73#include <kern/affinity.h>
74#include <kern/wait_queue.h>
75#include <kern/mach_param.h>
76#include <mach/mach_vm.h>
77#include <mach/mach_param.h>
78#include <mach/thread_policy.h>
79#include <mach/message.h>
80#include <mach/port.h>
81#include <vm/vm_protos.h>
82#include <vm/vm_map.h>
83#include <mach/vm_region.h>
84
85#include <libkern/OSAtomic.h>
86
87#include <pexpert/pexpert.h>
88
89#define __PSYNCH_DEBUG__ 0			/* debug panic actions  */
90#if (KDEBUG && STANDARD_KDEBUG)
91#define _PSYNCH_TRACE_ 1		/* kdebug trace */
92#endif
93
94#define __TESTMODE__ 2		/* 0 - return error on user error conditions */
95				/* 1 - log error on user error conditions */
96				/* 2 - abort caller on user error conditions */
97				/* 3 - panic on user error conditions */
98static int __test_panics__;
99static int __test_aborts__;
100static int __test_prints__;
101
102static inline void __FAILEDUSERTEST__(const char *str)
103{
104	proc_t p;
105
106	if (__test_panics__ != 0)
107		panic(str);
108
109	if (__test_aborts__ != 0 || __test_prints__ != 0)
110		p = current_proc();
111
112	if (__test_prints__ != 0)
113		printf("PSYNCH: pid[%d]: %s\n", p->p_pid, str);
114
115	if (__test_aborts__ != 0)
116		psignal(p, SIGABRT);
117}
118
119#if _PSYNCH_TRACE_
120#define _PSYNCH_TRACE_MLWAIT	0x9000000
121#define _PSYNCH_TRACE_MLDROP	0x9000004
122#define _PSYNCH_TRACE_CVWAIT	0x9000008
123#define _PSYNCH_TRACE_CVSIGNAL	0x900000c
124#define _PSYNCH_TRACE_CVBROAD	0x9000010
125#define _PSYNCH_TRACE_KMDROP	0x9000014
126#define _PSYNCH_TRACE_RWRDLOCK	0x9000018
127#define _PSYNCH_TRACE_RWLRDLOCK	0x900001c
128#define _PSYNCH_TRACE_RWWRLOCK	0x9000020
129#define _PSYNCH_TRACE_RWYWRLOCK	0x9000024
130#define _PSYNCH_TRACE_RWUPGRADE	0x9000028
131#define _PSYNCH_TRACE_RWDOWNGRADE	0x900002c
132#define _PSYNCH_TRACE_RWUNLOCK	0x9000030
133#define _PSYNCH_TRACE_RWUNLOCK2	0x9000034
134#define _PSYNCH_TRACE_RWHANDLEU	0x9000038
135#define _PSYNCH_TRACE_FSEQTILL	0x9000040
136#define _PSYNCH_TRACE_CLRPRE	0x9000044
137#define _PSYNCH_TRACE_CVHBROAD	0x9000048
138#define _PSYNCH_TRACE_CVSEQ	0x900004c
139#define _PSYNCH_TRACE_THWAKEUP	0x9000050
140/* user side */
141#define _PSYNCH_TRACE_UM_LOCK	0x9000060
142#define _PSYNCH_TRACE_UM_UNLOCK	0x9000064
143#define _PSYNCH_TRACE_UM_MHOLD	0x9000068
144#define _PSYNCH_TRACE_UM_MDROP	0x900006c
145#define _PSYNCH_TRACE_UM_CVWAIT	0x9000070
146#define _PSYNCH_TRACE_UM_CVSIG	0x9000074
147#define _PSYNCH_TRACE_UM_CVBRD	0x9000078
148
149proc_t pthread_debug_proc = PROC_NULL;
150static inline void __PTHREAD_TRACE_DEBUG(uint32_t debugid, uintptr_t arg1,
151                uintptr_t arg2,
152                uintptr_t arg3,
153                uintptr_t arg4,
154                uintptr_t arg5)
155{
156	proc_t p = current_proc();
157
158	if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc))
159		KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, arg5);
160}
161
162#endif /* _PSYNCH_TRACE_ */
163
164#define ECVCERORR       256
165#define ECVPERORR       512
166
167lck_mtx_t * pthread_list_mlock;
168
169#define PTHHASH(addr)    (&pthashtbl[(addr) & pthhash])
170extern LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
171struct pthhashhead * pth_glob_hashtbl;
172u_long pthhash;
173
174LIST_HEAD(, ksyn_wait_queue) pth_free_list;
175int num_total_kwq = 0;  /* number of kwq in use currently */
176int num_infreekwq = 0;	/* number of kwq in free list */
int num_freekwq = 0;	/* number of kwq actually freed from the free list */
178int num_reusekwq = 0;	/* number of kwq pulled back for reuse from free list */
179int num_addedfreekwq = 0; /* number of added free kwq from the last instance */
180int num_lastfreekwqcount = 0;	/* the free count from the last time */
181
182static int PTH_HASHSIZE = 100;
183
184static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
185static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */
186
187#define SEQFIT 0
188#define FIRSTFIT 1
189
/*
 * A single wait queue of blocked threads, kept ordered by their
 * generation sequence numbers (firstnum/lastnum track the extremes).
 */
struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;	/* ordered list of waiter elements */
	uint32_t	ksynq_count;		/* number of entries in queue */
	uint32_t	ksynq_firstnum;		/* lowest seq in queue */
	uint32_t	ksynq_lastnum;		/* highest seq in queue */
};
typedef struct ksyn_queue * ksyn_queue_t;
197
198#define KSYN_QUEUE_READ		0
199#define KSYN_QUEUE_LREAD	1
200#define KSYN_QUEUE_WRITER	2
201#define KSYN_QUEUE_YWRITER	3
202#define KSYN_QUEUE_UPGRADE	4
203#define KSYN_QUEUE_MAX		5
204
/*
 * Kernel-side state for one userland synchronizer (mutex, cvar, rwlock,
 * ...), looked up by user address (or object/offset in shared mode).
 * Holds the per-type waiter queues plus cached copies of the userland
 * L/U/S words and the various prepost records.
 */
struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;	/* global hash table linkage */
	LIST_ENTRY(ksyn_wait_queue) kw_list;	/* free list linkage */
	user_addr_t kw_addr;		/* userland address of the synchronizer */
	uint64_t  kw_owner;		/* tid of the current owner (mutex) */
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int     kw_flags;		/* mutex, cvar options/flags */
	int 	kw_pflags;		/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval needed for upkeep before free */
	int	kw_iocount;		/* inuse reference */
	int 	kw_dropcount;		/* current users unlocking... */

	int	kw_type;		/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;		/* the last seq that unlocked */
/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
#define kw_initrecv kw_nextseqword	/* number of incoming waiters with Ibit seen so far */
	uint32_t kw_overlapwatch;	/* chance for overlaps  */
#define kw_initcount kw_overlapwatch	/* number of incoming waiters with Ibit expected */
	uint32_t kw_initcountseq;	/* highest seq with Ibit on for mutex and cvar */
	uint32_t kw_pre_rwwc;		/* prepost count */
	uint32_t kw_pre_lockseq;	/* prepost target seq */
	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned  */
	uint32_t kw_pre_intrcount;	/*  prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;	/*  prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits;	/*  return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype;	/*  type of failed wakeups */

	int 	kw_kflags;		/* KSYN_KWF_* state flags */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_mtx_t kw_lock;		/* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
249
250#define PTHRW_INC			0x100
251#define PTHRW_BIT_MASK		0x000000ff
252
253#define PTHRW_COUNT_SHIFT	8
254#define PTHRW_COUNT_MASK	0xffffff00
255#define PTHRW_MAX_READERS	0xffffff00
256
257/* New model bits on Lword */
258#define PTH_RWL_KBIT	0x01	/* users cannot acquire in user mode */
259#define PTH_RWL_EBIT	0x02	/* exclusive lock in progress */
260#define PTH_RWL_WBIT	0x04	/* write waiters pending in kernel */
261#define PTH_RWL_PBIT    0x04    /* prepost (cv) pending in kernel */
262#define PTH_RWL_YBIT	0x08	/* yielding write waiters pending in kernel */
263#define PTH_RWL_RETRYBIT 0x08	/* mutex retry wait */
264#define PTH_RWL_LBIT	0x10	/* long read in progress */
265#define PTH_RWL_MTXNONE 0x10    /* indicates the cvwait does not have mutex held */
266#define PTH_RWL_UBIT	0x20	/* upgrade request pending */
267#define PTH_RWL_MTX_WAIT 0x20	/* in cvar in mutex wait */
268#define PTH_RWL_RBIT	0x40	/* reader pending in kernel(not used) */
269#define PTH_RWL_MBIT	0x40	/* overlapping grants from kernel */
270#define PTH_RWL_TRYLKBIT 0x40	/* trylock attempt (mutex only) */
#define PTH_RWL_IBIT	0x80	/* lock reset, held until first successful unlock */
272
273
274/* UBIT values for mutex, cvar */
275#define PTH_RWU_SBIT    0x01
276#define PTH_RWU_BBIT    0x02
277
278#define PTHRW_RWL_INIT       PTH_RWL_IBIT    /* reset state on the lock bits (U)*/
279
280/* New model bits on Sword */
281#define PTH_RWS_SBIT	0x01	/* kernel transition seq not set yet*/
282#define PTH_RWS_IBIT	0x02	/* Sequence is not set on return from kernel */
283#define PTH_RWS_CV_CBIT PTH_RWS_SBIT    /* kernel has cleared all info w.r.s.t CV */
284#define PTH_RWS_CV_PBIT PTH_RWS_IBIT    /* kernel has prepost/fake structs only,no waiters */
285#define PTH_RWS_CV_MBIT PTH_RWL_MBIT	/* to indicate prepost return */
286#define PTH_RWS_WSVBIT  0x04    /* save W bit */
287#define PTH_RWS_USVBIT  0x08    /* save U bit */
288#define PTH_RWS_YSVBIT  0x10    /* save Y bit */
289#define PTHRW_RWS_INIT       PTH_RWS_SBIT    /* reset on the lock bits (U)*/
290#define PTHRW_RWS_SAVEMASK (PTH_RWS_WSVBIT|PTH_RWS_USVBIT|PTH_RWS_YSVBIT)    /*save bits mask*/
291#define PTHRW_SW_Reset_BIT_MASK 0x000000fe      /* remove S bit and get rest of the bits */
292
293#define PTHRW_RWS_INIT       PTH_RWS_SBIT    /* reset on the lock bits (U)*/
294
295
296#define PTHRW_UN_BIT_MASK 0x000000bf	/* remove overlap  bit */
297
298
299#define PTHREAD_MTX_TID_SWITCHING (uint64_t)-1
300
301/* new L word defns */
302#define is_rwl_readinuser(x) ((((x) & (PTH_RWL_UBIT | PTH_RWL_KBIT)) == 0)||(((x) & PTH_RWL_LBIT) != 0))
303#define is_rwl_ebit_set(x) (((x) & PTH_RWL_EBIT) != 0)
304#define is_rwl_lbit_set(x) (((x) & PTH_RWL_LBIT) != 0)
305#define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0)
306#define is_rw_ubit_set(x) (((x) & PTH_RWL_UBIT) != 0)
307
308/* S word checks */
309#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT))
310#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT))
311
312/* first contended seq that kernel sees */
313#define KW_MTXFIRST_KSEQ	0x200
314#define KW_CVFIRST_KSEQ		1
315#define KW_RWFIRST_KSEQ		0x200
316
317int is_seqlower(uint32_t x, uint32_t y);
318int is_seqlower_eq(uint32_t x, uint32_t y);
319int is_seqhigher(uint32_t x, uint32_t y);
320int is_seqhigher_eq(uint32_t x, uint32_t y);
321int find_diff(uint32_t upto, uint32_t lowest);
322
323
324static inline  int diff_genseq(uint32_t x, uint32_t y) {
325	if (x > y)  {
326		return(x-y);
327	} else {
328		return((PTHRW_MAX_READERS - y) + x + PTHRW_INC);
329	}
330}
331
332#define TID_ZERO (uint64_t)0
333
334/* bits needed in handling the rwlock unlock */
335#define PTH_RW_TYPE_READ	0x01
336#define PTH_RW_TYPE_LREAD	0x02
337#define PTH_RW_TYPE_WRITE	0x04
338#define PTH_RW_TYPE_YWRITE	0x08
339#define PTH_RW_TYPE_UPGRADE	0x10
340#define PTH_RW_TYPE_MASK	0xff
341#define PTH_RW_TYPE_SHIFT  	8
342
343#define PTH_RWSHFT_TYPE_READ	0x0100
344#define PTH_RWSHFT_TYPE_LREAD	0x0200
345#define PTH_RWSHFT_TYPE_WRITE	0x0400
346#define PTH_RWSHFT_TYPE_YWRITE	0x0800
347#define PTH_RWSHFT_TYPE_MASK	0xff00
348
349/*
350 * Mutex protocol attributes
351 */
352#define PTHREAD_PRIO_NONE            0
353#define PTHREAD_PRIO_INHERIT         1
354#define PTHREAD_PRIO_PROTECT         2
355#define PTHREAD_PROTOCOL_FLAGS_MASK  0x3
356
357/*
358 * Mutex type attributes
359 */
360#define PTHREAD_MUTEX_NORMAL            0
361#define PTHREAD_MUTEX_ERRORCHECK        4
362#define PTHREAD_MUTEX_RECURSIVE         8
363#define PTHREAD_MUTEX_DEFAULT           PTHREAD_MUTEX_NORMAL
364#define PTHREAD_TYPE_FLAGS_MASK		0xc
365
366/*
367 * Mutex pshared attributes
368 */
369#define PTHREAD_PROCESS_SHARED         0x10
370#define PTHREAD_PROCESS_PRIVATE        0x20
371#define PTHREAD_PSHARED_FLAGS_MASK	0x30
372
373/*
374 * Mutex policy attributes
375 */
376#define _PTHREAD_MUTEX_POLICY_NONE              0
377#define _PTHREAD_MUTEX_POLICY_FAIRSHARE         0x040	/* 1 */
378#define _PTHREAD_MUTEX_POLICY_FIRSTFIT          0x080	/* 2 */
379#define _PTHREAD_MUTEX_POLICY_REALTIME          0x0c0	/* 3 */
380#define _PTHREAD_MUTEX_POLICY_ADAPTIVE          0x100	/* 4 */
381#define _PTHREAD_MUTEX_POLICY_PRIPROTECT        0x140	/* 5 */
382#define _PTHREAD_MUTEX_POLICY_PRIINHERIT        0x180	/* 6 */
383#define PTHREAD_POLICY_FLAGS_MASK	0x1c0
384
385#define _PTHREAD_MTX_OPT_HOLDLOCK 	0x200
386#define _PTHREAD_MTX_OPT_NOMTX 		0x400
387
388#define _PTHREAD_MTX_OPT_NOTIFY 	0x1000
389#define _PTHREAD_MTX_OPT_MUTEX		0x2000	/* this is a mutex type  */
390
391#define _PTHREAD_RWLOCK_UPGRADE_TRY 0x10000
392
393/* pflags */
394#define KSYN_WQ_INLIST	1
395#define KSYN_WQ_INHASH	2
396#define KSYN_WQ_SHARED	4
397#define KSYN_WQ_WAITING 8	/* threads waiting for this wq to be available */
398#define KSYN_WQ_FLIST 	0X10	/* in free list to be freed after a short delay */
399
400/* kflags */
401#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
402#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
403
404#define KSYN_CLEANUP_DEADLINE 10
405int psynch_cleanupset;
406thread_call_t psynch_thcall;
407
408#define KSYN_WQTYPE_INWAIT	0x1000
409#define KSYN_WQTYPE_INDROP	0x2000
410#define KSYN_WQTYPE_MTX		0x1
411#define KSYN_WQTYPE_CVAR	0x2
412#define KSYN_WQTYPE_RWLOCK	0x4
413#define KSYN_WQTYPE_SEMA	0x8
414#define KSYN_WQTYPE_BARR	0x10
415#define KSYN_WQTYPE_MASK        0x00ff
416
417#define KSYN_MTX_MAX 0x0fffffff
418#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
419
420#define KW_UNLOCK_PREPOST 		0x01
421#define KW_UNLOCK_PREPOST_UPGRADE 	0x02
422#define KW_UNLOCK_PREPOST_DOWNGRADE 	0x04
423#define KW_UNLOCK_PREPOST_READLOCK 	0x08
424#define KW_UNLOCK_PREPOST_LREADLOCK 	0x10
425#define KW_UNLOCK_PREPOST_WRLOCK 	0x20
426#define KW_UNLOCK_PREPOST_YWRLOCK 	0x40
427
428#define CLEAR_PREPOST_BITS(kwq)  {\
429			kwq->kw_pre_lockseq = 0; \
430			kwq->kw_pre_sseq = PTHRW_RWS_INIT; \
431			kwq->kw_pre_rwwc = 0; \
432			}
433
434#define CLEAR_INITCOUNT_BITS(kwq)  {\
435			kwq->kw_initcount = 0; \
436			kwq->kw_initrecv = 0; \
437			kwq->kw_initcountseq = 0; \
438			}
439
440#define CLEAR_INTR_PREPOST_BITS(kwq)  {\
441			kwq->kw_pre_intrcount = 0; \
442			kwq->kw_pre_intrseq = 0; \
443			kwq->kw_pre_intrretbits = 0; \
444			kwq->kw_pre_intrtype = 0; \
445			}
446
447#define CLEAR_REINIT_BITS(kwq)  {\
448			if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { \
449				if((kwq->kw_inqueue != 0) && (kwq->kw_inqueue != kwq->kw_fakecount)) \
450					panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);	\
451			};\
452			if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { \
453				kwq->kw_nextseqword = PTHRW_RWS_INIT; \
454				kwq->kw_overlapwatch = 0; \
455			}; \
456			kwq->kw_pre_lockseq = 0; \
457			kwq->kw_pre_rwwc = 0; \
458			kwq->kw_pre_sseq = PTHRW_RWS_INIT; \
459			kwq->kw_lastunlockseq = PTHRW_RWL_INIT; \
460			kwq->kw_lastseqword = PTHRW_RWS_INIT; \
461			kwq->kw_pre_intrcount = 0; \
462			kwq->kw_pre_intrseq = 0; \
463			kwq->kw_pre_intrretbits = 0; \
464			kwq->kw_pre_intrtype = 0; \
465			kwq->kw_lword = 0;	\
466			kwq->kw_uword = 0;	\
467			kwq->kw_sword = PTHRW_RWS_INIT;	\
468			}
469
470void pthread_list_lock(void);
471void pthread_list_unlock(void);
472void pthread_list_lock_spin(void);
473void pthread_list_lock_convert_spin(void);
474void ksyn_wqlock(ksyn_wait_queue_t kwq);
475void ksyn_wqunlock(ksyn_wait_queue_t kwq);
476ksyn_wait_queue_t ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t offset);
477int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype , ksyn_wait_queue_t * wq);
478void ksyn_wqrelease(ksyn_wait_queue_t mkwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype);
479extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset);
480static void UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype);
481extern thread_t port_name_to_thread(mach_port_name_t port_name);
482
483kern_return_t ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log, thread_continue_t, void * parameter);
484kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe);
485void ksyn_freeallkwe(ksyn_queue_t kq);
486
487uint32_t psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags);
488int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t * updatep, int flags, int *blockp, uint32_t premgen);
489
490void ksyn_queue_init(ksyn_queue_t kq);
491int ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int firstfit);
492ksyn_waitq_element_t ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq);
493void ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
494int ksyn_queue_move_tofree(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t freeq, int all, int reease);
495void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
496uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
497uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
498
499int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t  nwaiters, uint32_t *countp);
500uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);
501
502ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
503uint32_t ksyn_queue_cvcount_entries(ksyn_queue_t kq, uint32_t upto, uint32_t from, int * numwaitersp, int * numintrp, int * numprepop);
504void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
505void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release);
506ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
507ksyn_waitq_element_t ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, thread_t th, uint32_t toseq);
508void psynch_cvcontinue(void *, wait_result_t);
509void psynch_mtxcontinue(void *, wait_result_t);
510
511int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp);
512int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]);
513ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove);
514int kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, int flags , int * blockp);
515int kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, int flags, uint32_t premgen, int * blockp);
516
/*
 * Refresh a condvar kwq's cached view of the userland L (lock), U
 * (unlock) and S (sequence) words from the values supplied by this
 * syscall.  Words only ever move forward in sequence space.  No-op for
 * non-CV queue types.
 */
static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, __unused uint64_t tid, __unused int wqtype)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
			/* the values of L,U and S are cleared out due to L==S in previous transition */
			kwq->kw_lword = mgen;
			kwq->kw_uword = ugen;
			kwq->kw_sword = rw_wc;
			kwq->kw_kflags &=  ~KSYN_KWF_ZEROEDOUT;
		}
		/* advance the cached words only if the incoming seq is higher */
		if (is_seqhigher((mgen & PTHRW_COUNT_MASK), (kwq->kw_lword & PTHRW_COUNT_MASK)) != 0)
			kwq->kw_lword = mgen;
		if (is_seqhigher((ugen & PTHRW_COUNT_MASK), (kwq->kw_uword & PTHRW_COUNT_MASK)) != 0)
			kwq->kw_uword = ugen;
		if ((rw_wc & PTH_RWS_CV_CBIT) != 0) {
			/* C bit set in the S word: also advance the kernel-side CV seq */
			if(is_seqlower(kwq->kw_cvkernelseq, (rw_wc & PTHRW_COUNT_MASK)) != 0) {
				kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
			}
			if (is_seqhigher((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_sword & PTHRW_COUNT_MASK)) != 0)
				kwq->kw_sword = rw_wc;
		}
	}
}
541
542
543/* to protect the hashes, iocounts, freelist */
void
pthread_list_lock(void)
{
	/* take the global list mutex (protects hashes, iocounts, freelist) */
	lck_mtx_lock(pthread_list_mlock);
}
549
void
pthread_list_lock_spin(void)
{
	/* take the global list mutex in spin mode (for short hold times) */
	lck_mtx_lock_spin(pthread_list_mlock);
}
555
void
pthread_list_lock_convert_spin(void)
{
	/* convert a spin-mode hold of the list mutex into a full (blocking) hold */
	lck_mtx_convert_spin(pthread_list_mlock);
}
561
562
void
pthread_list_unlock(void)
{
	/* release the global list mutex */
	lck_mtx_unlock(pthread_list_mlock);
}
568
569/* to protect the indiv queue */
void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{

	/* lock this individual wait queue's mutex */
	lck_mtx_lock(&kwq->kw_lock);
}
576
void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	/* unlock this individual wait queue's mutex */
	lck_mtx_unlock(&kwq->kw_lock);
}
582
583
584/* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */
/*
 * Release one unlock (ugen) against the mutex kwq and hand the lock to
 * the next waiter, or record a prepost when that waiter has not arrived
 * yet.  Returns bits to hand back to userland (lkseq|PTH_RWL_PBIT when a
 * firstfit prepost was recorded, else 0).
 */
uint32_t
psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags)
{
	uint32_t nextgen, low_writer, updatebits, returnbits = 0;
	int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	ksyn_waitq_element_t kwe = NULL;
	kern_return_t kret = KERN_SUCCESS;

	/* seq the next grantee in fairshare order is expected to hold */
	nextgen = (ugen + PTHRW_INC);

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_START, (uint32_t)kwq->kw_addr, lkseq, ugen, flags, 0);
#endif /* _PSYNCH_TRACE_ */

	ksyn_wqlock(kwq);

redrive:	/* retried when a chosen waiter turns out to be gone (KERN_NOT_WAITING) */

	if (kwq->kw_inqueue != 0) {
		/* waiters are queued; grant K+E bits tagged with the highest seq */
		updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
		kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
		if (firstfit != 0)
		{
			/* first fit , pick any one */
			kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
			kwe->kwe_psynchretval = updatebits;
			kwe->kwe_kwqqueue = NULL;

#if _PSYNCH_TRACE_
			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf1, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
#endif /* _PSYNCH_TRACE_ */

			kret = ksyn_wakeup_thread(kwq, kwe);
#if __TESTPANICS__
			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
				panic("psynch_mutexdrop_internal: panic unable to wakeup firstfit mutex thread\n");
#endif /* __TESTPANICS__ */
			if (kret == KERN_NOT_WAITING)
				goto redrive;
		} else {
			/* handle fairshare */
			low_writer = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
			low_writer &= PTHRW_COUNT_MASK;

			if (low_writer == nextgen) {
				/* next seq to be granted found */
				kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);

				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT;
				kwe->kwe_kwqqueue = NULL;

#if _PSYNCH_TRACE_
				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
#endif /* _PSYNCH_TRACE_ */

				kret = ksyn_wakeup_thread(kwq, kwe);
#if __TESTPANICS__
				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
					panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n");
#endif /* __TESTPANICS__ */
				if (kret == KERN_NOT_WAITING) {
					/* interrupt post: record the missed wakeup so the
					 * retrying thread can pick up the grant */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
#if _PSYNCH_TRACE_
					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfafafaf1, nextgen, kwq->kw_pre_intrretbits, 0);
#endif /* _PSYNCH_TRACE_ */
				}

			} else if (is_seqhigher(low_writer, nextgen) != 0) {
				/* the expected grantee has not blocked yet; prepost for it */
				kwq->kw_pre_rwwc++;

				if (kwq->kw_pre_rwwc > 1) {
					__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (1)\n");
					goto out;
				}

				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
#if _PSYNCH_TRACE_
				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
			} else {

				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");

				/* lowest queued seq is below nextgen; search for an exact match */
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (nextgen & PTHRW_COUNT_MASK), 1);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT;
					kwe->kwe_kwqqueue = NULL;
#if _PSYNCH_TRACE_
					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
#endif /* _PSYNCH_TRACE_ */
					kret = ksyn_wakeup_thread(kwq, kwe);
#if __TESTPANICS__
					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
						panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n");
#endif /* __TESTPANICS__ */
					if (kret == KERN_NOT_WAITING)
						goto redrive;
				} else {
					/* next seq to be granted not found, prepost */
					kwq->kw_pre_rwwc++;

					if (kwq->kw_pre_rwwc > 1) {
						__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (2)\n");
						goto out;
					}

					kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
#if _PSYNCH_TRACE_
					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
				}
			}
		}
	} else {
		/* no waiters queued at all */

		/* if firstfit the last one could be spurious */
		if (firstfit == 0) {
			/* fairshare: the waiter will arrive later; prepost nextgen */
			kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
			kwq->kw_pre_rwwc++;

			if (kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (3)\n");
				goto out;
			}

			kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
#if _PSYNCH_TRACE_
			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
		} else {
			/* first fit case */
#if _PSYNCH_TRACE_
			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_lastunlockseq, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
			kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
			/* not set or the new lkseq is higher */
			if ((kwq->kw_pre_rwwc == 0) || (is_seqlower(kwq->kw_pre_lockseq, lkseq) == 0))
				kwq->kw_pre_lockseq = (lkseq & PTHRW_COUNT_MASK);
			kwq->kw_pre_rwwc = 1;
#if _PSYNCH_TRACE_
			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */

			/* indicate prepost content in kernel */
			returnbits = lkseq | PTH_RWL_PBIT;
		}
	}

out:
	ksyn_wqunlock(kwq);

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX));
	return(returnbits);
}
749
750/*
751 *  psynch_mutexwait: This system call is used for contended psynch mutexes to block.
752 */
753
754int
755psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t * retval)
756{
757	user_addr_t mutex  = uap->mutex;
758	uint32_t mgen = uap->mgen;
759	uint32_t ugen = uap->ugen;
760	uint64_t tid = uap->tid;
761	int flags = uap->flags;
762	ksyn_wait_queue_t kwq;
763	int error=0;
764	int ins_flags, retry;
765	uthread_t uth;
766	int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT;
767	uint32_t lockseq, updatebits=0;
768	ksyn_waitq_element_t kwe;
769	kern_return_t kret;
770
771#if _PSYNCH_TRACE_
772	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0);
773	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, (uint32_t)tid, 0);
774#endif /* _PSYNCH_TRACE_ */
775
776	uth = current_uthread();
777
778	kwe = &uth->uu_kwe;
779	kwe->kwe_lockseq = uap->mgen;
780	kwe->kwe_uth = uth;
781	kwe->kwe_psynchretval = 0;
782	kwe->kwe_kwqqueue = NULL;
783	lockseq = (uap->mgen & PTHRW_COUNT_MASK);
784
785	if (firstfit  == 0) {
786		ins_flags = SEQFIT;
787	} else  {
788		/* first fit */
789		ins_flags = FIRSTFIT;
790	}
791
792	error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
793	if (error != 0) {
794#if _PSYNCH_TRACE_
795		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 1, 0xdeadbeef, error, 0);
796#endif /* _PSYNCH_TRACE_ */
797		return(error);
798	}
799
800	ksyn_wqlock(kwq);
801
802
803	if ((mgen & PTH_RWL_RETRYBIT) != 0) {
804		retry = 1;
805		mgen &= ~PTH_RWL_RETRYBIT;
806	}
807
808        /* handle first the missed wakeups */
809        if ((kwq->kw_pre_intrcount != 0) &&
810                ((kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE)) &&
811                (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
812                kwq->kw_pre_intrcount--;
813                kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
814                if (kwq->kw_pre_intrcount==0)
815                        CLEAR_INTR_PREPOST_BITS(kwq);
816                ksyn_wqunlock(kwq);
817				*retval = kwe->kwe_psynchretval;
818#if _PSYNCH_TRACE_
819			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 0xfafafaf1, kwe->kwe_psynchretval, kwq->kw_pre_intrcount, 0);
820#endif /* _PSYNCH_TRACE_ */
821                goto out;
822        }
823
824	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) {
825		/* got preposted lock */
826		kwq->kw_pre_rwwc--;
827		if (kwq->kw_pre_rwwc == 0) {
828			CLEAR_PREPOST_BITS(kwq);
829			kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
830			if (kwq->kw_inqueue == 0) {
831				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
832			} else {
833				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
834			}
835			updatebits &= ~PTH_RWL_MTX_WAIT;
836
837			kwe->kwe_psynchretval = updatebits;
838
839			if (updatebits == 0) {
840				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq  in mutexwait with no EBIT \n");
841			}
842			ksyn_wqunlock(kwq);
843			*retval = updatebits;
844#if _PSYNCH_TRACE_
845			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
846#endif /* _PSYNCH_TRACE_ */
847			goto out;
848		} else {
849			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
850			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
851			ksyn_wqunlock(kwq);
852			error = EINVAL;
853			goto out;
854		}
855	}
856
857#if _PSYNCH_TRACE_
858	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfeedfeed, mgen, ins_flags, 0);
859#endif /* _PSYNCH_TRACE_ */
860
861	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], mgen, uth, kwe, ins_flags);
862	if (error != 0) {
863		ksyn_wqunlock(kwq);
864#if _PSYNCH_TRACE_
865		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0);
866#endif /* _PSYNCH_TRACE_ */
867		goto out;
868	}
869
870	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, psynch_mtxcontinue, (void *)kwq);
871
872	psynch_mtxcontinue((void *)kwq, kret);
873
874	/* not expected to return from unix_syscall_return */
875	panic("psynch_mtxcontinue returned from unix_syscall_return");
876
877out:
878	ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
879#if _PSYNCH_TRACE_
880	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0);
881#endif /* _PSYNCH_TRACE_ */
882
883	return(error);
884}
885
886void
887psynch_mtxcontinue(void * parameter, wait_result_t result)
888{
889	int error = 0;
890	uint32_t updatebits = 0;
891	uthread_t uth = current_uthread();
892	ksyn_wait_queue_t kwq = (ksyn_wait_queue_t)parameter;
893	ksyn_waitq_element_t kwe;
894
895	kwe = &uth->uu_kwe;
896
897	switch (result) {
898		case THREAD_TIMED_OUT:
899			error  = ETIMEDOUT;
900			break;
901		case THREAD_INTERRUPTED:
902			error  = EINTR;
903			break;
904		default:
905			error = 0;
906			break;
907	}
908
909	if (error != 0) {
910		ksyn_wqlock(kwq);
911
912#if _PSYNCH_TRACE_
913		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, 0xdeadbeef, error, 0);
914#endif /* _PSYNCH_TRACE_ */
915		if (kwe->kwe_kwqqueue != NULL)
916			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
917		ksyn_wqunlock(kwq);
918	} else {
919		updatebits = kwe->kwe_psynchretval;
920		updatebits &= ~PTH_RWL_MTX_WAIT;
921		uth->uu_rval[0] = updatebits;
922
923		if (updatebits == 0)
924			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq  in mutexwait with no EBIT \n");
925	}
926	ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
927#if _PSYNCH_TRACE_
928	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, updatebits, error, 0);
929#endif /* _PSYNCH_TRACE_ */
930
931	unix_syscall_return(error);
932}
933
934/*
935 *  psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
936  */
937int
938psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, uint32_t * retval)
939{
940	user_addr_t mutex  = uap->mutex;
941	uint32_t mgen = uap->mgen;
942	uint32_t ugen = uap->ugen;
943	uint64_t tid = uap->tid;
944	int flags = uap->flags;
945	ksyn_wait_queue_t kwq;
946	uint32_t updateval;
947	int error=0;
948
949	error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq);
950	if (error != 0) {
951		return(error);
952	}
953
954	updateval = psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
955	/* drops the kwq reference */
956
957	*retval = updateval;
958	return(0);
959
960}
961
962/*
963 *  psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
964 */
965int
966psynch_cvbroad(__unused proc_t p, struct psynch_cvbroad_args * uap, uint32_t * retval)
967{
968	user_addr_t cond  = uap->cv;
969	uint64_t cvlsgen = uap->cvlsgen;
970	uint64_t cvudgen = uap->cvudgen;
971	uint32_t cgen, cugen, csgen, diffgen;
972	uint32_t uptoseq, fromseq;
973	int flags = uap->flags;
974	ksyn_wait_queue_t ckwq;
975	int error=0;
976	uint32_t updatebits = 0;
977	uint32_t count;
978	struct ksyn_queue  kfreeq;
979
980	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
981	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
982	cugen = (uint32_t)((cvudgen >> 32) & 0xffffffff);
983	diffgen = ((uint32_t)(cvudgen & 0xffffffff));
984	count = (diffgen >> PTHRW_COUNT_SHIFT);
985
986#if _PSYNCH_TRACE_
987	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
988	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0xcbcbcbc1, diffgen,flags, 0);
989#endif /* _PSYNCH_TRACE_ */
990
991	uptoseq = cgen & PTHRW_COUNT_MASK;
992	fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
993
994	if (is_seqhigher(fromseq, uptoseq) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) {
995		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
996		return EINVAL;
997	}
998	if (count > (uint32_t)task_threadmax) {
999		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
1000		return EBUSY;
1001	}
1002
1003	ckwq = NULL;
1004
1005	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
1006	if (error != 0) {
1007#if _PSYNCH_TRACE_
1008		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
1009#endif /* _PSYNCH_TRACE_ */
1010		return(error);
1011	}
1012
1013	*retval = 0;
1014
1015	ksyn_wqlock(ckwq);
1016
1017	/* update L, U and S... */
1018	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
1019
1020	/* broadcast wakeups/prepost handling */
1021	ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
1022
1023	/* set C or P bits and free if needed */
1024	ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
1025	ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
1026	ksyn_wqunlock(ckwq);
1027
1028	*retval = updatebits;
1029
1030	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
1031#if _PSYNCH_TRACE_
1032	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, error, 0);
1033#endif /* _PSYNCH_TRACE_ */
1034
1035	return(error);
1036}
1037
1038ksyn_waitq_element_t
1039ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, __unused ksyn_queue_t kq, thread_t th, uint32_t upto)
1040{
1041	uthread_t uth = get_bsdthread_info(th);
1042	ksyn_waitq_element_t kwe = &uth->uu_kwe;
1043
1044	if (kwe->kwe_kwqqueue != ckwq ||
1045	    is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto)) {
1046		/* the thread is not waiting in the cv (or wasn't when the wakeup happened) */
1047		return NULL;
1048	}
1049	return kwe;
1050}
1051
1052/*
1053 *  psynch_cvsignal: This system call is used for signalling the  blocked waiters of  psynch cvars.
1054 */
1055int
1056psynch_cvsignal(__unused proc_t p, struct psynch_cvsignal_args * uap, uint32_t * retval)
1057{
1058	user_addr_t cond  = uap->cv;
1059	uint64_t cvlsgen = uap->cvlsgen;
1060	uint32_t cgen, csgen, signalseq, uptoseq;
1061	uint32_t cugen = uap->cvugen;
1062	int threadport = uap->thread_port;
1063	int flags = uap->flags;
1064	ksyn_wait_queue_t ckwq = NULL;
1065	ksyn_waitq_element_t kwe, nkwe = NULL;
1066	ksyn_queue_t kq;
1067	int error=0;
1068	thread_t th = THREAD_NULL;
1069	uint32_t updatebits = 0;
1070	kern_return_t kret;
1071	struct ksyn_queue  kfreeq;
1072
1073
1074	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
1075	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
1076
1077#if _PSYNCH_TRACE_
1078	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, threadport, 0);
1079#endif /* _PSYNCH_TRACE_ */
1080
1081	uptoseq = cgen & PTHRW_COUNT_MASK;
1082	signalseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
1083
1084	/* validate sane L, U, and S values */
1085	if (((threadport == 0) && (is_seqhigher(signalseq, uptoseq))) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) {
1086		__FAILEDUSERTEST__("psync_cvsignal; invalid sequence numbers\n");
1087		error = EINVAL;
1088		goto out;
1089	}
1090
1091	/* If we are looking for a specific thread, grab a reference for it */
1092	if (threadport != 0) {
1093		th = (thread_t)port_name_to_thread((mach_port_name_t)threadport);
1094		if (th == THREAD_NULL) {
1095			error = ESRCH;
1096			goto out;
1097		}
1098	}
1099
1100	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
1101	if (error != 0)  {
1102#if _PSYNCH_TRACE_
1103		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
1104#endif /* _PSYNCH_TRACE_ */
1105		goto out;
1106	}
1107
1108	ksyn_wqlock(ckwq);
1109
1110	/* update L, U and S... */
1111	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
1112
1113	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
1114
1115retry:
1116	/* Only bother if we aren't already balanced */
1117	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
1118
1119		kwe = (th != NULL) ? ksyn_queue_find_threadseq(ckwq, kq, th, uptoseq) :
1120			ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
1121		if (kwe != NULL) {
1122			switch (kwe->kwe_flags) {
1123
1124			case KWE_THREAD_BROADCAST:
1125				/* broadcasts swallow our signal */
1126				break;
1127
1128			case KWE_THREAD_PREPOST:
1129				/* merge in with existing prepost at our same uptoseq */
1130				kwe->kwe_count += 1;
1131				break;
1132
1133			case KWE_THREAD_INWAIT:
1134				if (is_seqlower((kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq)) {
1135					/*
1136					 * A valid thread in our range, but lower than our signal.
1137					 * Matching it may leave our match with nobody to wake it if/when
1138					 * it arrives (the signal originally meant for this thread might
1139					 * not successfully wake it).
1140					 *
1141					 * Convert to broadcast - may cause some spurious wakeups
1142					 * (allowed by spec), but avoids starvation (better choice).
1143					 */
1144#if _PSYNCH_TRACE_
1145					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc1c1c1c1, uptoseq, 0, 0);
1146#endif /* _PSYNCH_TRACE_ */
1147					ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
1148				} else {
1149					ksyn_queue_removeitem(ckwq, kq, kwe);
1150					kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT;
1151					kwe->kwe_kwqqueue = NULL;
1152#if _PSYNCH_TRACE_
1153					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
1154#endif /* _PSYNCH_TRACE_ */
1155					kret = ksyn_wakeup_thread(ckwq, kwe);
1156#if __TESTPANICS__
1157					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
1158						panic("ksyn_wakeup_thread: panic waking up condition waiter\n");
1159#endif /* __TESTPANICS__ */
1160					updatebits += PTHRW_INC;
1161				}
1162
1163				ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
1164				break;
1165
1166			default:
1167				panic("unknown kweflags\n");
1168				break;
1169			}
1170
1171		} else if (th != NULL) {
1172			/*
1173			 * Could not find the thread, post a broadcast,
1174			 * otherwise the waiter will be stuck. Use to send
1175			 * ESRCH here, did lead to rare hangs.
1176			 */
1177			ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
1178			ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
1179		} else if (nkwe == NULL) {
1180			ksyn_wqunlock(ckwq);
1181			nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
1182			ksyn_wqlock(ckwq);
1183			goto retry;
1184
1185		} else {
1186			/* no eligible entries - add prepost */
1187			bzero(nkwe, sizeof(struct ksyn_waitq_element));
1188			nkwe->kwe_kwqqueue = ckwq;
1189			nkwe->kwe_flags = KWE_THREAD_PREPOST;
1190			nkwe->kwe_lockseq = uptoseq;
1191			nkwe->kwe_count = 1;
1192			nkwe->kwe_uth = NULL;
1193			nkwe->kwe_psynchretval = 0;
1194
1195#if _PSYNCH_TRACE_
1196			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfefe, uptoseq, 0, 0);
1197#endif /* _PSYNCH_TRACE_ */
1198
1199			(void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uptoseq, NULL, nkwe, SEQFIT);
1200			ckwq->kw_fakecount++;
1201			nkwe = NULL;
1202		}
1203
1204		/* set C or P bits and free if needed */
1205		ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
1206	}
1207
1208	ksyn_wqunlock(ckwq);
1209	if (nkwe != NULL)
1210		zfree(kwe_zone, nkwe);
1211
1212	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
1213
1214out:
1215	if (th != NULL)
1216		thread_deallocate(th);
1217	if (error == 0)
1218		*retval = updatebits;
1219#if _PSYNCH_TRACE_
1220	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, updatebits, error, 0);
1221#endif /* _PSYNCH_TRACE_ */
1222
1223	return(error);
1224}
1225
1226/*
1227 *  psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
1228 */
1229int
1230psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * retval)
1231{
1232	user_addr_t cond  = uap->cv;
1233	uint64_t cvlsgen = uap->cvlsgen;
1234	uint32_t cgen, csgen;
1235	uint32_t cugen = uap->cvugen;
1236	user_addr_t mutex = uap->mutex;
1237	uint64_t mugen = uap->mugen;
1238	uint32_t mgen, ugen;
1239	int flags = uap->flags;
1240	ksyn_wait_queue_t kwq, ckwq;
1241	int error=0, local_error = 0;
1242	uint64_t abstime = 0;
1243	uint32_t lockseq, updatebits=0;
1244	struct timespec  ts;
1245	uthread_t uth;
1246	ksyn_waitq_element_t kwe, nkwe = NULL;
1247	struct ksyn_queue  *kq, kfreeq;
1248	kern_return_t kret;
1249
1250	/* for conformance reasons */
1251	__pthread_testcancel(0);
1252
1253	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
1254	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
1255	ugen = (uint32_t)((mugen >> 32) & 0xffffffff);
1256	mgen = ((uint32_t)(mugen & 0xffffffff));
1257
1258#if _PSYNCH_TRACE_
1259	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
1260	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, flags, 0);
1261#endif /* _PSYNCH_TRACE_ */
1262
1263	lockseq = (cgen & PTHRW_COUNT_MASK);
1264	/*
1265	 * In cvwait U word can be out of range as cond could be used only for
1266	 * timeouts. However S word needs to be within bounds and validated at
1267	 * user level as well.
1268	 */
1269	if (is_seqhigher_eq((csgen & PTHRW_COUNT_MASK), lockseq) != 0) {
1270		__FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n");
1271		return EINVAL;
1272	}
1273
1274	ckwq = kwq = NULL;
1275	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
1276	if (error != 0) {
1277#if _PSYNCH_TRACE_
1278		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 1, 0xdeadbeef, error, 0);
1279#endif /* _PSYNCH_TRACE_ */
1280		return(error);
1281	}
1282
1283
1284	if (mutex != (user_addr_t)0) {
1285		error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq);
1286		if (error != 0)  {
1287			local_error = error;
1288#if _PSYNCH_TRACE_
1289			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0);
1290#endif /* _PSYNCH_TRACE_ */
1291			goto out;
1292		}
1293
1294		(void)psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
1295		/* drops kwq reference */
1296		kwq = NULL;
1297	}
1298
1299	if (uap->sec != 0 || (uap->nsec & 0x3fffffff)  != 0) {
1300		ts.tv_sec = uap->sec;
1301		ts.tv_nsec = (uap->nsec & 0x3fffffff);
1302		nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,  &abstime );
1303		clock_absolutetime_interval_to_deadline( abstime, &abstime );
1304	}
1305
1306	ksyn_wqlock(ckwq);
1307
1308	/* update L, U and S... */
1309	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
1310
1311	/* Look for the sequence for prepost (or conflicting thread */
1312	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
1313	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
1314
1315	if (kwe != NULL) {
1316		switch (kwe->kwe_flags) {
1317
1318		case KWE_THREAD_INWAIT:
1319			ksyn_wqunlock(ckwq);
1320			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
1321			local_error = EBUSY;
1322			goto out;
1323
1324		case KWE_THREAD_BROADCAST:
1325			break;
1326
1327		case KWE_THREAD_PREPOST:
1328			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
1329				/* we can safely consume a reference, so do so */
1330				if (--kwe->kwe_count == 0) {
1331					ksyn_queue_removeitem(ckwq, kq, kwe);
1332					ckwq->kw_fakecount--;
1333					nkwe = kwe;
1334				}
1335			} else {
1336				/*
1337				 * consuming a prepost higher than our lock sequence is valid, but
1338				 * can leave the higher thread without a match. Convert the entry
1339				 * to a broadcast to compensate for this.
1340				 */
1341#if _PSYNCH_TRACE_
1342				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc2c2c2c2, kwe->kwe_lockseq, 0, 0);
1343#endif /* _PSYNCH_TRACE_ */
1344
1345				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
1346#if __TESTPANICS__
1347				if (updatebits != 0)
1348					panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n",
1349					      updatebits);
1350#endif /* __TESTPANICS__ */
1351			}
1352
1353			break;
1354
1355		default:
1356			panic("psync_cvwait: unexpected wait queue element type\n");
1357		}
1358
1359#if _PSYNCH_TRACE_
1360                                __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfefefefe, kwe->kwe_lockseq, 0, 0);
1361#endif /* _PSYNCH_TRACE_ */
1362
1363
1364		updatebits = PTHRW_INC;
1365		ckwq->kw_sword += PTHRW_INC;
1366
1367		/* set C or P bits and free if needed */
1368		ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
1369
1370		error = 0;
1371		local_error = 0;
1372
1373		*retval = updatebits;
1374
1375		ksyn_wqunlock(ckwq);
1376
1377		if (nkwe != NULL)
1378			zfree(kwe_zone, nkwe);
1379
1380		goto out;
1381
1382	}
1383
1384	uth = current_uthread();
1385	kwe = &uth->uu_kwe;
1386	kwe->kwe_kwqqueue = ckwq;
1387	kwe->kwe_flags = KWE_THREAD_INWAIT;
1388	kwe->kwe_lockseq = lockseq;
1389	kwe->kwe_count = 1;
1390	kwe->kwe_uth = uth;
1391	kwe->kwe_psynchretval = 0;
1392
1393#if _PSYNCH_TRACE_
1394	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, cgen, 0, 0);
1395#endif /* _PSYNCH_TRACE_ */
1396
1397	error = ksyn_queue_insert(ckwq, kq, cgen, uth, kwe, SEQFIT);
1398	if (error != 0) {
1399		ksyn_wqunlock(ckwq);
1400		local_error = error;
1401		goto out;
1402	}
1403
1404	kret = ksyn_block_thread_locked(ckwq, abstime, kwe, 1, psynch_cvcontinue, (void *)ckwq);
1405	/* lock dropped */
1406
1407	psynch_cvcontinue(ckwq, kret);
1408	/* not expected to return from unix_syscall_return */
1409	panic("psynch_cvcontinue returned from unix_syscall_return");
1410
1411out:
1412#if _PSYNCH_TRACE_
1413	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0);
1414#endif /* _PSYNCH_TRACE_ */
1415	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
1416	return(local_error);
1417}
1418
1419
/*
 * Continuation run after a cvar waiter blocked in psynch_cvwait is resumed.
 * Maps the Mach wait result to an errno; on an aborted wait removes the entry
 * from the queue and performs the C/P-bit bookkeeping, unless the wakeup
 * already granted (PTH_RWL_MTX_WAIT set).  Returns to userland via
 * unix_syscall_return.
 */
void
psynch_cvcontinue(void * parameter, wait_result_t result)
{
	int error = 0, local_error = 0;
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = (ksyn_wait_queue_t)parameter;
	ksyn_waitq_element_t kwe;
	struct ksyn_queue  kfreeq;

	/* translate the Mach wait result into a BSD errno */
	switch (result) {
		case THREAD_TIMED_OUT:
			error  = ETIMEDOUT;
			break;
		case THREAD_INTERRUPTED:
			error  = EINTR;
			break;
		default:
			error = 0;
			break;
	}
#if _PSYNCH_TRACE_
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uintptr_t)uth, result, 0, 0);
#endif /* _PSYNCH_TRACE_ */

	local_error = error;
	kwe = &uth->uu_kwe;

	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		uth->uu_rval[0] = kwe->kwe_psynchretval;

#if __TESTPANICS__
		if ((kwe->kwe_kwqqueue != NULL) && (kwe->kwe_kwqqueue != ckwq))
			panic("cvwait waiting on some other kwq\n");

#endif /* __TESTPANICS__ */


		if (kwe->kwe_kwqqueue != NULL) {
			ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
			kwe->kwe_kwqqueue = NULL;
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var granted.
			 * reset the error so that the thread returns back.
			 */
			local_error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
			ksyn_wqunlock(ckwq);
			goto out;
		}

		ckwq->kw_sword += PTHRW_INC;

		/* set C and P bits, in the local error */
		if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
			local_error |= ECVCERORR;
			if (ckwq->kw_inqueue != 0) {
				(void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1);
			}
			ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
			ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
		} else {
			/* everythig in the queue is a fake entry ? */
			if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) {
				local_error |= ECVPERORR;
			}
		}
		ksyn_wqunlock(ckwq);

	} else  {
		/* PTH_RWL_MTX_WAIT is removed */
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT)  != 0)
			uth->uu_rval[0] = PTHRW_INC | PTH_RWS_CV_CBIT;
		else
			uth->uu_rval[0] = 0;
		local_error = 0;
	}
out:
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)ckwq->kw_addr, 0xeeeeeeed, uth->uu_rval[0], local_error, 0);
#endif /* _PSYNCH_TRACE_ */
	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));

	unix_syscall_return(local_error);

}
1508
1509/*
1510 *  psynch_cvclrprepost: This system call clears pending prepost if present.
1511 */
1512int
1513psynch_cvclrprepost(__unused proc_t p, struct psynch_cvclrprepost_args * uap, __unused int * retval)
1514{
1515	user_addr_t cond  = uap->cv;
1516	uint32_t cgen = uap->cvgen;
1517	uint32_t cugen = uap->cvugen;
1518	uint32_t csgen = uap->cvsgen;
1519	uint32_t pseq = uap->preposeq;
1520	uint32_t flags = uap->flags;
1521	int error;
1522	ksyn_wait_queue_t ckwq = NULL;
1523	struct ksyn_queue  kfreeq;
1524
1525#if _PSYNCH_TRACE_
1526	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
1527	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_NONE, (uint32_t)cond, 0xcececece, pseq, flags, 0);
1528#endif /* _PSYNCH_TRACE_ */
1529
1530	if ((flags & _PTHREAD_MTX_OPT_MUTEX) == 0) {
1531		error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
1532		if (error != 0)  {
1533			*retval = 0;
1534#if _PSYNCH_TRACE_
1535			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
1536#endif /* _PSYNCH_TRACE_ */
1537			return(error);
1538		}
1539
1540		ksyn_wqlock(ckwq);
1541		(void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (pseq & PTHRW_COUNT_MASK), &kfreeq, 0, 1);
1542		ksyn_wqunlock(ckwq);
1543		ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP));
1544	} else {
1545		/* mutex type */
1546		error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP), &ckwq);
1547		if (error != 0)  {
1548			*retval = 0;
1549#if _PSYNCH_TRACE_
1550			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
1551#endif /* _PSYNCH_TRACE_ */
1552			return(error);
1553		}
1554
1555		ksyn_wqlock(ckwq);
1556		if (((flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT) != 0) && (ckwq->kw_pre_rwwc != 0)) {
1557			if (is_seqlower_eq(ckwq->kw_pre_lockseq, cgen) != 0) {
1558				/* clear prepost */
1559				ckwq->kw_pre_rwwc = 0;
1560				ckwq->kw_pre_lockseq = 0;
1561			}
1562		}
1563		ksyn_wqunlock(ckwq);
1564		ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP));
1565	}
1566
1567#if _PSYNCH_TRACE_
1568		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, 0, 0, 0);
1569#endif /* _PSYNCH_TRACE_ */
1570	return(0);
1571}
1572
1573/* ***************** pthread_rwlock ************************ */
1574/*
1575 *  psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
1576 */
1577int
1578psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t * retval)
1579{
1580	user_addr_t rwlock  = uap->rwlock;
1581	uint32_t lgen = uap->lgenval;
1582	uint32_t ugen = uap->ugenval;
1583	uint32_t rw_wc = uap->rw_wc;
1584	//uint64_t tid = uap->tid;
1585	int flags = uap->flags;
1586	int error = 0, block;
1587	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
1588	ksyn_wait_queue_t kwq;
1589	uthread_t uth;
1590	int isinit = lgen & PTHRW_RWL_INIT;
1591	uint32_t returnbits  = 0;
1592	ksyn_waitq_element_t kwe;
1593	kern_return_t kret;
1594
1595#if _PSYNCH_TRACE_
1596	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
1597#endif /* _PSYNCH_TRACE_ */
1598	uth = current_uthread();
1599
1600	/* preserve the seq number */
1601	kwe = &uth->uu_kwe;
1602	kwe->kwe_lockseq = lgen;
1603	kwe->kwe_uth = uth;
1604	kwe->kwe_psynchretval = 0;
1605	kwe->kwe_kwqqueue = NULL;
1606
1607	lockseq = lgen  & PTHRW_COUNT_MASK;
1608
1609
1610	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
1611	if (error != 0)  {
1612#if _PSYNCH_TRACE_
1613	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
1614#endif /* _PSYNCH_TRACE_ */
1615		return(error);
1616	}
1617
1618	ksyn_wqlock(kwq);
1619
1620	if (isinit != 0) {
1621		lgen &= ~PTHRW_RWL_INIT;
1622		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
1623			/* first to notice the reset of the lock, clear preposts */
1624                	CLEAR_REINIT_BITS(kwq);
1625			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
1626#if _PSYNCH_TRACE_
1627	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
1628#endif /* _PSYNCH_TRACE_ */
1629		}
1630	}
1631
1632	/* handle first the missed wakeups */
1633	if ((kwq->kw_pre_intrcount != 0) &&
1634		((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) &&
1635		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
1636
1637		kwq->kw_pre_intrcount--;
1638		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
1639		if (kwq->kw_pre_intrcount==0)
1640			CLEAR_INTR_PREPOST_BITS(kwq);
1641		ksyn_wqunlock(kwq);
1642		goto out;
1643	}
1644
1645	/* handle overlap first as they are not counted against pre_rwwc */
1646
1647	/* check for overlap and if no pending W bit (indicates writers) */
1648	if ((kwq->kw_overlapwatch != 0) && ((rw_wc & PTHRW_RWS_SAVEMASK) == 0) && ((lgen & PTH_RWL_WBIT) == 0)) {
1649#if _PSYNCH_TRACE_
1650	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 10, kwq->kw_nextseqword, kwq->kw_lastseqword, 0);
1651#endif /* _PSYNCH_TRACE_ */
1652		error = kwq_handle_overlap(kwq, lgen, ugen, rw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block);
1653#if __TESTPANICS__
1654		if (error != 0)
1655			panic("rw_rdlock: kwq_handle_overlap failed %d\n",error);
1656#endif /* __TESTPANICS__ */
1657		if (block == 0) {
1658			error = 0;
1659			kwe->kwe_psynchretval = updatebits;
1660#if _PSYNCH_TRACE_
1661	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 0xff, updatebits, 0xee, 0);
1662#endif /* _PSYNCH_TRACE_ */
1663			ksyn_wqunlock(kwq);
1664			goto out;
1665		}
1666	}
1667
1668	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
1669#if _PSYNCH_TRACE_
1670	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
1671#endif /* _PSYNCH_TRACE_ */
1672		kwq->kw_pre_rwwc--;
1673		if (kwq->kw_pre_rwwc == 0) {
1674			preseq = kwq->kw_pre_lockseq;
1675			prerw_wc = kwq->kw_pre_sseq;
1676			CLEAR_PREPOST_BITS(kwq);
1677			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
1678				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
1679#if _PSYNCH_TRACE_
1680	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
1681#endif /* _PSYNCH_TRACE_ */
1682			}
1683			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block, lgen);
1684#if __TESTPANICS__
1685			if (error != 0)
1686				panic("rw_rdlock: kwq_handle_unlock failed %d\n",error);
1687#endif /* __TESTPANICS__ */
1688			if (block == 0) {
1689				ksyn_wqunlock(kwq);
1690				goto out;
1691			}
1692			/* insert to q and proceed as ususal */
1693		}
1694	}
1695
1696
1697#if _PSYNCH_TRACE_
1698	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
1699#endif /* _PSYNCH_TRACE_ */
1700	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], lgen, uth, kwe, SEQFIT);
1701#if __TESTPANICS__
1702	if (error != 0)
1703		panic("psynch_rw_rdlock: failed to enqueue\n");
1704#endif /* __TESTPANICS__ */
1705	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
1706	/* drops the kwq lock */
1707	switch (kret) {
1708		case THREAD_TIMED_OUT:
1709			error  = ETIMEDOUT;
1710			break;
1711		case THREAD_INTERRUPTED:
1712			error  = EINTR;
1713			break;
1714		default:
1715			error = 0;
1716			break;
1717	}
1718
1719out:
1720	if (error != 0) {
1721#if _PSYNCH_TRACE_
1722	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
1723#endif /* _PSYNCH_TRACE_ */
1724		ksyn_wqlock(kwq);
1725		if (kwe->kwe_kwqqueue != NULL)
1726			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwe);
1727		ksyn_wqunlock(kwq);
1728	} else {
1729		/* update bits */
1730		*retval = kwe->kwe_psynchretval;
1731		returnbits = kwe->kwe_psynchretval;
1732	}
1733	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
1734#if _PSYNCH_TRACE_
1735	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
1736#endif /* _PSYNCH_TRACE_ */
1737	return(error);
1738}
1739
1740/*
1741 *  psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
1742 */
1743int
1744#ifdef NOTYET
1745psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap,  __unused uint32_t * retval)
1746#else /* NOTYET */
1747psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_args * uap,  __unused uint32_t * retval)
1748#endif /* NOTYET */
1749{
1750#ifdef NOTYET
1751	user_addr_t rwlock  = uap->rwlock;
1752	uint32_t lgen = uap->lgenval;
1753	uint32_t ugen = uap->ugenval;
1754	uint32_t rw_wc = uap->rw_wc;
1755	//uint64_t tid = uap->tid;
1756	int flags = uap->flags;
1757	int isinit = lgen & PTHRW_RWL_INIT;
1758	uint32_t returnbits=0;
1759	ksyn_waitq_element_t kwe;
1760	kern_return_t kret;
1761
1762	ksyn_wait_queue_t kwq;
1763	int error=0, block = 0 ;
1764	uthread_t uth;
1765	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
1766
1767#if _PSYNCH_TRACE_
1768	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
1769#endif /* _PSYNCH_TRACE_ */
1770	uth = current_uthread();
1771	kwe = &uth->uu_kwe;
1772	kwe->kwe_lockseq = lgen;
1773	kwe->kwe_uth = uth;
1774	kwe->kwe_psynchretval = 0;
1775	kwe->kwe_kwqqueue = NULL;
1776	lockseq = (lgen & PTHRW_COUNT_MASK);
1777
1778	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
1779	if (error != 0)  {
1780#if _PSYNCH_TRACE_
1781	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
1782#endif /* _PSYNCH_TRACE_ */
1783		return(error);
1784	}
1785
1786	ksyn_wqlock(kwq);
1787
1788	if (isinit != 0) {
1789		lgen &= ~PTHRW_RWL_INIT;
1790		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
1791			/* first to notice the reset of the lock, clear preposts */
1792                	CLEAR_REINIT_BITS(kwq);
1793			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
1794#if _PSYNCH_TRACE_
1795	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
1796#endif /* _PSYNCH_TRACE_ */
1797		}
1798	}
1799
1800	/* handle first the missed wakeups */
1801	if ((kwq->kw_pre_intrcount != 0) &&
1802		(kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD) &&
1803		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
1804
1805		kwq->kw_pre_intrcount--;
1806		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
1807		if (kwq->kw_pre_intrcount==0)
1808			CLEAR_INTR_PREPOST_BITS(kwq);
1809		ksyn_wqunlock(kwq);
1810		goto out;
1811	}
1812
1813
1814	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
1815#if _PSYNCH_TRACE_
1816	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
1817#endif /* _PSYNCH_TRACE_ */
1818		kwq->kw_pre_rwwc--;
1819		if (kwq->kw_pre_rwwc == 0) {
1820			preseq = kwq->kw_pre_lockseq;
1821			prerw_wc = kwq->kw_pre_sseq;
1822			CLEAR_PREPOST_BITS(kwq);
1823			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
1824				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
1825#if _PSYNCH_TRACE_
1826	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
1827#endif /* _PSYNCH_TRACE_ */
1828			}
1829			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_LREADLOCK|KW_UNLOCK_PREPOST), &block, lgen);
1830#if __TESTPANICS__
1831			if (error != 0)
1832				panic("kwq_handle_unlock failed %d\n",error);
1833#endif /* __TESTPANICS__ */
1834			if (block == 0) {
1835				ksyn_wqunlock(kwq);
1836				goto out;
1837			}
1838			/* insert to q and proceed as ususal */
1839		}
1840	}
1841
1842#if _PSYNCH_TRACE_
1843	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
1844#endif /* _PSYNCH_TRACE_ */
1845	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], lgen, uth, kwe, SEQFIT);
1846#if __TESTPANICS__
1847	if (error != 0)
1848		panic("psynch_rw_longrdlock: failed to enqueue\n");
1849#endif /* __TESTPANICS__ */
1850
1851	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
1852	/* drops the kwq lock */
1853	switch (kret) {
1854		case THREAD_TIMED_OUT:
1855			error  = ETIMEDOUT;
1856			break;
1857		case THREAD_INTERRUPTED:
1858			error  = EINTR;
1859			break;
1860		default:
1861			error = 0;
1862			break;
1863	}
1864out:
1865	if (error != 0) {
1866#if _PSYNCH_TRACE_
1867	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
1868#endif /* _PSYNCH_TRACE_ */
1869		ksyn_wqlock(kwq);
1870		if (kwe->kwe_kwqqueue != NULL)
1871			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwe);
1872		ksyn_wqunlock(kwq);
1873	} else {
1874		/* update bits */
1875		*retval = kwe->kwe_psynchretval;
1876		returnbits = kwe->kwe_psynchretval;
1877	}
1878
1879	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
1880
1881#if _PSYNCH_TRACE_
1882	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, returnbits, error, 0);
1883#endif /* _PSYNCH_TRACE_ */
1884	return(error);
1885#else /* NOTYET */
1886	return(ESRCH);
1887#endif /* NOTYET */
1888}
1889
1890
1891/*
1892 *  psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
1893 */
int
psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t * retval)
{
	user_addr_t rwlock  = uap->rwlock;
	uint32_t lgen = uap->lgenval;
	uint32_t ugen = uap->ugenval;
	uint32_t rw_wc = uap->rw_wc;
	//uint64_t tid = uap->tid;
	int flags = uap->flags;
	int block;
	ksyn_wait_queue_t kwq;
	int error=0;
	uthread_t uth;
	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
	int isinit = lgen & PTHRW_RWL_INIT;
	uint32_t returnbits  = 0;
	ksyn_waitq_element_t kwe;
	kern_return_t kret;

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
#endif /* _PSYNCH_TRACE_ */
	/* stage this thread's wait queue element before taking any locks */
	uth = current_uthread();
	kwe = &uth->uu_kwe;
	kwe->kwe_lockseq = lgen;
	kwe->kwe_uth = uth;
	kwe->kwe_psynchretval = 0;
	kwe->kwe_kwqqueue = NULL;
	lockseq = (lgen & PTHRW_COUNT_MASK);

	/* find (or create) the kernel wait queue for this user lock; grants a reference */
	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0)  {
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */
		return(error);
	}

	ksyn_wqlock(kwq);


	if (isinit != 0) {
		lgen &= ~PTHRW_RWL_INIT;
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
                	CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
#endif /* _PSYNCH_TRACE_ */
		}
	}


	/* handle first the missed wakeups (a grant left behind by a waiter that
	 * timed out or was interrupted; claim it instead of blocking) */
	if ((kwq->kw_pre_intrcount != 0) &&
		(kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE) &&
		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {

		kwq->kw_pre_intrcount--;
		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount==0)
			CLEAR_INTR_PREPOST_BITS(kwq);
		ksyn_wqunlock(kwq);
		goto out;
	}


	/* consume a pending prepost (an unlock that arrived before this waiter) */
	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			/* last expected waiter has arrived; replay the saved unlock */
			preseq = kwq->kw_pre_lockseq;
			prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
#endif /* _PSYNCH_TRACE_ */
			}
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_WRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
#if __TESTPANICS__
			if (error != 0)
				panic("rw_wrlock: kwq_handle_unlock failed %d\n",error);
#endif /* __TESTPANICS__ */
			if (block == 0) {
				/* write lock granted without blocking; updatebits is the result */
				ksyn_wqunlock(kwq);
				*retval = updatebits;
				goto out1;
			}
			/* insert to q and proceed as ususal */
		}
	}

	/* No overlap watch needed  go ahead and block */

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], lgen, uth, kwe, SEQFIT);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_wrlock: failed to enqueue\n");
#endif /* __TESTPANICS__ */

	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
	/* drops the wq lock */
	switch (kret) {
		case THREAD_TIMED_OUT:
			error  = ETIMEDOUT;
			break;
		case THREAD_INTERRUPTED:
			error  = EINTR;
			break;
		default:
			error = 0;
			break;
	}

out:
	if (error != 0) {
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
#endif /* _PSYNCH_TRACE_ */
		ksyn_wqlock(kwq);
		/* if no waker dequeued us, remove ourselves from the writer queue */
		if (kwe->kwe_kwqqueue != NULL)
			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		ksyn_wqunlock(kwq);
	} else  {
		/* update bits */
		*retval = kwe->kwe_psynchretval;
		returnbits = kwe->kwe_psynchretval;
	}
out1:
	/* drop the reference taken by ksyn_wqfind */
	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
#endif /* _PSYNCH_TRACE_ */
	return(error);
}
2038
2039/*
2040 *  psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
2041 */
2042int
2043#ifdef NOTYET
2044psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
2045#else /* NOTYET */
2046psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  __unused psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
2047#endif /* NOTYET */
2048{
2049#ifdef NOTYET
2050	user_addr_t rwlock  = uap->rwlock;
2051	uint32_t lgen = uap->lgenval;
2052	uint32_t ugen = uap->ugenval;
2053	uint32_t rw_wc = uap->rw_wc;
2054	//uint64_t tid = uap->tid;
2055	int flags = uap->flags;
2056	int block;
2057	ksyn_wait_queue_t kwq;
2058	int error=0;
2059	int isinit = lgen & PTHRW_RWL_INIT;
2060	uthread_t uth;
2061	uint32_t returnbits=0;
2062	ksyn_waitq_element_t kwe;
2063	kern_return_t kret;
2064
2065#if _PSYNCH_TRACE_
2066	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
2067#endif /* _PSYNCH_TRACE_ */
2068	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
2069
2070	uth = current_uthread();
2071	kwe = &uth->uu_kwe;
2072	kwe->kwe_lockseq = lgen;
2073	kwe->kwe_uth = uth;
2074	kwe->kwe_psynchretval = 0;
2075	kwe->kwe_kwqqueue = NULL;
2076	lockseq = (lgen & PTHRW_COUNT_MASK);
2077
2078	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
2079	if (error != 0)  {
2080#if _PSYNCH_TRACE_
2081	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
2082#endif /* _PSYNCH_TRACE_ */
2083		return(error);
2084	}
2085
2086	ksyn_wqlock(kwq);
2087
2088	if (isinit != 0) {
2089		lgen &= ~PTHRW_RWL_INIT;
2090		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
2091			/* first to notice the reset of the lock, clear preposts */
2092                	CLEAR_REINIT_BITS(kwq);
2093			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
2094#if _PSYNCH_TRACE_
2095	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
2096#endif /* _PSYNCH_TRACE_ */
2097		}
2098	}
2099
2100	/* handle first the missed wakeups */
2101	if ((kwq->kw_pre_intrcount != 0) &&
2102		(kwq->kw_pre_intrtype == PTH_RW_TYPE_YWRITE) &&
2103		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
2104
2105		kwq->kw_pre_intrcount--;
2106		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
2107		if (kwq->kw_pre_intrcount==0)
2108			CLEAR_INTR_PREPOST_BITS(kwq);
2109		ksyn_wqunlock(kwq);
2110		goto out;
2111	}
2112
2113	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
2114#if _PSYNCH_TRACE_
2115	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
2116#endif /* _PSYNCH_TRACE_ */
2117		kwq->kw_pre_rwwc--;
2118		if (kwq->kw_pre_rwwc == 0) {
2119			preseq = kwq->kw_pre_lockseq;
2120			prerw_wc = kwq->kw_pre_sseq;
2121			CLEAR_PREPOST_BITS(kwq);
2122			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
2123				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
2124#if _PSYNCH_TRACE_
2125	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
2126#endif /* _PSYNCH_TRACE_ */
2127			}
2128			error = kwq_handle_unlock(kwq, preseq,  prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_YWRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
2129#if __TESTPANICS__
2130			if (error != 0)
2131				panic("kwq_handle_unlock failed %d\n",error);
2132#endif /* __TESTPANICS__ */
2133			if (block == 0) {
2134				ksyn_wqunlock(kwq);
2135				*retval = updatebits;
2136				goto out;
2137			}
2138			/* insert to q and proceed as ususal */
2139		}
2140	}
2141
2142#if _PSYNCH_TRACE_
2143	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
2144#endif /* _PSYNCH_TRACE_ */
2145	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], lgen, uth, kwe, SEQFIT);
2146#if __TESTPANICS__
2147	if (error != 0)
2148		panic("psynch_rw_yieldwrlock: failed to enqueue\n");
2149#endif /* __TESTPANICS__ */
2150
2151	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
2152	switch (kret) {
2153		case THREAD_TIMED_OUT:
2154			error  = ETIMEDOUT;
2155			break;
2156		case THREAD_INTERRUPTED:
2157			error  = EINTR;
2158			break;
2159		default:
2160			error = 0;
2161			break;
2162	}
2163
2164out:
2165	if (error != 0) {
2166#if _PSYNCH_TRACE_
2167	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
2168#endif /* _PSYNCH_TRACE_ */
2169		ksyn_wqlock(kwq);
2170		if (kwe->kwe_kwqqueue != NULL)
2171			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwe);
2172		ksyn_wqunlock(kwq);
2173	} else  {
2174		/* update bits */
2175		*retval = kwe->kwe_psynchretval;
2176		returnbits = kwe->kwe_psynchretval;
2177	}
2178
2179	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
2180
2181#if _PSYNCH_TRACE_
2182	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
2183#endif /* _PSYNCH_TRACE_ */
2184	return(error);
2185#else /* NOTYET */
2186	return(ESRCH);
2187#endif /* NOTYET */
2188}
2189
2190#if NOTYET
2191/*
2192 *  psynch_rw_downgrade: This system call is used for wakeup blocked readers who are eligible to run due to downgrade.
2193 */
2194int
2195psynch_rw_downgrade(__unused proc_t p, struct psynch_rw_downgrade_args * uap, __unused int * retval)
2196{
2197	user_addr_t rwlock  = uap->rwlock;
2198	uint32_t lgen = uap->lgenval;
2199	uint32_t ugen = uap->ugenval;
2200	uint32_t rw_wc = uap->rw_wc;
2201	//uint64_t tid = uap->tid;
2202	int flags = uap->flags;
2203	uint32_t count = 0;
2204	int isinit = lgen & PTHRW_RWL_INIT;
2205	ksyn_wait_queue_t kwq;
2206	int error=0;
2207	uthread_t uth;
2208	uint32_t curgen = 0;
2209
2210#if _PSYNCH_TRACE_
2211	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
2212#endif /* _PSYNCH_TRACE_ */
2213	uth = current_uthread();
2214
2215	curgen = (lgen & PTHRW_COUNT_MASK);
2216
2217	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
2218	if (error != 0)  {
2219#if _PSYNCH_TRACE_
2220	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
2221#endif /* _PSYNCH_TRACE_ */
2222		return(error);
2223	}
2224
2225	ksyn_wqlock(kwq);
2226
2227	if ((lgen & PTHRW_RWL_INIT) != 0) {
2228		lgen &= ~PTHRW_RWL_INIT;
2229		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){
2230			CLEAR_REINIT_BITS(kwq);
2231			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
2232#if _PSYNCH_TRACE_
2233	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
2234#endif /* _PSYNCH_TRACE_ */
2235		}
2236		isinit = 1;
2237	}
2238
2239	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
2240	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) {
2241		/* spurious  updatebits?? */
2242		error = 0;
2243		goto out;
2244	}
2245
2246
2247
2248	/* If L-U != num of waiters, then it needs to be preposted or spr */
2249	diff = find_diff(lgen, ugen);
2250	/* take count of  the downgrade thread itself */
2251	diff--;
2252
2253
2254#if _PSYNCH_TRACE_
2255	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
2256#endif /* _PSYNCH_TRACE_ */
2257	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
2258		if (count < (uint32_t)diff)
2259			goto prepost;
2260	}
2261
2262	/* no prepost and all threads are in place, reset the bit */
2263	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
2264		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
2265#if _PSYNCH_TRACE_
2266	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
2267#endif /* _PSYNCH_TRACE_ */
2268	}
2269
2270	/* can handle unlock now */
2271
2272	CLEAR_PREPOST_BITS(kwq);
2273
2274dounlock:
2275#if _PSYNCH_TRACE_
2276	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
2277#endif /* _PSYNCH_TRACE_ */
2278	error = kwq_handle_downgrade(kwq, lgen, 0, 0, NULL);
2279
2280#if __TESTPANICS__
2281	if (error != 0)
2282		panic("psynch_rw_downgrade: failed to wakeup\n");
2283#endif /* __TESTPANICS__ */
2284
2285out:
2286	ksyn_wqunlock(kwq);
2287#if _PSYNCH_TRACE_
2288	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
2289#endif /* _PSYNCH_TRACE_ */
2290	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
2291
2292	return(error);
2293
2294prepost:
2295	kwq->kw_pre_rwwc = (rw_wc - count);
2296	kwq->kw_pre_lockseq = lgen;
2297#if _PSYNCH_TRACE_
2298	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
2299#endif /* _PSYNCH_TRACE_ */
2300	error = 0;
2301	goto out;
2302}
2303
2304
2305/*
2306 *  psynch_rw_upgrade: This system call is used by an reader to block waiting for upgrade to be granted.
2307 */
2308int
2309psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32_t * retval)
2310{
2311	user_addr_t rwlock  = uap->rwlock;
2312	uint32_t lgen = uap->lgenval;
2313	uint32_t ugen = uap->ugenval;
2314	uint32_t rw_wc = uap->rw_wc;
2315	//uint64_t tid = uap->tid;
2316	int flags = uap->flags;
2317	int block;
2318	ksyn_wait_queue_t kwq;
2319	int error=0;
2320	uthread_t uth;
2321	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
2322	int isinit = lgen & PTHRW_RWL_INIT;
2323	ksyn_waitq_element_t kwe;
2324	kern_return_t kret;
2325
2326#if _PSYNCH_TRACE_
2327	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
2328#endif /* _PSYNCH_TRACE_ */
2329	uth = current_uthread();
2330	kwe = &uth->uu_kwe;
2331	kwe->kwe_lockseq = lgen;
2332	kwe->kwe_uth = uth;
2333	kwe->kwe_psynchretval = 0;
2334	kwe->kwe_kwqqueue = NULL;
2335	lockseq = (lgen & PTHRW_COUNT_MASK);
2336
2337	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
2338	if (error != 0)  {
2339#if _PSYNCH_TRACE_
2340	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
2341#endif /* _PSYNCH_TRACE_ */
2342		return(error);
2343	}
2344
2345	ksyn_wqlock(kwq);
2346
2347	if (isinit != 0) {
2348		lgen &= ~PTHRW_RWL_INIT;
2349		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
2350			/* first to notice the reset of the lock, clear preposts */
2351                	CLEAR_REINIT_BITS(kwq);
2352			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
2353#if _PSYNCH_TRACE_
2354	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
2355#endif /* _PSYNCH_TRACE_ */
2356		}
2357	}
2358
2359	/* handle first the missed wakeups */
2360	if ((kwq->kw_pre_intrcount != 0) &&
2361		((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) &&
2362		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
2363
2364		kwq->kw_pre_intrcount--;
2365		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
2366		if (kwq->kw_pre_intrcount==0)
2367			CLEAR_INTR_PREPOST_BITS(kwq);
2368		ksyn_wqunlock(kwq);
2369		goto out;
2370	}
2371
2372	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
2373#if _PSYNCH_TRACE_
2374	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
2375#endif /* _PSYNCH_TRACE_ */
2376		kwq->kw_pre_rwwc--;
2377		if (kwq->kw_pre_rwwc == 0) {
2378			preseq = kwq->kw_pre_lockseq;
2379			prerw_wc = kwq->kw_pre_sseq;
2380			CLEAR_PREPOST_BITS(kwq);
2381			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
2382				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
2383#if _PSYNCH_TRACE_
2384	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
2385#endif /* _PSYNCH_TRACE_ */
2386			}
2387			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_UPGRADE|KW_UNLOCK_PREPOST), &block, lgen);
2388#if __TESTPANICS__
2389			if (error != 0)
2390				panic("rw_rdlock: kwq_handle_unlock failed %d\n",error);
2391#endif /* __TESTPANICS__ */
2392			if (block == 0) {
2393				ksyn_wqunlock(kwq);
2394				goto out;
2395			}
2396			/* insert to q and proceed as ususal */
2397		}
2398	}
2399
2400
2401#if _PSYNCH_TRACE_
2402	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
2403#endif /* _PSYNCH_TRACE_ */
2404	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], lgen, uth, kwe, SEQFIT);
2405#if __TESTPANICS__
2406	if (error != 0)
2407		panic("psynch_rw_upgrade: failed to enqueue\n");
2408#endif /* __TESTPANICS__ */
2409
2410
2411	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
2412	/* drops the lock */
2413	switch (kret) {
2414		case THREAD_TIMED_OUT:
2415			error  = ETIMEDOUT;
2416			break;
2417		case THREAD_INTERRUPTED:
2418			error  = EINTR;
2419			break;
2420		default:
2421			error = 0;
2422			break;
2423	}
2424
2425out:
2426	if (error != 0) {
2427#if _PSYNCH_TRACE_
2428	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
2429#endif /* _PSYNCH_TRACE_ */
2430		ksyn_wqlock(kwq);
2431		if (kwe->kwe_kwqqueue != NULL)
2432			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwe);
2433		ksyn_wqunlock(kwq);
2434	} else {
2435		/* update bits */
2436		*retval = kwe->kwe_psynchretval;
2437	}
2438
2439	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
2440#if _PSYNCH_TRACE_
2441	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
2442#endif /* _PSYNCH_TRACE_ */
2443
2444	return(error);
2445}
2446
2447#else /* NOTYET */
int
/* stub compiled while NOTYET is disabled: upgrade is not supported; report success */
psynch_rw_upgrade(__unused proc_t p, __unused struct psynch_rw_upgrade_args * uap, __unused uint32_t * retval)
{
	return(0);
}
int
/* stub compiled while NOTYET is disabled: downgrade is not supported; report success */
psynch_rw_downgrade(__unused proc_t p, __unused struct psynch_rw_downgrade_args * uap, __unused int * retval)
{
	return(0);
}
2458#endif /* NOTYET */
2459/*
2460 *  psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
2461 *			reader/writer variety lock.
2462 */
2463
int
psynch_rw_unlock(__unused proc_t p, struct psynch_rw_unlock_args  * uap, uint32_t * retval)
{
	user_addr_t rwlock  = uap->rwlock;
	uint32_t lgen = uap->lgenval;
	uint32_t ugen = uap->ugenval;
	uint32_t rw_wc = uap->rw_wc;
	uint32_t curgen;
	//uint64_t tid = uap->tid;
	int flags = uap->flags;
	uthread_t uth;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int error=0, diff;
	uint32_t count = 0;
	int isinit = 0;


#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
#endif /* _PSYNCH_TRACE_ */
	uth = current_uthread();

	/* find (or create) the kernel wait queue for this user lock; grants a reference */
	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0)  {
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */
		return(error);
	}

	curgen = lgen & PTHRW_COUNT_MASK;

	ksyn_wqlock(kwq);

	if ((lgen & PTHRW_RWL_INIT) != 0) {
		lgen &= ~PTHRW_RWL_INIT;
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
#endif /* _PSYNCH_TRACE_ */
		}
		isinit = 1;
	}

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) {
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, (uint32_t)0xeeeeeeee, rw_wc, kwq->kw_lastunlockseq, 0);
#endif /* _PSYNCH_TRACE_ */
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or spr */
	diff = find_diff(lgen, ugen);

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
#endif /* _PSYNCH_TRACE_ */
	/* count waiters at or below curgen; prepost if some have not arrived yet */
	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	}

	/* can handle unlock now */

	CLEAR_PREPOST_BITS(kwq);

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	/* wake eligible waiters; updatebits reports the grant back to userland */
	error = kwq_handle_unlock(kwq, lgen, rw_wc,  &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}


	ksyn_wqunlock(kwq);

	/* drop the reference taken by ksyn_wqfind */
	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, updatebits, error, 0);
#endif /* _PSYNCH_TRACE_ */

	return(error);

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if ((is_rws_setseq(kwq->kw_pre_sseq) != 0) ||
			(is_seqhigher_eq((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_pre_sseq & PTHRW_COUNT_MASK)) != 0)) {
		/* remember how many waiters are still due and the unlock state to replay */
		kwq->kw_pre_rwwc = (diff - count);
		kwq->kw_pre_lockseq = curgen;
		kwq->kw_pre_sseq = rw_wc;
#if _PSYNCH_TRACE_
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, rw_wc, count, 0);
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
		updatebits = lgen;	/* let this not do unlock handling */
	}
	error = 0;
	goto out;
}
2583
2584
2585/*
2586 *  psynch_rw_unlock2: This system call is used to wakeup pending readers when  unlock grant frm kernel
2587 *			  to new reader arrival races
2588 */
int
/* not implemented: callers get ENOTSUP unconditionally */
psynch_rw_unlock2(__unused proc_t p, __unused struct psynch_rw_unlock2_args  * uap, __unused uint32_t * retval)
{
	return(ENOTSUP);
}
2594
2595
2596/* ************************************************************************** */
2597void
2598pth_global_hashinit()
2599{
2600	int arg;
2601
2602	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
2603
2604	/*
2605	 * pthtest={0,1,2,3} (override default aborting behavior on pthread sync failures)
2606	 * 0 - just return errors
2607	 * 1 - print and return errors
2608	 * 2 - abort user, print and return errors
2609	 * 3 - panic
2610	 */
2611	if (!PE_parse_boot_argn("pthtest", &arg, sizeof(arg)))
2612		arg = __TESTMODE__;
2613
2614	if (arg == 3) {
2615		__test_panics__ = 1;
2616		printf("Pthread support PANICS when sync kernel primitives misused\n");
2617	} else if (arg == 2) {
2618		__test_aborts__ = 1;
2619		__test_prints__ = 1;
2620		printf("Pthread support ABORTS when sync kernel primitives misused\n");
2621	} else if (arg == 1) {
2622		__test_prints__ = 1;
2623		printf("Pthread support LOGS when sync kernel primitives misused\n");
2624	}
2625}
2626
2627void
2628pth_proc_hashinit(proc_t p)
2629{
2630	p->p_pthhash  = hashinit(PTH_HASHSIZE, M_PROC, &pthhash);
2631	if (p->p_pthhash == NULL)
2632		panic("pth_proc_hashinit: hash init returned 0\n");
2633}
2634
2635
2636ksyn_wait_queue_t
2637ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t objoffset)
2638{
2639	ksyn_wait_queue_t kwq;
2640	struct pthhashhead * hashptr;
2641
2642	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED)
2643	{
2644		hashptr = pth_glob_hashtbl;
2645		kwq = (&hashptr[object & pthhash])->lh_first;
2646		if (kwq != 0) {
2647			for (; kwq != NULL; kwq = kwq->kw_hash.le_next) {
2648				if ((kwq->kw_object == object) &&(kwq->kw_offset == objoffset)) {
2649					return (kwq);
2650				}
2651			}
2652		}
2653	} else {
2654		hashptr = p->p_pthhash;
2655		kwq = (&hashptr[mutex & pthhash])->lh_first;
2656		if (kwq != 0)
2657        		for (; kwq != NULL; kwq = kwq->kw_hash.le_next) {
2658                		if (kwq->kw_addr == mutex) {
2659                        		return (kwq);
2660                		}
2661			}
2662        }
2663	return(NULL);
2664}
2665
void
pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead * hashptr;
	ksyn_wait_queue_t kwq;
	int hashsize = pthhash + 1;
	int i;

#if _PSYNCH_TRACE_
	if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc))
		pthread_debug_proc = PROC_NULL;
#endif /* _PSYNCH_TRACE_ */
	/* detach the table from the proc first so no new lookups find it */
	hashptr = p->p_pthhash;
	p->p_pthhash = NULL;
	if (hashptr == NULL)
		return;

	pthread_list_lock();
	for(i= 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
				num_infreekwq--;
			}
			num_freekwq++;
			/* drop the list lock while destroying/freeing (zfree may block) */
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
			zfree(kwq_zone, kwq);
			/* reacquire before examining the next chain entry */
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}
2708
2709/* no lock held for this as the waitqueue is getting freed */
2710void
2711ksyn_freeallkwe(ksyn_queue_t kq)
2712{
2713	ksyn_waitq_element_t kwe;
2714
2715	/* free all the fake entries, dequeue rest */
2716	kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
2717	while (kwe != NULL) {
2718		if (kwe->kwe_flags != KWE_THREAD_INWAIT) {
2719			TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2720			zfree(kwe_zone, kwe);
2721		} else {
2722			TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2723		}
2724		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
2725	}
2726}
2727
/*
 * ksyn_wqfind
 * Find the kernel wait queue backing a user synchronizer; create one if
 * none exists.  Grants an io reference (kw_iocount) that the caller must
 * drop via ksyn_wqrelease().  For MUTEXDROP callers kw_dropcount is also
 * bumped.  On success *kwqp (if non-NULL) receives the queue.
 * Returns 0, or EBUSY when the address is already in use by a busy
 * synchronizer of a different type.
 */
int
ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype, ksyn_wait_queue_t * kwqp)
{
	ksyn_wait_queue_t kwq;
	ksyn_wait_queue_t nkwq;
	struct pthhashhead * hashptr;
	uint64_t object = 0, offset = 0;
	uint64_t hashhint;
	proc_t p  = current_proc();
	int retry = mgen & PTH_RWL_RETRYBIT;
	struct ksyn_queue kfreeq;
	int i;

	/* shared synchronizers hash globally by (VM object, offset); private ones per-process by address */
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED)
	{
		/* NOTE(review): lookup failure is ignored; object/offset then stay 0 — confirm intended */
		(void)ksyn_findobj(mutex, &object, &offset);
		hashhint = object;
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = p->p_pthhash;
	}

	ksyn_queue_init(&kfreeq);

	/* mutexes: strip the retry bit before the lock word is recorded */
	if (((wqtype & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX) && (retry != 0))
		mgen &= ~PTH_RWL_RETRYBIT;

loop:
	//pthread_list_lock_spin();
	pthread_list_lock();

	kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset);

	if (kwq != NULL) {
		/* found: pull it off the delayed-free list if it was parked there */
		if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
			LIST_REMOVE(kwq, kw_list);
			kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			num_infreekwq--;
			num_reusekwq++;
		}
		if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype &KSYN_WQTYPE_MASK)) {
			/* address was last used as a different synchronizer type */
			if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc ==0) && (kwq->kw_pre_intrcount == 0)) {
				if (kwq->kw_iocount == 0) {
					/* fully idle: repurpose the queue in place for the new type */
					kwq->kw_addr = mutex;
					kwq->kw_flags = flags;
					kwq->kw_object = object;
					kwq->kw_offset = offset;
					kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
					CLEAR_REINIT_BITS(kwq);
					CLEAR_INTR_PREPOST_BITS(kwq);
					CLEAR_PREPOST_BITS(kwq);
					kwq->kw_lword = mgen;
					kwq->kw_uword = ugen;
					kwq->kw_sword = rw_wc;
					kwq->kw_owner = tid;
				} else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) {
					/* if all users are unlockers then wait for it to finish */
					kwq->kw_pflags |= KSYN_WQ_WAITING;
					/* wait for the wq to be free */
					(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
					/* does not have list lock */
					goto loop;
				} else {
					__FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type\n");
					pthread_list_unlock();
					return EBUSY;
				}
			} else {
				__FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type(1)\n");
				pthread_list_unlock();
				return EBUSY;
			}
		}
		/* grant the reference (and drop accounting for mutex unlockers) */
		kwq->kw_iocount++;
		if (wqtype == KSYN_WQTYPE_MUTEXDROP)
			kwq->kw_dropcount++;
		if (kwqp != NULL)
			*kwqp = kwq;
		pthread_list_unlock();
		return (0);
	}

	/* not found: allocate and initialize a new queue outside the list lock */
	pthread_list_unlock();

	nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
	bzero(nkwq, sizeof(struct ksyn_wait_queue));
	nkwq->kw_addr = mutex;
	nkwq->kw_flags = flags;
	nkwq->kw_iocount = 1;
	if (wqtype == KSYN_WQTYPE_MUTEXDROP)
			nkwq->kw_dropcount++;
	nkwq->kw_object = object;
	nkwq->kw_offset = offset;
	nkwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
	nkwq->kw_lastseqword = PTHRW_RWS_INIT;
	if (nkwq->kw_type == KSYN_WQTYPE_RWLOCK)
		nkwq->kw_nextseqword = PTHRW_RWS_INIT;

	nkwq->kw_pre_sseq = PTHRW_RWS_INIT;

	CLEAR_PREPOST_BITS(nkwq);
	CLEAR_INTR_PREPOST_BITS(nkwq);
	CLEAR_REINIT_BITS(nkwq);
	nkwq->kw_lword = mgen;
	nkwq->kw_uword = ugen;
	nkwq->kw_sword = rw_wc;
	nkwq->kw_owner = tid;


	for (i=0; i< KSYN_QUEUE_MAX; i++)
		ksyn_queue_init(&nkwq->kw_ksynqueues[i]);

	lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);

	//pthread_list_lock_spin();
	pthread_list_lock();
	/* re-check under the lock: another thread may have inserted one while we allocated */
	kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset);

	if (kwq != NULL) {
		/* lost the race: reuse the existing queue (same logic as the first lookup) */
		if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
			LIST_REMOVE(kwq, kw_list);
			kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			num_infreekwq--;
			num_reusekwq++;
		}
		if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype &KSYN_WQTYPE_MASK)) {
			if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc ==0) && (kwq->kw_pre_intrcount == 0)) {
				if (kwq->kw_iocount == 0) {
					kwq->kw_addr = mutex;
					kwq->kw_flags = flags;
					kwq->kw_object = object;
					kwq->kw_offset = offset;
					kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
					CLEAR_REINIT_BITS(kwq);
					CLEAR_INTR_PREPOST_BITS(kwq);
					CLEAR_PREPOST_BITS(kwq);
					kwq->kw_lword = mgen;
					kwq->kw_uword = ugen;
					kwq->kw_sword = rw_wc;
					kwq->kw_owner = tid;
				} else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) {
					kwq->kw_pflags |= KSYN_WQ_WAITING;
					/* wait for the wq to be free */
					(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);

					/* discard our speculative allocation before retrying */
					lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
					zfree(kwq_zone, nkwq);
					/* will acquire lock again */

					goto loop;
				} else {
					__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(2)\n");
					pthread_list_unlock();
					lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
					zfree(kwq_zone, nkwq);
					return EBUSY;
				}
			} else {
				__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(3)\n");
				pthread_list_unlock();
				lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
				zfree(kwq_zone, nkwq);
				return EBUSY;
			}
		}
		kwq->kw_iocount++;
		if (wqtype == KSYN_WQTYPE_MUTEXDROP)
			kwq->kw_dropcount++;
		if (kwqp != NULL)
			*kwqp = kwq;
		pthread_list_unlock();
		/* discard the unused speculative allocation */
		lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
		zfree(kwq_zone, nkwq);
		return (0);
	}
	/* won the race: publish the new queue */
	kwq = nkwq;

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword, 0xffff, 0);
#endif /* _PSYNCH_TRACE_ */
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED)
	{
		kwq->kw_pflags |= KSYN_WQ_SHARED;
		LIST_INSERT_HEAD(&hashptr[kwq->kw_object & pthhash], kwq, kw_hash);
	} else
		LIST_INSERT_HEAD(&hashptr[mutex & pthhash], kwq, kw_hash);

	kwq->kw_pflags |= KSYN_WQ_INHASH;
	num_total_kwq++;

	pthread_list_unlock();

	if (kwqp != NULL)
		*kwqp = kwq;
        return (0);
}
2926
2927/* Reference from find is dropped here. Starts the free process if needed  */
2928void
2929ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype)
2930{
2931	uint64_t deadline;
2932	struct timeval t;
2933	int sched = 0;
2934	ksyn_wait_queue_t free_elem = NULL;
2935	ksyn_wait_queue_t free_elem1 = NULL;
2936
2937	//pthread_list_lock_spin();
2938	pthread_list_lock();
2939	kwq->kw_iocount--;
2940	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
2941		kwq->kw_dropcount--;
2942	}
2943	if (kwq->kw_iocount == 0) {
2944		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
2945			/* some one is waiting for the waitqueue, wake them up */
2946			kwq->kw_pflags &=  ~KSYN_WQ_WAITING;
2947			wakeup(&kwq->kw_pflags);
2948		}
2949
2950		if ((kwq->kw_pre_rwwc == 0) && (kwq->kw_inqueue == 0) && (kwq->kw_pre_intrcount == 0)) {
2951			if (qfreenow == 0) {
2952				microuptime(&kwq->kw_ts);
2953				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
2954				kwq->kw_pflags |= KSYN_WQ_FLIST;
2955				num_infreekwq++;
2956				free_elem = NULL;
2957			} else {
2958				/* remove from the only list it is in ie hash */
2959				kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
2960				LIST_REMOVE(kwq, kw_hash);
2961				lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
2962				num_total_kwq--;
2963				num_freekwq++;
2964				free_elem = kwq;
2965			}
2966		} else
2967			free_elem = NULL;
2968		if (qfreenow == 0)
2969			sched = 1;
2970	}
2971
2972	if (ckwq != NULL) {
2973		ckwq->kw_iocount--;
2974		if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
2975			kwq->kw_dropcount--;
2976		}
2977		if ( ckwq->kw_iocount == 0) {
2978			if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
2979				/* some one is waiting for the waitqueue, wake them up */
2980				kwq->kw_pflags &=  ~KSYN_WQ_WAITING;
2981				wakeup(&kwq->kw_pflags);
2982			}
2983			if ((ckwq->kw_pre_rwwc == 0) && (ckwq->kw_inqueue == 0) && (ckwq->kw_pre_intrcount == 0)) {
2984				if (qfreenow == 0) {
2985					/* mark for free if we can */
2986					microuptime(&ckwq->kw_ts);
2987					LIST_INSERT_HEAD(&pth_free_list, ckwq, kw_list);
2988					ckwq->kw_pflags |= KSYN_WQ_FLIST;
2989					num_infreekwq++;
2990					free_elem1 = NULL;
2991				} else {
2992					/* remove from the only list it is in ie hash */
2993					ckwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
2994					LIST_REMOVE(ckwq, kw_hash);
2995					lck_mtx_destroy(&ckwq->kw_lock, pthread_lck_grp);
2996					num_total_kwq--;
2997					num_freekwq++;
2998					free_elem1 = ckwq;
2999				}
3000			} else
3001				free_elem1 = NULL;
3002			if (qfreenow == 0)
3003				sched = 1;
3004		}
3005	}
3006
3007	if (sched == 1 && psynch_cleanupset == 0) {
3008		psynch_cleanupset = 1;
3009		microuptime(&t);
3010		t.tv_sec += KSYN_CLEANUP_DEADLINE;
3011
3012		deadline = tvtoabstime(&t);
3013		thread_call_enter_delayed(psynch_thcall, deadline);
3014	}
3015	pthread_list_unlock();
3016	if (free_elem != NULL)
3017		zfree(kwq_zone, free_elem);
3018	if (free_elem1 != NULL)
3019		zfree(kwq_zone, free_elem1);
3020}
3021
/*
 * psynch_wq_cleanup
 * Thread-call handler that reclaims wait queues parked on the delayed
 * free list.  Queues idle for at least KSYN_CLEANUP_DEADLINE seconds are
 * unhashed and freed; if any younger queues remain, the call re-arms
 * itself, otherwise psynch_cleanupset is cleared so ksyn_wqrelease()
 * will arm it again next time.
 */
void
psynch_wq_cleanup(__unused void *  param, __unused void * param1)
{
	ksyn_wait_queue_t kwq;
	struct timeval t;
	LIST_HEAD(, ksyn_wait_queue) freelist = {NULL};
	int count = 0, delayed = 0, diff;
	uint64_t deadline = 0;

	//pthread_list_lock_spin();
	pthread_list_lock();

	num_addedfreekwq = num_infreekwq - num_lastfreekwqcount;
	num_lastfreekwqcount = num_infreekwq;
	microuptime(&t);

	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
		if ((kwq->kw_iocount != 0) || (kwq->kw_pre_rwwc != 0) || (kwq->kw_inqueue != 0) || (kwq->kw_pre_intrcount != 0)) {
			/* still in use */
			continue;
		}
		diff = t.tv_sec - kwq->kw_ts.tv_sec;
		if (diff < 0)
			diff *= -1;
		if (diff >= KSYN_CLEANUP_DEADLINE) {
			/* out of hash; collect on a local list, free after dropping the lock */
			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
			num_infreekwq--;
			num_freekwq++;
			LIST_REMOVE(kwq, kw_hash);
			LIST_REMOVE(kwq, kw_list);
			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
			count ++;
			num_total_kwq--;
		} else {
			/* too young to free; remember to re-arm the thread call */
			delayed = 1;
		}

	}
	if (delayed != 0) {
		t.tv_sec += KSYN_CLEANUP_DEADLINE;

		deadline = tvtoabstime(&t);
		thread_call_enter_delayed(psynch_thcall, deadline);
		psynch_cleanupset = 1;
	} else
		psynch_cleanupset = 0;

	pthread_list_unlock();


	/* destroy and free the harvested queues outside the list lock */
	while ((kwq = LIST_FIRST(&freelist)) != NULL) {
		LIST_REMOVE(kwq, kw_list);
		lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
		zfree(kwq_zone, kwq);
	}
}
3080
3081
/*
 * ksyn_block_thread_locked
 * Block the calling thread on kwe's psynch return slot with an absolute
 * deadline (0 = no timeout), via either a plain block or a continuation.
 * Entered with the kwq lock held; the lock is dropped before blocking.
 * Returns the thread_block() wait result (e.g. THREAD_TIMED_OUT,
 * THREAD_INTERRUPTED); callers translate it to errno values.
 */
kern_return_t
#if _PSYNCH_TRACE_
ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog, thread_continue_t continuation, void * parameter)
#else
ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, __unused int mylog, thread_continue_t continuation, void * parameter)
#endif
{
	kern_return_t kret;
#if _PSYNCH_TRACE_
	int error = 0;
	uthread_t uth = NULL;
#endif /* _PSYNCH_TRACE_ */

	/* record which queue the element is parked on, then arm the wait before unlocking */
	kwe->kwe_kwqqueue = (void *)kwq;
	assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime);
	ksyn_wqunlock(kwq);

	/* with a continuation the kernel stack is not preserved across the block */
	if (continuation == THREAD_CONTINUE_NULL)
		kret = thread_block(NULL);
	else
		kret = thread_block_parameter(continuation, parameter);

#if _PSYNCH_TRACE_
	switch (kret) {
		case THREAD_TIMED_OUT:
			error  = ETIMEDOUT;
			break;
		case THREAD_INTERRUPTED:
			error  = EINTR;
			break;
	}
	uth = current_uthread();
#if defined(__i386__)
	if (mylog != 0)
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uint32_t)uth, kret, 0, 0);
#else
	if (mylog != 0)
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xeeeeeeee, kret, error, 0xeeeeeeee, 0);
#endif
#endif /* _PSYNCH_TRACE_ */

	return(kret);
}
3125
3126kern_return_t
3127ksyn_wakeup_thread(__unused ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
3128{
3129	kern_return_t kret;
3130#if _PSYNCH_TRACE_
3131	uthread_t uth = NULL;
3132#endif /* _PSYNCH_TRACE_ */
3133
3134	kret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
3135
3136	if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3137		panic("ksyn_wakeup_thread: panic waking up thread %x\n", kret);
3138#if _PSYNCH_TRACE_
3139	uth = kwe->kwe_uth;
3140#if defined(__i386__)
3141	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf1f2f3f4, (uint32_t)uth, kret, 0, 0);
3142#endif
3143#endif /* _PSYNCH_TRACE_ */
3144
3145	return(kret);
3146}
3147
3148/* find the true shared obect/offset for shared mutexes */
3149int
3150ksyn_findobj(uint64_t mutex, uint64_t * objectp, uint64_t * offsetp)
3151{
3152	vm_page_info_basic_data_t info;
3153	kern_return_t kret;
3154	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
3155
3156	kret = vm_map_page_info(current_map(), mutex, VM_PAGE_INFO_BASIC,
3157			(vm_page_info_t)&info, &count);
3158
3159	if (kret != KERN_SUCCESS)
3160		return(EINVAL);
3161
3162	if (objectp != NULL)
3163		*objectp = (uint64_t)info.object_id;
3164	if (offsetp != NULL)
3165		*offsetp = (uint64_t)info.offset;
3166
3167	return(0);
3168}
3169
3170
3171/* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */
3172int
3173kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * typep, uint32_t lowest[])
3174{
3175
3176	uint32_t kw_fr, kw_flr, kw_fwr, kw_fywr, low;
3177	int type = 0, lowtype, typenum[4];
3178	uint32_t numbers[4];
3179        int count = 0, i;
3180
3181
3182	if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
3183		type |= PTH_RWSHFT_TYPE_READ;
3184		/* read entries are present */
3185		if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
3186			kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
3187			if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
3188				kw_fr = premgen;
3189		} else
3190			kw_fr = premgen;
3191
3192		lowest[KSYN_QUEUE_READ] = kw_fr;
3193		numbers[count]= kw_fr;
3194		typenum[count] = PTH_RW_TYPE_READ;
3195		count++;
3196	} else
3197		lowest[KSYN_QUEUE_READ] = 0;
3198
3199	if ((kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0)) {
3200		type |= PTH_RWSHFT_TYPE_LREAD;
3201		/* read entries are present */
3202		if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) {
3203			kw_flr = kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum;
3204			if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && (is_seqlower(premgen, kw_flr) != 0))
3205				kw_flr = premgen;
3206		} else
3207			kw_flr = premgen;
3208
3209		lowest[KSYN_QUEUE_LREAD] = kw_flr;
3210		numbers[count]= kw_flr;
3211		typenum[count] =  PTH_RW_TYPE_LREAD;
3212		count++;
3213	} else
3214		lowest[KSYN_QUEUE_LREAD] = 0;
3215
3216
3217	if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
3218		type |= PTH_RWSHFT_TYPE_WRITE;
3219		/* read entries are present */
3220		if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
3221			kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
3222			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
3223				kw_fwr = premgen;
3224		} else
3225			kw_fwr = premgen;
3226
3227		lowest[KSYN_QUEUE_WRITER] = kw_fwr;
3228		numbers[count]= kw_fwr;
3229		typenum[count] =  PTH_RW_TYPE_WRITE;
3230		count++;
3231	} else
3232		lowest[KSYN_QUEUE_WRITER] = 0;
3233
3234	if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0)) {
3235		type |= PTH_RWSHFT_TYPE_YWRITE;
3236		/* read entries are present */
3237		if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) {
3238			kw_fywr = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum;
3239			if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (is_seqlower(premgen, kw_fywr) != 0))
3240				kw_fywr = premgen;
3241		} else
3242			kw_fywr = premgen;
3243
3244		lowest[KSYN_QUEUE_YWRITER] = kw_fywr;
3245		numbers[count]= kw_fywr;
3246		typenum[count] =  PTH_RW_TYPE_YWRITE;
3247		count++;
3248	} else
3249		lowest[KSYN_QUEUE_YWRITER] = 0;
3250
3251
3252#if __TESTPANICS__
3253	if (count == 0)
3254		panic("nothing in the queue???\n");
3255#endif /* __TESTPANICS__ */
3256
3257	low = numbers[0];
3258	lowtype = typenum[0];
3259        if (count > 1) {
3260                for (i = 1; i< count; i++) {
3261                        if(is_seqlower(numbers[i] , low) != 0) {
3262                                low = numbers[i];
3263				lowtype = typenum[i];
3264			}
3265                }
3266        }
3267	type |= lowtype;
3268
3269	if (typep != 0)
3270		*typep = type;
3271	return(0);
3272}
3273
3274/* wakeup readers and longreaders to upto the  writer limits */
3275int
3276ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t  updatebits, int * wokenp)
3277{
3278	ksyn_waitq_element_t kwe = NULL;
3279	ksyn_queue_t kq;
3280	int failedwakeup = 0;
3281	int numwoken = 0;
3282	kern_return_t kret = KERN_SUCCESS;
3283	uint32_t lbits = 0;
3284
3285	lbits = updatebits;
3286	if (longreadset != 0) {
3287		/* clear all read and longreads */
3288		while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwq)) != NULL) {
3289			kwe->kwe_psynchretval = lbits;
3290			kwe->kwe_kwqqueue = NULL;
3291
3292			numwoken++;
3293			kret = ksyn_wakeup_thread(kwq, kwe);
3294#if __TESTPANICS__
3295			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3296				panic("ksyn_wakeupreaders: panic waking up readers\n");
3297#endif /* __TESTPANICS__ */
3298			if (kret == KERN_NOT_WAITING) {
3299				failedwakeup++;
3300			}
3301		}
3302		while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwq)) != NULL) {
3303			kwe->kwe_psynchretval = lbits;
3304			kwe->kwe_kwqqueue = NULL;
3305			numwoken++;
3306			kret = ksyn_wakeup_thread(kwq, kwe);
3307#if __TESTPANICS__
3308			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3309				panic("ksyn_wakeupreaders: panic waking up lreaders\n");
3310#endif /* __TESTPANICS__ */
3311			if (kret == KERN_NOT_WAITING) {
3312				failedwakeup++;
3313			}
3314		}
3315	} else {
3316		kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
3317		while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
3318			kwe = ksyn_queue_removefirst(kq, kwq);
3319			kwe->kwe_psynchretval = lbits;
3320			kwe->kwe_kwqqueue = NULL;
3321			numwoken++;
3322			kret = ksyn_wakeup_thread(kwq, kwe);
3323#if __TESTPANICS__
3324			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3325				panic("ksyn_wakeupreaders: panic waking up readers\n");
3326#endif /* __TESTPANICS__ */
3327			if (kret == KERN_NOT_WAITING) {
3328				failedwakeup++;
3329			}
3330		}
3331	}
3332
3333	if (wokenp != NULL)
3334		*wokenp = numwoken;
3335	return(failedwakeup);
3336}
3337
3338
3339/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */
3340int
3341kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t rw_wc, uint32_t * updatep, int flags, int * blockp, uint32_t premgen)
3342{
3343	uint32_t low_reader, low_writer, low_ywriter, low_lreader,limitrdnum;
3344	int rwtype, error=0;
3345	int longreadset = 0, allreaders, failed;
3346	uint32_t updatebits=0, numneeded = 0;;
3347	int prepost = flags & KW_UNLOCK_PREPOST;
3348	thread_t preth = THREAD_NULL;
3349	ksyn_waitq_element_t kwe;
3350	uthread_t uth;
3351	thread_t th;
3352	int woken = 0;
3353	int block = 1;
3354	uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */
3355	kern_return_t kret = KERN_SUCCESS;
3356	ksyn_queue_t kq;
3357	int curthreturns = 0;
3358
3359#if _PSYNCH_TRACE_
3360	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_START, (uint32_t)kwq->kw_addr, mgen, premgen, rw_wc, 0);
3361#endif /* _PSYNCH_TRACE_ */
3362	if (prepost != 0) {
3363		preth = current_thread();
3364	}
3365
3366	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
3367	kwq->kw_lastseqword = rw_wc;
3368	kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
3369	kwq->kw_overlapwatch = 0;
3370
3371	/* upgrade pending */
3372	if (is_rw_ubit_set(mgen)) {
3373#if __TESTPANICS__
3374		panic("NO UBIT SHOULD BE SET\n");
3375		updatebits = PTH_RWL_EBIT | PTH_RWL_KBIT;
3376		if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
3377			updatebits |= PTH_RWL_WBIT;
3378		if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0)
3379			updatebits |= PTH_RWL_YBIT;
3380		if (prepost != 0)  {
3381			if((flags & KW_UNLOCK_PREPOST_UPGRADE) != 0) {
3382				/* upgrade thread calling the prepost */
3383				/* upgrade granted */
3384				block = 0;
3385				goto out;
3386			}
3387
3388		}
3389		if (kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE].ksynq_count > 0) {
3390			kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwq);
3391
3392			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3393			kwe->kwe_psynchretval = updatebits;
3394			kwe->kwe_kwqqueue = NULL;
3395			kret = ksyn_wakeup_thread(kwq, kwe);
3396			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3397				panic("kwq_handle_unlock: panic waking up the upgrade thread \n");
3398			if (kret == KERN_NOT_WAITING) {
3399				kwq->kw_pre_intrcount = 1;	/* actually a  count */
3400				kwq->kw_pre_intrseq = mgen;
3401				kwq->kw_pre_intrretbits = kwe->kwe_psynchretval;
3402				kwq->kw_pre_intrtype = PTH_RW_TYPE_UPGRADE;
3403			}
3404			error = 0;
3405		} else {
3406			panic("panic unable to find the upgrade thread\n");
3407		}
3408#endif /* __TESTPANICS__ */
3409		ksyn_wqunlock(kwq);
3410		goto out;
3411	}
3412
3413	error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
3414#if __TESTPANICS__
3415	if (error != 0)
3416		panic("rwunlock: cannot fails to slot next round of threads");
3417#endif /* __TESTPANICS__ */
3418
3419#if _PSYNCH_TRACE_
3420	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 1, rwtype, 0, 0);
3421#endif /* _PSYNCH_TRACE_ */
3422	low_reader = lowest[KSYN_QUEUE_READ];
3423	low_lreader = lowest[KSYN_QUEUE_LREAD];
3424	low_writer = lowest[KSYN_QUEUE_WRITER];
3425	low_ywriter = lowest[KSYN_QUEUE_YWRITER];
3426
3427
3428	longreadset = 0;
3429	allreaders = 0;
3430	updatebits = 0;
3431
3432
3433	switch (rwtype & PTH_RW_TYPE_MASK) {
3434		case PTH_RW_TYPE_LREAD:
3435			longreadset = 1;
3436
3437		case PTH_RW_TYPE_READ: {
3438			/* what about the preflight which is LREAD or READ ?? */
3439			if  ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
3440				if (rwtype & PTH_RWSHFT_TYPE_WRITE)
3441					updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
3442				if (rwtype & PTH_RWSHFT_TYPE_YWRITE)
3443					updatebits |= PTH_RWL_YBIT;
3444			}
3445			limitrdnum = 0;
3446			if (longreadset == 0) {
3447				switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) {
3448					case PTH_RWSHFT_TYPE_WRITE:
3449						limitrdnum = low_writer;
3450						if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) &&
3451							(is_seqlower(low_lreader, limitrdnum) != 0)) {
3452							longreadset = 1;
3453						}
3454						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) &&
3455							(is_seqlower(premgen, limitrdnum) != 0)) {
3456							longreadset = 1;
3457						}
3458						break;
3459					case PTH_RWSHFT_TYPE_YWRITE:
3460						/* all read ? */
3461						if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) &&
3462							(is_seqlower(low_lreader, low_ywriter) != 0)) {
3463							longreadset = 1;
3464						} else
3465							allreaders = 1;
3466						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) &&
3467							(is_seqlower(premgen, low_ywriter) != 0)) {
3468							longreadset = 1;
3469							allreaders = 0;
3470						}
3471
3472
3473						break;
3474					case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE):
3475						if (is_seqlower(low_ywriter, low_writer) != 0) {
3476							limitrdnum = low_ywriter;
3477						} else
3478							limitrdnum = low_writer;
3479						if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) &&
3480							(is_seqlower(low_lreader, limitrdnum) != 0)) {
3481							longreadset = 1;
3482						}
3483						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) &&
3484							(is_seqlower(premgen, limitrdnum) != 0)) {
3485							longreadset = 1;
3486						}
3487						break;
3488					default: /* no writers at all */
3489						if ((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0)
3490							longreadset = 1;
3491						else
3492							allreaders = 1;
3493				};
3494
3495			}
3496			numneeded = 0;
3497			if (longreadset !=  0) {
3498				updatebits |= PTH_RWL_LBIT;
3499				updatebits &= ~PTH_RWL_KBIT;
3500				if ((flags &  (KW_UNLOCK_PREPOST_READLOCK | KW_UNLOCK_PREPOST_LREADLOCK)) != 0)
3501					numneeded += 1;
3502				numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
3503				numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count;
3504				updatebits += (numneeded << PTHRW_COUNT_SHIFT);
3505				kwq->kw_overlapwatch = 1;
3506			} else {
3507				/* no longread, evaluate number of readers */
3508
3509				switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) {
3510					case PTH_RWSHFT_TYPE_WRITE:
3511						limitrdnum = low_writer;
3512						numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
3513						if (((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
3514							curthreturns = 1;
3515							numneeded += 1;
3516						}
3517						break;
3518					case PTH_RWSHFT_TYPE_YWRITE:
3519						/* all read ? */
3520						numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
3521						if ((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) {
3522							curthreturns = 1;
3523							numneeded += 1;
3524						}
3525						break;
3526					case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE):
3527						limitrdnum = low_writer;
3528						numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
3529						if (((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
3530							curthreturns = 1;
3531							numneeded += 1;
3532						}
3533						break;
3534					default: /* no writers at all */
3535						/* no other waiters only readers */
3536						kwq->kw_overlapwatch = 1;
3537						numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
3538						if ((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) {
3539							curthreturns = 1;
3540							numneeded += 1;
3541						}
3542				};
3543
3544				updatebits += (numneeded << PTHRW_COUNT_SHIFT);
3545			}
3546			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3547
3548			if (curthreturns != 0) {
3549				block = 0;
3550				uth = current_uthread();
3551				kwe = &uth->uu_kwe;
3552				kwe->kwe_psynchretval = updatebits;
3553			}
3554
3555
3556			failed = ksyn_wakeupreaders(kwq, limitrdnum, longreadset, allreaders, updatebits, &woken);
3557#if _PSYNCH_TRACE_
3558	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
3559#endif /* _PSYNCH_TRACE_ */
3560
3561			if (failed != 0) {
3562				kwq->kw_pre_intrcount = failed;	/* actually a  count */
3563				kwq->kw_pre_intrseq = limitrdnum;
3564				kwq->kw_pre_intrretbits = updatebits;
3565				if (longreadset)
3566					kwq->kw_pre_intrtype = PTH_RW_TYPE_LREAD;
3567				else
3568					kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
3569			}
3570
3571			error = 0;
3572
3573			if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
3574				panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
3575		}
3576		break;
3577
3578		case PTH_RW_TYPE_WRITE: {
3579
3580			/* only one thread is goin to be granted */
3581			updatebits |= (PTHRW_INC);
3582			updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
3583
3584			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
3585				block = 0;
3586				if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
3587					updatebits |= PTH_RWL_WBIT;
3588				if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
3589					updatebits |= PTH_RWL_YBIT;
3590				th = preth;
3591				uth = get_bsdthread_info(th);
3592				kwe = &uth->uu_kwe;
3593				kwe->kwe_psynchretval = updatebits;
3594			}  else {
3595				/*  we are not granting writelock to the preposting thread */
3596				kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
3597
3598				/* if there are writers present or the preposting write thread then W bit is to be set */
3599				if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) )
3600					updatebits |= PTH_RWL_WBIT;
3601				if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
3602					updatebits |= PTH_RWL_YBIT;
3603				kwe->kwe_psynchretval = updatebits;
3604				kwe->kwe_kwqqueue = NULL;
3605				/* setup next in the queue */
3606				kret = ksyn_wakeup_thread(kwq, kwe);
3607#if _PSYNCH_TRACE_
3608	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0);
3609#endif /* _PSYNCH_TRACE_ */
3610#if __TESTPANICS__
3611				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3612					panic("kwq_handle_unlock: panic waking up writer\n");
3613#endif /* __TESTPANICS__ */
3614				if (kret == KERN_NOT_WAITING) {
3615					kwq->kw_pre_intrcount = 1;	/* actually a  count */
3616					kwq->kw_pre_intrseq = low_writer;
3617					kwq->kw_pre_intrretbits = updatebits;
3618					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
3619				}
3620				error = 0;
3621			}
3622			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3623			if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
3624				panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
3625
3626		 }
3627		break;
3628
3629		case PTH_RW_TYPE_YWRITE: {
3630			/* can reader locks be granted ahead of this write? */
3631			if ((rwtype & PTH_RWSHFT_TYPE_READ) != 0)  {
3632				if  ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
3633					if (rwtype & PTH_RWSHFT_TYPE_WRITE)
3634						updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
3635					if (rwtype & PTH_RWSHFT_TYPE_YWRITE)
3636						updatebits |= PTH_RWL_YBIT;
3637				}
3638
3639				if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
3640					/* is lowest reader less than the low writer? */
3641					if (is_seqlower(low_reader,low_writer) == 0)
3642						goto yielditis;
3643
3644					numneeded = ksyn_queue_count_tolowest(kq, low_writer);
3645					updatebits += (numneeded << PTHRW_COUNT_SHIFT);
3646					if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, low_writer) != 0)) {
3647						uth = current_uthread();
3648						kwe = &uth->uu_kwe;
3649						/* add one more */
3650						updatebits += PTHRW_INC;
3651						kwe->kwe_psynchretval = updatebits;
3652						block = 0;
3653					}
3654
3655					kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3656
3657					/* there will be readers to wakeup , no need to check for woken */
3658					failed = ksyn_wakeupreaders(kwq, low_writer, 0, 0, updatebits, NULL);
3659#if _PSYNCH_TRACE_
3660	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
3661#endif /* _PSYNCH_TRACE_ */
3662					if (failed != 0) {
3663						kwq->kw_pre_intrcount = failed;	/* actually a  count */
3664						kwq->kw_pre_intrseq = low_writer;
3665						kwq->kw_pre_intrretbits = updatebits;
3666						kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
3667					}
3668					error = 0;
3669				} else {
3670					/* wakeup all readers */
3671					numneeded = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
3672					updatebits += (numneeded << PTHRW_COUNT_SHIFT);
3673					if ((prepost != 0) &&  ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
3674						uth = current_uthread();
3675						kwe = &uth->uu_kwe;
3676						updatebits += PTHRW_INC;
3677						kwe->kwe_psynchretval = updatebits;
3678						block = 0;
3679					}
3680					kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3681					failed = ksyn_wakeupreaders(kwq, low_writer, 0, 1, updatebits, &woken);
3682#if _PSYNCH_TRACE_
3683	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
3684#endif /* _PSYNCH_TRACE_ */
3685					if (failed != 0) {
3686						kwq->kw_pre_intrcount = failed;	/* actually a  count */
3687						kwq->kw_pre_intrseq = kwq->kw_highseq;
3688						kwq->kw_pre_intrretbits = updatebits;
3689						kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
3690					}
3691					error = 0;
3692				}
3693			} else {
3694yielditis:
3695				/* no reads, so granting yeilding writes */
3696				updatebits |= PTHRW_INC;
3697				updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
3698
3699				if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (low_writer == premgen)) {
3700					/* preposting yielding write thread is being granted exclusive lock */
3701
3702					block = 0;
3703
3704					if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
3705						updatebits |= PTH_RWL_WBIT;
3706					else if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0)
3707						updatebits |= PTH_RWL_YBIT;
3708
3709					th = preth;
3710					uth = get_bsdthread_info(th);
3711					kwe = &uth->uu_kwe;
3712					kwe->kwe_psynchretval = updatebits;
3713				}  else {
3714					/*  we are granting yield writelock to some other thread */
3715					kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwq);
3716
3717					if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
3718						updatebits |= PTH_RWL_WBIT;
3719					/* if there are ywriters present or the preposting ywrite thread then W bit is to be set */
3720					else if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) )
3721						updatebits |= PTH_RWL_YBIT;
3722
3723					kwe->kwe_psynchretval = updatebits;
3724					kwe->kwe_kwqqueue = NULL;
3725
3726					kret = ksyn_wakeup_thread(kwq, kwe);
3727#if _PSYNCH_TRACE_
3728	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0);
3729#endif /* _PSYNCH_TRACE_ */
3730#if __TESTPANICS__
3731					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
3732						panic("kwq_handle_unlock : panic waking up readers\n");
3733#endif /* __TESTPANICS__ */
3734					if (kret == KERN_NOT_WAITING) {
3735						kwq->kw_pre_intrcount = 1;	/* actually a  count */
3736						kwq->kw_pre_intrseq = low_ywriter;
3737						kwq->kw_pre_intrretbits = updatebits;
3738						kwq->kw_pre_intrtype = PTH_RW_TYPE_YWRITE;
3739					}
3740					error = 0;
3741				}
3742				kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
3743			}
3744		}
3745		break;
3746
3747		default:
3748			panic("rwunlock: invalid type for lock grants");
3749
3750	};
3751
3752
3753out:
3754	if (updatep != NULL)
3755		*updatep = updatebits;
3756	if (blockp != NULL)
3757		*blockp = block;
3758#if _PSYNCH_TRACE_
3759	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0, updatebits, block, 0);
3760#endif /* _PSYNCH_TRACE_ */
3761	return(error);
3762}
3763
/*
 * Decide whether an arriving rwlock request may "overlap" (be granted
 * immediately) while the wait queue is in overlap mode.
 * On a grant: kw_nextseqword is advanced by one, the caller's update bits
 * are returned in *updatebitsp and *blockp is cleared; otherwise *blockp
 * is set so the caller blocks.  Always returns 0.
 */
int
kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, __unused uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, __unused int flags , int * blockp)
{
	uint32_t highword = kwq->kw_nextseqword & PTHRW_COUNT_MASK;
	uint32_t lowword = kwq->kw_lastseqword & PTHRW_COUNT_MASK;
	uint32_t val=0;
	int withinseq;


	/* overlap is set, so no need to check for valid state for overlap */

	/* request must fall within the [lastseqword, nextseqword] window */
	withinseq = ((is_seqlower_eq(rw_wc, highword) != 0) || (is_seqhigher_eq(lowword, rw_wc) != 0));

	if (withinseq != 0) {
		if ((kwq->kw_nextseqword & PTH_RWL_LBIT) == 0)  {
			/* if no writers ahead, overlap granted */
			if ((lgenval & PTH_RWL_WBIT) == 0) {
				goto grantoverlap;
			}
		} else  {
			/* Lbit is set, and writers ahead does not count */
			goto grantoverlap;
		}
	}

	*blockp = 1;
	return(0);

grantoverlap:
		/* increase the next expected seq by one */
		kwq->kw_nextseqword += PTHRW_INC;
		/* set count by one &  bits from the nextseq and add M bit */
		val = PTHRW_INC;
		val |= ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
		*updatebitsp = val;
		*blockp = 0;
		return(0);
}
3802
#if NOTYET
/*
 * Handle a rwlock downgrade (exclusive -> shared): wake the readers that
 * may now run ahead of the lowest pending writer (or all readers when no
 * writer is queued), recording prepost state for any wakeups that missed.
 * NOTE(review): this whole function is compiled out (NOTYET) and has
 * never run in this form.
 */
int
kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, __unused int flags, __unused uint32_t premgen, __unused int * blockp)
{
	uint32_t updatebits, lowriter = 0;
	int longreadset, allreaders, count;

	/* can handle downgrade now */
	updatebits = mgen;

	longreadset = 0;
	allreaders = 0;
	if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 0) {
		/* a writer is pending: readers may only run up to its sequence */
		lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
		if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) {
			if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0)
				longreadset = 1;
		}
	} else {
		/* no exclusive writers: all readers can go */
		allreaders = 1;
		if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count > 0) {
			lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum;
			if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) {
				if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0)
					longreadset = 1;
			}
		}
	}

	count = ksyn_wakeupreaders(kwq, lowriter, longreadset, allreaders, updatebits, NULL);
	if (count != 0) {
		kwq->kw_pre_limrd = count;
		kwq->kw_pre_limrdseq = lowriter;
		/*
		 * Fix: the original stored lowriter here as well, leaving the
		 * computed updatebits unused; the field name (and the parallel
		 * kw_pre_intrretbits usage elsewhere) indicates the return
		 * bits belong here.  TODO confirm when this code is enabled.
		 */
		kwq->kw_pre_limrdbits = updatebits;
		/* need to handle prepost */
	}
	return(0);
}

#endif /* NOTYET */
3844
3845/************* Indiv queue support routines ************************/
3846void
3847ksyn_queue_init(ksyn_queue_t kq)
3848{
3849	TAILQ_INIT(&kq->ksynq_kwelist);
3850	kq->ksynq_count = 0;
3851	kq->ksynq_firstnum = 0;
3852	kq->ksynq_lastnum = 0;
3853}
3854
/*
 * Insert kwe (a real waiter or a fake prepost/broadcast element) into kq,
 * ordered by lock sequence (SEQFIT) or in arrival order (FIRSTFIT), and
 * refresh the queue's first/last bounds and the kwq-wide counters.
 * Returns 0 on success, EBUSY when a duplicate sequence is detected.
 */
int
ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int fit)
{
	uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
	ksyn_waitq_element_t q_kwe, r_kwe;
	int res = 0;
	uthread_t nuth = NULL;

	/* empty queue: this element defines both sequence bounds */
	if (kq->ksynq_count == 0) {
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
		kq->ksynq_lastnum = lockseq;
		goto out;
	}

	if (fit == FIRSTFIT) {
		/* TBD: if retry bit is set for mutex, add it to the head */
		/* firstfit, arriving order */
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		if (is_seqlower (lockseq, kq->ksynq_firstnum) != 0)
			kq->ksynq_firstnum = lockseq;
		if (is_seqhigher (lockseq, kq->ksynq_lastnum) != 0)
			kq->ksynq_lastnum = lockseq;
		goto out;
	}

	if ((lockseq == kq->ksynq_firstnum) || (lockseq == kq->ksynq_lastnum)) {
		/* During prepost when a thread is getting cancelled, we could have two with same seq */
		if (kwe->kwe_flags == KWE_THREAD_PREPOST) {
			q_kwe = ksyn_queue_find_seq(kwq, kq, lockseq, 0);
			if ((q_kwe != NULL) && ((nuth = (uthread_t)q_kwe->kwe_uth) != NULL) &&
				((nuth->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL)) {
				/* the existing holder of this seq is being cancelled; queue behind it */
				TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
				goto out;

			} else {
				__FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq ");
				res = EBUSY;
				goto out1;
			}
		 } else {
			__FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq ");
			res = EBUSY;
			goto out1;
		}
	}

	/* check for next seq one */
	/* fast path: new highest sequence, append at the tail */
	if (is_seqlower(kq->ksynq_lastnum, lockseq) != 0) {
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_lastnum = lockseq;
		goto out;
	}

	/* fast path: new lowest sequence, prepend at the head */
	if (is_seqlower(lockseq, kq->ksynq_firstnum) != 0) {
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
		goto out;
	}

	/* goto slow  insert mode */
	/* walk forward to the first element with a higher sequence and insert before it */
	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
		if (is_seqhigher(q_kwe->kwe_lockseq, lockseq) != 0) {
			TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
			goto out;
		}
	}

#if __TESTPANICS__
	panic("failed to insert \n");
#endif /* __TESTPANICS__ */

out:
	/* common bookkeeping for every successful insertion path */
	if (uth != NULL)
		kwe->kwe_uth = uth;
	kq->ksynq_count++;
	kwq->kw_inqueue++;
	update_low_high(kwq, lockseq);
out1:
	return(res);
}
3936
/*
 * Pop and return the element at the head of kq (NULL when empty),
 * refreshing the queue's first/last sequence bounds and, when the removed
 * element held an extreme, the kwq-wide low/high sequence values.
 */
ksyn_waitq_element_t
ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t q_kwe;
	uint32_t curseq;

	if (kq->ksynq_count != 0) {
		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
		kq->ksynq_count--;
		kwq->kw_inqueue--;

		if(kq->ksynq_count != 0) {
			/* the new head defines the first sequence */
			q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
			kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
		} else {
			kq->ksynq_firstnum = 0;
			kq->ksynq_lastnum = 0;

		}
		if (kwq->kw_inqueue == 0) {
			kwq->kw_lowseq = 0;
			kwq->kw_highseq = 0;
		} else {
			/* recompute the global extremes only if this element held one */
			if (kwq->kw_lowseq == curseq)
				kwq->kw_lowseq = find_nextlowseq(kwq);
			if (kwq->kw_highseq == curseq)
				kwq->kw_highseq = find_nexthighseq(kwq);
		}
	}
	return(kwe);
}
3971
/*
 * Unlink kwe from kq (it must be on that queue) and refresh the queue's
 * first/last sequence bounds and the kwq-wide low/high sequence values.
 * No-op when the queue is empty.
 */
void
ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
{
	ksyn_waitq_element_t q_kwe;
	uint32_t curseq;

	if (kq->ksynq_count > 0) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_count--;
		if(kq->ksynq_count != 0) {
			/* re-derive both bounds from the surviving head and tail */
			q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
			kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
			q_kwe = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
			kq->ksynq_lastnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
		} else {
			kq->ksynq_firstnum = 0;
			kq->ksynq_lastnum = 0;

		}
		kwq->kw_inqueue--;
		curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
		if (kwq->kw_inqueue == 0) {
			kwq->kw_lowseq = 0;
			kwq->kw_highseq = 0;
		} else {
			/* recompute the global extremes only if this element held one */
			if (kwq->kw_lowseq == curseq)
				kwq->kw_lowseq = find_nextlowseq(kwq);
			if (kwq->kw_highseq == curseq)
				kwq->kw_highseq = find_nexthighseq(kwq);
		}
	}
}
4004
4005/* find the thread and removes from the queue */
4006ksyn_waitq_element_t
4007ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove)
4008{
4009	ksyn_waitq_element_t q_kwe, r_kwe;
4010
4011	/* TBD: bail out if higher seq is seen */
4012	/* case where wrap in the tail of the queue exists */
4013	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
4014		if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
4015			if (remove != 0)
4016				ksyn_queue_removeitem(kwq, kq, q_kwe);
4017			return(q_kwe);
4018		}
4019	}
4020	return(NULL);
4021}
4022
4023
4024/* find the thread at the target sequence (or a broadcast/prepost at or above) */
4025ksyn_waitq_element_t
4026ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
4027{
4028	ksyn_waitq_element_t q_kwe, r_kwe;
4029	uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
4030
4031	/* case where wrap in the tail of the queue exists */
4032	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
4033
4034		/* skip the lower entries */
4035		if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), cgen) != 0)
4036			continue;
4037
4038		switch (q_kwe->kwe_flags) {
4039
4040		case KWE_THREAD_INWAIT:
4041			if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen)
4042				break;
4043			/* fall thru */
4044
4045		case KWE_THREAD_BROADCAST:
4046		case KWE_THREAD_PREPOST:
4047			return (q_kwe);
4048		}
4049	}
4050	return(NULL);
4051}
4052
/*
 * Find the element a cvar signal should consume: scan up to uptoseq,
 * preferring an INWAIT thread at or above signalseq, but settling for the
 * first eligible (non-cancelled) lower thread, or any prepost/broadcast
 * in range.  Returns NULL when nothing in range is eligible.
 */
ksyn_waitq_element_t
ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
{
	ksyn_waitq_element_t q_kwe, r_kwe, t_kwe = NULL;

	/* case where wrap in the tail of the queue exists */
	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {

		switch (q_kwe->kwe_flags) {

		case KWE_THREAD_PREPOST:
			/* preposts beyond the range end the scan */
			if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
				return t_kwe;
			/* fall thru */

		case KWE_THREAD_BROADCAST:
			/* match any prepost at our same uptoseq or any broadcast above */
			if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
				continue;
			return  q_kwe;

		case KWE_THREAD_INWAIT:
			/*
			 * Match any (non-cancelled) thread at or below our upto sequence -
			 * but prefer an exact match to our signal sequence (if present) to
			 * keep exact matches happening.
			 */
			if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
				return t_kwe;

			if (q_kwe->kwe_kwqqueue == kwq) {
				uthread_t ut = q_kwe->kwe_uth;
				if ((ut->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) != UT_CANCEL) {
					/* if equal or higher than our signal sequence, return this one */
					if (is_seqhigher_eq((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq))
						return q_kwe;

					/* otherwise, just remember this eligible thread and move on */
					if (t_kwe == NULL)
						t_kwe = q_kwe;
				}
			}
			break;

		default:
			panic("ksyn_queue_find_signalseq(): unknow wait queue element type (%d)\n", q_kwe->kwe_flags);
			break;
		}
	}
	/* no exact-or-higher match found; return the remembered fallback (or NULL) */
	return t_kwe;
}
4105
4106
/*
 * Drain kq up to (and including) upto — or the whole queue when all is
 * set.  Real waiters found in range are woken with a spurious-wakeup
 * indication; fake (prepost/broadcast) entries are moved onto kfreeq and,
 * when release is set, freed back to the zone.  Returns the number of
 * fake entries drained.  Expected to run with the kwq lock held
 * (it manipulates the queues directly) — NOTE(review): confirm at callers.
 */
int
ksyn_queue_move_tofree(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t kfreeq, int all, int release)
{
	ksyn_waitq_element_t kwe;
	int count = 0;
	uint32_t tseq = upto & PTHRW_COUNT_MASK;
#if _PSYNCH_TRACE_
	uthread_t ut;
#endif /* _PSYNCH_TRACE_ */

	ksyn_queue_init(kfreeq);

	/* free all the entries, must be only fakes.. */
	kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
	while (kwe != NULL) {
		/* beyond the target sequence: stop (unless draining everything) */
		if ((all == 0) && (is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), tseq) != 0))
			break;
		if (kwe->kwe_flags == KWE_THREAD_INWAIT) {
			/*
			 * This scenario is typically noticed when the cvar is
			 * reinited and the new waiters are waiting. We can
			 * return them as spurious wait so the cvar state gets
			 * reset correctly.
			 */
#if _PSYNCH_TRACE_
			ut = (uthread_t)kwe->kwe_uth;
#endif /* _PSYNCH_TRACE_ */

			/* skip canceled ones */
			/* wake the rest */
			ksyn_queue_removeitem(ckwq, kq, kwe);
			/* set M bit to indicate to waking CV to return Inc val */
			kwe->kwe_psynchretval = PTHRW_INC | (PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
			kwe->kwe_kwqqueue = NULL;
#if _PSYNCH_TRACE_
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
#endif /* _PSYNCH_TRACE_ */
			(void)ksyn_wakeup_thread(ckwq, kwe);
		} else {
			/* fake entry: park it on the free queue and count it */
			ksyn_queue_removeitem(ckwq, kq, kwe);
			TAILQ_INSERT_TAIL(&kfreeq->ksynq_kwelist, kwe, kwe_list);
			ckwq->kw_fakecount--;
			count++;
		}
		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
	}

	/* caller asked us to dispose of the fakes ourselves */
	if ((release != 0) && (count != 0)) {
		kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist);
		while (kwe != NULL) {
			TAILQ_REMOVE(&kfreeq->ksynq_kwelist, kwe, kwe_list);
			zfree(kwe_zone, kwe);
			kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist);
		}
	}

	return(count);
}
4165
4166/*************************************************************************/
4167
4168void
4169update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
4170{
4171	if (kwq->kw_inqueue == 1) {
4172		kwq->kw_lowseq = lockseq;
4173		kwq->kw_highseq = lockseq;
4174	} else {
4175		if (is_seqlower(lockseq, kwq->kw_lowseq) != 0)
4176			kwq->kw_lowseq = lockseq;
4177		if (is_seqhigher(lockseq, kwq->kw_highseq) != 0)
4178			kwq->kw_highseq = lockseq;
4179	}
4180}
4181
4182uint32_t
4183find_nextlowseq(ksyn_wait_queue_t kwq)
4184{
4185	uint32_t numbers[KSYN_QUEUE_MAX];
4186	int count = 0, i;
4187	uint32_t lowest;
4188
4189	for(i = 0; i< KSYN_QUEUE_MAX; i++) {
4190		if (kwq->kw_ksynqueues[i].ksynq_count != 0) {
4191			numbers[count]= kwq->kw_ksynqueues[i].ksynq_firstnum;
4192			count++;
4193		}
4194	}
4195
4196	if (count == 0)
4197		return(0);
4198	lowest = numbers[0];
4199	if (count > 1) {
4200		for (i = 1; i< count; i++) {
4201			if(is_seqlower(numbers[i] , lowest) != 0)
4202				lowest = numbers[count];
4203
4204		}
4205	}
4206	return(lowest);
4207}
4208
4209uint32_t
4210find_nexthighseq(ksyn_wait_queue_t kwq)
4211{
4212	uint32_t numbers[KSYN_QUEUE_MAX];
4213	int count = 0, i;
4214	uint32_t highest;
4215
4216	for(i = 0; i< KSYN_QUEUE_MAX; i++) {
4217		if (kwq->kw_ksynqueues[i].ksynq_count != 0) {
4218			numbers[count]= kwq->kw_ksynqueues[i].ksynq_lastnum;
4219			count++;
4220		}
4221	}
4222
4223
4224
4225	if (count == 0)
4226		return(0);
4227	highest = numbers[0];
4228	if (count > 1) {
4229		for (i = 1; i< count; i++) {
4230			if(is_seqhigher(numbers[i], highest) != 0)
4231				highest = numbers[i];
4232
4233		}
4234	}
4235	return(highest);
4236}
4237
4238int
4239is_seqlower(uint32_t x, uint32_t y)
4240{
4241	if (x < y) {
4242		if ((y-x) < (PTHRW_MAX_READERS/2))
4243			return(1);
4244	} else {
4245		if ((x-y) > (PTHRW_MAX_READERS/2))
4246			return(1);
4247	}
4248	return(0);
4249}
4250
4251int
4252is_seqlower_eq(uint32_t x, uint32_t y)
4253{
4254	if (x==y)
4255		return(1);
4256	else
4257		return(is_seqlower(x,y));
4258}
4259
4260int
4261is_seqhigher(uint32_t x, uint32_t y)
4262{
4263	if (x > y) {
4264		if ((x-y) < (PTHRW_MAX_READERS/2))
4265			return(1);
4266	} else {
4267		if ((y-x) > (PTHRW_MAX_READERS/2))
4268			return(1);
4269	}
4270	return(0);
4271}
4272
4273int
4274is_seqhigher_eq(uint32_t x, uint32_t y)
4275{
4276	if (x==y)
4277		return(1);
4278	else
4279		return(is_seqhigher(x,y));
4280}
4281
4282
4283int
4284find_diff(uint32_t upto, uint32_t lowest)
4285{
4286	uint32_t diff;
4287
4288	if (upto == lowest)
4289		return(0);
4290#if 0
4291	diff = diff_genseq(upto, lowest);
4292#else
4293        if (is_seqlower(upto, lowest) != 0)
4294                diff = diff_genseq(lowest, upto);
4295        else
4296                diff = diff_genseq(upto, lowest);
4297#endif
4298	diff = (diff >> PTHRW_COUNT_SHIFT);
4299	return(diff);
4300}
4301
4302
4303int
4304find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
4305{
4306	int  i;
4307	uint32_t count = 0;
4308
4309
4310#if _PSYNCH_TRACE_
4311	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_START, 0, 0, upto, nwaiters, 0);
4312#endif /* _PSYNCH_TRACE_ */
4313
4314	for (i= 0; i< KSYN_QUEUE_MAX; i++) {
4315		count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
4316#if _PSYNCH_TRACE_
4317	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_NONE, 0, 1, i, count, 0);
4318#endif /* _PSYNCH_TRACE_ */
4319		if (count >= nwaiters) {
4320			break;
4321		}
4322	}
4323
4324	if (countp != NULL) {
4325		*countp = count;
4326	}
4327#if _PSYNCH_TRACE_
4328	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_END, 0, 0, count, nwaiters, 0);
4329#endif /* _PSYNCH_TRACE_ */
4330	if (count == 0)
4331		return(0);
4332	else if (count >= nwaiters)
4333		return(1);
4334	else
4335		return(0);
4336}
4337
4338
4339uint32_t
4340ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
4341{
4342	uint32_t i = 0;
4343	ksyn_waitq_element_t kwe, newkwe;
4344	uint32_t curval;
4345
4346	/* if nothing or the  first num is greater than upto, return none */
4347	if ((kq->ksynq_count == 0) || (is_seqhigher(kq->ksynq_firstnum, upto) != 0))
4348		return(0);
4349	if (upto == kq->ksynq_firstnum)
4350		return(1);
4351
4352	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
4353		curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
4354		if (upto == curval) {
4355			i++;
4356			break;
4357		} else if (is_seqhigher(curval, upto) != 0) {
4358			break;
4359		}  else {
4360			/* seq is lower */
4361			i++;
4362		}
4363	}
4364	return(i);
4365}
4366
4367
/*
 * Handle a condvar broadcast: wake every eligible waiter at or below upto
 * on the cvar's writer queue, discard fake (prepost/broadcast) entries in
 * that range, and — unless L == S already — leave a fresh broadcast marker
 * at upto so late arrivals still see it.  The per-thread wake count is
 * returned through *updatep.  The kwq lock is held on entry; it is
 * dropped and re-taken around the zalloc below, with the scan restarted.
 */
void
ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t * updatep)
{
	kern_return_t kret;
	ksyn_queue_t kq;
	ksyn_waitq_element_t kwe, newkwe;
	uint32_t updatebits = 0;
	struct ksyn_queue  kfreeq;
	uthread_t ut;

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_START, 0xcbcbcbc2, upto, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */

	ksyn_queue_init(&kfreeq);
	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

 retry:
	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {

		if (is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto))	/* outside our range */
			break;

		/* now handle the one we found (inside the range) */
		switch (kwe->kwe_flags) {

		case KWE_THREAD_INWAIT:
			ut = (uthread_t)kwe->kwe_uth;

			/* skip canceled ones */
			if (kwe->kwe_kwqqueue != ckwq ||
			    (ut->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL)
				break;

			/* wake the rest */
			ksyn_queue_removeitem(ckwq, kq, kwe);
			kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT;
			kwe->kwe_kwqqueue = NULL;
#if _PSYNCH_TRACE_
				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
#endif /* _PSYNCH_TRACE_ */
				kret = ksyn_wakeup_thread(ckwq, kwe);
#if __TESTPANICS__
			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
				panic("ksyn_wakeupreaders: panic waking up readers\n");
#endif /* __TESTPANICS__ */
			/* one more thread woken: account for it in the return bits */
			updatebits += PTHRW_INC;
			break;

		case KWE_THREAD_BROADCAST:
		case KWE_THREAD_PREPOST:
			/* fake entries in range are consumed by the broadcast */
			ksyn_queue_removeitem(ckwq, kq, kwe);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
			ckwq->kw_fakecount--;
			break;

		default:
			panic("unknown kweflags\n");
			break;
		}
	}

	/* Need to enter a broadcast in the queue (if not already at L == S) */

	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) {

		/* reuse a drained fake entry if we have one; otherwise allocate */
		newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
		if (newkwe == NULL) {
			/* must drop the kwq lock to allocate; restart the scan after */
			ksyn_wqunlock(ckwq);
			newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
			ksyn_wqlock(ckwq);
			goto retry;
		}

		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
		bzero(newkwe, sizeof(struct ksyn_waitq_element));
		newkwe->kwe_kwqqueue = ckwq;
		newkwe->kwe_flags = KWE_THREAD_BROADCAST;
		newkwe->kwe_lockseq = upto;
		newkwe->kwe_count = 0;
		newkwe->kwe_uth = NULL;
		newkwe->kwe_psynchretval = 0;

#if _PSYNCH_TRACE_
		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, upto, 0, 0);
#endif /* _PSYNCH_TRACE_ */

		(void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], upto, NULL, newkwe, SEQFIT);
		ckwq->kw_fakecount++;
	}

	/* free up any remaining things stumbled across above */
	kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
	while (kwe != NULL) {
		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
		zfree(kwe_zone, kwe);
		kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
	}

	if (updatep != NULL)
		*updatep = updatebits;

#if _PSYNCH_TRACE_
	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_END, 0xeeeeeeed, updatebits, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
}
4476
/*
 * Post-wakeup bookkeeping for a condvar kwq: when every signal has been
 * consumed (L == S) the remaining queue entries are drained, the cvar
 * words are zeroed and the C bit is reported; when only fake entries
 * remain in the queue, the P bit is reported instead.  *updatep (when
 * non-NULL) is read, augmented, and written back.
 */
void
ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release)
{
	uint32_t updatebits = 0;

	if (updatep != NULL)
		updatebits = *updatep;
	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
		updatebits |= PTH_RWS_CV_CBIT;
		if (ckwq->kw_inqueue != 0) {
			/* FREE THE QUEUE */
			ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ckwq->kw_lword, kfreeq, 0, release);
#if __TESTPANICS__
			if (ckwq->kw_inqueue != 0)
				panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
#endif /* __TESTPANICS__ */
		}
		/* reset the cvar words to the pristine (zeroed-out) state */
		ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
		ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
	} else if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) {
		/* only fake entries are present in the queue */
		updatebits |= PTH_RWS_CV_PBIT;
	}
	if (updatep != NULL)
		*updatep = updatebits;
}
4503
4504void
4505psynch_zoneinit(void)
4506{
4507        kwq_zone = (zone_t)zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_waitqueue zone");
4508        kwe_zone = (zone_t)zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element zone");
4509}
4510#endif /* PSYNCH */
4511