kern_intr.c revision 272946
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/10/sys/kern/kern_intr.c 272946 2014-10-11 17:49:51Z kib $");
29
30#include "opt_ddb.h"
31#include "opt_kstack_usage_prof.h"
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/conf.h>
36#include <sys/cpuset.h>
37#include <sys/rtprio.h>
38#include <sys/systm.h>
39#include <sys/interrupt.h>
40#include <sys/kernel.h>
41#include <sys/kthread.h>
42#include <sys/ktr.h>
43#include <sys/limits.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/priv.h>
48#include <sys/proc.h>
49#include <sys/random.h>
50#include <sys/resourcevar.h>
51#include <sys/sched.h>
52#include <sys/smp.h>
53#include <sys/sysctl.h>
54#include <sys/syslog.h>
55#include <sys/unistd.h>
56#include <sys/vmmeter.h>
57#include <machine/atomic.h>
58#include <machine/cpu.h>
59#include <machine/md_var.h>
60#include <machine/stdarg.h>
61#ifdef DDB
62#include <ddb/ddb.h>
63#include <ddb/db_sym.h>
64#endif
65
66/*
67 * Describe an interrupt thread.  There is one of these per interrupt event.
68 */
69struct intr_thread {
70	struct intr_event *it_event;
71	struct thread *it_thread;	/* Kernel thread. */
72	int	it_flags;		/* (j) IT_* flags. */
73	int	it_need;		/* Needs service. */
74};
75
76/* Interrupt thread flags kept in it_flags */
77#define	IT_DEAD		0x000001	/* Thread is waiting to exit. */
78#define	IT_WAIT		0x000002	/* Thread is waiting for completion. */
79
80struct	intr_entropy {
81	struct	thread *td;
82	uintptr_t event;
83};
84
85struct	intr_event *clk_intr_event;
86struct	intr_event *tty_intr_event;
87void	*vm_ih;
88struct proc *intrproc;
89
90static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
91
92static int intr_storm_threshold = 1000;
93TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold);
94SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW,
95    &intr_storm_threshold, 0,
96    "Number of consecutive interrupts before storm protection is enabled");
97static TAILQ_HEAD(, intr_event) event_list =
98    TAILQ_HEAD_INITIALIZER(event_list);
99static struct mtx event_lock;
100MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF);
101
102static void	intr_event_update(struct intr_event *ie);
103#ifdef INTR_FILTER
104static int	intr_event_schedule_thread(struct intr_event *ie,
105		    struct intr_thread *ithd);
106static int	intr_filter_loop(struct intr_event *ie,
107		    struct trapframe *frame, struct intr_thread **ithd);
108static struct intr_thread *ithread_create(const char *name,
109			      struct intr_handler *ih);
110#else
111static int	intr_event_schedule_thread(struct intr_event *ie);
112static struct intr_thread *ithread_create(const char *name);
113#endif
114static void	ithread_destroy(struct intr_thread *ithread);
115static void	ithread_execute_handlers(struct proc *p,
116		    struct intr_event *ie);
117#ifdef INTR_FILTER
118static void	priv_ithread_execute_handler(struct proc *p,
119		    struct intr_handler *ih);
120#endif
121static void	ithread_loop(void *);
122static void	ithread_update(struct intr_thread *ithd);
123static void	start_softintr(void *);
124
125/* Map an interrupt type to an ithread priority. */
126u_char
127intr_priority(enum intr_type flags)
128{
129	u_char pri;
130
131	flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET |
132	    INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV);
133	switch (flags) {
134	case INTR_TYPE_TTY:
135		pri = PI_TTY;
136		break;
137	case INTR_TYPE_BIO:
138		pri = PI_DISK;
139		break;
140	case INTR_TYPE_NET:
141		pri = PI_NET;
142		break;
143	case INTR_TYPE_CAM:
144		pri = PI_DISK;
145		break;
146	case INTR_TYPE_AV:
147		pri = PI_AV;
148		break;
149	case INTR_TYPE_CLK:
150		pri = PI_REALTIME;
151		break;
152	case INTR_TYPE_MISC:
153		pri = PI_DULL;          /* don't care */
154		break;
155	default:
156		/* We didn't specify an interrupt level. */
157		panic("intr_priority: no interrupt type in flags");
158	}
159
160	return pri;
161}
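
/*
 * Callers are expected to set exactly one INTR_TYPE_* class bit; the mask
 * above strips flags such as INTR_MPSAFE, and an unrecognized (or missing)
 * type falls into the default case and panics.  For example, a handler
 * registered with INTR_TYPE_NET | INTR_MPSAFE gets an ithread at PI_NET.
 */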
162
163/*
164 * Update an ithread based on the associated intr_event.
165 */
166static void
167ithread_update(struct intr_thread *ithd)
168{
169	struct intr_event *ie;
170	struct thread *td;
171	u_char pri;
172
173	ie = ithd->it_event;
174	td = ithd->it_thread;
175
176	/* Determine the overall priority of this event. */
177	if (TAILQ_EMPTY(&ie->ie_handlers))
178		pri = PRI_MAX_ITHD;
179	else
180		pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri;
181
182	/* Update name and priority. */
183	strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name));
184#ifdef KTR
185	sched_clear_tdname(td);
186#endif
187	thread_lock(td);
188	sched_prio(td, pri);
189	thread_unlock(td);
190}
191
192/*
193 * Regenerate the full name of an interrupt event and update its priority.
194 */
195static void
196intr_event_update(struct intr_event *ie)
197{
198	struct intr_handler *ih;
199	char *last;
200	int missed, space;
201
202	/* Start off with no entropy and just the name of the event. */
203	mtx_assert(&ie->ie_lock, MA_OWNED);
204	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
205	ie->ie_flags &= ~IE_ENTROPY;
206	missed = 0;
207	space = 1;
208
209	/* Run through all the handlers updating values. */
210	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
211		if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
212		    sizeof(ie->ie_fullname)) {
213			strcat(ie->ie_fullname, " ");
214			strcat(ie->ie_fullname, ih->ih_name);
215			space = 0;
216		} else
217			missed++;
218		if (ih->ih_flags & IH_ENTROPY)
219			ie->ie_flags |= IE_ENTROPY;
220	}
221
222	/*
223	 * If the handler names were too long, add +'s to indicate missing
224	 * names. If we run out of room and still have +'s to add, change
225	 * the last character from a + to a *.
226	 */
227	last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
228	while (missed-- > 0) {
229		if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
230			if (*last == '+') {
231				*last = '*';
232				break;
233			} else
234				*last = '+';
235		} else if (space) {
236			strcat(ie->ie_fullname, " +");
237			space = 0;
238		} else
239			strcat(ie->ie_fullname, "+");
240	}
241
242	/*
243	 * If this event has an ithread, update its priority and
244	 * name.
245	 */
246	if (ie->ie_thread != NULL)
247		ithread_update(ie->ie_thread);
248	CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname);
249}
250
251int
252intr_event_create(struct intr_event **event, void *source, int flags, int irq,
253    void (*pre_ithread)(void *), void (*post_ithread)(void *),
254    void (*post_filter)(void *), int (*assign_cpu)(void *, u_char),
255    const char *fmt, ...)
256{
257	struct intr_event *ie;
258	va_list ap;
259
260	/* The only valid flag during creation is IE_SOFT. */
261	if ((flags & ~IE_SOFT) != 0)
262		return (EINVAL);
263	ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO);
264	ie->ie_source = source;
265	ie->ie_pre_ithread = pre_ithread;
266	ie->ie_post_ithread = post_ithread;
267	ie->ie_post_filter = post_filter;
268	ie->ie_assign_cpu = assign_cpu;
269	ie->ie_flags = flags;
270	ie->ie_irq = irq;
271	ie->ie_cpu = NOCPU;
272	TAILQ_INIT(&ie->ie_handlers);
273	mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
274
275	va_start(ap, fmt);
276	vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap);
277	va_end(ap);
278	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
279	mtx_lock(&event_lock);
280	TAILQ_INSERT_TAIL(&event_list, ie, ie_list);
281	mtx_unlock(&event_lock);
282	if (event != NULL)
283		*event = ie;
284	CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name);
285	return (0);
286}
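
/*
 * Illustrative sketch (names hypothetical, details vary by platform):
 * machine-dependent interrupt code typically creates one event per
 * interrupt line at setup time, e.g.
 *
 *	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
 *	    my_mask, my_unmask, my_eoi, my_assign_cpu, "irq%d:", vector);
 *
 * The four callbacks become the pre_ithread, post_ithread, post_filter and
 * assign_cpu hooks used by intr_event_handle(), ithread_execute_handlers()
 * and intr_event_bind() below.
 */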
287
288/*
289 * Bind an interrupt event to the specified CPU.  Note that not all
290 * platforms support binding an interrupt to a CPU.  For those
291 * platforms this request will fail.  For supported platforms, any
292 * associated ithreads as well as the primary interrupt context will
293 * be bound to the specified CPU.  Using a cpu id of NOCPU unbinds
294 * the interrupt event.
295 */
296int
297intr_event_bind(struct intr_event *ie, u_char cpu)
298{
299	cpuset_t mask;
300	lwpid_t id;
301	int error;
302
303	/* Need a CPU to bind to. */
304	if (cpu != NOCPU && CPU_ABSENT(cpu))
305		return (EINVAL);
306
307	if (ie->ie_assign_cpu == NULL)
308		return (EOPNOTSUPP);
309
310	error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR);
311	if (error)
312		return (error);
313
314	/*
315	 * If we have any ithreads try to set their mask first to verify
316	 * permissions, etc.
317	 */
318	mtx_lock(&ie->ie_lock);
319	if (ie->ie_thread != NULL) {
320		CPU_ZERO(&mask);
321		if (cpu == NOCPU)
322			CPU_COPY(cpuset_root, &mask);
323		else
324			CPU_SET(cpu, &mask);
325		id = ie->ie_thread->it_thread->td_tid;
326		mtx_unlock(&ie->ie_lock);
327		error = cpuset_setthread(id, &mask);
328		if (error)
329			return (error);
330	} else
331		mtx_unlock(&ie->ie_lock);
332	error = ie->ie_assign_cpu(ie->ie_source, cpu);
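	/*
	 * If the low-level assignment failed, roll the ithread's cpuset back
	 * to the event's previous binding (or the root set if it was
	 * unbound) so the thread and the hardware stay consistent.
	 */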
333	if (error) {
334		mtx_lock(&ie->ie_lock);
335		if (ie->ie_thread != NULL) {
336			CPU_ZERO(&mask);
337			if (ie->ie_cpu == NOCPU)
338				CPU_COPY(cpuset_root, &mask);
339			else
340				CPU_SET(ie->ie_cpu, &mask);
341			id = ie->ie_thread->it_thread->td_tid;
342			mtx_unlock(&ie->ie_lock);
343			(void)cpuset_setthread(id, &mask);
344		} else
345			mtx_unlock(&ie->ie_lock);
346		return (error);
347	}
348
349	mtx_lock(&ie->ie_lock);
350	ie->ie_cpu = cpu;
351	mtx_unlock(&ie->ie_lock);
352
353	return (error);
354}
355
356static struct intr_event *
357intr_lookup(int irq)
358{
359	struct intr_event *ie;
360
361	mtx_lock(&event_lock);
362	TAILQ_FOREACH(ie, &event_list, ie_list)
363		if (ie->ie_irq == irq &&
364		    (ie->ie_flags & IE_SOFT) == 0 &&
365		    TAILQ_FIRST(&ie->ie_handlers) != NULL)
366			break;
367	mtx_unlock(&event_lock);
368	return (ie);
369}
370
371int
372intr_setaffinity(int irq, void *m)
373{
374	struct intr_event *ie;
375	cpuset_t *mask;
376	u_char cpu;
377	int n;
378
379	mask = m;
380	cpu = NOCPU;
381	/*
382	 * If we're setting all cpus we can unbind.  Otherwise make sure
383	 * only one cpu is in the set.
384	 */
385	if (CPU_CMP(cpuset_root, mask)) {
386		for (n = 0; n < CPU_SETSIZE; n++) {
387			if (!CPU_ISSET(n, mask))
388				continue;
389			if (cpu != NOCPU)
390				return (EINVAL);
391			cpu = (u_char)n;
392		}
393	}
394	ie = intr_lookup(irq);
395	if (ie == NULL)
396		return (ESRCH);
397	return (intr_event_bind(ie, cpu));
398}
399
400int
401intr_getaffinity(int irq, void *m)
402{
403	struct intr_event *ie;
404	cpuset_t *mask;
405
406	mask = m;
407	ie = intr_lookup(irq);
408	if (ie == NULL)
409		return (ESRCH);
410	CPU_ZERO(mask);
411	mtx_lock(&ie->ie_lock);
412	if (ie->ie_cpu == NOCPU)
413		CPU_COPY(cpuset_root, mask);
414	else
415		CPU_SET(ie->ie_cpu, mask);
416	mtx_unlock(&ie->ie_lock);
417	return (0);
418}
419
420int
421intr_event_destroy(struct intr_event *ie)
422{
423
424	mtx_lock(&event_lock);
425	mtx_lock(&ie->ie_lock);
426	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
427		mtx_unlock(&ie->ie_lock);
428		mtx_unlock(&event_lock);
429		return (EBUSY);
430	}
431	TAILQ_REMOVE(&event_list, ie, ie_list);
432#ifndef notyet
433	if (ie->ie_thread != NULL) {
434		ithread_destroy(ie->ie_thread);
435		ie->ie_thread = NULL;
436	}
437#endif
438	mtx_unlock(&ie->ie_lock);
439	mtx_unlock(&event_lock);
440	mtx_destroy(&ie->ie_lock);
441	free(ie, M_ITHREAD);
442	return (0);
443}
444
445#ifndef INTR_FILTER
446static struct intr_thread *
447ithread_create(const char *name)
448{
449	struct intr_thread *ithd;
450	struct thread *td;
451	int error;
452
453	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
454
455	error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
456		    &td, RFSTOPPED | RFHIGHPID,
457	    	    0, "intr", "%s", name);
458	if (error)
459		panic("kproc_create() failed with %d", error);
460	thread_lock(td);
461	sched_class(td, PRI_ITHD);
462	TD_SET_IWAIT(td);
463	thread_unlock(td);
464	td->td_pflags |= TDP_ITHREAD;
465	ithd->it_thread = td;
466	CTR2(KTR_INTR, "%s: created %s", __func__, name);
467	return (ithd);
468}
469#else
470static struct intr_thread *
471ithread_create(const char *name, struct intr_handler *ih)
472{
473	struct intr_thread *ithd;
474	struct thread *td;
475	int error;
476
477	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
478
479	error = kproc_kthread_add(ithread_loop, ih, &intrproc,
480		    &td, RFSTOPPED | RFHIGHPID,
481	    	    0, "intr", "%s", name);
482	if (error)
483		panic("kproc_create() failed with %d", error);
484	thread_lock(td);
485	sched_class(td, PRI_ITHD);
486	TD_SET_IWAIT(td);
487	thread_unlock(td);
488	td->td_pflags |= TDP_ITHREAD;
489	ithd->it_thread = td;
490	CTR2(KTR_INTR, "%s: created %s", __func__, name);
491	return (ithd);
492}
493#endif
494
495static void
496ithread_destroy(struct intr_thread *ithread)
497{
498	struct thread *td;
499
500	CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name);
501	td = ithread->it_thread;
502	thread_lock(td);
503	ithread->it_flags |= IT_DEAD;
504	if (TD_AWAITING_INTR(td)) {
505		TD_CLR_IWAIT(td);
506		sched_add(td, SRQ_INTR);
507	}
508	thread_unlock(td);
509}
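
/*
 * Note that the ithread is not torn down synchronously here: ithread_loop()
 * notices IT_DEAD on its next pass, frees the struct intr_thread and calls
 * kthread_exit() itself.
 */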
510
511#ifndef INTR_FILTER
512int
513intr_event_add_handler(struct intr_event *ie, const char *name,
514    driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
515    enum intr_type flags, void **cookiep)
516{
517	struct intr_handler *ih, *temp_ih;
518	struct intr_thread *it;
519
520	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
521		return (EINVAL);
522
523	/* Allocate and populate an interrupt handler structure. */
524	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
525	ih->ih_filter = filter;
526	ih->ih_handler = handler;
527	ih->ih_argument = arg;
528	strlcpy(ih->ih_name, name, sizeof(ih->ih_name));
529	ih->ih_event = ie;
530	ih->ih_pri = pri;
531	if (flags & INTR_EXCL)
532		ih->ih_flags = IH_EXCLUSIVE;
533	if (flags & INTR_MPSAFE)
534		ih->ih_flags |= IH_MPSAFE;
535	if (flags & INTR_ENTROPY)
536		ih->ih_flags |= IH_ENTROPY;
537
538	/* We can only have one exclusive handler in an event. */
539	mtx_lock(&ie->ie_lock);
540	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
541		if ((flags & INTR_EXCL) ||
542		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
543			mtx_unlock(&ie->ie_lock);
544			free(ih, M_ITHREAD);
545			return (EINVAL);
546		}
547	}
548
549	/* Create a thread if we need one. */
550	while (ie->ie_thread == NULL && handler != NULL) {
551		if (ie->ie_flags & IE_ADDING_THREAD)
552			msleep(ie, &ie->ie_lock, 0, "ithread", 0);
553		else {
554			ie->ie_flags |= IE_ADDING_THREAD;
555			mtx_unlock(&ie->ie_lock);
556			it = ithread_create("intr: newborn");
557			mtx_lock(&ie->ie_lock);
558			ie->ie_flags &= ~IE_ADDING_THREAD;
559			ie->ie_thread = it;
560			it->it_event = ie;
561			ithread_update(it);
562			wakeup(ie);
563		}
564	}
565
566	/* Add the new handler to the event in priority order. */
567	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
568		if (temp_ih->ih_pri > ih->ih_pri)
569			break;
570	}
571	if (temp_ih == NULL)
572		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
573	else
574		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
575	intr_event_update(ie);
576
577	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
578	    ie->ie_name);
579	mtx_unlock(&ie->ie_lock);
580
581	if (cookiep != NULL)
582		*cookiep = ih;
583	return (0);
584}
585#else
586int
587intr_event_add_handler(struct intr_event *ie, const char *name,
588    driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
589    enum intr_type flags, void **cookiep)
590{
591	struct intr_handler *ih, *temp_ih;
592	struct intr_thread *it;
593
594	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
595		return (EINVAL);
596
597	/* Allocate and populate an interrupt handler structure. */
598	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
599	ih->ih_filter = filter;
600	ih->ih_handler = handler;
601	ih->ih_argument = arg;
602	strlcpy(ih->ih_name, name, sizeof(ih->ih_name));
603	ih->ih_event = ie;
604	ih->ih_pri = pri;
605	if (flags & INTR_EXCL)
606		ih->ih_flags = IH_EXCLUSIVE;
607	if (flags & INTR_MPSAFE)
608		ih->ih_flags |= IH_MPSAFE;
609	if (flags & INTR_ENTROPY)
610		ih->ih_flags |= IH_ENTROPY;
611
612	/* We can only have one exclusive handler in an event. */
613	mtx_lock(&ie->ie_lock);
614	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
615		if ((flags & INTR_EXCL) ||
616		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
617			mtx_unlock(&ie->ie_lock);
618			free(ih, M_ITHREAD);
619			return (EINVAL);
620		}
621	}
622
623	/* For filtered handlers, create a private ithread to run on. */
624	if (filter != NULL && handler != NULL) {
625		mtx_unlock(&ie->ie_lock);
626		it = ithread_create("intr: newborn", ih);
627		mtx_lock(&ie->ie_lock);
628		it->it_event = ie;
629		ih->ih_thread = it;
630		ithread_update(it); /* XXX - do we really need this?!?!? */
631	} else { /* Create the global per-event thread if we need one. */
632		while (ie->ie_thread == NULL && handler != NULL) {
633			if (ie->ie_flags & IE_ADDING_THREAD)
634				msleep(ie, &ie->ie_lock, 0, "ithread", 0);
635			else {
636				ie->ie_flags |= IE_ADDING_THREAD;
637				mtx_unlock(&ie->ie_lock);
638				it = ithread_create("intr: newborn", ih);
639				mtx_lock(&ie->ie_lock);
640				ie->ie_flags &= ~IE_ADDING_THREAD;
641				ie->ie_thread = it;
642				it->it_event = ie;
643				ithread_update(it);
644				wakeup(ie);
645			}
646		}
647	}
648
649	/* Add the new handler to the event in priority order. */
650	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
651		if (temp_ih->ih_pri > ih->ih_pri)
652			break;
653	}
654	if (temp_ih == NULL)
655		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
656	else
657		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
658	intr_event_update(ie);
659
660	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
661	    ie->ie_name);
662	mtx_unlock(&ie->ie_lock);
663
664	if (cookiep != NULL)
665		*cookiep = ih;
666	return (0);
667}
668#endif
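
/*
 * Illustrative sketch of the usual path into intr_event_add_handler() (the
 * driver and softc names here are hypothetical): a driver calling
 *
 *	bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE,
 *	    NULL, foo_intr, sc, &sc->intrhand);
 *
 * ends up here via the bus and nexus code with filter == NULL and
 * handler == foo_intr, so an ithread is used; the cookie returned in
 * *cookiep is what is later passed to intr_event_remove_handler().
 */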
669
670/*
671 * Append a description preceded by a ':' to the name of the specified
672 * interrupt handler.
673 */
674int
675intr_event_describe_handler(struct intr_event *ie, void *cookie,
676    const char *descr)
677{
678	struct intr_handler *ih;
679	size_t space;
680	char *start;
681
682	mtx_lock(&ie->ie_lock);
683#ifdef INVARIANTS
684	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
685		if (ih == cookie)
686			break;
687	}
688	if (ih == NULL) {
689		mtx_unlock(&ie->ie_lock);
690		panic("handler %p not found in interrupt event %p", cookie, ie);
691	}
692#endif
693	ih = cookie;
694
695	/*
696	 * Look for an existing description by checking for an
697	 * existing ":".  This assumes device names do not include
698	 * colons.  If one is found, prepare to insert the new
699	 * description at that point.  If one is not found, find the
700	 * end of the name to use as the insertion point.
701	 */
702	start = strchr(ih->ih_name, ':');
703	if (start == NULL)
704		start = strchr(ih->ih_name, 0);
705
706	/*
707	 * See if there is enough remaining room in the string for the
708	 * description + ":".  The "- 1" leaves room for the trailing
709	 * '\0'.  The "+ 1" accounts for the colon.
710	 */
711	space = sizeof(ih->ih_name) - (start - ih->ih_name) - 1;
712	if (strlen(descr) + 1 > space) {
713		mtx_unlock(&ie->ie_lock);
714		return (ENOSPC);
715	}
716
717	/* Append a colon followed by the description. */
718	*start = ':';
719	strcpy(start + 1, descr);
720	intr_event_update(ie);
721	mtx_unlock(&ie->ie_lock);
722	return (0);
723}
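
/*
 * For example, describing a handler registered as "em0" with "rx0" changes
 * its name to "em0:rx0"; a later call replaces everything after the colon,
 * so describing it again with "tx0" yields "em0:tx0".
 */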
724
725/*
726 * Return the ie_source field from the intr_event an intr_handler is
727 * associated with.
728 */
729void *
730intr_handler_source(void *cookie)
731{
732	struct intr_handler *ih;
733	struct intr_event *ie;
734
735	ih = (struct intr_handler *)cookie;
736	if (ih == NULL)
737		return (NULL);
738	ie = ih->ih_event;
739	KASSERT(ie != NULL,
740	    ("interrupt handler \"%s\" has a NULL interrupt event",
741	    ih->ih_name));
742	return (ie->ie_source);
743}
744
745/*
746 * Sleep until an ithread finishes executing an interrupt handler.
747 *
748 * XXX Doesn't currently handle interrupt filters or fast interrupt
749 * handlers.  This is intended for compatibility with Linux drivers
750 * only.  Do not use in BSD code.
751 */
752void
753_intr_drain(int irq)
754{
755	struct intr_event *ie;
756	struct intr_thread *ithd;
757	struct thread *td;
758
759	ie = intr_lookup(irq);
760	if (ie == NULL)
761		return;
762	if (ie->ie_thread == NULL)
763		return;
764	ithd = ie->ie_thread;
765	td = ithd->it_thread;
766	/*
767	 * We set the flag and wait for it to be cleared to avoid
768	 * long delays with potentially busy interrupt handlers
769	 * were we to only sample TD_AWAITING_INTR() every tick.
770	 */
771	thread_lock(td);
772	if (!TD_AWAITING_INTR(td)) {
773		ithd->it_flags |= IT_WAIT;
774		while (ithd->it_flags & IT_WAIT) {
775			thread_unlock(td);
776			pause("idrain", 1);
777			thread_lock(td);
778		}
779	}
780	thread_unlock(td);
781	return;
782}
783
784
785#ifndef INTR_FILTER
786int
787intr_event_remove_handler(void *cookie)
788{
789	struct intr_handler *handler = (struct intr_handler *)cookie;
790	struct intr_event *ie;
791#ifdef INVARIANTS
792	struct intr_handler *ih;
793#endif
794#ifdef notyet
795	int dead;
796#endif
797
798	if (handler == NULL)
799		return (EINVAL);
800	ie = handler->ih_event;
801	KASSERT(ie != NULL,
802	    ("interrupt handler \"%s\" has a NULL interrupt event",
803	    handler->ih_name));
804	mtx_lock(&ie->ie_lock);
805	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
806	    ie->ie_name);
807#ifdef INVARIANTS
808	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
809		if (ih == handler)
810			goto ok;
811	mtx_unlock(&ie->ie_lock);
812	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
813	    handler->ih_name, ie->ie_name);
814ok:
815#endif
816	/*
817	 * If there is no ithread, then just remove the handler and return.
818	 * XXX: Note that an INTR_FAST handler might be running on another
819	 * CPU!
820	 */
821	if (ie->ie_thread == NULL) {
822		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
823		mtx_unlock(&ie->ie_lock);
824		free(handler, M_ITHREAD);
825		return (0);
826	}
827
828	/*
829	 * If the interrupt thread is already running, then just mark this
830	 * handler as being dead and let the ithread do the actual removal.
831	 *
832	 * During a cold boot while cold is set, msleep() does not sleep,
833	 * so we have to remove the handler here rather than letting the
834	 * thread do it.
835	 */
836	thread_lock(ie->ie_thread->it_thread);
837	if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) {
838		handler->ih_flags |= IH_DEAD;
839
840		/*
841		 * Ensure that the thread will process the handler list
842		 * again and remove this handler if it has already passed
843		 * it on the list.
844		 */
845		atomic_store_rel_int(&ie->ie_thread->it_need, 1);
846	} else
847		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
848	thread_unlock(ie->ie_thread->it_thread);
849	while (handler->ih_flags & IH_DEAD)
850		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
851	intr_event_update(ie);
852#ifdef notyet
853	/*
854	 * XXX: This could be bad in the case of ppbus(4).  Also, I think
855	 * this could lead to races of stale data when servicing an
856	 * interrupt.
857	 */
858	dead = 1;
859	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
860		if (!(ih->ih_flags & IH_FAST)) {
861			dead = 0;
862			break;
863		}
864	}
865	if (dead) {
866		ithread_destroy(ie->ie_thread);
867		ie->ie_thread = NULL;
868	}
869#endif
870	mtx_unlock(&ie->ie_lock);
871	free(handler, M_ITHREAD);
872	return (0);
873}
874
875static int
876intr_event_schedule_thread(struct intr_event *ie)
877{
878	struct intr_entropy entropy;
879	struct intr_thread *it;
880	struct thread *td;
881	struct thread *ctd;
882	struct proc *p;
883
884	/*
885	 * If no ithread or no handlers, then we have a stray interrupt.
886	 */
887	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) ||
888	    ie->ie_thread == NULL)
889		return (EINVAL);
890
891	ctd = curthread;
892	it = ie->ie_thread;
893	td = it->it_thread;
894	p = td->td_proc;
895
896	/*
897	 * If any of the handlers for this ithread claim to be good
898	 * sources of entropy, then gather some.
899	 */
900	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
901		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
902		    p->p_pid, td->td_name);
903		entropy.event = (uintptr_t)ie;
904		entropy.td = ctd;
905		random_harvest(&entropy, sizeof(entropy), 2,
906		    RANDOM_INTERRUPT);
907	}
908
909	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
910
911	/*
912	 * Set it_need to tell the thread to keep running if it is already
913	 * running.  Then, lock the thread and see if we actually need to
914	 * put it on the runqueue.
915	 */
916	atomic_store_rel_int(&it->it_need, 1);
917	thread_lock(td);
918	if (TD_AWAITING_INTR(td)) {
919		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
920		    td->td_name);
921		TD_CLR_IWAIT(td);
922		sched_add(td, SRQ_INTR);
923	} else {
924		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
925		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
926	}
927	thread_unlock(td);
928
929	return (0);
930}
931#else
932int
933intr_event_remove_handler(void *cookie)
934{
935	struct intr_handler *handler = (struct intr_handler *)cookie;
936	struct intr_event *ie;
937	struct intr_thread *it;
938#ifdef INVARIANTS
939	struct intr_handler *ih;
940#endif
941#ifdef notyet
942	int dead;
943#endif
944
945	if (handler == NULL)
946		return (EINVAL);
947	ie = handler->ih_event;
948	KASSERT(ie != NULL,
949	    ("interrupt handler \"%s\" has a NULL interrupt event",
950	    handler->ih_name));
951	mtx_lock(&ie->ie_lock);
952	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
953	    ie->ie_name);
954#ifdef INVARIANTS
955	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
956		if (ih == handler)
957			goto ok;
958	mtx_unlock(&ie->ie_lock);
959	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
960	    handler->ih_name, ie->ie_name);
961ok:
962#endif
963	/*
964	 * If there are no ithreads (per event and per handler), then
965	 * just remove the handler and return.
966	 * XXX: Note that an INTR_FAST handler might be running on another CPU!
967	 */
968	if (ie->ie_thread == NULL && handler->ih_thread == NULL) {
969		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
970		mtx_unlock(&ie->ie_lock);
971		free(handler, M_ITHREAD);
972		return (0);
973	}
974
975	/* Private or global ithread? */
976	it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread;
977	/*
978	 * If the interrupt thread is already running, then just mark this
979	 * handler as being dead and let the ithread do the actual removal.
980	 *
981	 * During a cold boot while cold is set, msleep() does not sleep,
982	 * so we have to remove the handler here rather than letting the
983	 * thread do it.
984	 */
985	thread_lock(it->it_thread);
986	if (!TD_AWAITING_INTR(it->it_thread) && !cold) {
987		handler->ih_flags |= IH_DEAD;
988
989		/*
990		 * Ensure that the thread will process the handler list
991		 * again and remove this handler if it has already passed
992		 * it on the list.
993		 */
994		atomic_store_rel_int(&it->it_need, 1);
995	} else
996		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
997	thread_unlock(it->it_thread);
998	while (handler->ih_flags & IH_DEAD)
999		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
1000	/*
1001	 * At this point, the handler has been disconnected from the event,
1002	 * so we can kill the private ithread if any.
1003	 */
1004	if (handler->ih_thread) {
1005		ithread_destroy(handler->ih_thread);
1006		handler->ih_thread = NULL;
1007	}
1008	intr_event_update(ie);
1009#ifdef notyet
1010	/*
1011	 * XXX: This could be bad in the case of ppbus(4).  Also, I think
1012	 * this could lead to races of stale data when servicing an
1013	 * interrupt.
1014	 */
1015	dead = 1;
1016	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1017		if (ih->ih_handler != NULL) {
1018			dead = 0;
1019			break;
1020		}
1021	}
1022	if (dead) {
1023		ithread_destroy(ie->ie_thread);
1024		ie->ie_thread = NULL;
1025	}
1026#endif
1027	mtx_unlock(&ie->ie_lock);
1028	free(handler, M_ITHREAD);
1029	return (0);
1030}
1031
1032static int
1033intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
1034{
1035	struct intr_entropy entropy;
1036	struct thread *td;
1037	struct thread *ctd;
1038	struct proc *p;
1039
1040	/*
1041	 * If no ithread or no handlers, then we have a stray interrupt.
1042	 */
1043	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL)
1044		return (EINVAL);
1045
1046	ctd = curthread;
1047	td = it->it_thread;
1048	p = td->td_proc;
1049
1050	/*
1051	 * If any of the handlers for this ithread claim to be good
1052	 * sources of entropy, then gather some.
1053	 */
1054	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
1055		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
1056		    p->p_pid, td->td_name);
1057		entropy.event = (uintptr_t)ie;
1058		entropy.td = ctd;
1059		random_harvest(&entropy, sizeof(entropy), 2,
1060		    RANDOM_INTERRUPT);
1061	}
1062
1063	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
1064
1065	/*
1066	 * Set it_need to tell the thread to keep running if it is already
1067	 * running.  Then, lock the thread and see if we actually need to
1068	 * put it on the runqueue.
1069	 */
1070	atomic_store_rel_int(&it->it_need, 1);
1071	thread_lock(td);
1072	if (TD_AWAITING_INTR(td)) {
1073		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
1074		    td->td_name);
1075		TD_CLR_IWAIT(td);
1076		sched_add(td, SRQ_INTR);
1077	} else {
1078		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
1079		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
1080	}
1081	thread_unlock(td);
1082
1083	return (0);
1084}
1085#endif
1086
1087/*
1088 * Allow interrupt event binding for software interrupt handlers -- a no-op,
1089 * since interrupts are generated in software rather than being directed by
1090 * a PIC.
1091 */
1092static int
1093swi_assign_cpu(void *arg, u_char cpu)
1094{
1095
1096	return (0);
1097}
1098
1099/*
1100 * Add a software interrupt handler to a specified event.  If a given event
1101 * is not specified, then a new event is created.
1102 */
1103int
1104swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
1105	    void *arg, int pri, enum intr_type flags, void **cookiep)
1106{
1107	struct intr_event *ie;
1108	int error;
1109
1110	if (flags & INTR_ENTROPY)
1111		return (EINVAL);
1112
1113	ie = (eventp != NULL) ? *eventp : NULL;
1114
1115	if (ie != NULL) {
1116		if (!(ie->ie_flags & IE_SOFT))
1117			return (EINVAL);
1118	} else {
1119		error = intr_event_create(&ie, NULL, IE_SOFT, 0,
1120		    NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri);
1121		if (error)
1122			return (error);
1123		if (eventp != NULL)
1124			*eventp = ie;
1125	}
1126	error = intr_event_add_handler(ie, name, NULL, handler, arg,
1127	    PI_SWI(pri), flags, cookiep);
1128	return (error);
1129}
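
/*
 * A minimal usage sketch: pairing swi_add() with swi_sched(), here with a
 * hypothetical "foo" handler at SWI_VM priority:
 *
 *	static void *foo_ih;
 *	...
 *	swi_add(NULL, "foo", foo_swi, NULL, SWI_VM, INTR_MPSAFE, &foo_ih);
 *	...
 *	swi_sched(foo_ih, 0);
 *
 * See start_softintr() at the bottom of this file for an in-tree example.
 */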
1130
1131/*
1132 * Schedule a software interrupt thread.
1133 */
1134void
1135swi_sched(void *cookie, int flags)
1136{
1137	struct intr_handler *ih = (struct intr_handler *)cookie;
1138	struct intr_event *ie = ih->ih_event;
1139	struct intr_entropy entropy;
1140	int error;
1141
1142	CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
1143	    ih->ih_need);
1144
1145	if (harvest.swi) {
1146		CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy",
1147		    curproc->p_pid, curthread->td_name);
1148		entropy.event = (uintptr_t)ih;
1149		entropy.td = curthread;
1150		random_harvest(&entropy, sizeof(entropy), 1,
1151		    RANDOM_SWI);
1152	}
1153
1154	/*
1155	 * Set ih_need for this handler so that if the ithread is already
1156	 * running it will execute this handler on the next pass.  Otherwise,
1157	 * it will execute it the next time it runs.
1158	 */
1159	atomic_store_rel_int(&ih->ih_need, 1);
1160
1161	if (!(flags & SWI_DELAY)) {
1162		PCPU_INC(cnt.v_soft);
1163#ifdef INTR_FILTER
1164		error = intr_event_schedule_thread(ie, ie->ie_thread);
1165#else
1166		error = intr_event_schedule_thread(ie);
1167#endif
1168		KASSERT(error == 0, ("stray software interrupt"));
1169	}
1170}
1171
1172/*
1173 * Remove a software interrupt handler.  Currently this code does not
1174 * remove the associated interrupt event if it becomes empty.  Calling code
1175 * may do so manually via intr_event_destroy(), but that's not really
1176 * an optimal interface.
1177 */
1178int
1179swi_remove(void *cookie)
1180{
1181
1182	return (intr_event_remove_handler(cookie));
1183}
1184
1185#ifdef INTR_FILTER
1186static void
1187priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih)
1188{
1189	struct intr_event *ie;
1190
1191	ie = ih->ih_event;
1192	/*
1193	 * If this handler is marked for death, remove it from
1194	 * the list of handlers and wake up the sleeper.
1195	 */
1196	if (ih->ih_flags & IH_DEAD) {
1197		mtx_lock(&ie->ie_lock);
1198		TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1199		ih->ih_flags &= ~IH_DEAD;
1200		wakeup(ih);
1201		mtx_unlock(&ie->ie_lock);
1202		return;
1203	}
1204
1205	/* Execute this handler. */
1206	CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1207	     __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument,
1208	     ih->ih_name, ih->ih_flags);
1209
1210	if (!(ih->ih_flags & IH_MPSAFE))
1211		mtx_lock(&Giant);
1212	ih->ih_handler(ih->ih_argument);
1213	if (!(ih->ih_flags & IH_MPSAFE))
1214		mtx_unlock(&Giant);
1215}
1216#endif
1217
1218/*
1219 * This is a public function for use by drivers that mux interrupt
1220 * handlers for child devices from their interrupt handler.
1221 */
1222void
1223intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
1224{
1225	struct intr_handler *ih, *ihn;
1226
1227	TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
1228		/*
1229		 * If this handler is marked for death, remove it from
1230		 * the list of handlers and wake up the sleeper.
1231		 */
1232		if (ih->ih_flags & IH_DEAD) {
1233			mtx_lock(&ie->ie_lock);
1234			TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1235			ih->ih_flags &= ~IH_DEAD;
1236			wakeup(ih);
1237			mtx_unlock(&ie->ie_lock);
1238			continue;
1239		}
1240
1241		/* Skip filter only handlers */
1242		if (ih->ih_handler == NULL)
1243			continue;
1244
1245		/*
1246		 * For software interrupt threads, we only execute
1247		 * handlers that have their need flag set.  Hardware
1248		 * interrupt threads always invoke all of their handlers.
1249		 */
1250		if (ie->ie_flags & IE_SOFT) {
1251			if (atomic_load_acq_int(&ih->ih_need) == 0)
1252				continue;
1253			else
1254				atomic_store_rel_int(&ih->ih_need, 0);
1255		}
1256
1257		/* Execute this handler. */
1258		CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1259		    __func__, p->p_pid, (void *)ih->ih_handler,
1260		    ih->ih_argument, ih->ih_name, ih->ih_flags);
1261
1262		if (!(ih->ih_flags & IH_MPSAFE))
1263			mtx_lock(&Giant);
1264		ih->ih_handler(ih->ih_argument);
1265		if (!(ih->ih_flags & IH_MPSAFE))
1266			mtx_unlock(&Giant);
1267	}
1268}
1269
1270static void
1271ithread_execute_handlers(struct proc *p, struct intr_event *ie)
1272{
1273
1274	/* Interrupt handlers should not sleep. */
1275	if (!(ie->ie_flags & IE_SOFT))
1276		THREAD_NO_SLEEPING();
1277	intr_event_execute_handlers(p, ie);
1278	if (!(ie->ie_flags & IE_SOFT))
1279		THREAD_SLEEPING_OK();
1280
1281	/*
1282	 * Interrupt storm handling:
1283	 *
1284	 * If this interrupt source is currently storming, then throttle
1285	 * it to only fire the handler once per clock tick.
1286	 *
1287	 * If this interrupt source is not currently storming, but the
1288	 * number of back to back interrupts exceeds the storm threshold,
1289	 * then enter storming mode.
1290	 */
1291	if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold &&
1292	    !(ie->ie_flags & IE_SOFT)) {
1293		/* Report the message only once every second. */
1294		if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) {
1295			printf(
1296	"interrupt storm detected on \"%s\"; throttling interrupt source\n",
1297			    ie->ie_name);
1298		}
1299		pause("istorm", 1);
1300	} else
1301		ie->ie_count++;
1302
1303	/*
1304	 * Now that all the handlers have had a chance to run, reenable
1305	 * the interrupt source.
1306	 */
1307	if (ie->ie_post_ithread != NULL)
1308		ie->ie_post_ithread(ie->ie_source);
1309}
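
/*
 * Note that ie_count is reset to 0 by ithread_loop() whenever the thread
 * goes idle, so the storm threshold above measures back-to-back interrupts
 * without an intervening idle period rather than a lifetime total.
 */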
1310
1311#ifndef INTR_FILTER
1312/*
1313 * This is the main code for interrupt threads.
1314 */
1315static void
1316ithread_loop(void *arg)
1317{
1318	struct intr_thread *ithd;
1319	struct intr_event *ie;
1320	struct thread *td;
1321	struct proc *p;
1322	int wake;
1323
1324	td = curthread;
1325	p = td->td_proc;
1326	ithd = (struct intr_thread *)arg;
1327	KASSERT(ithd->it_thread == td,
1328	    ("%s: ithread and proc linkage out of sync", __func__));
1329	ie = ithd->it_event;
1330	ie->ie_count = 0;
1331	wake = 0;
1332
1333	/*
1334	 * As long as we have interrupts outstanding, go through the
1335	 * list of handlers, giving each one a go at it.
1336	 */
1337	for (;;) {
1338		/*
1339		 * If we are an orphaned thread, then just die.
1340		 */
1341		if (ithd->it_flags & IT_DEAD) {
1342			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
1343			    p->p_pid, td->td_name);
1344			free(ithd, M_ITHREAD);
1345			kthread_exit();
1346		}
1347
1348		/*
1349		 * Service interrupts.  If another interrupt arrives while
1350		 * we are running, it will set it_need to note that we
1351		 * should make another pass.
1352		 */
1353		while (atomic_load_acq_int(&ithd->it_need) != 0) {
1354			/*
1355			 * This might need a full read and write barrier
1356			 * to make sure that this write posts before any
1357			 * of the memory or device accesses in the
1358			 * handlers.
1359			 */
1360			atomic_store_rel_int(&ithd->it_need, 0);
1361			ithread_execute_handlers(p, ie);
1362		}
1363		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
1364		mtx_assert(&Giant, MA_NOTOWNED);
1365
1366		/*
1367		 * Processed all our interrupts.  Now get the sched
1368		 * lock.  This may take a while and it_need may get
1369		 * set again, so we have to check it again.
1370		 */
1371		thread_lock(td);
1372		if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
1373		    !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
1374			TD_SET_IWAIT(td);
1375			ie->ie_count = 0;
1376			mi_switch(SW_VOL | SWT_IWAIT, NULL);
1377		}
1378		if (ithd->it_flags & IT_WAIT) {
1379			wake = 1;
1380			ithd->it_flags &= ~IT_WAIT;
1381		}
1382		thread_unlock(td);
1383		if (wake) {
1384			wakeup(ithd);
1385			wake = 0;
1386		}
1387	}
1388}
1389
1390/*
1391 * Main interrupt handling body.
1392 *
1393 * Input:
1394 * o ie:                        the event connected to this interrupt.
1395 * o frame:                     some archs (e.g. i386) pass a frame to some
1396 *                              handlers as their main argument.
1397 * Return value:
1398 * o 0:                         everything ok.
1399 * o EINVAL:                    stray interrupt.
1400 */
1401int
1402intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1403{
1404	struct intr_handler *ih;
1405	struct trapframe *oldframe;
1406	struct thread *td;
1407	int error, ret, thread;
1408
1409	td = curthread;
1410
1411#ifdef KSTACK_USAGE_PROF
1412	intr_prof_stack_use(td, frame);
1413#endif
1414
1415	/* An interrupt with no event or handlers is a stray interrupt. */
1416	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1417		return (EINVAL);
1418
1419	/*
1420	 * Execute fast interrupt handlers directly.
1421	 * To support clock handlers, if a handler registers
1422	 * with a NULL argument, then we pass it a pointer to
1423	 * a trapframe as its argument.
1424	 */
1425	td->td_intr_nesting_level++;
1426	thread = 0;
1427	ret = 0;
1428	critical_enter();
1429	oldframe = td->td_intr_frame;
1430	td->td_intr_frame = frame;
1431	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1432		if (ih->ih_filter == NULL) {
1433			thread = 1;
1434			continue;
1435		}
1436		CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
1437		    ih->ih_filter, ih->ih_argument == NULL ? frame :
1438		    ih->ih_argument, ih->ih_name);
1439		if (ih->ih_argument == NULL)
1440			ret = ih->ih_filter(frame);
1441		else
1442			ret = ih->ih_filter(ih->ih_argument);
1443		KASSERT(ret == FILTER_STRAY ||
1444		    ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
1445		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
1446		    ("%s: incorrect return value %#x from %s", __func__, ret,
1447		    ih->ih_name));
1448
1449		/*
1450		 * Wrapper handler special handling:
1451		 *
1452		 * in some particular cases (like pccard and pccbb),
1453		 * the _real_ device handler is wrapped in a couple of
1454		 * functions - a filter wrapper and an ithread wrapper.
1455		 * In this case (and just in this case), the filter wrapper
1456		 * could ask the system to schedule the ithread and mask
1457		 * the interrupt source if the wrapped handler is composed
1458		 * of just an ithread handler.
1459		 *
1460		 * TODO: write a generic wrapper to avoid people rolling
1461		 * their own
1462		 */
1463		if (!thread) {
1464			if (ret == FILTER_SCHEDULE_THREAD)
1465				thread = 1;
1466		}
1467	}
1468	td->td_intr_frame = oldframe;
1469
1470	if (thread) {
1471		if (ie->ie_pre_ithread != NULL)
1472			ie->ie_pre_ithread(ie->ie_source);
1473	} else {
1474		if (ie->ie_post_filter != NULL)
1475			ie->ie_post_filter(ie->ie_source);
1476	}
1477
1478	/* Schedule the ithread if needed. */
1479	if (thread) {
1480		error = intr_event_schedule_thread(ie);
1481#ifndef XEN
1482		KASSERT(error == 0, ("bad stray interrupt"));
1483#else
1484		if (error != 0)
1485			log(LOG_WARNING, "bad stray interrupt");
1486#endif
1487	}
1488	critical_exit();
1489	td->td_intr_nesting_level--;
1490	return (0);
1491}
1492#else
1493/*
1494 * This is the main code for interrupt threads.
1495 */
1496static void
1497ithread_loop(void *arg)
1498{
1499	struct intr_thread *ithd;
1500	struct intr_handler *ih;
1501	struct intr_event *ie;
1502	struct thread *td;
1503	struct proc *p;
1504	int priv;
1505	int wake;
1506
1507	td = curthread;
1508	p = td->td_proc;
1509	ih = (struct intr_handler *)arg;
1510	priv = (ih->ih_thread != NULL) ? 1 : 0;
1511	ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread;
1512	KASSERT(ithd->it_thread == td,
1513	    ("%s: ithread and proc linkage out of sync", __func__));
1514	ie = ithd->it_event;
1515	ie->ie_count = 0;
1516	wake = 0;
1517
1518	/*
1519	 * As long as we have interrupts outstanding, go through the
1520	 * list of handlers, giving each one a go at it.
1521	 */
1522	for (;;) {
1523		/*
1524		 * If we are an orphaned thread, then just die.
1525		 */
1526		if (ithd->it_flags & IT_DEAD) {
1527			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
1528			    p->p_pid, td->td_name);
1529			free(ithd, M_ITHREAD);
1530			kthread_exit();
1531		}
1532
1533		/*
1534		 * Service interrupts.  If another interrupt arrives while
1535		 * we are running, it will set it_need to note that we
1536		 * should make another pass.
1537		 */
1538		while (atomic_load_acq_int(&ithd->it_need) != 0) {
1539			/*
1540			 * This might need a full read and write barrier
1541			 * to make sure that this write posts before any
1542			 * of the memory or device accesses in the
1543			 * handlers.
1544			 */
1545			atomic_store_rel_int(&ithd->it_need, 0);
1546			if (priv)
1547				priv_ithread_execute_handler(p, ih);
1548			else
1549				ithread_execute_handlers(p, ie);
1550		}
1551		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
1552		mtx_assert(&Giant, MA_NOTOWNED);
1553
1554		/*
1555		 * Processed all our interrupts.  Now get the sched
1556		 * lock.  This may take a while and it_need may get
1557		 * set again, so we have to check it again.
1558		 */
1559		thread_lock(td);
1560		if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
1561		    !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
1562			TD_SET_IWAIT(td);
1563			ie->ie_count = 0;
1564			mi_switch(SW_VOL | SWT_IWAIT, NULL);
1565		}
1566		if (ithd->it_flags & IT_WAIT) {
1567			wake = 1;
1568			ithd->it_flags &= ~IT_WAIT;
1569		}
1570		thread_unlock(td);
1571		if (wake) {
1572			wakeup(ithd);
1573			wake = 0;
1574		}
1575	}
1576}
1577
1578/*
1579 * Main loop for interrupt filter.
1580 *
1581 * Some architectures (i386, amd64 and arm) require the optional frame
1582 * parameter, and use it as the main argument for fast handler execution
1583 * when ih_argument == NULL.
1584 *
1585 * Return value:
1586 * o FILTER_STRAY:              No filter recognized the event, and no
1587 *                              filter-less handler is registered on this
1588 *                              line.
1589 * o FILTER_HANDLED:            A filter claimed the event and served it.
1590 * o FILTER_SCHEDULE_THREAD:    No filter claimed the event, but there's at
1591 *                              least one filter-less handler on this line.
1592 * o FILTER_HANDLED |
1593 *   FILTER_SCHEDULE_THREAD:    A filter claimed the event, and asked for
1594 *                              scheduling the per-handler ithread.
1595 *
1596 * In case an ithread has to be scheduled, in *ithd there will be a
1597 * pointer to a struct intr_thread containing the thread to be
1598 * scheduled.
1599 */
1600
1601static int
1602intr_filter_loop(struct intr_event *ie, struct trapframe *frame,
1603		 struct intr_thread **ithd)
1604{
1605	struct intr_handler *ih;
1606	void *arg;
1607	int ret, thread_only;
1608
1609	ret = 0;
1610	thread_only = 0;
1611	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1612		/*
1613		 * Execute fast interrupt handlers directly.
1614		 * To support clock handlers, if a handler registers
1615		 * with a NULL argument, then we pass it a pointer to
1616		 * a trapframe as its argument.
1617		 */
1618		arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument);
1619
1620		CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__,
1621		     ih->ih_filter, ih->ih_handler, arg, ih->ih_name);
1622
1623		if (ih->ih_filter != NULL)
1624			ret = ih->ih_filter(arg);
1625		else {
1626			thread_only = 1;
1627			continue;
1628		}
1629		KASSERT(ret == FILTER_STRAY ||
1630		    ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
1631		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
1632		    ("%s: incorrect return value %#x from %s", __func__, ret,
1633		    ih->ih_name));
1634		if (ret & FILTER_STRAY)
1635			continue;
1636		else {
1637			*ithd = ih->ih_thread;
1638			return (ret);
1639		}
1640	}
1641
1642	/*
1643	 * No filters handled the interrupt and we have at least
1644	 * one handler without a filter.  In this case, we schedule
1645	 * all of the filter-less handlers to run in the ithread.
1646	 */
1647	if (thread_only) {
1648		*ithd = ie->ie_thread;
1649		return (FILTER_SCHEDULE_THREAD);
1650	}
1651	return (FILTER_STRAY);
1652}
1653
1654/*
1655 * Main interrupt handling body.
1656 *
1657 * Input:
1658 * o ie:                        the event connected to this interrupt.
1659 * o frame:                     some archs (e.g. i386) pass a frame to some
1660 *                              handlers as their main argument.
1661 * Return value:
1662 * o 0:                         everything ok.
1663 * o EINVAL:                    stray interrupt.
1664 */
1665int
1666intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1667{
1668	struct intr_thread *ithd;
1669	struct trapframe *oldframe;
1670	struct thread *td;
1671	int thread;
1672
1673	ithd = NULL;
1674	td = curthread;
1675
1676	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1677		return (EINVAL);
1678
1679	td->td_intr_nesting_level++;
1680	thread = 0;
1681	critical_enter();
1682	oldframe = td->td_intr_frame;
1683	td->td_intr_frame = frame;
1684	thread = intr_filter_loop(ie, frame, &ithd);
1685	if (thread & FILTER_HANDLED) {
1686		if (ie->ie_post_filter != NULL)
1687			ie->ie_post_filter(ie->ie_source);
1688	} else {
1689		if (ie->ie_pre_ithread != NULL)
1690			ie->ie_pre_ithread(ie->ie_source);
1691	}
1692	td->td_intr_frame = oldframe;
1693	critical_exit();
1694
1695	/* Interrupt storm logic */
1696	if (thread & FILTER_STRAY) {
1697		ie->ie_count++;
1698		if (ie->ie_count < intr_storm_threshold)
1699			printf("Interrupt stray detection not present\n");
1700	}
1701
1702	/* Schedule an ithread if needed. */
1703	if (thread & FILTER_SCHEDULE_THREAD) {
1704		if (intr_event_schedule_thread(ie, ithd) != 0)
1705			panic("%s: impossible stray interrupt", __func__);
1706	}
1707	td->td_intr_nesting_level--;
1708	return (0);
1709}
1710#endif
1711
1712#ifdef DDB
1713/*
1714 * Dump details about an interrupt handler
1715 */
1716static void
1717db_dump_intrhand(struct intr_handler *ih)
1718{
1719	int comma;
1720
1721	db_printf("\t%-10s ", ih->ih_name);
1722	switch (ih->ih_pri) {
1723	case PI_REALTIME:
1724		db_printf("CLK ");
1725		break;
1726	case PI_AV:
1727		db_printf("AV  ");
1728		break;
1729	case PI_TTY:
1730		db_printf("TTY ");
1731		break;
1732	case PI_NET:
1733		db_printf("NET ");
1734		break;
1735	case PI_DISK:
1736		db_printf("DISK");
1737		break;
1738	case PI_DULL:
1739		db_printf("DULL");
1740		break;
1741	default:
1742		if (ih->ih_pri >= PI_SOFT)
1743			db_printf("SWI ");
1744		else
1745			db_printf("%4u", ih->ih_pri);
1746		break;
1747	}
1748	db_printf(" ");
1749	if (ih->ih_filter != NULL) {
1750		db_printf("[F]");
1751		db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC);
1752	}
1753	if (ih->ih_handler != NULL) {
1754		if (ih->ih_filter != NULL)
1755			db_printf(",");
1756		db_printf("[H]");
1757		db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
1758	}
1759	db_printf("(%p)", ih->ih_argument);
1760	if (ih->ih_need ||
1761	    (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD |
1762	    IH_MPSAFE)) != 0) {
1763		db_printf(" {");
1764		comma = 0;
1765		if (ih->ih_flags & IH_EXCLUSIVE) {
1766			if (comma)
1767				db_printf(", ");
1768			db_printf("EXCL");
1769			comma = 1;
1770		}
1771		if (ih->ih_flags & IH_ENTROPY) {
1772			if (comma)
1773				db_printf(", ");
1774			db_printf("ENTROPY");
1775			comma = 1;
1776		}
1777		if (ih->ih_flags & IH_DEAD) {
1778			if (comma)
1779				db_printf(", ");
1780			db_printf("DEAD");
1781			comma = 1;
1782		}
1783		if (ih->ih_flags & IH_MPSAFE) {
1784			if (comma)
1785				db_printf(", ");
1786			db_printf("MPSAFE");
1787			comma = 1;
1788		}
1789		if (ih->ih_need) {
1790			if (comma)
1791				db_printf(", ");
1792			db_printf("NEED");
1793		}
1794		db_printf("}");
1795	}
1796	db_printf("\n");
1797}
1798
1799/*
1800 * Dump details about an event.
1801 */
1802void
1803db_dump_intr_event(struct intr_event *ie, int handlers)
1804{
1805	struct intr_handler *ih;
1806	struct intr_thread *it;
1807	int comma;
1808
1809	db_printf("%s ", ie->ie_fullname);
1810	it = ie->ie_thread;
1811	if (it != NULL)
1812		db_printf("(pid %d)", it->it_thread->td_proc->p_pid);
1813	else
1814		db_printf("(no thread)");
1815	if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 ||
1816	    (it != NULL && it->it_need)) {
1817		db_printf(" {");
1818		comma = 0;
1819		if (ie->ie_flags & IE_SOFT) {
1820			db_printf("SOFT");
1821			comma = 1;
1822		}
1823		if (ie->ie_flags & IE_ENTROPY) {
1824			if (comma)
1825				db_printf(", ");
1826			db_printf("ENTROPY");
1827			comma = 1;
1828		}
1829		if (ie->ie_flags & IE_ADDING_THREAD) {
1830			if (comma)
1831				db_printf(", ");
1832			db_printf("ADDING_THREAD");
1833			comma = 1;
1834		}
1835		if (it != NULL && it->it_need) {
1836			if (comma)
1837				db_printf(", ");
1838			db_printf("NEED");
1839		}
1840		db_printf("}");
1841	}
1842	db_printf("\n");
1843
1844	if (handlers)
1845		TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
1846		    db_dump_intrhand(ih);
1847}
1848
1849/*
1850 * Dump data about interrupt handlers
1851 */
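/*
 * The 'a' modifier includes events with no handlers attached and the 'v'
 * modifier also dumps every handler on each event.
 */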
1852DB_SHOW_COMMAND(intr, db_show_intr)
1853{
1854	struct intr_event *ie;
1855	int all, verbose;
1856
1857	verbose = strchr(modif, 'v') != NULL;
1858	all = strchr(modif, 'a') != NULL;
1859	TAILQ_FOREACH(ie, &event_list, ie_list) {
1860		if (!all && TAILQ_EMPTY(&ie->ie_handlers))
1861			continue;
1862		db_dump_intr_event(ie, verbose);
1863		if (db_pager_quit)
1864			break;
1865	}
1866}
1867#endif /* DDB */
1868
1869/*
1870 * Start standard software interrupt threads
1871 */
1872static void
1873start_softintr(void *dummy)
1874{
1875
1876	if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
1877		panic("died while creating vm swi ithread");
1878}
1879SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr,
1880    NULL);
1881
1882/*
1883 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
1884 * The data for this is machine dependent, and the declarations are in machine
1885 * dependent code.  The layout of intrnames and intrcnt, however, is machine
1886 * independent.
1887 *
1888 * We do not know the length of intrcnt and intrnames at compile time, so
1889 * calculate things at run time.
1890 */
1891static int
1892sysctl_intrnames(SYSCTL_HANDLER_ARGS)
1893{
1894	return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req));
1895}
1896
1897SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
1898    NULL, 0, sysctl_intrnames, "", "Interrupt Names");
1899
1900static int
1901sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
1902{
1903#ifdef SCTL_MASK32
1904	uint32_t *intrcnt32;
1905	unsigned i;
1906	int error;
1907
1908	if (req->flags & SCTL_MASK32) {
1909		if (!req->oldptr)
1910			return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req));
1911		intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT);
1912		if (intrcnt32 == NULL)
1913			return (ENOMEM);
1914		for (i = 0; i < sintrcnt / sizeof (u_long); i++)
1915			intrcnt32[i] = intrcnt[i];
1916		error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req);
1917		free(intrcnt32, M_TEMP);
1918		return (error);
1919	}
1920#endif
1921	return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req));
1922}
1923
1924SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
1925    NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
1926
1927#ifdef DDB
1928/*
1929 * DDB command to dump the interrupt statistics.
1930 */
1931DB_SHOW_COMMAND(intrcnt, db_show_intrcnt)
1932{
1933	u_long *i;
1934	char *cp;
1935	u_int j;
1936
1937	cp = intrnames;
1938	j = 0;
1939	for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit;
1940	    i++, j++) {
1941		if (*cp == '\0')
1942			break;
1943		if (*i != 0)
1944			db_printf("%s\t%lu\n", cp, *i);
1945		cp += strlen(cp) + 1;
1946	}
1947}
1948#endif
1949