/*-
 * Copyright (c) 2015-2016 Svatopluk Kraus
 * Copyright (c) 2015-2016 Michal Meloun
 * All rights reserved.
 * Copyright (c) 2015-2016 The FreeBSD Foundation
 * Copyright (c) 2021 Jessica Clarke <jrtc27@FreeBSD.org>
 *
 * Portions of this software were developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 *	New-style Interrupt Framework
 *
 *  TODO: - add support for disconnected PICs.
 *        - support IPI (PPI) enabling on other CPUs if they are
 *          already started.
 *        - complete support for removable PICs.
 */

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_iommu.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/intr.h>
#include <machine/smp.h>
#include <machine/stdarg.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifdef IOMMU
#include <dev/iommu/iommu_msi.h>
#endif

#include "pic_if.h"
#include "msi_if.h"

#define	INTRNAME_LEN	(2*MAXCOMLEN + 1)

#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
    printf(fmt,##args); } while (0)
#else
#define debugf(fmt, args...)
#endif

MALLOC_DECLARE(M_INTRNG);
MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");

/* Main interrupt handler called from assembler -> 'hidden' for C code. */
void intr_irq_handler(struct trapframe *tf);

/* Root interrupt controller stuff. */
device_t intr_irq_root_dev;
static intr_irq_filter_t *irq_root_filter;
static void *irq_root_arg;

struct intr_pic_child {
	SLIST_ENTRY(intr_pic_child)	 pc_next;
	struct intr_pic			*pc_pic;
	intr_child_irq_filter_t		*pc_filter;
	void				*pc_filter_arg;
	uintptr_t			 pc_start;
	uintptr_t			 pc_length;
};

/* Interrupt controller definition. */
struct intr_pic {
	SLIST_ENTRY(intr_pic)	pic_next;
	intptr_t		pic_xref;	/* hardware identification */
	device_t		pic_dev;
/* Only one of FLAG_PIC or FLAG_MSI may be set */
#define	FLAG_PIC	(1 << 0)
#define	FLAG_MSI	(1 << 1)
#define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
	u_int			pic_flags;
	struct mtx		pic_child_lock;
	SLIST_HEAD(, intr_pic_child) pic_children;
};

#ifdef SMP
#define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)

struct intr_ipi {
	intr_ipi_handler_t	*ii_handler;
	void			*ii_handler_arg;
	struct intr_irqsrc	*ii_isrc;
	char			ii_name[INTR_IPI_NAMELEN];
	u_long			*ii_count;
};

static device_t intr_ipi_dev;
static u_int intr_ipi_dev_priority;
static bool intr_ipi_dev_frozen;
#endif

static struct mtx pic_list_lock;
static SLIST_HEAD(, intr_pic) pic_list;

static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, u_int flags);

/* Interrupt source definition. */
static struct mtx isrc_table_lock;
static struct intr_irqsrc **irq_sources;
static u_int irq_next_free;

#ifdef SMP
#ifdef EARLY_AP_STARTUP
static bool irq_assign_cpu = true;
#else
static bool irq_assign_cpu = false;
#endif

static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
#endif

u_int intr_nirq = NIRQ;
SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
    "Number of IRQs");

/* Data for MI statistics reporting. */
u_long *intrcnt;
char *intrnames;
size_t sintrcnt;
size_t sintrnames;
int nintrcnt;
static bitstr_t *intrcnt_bitmap;

static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
static struct intr_map_data *intr_map_get_map_data(u_int res_id);
static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
    struct intr_map_data **data);

/*
 *  Interrupt framework initialization routine.
 */
static void
intr_irq_init(void *dummy __unused)
{

	SLIST_INIT(&pic_list);
	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);

	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);

	/*
	 * - 2 counters for each I/O interrupt.
	 * - mp_maxid + 1 counters for each IPI for SMP.
	 */
	nintrcnt = intr_nirq * 2;
#ifdef SMP
	nintrcnt += INTR_IPI_COUNT * (mp_maxid + 1);
#endif

	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
	    M_WAITOK | M_ZERO);
	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
	    M_WAITOK | M_ZERO);
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * INTRNAME_LEN;

	/* Allocate the bitmap tracking counter allocations. */
	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);

	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
	    M_INTRNG, M_WAITOK | M_ZERO);
}
SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);

static void
intrcnt_setname(const char *name, int index)
{

	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
	    INTRNAME_LEN - 1, name);
}

/*
 *  Update the name for an interrupt source from its interrupt event.
 */
static void
intrcnt_updatename(struct intr_irqsrc *isrc)
{

	/* QQQ: What about stray counter name? */
	mtx_assert(&isrc_table_lock, MA_OWNED);
	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
}

/*
 *  Virtualization for interrupt source interrupt counter increment.
 */
static inline void
isrc_increment_count(struct intr_irqsrc *isrc)
{

	if (isrc->isrc_flags & INTR_ISRCF_PPI)
		atomic_add_long(&isrc->isrc_count[0], 1);
	else
		isrc->isrc_count[0]++;
}

/*
 *  Virtualization for interrupt source interrupt stray counter increment.
 */
static inline void
isrc_increment_straycount(struct intr_irqsrc *isrc)
{

	isrc->isrc_count[1]++;
}

/*
 *  Virtualization for interrupt source interrupt name update.
 */
static void
isrc_update_name(struct intr_irqsrc *isrc, const char *name)
{
	char str[INTRNAME_LEN];

	mtx_assert(&isrc_table_lock, MA_OWNED);

	if (name != NULL) {
		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
		intrcnt_setname(str, isrc->isrc_index);
		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
		    name);
		intrcnt_setname(str, isrc->isrc_index + 1);
	} else {
		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
		intrcnt_setname(str, isrc->isrc_index);
		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
		intrcnt_setname(str, isrc->isrc_index + 1);
	}
}

/*
 *  Virtualization for interrupt source interrupt counters setup.
 */
static void
isrc_setup_counters(struct intr_irqsrc *isrc)
{
	int index;

	mtx_assert(&isrc_table_lock, MA_OWNED);

	/*
	 * Allocate two counter values, the second tracking "stray" interrupts.
	 */
	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
	if (index == -1)
		panic("Failed to allocate 2 counters. Array exhausted?");
	bit_nset(intrcnt_bitmap, index, index + 1);
	isrc->isrc_index = index;
	isrc->isrc_count = &intrcnt[index];
	isrc_update_name(isrc, NULL);
}

/*
 *  Virtualization for interrupt source interrupt counters release.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{
	int idx = isrc->isrc_index;

	mtx_assert(&isrc_table_lock, MA_OWNED);

	bit_nclear(intrcnt_bitmap, idx, idx + 1);
}

/*
 *  Main interrupt dispatch handler. It is called straight from the
 *  assembler, where the CPU interrupt is taken.
 */
void
intr_irq_handler(struct trapframe *tf)
{
	struct trapframe *oldframe;
	struct thread *td;

	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));

	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
	kmsan_mark(tf, sizeof(*tf), KMSAN_STATE_INITED);

	VM_CNT_INC(v_intr);
	critical_enter();
	td = curthread;
	oldframe = td->td_intr_frame;
	td->td_intr_frame = tf;
	irq_root_filter(irq_root_arg);
	td->td_intr_frame = oldframe;
	critical_exit();
#ifdef HWPMC_HOOKS
	if (pmc_hook && TRAPF_USERMODE(tf) &&
	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
#endif
}

int
intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
{
	struct intr_pic_child *child;
	bool found;

	found = false;
	mtx_lock_spin(&parent->pic_child_lock);
	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
		if (child->pc_start <= irq &&
		    irq < (child->pc_start + child->pc_length)) {
			found = true;
			break;
		}
	}
	mtx_unlock_spin(&parent->pic_child_lock);

	if (found)
		return (child->pc_filter(child->pc_filter_arg, irq));

	return (FILTER_STRAY);
}

377
378/*
379 *  interrupt controller dispatch function for interrupts. It should
380 *  be called straight from the interrupt controller, when associated interrupt
381 *  source is learned.
382 */
383int
384intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
385{
386
387	KASSERT(isrc != NULL, ("%s: no source", __func__));
388
389	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
390		isrc_increment_count(isrc);
391
392#ifdef INTR_SOLO
393	if (isrc->isrc_filter != NULL) {
394		int error;
395		error = isrc->isrc_filter(isrc->isrc_arg, tf);
396		PIC_POST_FILTER(isrc->isrc_dev, isrc);
397		if (error == FILTER_HANDLED)
398			return (0);
399	} else
400#endif
401	if (isrc->isrc_event != NULL) {
402		if (intr_event_handle(isrc->isrc_event, tf) == 0)
403			return (0);
404	}
405
406	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
407		isrc_increment_straycount(isrc);
408	return (EINVAL);
409}
410
411/*
412 *  Alloc unique interrupt number (resource handle) for interrupt source.
413 *
414 *  There could be various strategies how to allocate free interrupt number
415 *  (resource handle) for new interrupt source.
416 *
417 *  1. Handles are always allocated forward, so handles are not recycled
418 *     immediately. However, if only one free handle left which is reused
419 *     constantly...
420 */
421static inline int
422isrc_alloc_irq(struct intr_irqsrc *isrc)
423{
424	u_int irq;
425
426	mtx_assert(&isrc_table_lock, MA_OWNED);
427
428	if (irq_next_free >= intr_nirq)
429		return (ENOSPC);
430
431	for (irq = irq_next_free; irq < intr_nirq; irq++) {
432		if (irq_sources[irq] == NULL)
433			goto found;
434	}
435	for (irq = 0; irq < irq_next_free; irq++) {
436		if (irq_sources[irq] == NULL)
437			goto found;
438	}
439
440	irq_next_free = intr_nirq;
441	return (ENOSPC);
442
443found:
444	isrc->isrc_irq = irq;
445	irq_sources[irq] = isrc;
446
447	irq_next_free = irq + 1;
448	if (irq_next_free >= intr_nirq)
449		irq_next_free = 0;
450	return (0);
451}

/*
 *  Free a unique interrupt number (resource handle) from an interrupt
 *  source.
 */
static inline int
isrc_free_irq(struct intr_irqsrc *isrc)
{

	mtx_assert(&isrc_table_lock, MA_OWNED);

	if (isrc->isrc_irq >= intr_nirq)
		return (EINVAL);
	if (irq_sources[isrc->isrc_irq] != isrc)
		return (EINVAL);

	irq_sources[isrc->isrc_irq] = NULL;
	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */

	/*
	 * If we are recovering from a state where the irq_sources table was
	 * full, then the following allocation should check the entire table.
	 * This will ensure maximum separation of allocation order from
	 * release order.
	 */
	if (irq_next_free >= intr_nirq)
		irq_next_free = 0;

	return (0);
}

/*
 *  Initialize an interrupt source and register it into the global interrupt
 *  table.
 */
int
intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
    const char *fmt, ...)
{
	int error;
	va_list ap;

	bzero(isrc, sizeof(struct intr_irqsrc));
	isrc->isrc_dev = dev;
	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
	isrc->isrc_flags = flags;

	va_start(ap, fmt);
	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
	va_end(ap);

	mtx_lock(&isrc_table_lock);
	error = isrc_alloc_irq(isrc);
	if (error != 0) {
		mtx_unlock(&isrc_table_lock);
		return (error);
	}
	/*
	 * Setup interrupt counters, but not for IPI sources. Those are setup
	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
	 * our counter pool.
	 */
	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
		isrc_setup_counters(isrc);
	mtx_unlock(&isrc_table_lock);
	return (0);
}

/*
 *  Deregister an interrupt source from the global interrupt table.
 */
int
intr_isrc_deregister(struct intr_irqsrc *isrc)
{
	int error;

	mtx_lock(&isrc_table_lock);
	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
		isrc_release_counters(isrc);
	error = isrc_free_irq(isrc);
	mtx_unlock(&isrc_table_lock);
	return (error);
}

#ifdef SMP
/*
 *  A support function for a PIC to decide if the provided ISRC should be
 *  inited on the given cpu. The logic of the INTR_ISRCF_BOUND flag and the
 *  isrc_cpu member of struct intr_irqsrc is the following:
 *
 *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
 *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
 *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{

	if (isrc->isrc_handlers == 0)
		return (false);
	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
		return (false);
	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
		return (CPU_ISSET(cpu, &isrc->isrc_cpu));

	CPU_SET(cpu, &isrc->isrc_cpu);
	return (true);
}
#endif
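
/*
 * Illustrative sketch (hypothetical PIC driver, not part of this file):
 * a PIC's init_secondary method typically walks its per-CPU sources and
 * consults intr_isrc_init_on_cpu() before touching the hardware:
 *
 *	for (irq = 0; irq < sc->nirqs; irq++) {
 *		isrc = &sc->isrcs[irq];
 *		if (intr_isrc_init_on_cpu(isrc, cpu))
 *			example_pic_enable(sc, isrc, cpu);
 *	}
 *
 * example_pic_enable and the softc layout are made up for illustration;
 * real drivers follow this pattern in their pic_init_secondary methods.
 */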

#ifdef INTR_SOLO
/*
 *  Setup a filter for the interrupt source.
 */
static int
isrc_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{

	if (filter == NULL)
		return (EINVAL);

	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources.
	 */
	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
		mtx_unlock(&isrc_table_lock);
		return (EBUSY);
	}
	isrc->isrc_filter = filter;
	isrc->isrc_arg = arg;
	isrc_update_name(isrc, name);
	mtx_unlock(&isrc_table_lock);

	*cookiep = isrc;
	return (0);
}
#endif

/*
 *  Interrupt source pre_ithread method for MI interrupt framework.
 */
static void
intr_isrc_pre_ithread(void *arg)
{
	struct intr_irqsrc *isrc = arg;

	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
}

/*
 *  Interrupt source post_ithread method for MI interrupt framework.
 */
static void
intr_isrc_post_ithread(void *arg)
{
	struct intr_irqsrc *isrc = arg;

	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
}

/*
 *  Interrupt source post_filter method for MI interrupt framework.
 */
static void
intr_isrc_post_filter(void *arg)
{
	struct intr_irqsrc *isrc = arg;

	PIC_POST_FILTER(isrc->isrc_dev, isrc);
}

/*
 *  Interrupt source assign_cpu method for MI interrupt framework.
 */
static int
intr_isrc_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intr_irqsrc *isrc = arg;
	int error;

	mtx_lock(&isrc_table_lock);
	if (cpu == NOCPU) {
		CPU_ZERO(&isrc->isrc_cpu);
		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
	} else {
		CPU_SETOF(cpu, &isrc->isrc_cpu);
		isrc->isrc_flags |= INTR_ISRCF_BOUND;
	}

	/*
	 * In the NOCPU case, it's up to the PIC to either leave the ISRC on
	 * the same CPU, re-balance it to another CPU, or enable it on more
	 * CPUs. However, the PIC is expected to change isrc_cpu appropriately
	 * to keep us well informed if the call is successful.
	 */
	if (irq_assign_cpu) {
		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
		if (error) {
			CPU_ZERO(&isrc->isrc_cpu);
			mtx_unlock(&isrc_table_lock);
			return (error);
		}
	}
	mtx_unlock(&isrc_table_lock);
	return (0);
#else
	return (EOPNOTSUPP);
#endif
}

/*
 *  Create an interrupt event for the interrupt source.
 */
static int
isrc_event_create(struct intr_irqsrc *isrc)
{
	struct intr_event *ie;
	int error;

	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
	if (error)
		return (error);

	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources. Let the contested event win.
	 */
#ifdef INTR_SOLO
	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
#else
	if (isrc->isrc_event != NULL) {
#endif
		mtx_unlock(&isrc_table_lock);
		intr_event_destroy(ie);
		return (isrc->isrc_event != NULL ? EBUSY : 0);
	}
	isrc->isrc_event = ie;
	mtx_unlock(&isrc_table_lock);

	return (0);
}
#ifdef notyet
/*
 *  Destroy the interrupt event for the interrupt source.
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *ie;

	mtx_lock(&isrc_table_lock);
	ie = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (ie != NULL)
		intr_event_destroy(ie);
}
#endif
/*
 *  Add a handler to the interrupt source.
 */
static int
isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
    driver_filter_t filter, driver_intr_t handler, void *arg,
    enum intr_type flags, void **cookiep)
{
	int error;

	if (isrc->isrc_event == NULL) {
		error = isrc_event_create(isrc);
		if (error)
			return (error);
	}

	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		mtx_lock(&isrc_table_lock);
		intrcnt_updatename(isrc);
		mtx_unlock(&isrc_table_lock);
	}

	return (error);
}

/*
 *  Lookup an interrupt controller, locked.
 */
static inline struct intr_pic *
pic_lookup_locked(device_t dev, intptr_t xref, u_int flags)
{
	struct intr_pic *pic;

	mtx_assert(&pic_list_lock, MA_OWNED);

	if (dev == NULL && xref == 0)
		return (NULL);

	/* Note that pic->pic_dev is never NULL on a registered PIC. */
	SLIST_FOREACH(pic, &pic_list, pic_next) {
		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
		    (flags & FLAG_TYPE_MASK))
			continue;

		if (dev == NULL) {
			if (xref == pic->pic_xref)
				return (pic);
		} else if (xref == 0 || pic->pic_xref == 0) {
			if (dev == pic->pic_dev)
				return (pic);
		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
			return (pic);
	}
	return (NULL);
}

/*
 *  Lookup an interrupt controller.
 */
static struct intr_pic *
pic_lookup(device_t dev, intptr_t xref, u_int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	mtx_unlock(&pic_list_lock);
	return (pic);
}

/*
 *  Create interrupt controller.
 */
static struct intr_pic *
pic_create(device_t dev, intptr_t xref, u_int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic != NULL) {
		mtx_unlock(&pic_list_lock);
		return (pic);
	}
	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return (NULL);
	}
	pic->pic_xref = xref;
	pic->pic_dev = dev;
	pic->pic_flags = flags;
	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
	mtx_unlock(&pic_list_lock);

	return (pic);
}
#ifdef notyet
/*
 *  Destroy interrupt controller.
 */
static void
pic_destroy(device_t dev, intptr_t xref, u_int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return;
	}
	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	free(pic, M_INTRNG);
}
#endif
/*
 *  Register interrupt controller.
 */
struct intr_pic *
intr_pic_register(device_t dev, intptr_t xref)
{
	struct intr_pic *pic;

	if (dev == NULL)
		return (NULL);
	pic = pic_create(dev, xref, FLAG_PIC);
	if (pic == NULL)
		return (NULL);

	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
	    device_get_nameunit(dev), dev, (uintmax_t)xref);
	return (pic);
}

/*
 *  Unregister interrupt controller.
 */
int
intr_pic_deregister(device_t dev, intptr_t xref)
{

	panic("%s: not implemented", __func__);
}

/*
 *  Mark the interrupt controller (itself) as a root one.
 *
 *  Note that only an interrupt controller can really know its position
 *  in the interrupt controller tree. So the root PIC must claim itself
 *  as the root.
 *
 *  In the FDT case, according to ePAPR approved version 1.1 from
 *  08 April 2011, page 30:
 *    "The root of the interrupt tree is determined when traversal
 *     of the interrupt tree reaches an interrupt controller node without
 *     an interrupts property and thus no explicit interrupt parent."
 */
int
intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
    void *arg)
{
	struct intr_pic *pic;

	pic = pic_lookup(dev, xref, FLAG_PIC);
	if (pic == NULL) {
		device_printf(dev, "not registered\n");
		return (EINVAL);
	}

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
	    ("%s: Found a non-PIC controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	if (filter == NULL) {
		device_printf(dev, "filter missing\n");
		return (EINVAL);
	}

	/*
	 * Only one interrupt controller can be the root for now.
	 * Note that we further suppose that there is no threaded interrupt
	 * routine (handler) on the root. See intr_irq_handler().
	 */
	if (intr_irq_root_dev != NULL) {
		device_printf(dev, "another root already set\n");
		return (EBUSY);
	}

	intr_irq_root_dev = dev;
	irq_root_filter = filter;
	irq_root_arg = arg;

	debugf("irq root set to %s\n", device_get_nameunit(dev));
	return (0);
}
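
/*
 * Illustrative sketch (hypothetical FDT root controller driver): the
 * expected registration sequence is to register the PIC first and then
 * claim the root with the driver's own filter routine:
 *
 *	xref = OF_xref_from_node(ofw_bus_get_node(dev));
 *	if (intr_pic_register(dev, xref) == NULL)
 *		return (ENXIO);
 *	if (intr_pic_claim_root(dev, xref, example_intr, sc) != 0)
 *		return (ENXIO);
 *
 * example_intr is a made-up intr_irq_filter_t routine; it is what
 * intr_irq_handler() above ends up calling via irq_root_filter.
 */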

/*
 * Add a handler to manage a sub range of a parent's interrupts.
 */
int
intr_pic_add_handler(device_t parent, struct intr_pic *pic,
    intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
    uintptr_t length)
{
	struct intr_pic *parent_pic;
	struct intr_pic_child *newchild;
#ifdef INVARIANTS
	struct intr_pic_child *child;
#endif

	/* Find the parent PIC */
	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
	if (parent_pic == NULL)
		return (ENXIO);

	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
	newchild->pc_pic = pic;
	newchild->pc_filter = filter;
	newchild->pc_filter_arg = arg;
	newchild->pc_start = start;
	newchild->pc_length = length;

	mtx_lock_spin(&parent_pic->pic_child_lock);
#ifdef INVARIANTS
	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
		    __func__));
	}
#endif
	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
	mtx_unlock_spin(&parent_pic->pic_child_lock);

	return (0);
}
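
/*
 * Illustrative sketch (hypothetical cascaded controller): a child PIC
 * that owns a contiguous range of its parent's interrupt numbers
 * registers itself and then hooks that range:
 *
 *	pic = intr_pic_register(dev, xref);
 *	if (pic == NULL)
 *		return (ENXIO);
 *	error = intr_pic_add_handler(parent, pic, example_child_filter,
 *	    sc, EXAMPLE_IRQ_BASE, EXAMPLE_NIRQS);
 *
 * example_child_filter, EXAMPLE_IRQ_BASE and EXAMPLE_NIRQS are made up;
 * interrupts in [EXAMPLE_IRQ_BASE, EXAMPLE_IRQ_BASE + EXAMPLE_NIRQS)
 * are then routed to the filter by intr_child_irq_handler() above.
 */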

static int
intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
    struct intr_irqsrc **isrc)
{
	struct intr_pic *pic;
	struct intr_map_data_msi *msi;

	if (data == NULL)
		return (EINVAL);

	pic = pic_lookup(dev, xref,
	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
	if (pic == NULL)
		return (ESRCH);

	switch (data->type) {
	case INTR_MAP_DATA_MSI:
		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
		    ("%s: Found a non-MSI controller: %s", __func__,
		     device_get_name(pic->pic_dev)));
		msi = (struct intr_map_data_msi *)data;
		*isrc = msi->isrc;
		return (0);

	default:
		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
		    ("%s: Found a non-PIC controller: %s", __func__,
		     device_get_name(pic->pic_dev)));
		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
	}
}

bool
intr_is_per_cpu(struct resource *res)
{
	u_int res_id;
	struct intr_irqsrc *isrc;

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);

	if (isrc == NULL)
		panic("Attempt to get isrc for non-active resource id: %u\n",
		    res_id);
	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
}

int
intr_activate_irq(device_t dev, struct resource *res)
{
	device_t map_dev;
	intptr_t map_xref;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;
	int error;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	if (intr_map_get_isrc(res_id) != NULL)
		panic("Attempt to double activation of resource id: %u\n",
		    res_id);
	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
	if (error != 0) {
		free(data, M_INTRNG);
		/* XXX TODO DISCONNECTED PICs */
		/* if (error == EINVAL) return(0); */
		return (error);
	}
	intr_map_set_isrc(res_id, isrc);
	rman_set_virtual(res, data);
	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
}

int
intr_deactivate_irq(device_t dev, struct resource *res)
{
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;
	int error;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL)
		panic("Attempt to deactivate non-active resource id: %u\n",
		    res_id);

	data = rman_get_virtual(res);
	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
	intr_map_set_isrc(res_id, NULL);
	rman_set_virtual(res, NULL);
	free(data, M_INTRNG);
	return (error);
}

int
intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
    driver_intr_t hand, void *arg, int flags, void **cookiep)
{
	int error;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	const char *name;
	u_int res_id;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL) {
		/* XXX TODO DISCONNECTED PICs */
		return (EINVAL);
	}

	data = rman_get_virtual(res);
	name = device_get_nameunit(dev);

#ifdef INTR_SOLO
	/*
	 * Standard handling is done through the MI interrupt framework.
	 * However, some interrupts could request solely their own special
	 * handling. This non-standard handling can be used for interrupt
	 * controllers without a handler (filter only), so in case the
	 * interrupt controllers are chained, the MI interrupt framework is
	 * called only in the leaf controller.
	 *
	 * Note that the root interrupt controller routine is served as well,
	 * however in intr_irq_handler(), i.e. the main system dispatch
	 * routine.
	 */
	if (flags & INTR_SOLO && hand != NULL) {
		debugf("irq %u cannot solo on %s\n", isrc->isrc_irq, name);
		return (EINVAL);
	}

	if (flags & INTR_SOLO) {
		error = isrc_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
		    arg, cookiep);
		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq,
		    error, name);
	} else
#endif
		{
		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
		    cookiep);
		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq,
		    error, name);
	}
	if (error != 0)
		return (error);

	mtx_lock(&isrc_table_lock);
	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
	if (error == 0) {
		isrc->isrc_handlers++;
		if (isrc->isrc_handlers == 1)
			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
	}
	mtx_unlock(&isrc_table_lock);
	if (error != 0)
		intr_event_remove_handler(*cookiep);
	return (error);
}
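
/*
 * Illustrative sketch: device drivers do not call intr_setup_irq()
 * directly; they go through the newbus interface, which reaches here
 * via the MD nexus:
 *
 *	res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
 *	error = bus_setup_intr(dev, res, INTR_TYPE_MISC | INTR_MPSAFE,
 *	    NULL, example_intr_handler, sc, &sc->intrhand);
 *
 * example_intr_handler and the softc fields are made up; the rid and
 * the INTR_TYPE_* choice depend on the driver.
 */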

int
intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
{
	int error;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL || isrc->isrc_handlers == 0)
		return (EINVAL);

	data = rman_get_virtual(res);

#ifdef INTR_SOLO
	if (isrc->isrc_filter != NULL) {
		if (isrc != cookie)
			return (EINVAL);

		mtx_lock(&isrc_table_lock);
		isrc->isrc_filter = NULL;
		isrc->isrc_arg = NULL;
		isrc->isrc_handlers = 0;
		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
		isrc_update_name(isrc, NULL);
		mtx_unlock(&isrc_table_lock);
		return (0);
	}
#endif
	if (isrc != intr_handler_source(cookie))
		return (EINVAL);

	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		mtx_lock(&isrc_table_lock);
		isrc->isrc_handlers--;
		if (isrc->isrc_handlers == 0)
			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
		intrcnt_updatename(isrc);
		mtx_unlock(&isrc_table_lock);
	}
	return (error);
}

int
intr_describe_irq(device_t dev, struct resource *res, void *cookie,
    const char *descr)
{
	int error;
	struct intr_irqsrc *isrc;
	u_int res_id;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL || isrc->isrc_handlers == 0)
		return (EINVAL);
#ifdef INTR_SOLO
	if (isrc->isrc_filter != NULL) {
		if (isrc != cookie)
			return (EINVAL);

		mtx_lock(&isrc_table_lock);
		isrc_update_name(isrc, descr);
		mtx_unlock(&isrc_table_lock);
		return (0);
	}
#endif
	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
	if (error == 0) {
		mtx_lock(&isrc_table_lock);
		intrcnt_updatename(isrc);
		mtx_unlock(&isrc_table_lock);
	}
	return (error);
}

#ifdef SMP
int
intr_bind_irq(device_t dev, struct resource *res, int cpu)
{
	struct intr_irqsrc *isrc;
	u_int res_id;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL || isrc->isrc_handlers == 0)
		return (EINVAL);
#ifdef INTR_SOLO
	if (isrc->isrc_filter != NULL)
		return (intr_isrc_assign_cpu(isrc, cpu));
#endif
	return (intr_event_bind(isrc->isrc_event, cpu));
}

/*
 * Return the CPU that the next interrupt source should use.
 * For now just returns the next CPU according to round-robin.
 */
u_int
intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
{
	u_int cpu;

	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
	if (!irq_assign_cpu || mp_ncpus == 1) {
		cpu = PCPU_GET(cpuid);

		if (CPU_ISSET(cpu, cpumask))
			return (cpu);

		return (CPU_FFS(cpumask) - 1);
	}

	do {
		last_cpu++;
		if (last_cpu > mp_maxid)
			last_cpu = 0;
	} while (!CPU_ISSET(last_cpu, cpumask));
	return (last_cpu);
}

#ifndef EARLY_AP_STARTUP
/*
 *  Distribute all the interrupt sources among the available
 *  CPUs once the APs have been launched.
 */
static void
intr_irq_shuffle(void *arg __unused)
{
	struct intr_irqsrc *isrc;
	u_int i;

	if (mp_ncpus == 1)
		return;

	mtx_lock(&isrc_table_lock);
	irq_assign_cpu = true;
	for (i = 0; i < intr_nirq; i++) {
		isrc = irq_sources[i];
		if (isrc == NULL || isrc->isrc_handlers == 0 ||
		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
			continue;

		if (isrc->isrc_event != NULL &&
		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
			panic("%s: CPU inconsistency", __func__);

		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
			CPU_ZERO(&isrc->isrc_cpu); /* start again */

		/*
		 * We are in a wicked position here if the following call
		 * fails for a bound ISRC. The best thing we can do is to
		 * clear isrc_cpu so the inconsistency with ie_cpu will be
		 * detectable.
		 */
		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
			CPU_ZERO(&isrc->isrc_cpu);
	}
	mtx_unlock(&isrc_table_lock);
}
SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
#endif /* !EARLY_AP_STARTUP */

#else
u_int
intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
{

	return (PCPU_GET(cpuid));
}
#endif /* SMP */

/*
 * Allocate memory for a new intr_map_data structure.
 * Initialize common fields.
 */
struct intr_map_data *
intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
{
	struct intr_map_data *data;

	data = malloc(len, M_INTRNG, flags);
	data->type = type;
	data->len = len;
	return (data);
}

void
intr_free_intr_map_data(struct intr_map_data *data)
{

	free(data, M_INTRNG);
}

/*
 *  Register an MSI/MSI-X interrupt controller.
 */
int
intr_msi_register(device_t dev, intptr_t xref)
{
	struct intr_pic *pic;

	if (dev == NULL)
		return (EINVAL);
	pic = pic_create(dev, xref, FLAG_MSI);
	if (pic == NULL)
		return (ENOMEM);

	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
	    device_get_nameunit(dev), dev, (uintmax_t)xref);
	return (0);
}

int
intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
    int maxcount, int *irqs)
{
	struct iommu_domain *domain;
	struct intr_irqsrc **isrc;
	struct intr_pic *pic;
	device_t pdev;
	struct intr_map_data_msi *msi;
	int err, i;

	pic = pic_lookup(NULL, xref, FLAG_MSI);
	if (pic == NULL)
		return (ESRCH);

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
	    ("%s: Found a non-MSI controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	/*
	 * If this is the first time we have used this context, ask the
	 * interrupt controller to map memory the MSI source will need.
	 */
	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
	if (err != 0)
		return (err);

	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
	if (err != 0) {
		free(isrc, M_INTRNG);
		return (err);
	}

	for (i = 0; i < count; i++) {
		isrc[i]->isrc_iommu = domain;
		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
		msi->isrc = isrc[i];

		irqs[i] = intr_map_irq(pic->pic_dev, xref,
		    (struct intr_map_data *)msi);
	}
	free(isrc, M_INTRNG);

	return (err);
}
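
/*
 * Illustrative sketch: drivers reach intr_alloc_msi() indirectly,
 * through the PCI bus code and the MD nexus:
 *
 *	int count = 1;
 *	error = pci_alloc_msi(dev, &count);
 *	if (error == 0) {
 *		rid = 1;
 *		res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *		    RF_ACTIVE);
 *	}
 *
 * pci_alloc_msi() exists with this signature; the surrounding flow is a
 * sketch. The irqs[] filled in above become resource ids in the irq_map
 * table (see intr_map_irq() below).
 */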

int
intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
    int *irqs)
{
	struct intr_irqsrc **isrc;
	struct intr_pic *pic;
	struct intr_map_data_msi *msi;
	int i, err;

	pic = pic_lookup(NULL, xref, FLAG_MSI);
	if (pic == NULL)
		return (ESRCH);

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
	    ("%s: Found a non-MSI controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);

	for (i = 0; i < count; i++) {
		msi = (struct intr_map_data_msi *)
		    intr_map_get_map_data(irqs[i]);
		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
		    ("%s: irq %d map data is not MSI", __func__,
		    irqs[i]));
		isrc[i] = msi->isrc;
	}

	MSI_IOMMU_DEINIT(pic->pic_dev, child);

	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);

	for (i = 0; i < count; i++) {
		if (isrc[i] != NULL)
			intr_unmap_irq(irqs[i]);
	}

	free(isrc, M_INTRNG);
	return (err);
}

int
intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
{
	struct iommu_domain *domain;
	struct intr_irqsrc *isrc;
	struct intr_pic *pic;
	device_t pdev;
	struct intr_map_data_msi *msi;
	int err;

	pic = pic_lookup(NULL, xref, FLAG_MSI);
	if (pic == NULL)
		return (ESRCH);

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
	    ("%s: Found a non-MSI controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	/*
	 * If this is the first time we have used this context, ask the
	 * interrupt controller to map memory the MSI source will need.
	 */
	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
	if (err != 0)
		return (err);

	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
	if (err != 0)
		return (err);

	isrc->isrc_iommu = domain;
	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
	    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
	msi->isrc = isrc;
	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
	return (0);
}

int
intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
{
	struct intr_irqsrc *isrc;
	struct intr_pic *pic;
	struct intr_map_data_msi *msi;
	int err;

	pic = pic_lookup(NULL, xref, FLAG_MSI);
	if (pic == NULL)
		return (ESRCH);

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
	    ("%s: Found a non-MSI controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	msi = (struct intr_map_data_msi *)
	    intr_map_get_map_data(irq);
	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
	    ("%s: irq %d map data is not MSI", __func__,
	    irq));
	isrc = msi->isrc;
	if (isrc == NULL) {
		intr_unmap_irq(irq);
		return (EINVAL);
	}

	MSI_IOMMU_DEINIT(pic->pic_dev, child);

	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
	intr_unmap_irq(irq);

	return (err);
}

int
intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
    uint64_t *addr, uint32_t *data)
{
	struct intr_irqsrc *isrc;
	struct intr_pic *pic;
	int err;

	pic = pic_lookup(NULL, xref, FLAG_MSI);
	if (pic == NULL)
		return (ESRCH);

	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
	    ("%s: Found a non-MSI controller: %s", __func__,
	     device_get_name(pic->pic_dev)));

	isrc = intr_map_get_isrc(irq);
	if (isrc == NULL)
		return (EINVAL);

	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);

#ifdef IOMMU
	if (isrc->isrc_iommu != NULL)
		iommu_translate_msi(isrc->isrc_iommu, addr);
#endif

	return (err);
}

void dosoftints(void);
void
dosoftints(void)
{
}

#ifdef SMP
/*
 *  Init interrupt controller on another CPU.
 */
void
intr_pic_init_secondary(void)
{

	/*
	 * QQQ: Only root PIC is aware of other CPUs ???
	 */
	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));

	//mtx_lock(&isrc_table_lock);
	PIC_INIT_SECONDARY(intr_irq_root_dev);
	//mtx_unlock(&isrc_table_lock);
}
#endif

#ifdef DDB
DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
{
	u_int i, irqsum;
	u_long num;
	struct intr_irqsrc *isrc;

	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
		isrc = irq_sources[i];
		if (isrc == NULL)
			continue;

		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
		irqsum += num;
	}
	db_printf("irq total %u\n", irqsum);
}
#endif

/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it can be transformed into an extension
 * of the standard resources.
 */
struct intr_map_entry
{
	device_t 		dev;
	intptr_t 		xref;
	struct intr_map_data 	*map_data;
	struct intr_irqsrc 	*isrc;
	/* XXX TODO DISCONNECTED PICs */
	/*int			flags */
};

/* XXX Convert irq_map[] to a dynamically expandable one. */
static struct intr_map_entry **irq_map;
static u_int irq_map_count;
static u_int irq_map_first_free_idx;
static struct mtx irq_map_lock;

static struct intr_irqsrc *
intr_map_get_isrc(u_int res_id)
{
	struct intr_irqsrc *isrc;

	isrc = NULL;
	mtx_lock(&irq_map_lock);
	if (res_id < irq_map_count && irq_map[res_id] != NULL)
		isrc = irq_map[res_id]->isrc;
	mtx_unlock(&irq_map_lock);

	return (isrc);
}

static void
intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
{

	mtx_lock(&irq_map_lock);
	if (res_id < irq_map_count && irq_map[res_id] != NULL)
		irq_map[res_id]->isrc = isrc;
	mtx_unlock(&irq_map_lock);
}

/*
 * Return the map data for the given resource id (no copy is made).
 */
static struct intr_map_data *
intr_map_get_map_data(u_int res_id)
{
	struct intr_map_data *data;

	data = NULL;
	mtx_lock(&irq_map_lock);
	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	data = irq_map[res_id]->map_data;
	mtx_unlock(&irq_map_lock);

	return (data);
}

/*
 * Get a copy of the intr_map_entry data.
 */
static void
intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
    struct intr_map_data **data)
{
	size_t len;

	len = 0;
	mtx_lock(&irq_map_lock);
	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	if (irq_map[res_id]->map_data != NULL)
		len = irq_map[res_id]->map_data->len;
	mtx_unlock(&irq_map_lock);

	if (len == 0)
		*data = NULL;
	else
		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
	mtx_lock(&irq_map_lock);
	if (irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	if (len != 0) {
		if (len != irq_map[res_id]->map_data->len)
			panic("Resource id: %u has changed.\n", res_id);
		memcpy(*data, irq_map[res_id]->map_data, len);
	}
	*map_dev = irq_map[res_id]->dev;
	*map_xref = irq_map[res_id]->xref;
	mtx_unlock(&irq_map_lock);
}

/*
 * Allocate and fill a new entry in the irq_map table.
 */
u_int
intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
{
	u_int i;
	struct intr_map_entry *entry;

	/* Prepare new entry first. */
	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);

	entry->dev = dev;
	entry->xref = xref;
	entry->map_data = data;
	entry->isrc = NULL;

	mtx_lock(&irq_map_lock);
	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
		if (irq_map[i] == NULL) {
			irq_map[i] = entry;
			irq_map_first_free_idx = i + 1;
			mtx_unlock(&irq_map_lock);
			return (i);
		}
	}
	for (i = 0; i < irq_map_first_free_idx; i++) {
		if (irq_map[i] == NULL) {
			irq_map[i] = entry;
			irq_map_first_free_idx = i + 1;
			mtx_unlock(&irq_map_lock);
			return (i);
		}
	}
	mtx_unlock(&irq_map_lock);

	/* XXX Expand irq_map table */
	panic("IRQ mapping table is full.");
}
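
/*
 * Illustrative sketch (hypothetical bus driver): mapping turns a
 * controller-specific interrupt description into the small integer that
 * rman and the resource code work with:
 *
 *	data = intr_alloc_map_data(INTR_MAP_DATA_PLAT_1, len, M_WAITOK);
 *	(fill in the controller-specific fields of data)
 *	irq = intr_map_irq(NULL, xref, data);
 *	bus_set_resource(child, SYS_RES_IRQ, 0, irq, 1);
 *
 * INTR_MAP_DATA_PLAT_1 stands in for whatever map data type the platform
 * defines; FDT buses use INTR_MAP_DATA_FDT in the same way.
 */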

/*
 * Remove and free a mapping entry.
 */
void
intr_unmap_irq(u_int res_id)
{
	struct intr_map_entry *entry;

	mtx_lock(&irq_map_lock);
	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
		panic("Attempt to unmap invalid resource id: %u\n", res_id);
	entry = irq_map[res_id];
	irq_map[res_id] = NULL;
	irq_map_first_free_idx = res_id;
	mtx_unlock(&irq_map_lock);
	intr_free_intr_map_data(entry->map_data);
	free(entry, M_INTRNG);
}

/*
 * Clone a mapping entry.
 */
u_int
intr_map_clone_irq(u_int old_res_id)
{
	device_t map_dev;
	intptr_t map_xref;
	struct intr_map_data *data;

	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
	return (intr_map_irq(map_dev, map_xref, data));
}

static void
intr_map_init(void *dummy __unused)
{

	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);

	irq_map_count = 2 * intr_nirq;
	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
	    M_INTRNG, M_WAITOK | M_ZERO);
}
SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);

#ifdef SMP
/* Virtualization for interrupt source IPI counter increment. */
static inline void
intr_ipi_increment_count(u_long *counter, u_int cpu)
{

	KASSERT(cpu < mp_maxid + 1, ("%s: too big cpu %u", __func__, cpu));
	counter[cpu]++;
}

/*
 *  Virtualization for interrupt source IPI counters setup.
 */
static u_long *
intr_ipi_setup_counters(const char *name)
{
	u_int index, i;
	char str[INTRNAME_LEN];

	mtx_lock(&isrc_table_lock);

	/*
	 * We should never have a problem finding mp_maxid + 1 contiguous
	 * counters, in practice. Interrupts will be allocated sequentially
	 * during boot, so the array should fill from low to high index. Once
	 * reserved, the IPI counters will never be released. Similarly, we
	 * will not need to allocate more IPIs once the system is running.
	 */
	bit_ffc_area(intrcnt_bitmap, nintrcnt, mp_maxid + 1, &index);
	if (index == -1)
		panic("Failed to allocate %d counters. Array exhausted?",
		    mp_maxid + 1);
	bit_nset(intrcnt_bitmap, index, index + mp_maxid);
	for (i = 0; i < mp_maxid + 1; i++) {
		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
		intrcnt_setname(str, index + i);
	}
	mtx_unlock(&isrc_table_lock);
	return (&intrcnt[index]);
}

/*
 *  Lookup IPI source.
 */
static struct intr_ipi *
intr_ipi_lookup(u_int ipi)
{

	if (ipi >= INTR_IPI_COUNT)
		panic("%s: no such IPI %u", __func__, ipi);

	return (&ipi_sources[ipi]);
}

int
intr_ipi_pic_register(device_t dev, u_int priority)
{
	if (intr_ipi_dev_frozen) {
		device_printf(dev, "IPI device already frozen\n");
		return (EBUSY);
	}

	if (intr_ipi_dev == NULL || priority > intr_ipi_dev_priority) {
		intr_ipi_dev_priority = priority;
		intr_ipi_dev = dev;
	}

	return (0);
}

/*
 *  Setup an IPI handler on the interrupt controller.
 *
 *  Not SMP coherent.
 */
void
intr_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
    void *arg)
{
	struct intr_irqsrc *isrc;
	struct intr_ipi *ii;
	int error;

	if (!intr_ipi_dev_frozen) {
		if (intr_ipi_dev == NULL)
			panic("%s: no IPI PIC attached", __func__);

		intr_ipi_dev_frozen = true;
		device_printf(intr_ipi_dev, "using for IPIs\n");
	}

	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));

	error = PIC_IPI_SETUP(intr_ipi_dev, ipi, &isrc);
	if (error != 0)
		return;

	isrc->isrc_handlers++;

	ii = intr_ipi_lookup(ipi);
	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));

	ii->ii_handler = hand;
	ii->ii_handler_arg = arg;
	ii->ii_isrc = isrc;
	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
	ii->ii_count = intr_ipi_setup_counters(name);

	PIC_ENABLE_INTR(intr_ipi_dev, isrc);
}
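
/*
 * Illustrative sketch: MD startup code wires up the standard IPIs
 * roughly like this (the names follow the arm64 pattern; the handlers
 * here are made up):
 *
 *	intr_ipi_setup(IPI_AST, "ast", example_ipi_ast, NULL);
 *	intr_ipi_setup(IPI_RENDEZVOUS, "rendezvous", example_ipi_rv, NULL);
 *
 * The first intr_ipi_setup() call freezes the IPI device chosen through
 * intr_ipi_pic_register() above; later registrations are rejected.
 */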

void
intr_ipi_send(cpuset_t cpus, u_int ipi)
{
	struct intr_ipi *ii;

	KASSERT(intr_ipi_dev_frozen,
	    ("%s: IPI device not yet frozen", __func__));

	ii = intr_ipi_lookup(ipi);
	if (ii->ii_count == NULL)
		panic("%s: IPI %u not setup", __func__, ipi);

	/*
	 * XXX: Surely needed on other architectures too? Either way should be
	 * some kind of MI hook defined in an MD header, or the responsibility
	 * of the MD caller if not widespread.
	 */
#ifdef __aarch64__
	/*
	 * Ensure that this CPU's stores will be visible to IPI
	 * recipients before starting to send the interrupts.
	 */
	dsb(ishst);
#endif

	PIC_IPI_SEND(intr_ipi_dev, ii->ii_isrc, cpus, ipi);
}

/*
 *  Interrupt controller dispatch function for IPIs. It should
 *  be called straight from the interrupt controller, once the associated
 *  interrupt source is learned. Or from anybody who has an interrupt
 *  source mapped.
 */
void
intr_ipi_dispatch(u_int ipi)
{
	struct intr_ipi *ii;

	ii = intr_ipi_lookup(ipi);
	if (ii->ii_count == NULL)
		panic("%s: IPI %u not setup", __func__, ipi);

	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));

	ii->ii_handler(ii->ii_handler_arg);
}
#endif
