/******************************************************************************
 * xen_intr.c
 *
 * Xen event and interrupt services for x86 HVM guests.
 *
 * Copyright (c) 2002-2005, K A Fraser
 * Copyright (c) 2005, Intel Corporation <xiaofeng.ling@intel.com>
 * Copyright (c) 2012, Spectra Logic Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/interrupt.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <sys/refcount.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <x86/apicreg.h>
#include <machine/smp.h>
#include <machine/stdarg.h>

#include <machine/xen/synch_bitops.h>
#include <machine/xen/xen-os.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn/evtchnvar.h>

#include <dev/xen/xenpci/xenpcivar.h>
#include <dev/pci/pcivar.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services");

static u_int first_evtchn_irq;

/**
 * Per-cpu event channel processing state.
 */
struct xen_intr_pcpu_data {
	/**
	 * The last event channel bitmap section (level one bit) processed.
	 * This is used to ensure we scan all ports before
	 * servicing an already serviced port again.
	 */
	u_int	last_processed_l1i;

	/**
	 * The last event channel processed within the event channel
	 * bitmap being scanned.
	 */
	u_int	last_processed_l2i;

	/** Pointer to this CPU's interrupt statistic counter. */
	u_long *evtchn_intrcnt;

	/**
	 * A bitmap of ports that can be serviced from this CPU.
	 * A set bit means interrupt handling is enabled.
	 */
	u_long	evtchn_enabled[sizeof(u_long) * 8];
};

/*
 * Start the scan at port 0 by initializing the last scanned
 * location as the highest numbered event channel port.
 */
DPCPU_DEFINE_STATIC(struct xen_intr_pcpu_data, xen_intr_pcpu) = {
	.last_processed_l1i = LONG_BIT - 1,
	.last_processed_l2i = LONG_BIT - 1
};
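
/*
 * Worked example (informative, not compiled): with 64-bit longs
 * (LONG_BIT == 64), event channel port 130 lives in level-one section
 * l1i = 130 / LONG_BIT = 2 at bit l2i = 130 % LONG_BIT = 2, and the
 * upcall scan below recovers the port number as (l1i * LONG_BIT) + l2i.
 * Initializing both cursors to LONG_BIT - 1 makes the first scan begin
 * at section 0, bit 0, because the scan pre-increments modulo LONG_BIT.
 */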

DPCPU_DECLARE(struct vcpu_info *, vcpu_info);

#define	XEN_EEXIST		17 /* Xen "already exists" error */
#define	XEN_ALLOCATE_VECTOR	0 /* Allocate a vector for this event channel */
#define	XEN_INVALID_EVTCHN	0 /* Invalid event channel */

#define	is_valid_evtchn(x)	((x) != XEN_INVALID_EVTCHN)

struct xenisrc {
	struct intsrc	xi_intsrc;
	enum evtchn_type xi_type;
	int		xi_cpu;		/* VCPU for delivery. */
	int		xi_vector;	/* Global isrc vector number. */
	evtchn_port_t	xi_port;
	int		xi_pirq;
	int		xi_virq;
	void		*xi_cookie;
	u_int		xi_close:1;	/* close on unbind? */
	u_int		xi_activehi:1;
	u_int		xi_edgetrigger:1;
	u_int		xi_masked:1;
	volatile u_int	xi_refcount;
};

static void	xen_intr_suspend(struct pic *);
static void	xen_intr_resume(struct pic *, bool suspend_cancelled);
static void	xen_intr_enable_source(struct intsrc *isrc);
static void	xen_intr_disable_source(struct intsrc *isrc, int eoi);
static void	xen_intr_eoi_source(struct intsrc *isrc);
static void	xen_intr_enable_intr(struct intsrc *isrc);
static void	xen_intr_disable_intr(struct intsrc *isrc);
static int	xen_intr_vector(struct intsrc *isrc);
static int	xen_intr_source_pending(struct intsrc *isrc);
static int	xen_intr_config_intr(struct intsrc *isrc,
		     enum intr_trigger trig, enum intr_polarity pol);
static int	xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id);

static void	xen_intr_pirq_enable_source(struct intsrc *isrc);
static void	xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi);
static void	xen_intr_pirq_eoi_source(struct intsrc *isrc);
static void	xen_intr_pirq_enable_intr(struct intsrc *isrc);
static void	xen_intr_pirq_disable_intr(struct intsrc *isrc);
static int	xen_intr_pirq_config_intr(struct intsrc *isrc,
		     enum intr_trigger trig, enum intr_polarity pol);

/**
 * PIC interface for all event channel port types except physical IRQs.
 */
struct pic xen_intr_pic = {
	.pic_enable_source  = xen_intr_enable_source,
	.pic_disable_source = xen_intr_disable_source,
	.pic_eoi_source     = xen_intr_eoi_source,
	.pic_enable_intr    = xen_intr_enable_intr,
	.pic_disable_intr   = xen_intr_disable_intr,
	.pic_vector         = xen_intr_vector,
	.pic_source_pending = xen_intr_source_pending,
	.pic_suspend        = xen_intr_suspend,
	.pic_resume         = xen_intr_resume,
	.pic_config_intr    = xen_intr_config_intr,
	.pic_assign_cpu     = xen_intr_assign_cpu
};

/**
 * PIC interface for all event channels representing
 * physical interrupt sources.
 */
struct pic xen_intr_pirq_pic = {
#ifdef __amd64__
	.pic_register_sources = xenpv_register_pirqs,
#endif
	.pic_enable_source  = xen_intr_pirq_enable_source,
	.pic_disable_source = xen_intr_pirq_disable_source,
	.pic_eoi_source     = xen_intr_pirq_eoi_source,
	.pic_enable_intr    = xen_intr_pirq_enable_intr,
	.pic_disable_intr   = xen_intr_pirq_disable_intr,
	.pic_vector         = xen_intr_vector,
	.pic_source_pending = xen_intr_source_pending,
	.pic_config_intr    = xen_intr_pirq_config_intr,
	.pic_assign_cpu     = xen_intr_assign_cpu
};

static struct mtx	 xen_intr_isrc_lock;
static u_int		 xen_intr_auto_vector_count;
static struct xenisrc	*xen_intr_port_to_isrc[NR_EVENT_CHANNELS];
static u_long		*xen_intr_pirq_eoi_map;
static boolean_t	 xen_intr_pirq_eoi_map_enabled;

/*------------------------- Private Functions --------------------------------*/
/**
 * Disable signal delivery for an event channel port on the
 * specified CPU.
 *
 * \param port  The event channel port to mask.
 *
 * This API is used to manage the port<=>CPU binding of event
 * channel handlers.
 *
 * \note  This operation does not preclude reception of an event
 *        for this event channel on another CPU.  To mask the
 *        event channel globally, use evtchn_mask().
 */
static inline void
evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port)
{
	struct xen_intr_pcpu_data *pcpu;

	pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu);
	xen_clear_bit(port, pcpu->evtchn_enabled);
}

/**
 * Enable signal delivery for an event channel port on the
 * specified CPU.
 *
 * \param port  The event channel port to unmask.
 *
 * This API is used to manage the port<=>CPU binding of event
 * channel handlers.
 *
 * \note  This operation does not guarantee that event delivery
 *        is enabled for this event channel port.  The port must
 *        also be globally enabled.  See evtchn_unmask().
 */
static inline void
evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port)
{
	struct xen_intr_pcpu_data *pcpu;

	pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu);
	xen_set_bit(port, pcpu->evtchn_enabled);
}

/**
 * Allocate and register a per-cpu Xen upcall interrupt counter.
 *
 * \param cpu  The cpu for which to register this interrupt count.
 */
static void
xen_intr_intrcnt_add(u_int cpu)
{
	char buf[MAXCOMLEN + 1];
	struct xen_intr_pcpu_data *pcpu;

	pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu);
	if (pcpu->evtchn_intrcnt != NULL)
		return;

	snprintf(buf, sizeof(buf), "cpu%d:xen", cpu);
	intrcnt_add(buf, &pcpu->evtchn_intrcnt);
}

/**
 * Search for an already allocated but currently unused Xen interrupt
 * source object.
 *
 * \param type  Restrict the search to interrupt sources of the given
 *              type.
 *
 * \return  A pointer to a free Xen interrupt source object or NULL.
 */
static struct xenisrc *
xen_intr_find_unused_isrc(enum evtchn_type type)
{
	int isrc_idx;

	KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held"));

	for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) {
		struct xenisrc *isrc;
		u_int vector;

		vector = first_evtchn_irq + isrc_idx;
		isrc = (struct xenisrc *)intr_lookup_source(vector);
		if (isrc != NULL
		 && isrc->xi_type == EVTCHN_TYPE_UNBOUND) {
			KASSERT(isrc->xi_intsrc.is_handlers == 0,
			    ("Free evtchn still has handlers"));
			isrc->xi_type = type;
			return (isrc);
		}
	}
	return (NULL);
}

/**
 * Allocate a Xen interrupt source object.
 *
 * \param type  The type of interrupt source to create.
 *
 * \return  A pointer to a newly allocated Xen interrupt source
 *          object or NULL.
 */
static struct xenisrc *
xen_intr_alloc_isrc(enum evtchn_type type, int vector)
{
	static int warned;
	struct xenisrc *isrc;

	KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held"));

	if (xen_intr_auto_vector_count >= NR_EVENT_CHANNELS) {
		if (!warned) {
			warned = 1;
			printf("xen_intr_alloc: Event channels exhausted.\n");
		}
		return (NULL);
	}

	if (type != EVTCHN_TYPE_PIRQ) {
		vector = first_evtchn_irq + xen_intr_auto_vector_count;
		xen_intr_auto_vector_count++;
	}

	KASSERT((intr_lookup_source(vector) == NULL),
	    ("Trying to use an already allocated vector"));

	mtx_unlock(&xen_intr_isrc_lock);
	isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO);
	isrc->xi_intsrc.is_pic =
	    (type == EVTCHN_TYPE_PIRQ) ? &xen_intr_pirq_pic : &xen_intr_pic;
	isrc->xi_vector = vector;
	isrc->xi_type = type;
	intr_register_source(&isrc->xi_intsrc);
	mtx_lock(&xen_intr_isrc_lock);

	return (isrc);
}

/**
 * Attempt to free an active Xen interrupt source object.
 *
 * \param isrc  The interrupt source object to release.
 *
 * \returns  EBUSY if the source is still in use, otherwise 0.
 */
static int
xen_intr_release_isrc(struct xenisrc *isrc)
{

	mtx_lock(&xen_intr_isrc_lock);
	KASSERT(isrc->xi_intsrc.is_handlers == 0,
	    ("Release called, but xenisrc still in use"));
	evtchn_mask_port(isrc->xi_port);
	evtchn_clear_port(isrc->xi_port);

	/* Rebind port to CPU 0. */
	evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
	evtchn_cpu_unmask_port(0, isrc->xi_port);

	if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) {
		struct evtchn_close close = { .port = isrc->xi_port };

		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
			panic("EVTCHNOP_close failed");
	}

	xen_intr_port_to_isrc[isrc->xi_port] = NULL;
	isrc->xi_cpu = 0;
	isrc->xi_type = EVTCHN_TYPE_UNBOUND;
	isrc->xi_port = 0;
	isrc->xi_cookie = NULL;
	mtx_unlock(&xen_intr_isrc_lock);
	return (0);
}

/**
 * Associate an interrupt handler with an already allocated local Xen
 * event channel port.
 *
 * \param isrcp        The returned Xen interrupt object associated with
 *                     the specified local port.
 * \param local_port   The event channel to bind.
 * \param type         The event channel type of local_port.
 * \param intr_owner   The device making this bind request.
 * \param filter       An interrupt filter handler.  Specify NULL
 *                     to always dispatch to the ithread handler.
 * \param handler      An interrupt ithread handler.  Optional (can
 *                     specify NULL) if all necessary event actions
 *                     are performed by filter.
 * \param arg          Argument to present to both filter and handler.
 * \param flags        Interrupt handler flags.  See sys/bus.h.
 * \param port_handlep Pointer to an opaque handle used to manage this
 *                     registration.
 *
 * \returns  0 on success, otherwise an errno.
 */
static int
xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port,
    enum evtchn_type type, const char *intr_owner, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags,
    xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;
	int error;

	*isrcp = NULL;
	if (port_handlep == NULL) {
		printf("%s: xen_intr_bind_isrc: Bad event handle\n",
		    intr_owner);
		return (EINVAL);
	}

	mtx_lock(&xen_intr_isrc_lock);
	isrc = xen_intr_find_unused_isrc(type);
	if (isrc == NULL) {
		isrc = xen_intr_alloc_isrc(type, XEN_ALLOCATE_VECTOR);
		if (isrc == NULL) {
			mtx_unlock(&xen_intr_isrc_lock);
			return (ENOSPC);
		}
	}
	isrc->xi_port = local_port;
	xen_intr_port_to_isrc[local_port] = isrc;
	refcount_init(&isrc->xi_refcount, 1);
	mtx_unlock(&xen_intr_isrc_lock);

	/* Assign the opaque handle (a pointer to this isrc's vector). */
	*port_handlep = &isrc->xi_vector;

#ifdef SMP
	if (type == EVTCHN_TYPE_PORT) {
		/*
		 * By default all interrupts are assigned to vCPU#0
		 * unless specified otherwise, so shuffle them to balance
		 * the interrupt load.
		 */
		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
	}
#endif

	if (filter == NULL && handler == NULL) {
		/*
		 * No filter/handler provided, so leave the event channel
		 * masked and without a valid handler; the caller is
		 * in charge of setting that up.
		 */
		*isrcp = isrc;
		return (0);
	}

	error = xen_intr_add_handler(intr_owner, filter, handler, arg, flags,
	    *port_handlep);
	if (error != 0) {
		xen_intr_release_isrc(isrc);
		return (error);
	}
	*isrcp = isrc;
	return (0);
}

/**
 * Lookup a Xen interrupt source object given an interrupt binding handle.
 *
 * \param handle  A handle initialized by a previous call to
 *                xen_intr_bind_isrc().
 *
 * \returns  A pointer to the Xen interrupt source object associated
 *           with the given interrupt handle.  NULL if no association
 *           currently exists.
 */
static struct xenisrc *
xen_intr_isrc(xen_intr_handle_t handle)
{
	int vector;

	if (handle == NULL)
		return (NULL);

	vector = *(int *)handle;
	KASSERT(vector >= first_evtchn_irq &&
	    vector < (first_evtchn_irq + xen_intr_auto_vector_count),
	    ("Xen interrupt vector is out of range"));

	return ((struct xenisrc *)intr_lookup_source(vector));
}

/**
 * Determine the event channel ports at the given section of the
 * event port bitmap which have pending events for the given cpu.
 *
 * \param pcpu  The Xen interrupt pcpu data for the cpu being queried.
 * \param sh    The Xen shared info area.
 * \param idx   The index of the section of the event channel bitmap to
 *              inspect.
 *
 * \returns  A u_long with bits set for every event channel with pending
 *           events.
 */
static inline u_long
xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh,
    u_int idx)
{

	CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(sh->evtchn_pending[0]));
	CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(pcpu->evtchn_enabled[0]));
	CTASSERT(sizeof(sh->evtchn_mask) == sizeof(sh->evtchn_pending));
	CTASSERT(sizeof(sh->evtchn_mask) == sizeof(pcpu->evtchn_enabled));
	return (sh->evtchn_pending[idx]
	      & ~sh->evtchn_mask[idx]
	      & pcpu->evtchn_enabled[idx]);
}

/**
 * Interrupt handler for processing all Xen event channel events.
 *
 * \param trap_frame  The trap frame context for the current interrupt.
 */
void
xen_intr_handle_upcall(struct trapframe *trap_frame)
{
	u_int l1i, l2i, port, cpu;
	u_long masked_l1, masked_l2;
	struct xenisrc *isrc;
	shared_info_t *s;
	vcpu_info_t *v;
	struct xen_intr_pcpu_data *pc;
	u_long l1, l2;

	/*
	 * Disable preemption in order to always check and fire events
	 * on the right vCPU.
	 */
	critical_enter();

	cpu = PCPU_GET(cpuid);
	pc  = DPCPU_PTR(xen_intr_pcpu);
	s   = HYPERVISOR_shared_info;
	v   = DPCPU_GET(vcpu_info);

	if (xen_hvm_domain() && !xen_vector_callback_enabled) {
		KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU"));
	}

	v->evtchn_upcall_pending = 0;

#if 0
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
	/* Clear master flag /before/ clearing selector flag. */
	wmb();
#endif
#endif

	l1 = atomic_readandclear_long(&v->evtchn_pending_sel);

	l1i = pc->last_processed_l1i;
	l2i = pc->last_processed_l2i;
	(*pc->evtchn_intrcnt)++;

	while (l1 != 0) {
		l1i = (l1i + 1) % LONG_BIT;
		masked_l1 = l1 & ((~0UL) << l1i);

		if (masked_l1 == 0) {
			/*
			 * if we masked out all events, wrap around
			 * to the beginning.
			 */
			l1i = LONG_BIT - 1;
			l2i = LONG_BIT - 1;
			continue;
		}
		l1i = ffsl(masked_l1) - 1;

		do {
			l2 = xen_intr_active_ports(pc, s, l1i);

			l2i = (l2i + 1) % LONG_BIT;
			masked_l2 = l2 & ((~0UL) << l2i);

			if (masked_l2 == 0) {
				/* if we masked out all events, move on */
				l2i = LONG_BIT - 1;
				break;
			}
			l2i = ffsl(masked_l2) - 1;

			/* process port */
			port = (l1i * LONG_BIT) + l2i;
			synch_clear_bit(port, &s->evtchn_pending[0]);

			isrc = xen_intr_port_to_isrc[port];
			if (__predict_false(isrc == NULL))
				continue;

			/* Make sure we are firing on the right vCPU */
			KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)),
				("Received unexpected event on vCPU#%d, event bound to vCPU#%d",
				PCPU_GET(cpuid), isrc->xi_cpu));

			intr_execute_handlers(&isrc->xi_intsrc, trap_frame);

			/*
			 * If this is the final port processed,
			 * we'll pick up here+1 next time.
			 */
			pc->last_processed_l1i = l1i;
			pc->last_processed_l2i = l2i;
		} while (l2i != LONG_BIT - 1);

		l2 = xen_intr_active_ports(pc, s, l1i);
		if (l2 == 0) {
			/*
			 * We handled all ports, so we can clear the
			 * selector bit.
			 */
			l1 &= ~(1UL << l1i);
		}
	}
	critical_exit();
}

static int
xen_intr_init(void *dummy __unused)
{
	shared_info_t *s = HYPERVISOR_shared_info;
	struct xen_intr_pcpu_data *pcpu;
	struct physdev_pirq_eoi_gmfn eoi_gmfn;
	int i, rc;

	if (!xen_domain())
		return (0);

	mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);

	/*
	 * Set the per-cpu mask of CPU#0 to enable all, since by default all
	 * event channels are bound to CPU#0.
	 */
	CPU_FOREACH(i) {
		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0,
		    sizeof(pcpu->evtchn_enabled));
	}

	for (i = 0; i < nitems(s->evtchn_mask); i++)
		atomic_store_rel_long(&s->evtchn_mask[i], ~0);

	/* Try to register PIRQ EOI map */
	xen_intr_pirq_eoi_map = malloc(PAGE_SIZE, M_XENINTR, M_WAITOK | M_ZERO);
	eoi_gmfn.gmfn = atop(vtophys(xen_intr_pirq_eoi_map));
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
	if (rc != 0) {
		if (bootverbose)
			printf("Xen interrupts: unable to register PIRQ EOI map\n");
	} else
		xen_intr_pirq_eoi_map_enabled = true;

	intr_register_pic(&xen_intr_pic);
	if (xen_pv_domain() && xen_initial_domain())
		intr_register_pic(&xen_intr_pirq_pic);

	if (bootverbose)
		printf("Xen interrupt system initialized\n");

	return (0);
}
SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL);

static void
xen_intrcnt_init(void *dummy __unused)
{
	unsigned int i;

	if (!xen_domain())
		return;

	/*
	 * Register interrupt count manually as we aren't guaranteed to see a
	 * call to xen_intr_assign_cpu() before our first interrupt.
	 */
	CPU_FOREACH(i)
		xen_intr_intrcnt_add(i);
}
SYSINIT(xen_intrcnt_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intrcnt_init, NULL);

void
xen_intr_alloc_irqs(void)
{

	if (num_io_irqs > UINT_MAX - NR_EVENT_CHANNELS)
		panic("IRQ allocation overflow (num_msi_irqs too high?)");
	first_evtchn_irq = num_io_irqs;
	num_io_irqs += NR_EVENT_CHANNELS;
}
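
/*
 * Informative note: after xen_intr_alloc_irqs() runs, event channel
 * vectors occupy the half-open range
 * [first_evtchn_irq, first_evtchn_irq + NR_EVENT_CHANNELS); the subset
 * actually handed out so far,
 * [first_evtchn_irq, first_evtchn_irq + xen_intr_auto_vector_count), is
 * what xen_intr_isrc() asserts when translating an opaque handle back
 * into an interrupt source.
 */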

/*--------------------------- Common PIC Functions ---------------------------*/
/**
 * Prepare this PIC for system suspension.
 */
static void
xen_intr_suspend(struct pic *unused)
{
}

static void
xen_rebind_ipi(struct xenisrc *isrc)
{
#ifdef SMP
	int cpu = isrc->xi_cpu;
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	int error;
	struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id };

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
	                                    &bind_ipi);
	if (error != 0)
		panic("unable to rebind xen IPI: %d", error);

	isrc->xi_port = bind_ipi.port;
	isrc->xi_cpu = 0;
	xen_intr_port_to_isrc[bind_ipi.port] = isrc;

	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
	                            cpu_apic_ids[cpu]);
	if (error)
		panic("unable to bind xen IPI to CPU#%d: %d",
		      cpu, error);

	evtchn_unmask_port(bind_ipi.port);
#else
	panic("Resume IPI event channel on UP");
#endif
}

static void
xen_rebind_virq(struct xenisrc *isrc)
{
	int cpu = isrc->xi_cpu;
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	int error;
	struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
	                                      .vcpu = vcpu_id };

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
	                                    &bind_virq);
	if (error != 0)
		panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);

	isrc->xi_port = bind_virq.port;
	isrc->xi_cpu = 0;
	xen_intr_port_to_isrc[bind_virq.port] = isrc;

#ifdef SMP
	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
	                            cpu_apic_ids[cpu]);
	if (error)
		panic("unable to bind xen VIRQ#%d to CPU#%d: %d",
		      isrc->xi_virq, cpu, error);
#endif

	evtchn_unmask_port(bind_virq.port);
}

/**
 * Return this PIC to service after being suspended.
 */
static void
xen_intr_resume(struct pic *unused, bool suspend_cancelled)
{
	shared_info_t *s = HYPERVISOR_shared_info;
	struct xenisrc *isrc;
	u_int isrc_idx;
	int i;

	if (suspend_cancelled)
		return;

	/* Reset the per-CPU masks */
	CPU_FOREACH(i) {
		struct xen_intr_pcpu_data *pcpu;

		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0,
		    sizeof(pcpu->evtchn_enabled));
	}

	/* Mask all event channels. */
	for (i = 0; i < nitems(s->evtchn_mask); i++)
		atomic_store_rel_long(&s->evtchn_mask[i], ~0);

	/* Remove port -> isrc mappings */
	memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc));

	/* Free unused isrcs and rebind VIRQs and IPIs */
	for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) {
		u_int vector;

		vector = first_evtchn_irq + isrc_idx;
		isrc = (struct xenisrc *)intr_lookup_source(vector);
		if (isrc != NULL) {
			isrc->xi_port = 0;
			switch (isrc->xi_type) {
			case EVTCHN_TYPE_IPI:
				xen_rebind_ipi(isrc);
				break;
			case EVTCHN_TYPE_VIRQ:
				xen_rebind_virq(isrc);
				break;
			default:
				break;
			}
		}
	}
}

/**
 * Disable a Xen interrupt source.
 *
 * \param isrc  The interrupt source to disable.
 */
static void
xen_intr_disable_intr(struct intsrc *base_isrc)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;

	evtchn_mask_port(isrc->xi_port);
}

/**
 * Determine the global interrupt vector number for
 * a Xen interrupt source.
 *
 * \param isrc  The interrupt source to query.
 *
 * \return  The vector number corresponding to the given interrupt source.
 */
static int
xen_intr_vector(struct intsrc *base_isrc)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;

	return (isrc->xi_vector);
}

/**
 * Determine whether or not interrupt events are pending on the
 * given interrupt source.
 *
 * \param isrc  The interrupt source to query.
 *
 * \returns  0 if no events are pending, otherwise non-zero.
 */
static int
xen_intr_source_pending(struct intsrc *isrc)
{
	/*
	 * Event channels are edge triggered and never masked.
	 * There can be no pending events.
	 */
	return (0);
}

/**
 * Perform configuration of an interrupt source.
 *
 * \param isrc  The interrupt source to configure.
 * \param trig  Edge or level.
 * \param pol   Active high or low.
 *
 * \returns  0 on success, otherwise an errno.
 */
static int
xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
    enum intr_polarity pol)
{
	/* Configuration is only possible via the evtchn apis. */
	return (ENODEV);
}

/**
 * Configure CPU affinity for interrupt source event delivery.
 *
 * \param isrc     The interrupt source to configure.
 * \param apic_id  The apic id of the CPU for handling future events.
 *
 * \returns  0 if successful, otherwise an errno.
 */
static int
xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
{
#ifdef SMP
	struct evtchn_bind_vcpu bind_vcpu;
	struct xenisrc *isrc;
	u_int to_cpu, vcpu_id;
	int error, masked;

	if (xen_vector_callback_enabled == 0)
		return (EOPNOTSUPP);

	to_cpu = apic_cpuid(apic_id);
	vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id;

	mtx_lock(&xen_intr_isrc_lock);
	isrc = (struct xenisrc *)base_isrc;
	if (!is_valid_evtchn(isrc->xi_port)) {
		mtx_unlock(&xen_intr_isrc_lock);
		return (EINVAL);
	}

	/*
	 * Mask the event channel while binding it to prevent interrupt
	 * delivery with an inconsistent state in isrc->xi_cpu.
	 */
	masked = evtchn_test_and_set_mask(isrc->xi_port);
	if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) ||
	    (isrc->xi_type == EVTCHN_TYPE_IPI)) {
		/*
		 * Virtual IRQs are associated with a cpu by
		 * the Hypervisor at evtchn_bind_virq time, so
		 * all we need to do is update the per-CPU masks.
		 */
		evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
		isrc->xi_cpu = to_cpu;
		evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port);
		goto out;
	}

	bind_vcpu.port = isrc->xi_port;
	bind_vcpu.vcpu = vcpu_id;

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu);
	if (error == 0 && isrc->xi_cpu != to_cpu) {
		/* Commit to new binding by removing the old one. */
		evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
		isrc->xi_cpu = to_cpu;
		evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port);
	}

out:
	if (masked == 0)
		evtchn_unmask_port(isrc->xi_port);
	mtx_unlock(&xen_intr_isrc_lock);
	return (0);
#else
	return (EOPNOTSUPP);
#endif
}

/*------------------- Virtual Interrupt Source PIC Functions -----------------*/
/*
 * Mask a level triggered interrupt source.
 *
 * \param isrc  The interrupt source to mask (if necessary).
 * \param eoi   If non-zero, perform any necessary end-of-interrupt
 *              acknowledgements.
 */
static void
xen_intr_disable_source(struct intsrc *base_isrc, int eoi)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	/*
	 * NB: checking if the event channel is already masked is
	 * needed because the event channel user-space device
	 * masks event channels on its filter as part of its
	 * normal operation, and those shouldn't be automatically
	 * unmasked by the generic interrupt code. The event channel
	 * device will unmask them when needed.
	 */
	isrc->xi_masked = !!evtchn_test_and_set_mask(isrc->xi_port);
}

/*
 * Unmask a level triggered interrupt source.
 *
 * \param isrc  The interrupt source to unmask (if necessary).
 */
static void
xen_intr_enable_source(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	if (isrc->xi_masked == 0)
		evtchn_unmask_port(isrc->xi_port);
}

/*
 * Perform any necessary end-of-interrupt acknowledgements.
 *
 * \param isrc  The interrupt source to EOI.
 */
static void
xen_intr_eoi_source(struct intsrc *base_isrc)
{
}

/*
 * Enable and unmask the interrupt source.
 *
 * \param isrc  The interrupt source to enable.
 */
static void
xen_intr_enable_intr(struct intsrc *base_isrc)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;

	evtchn_unmask_port(isrc->xi_port);
}

/*------------------ Physical Interrupt Source PIC Functions -----------------*/
/*
 * Mask a level triggered interrupt source.
 *
 * \param isrc  The interrupt source to mask (if necessary).
 * \param eoi   If non-zero, perform any necessary end-of-interrupt
 *              acknowledgements.
 */
static void
xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	if (isrc->xi_edgetrigger == 0)
		evtchn_mask_port(isrc->xi_port);
	if (eoi == PIC_EOI)
		xen_intr_pirq_eoi_source(base_isrc);
}

/*
 * Unmask a level triggered interrupt source.
 *
 * \param isrc  The interrupt source to unmask (if necessary).
 */
static void
xen_intr_pirq_enable_source(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	if (isrc->xi_edgetrigger == 0)
		evtchn_unmask_port(isrc->xi_port);
}

/*
 * Perform any necessary end-of-interrupt acknowledgements.
 *
 * \param isrc  The interrupt source to EOI.
 */
static void
xen_intr_pirq_eoi_source(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;
	int error;

	isrc = (struct xenisrc *)base_isrc;

	if (xen_test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map)) {
		struct physdev_eoi eoi = { .irq = isrc->xi_pirq };

		error = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
		if (error != 0)
			panic("Unable to EOI PIRQ#%d: %d\n",
			    isrc->xi_pirq, error);
	}
}

/*
 * Enable and unmask the interrupt source.
 *
 * \param isrc  The interrupt source to enable.
 */
static void
xen_intr_pirq_enable_intr(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;
	struct evtchn_bind_pirq bind_pirq;
	struct physdev_irq_status_query irq_status;
	int error;

	isrc = (struct xenisrc *)base_isrc;

	if (!xen_intr_pirq_eoi_map_enabled) {
		irq_status.irq = isrc->xi_pirq;
		error = HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query,
		    &irq_status);
		if (error)
			panic("unable to get status of IRQ#%d", isrc->xi_pirq);

		if (irq_status.flags & XENIRQSTAT_needs_eoi) {
			/*
			 * Since the dynamic PIRQ EOI map is not available,
			 * mark the PIRQ as needing EOI unconditionally.
			 */
			xen_set_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map);
		}
	}

	bind_pirq.pirq = isrc->xi_pirq;
	bind_pirq.flags = isrc->xi_edgetrigger ? 0 : BIND_PIRQ__WILL_SHARE;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
	if (error)
		panic("unable to bind IRQ#%d", isrc->xi_pirq);

	isrc->xi_port = bind_pirq.port;

	mtx_lock(&xen_intr_isrc_lock);
	KASSERT((xen_intr_port_to_isrc[bind_pirq.port] == NULL),
	    ("trying to override an already setup event channel port"));
	xen_intr_port_to_isrc[bind_pirq.port] = isrc;
	mtx_unlock(&xen_intr_isrc_lock);

	evtchn_unmask_port(isrc->xi_port);
}

/*
 * Disable an interrupt source.
 *
 * \param isrc  The interrupt source to disable.
 */
static void
xen_intr_pirq_disable_intr(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;
	struct evtchn_close close;
	int error;

	isrc = (struct xenisrc *)base_isrc;

	evtchn_mask_port(isrc->xi_port);

	close.port = isrc->xi_port;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
	if (error)
		panic("unable to close event channel %d IRQ#%d",
		    isrc->xi_port, isrc->xi_pirq);

	mtx_lock(&xen_intr_isrc_lock);
	xen_intr_port_to_isrc[isrc->xi_port] = NULL;
	mtx_unlock(&xen_intr_isrc_lock);

	isrc->xi_port = 0;
}

/**
 * Perform configuration of an interrupt source.
 *
 * \param isrc  The interrupt source to configure.
 * \param trig  Edge or level.
 * \param pol   Active high or low.
 *
 * \returns  0 on success, otherwise an errno.
 */
static int
xen_intr_pirq_config_intr(struct intsrc *base_isrc, enum intr_trigger trig,
    enum intr_polarity pol)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;
	struct physdev_setup_gsi setup_gsi;
	int error;

	KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM),
	    ("%s: Conforming trigger or polarity\n", __func__));

	setup_gsi.gsi = isrc->xi_pirq;
	setup_gsi.triggering = trig == INTR_TRIGGER_EDGE ? 0 : 1;
	setup_gsi.polarity = pol == INTR_POLARITY_HIGH ? 0 : 1;

	error = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
	if (error == -XEN_EEXIST) {
		if ((isrc->xi_edgetrigger && (trig != INTR_TRIGGER_EDGE)) ||
		    (isrc->xi_activehi && (pol != INTR_POLARITY_HIGH)))
			panic("unable to reconfigure interrupt IRQ#%d",
			    isrc->xi_pirq);
		error = 0;
	}
	if (error)
		panic("unable to configure IRQ#%d\n", isrc->xi_pirq);

	isrc->xi_activehi = pol == INTR_POLARITY_HIGH ? 1 : 0;
	isrc->xi_edgetrigger = trig == INTR_TRIGGER_EDGE ? 1 : 0;

	return (0);
}

/*--------------------------- Public Functions -------------------------------*/
/*------- API comments for these methods can be found in xen/xenintr.h -------*/
int
xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port,
    driver_filter_t filter, driver_intr_t handler, void *arg,
    enum intr_type flags, xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;
	int error;

	error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT,
	    device_get_nameunit(dev), filter, handler, arg, flags,
	    port_handlep);
	if (error != 0)
		return (error);

	/*
	 * The Event Channel API didn't open this port, so it is not
	 * responsible for closing it automatically on unbind.
	 */
	isrc->xi_close = 0;
	return (0);
}
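
/*
 * Usage sketch (hypothetical driver code; the softc fields and
 * my_filter are illustrative only, not part of this file):
 *
 *	xen_intr_handle_t handle;
 *	int error;
 *
 *	error = xen_intr_bind_local_port(sc->dev, sc->evtchn, my_filter,
 *	    NULL, sc, INTR_TYPE_BIO | INTR_MPSAFE, &handle);
 *	if (error != 0)
 *		return (error);
 *
 * Because xi_close is left at 0, a later xen_intr_unbind(&handle)
 * releases the binding but leaves the port itself open.
 */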

int
xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain,
    driver_filter_t filter, driver_intr_t handler, void *arg,
    enum intr_type flags, xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;
	struct evtchn_alloc_unbound alloc_unbound;
	int error;

	alloc_unbound.dom        = DOMID_SELF;
	alloc_unbound.remote_dom = remote_domain;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
		    &alloc_unbound);
	if (error != 0) {
		/*
		 * XXX Trap Hypercall error code Linuxisms in
		 *     the HYPERCALL layer.
		 */
		return (-error);
	}

	error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT,
	    device_get_nameunit(dev), filter, handler, arg, flags,
	    port_handlep);
	if (error != 0) {
		evtchn_close_t close = { .port = alloc_unbound.port };

		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
			panic("EVTCHNOP_close failed");
		return (error);
	}

	isrc->xi_close = 1;
	return (0);
}
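
/*
 * Sketch of the interdomain pairing as assumed here (names are
 * illustrative): a backend calls xen_intr_alloc_and_bind_local_port()
 * with the peer's domain id and advertises the resulting port (see
 * xen_intr_port()), e.g. via xenstore:
 *
 *	error = xen_intr_alloc_and_bind_local_port(sc->dev, otherend_id,
 *	    backend_filter, NULL, sc, INTR_TYPE_NET | INTR_MPSAFE,
 *	    &sc->handle);
 *
 * The peer then connects its end with xen_intr_bind_remote_port()
 * below, after which xen_intr_signal() on either side raises an event
 * on the other.
 */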

int
xen_intr_bind_remote_port(device_t dev, u_int remote_domain,
    u_int remote_port, driver_filter_t filter, driver_intr_t handler,
    void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;
	struct evtchn_bind_interdomain bind_interdomain;
	int error;

	bind_interdomain.remote_dom  = remote_domain;
	bind_interdomain.remote_port = remote_port;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
					    &bind_interdomain);
	if (error != 0) {
		/*
		 * XXX Trap Hypercall error code Linuxisms in
		 *     the HYPERCALL layer.
		 */
		return (-error);
	}

	error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port,
	    EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg,
	    flags, port_handlep);
	if (error != 0) {
		evtchn_close_t close = { .port = bind_interdomain.local_port };

		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
			panic("EVTCHNOP_close failed");
		return (error);
	}

	/*
	 * The Event Channel API opened this port, so it is
	 * responsible for closing it automatically on unbind.
	 */
	isrc->xi_close = 1;
	return (0);
}

int
xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
    driver_filter_t filter, driver_intr_t handler, void *arg,
    enum intr_type flags, xen_intr_handle_t *port_handlep)
{
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	struct xenisrc *isrc;
	struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id };
	int error;

	isrc = NULL;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq);
	if (error != 0) {
		/*
		 * XXX Trap Hypercall error code Linuxisms in
		 *     the HYPERCALL layer.
		 */
		return (-error);
	}

	error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ,
	    device_get_nameunit(dev), filter, handler, arg, flags,
	    port_handlep);

#ifdef SMP
	if (error == 0)
		error = intr_event_bind(isrc->xi_intsrc.is_event, cpu);
#endif

	if (error != 0) {
		evtchn_close_t close = { .port = bind_virq.port };

		xen_intr_unbind(port_handlep);
		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
			panic("EVTCHNOP_close failed");
		return (error);
	}

#ifdef SMP
	if (isrc->xi_cpu != cpu) {
		/*
		 * Too early in the boot process for the generic interrupt
		 * code to perform the binding.  Update our event channel
		 * masks manually so events can't fire on the wrong cpu
		 * during AP startup.
		 */
		xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]);
	}
#endif

	/*
	 * The Event Channel API opened this port, so it is
	 * responsible for closing it automatically on unbind.
	 */
	isrc->xi_close = 1;
	isrc->xi_virq = virq;

	return (0);
}
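
/*
 * Usage sketch (illustrative): binding VIRQ_DEBUG on CPU 0 with a
 * filter-only handler; debug_filter is a hypothetical name:
 *
 *	xen_intr_handle_t handle;
 *	int error;
 *
 *	error = xen_intr_bind_virq(dev, VIRQ_DEBUG, 0, debug_filter,
 *	    NULL, NULL, INTR_TYPE_MISC, &handle);
 */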

int
xen_intr_alloc_and_bind_ipi(u_int cpu, driver_filter_t filter,
    enum intr_type flags, xen_intr_handle_t *port_handlep)
{
#ifdef SMP
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	struct xenisrc *isrc;
	struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id };
	/* Same size as the one used by intr_handler->ih_name. */
	char name[MAXCOMLEN + 1];
	int error;

	isrc = NULL;
	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
	if (error != 0) {
		/*
		 * XXX Trap Hypercall error code Linuxisms in
		 *     the HYPERCALL layer.
		 */
		return (-error);
	}

	snprintf(name, sizeof(name), "cpu%u", cpu);

	error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI,
	    name, filter, NULL, NULL, flags, port_handlep);
	if (error != 0) {
		evtchn_close_t close = { .port = bind_ipi.port };

		xen_intr_unbind(port_handlep);
		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
			panic("EVTCHNOP_close failed");
		return (error);
	}

	if (isrc->xi_cpu != cpu) {
		/*
		 * Too early in the boot process for the generic interrupt
		 * code to perform the binding.  Update our event channel
		 * masks manually so events can't fire on the wrong cpu
		 * during AP startup.
		 */
		xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]);
	}

	/*
	 * The Event Channel API opened this port, so it is
	 * responsible for closing it automatically on unbind.
	 */
	isrc->xi_close = 1;
	return (0);
#else
	return (EOPNOTSUPP);
#endif
}

int
xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol)
{
	struct physdev_map_pirq map_pirq;
	struct xenisrc *isrc;
	int error;

	if (vector == 0)
		return (EINVAL);

	if (bootverbose)
		printf("xen: register IRQ#%d\n", vector);

	map_pirq.domid = DOMID_SELF;
	map_pirq.type = MAP_PIRQ_TYPE_GSI;
	map_pirq.index = vector;
	map_pirq.pirq = vector;

	error = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_pirq);
	if (error) {
		printf("xen: unable to map IRQ#%d\n", vector);
		return (error);
	}

	mtx_lock(&xen_intr_isrc_lock);
	isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector);
	mtx_unlock(&xen_intr_isrc_lock);
	KASSERT((isrc != NULL), ("xen: unable to allocate isrc for interrupt"));
	isrc->xi_pirq = vector;
	isrc->xi_activehi = pol == INTR_POLARITY_HIGH ? 1 : 0;
	isrc->xi_edgetrigger = trig == INTR_TRIGGER_EDGE ? 1 : 0;

	return (0);
}

int
xen_register_msi(device_t dev, int vector, int count)
{
	struct physdev_map_pirq msi_irq;
	struct xenisrc *isrc;
	int ret;

	memset(&msi_irq, 0, sizeof(msi_irq));
	msi_irq.domid = DOMID_SELF;
	msi_irq.type = count == 1 ?
	    MAP_PIRQ_TYPE_MSI_SEG : MAP_PIRQ_TYPE_MULTI_MSI;
	msi_irq.index = -1;
	msi_irq.pirq = -1;
	msi_irq.bus = pci_get_bus(dev) | (pci_get_domain(dev) << 16);
	msi_irq.devfn = (pci_get_slot(dev) << 3) | pci_get_function(dev);
	msi_irq.entry_nr = count;

	ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &msi_irq);
	if (ret != 0)
		return (ret);
	if (count != msi_irq.entry_nr) {
		panic("unable to setup all requested MSI vectors "
		    "(expected %d got %d)", count, msi_irq.entry_nr);
	}

	mtx_lock(&xen_intr_isrc_lock);
	for (int i = 0; i < count; i++) {
		isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector + i);
		KASSERT(isrc != NULL,
		    ("xen: unable to allocate isrc for interrupt"));
		isrc->xi_pirq = msi_irq.pirq + i;
		/* MSI interrupts are always edge triggered */
		isrc->xi_edgetrigger = 1;
	}
	mtx_unlock(&xen_intr_isrc_lock);

	return (0);
}

int
xen_release_msi(int vector)
{
	struct physdev_unmap_pirq unmap;
	struct xenisrc *isrc;
	int ret;

	isrc = (struct xenisrc *)intr_lookup_source(vector);
	if (isrc == NULL)
		return (ENXIO);

	unmap.pirq = isrc->xi_pirq;
	ret = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap);
	if (ret != 0)
		return (ret);

	xen_intr_release_isrc(isrc);

	return (0);
}

int
xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...)
{
	char descr[MAXCOMLEN + 1];
	struct xenisrc *isrc;
	va_list ap;

	isrc = xen_intr_isrc(port_handle);
	if (isrc == NULL)
		return (EINVAL);

	va_start(ap, fmt);
	vsnprintf(descr, sizeof(descr), fmt, ap);
	va_end(ap);
	return (intr_describe(isrc->xi_vector, isrc->xi_cookie, descr));
}

void
xen_intr_unbind(xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;

	KASSERT(port_handlep != NULL,
	    ("NULL xen_intr_handle_t passed to xen_intr_unbind"));

	isrc = xen_intr_isrc(*port_handlep);
	*port_handlep = NULL;
	if (isrc == NULL)
		return;

	mtx_lock(&xen_intr_isrc_lock);
	if (refcount_release(&isrc->xi_refcount) == 0) {
		mtx_unlock(&xen_intr_isrc_lock);
		return;
	}
	mtx_unlock(&xen_intr_isrc_lock);

	if (isrc->xi_cookie != NULL)
		intr_remove_handler(isrc->xi_cookie);
	xen_intr_release_isrc(isrc);
}

void
xen_intr_signal(xen_intr_handle_t handle)
{
	struct xenisrc *isrc;

	isrc = xen_intr_isrc(handle);
	if (isrc != NULL) {
		KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT ||
			isrc->xi_type == EVTCHN_TYPE_IPI,
			("evtchn_signal on something other than a local port"));
		struct evtchn_send send = { .port = isrc->xi_port };

		(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
	}
}
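
/*
 * Sketch (illustrative): a ring producer typically pairs this with the
 * standard Xen ring macros, kicking the peer only when needed:
 *
 *	int notify;
 *
 *	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
 *	if (notify)
 *		xen_intr_signal(sc->handle);
 */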

evtchn_port_t
xen_intr_port(xen_intr_handle_t handle)
{
	struct xenisrc *isrc;

	isrc = xen_intr_isrc(handle);
	if (isrc == NULL)
		return (0);

	return (isrc->xi_port);
}

int
xen_intr_add_handler(const char *name, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags,
    xen_intr_handle_t handle)
{
	struct xenisrc *isrc;
	int error;

	isrc = xen_intr_isrc(handle);
	if (isrc == NULL || isrc->xi_cookie != NULL)
		return (EINVAL);

	error = intr_add_handler(name, isrc->xi_vector, filter, handler, arg,
	    flags | INTR_EXCL, &isrc->xi_cookie, 0);
	if (error != 0) {
		printf(
		    "%s: xen_intr_add_handler: intr_add_handler failed: %d\n",
		    name, error);
	}

	return (error);
}
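
/*
 * Sketch (illustrative): callers that bound a port with NULL filter and
 * handler (leaving the event channel masked) can attach one later;
 * my_filter and sc are hypothetical:
 *
 *	error = xen_intr_add_handler(device_get_nameunit(dev), my_filter,
 *	    NULL, sc, INTR_TYPE_NET | INTR_MPSAFE, handle);
 */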

int
xen_intr_get_evtchn_from_port(evtchn_port_t port, xen_intr_handle_t *handlep)
{

	if (!is_valid_evtchn(port) || port >= NR_EVENT_CHANNELS)
		return (EINVAL);

	if (handlep == NULL)
		return (EINVAL);

	mtx_lock(&xen_intr_isrc_lock);
	if (xen_intr_port_to_isrc[port] == NULL) {
		mtx_unlock(&xen_intr_isrc_lock);
		return (EINVAL);
	}
	refcount_acquire(&xen_intr_port_to_isrc[port]->xi_refcount);
	mtx_unlock(&xen_intr_isrc_lock);

	/* Assign the opaque handle (a pointer to this isrc's vector). */
	*handlep = &xen_intr_port_to_isrc[port]->xi_vector;

	return (0);
}

#ifdef DDB
static const char *
xen_intr_print_type(enum evtchn_type type)
{
	static const char *evtchn_type_to_string[EVTCHN_TYPE_COUNT] = {
		[EVTCHN_TYPE_UNBOUND]	= "UNBOUND",
		[EVTCHN_TYPE_PIRQ]	= "PIRQ",
		[EVTCHN_TYPE_VIRQ]	= "VIRQ",
		[EVTCHN_TYPE_IPI]	= "IPI",
		[EVTCHN_TYPE_PORT]	= "PORT",
	};

	if (type >= EVTCHN_TYPE_COUNT)
		return ("UNKNOWN");

	return (evtchn_type_to_string[type]);
}

static void
xen_intr_dump_port(struct xenisrc *isrc)
{
	struct xen_intr_pcpu_data *pcpu;
	shared_info_t *s = HYPERVISOR_shared_info;
	int i;

	db_printf("Port %d Type: %s\n",
	    isrc->xi_port, xen_intr_print_type(isrc->xi_type));
	if (isrc->xi_type == EVTCHN_TYPE_PIRQ) {
		db_printf("\tPirq: %d ActiveHi: %d EdgeTrigger: %d "
		    "NeedsEOI: %d\n",
		    isrc->xi_pirq, isrc->xi_activehi, isrc->xi_edgetrigger,
		    !!xen_test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map));
	}
	if (isrc->xi_type == EVTCHN_TYPE_VIRQ)
		db_printf("\tVirq: %d\n", isrc->xi_virq);

	db_printf("\tMasked: %d Pending: %d\n",
	    !!xen_test_bit(isrc->xi_port, &s->evtchn_mask[0]),
	    !!xen_test_bit(isrc->xi_port, &s->evtchn_pending[0]));

	db_printf("\tPer-CPU Masks: ");
	CPU_FOREACH(i) {
		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		db_printf("cpu#%d: %d ", i,
		    !!xen_test_bit(isrc->xi_port, pcpu->evtchn_enabled));
	}
	db_printf("\n");
}

DB_SHOW_COMMAND(xen_evtchn, db_show_xen_evtchn)
{
	int i;

	if (!xen_domain()) {
		db_printf("Only available on Xen guests\n");
		return;
	}

	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		struct xenisrc *isrc;

		isrc = xen_intr_port_to_isrc[i];
		if (isrc == NULL)
			continue;

		xen_intr_dump_port(isrc);
	}
}
#endif /* DDB */