msi.c revision 333126
1/*-
2 * Copyright (c) 2006 Yahoo!, Inc.
3 * All rights reserved.
4 * Written by: John Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the names of any co-contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * Support for PCI Message Signalled Interrupts (MSI).  MSI interrupts on
33 * x86 are basically APIC messages that the northbridge delivers directly
34 * to the local APICs as if they had come from an I/O APIC.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: stable/10/sys/x86/x86/msi.c 333126 2018-04-30 20:29:28Z jhb $");
39
40#include <sys/param.h>
41#include <sys/bus.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/sx.h>
47#include <sys/sysctl.h>
48#include <sys/systm.h>
49#include <x86/apicreg.h>
50#include <machine/cputypes.h>
51#include <machine/md_var.h>
52#include <machine/frame.h>
53#include <machine/intr_machdep.h>
54#include <machine/apicvar.h>
55#include <machine/specialreg.h>
56#include <dev/pci/pcivar.h>
57
/*
 * Fields in the address word of an Intel MSI message.  For _RH and _DM
 * the first define is the field mask and the following ones are the
 * values that field can take.
 */
#define	MSI_INTEL_ADDR_DEST		0x000ff000
#define	MSI_INTEL_ADDR_RH		0x00000008
#define	MSI_INTEL_ADDR_RH_ON		0x00000008
#define	MSI_INTEL_ADDR_RH_OFF		0x00000000
#define	MSI_INTEL_ADDR_DM		0x00000004
#define	MSI_INTEL_ADDR_DM_PHYSICAL	0x00000000
#define	MSI_INTEL_ADDR_DM_LOGICAL	0x00000004

/*
 * Fields in the data word of an Intel MSI message.  Apart from the
 * polarity bits these are aliases for the corresponding IOART_* values.
 */
#define	MSI_INTEL_DATA_TRGRMOD		IOART_TRGRMOD	/* Trigger mode. */
#define	MSI_INTEL_DATA_TRGREDG		IOART_TRGREDG
#define	MSI_INTEL_DATA_TRGRLVL		IOART_TRGRLVL
#define	MSI_INTEL_DATA_LEVEL		0x00004000	/* Polarity. */
#define	MSI_INTEL_DATA_DEASSERT		0x00000000
#define	MSI_INTEL_DATA_ASSERT		0x00004000
#define	MSI_INTEL_DATA_DELMOD		IOART_DELMOD	/* Delivery mode. */
#define	MSI_INTEL_DATA_DELFIXED		IOART_DELFIXED
#define	MSI_INTEL_DATA_DELLOPRI		IOART_DELLOPRI
#define	MSI_INTEL_DATA_DELSMI		IOART_DELSMI
#define	MSI_INTEL_DATA_DELNMI		IOART_DELNMI
#define	MSI_INTEL_DATA_DELINIT		IOART_DELINIT
#define	MSI_INTEL_DATA_DELEXINT		IOART_DELEXINT
#define	MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */

/*
 * Build the Intel MSI address and data values for a source.  AMD64
 * systems seem to be compatible, so the same macros serve both.
 *
 * INTEL_ADDR() places the source's msi_cpu at bit 12, which is where the
 * MSI_INTEL_ADDR_DEST mask starts, and selects the RH_OFF and DM_PHYSICAL
 * values.  INTEL_DATA() combines the TRGREDG and DELFIXED values with the
 * source's msi_vector.  MSI_INTEL_ADDR_BASE is not defined in this file.
 */
#define	INTEL_ADDR(msi)							\
	(MSI_INTEL_ADDR_BASE | ((msi)->msi_cpu << 12) |			\
	    MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
#define	INTEL_DATA(msi)							\
	(MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED |		\
	    (msi)->msi_vector)
92
93static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
94
95/*
96 * MSI sources are bunched into groups.  This is because MSI forces
97 * all of the messages to share the address and data registers and
98 * thus certain properties (such as the local APIC ID target on x86).
99 * Each group has a 'first' source that contains information global to
100 * the group.  These fields are marked with (g) below.
101 *
102 * Note that local APIC ID is kind of special.  Each message will be
103 * assigned an ID by the system; however, a group will use the ID from
104 * the first message.
105 *
106 * For MSI-X, each message is isolated.
107 */
108struct msi_intsrc {
109	struct intsrc msi_intsrc;
110	device_t msi_dev;		/* Owning device. (g) */
111	struct msi_intsrc *msi_first;	/* First source in group. */
112	u_int msi_irq;			/* IRQ cookie. */
113	u_int msi_msix;			/* MSI-X message. */
114	u_int msi_vector:8;		/* IDT vector. */
115	u_int msi_cpu:8;		/* Local APIC ID. (g) */
116	u_int msi_count:8;		/* Messages in this group. (g) */
117	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
118	int *msi_irqs;			/* Group's IRQ list. (g) */
119};
120
121static void	msi_create_source(void);
122static void	msi_enable_source(struct intsrc *isrc);
123static void	msi_disable_source(struct intsrc *isrc, int eoi);
124static void	msi_eoi_source(struct intsrc *isrc);
125static void	msi_enable_intr(struct intsrc *isrc);
126static void	msi_disable_intr(struct intsrc *isrc);
127static int	msi_vector(struct intsrc *isrc);
128static int	msi_source_pending(struct intsrc *isrc);
129static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
130		    enum intr_polarity pol);
131static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
132
133struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
134		       msi_enable_intr, msi_disable_intr, msi_vector,
135		       msi_source_pending, NULL, NULL, msi_config_intr,
136		       msi_assign_cpu };
137
#ifdef SMP
/**
 * Control over moving MSI-X interrupts between CPUs.
 *
 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
 * enabled MSI-X table entries, so migration of MSI-X interrupts can be
 * switched off through this tunable:
 *
 * -1: automatic detection by FreeBSD
 *  0: enable migration
 *  1: disable migration
 *
 * msi_init() turns a remaining -1 into 0.  msi_assign_cpu() refuses to
 * move an MSI-X message while the value is non-zero.
 */
int msix_disable_migration = -1;
SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration,
    CTLFLAG_RDTUN, &msix_disable_migration, 0,
    "Disable migration of MSI-X interrupts between CPUs");
#endif
153
154static int msi_enabled;
155static int msi_last_irq;
156static struct mtx msi_lock;
157
/*
 * First method of msi_pic.  Intentionally does nothing for an MSI
 * source.
 */
static void
msi_enable_source(struct intsrc *isrc)
{

	(void)isrc;
}
162
163static void
164msi_disable_source(struct intsrc *isrc, int eoi)
165{
166
167	if (eoi == PIC_EOI)
168		lapic_eoi();
169}
170
/*
 * Third method of msi_pic: unconditionally issue a local APIC EOI.
 * The source argument is not consulted.
 */
static void
msi_eoi_source(struct intsrc *isrc)
{

	(void)isrc;
	lapic_eoi();
}
177
178static void
179msi_enable_intr(struct intsrc *isrc)
180{
181	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
182
183	apic_enable_vector(msi->msi_cpu, msi->msi_vector);
184}
185
186static void
187msi_disable_intr(struct intsrc *isrc)
188{
189	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
190
191	apic_disable_vector(msi->msi_cpu, msi->msi_vector);
192}
193
194static int
195msi_vector(struct intsrc *isrc)
196{
197	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
198
199	return (msi->msi_irq);
200}
201
/*
 * Pending query for msi_pic: always answers 0, whatever the source.
 */
static int
msi_source_pending(struct intsrc *isrc)
{

	(void)isrc;
	return (0);
}
208
209static int
210msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
211    enum intr_polarity pol)
212{
213
214	return (ENODEV);
215}
216
217static int
218msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
219{
220	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
221	int old_vector;
222	u_int old_id;
223	int i, vector;
224
225	/*
226	 * Only allow CPUs to be assigned to the first message for an
227	 * MSI group.
228	 */
229	if (msi->msi_first != msi)
230		return (EINVAL);
231
232#ifdef SMP
233	if (msix_disable_migration && msi->msi_msix)
234		return (EINVAL);
235#endif
236
237	/* Store information to free existing irq. */
238	old_vector = msi->msi_vector;
239	old_id = msi->msi_cpu;
240	if (old_id == apic_id)
241		return (0);
242
243	/* Allocate IDT vectors on this cpu. */
244	if (msi->msi_count > 1) {
245		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
246		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
247		    msi->msi_count, msi->msi_maxcount);
248	} else
249		vector = apic_alloc_vector(apic_id, msi->msi_irq);
250	if (vector == 0)
251		return (ENOSPC);
252
253	msi->msi_cpu = apic_id;
254	msi->msi_vector = vector;
255	if (msi->msi_intsrc.is_handlers > 0)
256		apic_enable_vector(msi->msi_cpu, msi->msi_vector);
257	if (bootverbose)
258		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
259		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
260		    msi->msi_cpu, msi->msi_vector);
261	for (i = 1; i < msi->msi_count; i++) {
262		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
263		sib->msi_cpu = apic_id;
264		sib->msi_vector = vector + i;
265		if (sib->msi_intsrc.is_handlers > 0)
266			apic_enable_vector(sib->msi_cpu, sib->msi_vector);
267		if (bootverbose)
268			printf(
269		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
270			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
271	}
272	BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
273	    msi->msi_irq);
274
275	/*
276	 * Free the old vector after the new one is established.  This is done
277	 * to prevent races where we could miss an interrupt.
278	 */
279	if (msi->msi_intsrc.is_handlers > 0)
280		apic_disable_vector(old_id, old_vector);
281	apic_free_vector(old_id, old_vector, msi->msi_irq);
282	for (i = 1; i < msi->msi_count; i++) {
283		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
284		if (sib->msi_intsrc.is_handlers > 0)
285			apic_disable_vector(old_id, old_vector + i);
286		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
287	}
288	return (0);
289}
290
291void
292msi_init(void)
293{
294
295	/* Check if we have a supported CPU. */
296	switch (cpu_vendor_id) {
297	case CPU_VENDOR_INTEL:
298	case CPU_VENDOR_AMD:
299		break;
300	case CPU_VENDOR_CENTAUR:
301		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
302		    CPUID_TO_MODEL(cpu_id) >= 0xf)
303			break;
304		/* FALLTHROUGH */
305	default:
306		return;
307	}
308
309#ifdef SMP
310	if (msix_disable_migration == -1) {
311		/* The default is to allow migration of MSI-X interrupts. */
312		msix_disable_migration = 0;
313	}
314#endif
315
316	msi_enabled = 1;
317	intr_register_pic(&msi_pic);
318	mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
319}
320
321static void
322msi_create_source(void)
323{
324	struct msi_intsrc *msi;
325	u_int irq;
326
327	mtx_lock(&msi_lock);
328	if (msi_last_irq >= NUM_MSI_INTS) {
329		mtx_unlock(&msi_lock);
330		return;
331	}
332	irq = msi_last_irq + FIRST_MSI_INT;
333	msi_last_irq++;
334	mtx_unlock(&msi_lock);
335
336	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
337	msi->msi_intsrc.is_pic = &msi_pic;
338	msi->msi_irq = irq;
339	intr_register_source(&msi->msi_intsrc);
340	nexus_add_irq(irq);
341}
342
343/*
344 * Try to allocate 'count' interrupt sources with contiguous IDT values.
345 */
346int
347msi_alloc(device_t dev, int count, int maxcount, int *irqs)
348{
349	struct msi_intsrc *msi, *fsrc;
350	u_int cpu;
351	int cnt, i, *mirqs, vector;
352
353	if (!msi_enabled)
354		return (ENXIO);
355
356	if (count > 1)
357		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
358	else
359		mirqs = NULL;
360again:
361	mtx_lock(&msi_lock);
362
363	/* Try to find 'count' free IRQs. */
364	cnt = 0;
365	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
366		msi = (struct msi_intsrc *)intr_lookup_source(i);
367
368		/* End of allocated sources, so break. */
369		if (msi == NULL)
370			break;
371
372		/* If this is a free one, save its IRQ in the array. */
373		if (msi->msi_dev == NULL) {
374			irqs[cnt] = i;
375			cnt++;
376			if (cnt == count)
377				break;
378		}
379	}
380
381	/* Do we need to create some new sources? */
382	if (cnt < count) {
383		/* If we would exceed the max, give up. */
384		if (i + (count - cnt) >= FIRST_MSI_INT + NUM_MSI_INTS) {
385			mtx_unlock(&msi_lock);
386			free(mirqs, M_MSI);
387			return (ENXIO);
388		}
389		mtx_unlock(&msi_lock);
390
391		/* We need count - cnt more sources. */
392		while (cnt < count) {
393			msi_create_source();
394			cnt++;
395		}
396		goto again;
397	}
398
399	/* Ok, we now have the IRQs allocated. */
400	KASSERT(cnt == count, ("count mismatch"));
401
402	/* Allocate 'count' IDT vectors. */
403	cpu = intr_next_cpu();
404	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
405	if (vector == 0) {
406		mtx_unlock(&msi_lock);
407		free(mirqs, M_MSI);
408		return (ENOSPC);
409	}
410
411	/* Assign IDT vectors and make these messages owned by 'dev'. */
412	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
413	for (i = 0; i < count; i++) {
414		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
415		msi->msi_cpu = cpu;
416		msi->msi_dev = dev;
417		msi->msi_vector = vector + i;
418		if (bootverbose)
419			printf(
420		    "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
421			    msi->msi_irq, msi->msi_cpu, msi->msi_vector);
422		msi->msi_first = fsrc;
423		KASSERT(msi->msi_intsrc.is_handlers == 0,
424		    ("dead MSI has handlers"));
425	}
426	fsrc->msi_count = count;
427	fsrc->msi_maxcount = maxcount;
428	if (count > 1)
429		bcopy(irqs, mirqs, count * sizeof(*mirqs));
430	fsrc->msi_irqs = mirqs;
431	mtx_unlock(&msi_lock);
432
433	return (0);
434}
435
436int
437msi_release(int *irqs, int count)
438{
439	struct msi_intsrc *msi, *first;
440	int i;
441
442	mtx_lock(&msi_lock);
443	first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
444	if (first == NULL) {
445		mtx_unlock(&msi_lock);
446		return (ENOENT);
447	}
448
449	/* Make sure this isn't an MSI-X message. */
450	if (first->msi_msix) {
451		mtx_unlock(&msi_lock);
452		return (EINVAL);
453	}
454
455	/* Make sure this message is allocated to a group. */
456	if (first->msi_first == NULL) {
457		mtx_unlock(&msi_lock);
458		return (ENXIO);
459	}
460
461	/*
462	 * Make sure this is the start of a group and that we are releasing
463	 * the entire group.
464	 */
465	if (first->msi_first != first || first->msi_count != count) {
466		mtx_unlock(&msi_lock);
467		return (EINVAL);
468	}
469	KASSERT(first->msi_dev != NULL, ("unowned group"));
470
471	/* Clear all the extra messages in the group. */
472	for (i = 1; i < count; i++) {
473		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
474		KASSERT(msi->msi_first == first, ("message not in group"));
475		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
476		msi->msi_first = NULL;
477		msi->msi_dev = NULL;
478		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
479		msi->msi_vector = 0;
480	}
481
482	/* Clear out the first message. */
483	first->msi_first = NULL;
484	first->msi_dev = NULL;
485	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
486	first->msi_vector = 0;
487	first->msi_count = 0;
488	first->msi_maxcount = 0;
489	free(first->msi_irqs, M_MSI);
490	first->msi_irqs = NULL;
491
492	mtx_unlock(&msi_lock);
493	return (0);
494}
495
496int
497msi_map(int irq, uint64_t *addr, uint32_t *data)
498{
499	struct msi_intsrc *msi;
500
501	mtx_lock(&msi_lock);
502	msi = (struct msi_intsrc *)intr_lookup_source(irq);
503	if (msi == NULL) {
504		mtx_unlock(&msi_lock);
505		return (ENOENT);
506	}
507
508	/* Make sure this message is allocated to a device. */
509	if (msi->msi_dev == NULL) {
510		mtx_unlock(&msi_lock);
511		return (ENXIO);
512	}
513
514	/*
515	 * If this message isn't an MSI-X message, make sure it's part
516	 * of a group, and switch to the first message in the
517	 * group.
518	 */
519	if (!msi->msi_msix) {
520		if (msi->msi_first == NULL) {
521			mtx_unlock(&msi_lock);
522			return (ENXIO);
523		}
524		msi = msi->msi_first;
525	}
526
527	*addr = INTEL_ADDR(msi);
528	*data = INTEL_DATA(msi);
529	mtx_unlock(&msi_lock);
530	return (0);
531}
532
533int
534msix_alloc(device_t dev, int *irq)
535{
536	struct msi_intsrc *msi;
537	u_int cpu;
538	int i, vector;
539
540	if (!msi_enabled)
541		return (ENXIO);
542
543again:
544	mtx_lock(&msi_lock);
545
546	/* Find a free IRQ. */
547	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
548		msi = (struct msi_intsrc *)intr_lookup_source(i);
549
550		/* End of allocated sources, so break. */
551		if (msi == NULL)
552			break;
553
554		/* Stop at the first free source. */
555		if (msi->msi_dev == NULL)
556			break;
557	}
558
559	/* Do we need to create a new source? */
560	if (msi == NULL) {
561		/* If we would exceed the max, give up. */
562		if (i + 1 >= FIRST_MSI_INT + NUM_MSI_INTS) {
563			mtx_unlock(&msi_lock);
564			return (ENXIO);
565		}
566		mtx_unlock(&msi_lock);
567
568		/* Create a new source. */
569		msi_create_source();
570		goto again;
571	}
572
573	/* Allocate an IDT vector. */
574	cpu = intr_next_cpu();
575	vector = apic_alloc_vector(cpu, i);
576	if (vector == 0) {
577		mtx_unlock(&msi_lock);
578		return (ENOSPC);
579	}
580	if (bootverbose)
581		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
582		    msi->msi_irq, cpu, vector);
583
584	/* Setup source. */
585	msi->msi_cpu = cpu;
586	msi->msi_dev = dev;
587	msi->msi_first = msi;
588	msi->msi_vector = vector;
589	msi->msi_msix = 1;
590	msi->msi_count = 1;
591	msi->msi_maxcount = 1;
592	msi->msi_irqs = NULL;
593
594	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
595	mtx_unlock(&msi_lock);
596
597	*irq = i;
598	return (0);
599}
600
601int
602msix_release(int irq)
603{
604	struct msi_intsrc *msi;
605
606	mtx_lock(&msi_lock);
607	msi = (struct msi_intsrc *)intr_lookup_source(irq);
608	if (msi == NULL) {
609		mtx_unlock(&msi_lock);
610		return (ENOENT);
611	}
612
613	/* Make sure this is an MSI-X message. */
614	if (!msi->msi_msix) {
615		mtx_unlock(&msi_lock);
616		return (EINVAL);
617	}
618
619	KASSERT(msi->msi_dev != NULL, ("unowned message"));
620
621	/* Clear out the message. */
622	msi->msi_first = NULL;
623	msi->msi_dev = NULL;
624	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
625	msi->msi_vector = 0;
626	msi->msi_msix = 0;
627	msi->msi_count = 0;
628	msi->msi_maxcount = 0;
629
630	mtx_unlock(&msi_lock);
631	return (0);
632}
633