msi.c revision 333126
1/*- 2 * Copyright (c) 2006 Yahoo!, Inc. 3 * All rights reserved. 4 * Written by: John Baldwin <jhb@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Neither the name of the author nor the names of any co-contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31/* 32 * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on 33 * x86 are basically APIC messages that the northbridge delivers directly 34 * to the local APICs as if they had come from an I/O APIC. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/10/sys/x86/x86/msi.c 333126 2018-04-30 20:29:28Z jhb $"); 39 40#include <sys/param.h> 41#include <sys/bus.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/malloc.h> 45#include <sys/mutex.h> 46#include <sys/sx.h> 47#include <sys/sysctl.h> 48#include <sys/systm.h> 49#include <x86/apicreg.h> 50#include <machine/cputypes.h> 51#include <machine/md_var.h> 52#include <machine/frame.h> 53#include <machine/intr_machdep.h> 54#include <machine/apicvar.h> 55#include <machine/specialreg.h> 56#include <dev/pci/pcivar.h> 57 58/* Fields in address for Intel MSI messages. */ 59#define MSI_INTEL_ADDR_DEST 0x000ff000 60#define MSI_INTEL_ADDR_RH 0x00000008 61# define MSI_INTEL_ADDR_RH_ON 0x00000008 62# define MSI_INTEL_ADDR_RH_OFF 0x00000000 63#define MSI_INTEL_ADDR_DM 0x00000004 64# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000 65# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004 66 67/* Fields in data for Intel MSI messages. */ 68#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */ 69# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG 70# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL 71#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */ 72# define MSI_INTEL_DATA_DEASSERT 0x00000000 73# define MSI_INTEL_DATA_ASSERT 0x00004000 74#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */ 75# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED 76# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI 77# define MSI_INTEL_DATA_DELSMI IOART_DELSMI 78# define MSI_INTEL_DATA_DELNMI IOART_DELNMI 79# define MSI_INTEL_DATA_DELINIT IOART_DELINIT 80# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT 81#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ 82 83/* 84 * Build Intel MSI message and data values from a source. AMD64 systems 85 * seem to be compatible, so we use the same function for both. 86 */ 87#define INTEL_ADDR(msi) \ 88 (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \ 89 MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL) 90#define INTEL_DATA(msi) \ 91 (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector) 92 93static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI"); 94 95/* 96 * MSI sources are bunched into groups. This is because MSI forces 97 * all of the messages to share the address and data registers and 98 * thus certain properties (such as the local APIC ID target on x86). 99 * Each group has a 'first' source that contains information global to 100 * the group. These fields are marked with (g) below. 101 * 102 * Note that local APIC ID is kind of special. Each message will be 103 * assigned an ID by the system; however, a group will use the ID from 104 * the first message. 105 * 106 * For MSI-X, each message is isolated. 107 */ 108struct msi_intsrc { 109 struct intsrc msi_intsrc; 110 device_t msi_dev; /* Owning device. (g) */ 111 struct msi_intsrc *msi_first; /* First source in group. */ 112 u_int msi_irq; /* IRQ cookie. */ 113 u_int msi_msix; /* MSI-X message. */ 114 u_int msi_vector:8; /* IDT vector. */ 115 u_int msi_cpu:8; /* Local APIC ID. (g) */ 116 u_int msi_count:8; /* Messages in this group. (g) */ 117 u_int msi_maxcount:8; /* Alignment for this group. (g) */ 118 int *msi_irqs; /* Group's IRQ list. (g) */ 119}; 120 121static void msi_create_source(void); 122static void msi_enable_source(struct intsrc *isrc); 123static void msi_disable_source(struct intsrc *isrc, int eoi); 124static void msi_eoi_source(struct intsrc *isrc); 125static void msi_enable_intr(struct intsrc *isrc); 126static void msi_disable_intr(struct intsrc *isrc); 127static int msi_vector(struct intsrc *isrc); 128static int msi_source_pending(struct intsrc *isrc); 129static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, 130 enum intr_polarity pol); 131static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id); 132 133struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, 134 msi_enable_intr, msi_disable_intr, msi_vector, 135 msi_source_pending, NULL, NULL, msi_config_intr, 136 msi_assign_cpu }; 137 138#ifdef SMP 139/** 140 * Xen hypervisors prior to 4.6.0 do not properly handle updates to 141 * enabled MSI-X table entries. Allow migration of MSI-X interrupts 142 * to be disabled via a tunable. Values have the following meaning: 143 * 144 * -1: automatic detection by FreeBSD 145 * 0: enable migration 146 * 1: disable migration 147 */ 148int msix_disable_migration = -1; 149SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN, 150 &msix_disable_migration, 0, 151 "Disable migration of MSI-X interrupts between CPUs"); 152#endif 153 154static int msi_enabled; 155static int msi_last_irq; 156static struct mtx msi_lock; 157 158static void 159msi_enable_source(struct intsrc *isrc) 160{ 161} 162 163static void 164msi_disable_source(struct intsrc *isrc, int eoi) 165{ 166 167 if (eoi == PIC_EOI) 168 lapic_eoi(); 169} 170 171static void 172msi_eoi_source(struct intsrc *isrc) 173{ 174 175 lapic_eoi(); 176} 177 178static void 179msi_enable_intr(struct intsrc *isrc) 180{ 181 struct msi_intsrc *msi = (struct msi_intsrc *)isrc; 182 183 apic_enable_vector(msi->msi_cpu, msi->msi_vector); 184} 185 186static void 187msi_disable_intr(struct intsrc *isrc) 188{ 189 struct msi_intsrc *msi = (struct msi_intsrc *)isrc; 190 191 apic_disable_vector(msi->msi_cpu, msi->msi_vector); 192} 193 194static int 195msi_vector(struct intsrc *isrc) 196{ 197 struct msi_intsrc *msi = (struct msi_intsrc *)isrc; 198 199 return (msi->msi_irq); 200} 201 202static int 203msi_source_pending(struct intsrc *isrc) 204{ 205 206 return (0); 207} 208 209static int 210msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, 211 enum intr_polarity pol) 212{ 213 214 return (ENODEV); 215} 216 217static int 218msi_assign_cpu(struct intsrc *isrc, u_int apic_id) 219{ 220 struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc; 221 int old_vector; 222 u_int old_id; 223 int i, vector; 224 225 /* 226 * Only allow CPUs to be assigned to the first message for an 227 * MSI group. 228 */ 229 if (msi->msi_first != msi) 230 return (EINVAL); 231 232#ifdef SMP 233 if (msix_disable_migration && msi->msi_msix) 234 return (EINVAL); 235#endif 236 237 /* Store information to free existing irq. */ 238 old_vector = msi->msi_vector; 239 old_id = msi->msi_cpu; 240 if (old_id == apic_id) 241 return (0); 242 243 /* Allocate IDT vectors on this cpu. */ 244 if (msi->msi_count > 1) { 245 KASSERT(msi->msi_msix == 0, ("MSI-X message group")); 246 vector = apic_alloc_vectors(apic_id, msi->msi_irqs, 247 msi->msi_count, msi->msi_maxcount); 248 } else 249 vector = apic_alloc_vector(apic_id, msi->msi_irq); 250 if (vector == 0) 251 return (ENOSPC); 252 253 msi->msi_cpu = apic_id; 254 msi->msi_vector = vector; 255 if (msi->msi_intsrc.is_handlers > 0) 256 apic_enable_vector(msi->msi_cpu, msi->msi_vector); 257 if (bootverbose) 258 printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n", 259 msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq, 260 msi->msi_cpu, msi->msi_vector); 261 for (i = 1; i < msi->msi_count; i++) { 262 sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]); 263 sib->msi_cpu = apic_id; 264 sib->msi_vector = vector + i; 265 if (sib->msi_intsrc.is_handlers > 0) 266 apic_enable_vector(sib->msi_cpu, sib->msi_vector); 267 if (bootverbose) 268 printf( 269 "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n", 270 sib->msi_irq, sib->msi_cpu, sib->msi_vector); 271 } 272 BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev, 273 msi->msi_irq); 274 275 /* 276 * Free the old vector after the new one is established. This is done 277 * to prevent races where we could miss an interrupt. 278 */ 279 if (msi->msi_intsrc.is_handlers > 0) 280 apic_disable_vector(old_id, old_vector); 281 apic_free_vector(old_id, old_vector, msi->msi_irq); 282 for (i = 1; i < msi->msi_count; i++) { 283 sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]); 284 if (sib->msi_intsrc.is_handlers > 0) 285 apic_disable_vector(old_id, old_vector + i); 286 apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]); 287 } 288 return (0); 289} 290 291void 292msi_init(void) 293{ 294 295 /* Check if we have a supported CPU. */ 296 switch (cpu_vendor_id) { 297 case CPU_VENDOR_INTEL: 298 case CPU_VENDOR_AMD: 299 break; 300 case CPU_VENDOR_CENTAUR: 301 if (CPUID_TO_FAMILY(cpu_id) == 0x6 && 302 CPUID_TO_MODEL(cpu_id) >= 0xf) 303 break; 304 /* FALLTHROUGH */ 305 default: 306 return; 307 } 308 309#ifdef SMP 310 if (msix_disable_migration == -1) { 311 /* The default is to allow migration of MSI-X interrupts. */ 312 msix_disable_migration = 0; 313 } 314#endif 315 316 msi_enabled = 1; 317 intr_register_pic(&msi_pic); 318 mtx_init(&msi_lock, "msi", NULL, MTX_DEF); 319} 320 321static void 322msi_create_source(void) 323{ 324 struct msi_intsrc *msi; 325 u_int irq; 326 327 mtx_lock(&msi_lock); 328 if (msi_last_irq >= NUM_MSI_INTS) { 329 mtx_unlock(&msi_lock); 330 return; 331 } 332 irq = msi_last_irq + FIRST_MSI_INT; 333 msi_last_irq++; 334 mtx_unlock(&msi_lock); 335 336 msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); 337 msi->msi_intsrc.is_pic = &msi_pic; 338 msi->msi_irq = irq; 339 intr_register_source(&msi->msi_intsrc); 340 nexus_add_irq(irq); 341} 342 343/* 344 * Try to allocate 'count' interrupt sources with contiguous IDT values. 345 */ 346int 347msi_alloc(device_t dev, int count, int maxcount, int *irqs) 348{ 349 struct msi_intsrc *msi, *fsrc; 350 u_int cpu; 351 int cnt, i, *mirqs, vector; 352 353 if (!msi_enabled) 354 return (ENXIO); 355 356 if (count > 1) 357 mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK); 358 else 359 mirqs = NULL; 360again: 361 mtx_lock(&msi_lock); 362 363 /* Try to find 'count' free IRQs. */ 364 cnt = 0; 365 for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { 366 msi = (struct msi_intsrc *)intr_lookup_source(i); 367 368 /* End of allocated sources, so break. */ 369 if (msi == NULL) 370 break; 371 372 /* If this is a free one, save its IRQ in the array. */ 373 if (msi->msi_dev == NULL) { 374 irqs[cnt] = i; 375 cnt++; 376 if (cnt == count) 377 break; 378 } 379 } 380 381 /* Do we need to create some new sources? */ 382 if (cnt < count) { 383 /* If we would exceed the max, give up. */ 384 if (i + (count - cnt) >= FIRST_MSI_INT + NUM_MSI_INTS) { 385 mtx_unlock(&msi_lock); 386 free(mirqs, M_MSI); 387 return (ENXIO); 388 } 389 mtx_unlock(&msi_lock); 390 391 /* We need count - cnt more sources. */ 392 while (cnt < count) { 393 msi_create_source(); 394 cnt++; 395 } 396 goto again; 397 } 398 399 /* Ok, we now have the IRQs allocated. */ 400 KASSERT(cnt == count, ("count mismatch")); 401 402 /* Allocate 'count' IDT vectors. */ 403 cpu = intr_next_cpu(); 404 vector = apic_alloc_vectors(cpu, irqs, count, maxcount); 405 if (vector == 0) { 406 mtx_unlock(&msi_lock); 407 free(mirqs, M_MSI); 408 return (ENOSPC); 409 } 410 411 /* Assign IDT vectors and make these messages owned by 'dev'. */ 412 fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]); 413 for (i = 0; i < count; i++) { 414 msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); 415 msi->msi_cpu = cpu; 416 msi->msi_dev = dev; 417 msi->msi_vector = vector + i; 418 if (bootverbose) 419 printf( 420 "msi: routing MSI IRQ %d to local APIC %u vector %u\n", 421 msi->msi_irq, msi->msi_cpu, msi->msi_vector); 422 msi->msi_first = fsrc; 423 KASSERT(msi->msi_intsrc.is_handlers == 0, 424 ("dead MSI has handlers")); 425 } 426 fsrc->msi_count = count; 427 fsrc->msi_maxcount = maxcount; 428 if (count > 1) 429 bcopy(irqs, mirqs, count * sizeof(*mirqs)); 430 fsrc->msi_irqs = mirqs; 431 mtx_unlock(&msi_lock); 432 433 return (0); 434} 435 436int 437msi_release(int *irqs, int count) 438{ 439 struct msi_intsrc *msi, *first; 440 int i; 441 442 mtx_lock(&msi_lock); 443 first = (struct msi_intsrc *)intr_lookup_source(irqs[0]); 444 if (first == NULL) { 445 mtx_unlock(&msi_lock); 446 return (ENOENT); 447 } 448 449 /* Make sure this isn't an MSI-X message. */ 450 if (first->msi_msix) { 451 mtx_unlock(&msi_lock); 452 return (EINVAL); 453 } 454 455 /* Make sure this message is allocated to a group. */ 456 if (first->msi_first == NULL) { 457 mtx_unlock(&msi_lock); 458 return (ENXIO); 459 } 460 461 /* 462 * Make sure this is the start of a group and that we are releasing 463 * the entire group. 464 */ 465 if (first->msi_first != first || first->msi_count != count) { 466 mtx_unlock(&msi_lock); 467 return (EINVAL); 468 } 469 KASSERT(first->msi_dev != NULL, ("unowned group")); 470 471 /* Clear all the extra messages in the group. */ 472 for (i = 1; i < count; i++) { 473 msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); 474 KASSERT(msi->msi_first == first, ("message not in group")); 475 KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch")); 476 msi->msi_first = NULL; 477 msi->msi_dev = NULL; 478 apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq); 479 msi->msi_vector = 0; 480 } 481 482 /* Clear out the first message. */ 483 first->msi_first = NULL; 484 first->msi_dev = NULL; 485 apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq); 486 first->msi_vector = 0; 487 first->msi_count = 0; 488 first->msi_maxcount = 0; 489 free(first->msi_irqs, M_MSI); 490 first->msi_irqs = NULL; 491 492 mtx_unlock(&msi_lock); 493 return (0); 494} 495 496int 497msi_map(int irq, uint64_t *addr, uint32_t *data) 498{ 499 struct msi_intsrc *msi; 500 501 mtx_lock(&msi_lock); 502 msi = (struct msi_intsrc *)intr_lookup_source(irq); 503 if (msi == NULL) { 504 mtx_unlock(&msi_lock); 505 return (ENOENT); 506 } 507 508 /* Make sure this message is allocated to a device. */ 509 if (msi->msi_dev == NULL) { 510 mtx_unlock(&msi_lock); 511 return (ENXIO); 512 } 513 514 /* 515 * If this message isn't an MSI-X message, make sure it's part 516 * of a group, and switch to the first message in the 517 * group. 518 */ 519 if (!msi->msi_msix) { 520 if (msi->msi_first == NULL) { 521 mtx_unlock(&msi_lock); 522 return (ENXIO); 523 } 524 msi = msi->msi_first; 525 } 526 527 *addr = INTEL_ADDR(msi); 528 *data = INTEL_DATA(msi); 529 mtx_unlock(&msi_lock); 530 return (0); 531} 532 533int 534msix_alloc(device_t dev, int *irq) 535{ 536 struct msi_intsrc *msi; 537 u_int cpu; 538 int i, vector; 539 540 if (!msi_enabled) 541 return (ENXIO); 542 543again: 544 mtx_lock(&msi_lock); 545 546 /* Find a free IRQ. */ 547 for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { 548 msi = (struct msi_intsrc *)intr_lookup_source(i); 549 550 /* End of allocated sources, so break. */ 551 if (msi == NULL) 552 break; 553 554 /* Stop at the first free source. */ 555 if (msi->msi_dev == NULL) 556 break; 557 } 558 559 /* Do we need to create a new source? */ 560 if (msi == NULL) { 561 /* If we would exceed the max, give up. */ 562 if (i + 1 >= FIRST_MSI_INT + NUM_MSI_INTS) { 563 mtx_unlock(&msi_lock); 564 return (ENXIO); 565 } 566 mtx_unlock(&msi_lock); 567 568 /* Create a new source. */ 569 msi_create_source(); 570 goto again; 571 } 572 573 /* Allocate an IDT vector. */ 574 cpu = intr_next_cpu(); 575 vector = apic_alloc_vector(cpu, i); 576 if (vector == 0) { 577 mtx_unlock(&msi_lock); 578 return (ENOSPC); 579 } 580 if (bootverbose) 581 printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n", 582 msi->msi_irq, cpu, vector); 583 584 /* Setup source. */ 585 msi->msi_cpu = cpu; 586 msi->msi_dev = dev; 587 msi->msi_first = msi; 588 msi->msi_vector = vector; 589 msi->msi_msix = 1; 590 msi->msi_count = 1; 591 msi->msi_maxcount = 1; 592 msi->msi_irqs = NULL; 593 594 KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers")); 595 mtx_unlock(&msi_lock); 596 597 *irq = i; 598 return (0); 599} 600 601int 602msix_release(int irq) 603{ 604 struct msi_intsrc *msi; 605 606 mtx_lock(&msi_lock); 607 msi = (struct msi_intsrc *)intr_lookup_source(irq); 608 if (msi == NULL) { 609 mtx_unlock(&msi_lock); 610 return (ENOENT); 611 } 612 613 /* Make sure this is an MSI-X message. */ 614 if (!msi->msi_msix) { 615 mtx_unlock(&msi_lock); 616 return (EINVAL); 617 } 618 619 KASSERT(msi->msi_dev != NULL, ("unowned message")); 620 621 /* Clear out the message. */ 622 msi->msi_first = NULL; 623 msi->msi_dev = NULL; 624 apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq); 625 msi->msi_vector = 0; 626 msi->msi_msix = 0; 627 msi->msi_count = 0; 628 msi->msi_maxcount = 0; 629 630 mtx_unlock(&msi_lock); 631 return (0); 632} 633