1/*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Machine dependent interrupt code for x86. For x86, we have to 30 * deal with different PICs. Thus, we use the passed in vector to lookup 31 * an interrupt source associated with that vector. The interrupt source 32 * describes which PIC the source belongs to and includes methods to handle 33 * that source. 34 */ 35 36#include "opt_atpic.h" 37#include "opt_ddb.h" 38#include "opt_smp.h" 39 40#include <sys/param.h> 41#include <sys/bus.h> 42#include <sys/interrupt.h> 43#include <sys/ktr.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mutex.h> 48#include <sys/proc.h> 49#include <sys/queue.h> 50#include <sys/sbuf.h> 51#include <sys/smp.h> 52#include <sys/sx.h> 53#include <sys/sysctl.h> 54#include <sys/syslog.h> 55#include <sys/systm.h> 56#include <sys/taskqueue.h> 57#include <sys/vmmeter.h> 58#include <machine/clock.h> 59#include <machine/intr_machdep.h> 60#include <machine/smp.h> 61#ifdef DDB 62#include <ddb/ddb.h> 63#endif 64 65#ifndef DEV_ATPIC 66#include <machine/segments.h> 67#include <machine/frame.h> 68#include <dev/ic/i8259.h> 69#include <x86/isa/icu.h> 70#include <isa/isareg.h> 71#endif 72 73#include <vm/vm.h> 74 75typedef void (*mask_fn)(void *); 76 77static int intrcnt_index; 78static struct intsrc **interrupt_sources; 79#ifdef SMP 80static struct intsrc **interrupt_sorted; 81static int intrbalance; 82SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0, 83 "Interrupt auto-balance interval (seconds). Zero disables."); 84static struct timeout_task intrbalance_task; 85#endif 86static struct sx intrsrc_lock; 87static struct mtx intrpic_lock; 88static struct mtx intrcnt_lock; 89static TAILQ_HEAD(pics_head, pic) pics; 90u_int num_io_irqs; 91 92#if defined(SMP) && !defined(EARLY_AP_STARTUP) 93#error EARLY_AP_STARTUP required on x86 94#endif 95 96#define INTRNAME_LEN (MAXCOMLEN + 1) 97u_long *intrcnt; 98char *intrnames; 99size_t sintrcnt = sizeof(intrcnt); 100size_t sintrnames = sizeof(intrnames); 101int nintrcnt; 102 103static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); 104 105static int intr_assign_cpu(void *arg, int cpu); 106static void intr_disable_src(void *arg); 107static void intr_init(void *__dummy); 108static int intr_pic_registered(struct pic *pic); 109static void intrcnt_setname(const char *name, int index); 110static void intrcnt_updatename(struct intsrc *is); 111static void intrcnt_register(struct intsrc *is); 112 113/* 114 * SYSINIT levels for SI_SUB_INTR: 115 * 116 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init 117 * SI_ORDER_SECOND: Xen PICs 118 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges 119 * SI_ORDER_FOURTH: Add 8259A PICs 120 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources 121 * SI_ORDER_MIDDLE: SMP interrupt counters 122 * SI_ORDER_ANY: Enable interrupts on BSP 123 */ 124 125static int 126intr_pic_registered(struct pic *pic) 127{ 128 struct pic *p; 129 130 TAILQ_FOREACH(p, &pics, pics) { 131 if (p == pic) 132 return (1); 133 } 134 return (0); 135} 136 137/* 138 * Register a new interrupt controller (PIC). This is to support suspend 139 * and resume where we suspend/resume controllers rather than individual 140 * sources. This also allows controllers with no active sources (such as 141 * 8259As in a system using the APICs) to participate in suspend and resume. 142 */ 143int 144intr_register_pic(struct pic *pic) 145{ 146 int error; 147 148 mtx_lock(&intrpic_lock); 149 if (intr_pic_registered(pic)) 150 error = EBUSY; 151 else { 152 TAILQ_INSERT_TAIL(&pics, pic, pics); 153 error = 0; 154 } 155 mtx_unlock(&intrpic_lock); 156 return (error); 157} 158 159/* 160 * Allocate interrupt source arrays and register interrupt sources 161 * once the number of interrupts is known. 162 */ 163static void 164intr_init_sources(void *arg) 165{ 166 struct pic *pic; 167 168 MPASS(num_io_irqs > 0); 169 170 interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources), 171 M_INTR, M_WAITOK | M_ZERO); 172#ifdef SMP 173 interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted), 174 M_INTR, M_WAITOK | M_ZERO); 175#endif 176 177 /* 178 * - 1 ??? dummy counter. 179 * - 2 counters for each I/O interrupt. 180 * - 1 counter for each CPU for lapic timer. 181 * - 1 counter for each CPU for the Hyper-V vmbus driver. 182 * - 8 counters for each CPU for IPI counters for SMP. 183 */ 184 nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2; 185#ifdef COUNT_IPIS 186 if (mp_ncpus > 1) 187 nintrcnt += 8 * mp_ncpus; 188#endif 189 intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK | 190 M_ZERO); 191 intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTR, M_WAITOK | 192 M_ZERO); 193 sintrcnt = nintrcnt * sizeof(u_long); 194 sintrnames = nintrcnt * INTRNAME_LEN; 195 196 intrcnt_setname("???", 0); 197 intrcnt_index = 1; 198 199 /* 200 * NB: intrpic_lock is not held here to avoid LORs due to 201 * malloc() in intr_register_source(). However, we are still 202 * single-threaded at this point in startup so the list of 203 * PICs shouldn't change. 204 */ 205 TAILQ_FOREACH(pic, &pics, pics) { 206 if (pic->pic_register_sources != NULL) 207 pic->pic_register_sources(pic); 208 } 209} 210SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources, 211 NULL); 212 213/* 214 * Register a new interrupt source with the global interrupt system. 215 * The global interrupts need to be disabled when this function is 216 * called. 217 */ 218int 219intr_register_source(struct intsrc *isrc) 220{ 221 int error, vector; 222 223 KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); 224 vector = isrc->is_pic->pic_vector(isrc); 225 KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector, 226 num_io_irqs)); 227 if (interrupt_sources[vector] != NULL) 228 return (EEXIST); 229 error = intr_event_create(&isrc->is_event, isrc, 0, vector, 230 intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, 231 (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", 232 vector); 233 if (error) 234 return (error); 235 sx_xlock(&intrsrc_lock); 236 if (interrupt_sources[vector] != NULL) { 237 sx_xunlock(&intrsrc_lock); 238 intr_event_destroy(isrc->is_event); 239 return (EEXIST); 240 } 241 intrcnt_register(isrc); 242 interrupt_sources[vector] = isrc; 243 isrc->is_handlers = 0; 244 sx_xunlock(&intrsrc_lock); 245 return (0); 246} 247 248struct intsrc * 249intr_lookup_source(int vector) 250{ 251 252 if (vector < 0 || vector >= num_io_irqs) 253 return (NULL); 254 return (interrupt_sources[vector]); 255} 256 257int 258intr_add_handler(struct intsrc *isrc, const char *name, driver_filter_t filter, 259 driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, 260 int domain) 261{ 262 int error; 263 264 error = intr_event_add_handler(isrc->is_event, name, filter, handler, 265 arg, intr_priority(flags), flags, cookiep); 266 if (error == 0) { 267 sx_xlock(&intrsrc_lock); 268 intrcnt_updatename(isrc); 269 isrc->is_handlers++; 270 if (isrc->is_handlers == 1) { 271 isrc->is_domain = domain; 272 isrc->is_pic->pic_enable_intr(isrc); 273 isrc->is_pic->pic_enable_source(isrc); 274 } 275 sx_xunlock(&intrsrc_lock); 276 } 277 return (error); 278} 279 280int 281intr_remove_handler(void *cookie) 282{ 283 struct intsrc *isrc; 284 int error; 285 286 isrc = intr_handler_source(cookie); 287 error = intr_event_remove_handler(cookie); 288 if (error == 0) { 289 sx_xlock(&intrsrc_lock); 290 isrc->is_handlers--; 291 if (isrc->is_handlers == 0) { 292 isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); 293 isrc->is_pic->pic_disable_intr(isrc); 294 } 295 intrcnt_updatename(isrc); 296 sx_xunlock(&intrsrc_lock); 297 } 298 return (error); 299} 300 301int 302intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, 303 enum intr_polarity pol) 304{ 305 306 return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); 307} 308 309static void 310intr_disable_src(void *arg) 311{ 312 struct intsrc *isrc; 313 314 isrc = arg; 315 isrc->is_pic->pic_disable_source(isrc, PIC_EOI); 316} 317 318void 319intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) 320{ 321 struct intr_event *ie; 322 int vector; 323 324 /* 325 * We count software interrupts when we process them. The 326 * code here follows previous practice, but there's an 327 * argument for counting hardware interrupts when they're 328 * processed too. 329 */ 330 (*isrc->is_count)++; 331 VM_CNT_INC(v_intr); 332 333 ie = isrc->is_event; 334 335 /* 336 * XXX: We assume that IRQ 0 is only used for the ISA timer 337 * device (clk). 338 */ 339 vector = isrc->is_pic->pic_vector(isrc); 340 if (vector == 0) 341 clkintr_pending = 1; 342 343 /* 344 * For stray interrupts, mask and EOI the source, bump the 345 * stray count, and log the condition. 346 */ 347 if (intr_event_handle(ie, frame) != 0) { 348 isrc->is_pic->pic_disable_source(isrc, PIC_EOI); 349 (*isrc->is_straycount)++; 350 if (*isrc->is_straycount < INTR_STRAY_LOG_MAX) 351 log(LOG_ERR, "stray irq%d\n", vector); 352 else if (*isrc->is_straycount == INTR_STRAY_LOG_MAX) 353 log(LOG_CRIT, 354 "too many stray irq %d's: not logging anymore\n", 355 vector); 356 } 357} 358 359void 360intr_resume(bool suspend_cancelled) 361{ 362 struct pic *pic; 363 364#ifndef DEV_ATPIC 365 atpic_reset(); 366#endif 367 mtx_lock(&intrpic_lock); 368 TAILQ_FOREACH(pic, &pics, pics) { 369 if (pic->pic_resume != NULL) 370 pic->pic_resume(pic, suspend_cancelled); 371 } 372 mtx_unlock(&intrpic_lock); 373} 374 375void 376intr_suspend(void) 377{ 378 struct pic *pic; 379 380 mtx_lock(&intrpic_lock); 381 TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { 382 if (pic->pic_suspend != NULL) 383 pic->pic_suspend(pic); 384 } 385 mtx_unlock(&intrpic_lock); 386} 387 388static int 389intr_assign_cpu(void *arg, int cpu) 390{ 391#ifdef SMP 392 struct intsrc *isrc; 393 int error; 394 395 MPASS(mp_ncpus == 1 || smp_started); 396 397 /* Nothing to do if there is only a single CPU. */ 398 if (mp_ncpus > 1 && cpu != NOCPU) { 399 isrc = arg; 400 sx_xlock(&intrsrc_lock); 401 error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); 402 if (error == 0) 403 isrc->is_cpu = cpu; 404 sx_xunlock(&intrsrc_lock); 405 } else 406 error = 0; 407 return (error); 408#else 409 return (EOPNOTSUPP); 410#endif 411} 412 413static void 414intrcnt_setname(const char *name, int index) 415{ 416 417 snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s", 418 INTRNAME_LEN - 1, name); 419} 420 421static void 422intrcnt_updatename(struct intsrc *is) 423{ 424 425 intrcnt_setname(is->is_event->ie_fullname, is->is_index); 426} 427 428static void 429intrcnt_register(struct intsrc *is) 430{ 431 char straystr[INTRNAME_LEN]; 432 433 KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); 434 mtx_lock_spin(&intrcnt_lock); 435 MPASS(intrcnt_index + 2 <= nintrcnt); 436 is->is_index = intrcnt_index; 437 intrcnt_index += 2; 438 snprintf(straystr, sizeof(straystr), "stray irq%d", 439 is->is_pic->pic_vector(is)); 440 intrcnt_updatename(is); 441 is->is_count = &intrcnt[is->is_index]; 442 intrcnt_setname(straystr, is->is_index + 1); 443 is->is_straycount = &intrcnt[is->is_index + 1]; 444 mtx_unlock_spin(&intrcnt_lock); 445} 446 447void 448intrcnt_add(const char *name, u_long **countp) 449{ 450 451 mtx_lock_spin(&intrcnt_lock); 452 MPASS(intrcnt_index < nintrcnt); 453 *countp = &intrcnt[intrcnt_index]; 454 intrcnt_setname(name, intrcnt_index); 455 intrcnt_index++; 456 mtx_unlock_spin(&intrcnt_lock); 457} 458 459static void 460intr_init(void *dummy __unused) 461{ 462 463 TAILQ_INIT(&pics); 464 mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); 465 sx_init(&intrsrc_lock, "intrsrc"); 466 mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); 467} 468SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); 469 470static void 471intr_init_final(void *dummy __unused) 472{ 473 474 /* 475 * Enable interrupts on the BSP after all of the interrupt 476 * controllers are initialized. Device interrupts are still 477 * disabled in the interrupt controllers until interrupt 478 * handlers are registered. Interrupts are enabled on each AP 479 * after their first context switch. 480 */ 481 enable_intr(); 482} 483SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL); 484 485#ifndef DEV_ATPIC 486/* Initialize the two 8259A's to a known-good shutdown state. */ 487void 488atpic_reset(void) 489{ 490 491 outb(IO_ICU1, ICW1_RESET | ICW1_IC4); 492 outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); 493 outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); 494 outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); 495 outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); 496 outb(IO_ICU1, OCW3_SEL | OCW3_RR); 497 498 outb(IO_ICU2, ICW1_RESET | ICW1_IC4); 499 outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); 500 outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); 501 outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); 502 outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); 503 outb(IO_ICU2, OCW3_SEL | OCW3_RR); 504} 505#endif 506 507/* Add a description to an active interrupt handler. */ 508int 509intr_describe(struct intsrc *isrc, void *ih, const char *descr) 510{ 511 int error; 512 513 error = intr_event_describe_handler(isrc->is_event, ih, descr); 514 if (error) 515 return (error); 516 intrcnt_updatename(isrc); 517 return (0); 518} 519 520void 521intr_reprogram(void) 522{ 523 struct intsrc *is; 524 u_int v; 525 526 sx_xlock(&intrsrc_lock); 527 for (v = 0; v < num_io_irqs; v++) { 528 is = interrupt_sources[v]; 529 if (is == NULL) 530 continue; 531 if (is->is_pic->pic_reprogram_pin != NULL) 532 is->is_pic->pic_reprogram_pin(is); 533 } 534 sx_xunlock(&intrsrc_lock); 535} 536 537#ifdef DDB 538/* 539 * Dump data about interrupt handlers 540 */ 541DB_SHOW_COMMAND(irqs, db_show_irqs) 542{ 543 struct intsrc **isrc; 544 u_int i; 545 int verbose; 546 547 if (strcmp(modif, "v") == 0) 548 verbose = 1; 549 else 550 verbose = 0; 551 isrc = interrupt_sources; 552 for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) 553 if (*isrc != NULL) 554 db_dump_intr_event((*isrc)->is_event, verbose); 555} 556#endif 557 558#ifdef SMP 559/* 560 * Support for balancing interrupt sources across CPUs. For now we just 561 * allocate CPUs round-robin. 562 * 563 * XXX If the system has a domain with without any usable CPUs (e.g., where all 564 * APIC IDs are 256 or greater and we do not have an IOMMU) we use 565 * intr_no_domain to fall back to assigning interrupts without regard for 566 * domain. Once we can rely on the presence of an IOMMU on all x86 platforms 567 * we can revert this. 568 */ 569 570cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); 571static int current_cpu[MAXMEMDOM]; 572static bool intr_no_domain; 573 574static void 575intr_init_cpus(void) 576{ 577 int i; 578 579 for (i = 0; i < vm_ndomains; i++) { 580 if (CPU_OVERLAP(&cpuset_domain[i], &intr_cpus) == 0) { 581 intr_no_domain = true; 582 printf("%s: unable to route interrupts to CPUs in domain %d\n", 583 __func__, i); 584 } 585 586 current_cpu[i] = 0; 587 if (intr_no_domain && i > 0) 588 continue; 589 if (!CPU_ISSET(current_cpu[i], &intr_cpus) || 590 !CPU_ISSET(current_cpu[i], &cpuset_domain[i])) 591 intr_next_cpu(i); 592 } 593} 594 595/* 596 * Return the CPU that the next interrupt source should use. For now 597 * this just returns the next local APIC according to round-robin. 598 */ 599u_int 600intr_next_cpu(int domain) 601{ 602 u_int apic_id; 603 604 MPASS(mp_ncpus == 1 || smp_started); 605 if (mp_ncpus == 1) 606 return (PCPU_GET(apic_id)); 607 608 if (intr_no_domain) 609 domain = 0; 610 mtx_lock_spin(&icu_lock); 611 apic_id = cpu_apic_ids[current_cpu[domain]]; 612 do { 613 current_cpu[domain]++; 614 if (current_cpu[domain] > mp_maxid) 615 current_cpu[domain] = 0; 616 } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) || 617 (!CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]) && 618 !intr_no_domain)); 619 mtx_unlock_spin(&icu_lock); 620 return (apic_id); 621} 622 623/* 624 * Add a CPU to our mask of valid CPUs that can be destinations of 625 * interrupts. 626 */ 627void 628intr_add_cpu(u_int cpu) 629{ 630 631 if (cpu >= MAXCPU) 632 panic("%s: Invalid CPU ID %u", __func__, cpu); 633 if (bootverbose) 634 printf("INTR: Adding local APIC %d as a target\n", 635 cpu_apic_ids[cpu]); 636 637 CPU_SET(cpu, &intr_cpus); 638} 639 640static void 641intr_smp_startup(void *arg __unused) 642{ 643 644 intr_init_cpus(); 645 return; 646} 647SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, 648 NULL); 649 650/* 651 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. 652 */ 653static int 654sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) 655{ 656 struct sbuf sbuf; 657 struct intsrc *isrc; 658 u_int i; 659 int error; 660 661 error = sysctl_wire_old_buffer(req, 0); 662 if (error != 0) 663 return (error); 664 665 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 666 sx_slock(&intrsrc_lock); 667 for (i = 0; i < num_io_irqs; i++) { 668 isrc = interrupt_sources[i]; 669 if (isrc == NULL) 670 continue; 671 sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", 672 isrc->is_event->ie_fullname, 673 isrc->is_index, 674 isrc->is_cpu, 675 isrc->is_domain, 676 *isrc->is_count); 677 } 678 679 sx_sunlock(&intrsrc_lock); 680 error = sbuf_finish(&sbuf); 681 sbuf_delete(&sbuf); 682 return (error); 683} 684SYSCTL_PROC(_hw, OID_AUTO, intrs, 685 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 686 0, 0, sysctl_hw_intrs, "A", 687 "interrupt:number @cpu: count"); 688 689/* 690 * Compare two, possibly NULL, entries in the interrupt source array 691 * by load. 692 */ 693static int 694intrcmp(const void *one, const void *two) 695{ 696 const struct intsrc *i1, *i2; 697 698 i1 = *(const struct intsrc * const *)one; 699 i2 = *(const struct intsrc * const *)two; 700 if (i1 != NULL && i2 != NULL) 701 return (*i1->is_count - *i2->is_count); 702 if (i1 != NULL) 703 return (1); 704 if (i2 != NULL) 705 return (-1); 706 return (0); 707} 708 709/* 710 * Balance IRQs across available CPUs according to load. 711 */ 712static void 713intr_balance(void *dummy __unused, int pending __unused) 714{ 715 struct intsrc *isrc; 716 int interval; 717 u_int cpu; 718 int i; 719 720 interval = intrbalance; 721 if (interval == 0) 722 goto out; 723 724 /* 725 * Sort interrupts according to count. 726 */ 727 sx_xlock(&intrsrc_lock); 728 memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * 729 sizeof(interrupt_sorted[0])); 730 qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), 731 intrcmp); 732 733 /* 734 * Restart the scan from the same location to avoid moving in the 735 * common case. 736 */ 737 intr_init_cpus(); 738 739 /* 740 * Assign round-robin from most loaded to least. 741 */ 742 for (i = num_io_irqs - 1; i >= 0; i--) { 743 isrc = interrupt_sorted[i]; 744 if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) 745 continue; 746 cpu = current_cpu[isrc->is_domain]; 747 intr_next_cpu(isrc->is_domain); 748 if (isrc->is_cpu != cpu && 749 isrc->is_pic->pic_assign_cpu(isrc, 750 cpu_apic_ids[cpu]) == 0) 751 isrc->is_cpu = cpu; 752 } 753 sx_xunlock(&intrsrc_lock); 754out: 755 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, 756 interval ? hz * interval : hz * 60); 757 758} 759 760static void 761intr_balance_init(void *dummy __unused) 762{ 763 764 TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, 765 NULL); 766 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); 767} 768SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); 769 770#else 771/* 772 * Always route interrupts to the current processor in the UP case. 773 */ 774u_int 775intr_next_cpu(int domain) 776{ 777 778 return (PCPU_GET(apic_id)); 779} 780#endif 781