/* pci_emul.c revision 268887 */
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268887 2014-07-19 20:13:01Z jhb $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268887 2014-07-19 20:13:01Z jhb $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/errno.h> 35 36#include <ctype.h> 37#include <pthread.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <strings.h> 42#include <assert.h> 43#include <stdbool.h> 44 45#include <machine/vmm.h> 46#include <vmmapi.h> 47 48#include "acpi.h" 49#include "bhyverun.h" 50#include "inout.h" 51#include "ioapic.h" 52#include "mem.h" 53#include "pci_emul.h" 54#include "pci_lpc.h" 55 56#define CONF1_ADDR_PORT 0x0cf8 57#define CONF1_DATA_PORT 0x0cfc 58 59#define CONF1_ENABLE 0x80000000ul 60 61#define CFGWRITE(pi,off,val,b) \ 62do { \ 63 if ((b) == 1) { \ 64 pci_set_cfgdata8((pi),(off),(val)); \ 65 } else if ((b) == 2) { \ 66 pci_set_cfgdata16((pi),(off),(val)); \ 67 } else { \ 68 pci_set_cfgdata32((pi),(off),(val)); \ 69 } \ 70} while (0) 71 72#define MAXBUSES (PCI_BUSMAX + 1) 73#define MAXSLOTS (PCI_SLOTMAX + 1) 74#define MAXFUNCS (PCI_FUNCMAX + 1) 75 76struct funcinfo { 77 char *fi_name; 78 char *fi_param; 79 struct pci_devinst *fi_devi; 80}; 81 82struct intxinfo { 83 int ii_count; 84 int ii_ioapic_irq; 85}; 86 87struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90}; 91 92struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97}; 98 99static struct businfo *pci_businfo[MAXBUSES]; 100 101SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103static uint64_t pci_emul_iobase; 104static uint64_t pci_emul_membase32; 105static uint64_t pci_emul_membase64; 106 107#define PCI_EMUL_IOBASE 0x2000 108#define PCI_EMUL_IOLIMIT 0x10000 109 110#define 
PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 111 112#define PCI_EMUL_MEMBASE64 0xD000000000UL 113#define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 114 115static struct pci_devemu *pci_emul_finddev(char *name); 116static void pci_lintr_update(struct pci_devinst *pi); 117 118static struct mem_range pci_mem_hole; 119 120/* 121 * I/O access 122 */ 123 124/* 125 * Slot options are in the form: 126 * 127 * <bus>:<slot>:<func>,<emul>[,<config>] 128 * <slot>[:<func>],<emul>[,<config>] 129 * 130 * slot is 0..31 131 * func is 0..7 132 * emul is a string describing the type of PCI device e.g. virtio-net 133 * config is an optional string, depending on the device, that can be 134 * used for configuration. 135 * Examples are: 136 * 1,virtio-net,tap0 137 * 3:0,dummy 138 */ 139static void 140pci_parse_slot_usage(char *aopt) 141{ 142 143 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 144} 145 146int 147pci_parse_slot(char *opt) 148{ 149 struct businfo *bi; 150 struct slotinfo *si; 151 char *emul, *config, *str, *cp; 152 int error, bnum, snum, fnum; 153 154 error = -1; 155 str = strdup(opt); 156 157 emul = config = NULL; 158 if ((cp = strchr(str, ',')) != NULL) { 159 *cp = '\0'; 160 emul = cp + 1; 161 if ((cp = strchr(emul, ',')) != NULL) { 162 *cp = '\0'; 163 config = cp + 1; 164 } 165 } else { 166 pci_parse_slot_usage(opt); 167 goto done; 168 } 169 170 /* <bus>:<slot>:<func> */ 171 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 172 bnum = 0; 173 /* <slot>:<func> */ 174 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 175 fnum = 0; 176 /* <slot> */ 177 if (sscanf(str, "%d", &snum) != 1) { 178 snum = -1; 179 } 180 } 181 } 182 183 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 184 fnum < 0 || fnum >= MAXFUNCS) { 185 pci_parse_slot_usage(opt); 186 goto done; 187 } 188 189 if (pci_businfo[bnum] == NULL) 190 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 191 192 bi = pci_businfo[bnum]; 193 si = &bi->slotinfo[snum]; 194 195 if 
(si->si_funcs[fnum].fi_name != NULL) { 196 fprintf(stderr, "pci slot %d:%d already occupied!\n", 197 snum, fnum); 198 goto done; 199 } 200 201 if (pci_emul_finddev(emul) == NULL) { 202 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 203 snum, fnum, emul); 204 goto done; 205 } 206 207 error = 0; 208 si->si_funcs[fnum].fi_name = emul; 209 si->si_funcs[fnum].fi_param = config; 210 211done: 212 if (error) 213 free(str); 214 215 return (error); 216} 217 218static int 219pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 220{ 221 222 if (offset < pi->pi_msix.pba_offset) 223 return (0); 224 225 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 226 return (0); 227 } 228 229 return (1); 230} 231 232int 233pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 234 uint64_t value) 235{ 236 int msix_entry_offset; 237 int tab_index; 238 char *dest; 239 240 /* support only 4 or 8 byte writes */ 241 if (size != 4 && size != 8) 242 return (-1); 243 244 /* 245 * Return if table index is beyond what device supports 246 */ 247 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 248 if (tab_index >= pi->pi_msix.table_count) 249 return (-1); 250 251 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 252 253 /* support only aligned writes */ 254 if ((msix_entry_offset % size) != 0) 255 return (-1); 256 257 dest = (char *)(pi->pi_msix.table + tab_index); 258 dest += msix_entry_offset; 259 260 if (size == 4) 261 *((uint32_t *)dest) = value; 262 else 263 *((uint64_t *)dest) = value; 264 265 return (0); 266} 267 268uint64_t 269pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 270{ 271 char *dest; 272 int msix_entry_offset; 273 int tab_index; 274 uint64_t retval = ~0; 275 276 /* 277 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 278 * table but we also allow 1 byte access to accomodate reads from 279 * ddb. 
280 */ 281 if (size != 1 && size != 4 && size != 8) 282 return (retval); 283 284 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 285 286 /* support only aligned reads */ 287 if ((msix_entry_offset % size) != 0) { 288 return (retval); 289 } 290 291 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 292 293 if (tab_index < pi->pi_msix.table_count) { 294 /* valid MSI-X Table access */ 295 dest = (char *)(pi->pi_msix.table + tab_index); 296 dest += msix_entry_offset; 297 298 if (size == 1) 299 retval = *((uint8_t *)dest); 300 else if (size == 4) 301 retval = *((uint32_t *)dest); 302 else 303 retval = *((uint64_t *)dest); 304 } else if (pci_valid_pba_offset(pi, offset)) { 305 /* return 0 for PBA access */ 306 retval = 0; 307 } 308 309 return (retval); 310} 311 312int 313pci_msix_table_bar(struct pci_devinst *pi) 314{ 315 316 if (pi->pi_msix.table != NULL) 317 return (pi->pi_msix.table_bar); 318 else 319 return (-1); 320} 321 322int 323pci_msix_pba_bar(struct pci_devinst *pi) 324{ 325 326 if (pi->pi_msix.table != NULL) 327 return (pi->pi_msix.pba_bar); 328 else 329 return (-1); 330} 331 332static int 333pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 334 uint32_t *eax, void *arg) 335{ 336 struct pci_devinst *pdi = arg; 337 struct pci_devemu *pe = pdi->pi_d; 338 uint64_t offset; 339 int i; 340 341 for (i = 0; i <= PCI_BARMAX; i++) { 342 if (pdi->pi_bar[i].type == PCIBAR_IO && 343 port >= pdi->pi_bar[i].addr && 344 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 345 offset = port - pdi->pi_bar[i].addr; 346 if (in) 347 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 348 offset, bytes); 349 else 350 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 351 bytes, *eax); 352 return (0); 353 } 354 } 355 return (-1); 356} 357 358static int 359pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 360 int size, uint64_t *val, void *arg1, long arg2) 361{ 362 struct pci_devinst *pdi = arg1; 363 struct pci_devemu *pe = pdi->pi_d; 364 
uint64_t offset; 365 int bidx = (int) arg2; 366 367 assert(bidx <= PCI_BARMAX); 368 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 369 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 370 assert(addr >= pdi->pi_bar[bidx].addr && 371 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 372 373 offset = addr - pdi->pi_bar[bidx].addr; 374 375 if (dir == MEM_F_WRITE) { 376 if (size == 8) { 377 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 378 4, *val & 0xffffffff); 379 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 380 4, *val >> 32); 381 } else { 382 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 383 size, *val); 384 } 385 } else { 386 if (size == 8) { 387 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 388 offset, 4); 389 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 390 offset + 4, 4) << 32; 391 } else { 392 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 393 offset, size); 394 } 395 } 396 397 return (0); 398} 399 400 401static int 402pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 403 uint64_t *addr) 404{ 405 uint64_t base; 406 407 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 408 409 base = roundup2(*baseptr, size); 410 411 if (base + size <= limit) { 412 *addr = base; 413 *baseptr = base + size; 414 return (0); 415 } else 416 return (-1); 417} 418 419int 420pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 421 uint64_t size) 422{ 423 424 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 425} 426 427/* 428 * Register (or unregister) the MMIO or I/O region associated with the BAR 429 * register 'idx' of an emulated pci device. 
430 */ 431static void 432modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 433{ 434 int error; 435 struct inout_port iop; 436 struct mem_range mr; 437 438 switch (pi->pi_bar[idx].type) { 439 case PCIBAR_IO: 440 bzero(&iop, sizeof(struct inout_port)); 441 iop.name = pi->pi_name; 442 iop.port = pi->pi_bar[idx].addr; 443 iop.size = pi->pi_bar[idx].size; 444 if (registration) { 445 iop.flags = IOPORT_F_INOUT; 446 iop.handler = pci_emul_io_handler; 447 iop.arg = pi; 448 error = register_inout(&iop); 449 } else 450 error = unregister_inout(&iop); 451 break; 452 case PCIBAR_MEM32: 453 case PCIBAR_MEM64: 454 bzero(&mr, sizeof(struct mem_range)); 455 mr.name = pi->pi_name; 456 mr.base = pi->pi_bar[idx].addr; 457 mr.size = pi->pi_bar[idx].size; 458 if (registration) { 459 mr.flags = MEM_F_RW; 460 mr.handler = pci_emul_mem_handler; 461 mr.arg1 = pi; 462 mr.arg2 = idx; 463 error = register_mem(&mr); 464 } else 465 error = unregister_mem(&mr); 466 break; 467 default: 468 error = EINVAL; 469 break; 470 } 471 assert(error == 0); 472} 473 474static void 475unregister_bar(struct pci_devinst *pi, int idx) 476{ 477 478 modify_bar_registration(pi, idx, 0); 479} 480 481static void 482register_bar(struct pci_devinst *pi, int idx) 483{ 484 485 modify_bar_registration(pi, idx, 1); 486} 487 488/* Are we decoding i/o port accesses for the emulated pci device? */ 489static int 490porten(struct pci_devinst *pi) 491{ 492 uint16_t cmd; 493 494 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 495 496 return (cmd & PCIM_CMD_PORTEN); 497} 498 499/* Are we decoding memory accesses for the emulated pci device? */ 500static int 501memen(struct pci_devinst *pi) 502{ 503 uint16_t cmd; 504 505 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 506 507 return (cmd & PCIM_CMD_MEMEN); 508} 509 510/* 511 * Update the MMIO or I/O address that is decoded by the BAR register. 
512 * 513 * If the pci device has enabled the address space decoding then intercept 514 * the address range decoded by the BAR register. 515 */ 516static void 517update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 518{ 519 int decode; 520 521 if (pi->pi_bar[idx].type == PCIBAR_IO) 522 decode = porten(pi); 523 else 524 decode = memen(pi); 525 526 if (decode) 527 unregister_bar(pi, idx); 528 529 switch (type) { 530 case PCIBAR_IO: 531 case PCIBAR_MEM32: 532 pi->pi_bar[idx].addr = addr; 533 break; 534 case PCIBAR_MEM64: 535 pi->pi_bar[idx].addr &= ~0xffffffffUL; 536 pi->pi_bar[idx].addr |= addr; 537 break; 538 case PCIBAR_MEMHI64: 539 pi->pi_bar[idx].addr &= 0xffffffff; 540 pi->pi_bar[idx].addr |= addr; 541 break; 542 default: 543 assert(0); 544 } 545 546 if (decode) 547 register_bar(pi, idx); 548} 549 550int 551pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 552 enum pcibar_type type, uint64_t size) 553{ 554 int error; 555 uint64_t *baseptr, limit, addr, mask, lobits, bar; 556 557 assert(idx >= 0 && idx <= PCI_BARMAX); 558 559 if ((size & (size - 1)) != 0) 560 size = 1UL << flsl(size); /* round up to a power of 2 */ 561 562 /* Enforce minimum BAR sizes required by the PCI standard */ 563 if (type == PCIBAR_IO) { 564 if (size < 4) 565 size = 4; 566 } else { 567 if (size < 16) 568 size = 16; 569 } 570 571 switch (type) { 572 case PCIBAR_NONE: 573 baseptr = NULL; 574 addr = mask = lobits = 0; 575 break; 576 case PCIBAR_IO: 577 baseptr = &pci_emul_iobase; 578 limit = PCI_EMUL_IOLIMIT; 579 mask = PCIM_BAR_IO_BASE; 580 lobits = PCIM_BAR_IO_SPACE; 581 break; 582 case PCIBAR_MEM64: 583 /* 584 * XXX 585 * Some drivers do not work well if the 64-bit BAR is allocated 586 * above 4GB. Allow for this by allocating small requests under 587 * 4GB unless then allocation size is larger than some arbitrary 588 * number (32MB currently). 
589 */ 590 if (size > 32 * 1024 * 1024) { 591 /* 592 * XXX special case for device requiring peer-peer DMA 593 */ 594 if (size == 0x100000000UL) 595 baseptr = &hostbase; 596 else 597 baseptr = &pci_emul_membase64; 598 limit = PCI_EMUL_MEMLIMIT64; 599 mask = PCIM_BAR_MEM_BASE; 600 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 601 PCIM_BAR_MEM_PREFETCH; 602 break; 603 } else { 604 baseptr = &pci_emul_membase32; 605 limit = PCI_EMUL_MEMLIMIT32; 606 mask = PCIM_BAR_MEM_BASE; 607 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 608 } 609 break; 610 case PCIBAR_MEM32: 611 baseptr = &pci_emul_membase32; 612 limit = PCI_EMUL_MEMLIMIT32; 613 mask = PCIM_BAR_MEM_BASE; 614 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 615 break; 616 default: 617 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 618 assert(0); 619 } 620 621 if (baseptr != NULL) { 622 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 623 if (error != 0) 624 return (error); 625 } 626 627 pdi->pi_bar[idx].type = type; 628 pdi->pi_bar[idx].addr = addr; 629 pdi->pi_bar[idx].size = size; 630 631 /* Initialize the BAR register in config space */ 632 bar = (addr & mask) | lobits; 633 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 634 635 if (type == PCIBAR_MEM64) { 636 assert(idx + 1 <= PCI_BARMAX); 637 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 638 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 639 } 640 641 register_bar(pdi, idx); 642 643 return (0); 644} 645 646#define CAP_START_OFFSET 0x40 647static int 648pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 649{ 650 int i, capoff, reallen; 651 uint16_t sts; 652 653 assert(caplen > 0); 654 655 reallen = roundup2(caplen, 4); /* dword aligned */ 656 657 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 658 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 659 capoff = CAP_START_OFFSET; 660 else 661 capoff = pi->pi_capend + 1; 662 663 /* Check if we have enough space */ 664 if (capoff + reallen > PCI_REGMAX + 1) 665 return (-1); 666 
667 /* Set the previous capability pointer */ 668 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 669 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 670 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 671 } else 672 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 673 674 /* Copy the capability */ 675 for (i = 0; i < caplen; i++) 676 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 677 678 /* Set the next capability pointer */ 679 pci_set_cfgdata8(pi, capoff + 1, 0); 680 681 pi->pi_prevcap = capoff; 682 pi->pi_capend = capoff + reallen - 1; 683 return (0); 684} 685 686static struct pci_devemu * 687pci_emul_finddev(char *name) 688{ 689 struct pci_devemu **pdpp, *pdp; 690 691 SET_FOREACH(pdpp, pci_devemu_set) { 692 pdp = *pdpp; 693 if (!strcmp(pdp->pe_emu, name)) { 694 return (pdp); 695 } 696 } 697 698 return (NULL); 699} 700 701static int 702pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 703 int func, struct funcinfo *fi) 704{ 705 struct pci_devinst *pdi; 706 int err; 707 708 pdi = malloc(sizeof(struct pci_devinst)); 709 bzero(pdi, sizeof(*pdi)); 710 711 pdi->pi_vmctx = ctx; 712 pdi->pi_bus = bus; 713 pdi->pi_slot = slot; 714 pdi->pi_func = func; 715 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 716 pdi->pi_lintr.pin = 0; 717 pdi->pi_lintr.state = IDLE; 718 pdi->pi_lintr.ioapic_irq = 0; 719 pdi->pi_d = pde; 720 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 721 722 /* Disable legacy interrupts */ 723 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 724 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 725 726 pci_set_cfgdata8(pdi, PCIR_COMMAND, 727 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 728 729 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 730 if (err == 0) 731 fi->fi_devi = pdi; 732 else 733 free(pdi); 734 735 return (err); 736} 737 738void 739pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 740{ 741 int mmc; 742 743 CTASSERT(sizeof(struct msicap) == 14); 744 745 /* Number of msi messages 
must be a power of 2 between 1 and 32 */ 746 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 747 mmc = ffs(msgnum) - 1; 748 749 bzero(msicap, sizeof(struct msicap)); 750 msicap->capid = PCIY_MSI; 751 msicap->nextptr = nextptr; 752 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 753} 754 755int 756pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 757{ 758 struct msicap msicap; 759 760 pci_populate_msicap(&msicap, msgnum, 0); 761 762 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 763} 764 765static void 766pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 767 uint32_t msix_tab_size) 768{ 769 CTASSERT(sizeof(struct msixcap) == 12); 770 771 assert(msix_tab_size % 4096 == 0); 772 773 bzero(msixcap, sizeof(struct msixcap)); 774 msixcap->capid = PCIY_MSIX; 775 776 /* 777 * Message Control Register, all fields set to 778 * zero except for the Table Size. 779 * Note: Table size N is encoded as N-1 780 */ 781 msixcap->msgctrl = msgnum - 1; 782 783 /* 784 * MSI-X BAR setup: 785 * - MSI-X table start at offset 0 786 * - PBA table starts at a 4K aligned offset after the MSI-X table 787 */ 788 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 789 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 790} 791 792static void 793pci_msix_table_init(struct pci_devinst *pi, int table_entries) 794{ 795 int i, table_size; 796 797 assert(table_entries > 0); 798 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 799 800 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 801 pi->pi_msix.table = malloc(table_size); 802 bzero(pi->pi_msix.table, table_size); 803 804 /* set mask bit of vector control register */ 805 for (i = 0; i < table_entries; i++) 806 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 807} 808 809int 810pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 811{ 812 uint32_t tab_size; 813 struct msixcap msixcap; 814 815 assert(msgnum >= 1 && msgnum <= 
MAX_MSIX_TABLE_ENTRIES); 816 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 817 818 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 819 820 /* Align table size to nearest 4K */ 821 tab_size = roundup2(tab_size, 4096); 822 823 pi->pi_msix.table_bar = barnum; 824 pi->pi_msix.pba_bar = barnum; 825 pi->pi_msix.table_offset = 0; 826 pi->pi_msix.table_count = msgnum; 827 pi->pi_msix.pba_offset = tab_size; 828 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 829 830 pci_msix_table_init(pi, msgnum); 831 832 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 833 834 /* allocate memory for MSI-X Table and PBA */ 835 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 836 tab_size + pi->pi_msix.pba_size); 837 838 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 839 sizeof(msixcap))); 840} 841 842void 843msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 844 int bytes, uint32_t val) 845{ 846 uint16_t msgctrl, rwmask; 847 int off, table_bar; 848 849 off = offset - capoff; 850 table_bar = pi->pi_msix.table_bar; 851 /* Message Control Register */ 852 if (off == 2 && bytes == 2) { 853 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 854 msgctrl = pci_get_cfgdata16(pi, offset); 855 msgctrl &= ~rwmask; 856 msgctrl |= val & rwmask; 857 val = msgctrl; 858 859 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 860 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 861 pci_lintr_update(pi); 862 } 863 864 CFGWRITE(pi, offset, val, bytes); 865} 866 867void 868msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 869 int bytes, uint32_t val) 870{ 871 uint16_t msgctrl, rwmask, msgdata, mme; 872 uint32_t addrlo; 873 874 /* 875 * If guest is writing to the message control register make sure 876 * we do not overwrite read-only fields. 
877 */ 878 if ((offset - capoff) == 2 && bytes == 2) { 879 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 880 msgctrl = pci_get_cfgdata16(pi, offset); 881 msgctrl &= ~rwmask; 882 msgctrl |= val & rwmask; 883 val = msgctrl; 884 885 addrlo = pci_get_cfgdata32(pi, capoff + 4); 886 if (msgctrl & PCIM_MSICTRL_64BIT) 887 msgdata = pci_get_cfgdata16(pi, capoff + 12); 888 else 889 msgdata = pci_get_cfgdata16(pi, capoff + 8); 890 891 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 892 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 893 if (pi->pi_msi.enabled) { 894 pi->pi_msi.addr = addrlo; 895 pi->pi_msi.msg_data = msgdata; 896 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 897 } else { 898 pi->pi_msi.maxmsgnum = 0; 899 } 900 pci_lintr_update(pi); 901 } 902 903 CFGWRITE(pi, offset, val, bytes); 904} 905 906void 907pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 908 int bytes, uint32_t val) 909{ 910 911 /* XXX don't write to the readonly parts */ 912 CFGWRITE(pi, offset, val, bytes); 913} 914 915#define PCIECAP_VERSION 0x2 916int 917pci_emul_add_pciecap(struct pci_devinst *pi, int type) 918{ 919 int err; 920 struct pciecap pciecap; 921 922 CTASSERT(sizeof(struct pciecap) == 60); 923 924 if (type != PCIEM_TYPE_ROOT_PORT) 925 return (-1); 926 927 bzero(&pciecap, sizeof(pciecap)); 928 929 pciecap.capid = PCIY_EXPRESS; 930 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 931 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 932 pciecap.link_status = 0x11; /* gen1, x1 */ 933 934 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 935 return (err); 936} 937 938/* 939 * This function assumes that 'coff' is in the capabilities region of the 940 * config space. 
941 */ 942static void 943pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 944{ 945 int capid; 946 uint8_t capoff, nextoff; 947 948 /* Do not allow un-aligned writes */ 949 if ((offset & (bytes - 1)) != 0) 950 return; 951 952 /* Find the capability that we want to update */ 953 capoff = CAP_START_OFFSET; 954 while (1) { 955 nextoff = pci_get_cfgdata8(pi, capoff + 1); 956 if (nextoff == 0) 957 break; 958 if (offset >= capoff && offset < nextoff) 959 break; 960 961 capoff = nextoff; 962 } 963 assert(offset >= capoff); 964 965 /* 966 * Capability ID and Next Capability Pointer are readonly. 967 * However, some o/s's do 4-byte writes that include these. 968 * For this case, trim the write back to 2 bytes and adjust 969 * the data. 970 */ 971 if (offset == capoff || offset == capoff + 1) { 972 if (offset == capoff && bytes == 4) { 973 bytes = 2; 974 offset += 2; 975 val >>= 16; 976 } else 977 return; 978 } 979 980 capid = pci_get_cfgdata8(pi, capoff); 981 switch (capid) { 982 case PCIY_MSI: 983 msicap_cfgwrite(pi, capoff, offset, bytes, val); 984 break; 985 case PCIY_MSIX: 986 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 987 break; 988 case PCIY_EXPRESS: 989 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 990 break; 991 default: 992 break; 993 } 994} 995 996static int 997pci_emul_iscap(struct pci_devinst *pi, int offset) 998{ 999 uint16_t sts; 1000 1001 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1002 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1003 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1004 return (1); 1005 } 1006 return (0); 1007} 1008 1009static int 1010pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1011 int size, uint64_t *val, void *arg1, long arg2) 1012{ 1013 /* 1014 * Ignore writes; return 0xff's for reads. The mem read code 1015 * will take care of truncating to the correct size. 
1016 */ 1017 if (dir == MEM_F_READ) { 1018 *val = 0xffffffffffffffff; 1019 } 1020 1021 return (0); 1022} 1023 1024#define BUSIO_ROUNDUP 32 1025#define BUSMEM_ROUNDUP (1024 * 1024) 1026 1027int 1028init_pci(struct vmctx *ctx) 1029{ 1030 struct pci_devemu *pde; 1031 struct businfo *bi; 1032 struct slotinfo *si; 1033 struct funcinfo *fi; 1034 size_t lowmem; 1035 int bus, slot, func; 1036 int error; 1037 1038 pci_emul_iobase = PCI_EMUL_IOBASE; 1039 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1040 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1041 1042 for (bus = 0; bus < MAXBUSES; bus++) { 1043 if ((bi = pci_businfo[bus]) == NULL) 1044 continue; 1045 /* 1046 * Keep track of the i/o and memory resources allocated to 1047 * this bus. 1048 */ 1049 bi->iobase = pci_emul_iobase; 1050 bi->membase32 = pci_emul_membase32; 1051 bi->membase64 = pci_emul_membase64; 1052 1053 for (slot = 0; slot < MAXSLOTS; slot++) { 1054 si = &bi->slotinfo[slot]; 1055 for (func = 0; func < MAXFUNCS; func++) { 1056 fi = &si->si_funcs[func]; 1057 if (fi->fi_name == NULL) 1058 continue; 1059 pde = pci_emul_finddev(fi->fi_name); 1060 assert(pde != NULL); 1061 error = pci_emul_init(ctx, pde, bus, slot, 1062 func, fi); 1063 if (error) 1064 return (error); 1065 } 1066 } 1067 1068 /* 1069 * Add some slop to the I/O and memory resources decoded by 1070 * this bus to give a guest some flexibility if it wants to 1071 * reprogram the BARs. 
1072 */ 1073 pci_emul_iobase += BUSIO_ROUNDUP; 1074 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1075 bi->iolimit = pci_emul_iobase; 1076 1077 pci_emul_membase32 += BUSMEM_ROUNDUP; 1078 pci_emul_membase32 = roundup2(pci_emul_membase32, 1079 BUSMEM_ROUNDUP); 1080 bi->memlimit32 = pci_emul_membase32; 1081 1082 pci_emul_membase64 += BUSMEM_ROUNDUP; 1083 pci_emul_membase64 = roundup2(pci_emul_membase64, 1084 BUSMEM_ROUNDUP); 1085 bi->memlimit64 = pci_emul_membase64; 1086 } 1087 1088 /* 1089 * The guest physical memory map looks like the following: 1090 * [0, lowmem) guest system memory 1091 * [lowmem, lowmem_limit) memory hole (may be absent) 1092 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1093 * [4GB, 4GB + highmem) 1094 * 1095 * Accesses to memory addresses that are not allocated to system 1096 * memory or PCI devices return 0xff's. 1097 */ 1098 error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); 1099 assert(error == 0); 1100 1101 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1102 pci_mem_hole.name = "PCI hole"; 1103 pci_mem_hole.flags = MEM_F_RW; 1104 pci_mem_hole.base = lowmem; 1105 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1106 pci_mem_hole.handler = pci_emul_fallback_handler; 1107 1108 error = register_mem_fallback(&pci_mem_hole); 1109 assert(error == 0); 1110 1111 return (0); 1112} 1113 1114static void 1115pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) 1116{ 1117 int *count; 1118 1119 count = arg; 1120 dsdt_line(" Package (0x04)"); 1121 dsdt_line(" {"); 1122 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1123 dsdt_line(" 0x%02X,", pin - 1); 1124 dsdt_line(" Zero,"); 1125 dsdt_line(" 0x%X", ioapic_irq); 1126 dsdt_line(" }%s", *count == 1 ? "" : ","); 1127 (*count)--; 1128} 1129 1130/* 1131 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1132 * corresponding to each PCI bus. 
 */
static void
pci_bus_write_dsdt(int bus)
{
    struct businfo *bi;
    struct slotinfo *si;
    struct pci_devinst *pi;
    int count, slot, func;

    /*
     * If there are no devices on this 'bus' then just return.
     */
    if ((bi = pci_businfo[bus]) == NULL) {
        /*
         * Bus 0 is special because it decodes the I/O ports used
         * for PCI config space access even if there are no devices
         * on it.
         */
        if (bus != 0)
            return;
    }

    /* Emit the host bridge device node (PNP0A03) for this bus. */
    dsdt_indent(1);
    dsdt_line("Scope (_SB)");
    dsdt_line("{");
    dsdt_line("  Device (PC%02X)", bus);
    dsdt_line("  {");
    dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");
    dsdt_line("    Name (_ADR, Zero)");

    dsdt_line("    Method (_BBN, 0, NotSerialized)");
    dsdt_line("    {");
    dsdt_line("        Return (0x%08X)", bus);
    dsdt_line("    }");
    dsdt_line("    Name (_CRS, ResourceTemplate ()");
    dsdt_line("    {");
    /* Bus number resource: exactly one bus, 'bus' itself. */
    dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
        "MaxFixed, PosDecode,");
    dsdt_line("        0x0000,             // Granularity");
    dsdt_line("        0x%04X,             // Range Minimum", bus);
    dsdt_line("        0x%04X,             // Range Maximum", bus);
    dsdt_line("        0x0000,             // Translation Offset");
    dsdt_line("        0x0001,             // Length");
    dsdt_line("        ,, )");

    if (bus == 0) {
        /*
         * Bus 0 claims the config-mechanism ports (0xCF8-0xCFF) and
         * the legacy I/O ranges below PCI_EMUL_IOBASE, excluding
         * 0xCF8-0xCFF itself.
         */
        dsdt_indent(3);
        dsdt_fixed_ioport(0xCF8, 8);
        dsdt_unindent(3);

        dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
            "PosDecode, EntireRange,");
        dsdt_line("        0x0000,             // Granularity");
        dsdt_line("        0x0000,             // Range Minimum");
        dsdt_line("        0x0CF7,             // Range Maximum");
        dsdt_line("        0x0000,             // Translation Offset");
        dsdt_line("        0x0CF8,             // Length");
        dsdt_line("        ,, , TypeStatic)");

        dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
            "PosDecode, EntireRange,");
        dsdt_line("        0x0000,             // Granularity");
        dsdt_line("        0x0D00,             // Range Minimum");
        dsdt_line("        0x%04X,             // Range Maximum",
            PCI_EMUL_IOBASE - 1);
        dsdt_line("        0x0000,             // Translation Offset");
        dsdt_line("        0x%04X,             // Length",
            PCI_EMUL_IOBASE - 0x0D00);
        dsdt_line("        ,, , TypeStatic)");

        /* No devices on bus 0: close _CRS and skip per-device output. */
        if (bi == NULL) {
            dsdt_line("    })");
            goto done;
        }
    }
    assert(bi != NULL);

    /* i/o window */
    dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
        "PosDecode, EntireRange,");
    dsdt_line("        0x0000,             // Granularity");
    dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
    dsdt_line("        0x%04X,             // Range Maximum",
        bi->iolimit - 1);
    dsdt_line("        0x0000,             // Translation Offset");
    dsdt_line("        0x%04X,             // Length",
        bi->iolimit - bi->iobase);
    dsdt_line("        ,, , TypeStatic)");

    /* mmio window (32-bit) */
    dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
        "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
    dsdt_line("        0x00000000,         // Granularity");
    dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
    dsdt_line("        0x%08X,         // Range Maximum\n",
        bi->memlimit32 - 1);
    dsdt_line("        0x00000000,         // Translation Offset");
    dsdt_line("        0x%08X,         // Length\n",
        bi->memlimit32 - bi->membase32);
    dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

    /* mmio window (64-bit) */
    dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
        "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
    dsdt_line("        0x0000000000000000, // Granularity");
    dsdt_line("        0x%016lX,           // Range Minimum\n", bi->membase64);
    dsdt_line("        0x%016lX,           // Range Maximum\n",
        bi->memlimit64 - 1);
    dsdt_line("        0x0000000000000000, // Translation Offset");
    dsdt_line("        0x%016lX,           // Length\n",
        bi->memlimit64 - bi->membase64);
    dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
    dsdt_line("    })");

    /* Emit the PCI routing table (_PRT) if any INTx pins are in use. */
    count = pci_count_lintr(bus);
    if (count != 0) {
        dsdt_indent(2);
        dsdt_line("Name (_PRT, Package (0x%02X)", count);
        dsdt_line("{");
        pci_walk_lintr(bus, pci_prt_entry, &count);
        dsdt_line("})");
        dsdt_unindent(2);
    }

    /* Let each device emulation append its own DSDT content. */
    dsdt_indent(2);
    for (slot = 0; slot < MAXSLOTS; slot++) {
        si = &bi->slotinfo[slot];
        for (func = 0; func < MAXFUNCS; func++) {
            pi = si->si_funcs[func].fi_devi;
            if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
                pi->pi_d->pe_write_dsdt(pi);
        }
    }
    dsdt_unindent(2);
done:
    dsdt_line("  }");
    dsdt_line("}");
    dsdt_unindent(1);
}

/*
 * Emit DSDT entries for every emulated PCI bus.
 */
void
pci_write_dsdt(void)
{
    int bus;

    for (bus = 0; bus < MAXBUSES; bus++)
        pci_bus_write_dsdt(bus);
}

/*
 * Return non-zero if at least one device was configured on 'bus'.
 */
int
pci_bus_configured(int bus)
{
    assert(bus >= 0 && bus < MAXBUSES);
    return (pci_businfo[bus] != NULL);
}

/* Return non-zero if MSI is enabled for this device. */
int
pci_msi_enabled(struct pci_devinst *pi)
{
    return (pi->pi_msi.enabled);
}

/* Return the number of MSI messages available, or 0 if MSI is disabled. */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
    if (pi->pi_msi.enabled)
        return (pi->pi_msi.maxmsgnum);
    else
        return (0);
}

/* MSI-X is usable only when MSI is not simultaneously enabled. */
int
pci_msix_enabled(struct pci_devinst *pi)
{

    return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

/*
 * Deliver MSI-X vector 'index' unless the function or the individual
 * table entry is masked, or the index is out of range.
 */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
    struct msix_table_entry *mte;

    if (!pci_msix_enabled(pi))
        return;

    if (pi->pi_msix.function_mask)
        return;

    if (index >= pi->pi_msix.table_count)
        return;

    mte = &pi->pi_msix.table[index];
    if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
        /* XXX Set PBA bit if interrupt is disabled */
        vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
    }
}

/*
 * Deliver MSI message 'index'; the message data is the programmed base
 * data value plus the index.
 */
void
pci_generate_msi(struct pci_devinst *pi, int index)
{

    if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
        vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
            pi->pi_msi.msg_data + index);
    }
}

/*
 * INTx may be asserted only when neither MSI nor MSI-X is enabled and
 * the command register's INTx-disable bit is clear.
 */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
    uint16_t cmd;

    cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
    return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
        (cmd & PCIM_CMD_INTxDIS)));
}

/*
 * Allocate a legacy INTx pin (INTA#-INTD#) for this device: pick the
 * least-used pin in the slot, lazily allocate an I/O APIC IRQ for it,
 * and record the assignment in the INTLINE/INTPIN config registers.
 * Returns 0 on success or -1 if no I/O APIC IRQ could be allocated.
 */
int
pci_lintr_request(struct pci_devinst *pi)
{
    struct businfo *bi;
    struct slotinfo *si;
    int bestpin, bestcount, irq, pin;

    bi = pci_businfo[pi->pi_bus];
    assert(bi != NULL);

    /*
     * First, allocate a pin from our slot.
     */
    si = &bi->slotinfo[pi->pi_slot];
    bestpin = 0;
    bestcount = si->si_intpins[0].ii_count;
    for (pin = 1; pin < 4; pin++) {
        if (si->si_intpins[pin].ii_count < bestcount) {
            bestpin = pin;
            bestcount = si->si_intpins[pin].ii_count;
        }
    }

    /*
     * Attempt to allocate an I/O APIC pin for this intpin. If
     * 8259A support is added we will need a separate field to
     * assign the intpin to an input pin on the PCI interrupt
     * router.
     */
    if (si->si_intpins[bestpin].ii_count == 0) {
        irq = ioapic_pci_alloc_irq();
        if (irq < 0)
            return (-1);
        si->si_intpins[bestpin].ii_ioapic_irq = irq;
    } else
        irq = si->si_intpins[bestpin].ii_ioapic_irq;
    si->si_intpins[bestpin].ii_count++;

    /* pin is 1-based in config space (1 == INTA#). */
    pi->pi_lintr.pin = bestpin + 1;
    pi->pi_lintr.ioapic_irq = irq;
    pci_set_cfgdata8(pi, PCIR_INTLINE, irq);
    pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
    return (0);
}

/*
 * Assert this device's INTx line.  If INTx delivery is currently not
 * permitted the assertion is latched as PENDING and delivered later by
 * pci_lintr_update().
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

    assert(pi->pi_lintr.pin > 0);

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == IDLE) {
        if (pci_lintr_permitted(pi)) {
            pi->pi_lintr.state = ASSERTED;
            vm_ioapic_assert_irq(pi->pi_vmctx,
                pi->pi_lintr.ioapic_irq);
        } else
            pi->pi_lintr.state = PENDING;
    }
    pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Deassert this device's INTx line, dropping any latched PENDING state.
 */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

    assert(pi->pi_lintr.pin > 0);

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == ASSERTED) {
        pi->pi_lintr.state = IDLE;
        vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
    } else if (pi->pi_lintr.state == PENDING)
        pi->pi_lintr.state = IDLE;
    pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Reconcile the INTx state machine with the current INTx-permitted
 * status (called after MSI/MSI-X enable or command-register writes):
 * mask an asserted interrupt, or deliver one that was pending.
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
        vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
        pi->pi_lintr.state = PENDING;
    } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
        pi->pi_lintr.state = ASSERTED;
        vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
    }
    pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Count the number of slot/pin combinations on 'bus' with at least one
 * INTx user (i.e. the number of _PRT entries needed).
 */
int
pci_count_lintr(int bus)
{
    int count, slot, pin;
    struct slotinfo *slotinfo;

    count = 0;
    if (pci_businfo[bus] != NULL) {
        for (slot = 0; slot < MAXSLOTS; slot++) {
            slotinfo = &pci_businfo[bus]->slotinfo[slot];
            for (pin = 0; pin < 4; pin++) {
                if (slotinfo->si_intpins[pin].ii_count != 0)
                    count++;
            }
        }
    }
    return (count);
}

/*
 * Invoke 'cb' for every in-use INTx pin on 'bus'.  The pin passed to
 * the callback is 1-based (1 == INTA#).
 */
void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
    struct businfo *bi;
    struct slotinfo *si;
    struct intxinfo *ii;
    int slot, pin;

    if ((bi = pci_businfo[bus]) == NULL)
        return;

    for (slot = 0; slot < MAXSLOTS; slot++) {
        si = &bi->slotinfo[slot];
        for (pin = 0; pin < 4; pin++) {
            ii = &si->si_intpins[pin];
            if (ii->ii_count != 0)
                cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg);
        }
    }
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
    struct businfo *bi;
    struct slotinfo *si;
    int f, numfuncs;

    numfuncs = 0;
    if ((bi = pci_businfo[bus]) != NULL) {
        si = &bi->slotinfo[slot];
        for (f = 0; f < MAXFUNCS; f++) {
            if (si->si_funcs[f].fi_devi != NULL) {
                numfuncs++;
            }
        }
    }
    return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not is a multi-function being emulated in the pci 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
    int mfdev;

    /* Only fix up reads that actually cover the header-type register. */
    if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
        mfdev = pci_emul_is_mfdev(bus, slot);
        switch (bytes) {
        case 1:
        case 2:
            *rv &= ~PCIM_MFDEV;
            if (mfdev) {
                *rv |= PCIM_MFDEV;
            }
            break;
        case 4:
            /* HDRTYPE is the third byte of a dword read at offset 0xc. */
            *rv &= ~(PCIM_MFDEV << 16);
            if (mfdev) {
                *rv |= (PCIM_MFDEV << 16);
            }
            break;
        }
    }
}

/* Latched state of the config-address register (port 0xCF8). */
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

/*
 * Handler for the PCI config-mechanism #1 address port (0xCF8).
 * Reads return the latched bus/slot/func/offset word; dword writes
 * latch a new one.  Sub-dword accesses read as all-ones and writes
 * are ignored.
 */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
    uint32_t x;

    if (bytes != 4) {
        if (in)
            *eax = (bytes == 2) ? 0xffff : 0xff;
        return (0);
    }

    if (in) {
        x = (cfgbus << 16) |
            (cfgslot << 11) |
            (cfgfunc << 8) |
            cfgoff;
        if (cfgenable)
            x |= CONF1_ENABLE;
        *eax = x;
    } else {
        x = *eax;
        cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
        cfgoff = x & PCI_REGMAX;
        cfgfunc = (x >> 8) & PCI_FUNCMAX;
        cfgslot = (x >> 11) & PCI_SLOTMAX;
        cfgbus = (x >> 16) & PCI_BUSMAX;
    }

    return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

/* Return the bits in 'mask' that differ between 'old' and 'new'. */
static uint32_t
bits_changed(uint32_t old, uint32_t new, uint32_t mask)
{

    return ((old ^ new) & mask);
}

/*
 * Handle a guest write to the command register: apply the write, then
 * register/unregister BAR decodings whose enable bit (PORTEN/MEMEN)
 * changed, and finally reconcile the INTx state with the (possibly
 * changed) INTx-disable bit.
 */
static void
pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
{
    int i;
    uint16_t old;

    /*
     * The command register is at an offset of 4 bytes and thus the
     * guest could write 1, 2 or 4 bytes starting at this offset.
     */

    old = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */
    CFGWRITE(pi, PCIR_COMMAND, new, bytes);	/* update config */
    new = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* get updated value */

    /*
     * If the MMIO or I/O address space decoding has changed then
     * register/unregister all BARs that decode that address space.
     */
    for (i = 0; i <= PCI_BARMAX; i++) {
        switch (pi->pi_bar[i].type) {
            case PCIBAR_NONE:
            case PCIBAR_MEMHI64:
                break;
            case PCIBAR_IO:
                /* I/O address space decoding changed? */
                if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
                    if (porten(pi))
                        register_bar(pi, i);
                    else
                        unregister_bar(pi, i);
                }
                break;
            case PCIBAR_MEM32:
            case PCIBAR_MEM64:
                /* MMIO address space decoding changed? */
                if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
                    if (memen(pi))
                        register_bar(pi, i);
                    else
                        unregister_bar(pi, i);
                }
                break;
            default:
                assert(0);
        }
    }

    /*
     * If INTx has been unmasked and is pending, assert the
     * interrupt.
     */
    pci_lintr_update(pi);
}

/*
 * Handler for the PCI config-mechanism #1 data ports (0xCFC-0xCFF).
 * Routes config space accesses to the device selected by the latched
 * address word, with special handling for BAR, capability and command
 * register writes.
 */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
    struct businfo *bi;
    struct slotinfo *si;
    struct pci_devinst *pi;
    struct pci_devemu *pe;
    int coff, idx, needcfg;
    uint64_t addr, bar, mask;

    assert(bytes == 1 || bytes == 2 || bytes == 4);

    if ((bi = pci_businfo[cfgbus]) != NULL) {
        si = &bi->slotinfo[cfgslot];
        pi = si->si_funcs[cfgfunc].fi_devi;
    } else
        pi = NULL;

    /* The data port offset within 0xCFC-0xCFF selects the byte lane. */
    coff = cfgoff + (port - CONF1_DATA_PORT);

#if 0
    printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
        in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
#endif

    /*
     * Just return if there is no device at this cfgslot:cfgfunc,
     * if the guest is doing an un-aligned access, or if the config
     * address word isn't enabled.
     */
    if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) {
        if (in)
            *eax = 0xffffffff;
        return (0);
    }

    pe = pi->pi_d;

    /*
     * Config read
     */
    if (in) {
        /* Let the device emulation override the default handler */
        if (pe->pe_cfgread != NULL) {
            needcfg = pe->pe_cfgread(ctx, vcpu, pi,
                coff, bytes, eax);
        } else {
            needcfg = 1;
        }

        if (needcfg) {
            if (bytes == 1)
                *eax = pci_get_cfgdata8(pi, coff);
            else if (bytes == 2)
                *eax = pci_get_cfgdata16(pi, coff);
            else
                *eax = pci_get_cfgdata32(pi, coff);
        }

        pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax);
    } else {
        /* Let the device emulation override the default handler */
        if (pe->pe_cfgwrite != NULL &&
            (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
            return (0);

        /*
         * Special handling for write to BAR registers
         */
        if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
            /*
             * Ignore writes to BAR registers that are not
             * 4-byte aligned.
             */
            if (bytes != 4 || (coff & 0x3) != 0)
                return (0);
            idx = (coff - PCIR_BAR(0)) / 4;
            /* BAR low bits are hardwired to zero per the BAR size. */
            mask = ~(pi->pi_bar[idx].size - 1);
            switch (pi->pi_bar[idx].type) {
            case PCIBAR_NONE:
                pi->pi_bar[idx].addr = bar = 0;
                break;
            case PCIBAR_IO:
                addr = *eax & mask;
                addr &= 0xffff;
                bar = addr | PCIM_BAR_IO_SPACE;
                /*
                 * Register the new BAR value for interception
                 */
                if (addr != pi->pi_bar[idx].addr) {
                    update_bar_address(pi, addr, idx,
                        PCIBAR_IO);
                }
                break;
            case PCIBAR_MEM32:
                addr = bar = *eax & mask;
                bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
                if (addr != pi->pi_bar[idx].addr) {
                    update_bar_address(pi, addr, idx,
                        PCIBAR_MEM32);
                }
                break;
            case PCIBAR_MEM64:
                /* Low dword of a 64-bit BAR. */
                addr = bar = *eax & mask;
                bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
                    PCIM_BAR_MEM_PREFETCH;
                if (addr != (uint32_t)pi->pi_bar[idx].addr) {
                    update_bar_address(pi, addr, idx,
                        PCIBAR_MEM64);
                }
                break;
            case PCIBAR_MEMHI64:
                /* High dword: the size lives in the previous BAR slot. */
                mask = ~(pi->pi_bar[idx - 1].size - 1);
                addr = ((uint64_t)*eax << 32) & mask;
                bar = addr >> 32;
                if (bar != pi->pi_bar[idx - 1].addr >> 32) {
                    update_bar_address(pi, addr, idx - 1,
                        PCIBAR_MEMHI64);
                }
                break;
            default:
                assert(0);
            }
            pci_set_cfgdata32(pi, coff, bar);

        } else if (pci_emul_iscap(pi, coff)) {
            pci_emul_capwrite(pi, coff, bytes, *eax);
        } else if (coff == PCIR_COMMAND) {
            pci_emul_cmdwrite(pi, *eax, bytes);
        } else {
            CFGWRITE(pi, coff, *eax, bytes);
        }
    }

    return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

/*
 *
I/O ports to configure PCI IRQ routing. We ignore all writes to it. 1779 */ 1780static int 1781pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1782 uint32_t *eax, void *arg) 1783{ 1784 assert(in == 0); 1785 return (0); 1786} 1787INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); 1788INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); 1789SYSRES_IO(0xC00, 2); 1790 1791#define PCI_EMUL_TEST 1792#ifdef PCI_EMUL_TEST 1793/* 1794 * Define a dummy test device 1795 */ 1796#define DIOSZ 8 1797#define DMEMSZ 4096 1798struct pci_emul_dsoftc { 1799 uint8_t ioregs[DIOSZ]; 1800 uint8_t memregs[DMEMSZ]; 1801}; 1802 1803#define PCI_EMUL_MSI_MSGS 4 1804#define PCI_EMUL_MSIX_MSGS 16 1805 1806static int 1807pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1808{ 1809 int error; 1810 struct pci_emul_dsoftc *sc; 1811 1812 sc = malloc(sizeof(struct pci_emul_dsoftc)); 1813 memset(sc, 0, sizeof(struct pci_emul_dsoftc)); 1814 1815 pi->pi_arg = sc; 1816 1817 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1818 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1819 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1820 1821 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1822 assert(error == 0); 1823 1824 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1825 assert(error == 0); 1826 1827 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1828 assert(error == 0); 1829 1830 return (0); 1831} 1832 1833static void 1834pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1835 uint64_t offset, int size, uint64_t value) 1836{ 1837 int i; 1838 struct pci_emul_dsoftc *sc = pi->pi_arg; 1839 1840 if (baridx == 0) { 1841 if (offset + size > DIOSZ) { 1842 printf("diow: iow too large, offset %ld size %d\n", 1843 offset, size); 1844 return; 1845 } 1846 1847 if (size == 1) { 1848 sc->ioregs[offset] = value & 0xff; 1849 } else if (size == 2) { 1850 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1851 } else if 
(size == 4) { 1852 *(uint32_t *)&sc->ioregs[offset] = value; 1853 } else { 1854 printf("diow: iow unknown size %d\n", size); 1855 } 1856 1857 /* 1858 * Special magic value to generate an interrupt 1859 */ 1860 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1861 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1862 1863 if (value == 0xabcdef) { 1864 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1865 pci_generate_msi(pi, i); 1866 } 1867 } 1868 1869 if (baridx == 1) { 1870 if (offset + size > DMEMSZ) { 1871 printf("diow: memw too large, offset %ld size %d\n", 1872 offset, size); 1873 return; 1874 } 1875 1876 if (size == 1) { 1877 sc->memregs[offset] = value; 1878 } else if (size == 2) { 1879 *(uint16_t *)&sc->memregs[offset] = value; 1880 } else if (size == 4) { 1881 *(uint32_t *)&sc->memregs[offset] = value; 1882 } else if (size == 8) { 1883 *(uint64_t *)&sc->memregs[offset] = value; 1884 } else { 1885 printf("diow: memw unknown size %d\n", size); 1886 } 1887 1888 /* 1889 * magic interrupt ?? 
1890 */ 1891 } 1892 1893 if (baridx > 1) { 1894 printf("diow: unknown bar idx %d\n", baridx); 1895 } 1896} 1897 1898static uint64_t 1899pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1900 uint64_t offset, int size) 1901{ 1902 struct pci_emul_dsoftc *sc = pi->pi_arg; 1903 uint32_t value; 1904 1905 if (baridx == 0) { 1906 if (offset + size > DIOSZ) { 1907 printf("dior: ior too large, offset %ld size %d\n", 1908 offset, size); 1909 return (0); 1910 } 1911 1912 if (size == 1) { 1913 value = sc->ioregs[offset]; 1914 } else if (size == 2) { 1915 value = *(uint16_t *) &sc->ioregs[offset]; 1916 } else if (size == 4) { 1917 value = *(uint32_t *) &sc->ioregs[offset]; 1918 } else { 1919 printf("dior: ior unknown size %d\n", size); 1920 } 1921 } 1922 1923 if (baridx == 1) { 1924 if (offset + size > DMEMSZ) { 1925 printf("dior: memr too large, offset %ld size %d\n", 1926 offset, size); 1927 return (0); 1928 } 1929 1930 if (size == 1) { 1931 value = sc->memregs[offset]; 1932 } else if (size == 2) { 1933 value = *(uint16_t *) &sc->memregs[offset]; 1934 } else if (size == 4) { 1935 value = *(uint32_t *) &sc->memregs[offset]; 1936 } else if (size == 8) { 1937 value = *(uint64_t *) &sc->memregs[offset]; 1938 } else { 1939 printf("dior: ior unknown size %d\n", size); 1940 } 1941 } 1942 1943 1944 if (baridx > 1) { 1945 printf("dior: unknown bar idx %d\n", baridx); 1946 return (0); 1947 } 1948 1949 return (value); 1950} 1951 1952struct pci_devemu pci_dummy = { 1953 .pe_emu = "dummy", 1954 .pe_init = pci_emul_dinit, 1955 .pe_barwrite = pci_emul_diow, 1956 .pe_barread = pci_emul_dior 1957}; 1958PCI_EMUL_SET(pci_dummy); 1959 1960#endif /* PCI_EMUL_TEST */ 1961