pci_emul.c revision 268953
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268953 2014-07-21 19:08:02Z jhb $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268953 2014-07-21 19:08:02Z jhb $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/errno.h> 35 36#include <ctype.h> 37#include <pthread.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <strings.h> 42#include <assert.h> 43#include <stdbool.h> 44 45#include <machine/vmm.h> 46#include <vmmapi.h> 47 48#include "acpi.h" 49#include "bhyverun.h" 50#include "inout.h" 51#include "ioapic.h" 52#include "mem.h" 53#include "pci_emul.h" 54#include "pci_lpc.h" 55 56#define CONF1_ADDR_PORT 0x0cf8 57#define CONF1_DATA_PORT 0x0cfc 58 59#define CONF1_ENABLE 0x80000000ul 60 61#define CFGWRITE(pi,off,val,b) \ 62do { \ 63 if ((b) == 1) { \ 64 pci_set_cfgdata8((pi),(off),(val)); \ 65 } else if ((b) == 2) { \ 66 pci_set_cfgdata16((pi),(off),(val)); \ 67 } else { \ 68 pci_set_cfgdata32((pi),(off),(val)); \ 69 } \ 70} while (0) 71 72#define MAXBUSES (PCI_BUSMAX + 1) 73#define MAXSLOTS (PCI_SLOTMAX + 1) 74#define MAXFUNCS (PCI_FUNCMAX + 1) 75 76struct funcinfo { 77 char *fi_name; 78 char *fi_param; 79 struct pci_devinst *fi_devi; 80}; 81 82struct intxinfo { 83 int ii_count; 84 int ii_ioapic_irq; 85}; 86 87struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90}; 91 92struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97}; 98 99static struct businfo *pci_businfo[MAXBUSES]; 100 101SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103static uint64_t pci_emul_iobase; 104static uint64_t pci_emul_membase32; 105static uint64_t pci_emul_membase64; 106 107#define PCI_EMUL_IOBASE 0x2000 108#define PCI_EMUL_IOLIMIT 0x10000 109 110#define 
PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 111 112#define PCI_EMUL_MEMBASE64 0xD000000000UL 113#define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 114 115static struct pci_devemu *pci_emul_finddev(char *name); 116static void pci_lintr_update(struct pci_devinst *pi); 117 118static struct mem_range pci_mem_hole; 119 120/* 121 * I/O access 122 */ 123 124/* 125 * Slot options are in the form: 126 * 127 * <bus>:<slot>:<func>,<emul>[,<config>] 128 * <slot>[:<func>],<emul>[,<config>] 129 * 130 * slot is 0..31 131 * func is 0..7 132 * emul is a string describing the type of PCI device e.g. virtio-net 133 * config is an optional string, depending on the device, that can be 134 * used for configuration. 135 * Examples are: 136 * 1,virtio-net,tap0 137 * 3:0,dummy 138 */ 139static void 140pci_parse_slot_usage(char *aopt) 141{ 142 143 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 144} 145 146int 147pci_parse_slot(char *opt) 148{ 149 struct businfo *bi; 150 struct slotinfo *si; 151 char *emul, *config, *str, *cp; 152 int error, bnum, snum, fnum; 153 154 error = -1; 155 str = strdup(opt); 156 157 emul = config = NULL; 158 if ((cp = strchr(str, ',')) != NULL) { 159 *cp = '\0'; 160 emul = cp + 1; 161 if ((cp = strchr(emul, ',')) != NULL) { 162 *cp = '\0'; 163 config = cp + 1; 164 } 165 } else { 166 pci_parse_slot_usage(opt); 167 goto done; 168 } 169 170 /* <bus>:<slot>:<func> */ 171 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 172 bnum = 0; 173 /* <slot>:<func> */ 174 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 175 fnum = 0; 176 /* <slot> */ 177 if (sscanf(str, "%d", &snum) != 1) { 178 snum = -1; 179 } 180 } 181 } 182 183 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 184 fnum < 0 || fnum >= MAXFUNCS) { 185 pci_parse_slot_usage(opt); 186 goto done; 187 } 188 189 if (pci_businfo[bnum] == NULL) 190 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 191 192 bi = pci_businfo[bnum]; 193 si = &bi->slotinfo[snum]; 194 195 if 
(si->si_funcs[fnum].fi_name != NULL) { 196 fprintf(stderr, "pci slot %d:%d already occupied!\n", 197 snum, fnum); 198 goto done; 199 } 200 201 if (pci_emul_finddev(emul) == NULL) { 202 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 203 snum, fnum, emul); 204 goto done; 205 } 206 207 error = 0; 208 si->si_funcs[fnum].fi_name = emul; 209 si->si_funcs[fnum].fi_param = config; 210 211done: 212 if (error) 213 free(str); 214 215 return (error); 216} 217 218static int 219pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 220{ 221 222 if (offset < pi->pi_msix.pba_offset) 223 return (0); 224 225 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 226 return (0); 227 } 228 229 return (1); 230} 231 232int 233pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 234 uint64_t value) 235{ 236 int msix_entry_offset; 237 int tab_index; 238 char *dest; 239 240 /* support only 4 or 8 byte writes */ 241 if (size != 4 && size != 8) 242 return (-1); 243 244 /* 245 * Return if table index is beyond what device supports 246 */ 247 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 248 if (tab_index >= pi->pi_msix.table_count) 249 return (-1); 250 251 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 252 253 /* support only aligned writes */ 254 if ((msix_entry_offset % size) != 0) 255 return (-1); 256 257 dest = (char *)(pi->pi_msix.table + tab_index); 258 dest += msix_entry_offset; 259 260 if (size == 4) 261 *((uint32_t *)dest) = value; 262 else 263 *((uint64_t *)dest) = value; 264 265 return (0); 266} 267 268uint64_t 269pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 270{ 271 char *dest; 272 int msix_entry_offset; 273 int tab_index; 274 uint64_t retval = ~0; 275 276 /* 277 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 278 * table but we also allow 1 byte access to accomodate reads from 279 * ddb. 
280 */ 281 if (size != 1 && size != 4 && size != 8) 282 return (retval); 283 284 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 285 286 /* support only aligned reads */ 287 if ((msix_entry_offset % size) != 0) { 288 return (retval); 289 } 290 291 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 292 293 if (tab_index < pi->pi_msix.table_count) { 294 /* valid MSI-X Table access */ 295 dest = (char *)(pi->pi_msix.table + tab_index); 296 dest += msix_entry_offset; 297 298 if (size == 1) 299 retval = *((uint8_t *)dest); 300 else if (size == 4) 301 retval = *((uint32_t *)dest); 302 else 303 retval = *((uint64_t *)dest); 304 } else if (pci_valid_pba_offset(pi, offset)) { 305 /* return 0 for PBA access */ 306 retval = 0; 307 } 308 309 return (retval); 310} 311 312int 313pci_msix_table_bar(struct pci_devinst *pi) 314{ 315 316 if (pi->pi_msix.table != NULL) 317 return (pi->pi_msix.table_bar); 318 else 319 return (-1); 320} 321 322int 323pci_msix_pba_bar(struct pci_devinst *pi) 324{ 325 326 if (pi->pi_msix.table != NULL) 327 return (pi->pi_msix.pba_bar); 328 else 329 return (-1); 330} 331 332static int 333pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 334 uint32_t *eax, void *arg) 335{ 336 struct pci_devinst *pdi = arg; 337 struct pci_devemu *pe = pdi->pi_d; 338 uint64_t offset; 339 int i; 340 341 for (i = 0; i <= PCI_BARMAX; i++) { 342 if (pdi->pi_bar[i].type == PCIBAR_IO && 343 port >= pdi->pi_bar[i].addr && 344 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 345 offset = port - pdi->pi_bar[i].addr; 346 if (in) 347 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 348 offset, bytes); 349 else 350 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 351 bytes, *eax); 352 return (0); 353 } 354 } 355 return (-1); 356} 357 358static int 359pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 360 int size, uint64_t *val, void *arg1, long arg2) 361{ 362 struct pci_devinst *pdi = arg1; 363 struct pci_devemu *pe = pdi->pi_d; 364 
uint64_t offset; 365 int bidx = (int) arg2; 366 367 assert(bidx <= PCI_BARMAX); 368 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 369 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 370 assert(addr >= pdi->pi_bar[bidx].addr && 371 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 372 373 offset = addr - pdi->pi_bar[bidx].addr; 374 375 if (dir == MEM_F_WRITE) { 376 if (size == 8) { 377 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 378 4, *val & 0xffffffff); 379 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 380 4, *val >> 32); 381 } else { 382 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 383 size, *val); 384 } 385 } else { 386 if (size == 8) { 387 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 388 offset, 4); 389 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 390 offset + 4, 4) << 32; 391 } else { 392 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 393 offset, size); 394 } 395 } 396 397 return (0); 398} 399 400 401static int 402pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 403 uint64_t *addr) 404{ 405 uint64_t base; 406 407 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 408 409 base = roundup2(*baseptr, size); 410 411 if (base + size <= limit) { 412 *addr = base; 413 *baseptr = base + size; 414 return (0); 415 } else 416 return (-1); 417} 418 419int 420pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 421 uint64_t size) 422{ 423 424 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 425} 426 427/* 428 * Register (or unregister) the MMIO or I/O region associated with the BAR 429 * register 'idx' of an emulated pci device. 
430 */ 431static void 432modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 433{ 434 int error; 435 struct inout_port iop; 436 struct mem_range mr; 437 438 switch (pi->pi_bar[idx].type) { 439 case PCIBAR_IO: 440 bzero(&iop, sizeof(struct inout_port)); 441 iop.name = pi->pi_name; 442 iop.port = pi->pi_bar[idx].addr; 443 iop.size = pi->pi_bar[idx].size; 444 if (registration) { 445 iop.flags = IOPORT_F_INOUT; 446 iop.handler = pci_emul_io_handler; 447 iop.arg = pi; 448 error = register_inout(&iop); 449 } else 450 error = unregister_inout(&iop); 451 break; 452 case PCIBAR_MEM32: 453 case PCIBAR_MEM64: 454 bzero(&mr, sizeof(struct mem_range)); 455 mr.name = pi->pi_name; 456 mr.base = pi->pi_bar[idx].addr; 457 mr.size = pi->pi_bar[idx].size; 458 if (registration) { 459 mr.flags = MEM_F_RW; 460 mr.handler = pci_emul_mem_handler; 461 mr.arg1 = pi; 462 mr.arg2 = idx; 463 error = register_mem(&mr); 464 } else 465 error = unregister_mem(&mr); 466 break; 467 default: 468 error = EINVAL; 469 break; 470 } 471 assert(error == 0); 472} 473 474static void 475unregister_bar(struct pci_devinst *pi, int idx) 476{ 477 478 modify_bar_registration(pi, idx, 0); 479} 480 481static void 482register_bar(struct pci_devinst *pi, int idx) 483{ 484 485 modify_bar_registration(pi, idx, 1); 486} 487 488/* Are we decoding i/o port accesses for the emulated pci device? */ 489static int 490porten(struct pci_devinst *pi) 491{ 492 uint16_t cmd; 493 494 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 495 496 return (cmd & PCIM_CMD_PORTEN); 497} 498 499/* Are we decoding memory accesses for the emulated pci device? */ 500static int 501memen(struct pci_devinst *pi) 502{ 503 uint16_t cmd; 504 505 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 506 507 return (cmd & PCIM_CMD_MEMEN); 508} 509 510/* 511 * Update the MMIO or I/O address that is decoded by the BAR register. 
512 * 513 * If the pci device has enabled the address space decoding then intercept 514 * the address range decoded by the BAR register. 515 */ 516static void 517update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 518{ 519 int decode; 520 521 if (pi->pi_bar[idx].type == PCIBAR_IO) 522 decode = porten(pi); 523 else 524 decode = memen(pi); 525 526 if (decode) 527 unregister_bar(pi, idx); 528 529 switch (type) { 530 case PCIBAR_IO: 531 case PCIBAR_MEM32: 532 pi->pi_bar[idx].addr = addr; 533 break; 534 case PCIBAR_MEM64: 535 pi->pi_bar[idx].addr &= ~0xffffffffUL; 536 pi->pi_bar[idx].addr |= addr; 537 break; 538 case PCIBAR_MEMHI64: 539 pi->pi_bar[idx].addr &= 0xffffffff; 540 pi->pi_bar[idx].addr |= addr; 541 break; 542 default: 543 assert(0); 544 } 545 546 if (decode) 547 register_bar(pi, idx); 548} 549 550int 551pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 552 enum pcibar_type type, uint64_t size) 553{ 554 int error; 555 uint64_t *baseptr, limit, addr, mask, lobits, bar; 556 557 assert(idx >= 0 && idx <= PCI_BARMAX); 558 559 if ((size & (size - 1)) != 0) 560 size = 1UL << flsl(size); /* round up to a power of 2 */ 561 562 /* Enforce minimum BAR sizes required by the PCI standard */ 563 if (type == PCIBAR_IO) { 564 if (size < 4) 565 size = 4; 566 } else { 567 if (size < 16) 568 size = 16; 569 } 570 571 switch (type) { 572 case PCIBAR_NONE: 573 baseptr = NULL; 574 addr = mask = lobits = 0; 575 break; 576 case PCIBAR_IO: 577 baseptr = &pci_emul_iobase; 578 limit = PCI_EMUL_IOLIMIT; 579 mask = PCIM_BAR_IO_BASE; 580 lobits = PCIM_BAR_IO_SPACE; 581 break; 582 case PCIBAR_MEM64: 583 /* 584 * XXX 585 * Some drivers do not work well if the 64-bit BAR is allocated 586 * above 4GB. Allow for this by allocating small requests under 587 * 4GB unless then allocation size is larger than some arbitrary 588 * number (32MB currently). 
589 */ 590 if (size > 32 * 1024 * 1024) { 591 /* 592 * XXX special case for device requiring peer-peer DMA 593 */ 594 if (size == 0x100000000UL) 595 baseptr = &hostbase; 596 else 597 baseptr = &pci_emul_membase64; 598 limit = PCI_EMUL_MEMLIMIT64; 599 mask = PCIM_BAR_MEM_BASE; 600 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 601 PCIM_BAR_MEM_PREFETCH; 602 break; 603 } else { 604 baseptr = &pci_emul_membase32; 605 limit = PCI_EMUL_MEMLIMIT32; 606 mask = PCIM_BAR_MEM_BASE; 607 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 608 } 609 break; 610 case PCIBAR_MEM32: 611 baseptr = &pci_emul_membase32; 612 limit = PCI_EMUL_MEMLIMIT32; 613 mask = PCIM_BAR_MEM_BASE; 614 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 615 break; 616 default: 617 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 618 assert(0); 619 } 620 621 if (baseptr != NULL) { 622 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 623 if (error != 0) 624 return (error); 625 } 626 627 pdi->pi_bar[idx].type = type; 628 pdi->pi_bar[idx].addr = addr; 629 pdi->pi_bar[idx].size = size; 630 631 /* Initialize the BAR register in config space */ 632 bar = (addr & mask) | lobits; 633 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 634 635 if (type == PCIBAR_MEM64) { 636 assert(idx + 1 <= PCI_BARMAX); 637 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 638 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 639 } 640 641 register_bar(pdi, idx); 642 643 return (0); 644} 645 646#define CAP_START_OFFSET 0x40 647static int 648pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 649{ 650 int i, capoff, reallen; 651 uint16_t sts; 652 653 assert(caplen > 0); 654 655 reallen = roundup2(caplen, 4); /* dword aligned */ 656 657 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 658 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 659 capoff = CAP_START_OFFSET; 660 else 661 capoff = pi->pi_capend + 1; 662 663 /* Check if we have enough space */ 664 if (capoff + reallen > PCI_REGMAX + 1) 665 return (-1); 666 
667 /* Set the previous capability pointer */ 668 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 669 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 670 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 671 } else 672 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 673 674 /* Copy the capability */ 675 for (i = 0; i < caplen; i++) 676 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 677 678 /* Set the next capability pointer */ 679 pci_set_cfgdata8(pi, capoff + 1, 0); 680 681 pi->pi_prevcap = capoff; 682 pi->pi_capend = capoff + reallen - 1; 683 return (0); 684} 685 686static struct pci_devemu * 687pci_emul_finddev(char *name) 688{ 689 struct pci_devemu **pdpp, *pdp; 690 691 SET_FOREACH(pdpp, pci_devemu_set) { 692 pdp = *pdpp; 693 if (!strcmp(pdp->pe_emu, name)) { 694 return (pdp); 695 } 696 } 697 698 return (NULL); 699} 700 701static int 702pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 703 int func, struct funcinfo *fi) 704{ 705 struct pci_devinst *pdi; 706 int err; 707 708 pdi = calloc(1, sizeof(struct pci_devinst)); 709 710 pdi->pi_vmctx = ctx; 711 pdi->pi_bus = bus; 712 pdi->pi_slot = slot; 713 pdi->pi_func = func; 714 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 715 pdi->pi_lintr.pin = 0; 716 pdi->pi_lintr.state = IDLE; 717 pdi->pi_lintr.ioapic_irq = 0; 718 pdi->pi_d = pde; 719 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 720 721 /* Disable legacy interrupts */ 722 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 723 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 724 725 pci_set_cfgdata8(pdi, PCIR_COMMAND, 726 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 727 728 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 729 if (err == 0) 730 fi->fi_devi = pdi; 731 else 732 free(pdi); 733 734 return (err); 735} 736 737void 738pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 739{ 740 int mmc; 741 742 CTASSERT(sizeof(struct msicap) == 14); 743 744 /* Number of msi messages must be a power of 2 
between 1 and 32 */ 745 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 746 mmc = ffs(msgnum) - 1; 747 748 bzero(msicap, sizeof(struct msicap)); 749 msicap->capid = PCIY_MSI; 750 msicap->nextptr = nextptr; 751 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 752} 753 754int 755pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 756{ 757 struct msicap msicap; 758 759 pci_populate_msicap(&msicap, msgnum, 0); 760 761 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 762} 763 764static void 765pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 766 uint32_t msix_tab_size) 767{ 768 CTASSERT(sizeof(struct msixcap) == 12); 769 770 assert(msix_tab_size % 4096 == 0); 771 772 bzero(msixcap, sizeof(struct msixcap)); 773 msixcap->capid = PCIY_MSIX; 774 775 /* 776 * Message Control Register, all fields set to 777 * zero except for the Table Size. 778 * Note: Table size N is encoded as N-1 779 */ 780 msixcap->msgctrl = msgnum - 1; 781 782 /* 783 * MSI-X BAR setup: 784 * - MSI-X table start at offset 0 785 * - PBA table starts at a 4K aligned offset after the MSI-X table 786 */ 787 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 788 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 789} 790 791static void 792pci_msix_table_init(struct pci_devinst *pi, int table_entries) 793{ 794 int i, table_size; 795 796 assert(table_entries > 0); 797 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 798 799 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 800 pi->pi_msix.table = calloc(1, table_size); 801 802 /* set mask bit of vector control register */ 803 for (i = 0; i < table_entries; i++) 804 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 805} 806 807int 808pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 809{ 810 uint32_t tab_size; 811 struct msixcap msixcap; 812 813 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 814 assert(barnum >= 0 && barnum <= 
PCIR_MAX_BAR_0); 815 816 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 817 818 /* Align table size to nearest 4K */ 819 tab_size = roundup2(tab_size, 4096); 820 821 pi->pi_msix.table_bar = barnum; 822 pi->pi_msix.pba_bar = barnum; 823 pi->pi_msix.table_offset = 0; 824 pi->pi_msix.table_count = msgnum; 825 pi->pi_msix.pba_offset = tab_size; 826 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 827 828 pci_msix_table_init(pi, msgnum); 829 830 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 831 832 /* allocate memory for MSI-X Table and PBA */ 833 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 834 tab_size + pi->pi_msix.pba_size); 835 836 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 837 sizeof(msixcap))); 838} 839 840void 841msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 842 int bytes, uint32_t val) 843{ 844 uint16_t msgctrl, rwmask; 845 int off, table_bar; 846 847 off = offset - capoff; 848 table_bar = pi->pi_msix.table_bar; 849 /* Message Control Register */ 850 if (off == 2 && bytes == 2) { 851 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 852 msgctrl = pci_get_cfgdata16(pi, offset); 853 msgctrl &= ~rwmask; 854 msgctrl |= val & rwmask; 855 val = msgctrl; 856 857 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 858 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 859 pci_lintr_update(pi); 860 } 861 862 CFGWRITE(pi, offset, val, bytes); 863} 864 865void 866msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 867 int bytes, uint32_t val) 868{ 869 uint16_t msgctrl, rwmask, msgdata, mme; 870 uint32_t addrlo; 871 872 /* 873 * If guest is writing to the message control register make sure 874 * we do not overwrite read-only fields. 
875 */ 876 if ((offset - capoff) == 2 && bytes == 2) { 877 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 878 msgctrl = pci_get_cfgdata16(pi, offset); 879 msgctrl &= ~rwmask; 880 msgctrl |= val & rwmask; 881 val = msgctrl; 882 883 addrlo = pci_get_cfgdata32(pi, capoff + 4); 884 if (msgctrl & PCIM_MSICTRL_64BIT) 885 msgdata = pci_get_cfgdata16(pi, capoff + 12); 886 else 887 msgdata = pci_get_cfgdata16(pi, capoff + 8); 888 889 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 890 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 891 if (pi->pi_msi.enabled) { 892 pi->pi_msi.addr = addrlo; 893 pi->pi_msi.msg_data = msgdata; 894 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 895 } else { 896 pi->pi_msi.maxmsgnum = 0; 897 } 898 pci_lintr_update(pi); 899 } 900 901 CFGWRITE(pi, offset, val, bytes); 902} 903 904void 905pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 906 int bytes, uint32_t val) 907{ 908 909 /* XXX don't write to the readonly parts */ 910 CFGWRITE(pi, offset, val, bytes); 911} 912 913#define PCIECAP_VERSION 0x2 914int 915pci_emul_add_pciecap(struct pci_devinst *pi, int type) 916{ 917 int err; 918 struct pciecap pciecap; 919 920 CTASSERT(sizeof(struct pciecap) == 60); 921 922 if (type != PCIEM_TYPE_ROOT_PORT) 923 return (-1); 924 925 bzero(&pciecap, sizeof(pciecap)); 926 927 pciecap.capid = PCIY_EXPRESS; 928 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 929 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 930 pciecap.link_status = 0x11; /* gen1, x1 */ 931 932 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 933 return (err); 934} 935 936/* 937 * This function assumes that 'coff' is in the capabilities region of the 938 * config space. 
939 */ 940static void 941pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 942{ 943 int capid; 944 uint8_t capoff, nextoff; 945 946 /* Do not allow un-aligned writes */ 947 if ((offset & (bytes - 1)) != 0) 948 return; 949 950 /* Find the capability that we want to update */ 951 capoff = CAP_START_OFFSET; 952 while (1) { 953 nextoff = pci_get_cfgdata8(pi, capoff + 1); 954 if (nextoff == 0) 955 break; 956 if (offset >= capoff && offset < nextoff) 957 break; 958 959 capoff = nextoff; 960 } 961 assert(offset >= capoff); 962 963 /* 964 * Capability ID and Next Capability Pointer are readonly. 965 * However, some o/s's do 4-byte writes that include these. 966 * For this case, trim the write back to 2 bytes and adjust 967 * the data. 968 */ 969 if (offset == capoff || offset == capoff + 1) { 970 if (offset == capoff && bytes == 4) { 971 bytes = 2; 972 offset += 2; 973 val >>= 16; 974 } else 975 return; 976 } 977 978 capid = pci_get_cfgdata8(pi, capoff); 979 switch (capid) { 980 case PCIY_MSI: 981 msicap_cfgwrite(pi, capoff, offset, bytes, val); 982 break; 983 case PCIY_MSIX: 984 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 985 break; 986 case PCIY_EXPRESS: 987 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 988 break; 989 default: 990 break; 991 } 992} 993 994static int 995pci_emul_iscap(struct pci_devinst *pi, int offset) 996{ 997 uint16_t sts; 998 999 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1000 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1001 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1002 return (1); 1003 } 1004 return (0); 1005} 1006 1007static int 1008pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1009 int size, uint64_t *val, void *arg1, long arg2) 1010{ 1011 /* 1012 * Ignore writes; return 0xff's for reads. The mem read code 1013 * will take care of truncating to the correct size. 
1014 */ 1015 if (dir == MEM_F_READ) { 1016 *val = 0xffffffffffffffff; 1017 } 1018 1019 return (0); 1020} 1021 1022#define BUSIO_ROUNDUP 32 1023#define BUSMEM_ROUNDUP (1024 * 1024) 1024 1025int 1026init_pci(struct vmctx *ctx) 1027{ 1028 struct pci_devemu *pde; 1029 struct businfo *bi; 1030 struct slotinfo *si; 1031 struct funcinfo *fi; 1032 size_t lowmem; 1033 int bus, slot, func; 1034 int error; 1035 1036 pci_emul_iobase = PCI_EMUL_IOBASE; 1037 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1038 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1039 1040 for (bus = 0; bus < MAXBUSES; bus++) { 1041 if ((bi = pci_businfo[bus]) == NULL) 1042 continue; 1043 /* 1044 * Keep track of the i/o and memory resources allocated to 1045 * this bus. 1046 */ 1047 bi->iobase = pci_emul_iobase; 1048 bi->membase32 = pci_emul_membase32; 1049 bi->membase64 = pci_emul_membase64; 1050 1051 for (slot = 0; slot < MAXSLOTS; slot++) { 1052 si = &bi->slotinfo[slot]; 1053 for (func = 0; func < MAXFUNCS; func++) { 1054 fi = &si->si_funcs[func]; 1055 if (fi->fi_name == NULL) 1056 continue; 1057 pde = pci_emul_finddev(fi->fi_name); 1058 assert(pde != NULL); 1059 error = pci_emul_init(ctx, pde, bus, slot, 1060 func, fi); 1061 if (error) 1062 return (error); 1063 } 1064 } 1065 1066 /* 1067 * Add some slop to the I/O and memory resources decoded by 1068 * this bus to give a guest some flexibility if it wants to 1069 * reprogram the BARs. 
1070 */ 1071 pci_emul_iobase += BUSIO_ROUNDUP; 1072 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1073 bi->iolimit = pci_emul_iobase; 1074 1075 pci_emul_membase32 += BUSMEM_ROUNDUP; 1076 pci_emul_membase32 = roundup2(pci_emul_membase32, 1077 BUSMEM_ROUNDUP); 1078 bi->memlimit32 = pci_emul_membase32; 1079 1080 pci_emul_membase64 += BUSMEM_ROUNDUP; 1081 pci_emul_membase64 = roundup2(pci_emul_membase64, 1082 BUSMEM_ROUNDUP); 1083 bi->memlimit64 = pci_emul_membase64; 1084 } 1085 1086 /* 1087 * The guest physical memory map looks like the following: 1088 * [0, lowmem) guest system memory 1089 * [lowmem, lowmem_limit) memory hole (may be absent) 1090 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1091 * [4GB, 4GB + highmem) 1092 * 1093 * Accesses to memory addresses that are not allocated to system 1094 * memory or PCI devices return 0xff's. 1095 */ 1096 error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); 1097 assert(error == 0); 1098 1099 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1100 pci_mem_hole.name = "PCI hole"; 1101 pci_mem_hole.flags = MEM_F_RW; 1102 pci_mem_hole.base = lowmem; 1103 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1104 pci_mem_hole.handler = pci_emul_fallback_handler; 1105 1106 error = register_mem_fallback(&pci_mem_hole); 1107 assert(error == 0); 1108 1109 return (0); 1110} 1111 1112static void 1113pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) 1114{ 1115 int *count; 1116 1117 count = arg; 1118 dsdt_line(" Package (0x04)"); 1119 dsdt_line(" {"); 1120 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1121 dsdt_line(" 0x%02X,", pin - 1); 1122 dsdt_line(" Zero,"); 1123 dsdt_line(" 0x%X", ioapic_irq); 1124 dsdt_line(" }%s", *count == 1 ? "" : ","); 1125 (*count)--; 1126} 1127 1128/* 1129 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1130 * corresponding to each PCI bus. 
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, slot, func;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	/* Open the _SB scope and declare a PNP0A03 root bridge device. */
	dsdt_indent(1);
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	dsdt_line("  Device (PC%02X)", bus);
	dsdt_line("  {");
	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");
	dsdt_line("    Name (_ADR, Zero)");

	dsdt_line("    Method (_BBN, 0, NotSerialized)");
	dsdt_line("    {");
	dsdt_line("        Return (0x%08X)", bus);
	dsdt_line("    }");
	dsdt_line("    Name (_CRS, ResourceTemplate ()");
	dsdt_line("    {");
	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bus);
	dsdt_line("        0x%04X,             // Range Maximum", bus);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x0001,             // Length");
	dsdt_line("        ,, )");

	if (bus == 0) {
		/*
		 * Bus 0 always decodes the 0xCF8/0xCFC config-access ports
		 * plus the legacy I/O ranges, even with no devices present.
		 */
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0000,             // Range Minimum");
		dsdt_line("        0x0CF7,             // Range Maximum");
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x0CF8,             // Length");
		dsdt_line("        ,, , TypeStatic)");

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0D00,             // Range Minimum");
		dsdt_line("        0x%04X,             // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x%04X,             // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line("        ,, , TypeStatic)");

		if (bi == NULL) {
			/* No devices: close _CRS and skip per-device output. */
			dsdt_line("    })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
	dsdt_line("        0x%04X,             // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x%04X,             // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line("        ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x00000000,         // Granularity");
	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
	dsdt_line("        0x%08X,         // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line("        0x00000000,         // Translation Offset");
	dsdt_line("        0x%08X,         // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x0000000000000000, // Granularity");
	dsdt_line("        0x%016lX,         // Range Minimum\n", bi->membase64);
	dsdt_line("        0x%016lX,         // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line("        0x0000000000000000, // Translation Offset");
	dsdt_line("        0x%016lX,         // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line("    })");

	/* Emit the PCI routing table (_PRT) if any INTx pins are in use. */
	count = pci_count_lintr(bus);
	if (count != 0) {
		dsdt_indent(2);
		dsdt_line("Name (_PRT, Package (0x%02X)", count);
		dsdt_line("{");
		pci_walk_lintr(bus, pci_prt_entry, &count);
		dsdt_line("})");
		dsdt_unindent(2);
	}

	/* Give each emulated device a chance to add its own DSDT entries. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line("  }");
	dsdt_line("}");
	dsdt_unindent(1);
}

/* Emit DSDT entries for every possible PCI bus. */
void
pci_write_dsdt(void)
{
	int bus;

	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
}

/* Return non-zero if at least one device was configured on 'bus'. */
int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

/* Number of MSI messages the guest enabled, or 0 if MSI is off. */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

/* MSI-X is only considered active when plain MSI is not also enabled. */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	/* Function-wide mask suppresses all vectors. */
	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
	}
}

void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
			     pi->pi_msi.msg_data + index);
	}
}

/*
 * True when the device may assert its legacy INTx pin: neither MSI nor
 * MSI-X is enabled and INTx is not disabled via the command register.
 */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
		(cmd & PCIM_CMD_INTxDIS)));
}

int
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, irq, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * First, allocate a pin from our slot.  Pick the least-used
	 * INTx pin (INTA..INTD) to spread interrupt load.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin.  If
	 * 8259A support is added we will need a separate field to
	 * assign the intpin to an input pin on the PCI interrupt
	 * router.
	 */
	if (si->si_intpins[bestpin].ii_count == 0) {
		irq = ioapic_pci_alloc_irq();
		if (irq < 0)
			return (-1);
		si->si_intpins[bestpin].ii_ioapic_irq = irq;
	} else
		irq = si->si_intpins[bestpin].ii_ioapic_irq;
	si->si_intpins[bestpin].ii_count++;

	/* Record the routing and expose it in config space (1-based pin). */
	pi->pi_lintr.pin = bestpin + 1;
	pi->pi_lintr.ioapic_irq = irq;
	pci_set_cfgdata8(pi, PCIR_INTLINE, irq);
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
	return (0);
}

void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			vm_ioapic_assert_irq(pi->pi_vmctx,
			    pi->pi_lintr.ioapic_irq);
		} else
			/* Remember the request until INTx is unmasked. */
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
	} else if (pi->pi_lintr.state == PENDING)
		/* A pending-but-never-delivered interrupt is simply dropped. */
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Re-evaluate the INTx state after a change that affects whether INTx
 * delivery is permitted (MSI/MSI-X enable, command register write).
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/* Count the INTx pins in use on 'bus' (for sizing the _PRT package). */
int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

/* Invoke 'cb' for every in-use INTx pin on 'bus' (pin passed 1-based). */
void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg);
		}
	}
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not is a multi-function being emulated in the pci 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	/* Only fix up reads that actually cover the header-type byte. */
	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			/* 4-byte read at offset 0xc: header type is byte 2. */
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}

/* Decoded state of the last write to the CONF1 address port (0xCF8). */
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

/*
 * Handler for the PCI config-space address port.  Reads reassemble the
 * current bus/slot/func/offset selection; writes decode and store it.
 */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	uint32_t x;

	if (bytes != 4) {
		/* Sub-dword accesses to 0xCF8 are not decoded. */
		if (in)
			*eax = (bytes == 2) ? 0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) |
		    (cfgslot << 11) |
		    (cfgfunc << 8) |
		    cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = x & PCI_REGMAX;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

/* Bits in 'mask' that differ between 'old' and 'new'. */
static uint32_t
bits_changed(uint32_t old, uint32_t new, uint32_t mask)
{

	return ((old ^ new) & mask);
}

static void
pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
{
	int i;
	uint16_t old;

	/*
	 * The command register is at an offset of 4 bytes and thus the
	 * guest could write 1, 2 or 4 bytes starting at this offset.
	 */

	old = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */
	CFGWRITE(pi, PCIR_COMMAND, new, bytes);		/* update config */
	new = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* get updated value */

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		switch (pi->pi_bar[i].type) {
			case PCIBAR_NONE:
			case PCIBAR_MEMHI64:
				break;
			case PCIBAR_IO:
				/* I/O address space decoding changed? */
				if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
					if (porten(pi))
						register_bar(pi, i);
					else
						unregister_bar(pi, i);
				}
				break;
			case PCIBAR_MEM32:
			case PCIBAR_MEM64:
				/* MMIO address space decoding changed? */
				if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
					if (memen(pi))
						register_bar(pi, i);
					else
						unregister_bar(pi, i);
				}
				break;
			default:
				assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

/*
 * Handler for the PCI config-space data ports (0xCFC-0xCFF).  Dispatches
 * reads/writes to the device selected via the address port, with special
 * handling for BAR programming, capabilities and the command register.
 */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int coff, idx, needcfg;
	uint64_t addr, bar, mask;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	if ((bi = pci_businfo[cfgbus]) != NULL) {
		si = &bi->slotinfo[cfgslot];
		pi = si->si_funcs[cfgfunc].fi_devi;
	} else
		pi = NULL;

	/* The low bits of the data port select bytes within the register. */
	coff = cfgoff + (port - CONF1_DATA_PORT);

#if 0
	printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
	    in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
#endif

	/*
	 * Just return if there is no device at this cfgslot:cfgfunc,
	 * if the guest is doing an un-aligned access, or if the config
	 * address word isn't enabled.
	 */
	if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return (0);
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi,
						    coff, bytes, eax);
		} else {
			needcfg = 1;
		}

		if (needcfg) {
			if (bytes == 1)
				*eax = pci_get_cfgdata8(pi, coff);
			else if (bytes == 2)
				*eax = pci_get_cfgdata16(pi, coff);
			else
				*eax = pci_get_cfgdata32(pi, coff);
		}

		pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return (0);

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return (0);
			idx = (coff - PCIR_BAR(0)) / 4;
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *eax & mask;
				addr &= 0xffff;
				bar = addr | PCIM_BAR_IO_SPACE;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* Low dword of a 64-bit BAR. */
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				       PCIM_BAR_MEM_PREFETCH;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/* High dword: the size lives in BAR idx-1. */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
							   PCIBAR_MEMHI64);
				}
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax);
		} else if (coff == PCIR_COMMAND) {
			pci_emul_cmdwrite(pi, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}

	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

/*
 *
I/O ports to configure PCI IRQ routing. We ignore all writes to it. 1777 */ 1778static int 1779pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1780 uint32_t *eax, void *arg) 1781{ 1782 assert(in == 0); 1783 return (0); 1784} 1785INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); 1786INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); 1787SYSRES_IO(0xC00, 2); 1788 1789#define PCI_EMUL_TEST 1790#ifdef PCI_EMUL_TEST 1791/* 1792 * Define a dummy test device 1793 */ 1794#define DIOSZ 8 1795#define DMEMSZ 4096 1796struct pci_emul_dsoftc { 1797 uint8_t ioregs[DIOSZ]; 1798 uint8_t memregs[DMEMSZ]; 1799}; 1800 1801#define PCI_EMUL_MSI_MSGS 4 1802#define PCI_EMUL_MSIX_MSGS 16 1803 1804static int 1805pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1806{ 1807 int error; 1808 struct pci_emul_dsoftc *sc; 1809 1810 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 1811 1812 pi->pi_arg = sc; 1813 1814 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1815 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1816 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1817 1818 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1819 assert(error == 0); 1820 1821 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1822 assert(error == 0); 1823 1824 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1825 assert(error == 0); 1826 1827 return (0); 1828} 1829 1830static void 1831pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1832 uint64_t offset, int size, uint64_t value) 1833{ 1834 int i; 1835 struct pci_emul_dsoftc *sc = pi->pi_arg; 1836 1837 if (baridx == 0) { 1838 if (offset + size > DIOSZ) { 1839 printf("diow: iow too large, offset %ld size %d\n", 1840 offset, size); 1841 return; 1842 } 1843 1844 if (size == 1) { 1845 sc->ioregs[offset] = value & 0xff; 1846 } else if (size == 2) { 1847 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1848 } else if (size == 4) { 1849 *(uint32_t 
*)&sc->ioregs[offset] = value; 1850 } else { 1851 printf("diow: iow unknown size %d\n", size); 1852 } 1853 1854 /* 1855 * Special magic value to generate an interrupt 1856 */ 1857 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1858 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1859 1860 if (value == 0xabcdef) { 1861 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1862 pci_generate_msi(pi, i); 1863 } 1864 } 1865 1866 if (baridx == 1) { 1867 if (offset + size > DMEMSZ) { 1868 printf("diow: memw too large, offset %ld size %d\n", 1869 offset, size); 1870 return; 1871 } 1872 1873 if (size == 1) { 1874 sc->memregs[offset] = value; 1875 } else if (size == 2) { 1876 *(uint16_t *)&sc->memregs[offset] = value; 1877 } else if (size == 4) { 1878 *(uint32_t *)&sc->memregs[offset] = value; 1879 } else if (size == 8) { 1880 *(uint64_t *)&sc->memregs[offset] = value; 1881 } else { 1882 printf("diow: memw unknown size %d\n", size); 1883 } 1884 1885 /* 1886 * magic interrupt ?? 1887 */ 1888 } 1889 1890 if (baridx > 1) { 1891 printf("diow: unknown bar idx %d\n", baridx); 1892 } 1893} 1894 1895static uint64_t 1896pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1897 uint64_t offset, int size) 1898{ 1899 struct pci_emul_dsoftc *sc = pi->pi_arg; 1900 uint32_t value; 1901 1902 if (baridx == 0) { 1903 if (offset + size > DIOSZ) { 1904 printf("dior: ior too large, offset %ld size %d\n", 1905 offset, size); 1906 return (0); 1907 } 1908 1909 if (size == 1) { 1910 value = sc->ioregs[offset]; 1911 } else if (size == 2) { 1912 value = *(uint16_t *) &sc->ioregs[offset]; 1913 } else if (size == 4) { 1914 value = *(uint32_t *) &sc->ioregs[offset]; 1915 } else { 1916 printf("dior: ior unknown size %d\n", size); 1917 } 1918 } 1919 1920 if (baridx == 1) { 1921 if (offset + size > DMEMSZ) { 1922 printf("dior: memr too large, offset %ld size %d\n", 1923 offset, size); 1924 return (0); 1925 } 1926 1927 if (size == 1) { 1928 value = sc->memregs[offset]; 1929 } else 
if (size == 2) { 1930 value = *(uint16_t *) &sc->memregs[offset]; 1931 } else if (size == 4) { 1932 value = *(uint32_t *) &sc->memregs[offset]; 1933 } else if (size == 8) { 1934 value = *(uint64_t *) &sc->memregs[offset]; 1935 } else { 1936 printf("dior: ior unknown size %d\n", size); 1937 } 1938 } 1939 1940 1941 if (baridx > 1) { 1942 printf("dior: unknown bar idx %d\n", baridx); 1943 return (0); 1944 } 1945 1946 return (value); 1947} 1948 1949struct pci_devemu pci_dummy = { 1950 .pe_emu = "dummy", 1951 .pe_init = pci_emul_dinit, 1952 .pe_barwrite = pci_emul_diow, 1953 .pe_barread = pci_emul_dior 1954}; 1955PCI_EMUL_SET(pci_dummy); 1956 1957#endif /* PCI_EMUL_TEST */ 1958