pci_emul.c revision 268972
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268972 2014-07-22 03:14:37Z jhb $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 268972 2014-07-22 03:14:37Z jhb $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/errno.h> 35 36#include <ctype.h> 37#include <pthread.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <strings.h> 42#include <assert.h> 43#include <stdbool.h> 44 45#include <machine/vmm.h> 46#include <vmmapi.h> 47 48#include "acpi.h" 49#include "bhyverun.h" 50#include "inout.h" 51#include "ioapic.h" 52#include "mem.h" 53#include "pci_emul.h" 54#include "pci_irq.h" 55#include "pci_lpc.h" 56 57#define CONF1_ADDR_PORT 0x0cf8 58#define CONF1_DATA_PORT 0x0cfc 59 60#define CONF1_ENABLE 0x80000000ul 61 62#define CFGWRITE(pi,off,val,b) \ 63do { \ 64 if ((b) == 1) { \ 65 pci_set_cfgdata8((pi),(off),(val)); \ 66 } else if ((b) == 2) { \ 67 pci_set_cfgdata16((pi),(off),(val)); \ 68 } else { \ 69 pci_set_cfgdata32((pi),(off),(val)); \ 70 } \ 71} while (0) 72 73#define MAXBUSES (PCI_BUSMAX + 1) 74#define MAXSLOTS (PCI_SLOTMAX + 1) 75#define MAXFUNCS (PCI_FUNCMAX + 1) 76 77struct funcinfo { 78 char *fi_name; 79 char *fi_param; 80 struct pci_devinst *fi_devi; 81}; 82 83struct intxinfo { 84 int ii_count; 85 int ii_pirq_pin; 86 int ii_ioapic_irq; 87}; 88 89struct slotinfo { 90 struct intxinfo si_intpins[4]; 91 struct funcinfo si_funcs[MAXFUNCS]; 92}; 93 94struct businfo { 95 uint16_t iobase, iolimit; /* I/O window */ 96 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 97 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 98 struct slotinfo slotinfo[MAXSLOTS]; 99}; 100 101static struct businfo *pci_businfo[MAXBUSES]; 102 103SET_DECLARE(pci_devemu_set, struct pci_devemu); 104 105static uint64_t pci_emul_iobase; 106static uint64_t pci_emul_membase32; 107static uint64_t pci_emul_membase64; 108 109#define PCI_EMUL_IOBASE 0x2000 110#define PCI_EMUL_IOLIMIT 0x10000 111 112#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 113 114#define PCI_EMUL_MEMBASE64 0xD000000000UL 115#define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 116 117static struct pci_devemu *pci_emul_finddev(char *name); 118static void pci_lintr_route(struct pci_devinst *pi); 119static void pci_lintr_update(struct pci_devinst *pi); 120 121static struct mem_range pci_mem_hole; 122 123/* 124 * I/O access 125 */ 126 127/* 128 * Slot options are in the form: 129 * 130 * <bus>:<slot>:<func>,<emul>[,<config>] 131 * <slot>[:<func>],<emul>[,<config>] 132 * 133 * slot is 0..31 134 * func is 0..7 135 * emul is a string describing the type of PCI device e.g. virtio-net 136 * config is an optional string, depending on the device, that can be 137 * used for configuration. 138 * Examples are: 139 * 1,virtio-net,tap0 140 * 3:0,dummy 141 */ 142static void 143pci_parse_slot_usage(char *aopt) 144{ 145 146 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 147} 148 149int 150pci_parse_slot(char *opt) 151{ 152 struct businfo *bi; 153 struct slotinfo *si; 154 char *emul, *config, *str, *cp; 155 int error, bnum, snum, fnum; 156 157 error = -1; 158 str = strdup(opt); 159 160 emul = config = NULL; 161 if ((cp = strchr(str, ',')) != NULL) { 162 *cp = '\0'; 163 emul = cp + 1; 164 if ((cp = strchr(emul, ',')) != NULL) { 165 *cp = '\0'; 166 config = cp + 1; 167 } 168 } else { 169 pci_parse_slot_usage(opt); 170 goto done; 171 } 172 173 /* <bus>:<slot>:<func> */ 174 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 175 bnum = 0; 176 /* <slot>:<func> */ 177 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 178 fnum = 0; 179 /* <slot> */ 180 if (sscanf(str, "%d", &snum) != 1) { 181 snum = -1; 182 } 183 } 184 } 185 186 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 187 fnum < 0 || fnum >= MAXFUNCS) { 188 pci_parse_slot_usage(opt); 189 goto done; 190 } 191 192 if (pci_businfo[bnum] == NULL) 193 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 194 195 bi = pci_businfo[bnum]; 196 si = &bi->slotinfo[snum]; 197 198 if (si->si_funcs[fnum].fi_name != NULL) { 199 fprintf(stderr, "pci slot %d:%d already occupied!\n", 200 snum, fnum); 201 goto done; 202 } 203 204 if (pci_emul_finddev(emul) == NULL) { 205 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 206 snum, fnum, emul); 207 goto done; 208 } 209 210 error = 0; 211 si->si_funcs[fnum].fi_name = emul; 212 si->si_funcs[fnum].fi_param = config; 213 214done: 215 if (error) 216 free(str); 217 218 return (error); 219} 220 221static int 222pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 223{ 224 225 if (offset < pi->pi_msix.pba_offset) 226 return (0); 227 228 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 229 return (0); 230 } 231 232 return (1); 233} 234 235int 236pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 237 uint64_t value) 238{ 239 int msix_entry_offset; 240 int tab_index; 241 char *dest; 242 243 /* support only 4 or 8 byte writes */ 244 if (size != 4 && size != 8) 245 return (-1); 246 247 /* 248 * Return if table index is beyond what device supports 249 */ 250 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 251 if (tab_index >= pi->pi_msix.table_count) 252 return (-1); 253 254 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 255 256 /* support only aligned writes */ 257 if ((msix_entry_offset % size) != 0) 258 return (-1); 259 260 dest = (char *)(pi->pi_msix.table + tab_index); 261 dest += msix_entry_offset; 262 263 if (size == 4) 264 *((uint32_t *)dest) = value; 265 else 266 *((uint64_t *)dest) = value; 267 268 return (0); 269} 270 271uint64_t 272pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 273{ 274 char *dest; 275 int msix_entry_offset; 276 int tab_index; 277 uint64_t retval = ~0; 278 279 /* 280 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 281 * table but we also allow 1 byte access to accomodate reads from 282 * ddb. 283 */ 284 if (size != 1 && size != 4 && size != 8) 285 return (retval); 286 287 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 288 289 /* support only aligned reads */ 290 if ((msix_entry_offset % size) != 0) { 291 return (retval); 292 } 293 294 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 295 296 if (tab_index < pi->pi_msix.table_count) { 297 /* valid MSI-X Table access */ 298 dest = (char *)(pi->pi_msix.table + tab_index); 299 dest += msix_entry_offset; 300 301 if (size == 1) 302 retval = *((uint8_t *)dest); 303 else if (size == 4) 304 retval = *((uint32_t *)dest); 305 else 306 retval = *((uint64_t *)dest); 307 } else if (pci_valid_pba_offset(pi, offset)) { 308 /* return 0 for PBA access */ 309 retval = 0; 310 } 311 312 return (retval); 313} 314 315int 316pci_msix_table_bar(struct pci_devinst *pi) 317{ 318 319 if (pi->pi_msix.table != NULL) 320 return (pi->pi_msix.table_bar); 321 else 322 return (-1); 323} 324 325int 326pci_msix_pba_bar(struct pci_devinst *pi) 327{ 328 329 if (pi->pi_msix.table != NULL) 330 return (pi->pi_msix.pba_bar); 331 else 332 return (-1); 333} 334 335static int 336pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 337 uint32_t *eax, void *arg) 338{ 339 struct pci_devinst *pdi = arg; 340 struct pci_devemu *pe = pdi->pi_d; 341 uint64_t offset; 342 int i; 343 344 for (i = 0; i <= PCI_BARMAX; i++) { 345 if (pdi->pi_bar[i].type == PCIBAR_IO && 346 port >= pdi->pi_bar[i].addr && 347 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 348 offset = port - pdi->pi_bar[i].addr; 349 if (in) 350 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 351 offset, bytes); 352 else 353 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 354 bytes, *eax); 355 return (0); 356 } 357 } 358 return (-1); 359} 360 361static int 362pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 363 int size, uint64_t *val, void *arg1, long arg2) 364{ 365 struct pci_devinst *pdi = arg1; 366 struct pci_devemu *pe = pdi->pi_d; 367 uint64_t offset; 368 int bidx = (int) arg2; 369 370 assert(bidx <= PCI_BARMAX); 371 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 372 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 373 assert(addr >= pdi->pi_bar[bidx].addr && 374 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 375 376 offset = addr - pdi->pi_bar[bidx].addr; 377 378 if (dir == MEM_F_WRITE) { 379 if (size == 8) { 380 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 381 4, *val & 0xffffffff); 382 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 383 4, *val >> 32); 384 } else { 385 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 386 size, *val); 387 } 388 } else { 389 if (size == 8) { 390 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 391 offset, 4); 392 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 393 offset + 4, 4) << 32; 394 } else { 395 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 396 offset, size); 397 } 398 } 399 400 return (0); 401} 402 403 404static int 405pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 406 uint64_t *addr) 407{ 408 uint64_t base; 409 410 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 411 412 base = roundup2(*baseptr, size); 413 414 if (base + size <= limit) { 415 *addr = base; 416 *baseptr = base + size; 417 return (0); 418 } else 419 return (-1); 420} 421 422int 423pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 424 uint64_t size) 425{ 426 427 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 428} 429 430/* 431 * Register (or unregister) the MMIO or I/O region associated with the BAR 432 * register 'idx' of an emulated pci device. 433 */ 434static void 435modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 436{ 437 int error; 438 struct inout_port iop; 439 struct mem_range mr; 440 441 switch (pi->pi_bar[idx].type) { 442 case PCIBAR_IO: 443 bzero(&iop, sizeof(struct inout_port)); 444 iop.name = pi->pi_name; 445 iop.port = pi->pi_bar[idx].addr; 446 iop.size = pi->pi_bar[idx].size; 447 if (registration) { 448 iop.flags = IOPORT_F_INOUT; 449 iop.handler = pci_emul_io_handler; 450 iop.arg = pi; 451 error = register_inout(&iop); 452 } else 453 error = unregister_inout(&iop); 454 break; 455 case PCIBAR_MEM32: 456 case PCIBAR_MEM64: 457 bzero(&mr, sizeof(struct mem_range)); 458 mr.name = pi->pi_name; 459 mr.base = pi->pi_bar[idx].addr; 460 mr.size = pi->pi_bar[idx].size; 461 if (registration) { 462 mr.flags = MEM_F_RW; 463 mr.handler = pci_emul_mem_handler; 464 mr.arg1 = pi; 465 mr.arg2 = idx; 466 error = register_mem(&mr); 467 } else 468 error = unregister_mem(&mr); 469 break; 470 default: 471 error = EINVAL; 472 break; 473 } 474 assert(error == 0); 475} 476 477static void 478unregister_bar(struct pci_devinst *pi, int idx) 479{ 480 481 modify_bar_registration(pi, idx, 0); 482} 483 484static void 485register_bar(struct pci_devinst *pi, int idx) 486{ 487 488 modify_bar_registration(pi, idx, 1); 489} 490 491/* Are we decoding i/o port accesses for the emulated pci device? */ 492static int 493porten(struct pci_devinst *pi) 494{ 495 uint16_t cmd; 496 497 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 498 499 return (cmd & PCIM_CMD_PORTEN); 500} 501 502/* Are we decoding memory accesses for the emulated pci device? */ 503static int 504memen(struct pci_devinst *pi) 505{ 506 uint16_t cmd; 507 508 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 509 510 return (cmd & PCIM_CMD_MEMEN); 511} 512 513/* 514 * Update the MMIO or I/O address that is decoded by the BAR register. 515 * 516 * If the pci device has enabled the address space decoding then intercept 517 * the address range decoded by the BAR register. 518 */ 519static void 520update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 521{ 522 int decode; 523 524 if (pi->pi_bar[idx].type == PCIBAR_IO) 525 decode = porten(pi); 526 else 527 decode = memen(pi); 528 529 if (decode) 530 unregister_bar(pi, idx); 531 532 switch (type) { 533 case PCIBAR_IO: 534 case PCIBAR_MEM32: 535 pi->pi_bar[idx].addr = addr; 536 break; 537 case PCIBAR_MEM64: 538 pi->pi_bar[idx].addr &= ~0xffffffffUL; 539 pi->pi_bar[idx].addr |= addr; 540 break; 541 case PCIBAR_MEMHI64: 542 pi->pi_bar[idx].addr &= 0xffffffff; 543 pi->pi_bar[idx].addr |= addr; 544 break; 545 default: 546 assert(0); 547 } 548 549 if (decode) 550 register_bar(pi, idx); 551} 552 553int 554pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 555 enum pcibar_type type, uint64_t size) 556{ 557 int error; 558 uint64_t *baseptr, limit, addr, mask, lobits, bar; 559 560 assert(idx >= 0 && idx <= PCI_BARMAX); 561 562 if ((size & (size - 1)) != 0) 563 size = 1UL << flsl(size); /* round up to a power of 2 */ 564 565 /* Enforce minimum BAR sizes required by the PCI standard */ 566 if (type == PCIBAR_IO) { 567 if (size < 4) 568 size = 4; 569 } else { 570 if (size < 16) 571 size = 16; 572 } 573 574 switch (type) { 575 case PCIBAR_NONE: 576 baseptr = NULL; 577 addr = mask = lobits = 0; 578 break; 579 case PCIBAR_IO: 580 baseptr = &pci_emul_iobase; 581 limit = PCI_EMUL_IOLIMIT; 582 mask = PCIM_BAR_IO_BASE; 583 lobits = PCIM_BAR_IO_SPACE; 584 break; 585 case PCIBAR_MEM64: 586 /* 587 * XXX 588 * Some drivers do not work well if the 64-bit BAR is allocated 589 * above 4GB. Allow for this by allocating small requests under 590 * 4GB unless then allocation size is larger than some arbitrary 591 * number (32MB currently). 592 */ 593 if (size > 32 * 1024 * 1024) { 594 /* 595 * XXX special case for device requiring peer-peer DMA 596 */ 597 if (size == 0x100000000UL) 598 baseptr = &hostbase; 599 else 600 baseptr = &pci_emul_membase64; 601 limit = PCI_EMUL_MEMLIMIT64; 602 mask = PCIM_BAR_MEM_BASE; 603 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 604 PCIM_BAR_MEM_PREFETCH; 605 break; 606 } else { 607 baseptr = &pci_emul_membase32; 608 limit = PCI_EMUL_MEMLIMIT32; 609 mask = PCIM_BAR_MEM_BASE; 610 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 611 } 612 break; 613 case PCIBAR_MEM32: 614 baseptr = &pci_emul_membase32; 615 limit = PCI_EMUL_MEMLIMIT32; 616 mask = PCIM_BAR_MEM_BASE; 617 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 618 break; 619 default: 620 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 621 assert(0); 622 } 623 624 if (baseptr != NULL) { 625 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 626 if (error != 0) 627 return (error); 628 } 629 630 pdi->pi_bar[idx].type = type; 631 pdi->pi_bar[idx].addr = addr; 632 pdi->pi_bar[idx].size = size; 633 634 /* Initialize the BAR register in config space */ 635 bar = (addr & mask) | lobits; 636 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 637 638 if (type == PCIBAR_MEM64) { 639 assert(idx + 1 <= PCI_BARMAX); 640 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 641 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 642 } 643 644 register_bar(pdi, idx); 645 646 return (0); 647} 648 649#define CAP_START_OFFSET 0x40 650static int 651pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 652{ 653 int i, capoff, reallen; 654 uint16_t sts; 655 656 assert(caplen > 0); 657 658 reallen = roundup2(caplen, 4); /* dword aligned */ 659 660 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 661 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 662 capoff = CAP_START_OFFSET; 663 else 664 capoff = pi->pi_capend + 1; 665 666 /* Check if we have enough space */ 667 if (capoff + reallen > PCI_REGMAX + 1) 668 return (-1); 669 670 /* Set the previous capability pointer */ 671 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 672 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 673 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 674 } else 675 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 676 677 /* Copy the capability */ 678 for (i = 0; i < caplen; i++) 679 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 680 681 /* Set the next capability pointer */ 682 pci_set_cfgdata8(pi, capoff + 1, 0); 683 684 pi->pi_prevcap = capoff; 685 pi->pi_capend = capoff + reallen - 1; 686 return (0); 687} 688 689static struct pci_devemu * 690pci_emul_finddev(char *name) 691{ 692 struct pci_devemu **pdpp, *pdp; 693 694 SET_FOREACH(pdpp, pci_devemu_set) { 695 pdp = *pdpp; 696 if (!strcmp(pdp->pe_emu, name)) { 697 return (pdp); 698 } 699 } 700 701 return (NULL); 702} 703 704static int 705pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 706 int func, struct funcinfo *fi) 707{ 708 struct pci_devinst *pdi; 709 int err; 710 711 pdi = calloc(1, sizeof(struct pci_devinst)); 712 713 pdi->pi_vmctx = ctx; 714 pdi->pi_bus = bus; 715 pdi->pi_slot = slot; 716 pdi->pi_func = func; 717 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 718 pdi->pi_lintr.pin = 0; 719 pdi->pi_lintr.state = IDLE; 720 pdi->pi_lintr.pirq_pin = 0; 721 pdi->pi_lintr.ioapic_irq = 0; 722 pdi->pi_d = pde; 723 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 724 725 /* Disable legacy interrupts */ 726 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 727 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 728 729 pci_set_cfgdata8(pdi, PCIR_COMMAND, 730 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 731 732 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 733 if (err == 0) 734 fi->fi_devi = pdi; 735 else 736 free(pdi); 737 738 return (err); 739} 740 741void 742pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 743{ 744 int mmc; 745 746 CTASSERT(sizeof(struct msicap) == 14); 747 748 /* Number of msi messages must be a power of 2 between 1 and 32 */ 749 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 750 mmc = ffs(msgnum) - 1; 751 752 bzero(msicap, sizeof(struct msicap)); 753 msicap->capid = PCIY_MSI; 754 msicap->nextptr = nextptr; 755 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 756} 757 758int 759pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 760{ 761 struct msicap msicap; 762 763 pci_populate_msicap(&msicap, msgnum, 0); 764 765 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 766} 767 768static void 769pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 770 uint32_t msix_tab_size) 771{ 772 CTASSERT(sizeof(struct msixcap) == 12); 773 774 assert(msix_tab_size % 4096 == 0); 775 776 bzero(msixcap, sizeof(struct msixcap)); 777 msixcap->capid = PCIY_MSIX; 778 779 /* 780 * Message Control Register, all fields set to 781 * zero except for the Table Size. 782 * Note: Table size N is encoded as N-1 783 */ 784 msixcap->msgctrl = msgnum - 1; 785 786 /* 787 * MSI-X BAR setup: 788 * - MSI-X table start at offset 0 789 * - PBA table starts at a 4K aligned offset after the MSI-X table 790 */ 791 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 792 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 793} 794 795static void 796pci_msix_table_init(struct pci_devinst *pi, int table_entries) 797{ 798 int i, table_size; 799 800 assert(table_entries > 0); 801 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 802 803 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 804 pi->pi_msix.table = calloc(1, table_size); 805 806 /* set mask bit of vector control register */ 807 for (i = 0; i < table_entries; i++) 808 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 809} 810 811int 812pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 813{ 814 uint32_t tab_size; 815 struct msixcap msixcap; 816 817 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 818 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 819 820 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 821 822 /* Align table size to nearest 4K */ 823 tab_size = roundup2(tab_size, 4096); 824 825 pi->pi_msix.table_bar = barnum; 826 pi->pi_msix.pba_bar = barnum; 827 pi->pi_msix.table_offset = 0; 828 pi->pi_msix.table_count = msgnum; 829 pi->pi_msix.pba_offset = tab_size; 830 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 831 832 pci_msix_table_init(pi, msgnum); 833 834 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 835 836 /* allocate memory for MSI-X Table and PBA */ 837 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 838 tab_size + pi->pi_msix.pba_size); 839 840 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 841 sizeof(msixcap))); 842} 843 844void 845msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 846 int bytes, uint32_t val) 847{ 848 uint16_t msgctrl, rwmask; 849 int off, table_bar; 850 851 off = offset - capoff; 852 table_bar = pi->pi_msix.table_bar; 853 /* Message Control Register */ 854 if (off == 2 && bytes == 2) { 855 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 856 msgctrl = pci_get_cfgdata16(pi, offset); 857 msgctrl &= ~rwmask; 858 msgctrl |= val & rwmask; 859 val = msgctrl; 860 861 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 862 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 863 pci_lintr_update(pi); 864 } 865 866 CFGWRITE(pi, offset, val, bytes); 867} 868 869void 870msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 871 int bytes, uint32_t val) 872{ 873 uint16_t msgctrl, rwmask, msgdata, mme; 874 uint32_t addrlo; 875 876 /* 877 * If guest is writing to the message control register make sure 878 * we do not overwrite read-only fields. 879 */ 880 if ((offset - capoff) == 2 && bytes == 2) { 881 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 882 msgctrl = pci_get_cfgdata16(pi, offset); 883 msgctrl &= ~rwmask; 884 msgctrl |= val & rwmask; 885 val = msgctrl; 886 887 addrlo = pci_get_cfgdata32(pi, capoff + 4); 888 if (msgctrl & PCIM_MSICTRL_64BIT) 889 msgdata = pci_get_cfgdata16(pi, capoff + 12); 890 else 891 msgdata = pci_get_cfgdata16(pi, capoff + 8); 892 893 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 894 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 895 if (pi->pi_msi.enabled) { 896 pi->pi_msi.addr = addrlo; 897 pi->pi_msi.msg_data = msgdata; 898 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 899 } else { 900 pi->pi_msi.maxmsgnum = 0; 901 } 902 pci_lintr_update(pi); 903 } 904 905 CFGWRITE(pi, offset, val, bytes); 906} 907 908void 909pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 910 int bytes, uint32_t val) 911{ 912 913 /* XXX don't write to the readonly parts */ 914 CFGWRITE(pi, offset, val, bytes); 915} 916 917#define PCIECAP_VERSION 0x2 918int 919pci_emul_add_pciecap(struct pci_devinst *pi, int type) 920{ 921 int err; 922 struct pciecap pciecap; 923 924 CTASSERT(sizeof(struct pciecap) == 60); 925 926 if (type != PCIEM_TYPE_ROOT_PORT) 927 return (-1); 928 929 bzero(&pciecap, sizeof(pciecap)); 930 931 pciecap.capid = PCIY_EXPRESS; 932 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 933 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 934 pciecap.link_status = 0x11; /* gen1, x1 */ 935 936 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 937 return (err); 938} 939 940/* 941 * This function assumes that 'coff' is in the capabilities region of the 942 * config space. 943 */ 944static void 945pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 946{ 947 int capid; 948 uint8_t capoff, nextoff; 949 950 /* Do not allow un-aligned writes */ 951 if ((offset & (bytes - 1)) != 0) 952 return; 953 954 /* Find the capability that we want to update */ 955 capoff = CAP_START_OFFSET; 956 while (1) { 957 nextoff = pci_get_cfgdata8(pi, capoff + 1); 958 if (nextoff == 0) 959 break; 960 if (offset >= capoff && offset < nextoff) 961 break; 962 963 capoff = nextoff; 964 } 965 assert(offset >= capoff); 966 967 /* 968 * Capability ID and Next Capability Pointer are readonly. 969 * However, some o/s's do 4-byte writes that include these. 970 * For this case, trim the write back to 2 bytes and adjust 971 * the data. 972 */ 973 if (offset == capoff || offset == capoff + 1) { 974 if (offset == capoff && bytes == 4) { 975 bytes = 2; 976 offset += 2; 977 val >>= 16; 978 } else 979 return; 980 } 981 982 capid = pci_get_cfgdata8(pi, capoff); 983 switch (capid) { 984 case PCIY_MSI: 985 msicap_cfgwrite(pi, capoff, offset, bytes, val); 986 break; 987 case PCIY_MSIX: 988 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 989 break; 990 case PCIY_EXPRESS: 991 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 992 break; 993 default: 994 break; 995 } 996} 997 998static int 999pci_emul_iscap(struct pci_devinst *pi, int offset) 1000{ 1001 uint16_t sts; 1002 1003 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1004 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1005 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1006 return (1); 1007 } 1008 return (0); 1009} 1010 1011static int 1012pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1013 int size, uint64_t *val, void *arg1, long arg2) 1014{ 1015 /* 1016 * Ignore writes; return 0xff's for reads. The mem read code 1017 * will take care of truncating to the correct size. 1018 */ 1019 if (dir == MEM_F_READ) { 1020 *val = 0xffffffffffffffff; 1021 } 1022 1023 return (0); 1024} 1025 1026#define BUSIO_ROUNDUP 32 1027#define BUSMEM_ROUNDUP (1024 * 1024) 1028 1029int 1030init_pci(struct vmctx *ctx) 1031{ 1032 struct pci_devemu *pde; 1033 struct businfo *bi; 1034 struct slotinfo *si; 1035 struct funcinfo *fi; 1036 size_t lowmem; 1037 int bus, slot, func; 1038 int error; 1039 1040 pci_emul_iobase = PCI_EMUL_IOBASE; 1041 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1042 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1043 1044 for (bus = 0; bus < MAXBUSES; bus++) { 1045 if ((bi = pci_businfo[bus]) == NULL) 1046 continue; 1047 /* 1048 * Keep track of the i/o and memory resources allocated to 1049 * this bus. 1050 */ 1051 bi->iobase = pci_emul_iobase; 1052 bi->membase32 = pci_emul_membase32; 1053 bi->membase64 = pci_emul_membase64; 1054 1055 for (slot = 0; slot < MAXSLOTS; slot++) { 1056 si = &bi->slotinfo[slot]; 1057 for (func = 0; func < MAXFUNCS; func++) { 1058 fi = &si->si_funcs[func]; 1059 if (fi->fi_name == NULL) 1060 continue; 1061 pde = pci_emul_finddev(fi->fi_name); 1062 assert(pde != NULL); 1063 error = pci_emul_init(ctx, pde, bus, slot, 1064 func, fi); 1065 if (error) 1066 return (error); 1067 } 1068 } 1069 1070 /* 1071 * Add some slop to the I/O and memory resources decoded by 1072 * this bus to give a guest some flexibility if it wants to 1073 * reprogram the BARs. 1074 */ 1075 pci_emul_iobase += BUSIO_ROUNDUP; 1076 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1077 bi->iolimit = pci_emul_iobase; 1078 1079 pci_emul_membase32 += BUSMEM_ROUNDUP; 1080 pci_emul_membase32 = roundup2(pci_emul_membase32, 1081 BUSMEM_ROUNDUP); 1082 bi->memlimit32 = pci_emul_membase32; 1083 1084 pci_emul_membase64 += BUSMEM_ROUNDUP; 1085 pci_emul_membase64 = roundup2(pci_emul_membase64, 1086 BUSMEM_ROUNDUP); 1087 bi->memlimit64 = pci_emul_membase64; 1088 } 1089 1090 /* 1091 * PCI backends are initialized before routing INTx interrupts 1092 * so that LPC devices are able to reserve ISA IRQs before 1093 * routing PIRQ pins. 1094 */ 1095 for (bus = 0; bus < MAXBUSES; bus++) { 1096 if ((bi = pci_businfo[bus]) == NULL) 1097 continue; 1098 1099 for (slot = 0; slot < MAXSLOTS; slot++) { 1100 si = &bi->slotinfo[slot]; 1101 for (func = 0; func < MAXFUNCS; func++) { 1102 fi = &si->si_funcs[func]; 1103 if (fi->fi_devi == NULL) 1104 continue; 1105 pci_lintr_route(fi->fi_devi); 1106 } 1107 } 1108 } 1109 lpc_pirq_routed(); 1110 1111 /* 1112 * The guest physical memory map looks like the following: 1113 * [0, lowmem) guest system memory 1114 * [lowmem, lowmem_limit) memory hole (may be absent) 1115 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1116 * [4GB, 4GB + highmem) 1117 * 1118 * Accesses to memory addresses that are not allocated to system 1119 * memory or PCI devices return 0xff's. 1120 */ 1121 error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); 1122 assert(error == 0); 1123 1124 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1125 pci_mem_hole.name = "PCI hole"; 1126 pci_mem_hole.flags = MEM_F_RW; 1127 pci_mem_hole.base = lowmem; 1128 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1129 pci_mem_hole.handler = pci_emul_fallback_handler; 1130 1131 error = register_mem_fallback(&pci_mem_hole); 1132 assert(error == 0); 1133 1134 return (0); 1135} 1136 1137static void 1138pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1139 void *arg) 1140{ 1141 1142 dsdt_line(" Package ()"); 1143 dsdt_line(" {"); 1144 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1145 dsdt_line(" 0x%02X,", pin - 1); 1146 dsdt_line(" Zero,"); 1147 dsdt_line(" 0x%X", ioapic_irq); 1148 dsdt_line(" },"); 1149} 1150 1151static void 1152pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1153 void *arg) 1154{ 1155 char *name; 1156 1157 name = lpc_pirq_name(pirq_pin); 1158 if (name == NULL) 1159 return; 1160 dsdt_line(" Package ()"); 1161 dsdt_line(" {"); 1162 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1163 dsdt_line(" 0x%02X,", pin - 1); 1164 dsdt_line(" %s,", name); 1165 dsdt_line(" 0x00"); 1166 dsdt_line(" },"); 1167 free(name); 1168} 1169 1170/* 1171 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1172 * corresponding to each PCI bus. 1173 */ 1174static void 1175pci_bus_write_dsdt(int bus) 1176{ 1177 struct businfo *bi; 1178 struct slotinfo *si; 1179 struct pci_devinst *pi; 1180 int count, func, slot; 1181 1182 /* 1183 * If there are no devices on this 'bus' then just return. 1184 */ 1185 if ((bi = pci_businfo[bus]) == NULL) { 1186 /* 1187 * Bus 0 is special because it decodes the I/O ports used 1188 * for PCI config space access even if there are no devices 1189 * on it. 1190 */ 1191 if (bus != 0) 1192 return; 1193 } 1194 1195 dsdt_line(" Device (PC%02X)", bus); 1196 dsdt_line(" {"); 1197 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1198 dsdt_line(" Name (_ADR, Zero)"); 1199 1200 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1201 dsdt_line(" {"); 1202 dsdt_line(" Return (0x%08X)", bus); 1203 dsdt_line(" }"); 1204 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1205 dsdt_line(" {"); 1206 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1207 "MaxFixed, PosDecode,"); 1208 dsdt_line(" 0x0000, // Granularity"); 1209 dsdt_line(" 0x%04X, // Range Minimum", bus); 1210 dsdt_line(" 0x%04X, // Range Maximum", bus); 1211 dsdt_line(" 0x0000, // Translation Offset"); 1212 dsdt_line(" 0x0001, // Length"); 1213 dsdt_line(" ,, )"); 1214 1215 if (bus == 0) { 1216 dsdt_indent(3); 1217 dsdt_fixed_ioport(0xCF8, 8); 1218 dsdt_unindent(3); 1219 1220 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1221 "PosDecode, EntireRange,"); 1222 dsdt_line(" 0x0000, // Granularity"); 1223 dsdt_line(" 0x0000, // Range Minimum"); 1224 dsdt_line(" 0x0CF7, // Range Maximum"); 1225 dsdt_line(" 0x0000, // Translation Offset"); 1226 dsdt_line(" 0x0CF8, // Length"); 1227 dsdt_line(" ,, , TypeStatic)"); 1228 1229 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1230 "PosDecode, EntireRange,"); 1231 dsdt_line(" 0x0000, // Granularity"); 1232 dsdt_line(" 0x0D00, // Range Minimum"); 1233 dsdt_line(" 0x%04X, // Range Maximum", 1234 PCI_EMUL_IOBASE - 1); 1235 dsdt_line(" 0x0000, // Translation Offset"); 1236 dsdt_line(" 0x%04X, // Length", 1237 PCI_EMUL_IOBASE - 0x0D00); 1238 dsdt_line(" ,, , TypeStatic)"); 1239 1240 if (bi == NULL) { 1241 dsdt_line(" })"); 1242 goto done; 1243 } 1244 } 1245 assert(bi != NULL); 1246 1247 /* i/o window */ 1248 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1249 "PosDecode, EntireRange,"); 1250 dsdt_line(" 0x0000, // Granularity"); 1251 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1252 dsdt_line(" 0x%04X, // Range Maximum", 1253 bi->iolimit - 1); 1254 dsdt_line(" 0x0000, // Translation Offset"); 1255 dsdt_line(" 0x%04X, // Length", 1256 bi->iolimit - bi->iobase); 1257 dsdt_line(" ,, , TypeStatic)"); 1258 1259 /* mmio window (32-bit) */ 1260 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1261 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1262 dsdt_line(" 0x00000000, // Granularity"); 1263 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1264 dsdt_line(" 0x%08X, // Range Maximum\n", 1265 bi->memlimit32 - 1); 1266 dsdt_line(" 0x00000000, // Translation Offset"); 1267 dsdt_line(" 0x%08X, // Length\n", 1268 bi->memlimit32 - bi->membase32); 1269 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1270 1271 /* mmio window (64-bit) */ 1272 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1273 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1274 dsdt_line(" 0x0000000000000000, // Granularity"); 1275 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1276 dsdt_line(" 0x%016lX, // Range Maximum\n", 1277 bi->memlimit64 - 1); 1278 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1279 dsdt_line(" 0x%016lX, // Length\n", 1280 bi->memlimit64 - bi->membase64); 1281 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1282 dsdt_line(" })"); 1283 1284 count = pci_count_lintr(bus); 1285 if (count != 0) { 1286 dsdt_indent(2); 1287 dsdt_line("Name (PPRT, Package ()"); 1288 dsdt_line("{"); 1289 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1290 dsdt_line("})"); 1291 dsdt_line("Name (APRT, Package ()"); 1292 dsdt_line("{"); 1293 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1294 dsdt_line("})"); 1295 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1296 dsdt_line("{"); 1297 dsdt_line(" If (PICM)"); 1298 dsdt_line(" {"); 1299 dsdt_line(" Return (APRT)"); 1300 dsdt_line(" }"); 1301 dsdt_line(" Else"); 1302 dsdt_line(" {"); 1303 dsdt_line(" Return (PPRT)"); 1304 dsdt_line(" }"); 1305 dsdt_line("}"); 1306 dsdt_unindent(2); 1307 } 1308 1309 dsdt_indent(2); 1310 for (slot = 0; slot < MAXSLOTS; slot++) { 1311 si = &bi->slotinfo[slot]; 1312 for (func = 0; func < MAXFUNCS; func++) { 1313 pi = si->si_funcs[func].fi_devi; 1314 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1315 pi->pi_d->pe_write_dsdt(pi); 1316 } 1317 } 1318 dsdt_unindent(2); 1319done: 1320 dsdt_line(" }"); 1321} 1322 1323void 1324pci_write_dsdt(void) 1325{ 1326 int bus; 1327 1328 dsdt_indent(1); 1329 dsdt_line("Name (PICM, 0x00)"); 1330 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1331 dsdt_line("{"); 1332 dsdt_line(" Store (Arg0, PICM)"); 1333 dsdt_line("}"); 1334 dsdt_line(""); 1335 dsdt_line("Scope (_SB)"); 1336 dsdt_line("{"); 1337 for (bus = 0; bus < MAXBUSES; bus++) 1338 pci_bus_write_dsdt(bus); 1339 dsdt_line("}"); 1340 dsdt_unindent(1); 1341} 1342 1343int 1344pci_bus_configured(int bus) 1345{ 1346 assert(bus >= 0 && bus < MAXBUSES); 1347 return (pci_businfo[bus] != NULL); 1348} 1349 1350int 1351pci_msi_enabled(struct pci_devinst *pi) 1352{ 1353 return (pi->pi_msi.enabled); 1354} 1355 1356int 1357pci_msi_maxmsgnum(struct pci_devinst *pi) 1358{ 1359 if (pi->pi_msi.enabled) 1360 return (pi->pi_msi.maxmsgnum); 1361 else 1362 return (0); 1363} 1364 1365int 1366pci_msix_enabled(struct pci_devinst *pi) 1367{ 1368 1369 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1370} 1371 1372void 1373pci_generate_msix(struct pci_devinst *pi, int index) 1374{ 1375 struct msix_table_entry *mte; 1376 1377 if (!pci_msix_enabled(pi)) 1378 return; 1379 1380 if (pi->pi_msix.function_mask) 1381 return; 1382 1383 if (index >= pi->pi_msix.table_count) 1384 return; 1385 1386 mte = &pi->pi_msix.table[index]; 1387 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1388 /* XXX Set PBA bit if interrupt is disabled */ 1389 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1390 } 1391} 1392 1393void 1394pci_generate_msi(struct pci_devinst *pi, int index) 1395{ 1396 1397 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1398 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1399 pi->pi_msi.msg_data + index); 1400 } 1401} 1402 1403static bool 1404pci_lintr_permitted(struct pci_devinst *pi) 1405{ 1406 uint16_t cmd; 1407 1408 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1409 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1410 (cmd & PCIM_CMD_INTxDIS))); 1411} 1412 1413void 1414pci_lintr_request(struct pci_devinst *pi) 1415{ 1416 struct businfo *bi; 1417 struct slotinfo *si; 1418 int bestpin, bestcount, pin; 1419 1420 bi = pci_businfo[pi->pi_bus]; 1421 assert(bi != NULL); 1422 1423 /* 1424 * Just allocate a pin from our slot. The pin will be 1425 * assigned IRQs later when interrupts are routed. 1426 */ 1427 si = &bi->slotinfo[pi->pi_slot]; 1428 bestpin = 0; 1429 bestcount = si->si_intpins[0].ii_count; 1430 for (pin = 1; pin < 4; pin++) { 1431 if (si->si_intpins[pin].ii_count < bestcount) { 1432 bestpin = pin; 1433 bestcount = si->si_intpins[pin].ii_count; 1434 } 1435 } 1436 1437 si->si_intpins[bestpin].ii_count++; 1438 pi->pi_lintr.pin = bestpin + 1; 1439 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1440} 1441 1442static void 1443pci_lintr_route(struct pci_devinst *pi) 1444{ 1445 struct businfo *bi; 1446 struct intxinfo *ii; 1447 1448 if (pi->pi_lintr.pin == 0) 1449 return; 1450 1451 bi = pci_businfo[pi->pi_bus]; 1452 assert(bi != NULL); 1453 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1454 1455 /* 1456 * Attempt to allocate an I/O APIC pin for this intpin if one 1457 * is not yet assigned. 1458 */ 1459 if (ii->ii_ioapic_irq == 0) 1460 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(); 1461 assert(ii->ii_ioapic_irq > 0); 1462 1463 /* 1464 * Attempt to allocate a PIRQ pin for this intpin if one is 1465 * not yet assigned. 1466 */ 1467 if (ii->ii_pirq_pin == 0) 1468 ii->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx); 1469 assert(ii->ii_pirq_pin > 0); 1470 1471 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1472 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1473 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1474} 1475 1476void 1477pci_lintr_assert(struct pci_devinst *pi) 1478{ 1479 1480 assert(pi->pi_lintr.pin > 0); 1481 1482 pthread_mutex_lock(&pi->pi_lintr.lock); 1483 if (pi->pi_lintr.state == IDLE) { 1484 if (pci_lintr_permitted(pi)) { 1485 pi->pi_lintr.state = ASSERTED; 1486 pci_irq_assert(pi); 1487 } else 1488 pi->pi_lintr.state = PENDING; 1489 } 1490 pthread_mutex_unlock(&pi->pi_lintr.lock); 1491} 1492 1493void 1494pci_lintr_deassert(struct pci_devinst *pi) 1495{ 1496 1497 assert(pi->pi_lintr.pin > 0); 1498 1499 pthread_mutex_lock(&pi->pi_lintr.lock); 1500 if (pi->pi_lintr.state == ASSERTED) { 1501 pi->pi_lintr.state = IDLE; 1502 pci_irq_deassert(pi); 1503 } else if (pi->pi_lintr.state == PENDING) 1504 pi->pi_lintr.state = IDLE; 1505 pthread_mutex_unlock(&pi->pi_lintr.lock); 1506} 1507 1508static void 1509pci_lintr_update(struct pci_devinst *pi) 1510{ 1511 1512 pthread_mutex_lock(&pi->pi_lintr.lock); 1513 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1514 pci_irq_deassert(pi); 1515 pi->pi_lintr.state = PENDING; 1516 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1517 pi->pi_lintr.state = ASSERTED; 1518 pci_irq_assert(pi); 1519 } 1520 pthread_mutex_unlock(&pi->pi_lintr.lock); 1521} 1522 1523int 1524pci_count_lintr(int bus) 1525{ 1526 int count, slot, pin; 1527 struct slotinfo *slotinfo; 1528 1529 count = 0; 1530 if (pci_businfo[bus] != NULL) { 1531 for (slot = 0; slot < MAXSLOTS; slot++) { 1532 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1533 for (pin = 0; pin < 4; pin++) { 1534 if (slotinfo->si_intpins[pin].ii_count != 0) 1535 count++; 1536 } 1537 } 1538 } 1539 return (count); 1540} 1541 1542void 1543pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1544{ 1545 struct businfo *bi; 1546 struct slotinfo *si; 1547 struct intxinfo *ii; 1548 int slot, pin; 1549 1550 if ((bi = pci_businfo[bus]) == NULL) 1551 return; 1552 1553 for (slot = 0; slot < MAXSLOTS; slot++) { 1554 si = &bi->slotinfo[slot]; 1555 for (pin = 0; pin < 4; pin++) { 1556 ii = &si->si_intpins[pin]; 1557 if (ii->ii_count != 0) 1558 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1559 ii->ii_ioapic_irq, arg); 1560 } 1561 } 1562} 1563 1564/* 1565 * Return 1 if the emulated device in 'slot' is a multi-function device. 1566 * Return 0 otherwise. 1567 */ 1568static int 1569pci_emul_is_mfdev(int bus, int slot) 1570{ 1571 struct businfo *bi; 1572 struct slotinfo *si; 1573 int f, numfuncs; 1574 1575 numfuncs = 0; 1576 if ((bi = pci_businfo[bus]) != NULL) { 1577 si = &bi->slotinfo[slot]; 1578 for (f = 0; f < MAXFUNCS; f++) { 1579 if (si->si_funcs[f].fi_devi != NULL) { 1580 numfuncs++; 1581 } 1582 } 1583 } 1584 return (numfuncs > 1); 1585} 1586 1587/* 1588 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1589 * whether or not is a multi-function being emulated in the pci 'slot'. 1590 */ 1591static void 1592pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1593{ 1594 int mfdev; 1595 1596 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1597 mfdev = pci_emul_is_mfdev(bus, slot); 1598 switch (bytes) { 1599 case 1: 1600 case 2: 1601 *rv &= ~PCIM_MFDEV; 1602 if (mfdev) { 1603 *rv |= PCIM_MFDEV; 1604 } 1605 break; 1606 case 4: 1607 *rv &= ~(PCIM_MFDEV << 16); 1608 if (mfdev) { 1609 *rv |= (PCIM_MFDEV << 16); 1610 } 1611 break; 1612 } 1613 } 1614} 1615 1616static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 1617 1618static int 1619pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1620 uint32_t *eax, void *arg) 1621{ 1622 uint32_t x; 1623 1624 if (bytes != 4) { 1625 if (in) 1626 *eax = (bytes == 2) ? 0xffff : 0xff; 1627 return (0); 1628 } 1629 1630 if (in) { 1631 x = (cfgbus << 16) | 1632 (cfgslot << 11) | 1633 (cfgfunc << 8) | 1634 cfgoff; 1635 if (cfgenable) 1636 x |= CONF1_ENABLE; 1637 *eax = x; 1638 } else { 1639 x = *eax; 1640 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 1641 cfgoff = x & PCI_REGMAX; 1642 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1643 cfgslot = (x >> 11) & PCI_SLOTMAX; 1644 cfgbus = (x >> 16) & PCI_BUSMAX; 1645 } 1646 1647 return (0); 1648} 1649INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1650 1651static uint32_t 1652bits_changed(uint32_t old, uint32_t new, uint32_t mask) 1653{ 1654 1655 return ((old ^ new) & mask); 1656} 1657 1658static void 1659pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) 1660{ 1661 int i; 1662 uint16_t old; 1663 1664 /* 1665 * The command register is at an offset of 4 bytes and thus the 1666 * guest could write 1, 2 or 4 bytes starting at this offset. 1667 */ 1668 1669 old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1670 CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ 1671 new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ 1672 1673 /* 1674 * If the MMIO or I/O address space decoding has changed then 1675 * register/unregister all BARs that decode that address space. 1676 */ 1677 for (i = 0; i <= PCI_BARMAX; i++) { 1678 switch (pi->pi_bar[i].type) { 1679 case PCIBAR_NONE: 1680 case PCIBAR_MEMHI64: 1681 break; 1682 case PCIBAR_IO: 1683 /* I/O address space decoding changed? */ 1684 if (bits_changed(old, new, PCIM_CMD_PORTEN)) { 1685 if (porten(pi)) 1686 register_bar(pi, i); 1687 else 1688 unregister_bar(pi, i); 1689 } 1690 break; 1691 case PCIBAR_MEM32: 1692 case PCIBAR_MEM64: 1693 /* MMIO address space decoding changed? */ 1694 if (bits_changed(old, new, PCIM_CMD_MEMEN)) { 1695 if (memen(pi)) 1696 register_bar(pi, i); 1697 else 1698 unregister_bar(pi, i); 1699 } 1700 break; 1701 default: 1702 assert(0); 1703 } 1704 } 1705 1706 /* 1707 * If INTx has been unmasked and is pending, assert the 1708 * interrupt. 1709 */ 1710 pci_lintr_update(pi); 1711} 1712 1713static int 1714pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1715 uint32_t *eax, void *arg) 1716{ 1717 struct businfo *bi; 1718 struct slotinfo *si; 1719 struct pci_devinst *pi; 1720 struct pci_devemu *pe; 1721 int coff, idx, needcfg; 1722 uint64_t addr, bar, mask; 1723 1724 assert(bytes == 1 || bytes == 2 || bytes == 4); 1725 1726 if ((bi = pci_businfo[cfgbus]) != NULL) { 1727 si = &bi->slotinfo[cfgslot]; 1728 pi = si->si_funcs[cfgfunc].fi_devi; 1729 } else 1730 pi = NULL; 1731 1732 coff = cfgoff + (port - CONF1_DATA_PORT); 1733 1734#if 0 1735 printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r", 1736 in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); 1737#endif 1738 1739 /* 1740 * Just return if there is no device at this cfgslot:cfgfunc, 1741 * if the guest is doing an un-aligned access, or if the config 1742 * address word isn't enabled. 1743 */ 1744 if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) { 1745 if (in) 1746 *eax = 0xffffffff; 1747 return (0); 1748 } 1749 1750 pe = pi->pi_d; 1751 1752 /* 1753 * Config read 1754 */ 1755 if (in) { 1756 /* Let the device emulation override the default handler */ 1757 if (pe->pe_cfgread != NULL) { 1758 needcfg = pe->pe_cfgread(ctx, vcpu, pi, 1759 coff, bytes, eax); 1760 } else { 1761 needcfg = 1; 1762 } 1763 1764 if (needcfg) { 1765 if (bytes == 1) 1766 *eax = pci_get_cfgdata8(pi, coff); 1767 else if (bytes == 2) 1768 *eax = pci_get_cfgdata16(pi, coff); 1769 else 1770 *eax = pci_get_cfgdata32(pi, coff); 1771 } 1772 1773 pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); 1774 } else { 1775 /* Let the device emulation override the default handler */ 1776 if (pe->pe_cfgwrite != NULL && 1777 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1778 return (0); 1779 1780 /* 1781 * Special handling for write to BAR registers 1782 */ 1783 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1784 /* 1785 * Ignore writes to BAR registers that are not 1786 * 4-byte aligned. 1787 */ 1788 if (bytes != 4 || (coff & 0x3) != 0) 1789 return (0); 1790 idx = (coff - PCIR_BAR(0)) / 4; 1791 mask = ~(pi->pi_bar[idx].size - 1); 1792 switch (pi->pi_bar[idx].type) { 1793 case PCIBAR_NONE: 1794 pi->pi_bar[idx].addr = bar = 0; 1795 break; 1796 case PCIBAR_IO: 1797 addr = *eax & mask; 1798 addr &= 0xffff; 1799 bar = addr | PCIM_BAR_IO_SPACE; 1800 /* 1801 * Register the new BAR value for interception 1802 */ 1803 if (addr != pi->pi_bar[idx].addr) { 1804 update_bar_address(pi, addr, idx, 1805 PCIBAR_IO); 1806 } 1807 break; 1808 case PCIBAR_MEM32: 1809 addr = bar = *eax & mask; 1810 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1811 if (addr != pi->pi_bar[idx].addr) { 1812 update_bar_address(pi, addr, idx, 1813 PCIBAR_MEM32); 1814 } 1815 break; 1816 case PCIBAR_MEM64: 1817 addr = bar = *eax & mask; 1818 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1819 PCIM_BAR_MEM_PREFETCH; 1820 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1821 update_bar_address(pi, addr, idx, 1822 PCIBAR_MEM64); 1823 } 1824 break; 1825 case PCIBAR_MEMHI64: 1826 mask = ~(pi->pi_bar[idx - 1].size - 1); 1827 addr = ((uint64_t)*eax << 32) & mask; 1828 bar = addr >> 32; 1829 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1830 update_bar_address(pi, addr, idx - 1, 1831 PCIBAR_MEMHI64); 1832 } 1833 break; 1834 default: 1835 assert(0); 1836 } 1837 pci_set_cfgdata32(pi, coff, bar); 1838 1839 } else if (pci_emul_iscap(pi, coff)) { 1840 pci_emul_capwrite(pi, coff, bytes, *eax); 1841 } else if (coff == PCIR_COMMAND) { 1842 pci_emul_cmdwrite(pi, *eax, bytes); 1843 } else { 1844 CFGWRITE(pi, coff, *eax, bytes); 1845 } 1846 } 1847 1848 return (0); 1849} 1850 1851INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1852INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1853INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1854INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1855 1856#define PCI_EMUL_TEST 1857#ifdef PCI_EMUL_TEST 1858/* 1859 * Define a dummy test device 1860 */ 1861#define DIOSZ 8 1862#define DMEMSZ 4096 1863struct pci_emul_dsoftc { 1864 uint8_t ioregs[DIOSZ]; 1865 uint8_t memregs[DMEMSZ]; 1866}; 1867 1868#define PCI_EMUL_MSI_MSGS 4 1869#define PCI_EMUL_MSIX_MSGS 16 1870 1871static int 1872pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1873{ 1874 int error; 1875 struct pci_emul_dsoftc *sc; 1876 1877 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 1878 1879 pi->pi_arg = sc; 1880 1881 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1882 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1883 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1884 1885 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1886 assert(error == 0); 1887 1888 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1889 assert(error == 0); 1890 1891 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1892 assert(error == 0); 1893 1894 return (0); 1895} 1896 1897static void 1898pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1899 uint64_t offset, int size, uint64_t value) 1900{ 1901 int i; 1902 struct pci_emul_dsoftc *sc = pi->pi_arg; 1903 1904 if (baridx == 0) { 1905 if (offset + size > DIOSZ) { 1906 printf("diow: iow too large, offset %ld size %d\n", 1907 offset, size); 1908 return; 1909 } 1910 1911 if (size == 1) { 1912 sc->ioregs[offset] = value & 0xff; 1913 } else if (size == 2) { 1914 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1915 } else if (size == 4) { 1916 *(uint32_t *)&sc->ioregs[offset] = value; 1917 } else { 1918 printf("diow: iow unknown size %d\n", size); 1919 } 1920 1921 /* 1922 * Special magic value to generate an interrupt 1923 */ 1924 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1925 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1926 1927 if (value == 0xabcdef) { 1928 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1929 pci_generate_msi(pi, i); 1930 } 1931 } 1932 1933 if (baridx == 1) { 1934 if (offset + size > DMEMSZ) { 1935 printf("diow: memw too large, offset %ld size %d\n", 1936 offset, size); 1937 return; 1938 } 1939 1940 if (size == 1) { 1941 sc->memregs[offset] = value; 1942 } else if (size == 2) { 1943 *(uint16_t *)&sc->memregs[offset] = value; 1944 } else if (size == 4) { 1945 *(uint32_t *)&sc->memregs[offset] = value; 1946 } else if (size == 8) { 1947 *(uint64_t *)&sc->memregs[offset] = value; 1948 } else { 1949 printf("diow: memw unknown size %d\n", size); 1950 } 1951 1952 /* 1953 * magic interrupt ?? 1954 */ 1955 } 1956 1957 if (baridx > 1) { 1958 printf("diow: unknown bar idx %d\n", baridx); 1959 } 1960} 1961 1962static uint64_t 1963pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1964 uint64_t offset, int size) 1965{ 1966 struct pci_emul_dsoftc *sc = pi->pi_arg; 1967 uint32_t value; 1968 1969 if (baridx == 0) { 1970 if (offset + size > DIOSZ) { 1971 printf("dior: ior too large, offset %ld size %d\n", 1972 offset, size); 1973 return (0); 1974 } 1975 1976 if (size == 1) { 1977 value = sc->ioregs[offset]; 1978 } else if (size == 2) { 1979 value = *(uint16_t *) &sc->ioregs[offset]; 1980 } else if (size == 4) { 1981 value = *(uint32_t *) &sc->ioregs[offset]; 1982 } else { 1983 printf("dior: ior unknown size %d\n", size); 1984 } 1985 } 1986 1987 if (baridx == 1) { 1988 if (offset + size > DMEMSZ) { 1989 printf("dior: memr too large, offset %ld size %d\n", 1990 offset, size); 1991 return (0); 1992 } 1993 1994 if (size == 1) { 1995 value = sc->memregs[offset]; 1996 } else if (size == 2) { 1997 value = *(uint16_t *) &sc->memregs[offset]; 1998 } else if (size == 4) { 1999 value = *(uint32_t *) &sc->memregs[offset]; 2000 } else if (size == 8) { 2001 value = *(uint64_t *) &sc->memregs[offset]; 2002 } else { 2003 printf("dior: ior unknown size %d\n", size); 2004 } 2005 } 2006 2007 2008 if (baridx > 1) { 2009 printf("dior: unknown bar idx %d\n", baridx); 2010 return (0); 2011 } 2012 2013 return (value); 2014} 2015 2016struct pci_devemu pci_dummy = { 2017 .pe_emu = "dummy", 2018 .pe_init = pci_emul_dinit, 2019 .pe_barwrite = pci_emul_diow, 2020 .pe_barread = pci_emul_dior 2021}; 2022PCI_EMUL_SET(pci_dummy); 2023 2024#endif /* PCI_EMUL_TEST */ 2025