hvm.c revision 291239
1/* 2 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 3 * Copyright (c) 2012 Spectra Logic Corporation 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/x86/xen/hvm.c 291239 2015-11-24 08:41:27Z royger $"); 30 31#include <sys/param.h> 32#include <sys/bus.h> 33#include <sys/kernel.h> 34#include <sys/malloc.h> 35#include <sys/proc.h> 36#include <sys/smp.h> 37#include <sys/systm.h> 38 39#include <vm/vm.h> 40#include <vm/pmap.h> 41 42#include <dev/pci/pcivar.h> 43 44#include <machine/cpufunc.h> 45#include <machine/cpu.h> 46#include <machine/smp.h> 47 48#include <x86/apicreg.h> 49 50#include <xen/xen-os.h> 51#include <xen/features.h> 52#include <xen/gnttab.h> 53#include <xen/hypervisor.h> 54#include <xen/hvm.h> 55#include <xen/xen_intr.h> 56 57#include <xen/interface/hvm/params.h> 58#include <xen/interface/vcpu.h> 59 60/*--------------------------- Forward Declarations ---------------------------*/ 61#ifdef SMP 62static driver_filter_t xen_smp_rendezvous_action; 63static driver_filter_t xen_invltlb; 64static driver_filter_t xen_invlpg; 65static driver_filter_t xen_invlrng; 66static driver_filter_t xen_invlcache; 67#ifdef __i386__ 68static driver_filter_t xen_lazypmap; 69#endif 70static driver_filter_t xen_ipi_bitmap_handler; 71static driver_filter_t xen_cpustop_handler; 72static driver_filter_t xen_cpususpend_handler; 73static driver_filter_t xen_cpustophard_handler; 74static void xen_ipi_vectored(u_int vector, int dest); 75static void xen_hvm_cpu_resume(void); 76#endif 77static void xen_hvm_cpu_init(void); 78 79/*---------------------------- Extern Declarations ---------------------------*/ 80#ifdef __i386__ 81extern void pmap_lazyfix_action(void); 82#endif 83#ifdef __amd64__ 84extern int pmap_pcid_enabled; 85#endif 86 87/* Variables used by mp_machdep to perform the bitmap IPI */ 88extern volatile u_int cpu_ipi_pending[MAXCPU]; 89 90/*---------------------------------- Macros ----------------------------------*/ 91#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) 92 93/*-------------------------------- Local Types -------------------------------*/ 94enum xen_hvm_init_type { 95 XEN_HVM_INIT_COLD, 96 XEN_HVM_INIT_CANCELLED_SUSPEND, 97 XEN_HVM_INIT_RESUME 98}; 99 100struct xen_ipi_handler 101{ 102 driver_filter_t *filter; 103 const char *description; 104}; 105 106/*-------------------------------- Global Data -------------------------------*/ 107enum xen_domain_type xen_domain_type = XEN_NATIVE; 108 109#ifdef SMP 110struct cpu_ops xen_hvm_cpu_ops = { 111 .ipi_vectored = lapic_ipi_vectored, 112 .cpu_init = xen_hvm_cpu_init, 113 .cpu_resume = xen_hvm_cpu_resume 114}; 115#endif 116 117static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 118 119#ifdef SMP 120static struct xen_ipi_handler xen_ipis[] = 121{ 122 [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, 123 [IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"}, 124 [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, 125 [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, 126 [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, 127#ifdef __i386__ 128 [IPI_TO_IDX(IPI_LAZYPMAP)] = { xen_lazypmap, "lp" }, 129#endif 130 [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, 131 [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, 132 [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, 133 [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, 134}; 135#endif 136 137/** 138 * If non-zero, the hypervisor has been configured to use a direct 139 * IDT event callback for interrupt injection. 140 */ 141int xen_vector_callback_enabled; 142 143/*------------------------------- Per-CPU Data -------------------------------*/ 144DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); 145DPCPU_DEFINE(struct vcpu_info *, vcpu_info); 146#ifdef SMP 147DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); 148#endif 149 150/*------------------ Hypervisor Access Shared Memory Regions -----------------*/ 151/** Hypercall table accessed via HYPERVISOR_*_op() methods. */ 152char *hypercall_stubs; 153shared_info_t *HYPERVISOR_shared_info; 154 155 156/*------------------------------ Sysctl tunables -----------------------------*/ 157int xen_disable_pv_disks = 0; 158int xen_disable_pv_nics = 0; 159TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); 160TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); 161 162#ifdef SMP 163/*---------------------------- XEN PV IPI Handlers ---------------------------*/ 164/* 165 * This are C clones of the ASM functions found in apic_vector.s 166 */ 167static int 168xen_ipi_bitmap_handler(void *arg) 169{ 170 struct trapframe *frame; 171 172 frame = arg; 173 ipi_bitmap_handler(*frame); 174 return (FILTER_HANDLED); 175} 176 177static int 178xen_smp_rendezvous_action(void *arg) 179{ 180#ifdef COUNT_IPIS 181 (*ipi_rendezvous_counts[PCPU_GET(cpuid)])++; 182#endif /* COUNT_IPIS */ 183 184 smp_rendezvous_action(); 185 return (FILTER_HANDLED); 186} 187 188static int 189xen_invltlb(void *arg) 190{ 191 192 invltlb_handler(); 193 return (FILTER_HANDLED); 194} 195 196#ifdef __amd64__ 197static int 198xen_invltlb_pcid(void *arg) 199{ 200 201 invltlb_pcid_handler(); 202 return (FILTER_HANDLED); 203} 204#endif 205 206static int 207xen_invlpg(void *arg) 208{ 209 210 invlpg_handler(); 211 return (FILTER_HANDLED); 212} 213 214#ifdef __amd64__ 215static int 216xen_invlpg_pcid(void *arg) 217{ 218 219 invlpg_pcid_handler(); 220 return (FILTER_HANDLED); 221} 222#endif 223 224static int 225xen_invlrng(void *arg) 226{ 227 228 invlrng_handler(); 229 return (FILTER_HANDLED); 230} 231 232static int 233xen_invlcache(void *arg) 234{ 235 236 invlcache_handler(); 237 return (FILTER_HANDLED); 238} 239 240#ifdef __i386__ 241static int 242xen_lazypmap(void *arg) 243{ 244 245 pmap_lazyfix_action(); 246 return (FILTER_HANDLED); 247} 248#endif 249 250static int 251xen_cpustop_handler(void *arg) 252{ 253 254 cpustop_handler(); 255 return (FILTER_HANDLED); 256} 257 258static int 259xen_cpususpend_handler(void *arg) 260{ 261 262 cpususpend_handler(); 263 return (FILTER_HANDLED); 264} 265 266static int 267xen_cpustophard_handler(void *arg) 268{ 269 270 ipi_nmi_handler(); 271 return (FILTER_HANDLED); 272} 273 274/* Xen PV IPI sender */ 275static void 276xen_ipi_vectored(u_int vector, int dest) 277{ 278 xen_intr_handle_t *ipi_handle; 279 int ipi_idx, to_cpu, self; 280 281 ipi_idx = IPI_TO_IDX(vector); 282 if (ipi_idx > nitems(xen_ipis)) 283 panic("IPI out of range"); 284 285 switch(dest) { 286 case APIC_IPI_DEST_SELF: 287 ipi_handle = DPCPU_GET(ipi_handle); 288 xen_intr_signal(ipi_handle[ipi_idx]); 289 break; 290 case APIC_IPI_DEST_ALL: 291 CPU_FOREACH(to_cpu) { 292 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 293 xen_intr_signal(ipi_handle[ipi_idx]); 294 } 295 break; 296 case APIC_IPI_DEST_OTHERS: 297 self = PCPU_GET(cpuid); 298 CPU_FOREACH(to_cpu) { 299 if (to_cpu != self) { 300 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 301 xen_intr_signal(ipi_handle[ipi_idx]); 302 } 303 } 304 break; 305 default: 306 to_cpu = apic_cpuid(dest); 307 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 308 xen_intr_signal(ipi_handle[ipi_idx]); 309 break; 310 } 311} 312 313/*---------------------- XEN diverged cpu operations -------------------------*/ 314static void 315xen_hvm_cpu_resume(void) 316{ 317 u_int cpuid = PCPU_GET(cpuid); 318 319 /* 320 * Reset pending bitmap IPIs, because Xen doesn't preserve pending 321 * event channels on migration. 322 */ 323 cpu_ipi_pending[cpuid] = 0; 324 325 /* register vcpu_info area */ 326 xen_hvm_cpu_init(); 327} 328 329static void 330xen_cpu_ipi_init(int cpu) 331{ 332 xen_intr_handle_t *ipi_handle; 333 const struct xen_ipi_handler *ipi; 334 device_t dev; 335 int idx, rc; 336 337 ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); 338 dev = pcpu_find(cpu)->pc_device; 339 KASSERT((dev != NULL), ("NULL pcpu device_t")); 340 341 for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { 342 343 if (ipi->filter == NULL) { 344 ipi_handle[idx] = NULL; 345 continue; 346 } 347 348 rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter, 349 INTR_TYPE_TTY, &ipi_handle[idx]); 350 if (rc != 0) 351 panic("Unable to allocate a XEN IPI port"); 352 xen_intr_describe(ipi_handle[idx], "%s", ipi->description); 353 } 354} 355 356static void 357xen_setup_cpus(void) 358{ 359 int i; 360 361 if (!xen_hvm_domain() || !xen_vector_callback_enabled) 362 return; 363 364#ifdef __amd64__ 365 if (pmap_pcid_enabled) { 366 xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid; 367 xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid; 368 } 369#endif 370 CPU_FOREACH(i) 371 xen_cpu_ipi_init(i); 372 373 /* Set the xen pv ipi ops to replace the native ones */ 374 cpu_ops.ipi_vectored = xen_ipi_vectored; 375} 376#endif 377 378/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 379static uint32_t 380xen_hvm_cpuid_base(void) 381{ 382 uint32_t base, regs[4]; 383 384 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 385 do_cpuid(base, regs); 386 if (!memcmp("XenVMMXenVMM", ®s[1], 12) 387 && (regs[0] - base) >= 2) 388 return (base); 389 } 390 return (0); 391} 392 393/* 394 * Allocate and fill in the hypcall page. 395 */ 396static int 397xen_hvm_init_hypercall_stubs(void) 398{ 399 uint32_t base, regs[4]; 400 int i; 401 402 base = xen_hvm_cpuid_base(); 403 if (base == 0) 404 return (ENXIO); 405 406 if (hypercall_stubs == NULL) { 407 do_cpuid(base + 1, regs); 408 printf("XEN: Hypervisor version %d.%d detected.\n", 409 regs[0] >> 16, regs[0] & 0xffff); 410 } 411 412 /* 413 * Find the hypercall pages. 414 */ 415 do_cpuid(base + 2, regs); 416 417 if (hypercall_stubs == NULL) { 418 size_t call_region_size; 419 420 call_region_size = regs[0] * PAGE_SIZE; 421 hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT); 422 if (hypercall_stubs == NULL) 423 panic("Unable to allocate Xen hypercall region"); 424 } 425 426 for (i = 0; i < regs[0]; i++) 427 wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); 428 429 return (0); 430} 431 432static void 433xen_hvm_init_shared_info_page(void) 434{ 435 struct xen_add_to_physmap xatp; 436 437 if (HYPERVISOR_shared_info == NULL) { 438 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); 439 if (HYPERVISOR_shared_info == NULL) 440 panic("Unable to allocate Xen shared info page"); 441 } 442 443 xatp.domid = DOMID_SELF; 444 xatp.idx = 0; 445 xatp.space = XENMAPSPACE_shared_info; 446 xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; 447 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 448 panic("HYPERVISOR_memory_op failed"); 449} 450 451/* 452 * Tell the hypervisor how to contact us for event channel callbacks. 453 */ 454void 455xen_hvm_set_callback(device_t dev) 456{ 457 struct xen_hvm_param xhp; 458 int irq; 459 460 if (xen_vector_callback_enabled) 461 return; 462 463 xhp.domid = DOMID_SELF; 464 xhp.index = HVM_PARAM_CALLBACK_IRQ; 465 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 466 int error; 467 468 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 469 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 470 if (error == 0) { 471 xen_vector_callback_enabled = 1; 472 return; 473 } 474 printf("Xen HVM callback vector registration failed (%d). " 475 "Falling back to emulated device interrupt\n", error); 476 } 477 xen_vector_callback_enabled = 0; 478 if (dev == NULL) { 479 /* 480 * Called from early boot or resume. 481 * xenpci will invoke us again later. 482 */ 483 return; 484 } 485 486 irq = pci_get_irq(dev); 487 if (irq < 16) { 488 xhp.value = HVM_CALLBACK_GSI(irq); 489 } else { 490 u_int slot; 491 u_int pin; 492 493 slot = pci_get_slot(dev); 494 pin = pci_get_intpin(dev) - 1; 495 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 496 } 497 498 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 499 panic("Can't set evtchn callback"); 500} 501 502#define XEN_MAGIC_IOPORT 0x10 503enum { 504 XMI_MAGIC = 0x49d2, 505 XMI_UNPLUG_IDE_DISKS = 0x01, 506 XMI_UNPLUG_NICS = 0x02, 507 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 508}; 509 510static void 511xen_hvm_disable_emulated_devices(void) 512{ 513 u_short disable_devs = 0; 514 515 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 516 return; 517 518 if (xen_disable_pv_disks == 0) { 519 if (bootverbose) 520 printf("XEN: disabling emulated disks\n"); 521 disable_devs |= XMI_UNPLUG_IDE_DISKS; 522 } 523 if (xen_disable_pv_nics == 0) { 524 if (bootverbose) 525 printf("XEN: disabling emulated nics\n"); 526 disable_devs |= XMI_UNPLUG_NICS; 527 } 528 529 if (disable_devs != 0) 530 outw(XEN_MAGIC_IOPORT, disable_devs); 531} 532 533static void 534xen_hvm_init(enum xen_hvm_init_type init_type) 535{ 536 int error; 537 int i; 538 539 if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 540 return; 541 542 error = xen_hvm_init_hypercall_stubs(); 543 544 switch (init_type) { 545 case XEN_HVM_INIT_COLD: 546 if (error != 0) 547 return; 548 549 setup_xen_features(); 550#ifdef SMP 551 cpu_ops = xen_hvm_cpu_ops; 552#endif 553 vm_guest = VM_GUEST_XEN; 554 break; 555 case XEN_HVM_INIT_RESUME: 556 if (error != 0) 557 panic("Unable to init Xen hypercall stubs on resume"); 558 559 /* Clear stale vcpu_info. */ 560 CPU_FOREACH(i) 561 DPCPU_ID_SET(i, vcpu_info, NULL); 562 break; 563 default: 564 panic("Unsupported HVM initialization type"); 565 } 566 567 xen_vector_callback_enabled = 0; 568 xen_domain_type = XEN_HVM_DOMAIN; 569 xen_hvm_init_shared_info_page(); 570 xen_hvm_set_callback(NULL); 571 xen_hvm_disable_emulated_devices(); 572} 573 574void 575xen_hvm_suspend(void) 576{ 577} 578 579void 580xen_hvm_resume(bool suspend_cancelled) 581{ 582 583 xen_hvm_init(suspend_cancelled ? 584 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 585 586 /* Register vcpu_info area for CPU#0. */ 587 xen_hvm_cpu_init(); 588} 589 590static void 591xen_hvm_sysinit(void *arg __unused) 592{ 593 xen_hvm_init(XEN_HVM_INIT_COLD); 594} 595 596static void 597xen_set_vcpu_id(void) 598{ 599 struct pcpu *pc; 600 int i; 601 602 /* Set vcpu_id to acpi_id */ 603 CPU_FOREACH(i) { 604 pc = pcpu_find(i); 605 pc->pc_vcpu_id = pc->pc_acpi_id; 606 if (bootverbose) 607 printf("XEN: CPU %u has VCPU ID %u\n", 608 i, pc->pc_vcpu_id); 609 } 610} 611 612static void 613xen_hvm_cpu_init(void) 614{ 615 struct vcpu_register_vcpu_info info; 616 struct vcpu_info *vcpu_info; 617 int cpu, rc; 618 619 if (!xen_domain()) 620 return; 621 622 if (DPCPU_GET(vcpu_info) != NULL) { 623 /* 624 * vcpu_info is already set. We're resuming 625 * from a failed migration and our pre-suspend 626 * configuration is still valid. 627 */ 628 return; 629 } 630 631 vcpu_info = DPCPU_PTR(vcpu_local_info); 632 cpu = PCPU_GET(vcpu_id); 633 info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; 634 info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); 635 636 rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); 637 if (rc != 0) 638 DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); 639 else 640 DPCPU_SET(vcpu_info, vcpu_info); 641} 642 643SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 644#ifdef SMP 645SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_FIRST, xen_setup_cpus, NULL); 646#endif 647SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 648SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL); 649