hvm.c revision 303776
1/*
2 * Copyright (c) 2008, 2013 Citrix Systems, Inc.
3 * Copyright (c) 2012 Spectra Logic Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/x86/xen/hvm.c 303776 2016-08-05 17:13:25Z jhb $");
30
31#include <sys/param.h>
32#include <sys/bus.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/proc.h>
36#include <sys/smp.h>
37#include <sys/systm.h>
38
39#include <vm/vm.h>
40#include <vm/pmap.h>
41
42#include <dev/pci/pcivar.h>
43
44#include <machine/cpufunc.h>
45#include <machine/cpu.h>
46#include <machine/smp.h>
47
48#include <x86/apicreg.h>
49
50#include <xen/xen-os.h>
51#include <xen/features.h>
52#include <xen/gnttab.h>
53#include <xen/hypervisor.h>
54#include <xen/hvm.h>
55#include <xen/xen_intr.h>
56
57#include <xen/interface/hvm/params.h>
58#include <xen/interface/vcpu.h>
59
60/*--------------------------- Forward Declarations ---------------------------*/
#ifdef SMP
/* Filter routines installed through the xen_ipis[] table. */
static driver_filter_t xen_smp_rendezvous_action;
static driver_filter_t xen_ipi_bitmap_handler;

/* TLB and cache shootdown filters. */
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
static driver_filter_t xen_invlrng;
static driver_filter_t xen_invlcache;
#ifdef __i386__
static driver_filter_t xen_lazypmap;
#endif

/* CPU stop and suspend filters. */
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;

/* IPI sender installed into cpu_ops by xen_setup_cpus(). */
static void xen_ipi_vectored(u_int vector, int dest);
#endif
/* Referenced by xen_hvm_cpu_ops and xen_hvm_resume() before its definition. */
static void xen_hvm_cpu_init(void);
77
78/*---------------------------- Extern Declarations ---------------------------*/
#if defined(__i386__)
/* Invoked by xen_lazypmap(). */
void pmap_lazyfix_action(void);
#endif
#if defined(__amd64__)
/* Tested by xen_setup_cpus() to select the PCID-aware TLB filters. */
extern int pmap_pcid_enabled;
#endif
85
86/*---------------------------------- Macros ----------------------------------*/
/* Translate an IPI vector number into an index into xen_ipis[]. */
#define	IPI_TO_IDX(vector) ((vector) - APIC_IPI_INTS)
88
89/*-------------------------------- Local Types -------------------------------*/
/* Reason xen_hvm_init() is being run. */
enum xen_hvm_init_type {
	/* First initialization, from the xen_hvm_sysinit() SYSINIT. */
	XEN_HVM_INIT_COLD = 0,
	/* Resume after a cancelled suspend; xen_hvm_init() does nothing. */
	XEN_HVM_INIT_CANCELLED_SUSPEND = 1,
	/* Resume after a completed suspend. */
	XEN_HVM_INIT_RESUME = 2
};
95
96struct xen_ipi_handler
97{
98	driver_filter_t	*filter;
99	const char	*description;
100};
101
102/*-------------------------------- Global Data -------------------------------*/
103enum xen_domain_type xen_domain_type = XEN_NATIVE;
104
105#ifdef SMP
106struct cpu_ops xen_hvm_cpu_ops = {
107	.ipi_vectored	= lapic_ipi_vectored,
108	.cpu_init	= xen_hvm_cpu_init,
109	.cpu_resume	= xen_hvm_cpu_init
110};
111#endif
112
113static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
114
115#ifdef SMP
116static struct xen_ipi_handler xen_ipis[] =
117{
118	[IPI_TO_IDX(IPI_RENDEZVOUS)]	= { xen_smp_rendezvous_action,	"r"   },
119	[IPI_TO_IDX(IPI_INVLTLB)]	= { xen_invltlb,		"itlb"},
120	[IPI_TO_IDX(IPI_INVLPG)]	= { xen_invlpg,			"ipg" },
121	[IPI_TO_IDX(IPI_INVLRNG)]	= { xen_invlrng,		"irg" },
122	[IPI_TO_IDX(IPI_INVLCACHE)]	= { xen_invlcache,		"ic"  },
123#ifdef __i386__
124	[IPI_TO_IDX(IPI_LAZYPMAP)]	= { xen_lazypmap,		"lp"  },
125#endif
126	[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler,	"b"   },
127	[IPI_TO_IDX(IPI_STOP)]		= { xen_cpustop_handler,	"st"  },
128	[IPI_TO_IDX(IPI_SUSPEND)]	= { xen_cpususpend_handler,	"sp"  },
129	[IPI_TO_IDX(IPI_STOP_HARD)]	= { xen_cpustophard_handler,	"sth" },
130};
131#endif
132
/**
 * Non-zero once the hypervisor has accepted a direct IDT vector as the
 * event channel callback (see xen_hvm_set_callback()); zero while the
 * callback is not registered or an emulated device interrupt is used.
 */
int xen_vector_callback_enabled = 0;
138
139/*------------------------------- Per-CPU Data -------------------------------*/
140DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
141DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
142#ifdef SMP
143DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
144#endif
145
146/*------------------ Hypervisor Access Shared Memory Regions -----------------*/
147/** Hypercall table accessed via HYPERVISOR_*_op() methods. */
148char *hypercall_stubs;
149shared_info_t *HYPERVISOR_shared_info;
150
151
152/*------------------------------ Sysctl tunables -----------------------------*/
153int xen_disable_pv_disks = 0;
154int xen_disable_pv_nics = 0;
155TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
156TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
157
158#ifdef SMP
159/*---------------------------- XEN PV IPI Handlers ---------------------------*/
160/*
161 * These are C clones of the ASM functions found in apic_vector.s
162 */
163static int
164xen_ipi_bitmap_handler(void *arg)
165{
166	struct trapframe *frame;
167
168	frame = arg;
169	ipi_bitmap_handler(*frame);
170	return (FILTER_HANDLED);
171}
172
173static int
174xen_smp_rendezvous_action(void *arg)
175{
176#ifdef COUNT_IPIS
177	(*ipi_rendezvous_counts[PCPU_GET(cpuid)])++;
178#endif /* COUNT_IPIS */
179
180	smp_rendezvous_action();
181	return (FILTER_HANDLED);
182}
183
184static int
185xen_invltlb(void *arg)
186{
187
188	invltlb_handler();
189	return (FILTER_HANDLED);
190}
191
192#ifdef __amd64__
193static int
194xen_invltlb_pcid(void *arg)
195{
196
197	invltlb_pcid_handler();
198	return (FILTER_HANDLED);
199}
200#endif
201
202static int
203xen_invlpg(void *arg)
204{
205
206	invlpg_handler();
207	return (FILTER_HANDLED);
208}
209
210#ifdef __amd64__
211static int
212xen_invlpg_pcid(void *arg)
213{
214
215	invlpg_pcid_handler();
216	return (FILTER_HANDLED);
217}
218#endif
219
220static int
221xen_invlrng(void *arg)
222{
223
224	invlrng_handler();
225	return (FILTER_HANDLED);
226}
227
228static int
229xen_invlcache(void *arg)
230{
231
232	invlcache_handler();
233	return (FILTER_HANDLED);
234}
235
236#ifdef __i386__
237static int
238xen_lazypmap(void *arg)
239{
240
241	pmap_lazyfix_action();
242	return (FILTER_HANDLED);
243}
244#endif
245
246static int
247xen_cpustop_handler(void *arg)
248{
249
250	cpustop_handler();
251	return (FILTER_HANDLED);
252}
253
254static int
255xen_cpususpend_handler(void *arg)
256{
257
258	cpususpend_handler();
259	return (FILTER_HANDLED);
260}
261
262static int
263xen_cpustophard_handler(void *arg)
264{
265
266	ipi_nmi_handler();
267	return (FILTER_HANDLED);
268}
269
270/* Xen PV IPI sender */
271static void
272xen_ipi_vectored(u_int vector, int dest)
273{
274	xen_intr_handle_t *ipi_handle;
275	int ipi_idx, to_cpu, self;
276
277	ipi_idx = IPI_TO_IDX(vector);
278	if (ipi_idx > nitems(xen_ipis))
279		panic("IPI out of range");
280
281	switch(dest) {
282	case APIC_IPI_DEST_SELF:
283		ipi_handle = DPCPU_GET(ipi_handle);
284		xen_intr_signal(ipi_handle[ipi_idx]);
285		break;
286	case APIC_IPI_DEST_ALL:
287		CPU_FOREACH(to_cpu) {
288			ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
289			xen_intr_signal(ipi_handle[ipi_idx]);
290		}
291		break;
292	case APIC_IPI_DEST_OTHERS:
293		self = PCPU_GET(cpuid);
294		CPU_FOREACH(to_cpu) {
295			if (to_cpu != self) {
296				ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
297				xen_intr_signal(ipi_handle[ipi_idx]);
298			}
299		}
300		break;
301	default:
302		to_cpu = apic_cpuid(dest);
303		ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
304		xen_intr_signal(ipi_handle[ipi_idx]);
305		break;
306	}
307}
308
309/*---------------------- XEN diverged cpu operations -------------------------*/
310static void
311xen_cpu_ipi_init(int cpu)
312{
313	xen_intr_handle_t *ipi_handle;
314	const struct xen_ipi_handler *ipi;
315	device_t dev;
316	int idx, rc;
317
318	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
319	dev = pcpu_find(cpu)->pc_device;
320	KASSERT((dev != NULL), ("NULL pcpu device_t"));
321
322	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
323
324		if (ipi->filter == NULL) {
325			ipi_handle[idx] = NULL;
326			continue;
327		}
328
329		rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter,
330		    INTR_TYPE_TTY, &ipi_handle[idx]);
331		if (rc != 0)
332			panic("Unable to allocate a XEN IPI port");
333		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
334	}
335}
336
337static void
338xen_setup_cpus(void)
339{
340	int i;
341
342	if (!xen_hvm_domain() || !xen_vector_callback_enabled)
343		return;
344
345#ifdef __amd64__
346	if (pmap_pcid_enabled) {
347		xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
348		xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
349	}
350#endif
351	CPU_FOREACH(i)
352		xen_cpu_ipi_init(i);
353
354	/* Set the xen pv ipi ops to replace the native ones */
355	cpu_ops.ipi_vectored = xen_ipi_vectored;
356}
357#endif
358
359/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
/*
 * Locate the Xen CPUID leaf range.
 *
 * Probes leaves 0x40000000 up to (but excluding) 0x40010000 in steps of
 * 0x100.  A leaf matches when the 12 bytes stored in regs[1..3] spell
 * "XenVMMXenVMM" and regs[0] reports at least two further leaves above
 * the base.
 *
 * Returns the matching base leaf, or 0 when none is found.
 */
static uint32_t
xen_hvm_cpuid_base(void)
{
	static const char xen_signature[] = "XenVMMXenVMM";
	uint32_t leaf, regs[4];

	leaf = 0x40000000;
	while (leaf < 0x40010000) {
		do_cpuid(leaf, regs);
		if (memcmp(xen_signature, &regs[1],
		    sizeof(xen_signature) - 1) == 0 && regs[0] - leaf >= 2)
			return (leaf);
		leaf += 0x100;
	}
	return (0);
}
373
374/*
375 * Allocate and fill in the hypcall page.
376 */
377static int
378xen_hvm_init_hypercall_stubs(void)
379{
380	uint32_t base, regs[4];
381	int i;
382
383	base = xen_hvm_cpuid_base();
384	if (base == 0)
385		return (ENXIO);
386
387	if (hypercall_stubs == NULL) {
388		int major, minor;
389
390		do_cpuid(base + 1, regs);
391
392		major = regs[0] >> 16;
393		minor = regs[0] & 0xffff;
394		printf("XEN: Hypervisor version %d.%d detected.\n", major,
395			minor);
396
397		if (((major < 4) || (major == 4 && minor <= 5)) &&
398		    msix_disable_migration == -1) {
399			/*
400			 * Xen hypervisors prior to 4.6.0 do not properly
401			 * handle updates to enabled MSI-X table entries,
402			 * so disable MSI-X interrupt migration in that
403			 * case.
404			 */
405			if (bootverbose)
406				printf(
407"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
408"Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
409			msix_disable_migration = 1;
410		}
411	}
412
413	/*
414	 * Find the hypercall pages.
415	 */
416	do_cpuid(base + 2, regs);
417
418	if (hypercall_stubs == NULL) {
419		size_t call_region_size;
420
421		call_region_size = regs[0] * PAGE_SIZE;
422		hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT);
423		if (hypercall_stubs == NULL)
424			panic("Unable to allocate Xen hypercall region");
425	}
426
427	for (i = 0; i < regs[0]; i++)
428		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
429
430	return (0);
431}
432
433static void
434xen_hvm_init_shared_info_page(void)
435{
436	struct xen_add_to_physmap xatp;
437
438	if (HYPERVISOR_shared_info == NULL) {
439		HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
440		if (HYPERVISOR_shared_info == NULL)
441			panic("Unable to allocate Xen shared info page");
442	}
443
444	xatp.domid = DOMID_SELF;
445	xatp.idx = 0;
446	xatp.space = XENMAPSPACE_shared_info;
447	xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
448	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
449		panic("HYPERVISOR_memory_op failed");
450}
451
452/*
453 * Tell the hypervisor how to contact us for event channel callbacks.
454 */
455void
456xen_hvm_set_callback(device_t dev)
457{
458	struct xen_hvm_param xhp;
459	int irq;
460
461	if (xen_vector_callback_enabled)
462		return;
463
464	xhp.domid = DOMID_SELF;
465	xhp.index = HVM_PARAM_CALLBACK_IRQ;
466	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
467		int error;
468
469		xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
470		error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
471		if (error == 0) {
472			xen_vector_callback_enabled = 1;
473			return;
474		}
475		printf("Xen HVM callback vector registration failed (%d). "
476		    "Falling back to emulated device interrupt\n", error);
477	}
478	xen_vector_callback_enabled = 0;
479	if (dev == NULL) {
480		/*
481		 * Called from early boot or resume.
482		 * xenpci will invoke us again later.
483		 */
484		return;
485	}
486
487	irq = pci_get_irq(dev);
488	if (irq < 16) {
489		xhp.value = HVM_CALLBACK_GSI(irq);
490	} else {
491		u_int slot;
492		u_int pin;
493
494		slot = pci_get_slot(dev);
495		pin = pci_get_intpin(dev) - 1;
496		xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
497	}
498
499	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
500		panic("Can't set evtchn callback");
501}
502
/* I/O port read and written by xen_hvm_disable_emulated_devices(). */
#define	XEN_MAGIC_IOPORT 0x10

/* Values exchanged through XEN_MAGIC_IOPORT. */
enum {
	/* Bits written to the port to unplug classes of emulated devices. */
	XMI_UNPLUG_IDE_DISKS		 = 0x01,
	XMI_UNPLUG_NICS			 = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04,
	/* Value that must be read back before unplugging is attempted. */
	XMI_MAGIC			 = 0x49d2
};
510
511static void
512xen_hvm_disable_emulated_devices(void)
513{
514	u_short disable_devs = 0;
515
516	if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
517		return;
518
519	if (xen_disable_pv_disks == 0) {
520		if (bootverbose)
521			printf("XEN: disabling emulated disks\n");
522		disable_devs |= XMI_UNPLUG_IDE_DISKS;
523	}
524	if (xen_disable_pv_nics == 0) {
525		if (bootverbose)
526			printf("XEN: disabling emulated nics\n");
527		disable_devs |= XMI_UNPLUG_NICS;
528	}
529
530	if (disable_devs != 0)
531		outw(XEN_MAGIC_IOPORT, disable_devs);
532}
533
534static void
535xen_hvm_init(enum xen_hvm_init_type init_type)
536{
537	int error;
538	int i;
539
540	if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
541		return;
542
543	error = xen_hvm_init_hypercall_stubs();
544
545	switch (init_type) {
546	case XEN_HVM_INIT_COLD:
547		if (error != 0)
548			return;
549
550		setup_xen_features();
551#ifdef SMP
552		cpu_ops = xen_hvm_cpu_ops;
553#endif
554 		vm_guest = VM_GUEST_XEN;
555		break;
556	case XEN_HVM_INIT_RESUME:
557		if (error != 0)
558			panic("Unable to init Xen hypercall stubs on resume");
559
560		/* Clear stale vcpu_info. */
561		CPU_FOREACH(i)
562			DPCPU_ID_SET(i, vcpu_info, NULL);
563		break;
564	default:
565		panic("Unsupported HVM initialization type");
566	}
567
568	xen_vector_callback_enabled = 0;
569	xen_domain_type = XEN_HVM_DOMAIN;
570	xen_hvm_init_shared_info_page();
571	xen_hvm_set_callback(NULL);
572	xen_hvm_disable_emulated_devices();
573}
574
/* Suspend hook; currently there is nothing to do before suspending. */
void
xen_hvm_suspend(void)
{
	/* Intentionally empty. */
}
579
580void
581xen_hvm_resume(bool suspend_cancelled)
582{
583
584	xen_hvm_init(suspend_cancelled ?
585	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
586
587	/* Register vcpu_info area for CPU#0. */
588	xen_hvm_cpu_init();
589}
590
591static void
592xen_hvm_sysinit(void *arg __unused)
593{
594	xen_hvm_init(XEN_HVM_INIT_COLD);
595}
596
597static void
598xen_set_vcpu_id(void)
599{
600	struct pcpu *pc;
601	int i;
602
603	/* Set vcpu_id to acpi_id */
604	CPU_FOREACH(i) {
605		pc = pcpu_find(i);
606		pc->pc_vcpu_id = pc->pc_acpi_id;
607		if (bootverbose)
608			printf("XEN: CPU %u has VCPU ID %u\n",
609			       i, pc->pc_vcpu_id);
610	}
611}
612
613static void
614xen_hvm_cpu_init(void)
615{
616	struct vcpu_register_vcpu_info info;
617	struct vcpu_info *vcpu_info;
618	int cpu, rc;
619
620	if (!xen_domain())
621		return;
622
623	if (DPCPU_GET(vcpu_info) != NULL) {
624		/*
625		 * vcpu_info is already set.  We're resuming
626		 * from a failed migration and our pre-suspend
627		 * configuration is still valid.
628		 */
629		return;
630	}
631
632	vcpu_info = DPCPU_PTR(vcpu_local_info);
633	cpu = PCPU_GET(vcpu_id);
634	info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
635	info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
636
637	rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
638	if (rc != 0)
639		DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]);
640	else
641		DPCPU_SET(vcpu_info, vcpu_info);
642}
643
/* Boot-time hooks; each runs in the subsystem and order named below. */
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL);
SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
#if defined(SMP)
SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_FIRST, xen_setup_cpus, NULL);
#endif
649SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL);
650