hvm.c revision 291687
/*
 * Copyright (c) 2008, 2013 Citrix Systems, Inc.
 * Copyright (c) 2012 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/x86/xen/hvm.c 291687 2015-12-03 11:05:35Z royger $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/pci/pcivar.h>

#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <x86/apicreg.h>

#include <xen/xen-os.h>
#include <xen/features.h>
#include <xen/gnttab.h>
#include <xen/hypervisor.h>
#include <xen/hvm.h>
#include <xen/xen_intr.h>

#include <xen/interface/hvm/params.h>
#include <xen/interface/vcpu.h>

/*--------------------------- Forward Declarations ---------------------------*/
#ifdef SMP
static driver_filter_t xen_smp_rendezvous_action;
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
static driver_filter_t xen_invlrng;
static driver_filter_t xen_invlcache;
#ifdef __i386__
static driver_filter_t xen_lazypmap;
#endif
static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;
static void xen_ipi_vectored(u_int vector, int dest);
#endif
static void xen_hvm_cpu_init(void);

/*---------------------------- Extern Declarations ---------------------------*/
#ifdef __i386__
extern void pmap_lazyfix_action(void);
#endif
#ifdef __amd64__
extern int pmap_pcid_enabled;
#endif

/*---------------------------------- Macros ----------------------------------*/
#define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)

/*-------------------------------- Local Types -------------------------------*/
enum xen_hvm_init_type {
	XEN_HVM_INIT_COLD,
	XEN_HVM_INIT_CANCELLED_SUSPEND,
	XEN_HVM_INIT_RESUME
};

struct xen_ipi_handler
{
	driver_filter_t	*filter;
	const char	*description;
};

/*-------------------------------- Global Data -------------------------------*/
enum xen_domain_type xen_domain_type = XEN_NATIVE;

#ifdef SMP
struct cpu_ops xen_hvm_cpu_ops = {
	.ipi_vectored	= lapic_ipi_vectored,
	.cpu_init	= xen_hvm_cpu_init,
	.cpu_resume	= xen_hvm_cpu_init
};
#endif

static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");

#ifdef SMP
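/*
 * Filters bound to the per-CPU event channels that stand in for the
 * native IPI vectors.  The short strings are passed to xen_intr_describe()
 * so the handlers can be told apart in interrupt listings.
 */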
static struct xen_ipi_handler xen_ipis[] =
{
	[IPI_TO_IDX(IPI_RENDEZVOUS)]	= { xen_smp_rendezvous_action,	"r"   },
	[IPI_TO_IDX(IPI_INVLTLB)]	= { xen_invltlb,		"itlb"},
	[IPI_TO_IDX(IPI_INVLPG)]	= { xen_invlpg,			"ipg" },
	[IPI_TO_IDX(IPI_INVLRNG)]	= { xen_invlrng,		"irg" },
	[IPI_TO_IDX(IPI_INVLCACHE)]	= { xen_invlcache,		"ic"  },
#ifdef __i386__
	[IPI_TO_IDX(IPI_LAZYPMAP)]	= { xen_lazypmap,		"lp"  },
#endif
	[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler,	"b"   },
	[IPI_TO_IDX(IPI_STOP)]		= { xen_cpustop_handler,	"st"  },
	[IPI_TO_IDX(IPI_SUSPEND)]	= { xen_cpususpend_handler,	"sp"  },
	[IPI_TO_IDX(IPI_STOP_HARD)]	= { xen_cpustophard_handler,	"sth" },
};
#endif

/**
 * If non-zero, the hypervisor has been configured to use a direct
 * IDT event callback for interrupt injection.
 */
int xen_vector_callback_enabled;

/*------------------------------- Per-CPU Data -------------------------------*/
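/*
 * vcpu_local_info is the per-CPU backing storage offered to the hypervisor;
 * vcpu_info points either at it or, if registration fails, at this CPU's
 * slot in the shared info page (see xen_hvm_cpu_init()).
 */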
DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
#ifdef SMP
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
#endif

/*------------------ Hypervisor Access Shared Memory Regions -----------------*/
/** Hypercall table accessed via HYPERVISOR_*_op() methods. */
char *hypercall_stubs;
shared_info_t *HYPERVISOR_shared_info;


/*------------------------------ Sysctl tunables -----------------------------*/
int xen_disable_pv_disks = 0;
int xen_disable_pv_nics = 0;
TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);

#ifdef SMP
/*---------------------------- XEN PV IPI Handlers ---------------------------*/
/*
 * These are C clones of the ASM functions found in apic_vector.s.
 */
static int
xen_ipi_bitmap_handler(void *arg)
{
	struct trapframe *frame;

	frame = arg;
	ipi_bitmap_handler(*frame);
	return (FILTER_HANDLED);
}

static int
xen_smp_rendezvous_action(void *arg)
{
#ifdef COUNT_IPIS
	(*ipi_rendezvous_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	smp_rendezvous_action();
	return (FILTER_HANDLED);
}

static int
xen_invltlb(void *arg)
{

	invltlb_handler();
	return (FILTER_HANDLED);
}

#ifdef __amd64__
static int
xen_invltlb_pcid(void *arg)
{

	invltlb_pcid_handler();
	return (FILTER_HANDLED);
}
#endif

static int
xen_invlpg(void *arg)
{

	invlpg_handler();
	return (FILTER_HANDLED);
}

#ifdef __amd64__
static int
xen_invlpg_pcid(void *arg)
{

	invlpg_pcid_handler();
	return (FILTER_HANDLED);
}
#endif

static int
xen_invlrng(void *arg)
{

	invlrng_handler();
	return (FILTER_HANDLED);
}

static int
xen_invlcache(void *arg)
{

	invlcache_handler();
	return (FILTER_HANDLED);
}

#ifdef __i386__
static int
xen_lazypmap(void *arg)
{

	pmap_lazyfix_action();
	return (FILTER_HANDLED);
}
#endif

static int
xen_cpustop_handler(void *arg)
{

	cpustop_handler();
	return (FILTER_HANDLED);
}

static int
xen_cpususpend_handler(void *arg)
{

	cpususpend_handler();
	return (FILTER_HANDLED);
}

static int
xen_cpustophard_handler(void *arg)
{

	ipi_nmi_handler();
	return (FILTER_HANDLED);
}

/* Xen PV IPI sender */
static void
xen_ipi_vectored(u_int vector, int dest)
{
	xen_intr_handle_t *ipi_handle;
	int ipi_idx, to_cpu, self;

	ipi_idx = IPI_TO_IDX(vector);
	if (ipi_idx >= nitems(xen_ipis))
		panic("IPI out of range");

	switch(dest) {
	case APIC_IPI_DEST_SELF:
		ipi_handle = DPCPU_GET(ipi_handle);
		xen_intr_signal(ipi_handle[ipi_idx]);
		break;
	case APIC_IPI_DEST_ALL:
		CPU_FOREACH(to_cpu) {
			ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
			xen_intr_signal(ipi_handle[ipi_idx]);
		}
		break;
	case APIC_IPI_DEST_OTHERS:
		self = PCPU_GET(cpuid);
		CPU_FOREACH(to_cpu) {
			if (to_cpu != self) {
				ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
				xen_intr_signal(ipi_handle[ipi_idx]);
			}
		}
		break;
	default:
		to_cpu = apic_cpuid(dest);
		ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
		xen_intr_signal(ipi_handle[ipi_idx]);
		break;
	}
}

/*---------------------- XEN diverged cpu operations -------------------------*/
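/*
 * Allocate and bind one event channel per defined IPI for the given CPU,
 * stashing the handles in its per-CPU ipi_handle[] array.
 */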
static void
xen_cpu_ipi_init(int cpu)
{
	xen_intr_handle_t *ipi_handle;
	const struct xen_ipi_handler *ipi;
	device_t dev;
	int idx, rc;

	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
	dev = pcpu_find(cpu)->pc_device;
	KASSERT((dev != NULL), ("NULL pcpu device_t"));

	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {

		if (ipi->filter == NULL) {
			ipi_handle[idx] = NULL;
			continue;
		}

		rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter,
		    INTR_TYPE_TTY, &ipi_handle[idx]);
		if (rc != 0)
			panic("Unable to allocate a XEN IPI port");
		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
	}
}

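/*
 * Once all CPUs are up, bind the per-CPU IPI event channels and switch
 * cpu_ops.ipi_vectored over to the event-channel based sender.  With PCID
 * enabled on amd64 the TLB shootdown handlers must be the PCID-aware
 * variants.
 */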
static void
xen_setup_cpus(void)
{
	int i;

	if (!xen_hvm_domain() || !xen_vector_callback_enabled)
		return;

#ifdef __amd64__
	if (pmap_pcid_enabled) {
		xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
		xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
	}
#endif
	CPU_FOREACH(i)
		xen_cpu_ipi_init(i);

	/* Set the xen pv ipi ops to replace the native ones */
	cpu_ops.ipi_vectored = xen_ipi_vectored;
}
#endif

/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
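/*
 * Scan the hypervisor CPUID leaf range (0x40000000-0x4000ff00, in steps of
 * 0x100) for the "XenVMMXenVMM" signature, requiring at least two additional
 * leaves so the version and hypercall information can be queried.
 */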
static uint32_t
xen_hvm_cpuid_base(void)
{
	uint32_t base, regs[4];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		do_cpuid(base, regs);
		if (!memcmp("XenVMMXenVMM", &regs[1], 12)
		    && (regs[0] - base) >= 2)
			return (base);
	}
	return (0);
}

/*
 * Allocate and fill in the hypercall page.
 */
static int
xen_hvm_init_hypercall_stubs(void)
{
	uint32_t base, regs[4];
	int i;

	base = xen_hvm_cpuid_base();
	if (base == 0)
		return (ENXIO);

	if (hypercall_stubs == NULL) {
		do_cpuid(base + 1, regs);
		printf("XEN: Hypervisor version %d.%d detected.\n",
		    regs[0] >> 16, regs[0] & 0xffff);
	}

	/*
	 * Find the hypercall pages.
	 */
	do_cpuid(base + 2, regs);

	if (hypercall_stubs == NULL) {
		size_t call_region_size;

		call_region_size = regs[0] * PAGE_SIZE;
		hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT);
		if (hypercall_stubs == NULL)
			panic("Unable to allocate Xen hypercall region");
	}

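	/*
	 * Leaf base+2 reports the number of hypercall pages in EAX and the
	 * MSR used to install them in EBX.  Writing the physical address of
	 * page i, with the page index in the low bits, asks Xen to fill that
	 * page with the hypercall stubs.
	 */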
	for (i = 0; i < regs[0]; i++)
		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);

	return (0);
}

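/*
 * Ask the hypervisor to map its shared info page (event channel state and
 * legacy vcpu_info slots) over a guest-allocated page using
 * XENMEM_add_to_physmap.
 */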
static void
xen_hvm_init_shared_info_page(void)
{
	struct xen_add_to_physmap xatp;

	if (HYPERVISOR_shared_info == NULL) {
		HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
		if (HYPERVISOR_shared_info == NULL)
			panic("Unable to allocate Xen shared info page");
	}

	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		panic("HYPERVISOR_memory_op failed");
}

/*
 * Tell the hypervisor how to contact us for event channel callbacks.
 */
void
xen_hvm_set_callback(device_t dev)
{
	struct xen_hvm_param xhp;
	int irq;

	if (xen_vector_callback_enabled)
		return;

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
		int error;

		xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
		error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
		if (error == 0) {
			xen_vector_callback_enabled = 1;
			return;
		}
		printf("Xen HVM callback vector registration failed (%d). "
		    "Falling back to emulated device interrupt\n", error);
	}
	xen_vector_callback_enabled = 0;
	if (dev == NULL) {
		/*
		 * Called from early boot or resume.
		 * xenpci will invoke us again later.
		 */
		return;
	}

	irq = pci_get_irq(dev);
	if (irq < 16) {
		xhp.value = HVM_CALLBACK_GSI(irq);
	} else {
		u_int slot;
		u_int pin;

		slot = pci_get_slot(dev);
		pin = pci_get_intpin(dev) - 1;
		xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
	}

	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
		panic("Can't set evtchn callback");
}

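/*
 * The Xen platform device exposes an "unplug" protocol on this I/O port:
 * reading it back as XMI_MAGIC means the protocol is available, and writing
 * a mask of XMI_UNPLUG_* bits removes the corresponding emulated devices so
 * the PV drivers can take over.
 */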
#define	XEN_MAGIC_IOPORT 0x10
enum {
	XMI_MAGIC			 = 0x49d2,
	XMI_UNPLUG_IDE_DISKS		 = 0x01,
	XMI_UNPLUG_NICS			 = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
};

static void
xen_hvm_disable_emulated_devices(void)
{
	u_short disable_devs = 0;

	if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
		return;

	if (xen_disable_pv_disks == 0) {
		if (bootverbose)
			printf("XEN: disabling emulated disks\n");
		disable_devs |= XMI_UNPLUG_IDE_DISKS;
	}
	if (xen_disable_pv_nics == 0) {
		if (bootverbose)
			printf("XEN: disabling emulated nics\n");
		disable_devs |= XMI_UNPLUG_NICS;
	}

	if (disable_devs != 0)
		outw(XEN_MAGIC_IOPORT, disable_devs);
}

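/*
 * Common HVM setup path shared by cold boot and resume: install the
 * hypercall page, map the shared info page, re-arm the event channel
 * callback and unplug emulated devices.  A cancelled suspend keeps the
 * pre-suspend state and needs no reinitialization.
 */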
static void
xen_hvm_init(enum xen_hvm_init_type init_type)
{
	int error;
	int i;

	if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
		return;

	error = xen_hvm_init_hypercall_stubs();

	switch (init_type) {
	case XEN_HVM_INIT_COLD:
		if (error != 0)
			return;

		setup_xen_features();
#ifdef SMP
		cpu_ops = xen_hvm_cpu_ops;
#endif
		vm_guest = VM_GUEST_XEN;
		break;
	case XEN_HVM_INIT_RESUME:
		if (error != 0)
			panic("Unable to init Xen hypercall stubs on resume");

		/* Clear stale vcpu_info. */
		CPU_FOREACH(i)
			DPCPU_ID_SET(i, vcpu_info, NULL);
		break;
	default:
		panic("Unsupported HVM initialization type");
	}

	xen_vector_callback_enabled = 0;
	xen_domain_type = XEN_HVM_DOMAIN;
	xen_hvm_init_shared_info_page();
	xen_hvm_set_callback(NULL);
	xen_hvm_disable_emulated_devices();
}

void
xen_hvm_suspend(void)
{
}

void
xen_hvm_resume(bool suspend_cancelled)
{

	xen_hvm_init(suspend_cancelled ?
	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);

	/* Register vcpu_info area for CPU#0. */
	xen_hvm_cpu_init();
}

static void
xen_hvm_sysinit(void *arg __unused)
{
	xen_hvm_init(XEN_HVM_INIT_COLD);
}

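/*
 * Xen identifies HVM virtual CPUs by their ACPI processor id, so record
 * that as each CPU's vcpu_id for use in hypercalls that take a vcpu
 * argument.
 */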
static void
xen_set_vcpu_id(void)
{
	struct pcpu *pc;
	int i;

	/* Set vcpu_id to acpi_id */
	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		pc->pc_vcpu_id = pc->pc_acpi_id;
		if (bootverbose)
			printf("XEN: CPU %u has VCPU ID %u\n",
			       i, pc->pc_vcpu_id);
	}
}

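/*
 * Register this CPU's vcpu_info area with the hypervisor via
 * VCPUOP_register_vcpu_info.  If the hypercall fails, fall back to the
 * legacy vcpu_info slot embedded in the shared info page.
 */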
static void
xen_hvm_cpu_init(void)
{
	struct vcpu_register_vcpu_info info;
	struct vcpu_info *vcpu_info;
	int cpu, rc;

	if (!xen_domain())
		return;

	if (DPCPU_GET(vcpu_info) != NULL) {
		/*
		 * vcpu_info is already set.  We're resuming
		 * from a failed migration and our pre-suspend
		 * configuration is still valid.
		 */
		return;
	}

	vcpu_info = DPCPU_PTR(vcpu_local_info);
	cpu = PCPU_GET(vcpu_id);
	info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
	info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));

	rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
	if (rc != 0)
		DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]);
	else
		DPCPU_SET(vcpu_info, vcpu_info);
}

SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
#ifdef SMP
SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_FIRST, xen_setup_cpus, NULL);
#endif
SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL);
