control.c revision 315676
1/*-
2 * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions, and the following disclaimer,
10 *    without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 *    substantially similar to the "NO WARRANTY" disclaimer below
13 *    ("Disclaimer") and any redistribution must be conditioned upon
14 *    including a substantially similar Disclaimer requirement for further
15 *    binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
29 */
30
31/*-
32 * PV suspend/resume support:
33 *
34 * Copyright (c) 2004 Christian Limpach.
35 * Copyright (c) 2004-2006,2008 Kip Macy
36 * All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *      This product includes software developed by Christian Limpach.
49 * 4. The name of the author may not be used to endorse or promote products
50 *    derived from this software without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
53 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
56 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
57 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
61 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 */
63
64/*-
65 * HVM suspend/resume support:
66 *
67 * Copyright (c) 2008 Citrix Systems, Inc.
68 * All rights reserved.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 *    notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 *    notice, this list of conditions and the following disclaimer in the
77 *    documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
80 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
81 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
82 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
83 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
85 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
86 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
87 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
88 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
89 * SUCH DAMAGE.
90 */
91#include <sys/cdefs.h>
92__FBSDID("$FreeBSD: stable/10/sys/dev/xen/control/control.c 315676 2017-03-21 09:38:59Z royger $");
93
94/**
95 * \file control.c
96 *
 * \brief Device driver to respond to control domain events that impact
98 *        this VM.
99 */
100
101#include <sys/param.h>
102#include <sys/systm.h>
103#include <sys/kernel.h>
104#include <sys/malloc.h>
105
106#include <sys/bio.h>
107#include <sys/bus.h>
108#include <sys/conf.h>
109#include <sys/disk.h>
110#include <sys/fcntl.h>
111#include <sys/filedesc.h>
112#include <sys/kdb.h>
113#include <sys/module.h>
114#include <sys/namei.h>
115#include <sys/proc.h>
116#include <sys/reboot.h>
117#include <sys/rman.h>
118#include <sys/sched.h>
119#include <sys/taskqueue.h>
120#include <sys/types.h>
121#include <sys/vnode.h>
122#include <sys/sched.h>
123#include <sys/smp.h>
124#include <sys/eventhandler.h>
125
126#include <geom/geom.h>
127
128#include <machine/_inttypes.h>
129#include <machine/intr_machdep.h>
130#include <machine/apicvar.h>
131
132#include <vm/vm.h>
133#include <vm/vm_extern.h>
134#include <vm/vm_kern.h>
135
136#include <xen/xen-os.h>
137#include <xen/blkif.h>
138#include <xen/evtchn.h>
139#include <xen/gnttab.h>
140#include <xen/xen_intr.h>
141
142#ifdef XENHVM
143#include <xen/hvm.h>
144#endif
145
146#include <xen/interface/event_channel.h>
147#include <xen/interface/grant_table.h>
148
149#include <xen/xenbus/xenbusvar.h>
150
151#include <machine/xen/xenvar.h>
152#include <machine/xen/xenfunc.h>
153
/**
 * Whether the most recent suspend request was cancelled.  The HVM suspend
 * path in this file stores the normalized (0/1) result of
 * HYPERVISOR_suspend() here.
 * NOTE(review): non-static, so presumably read by other Xen code -- confirm.
 */
bool xen_suspend_cancelled;

/*--------------------------- Forward Declarations --------------------------*/
/** Signature shared by all shutdown event handlers: no arguments, no result. */
typedef	void (xctrl_shutdown_handler_t)(void);

static xctrl_shutdown_handler_t xctrl_crash;
static xctrl_shutdown_handler_t xctrl_poweroff;
static xctrl_shutdown_handler_t xctrl_reboot;
static xctrl_shutdown_handler_t xctrl_suspend;
163
164/*-------------------------- Private Data Structures -------------------------*/
165/** Element type for lookup table of event name to handler. */
166struct xctrl_shutdown_reason {
167	const char		 *name;
168	xctrl_shutdown_handler_t *handler;
169};
170
171/** Lookup table for shutdown event name to handler. */
172static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = {
173	{ "poweroff", xctrl_poweroff },
174	{ "reboot",   xctrl_reboot   },
175	{ "suspend",  xctrl_suspend  },
176	{ "crash",    xctrl_crash    },
177	{ "halt",     xctrl_poweroff },
178};
179
180struct xctrl_softc {
181	struct xs_watch    xctrl_watch;
182};
183
184/*------------------------------ Event Handlers ------------------------------*/
185static void
186xctrl_poweroff()
187{
188	shutdown_nice(RB_POWEROFF|RB_HALT);
189}
190
/**
 * \brief Handle a "reboot" request from the control domain.
 *
 * Requests an orderly shutdown through shutdown_nice() with no howto
 * flags set.
 */
static void
xctrl_reboot(void)
{
	shutdown_nice(0);
}
196
197#ifndef XENHVM
198extern void xencons_suspend(void);
199extern void xencons_resume(void);
200
201/* Full PV mode suspension. */
202static void
203xctrl_suspend()
204{
205	int i, j, k, fpp, suspend_cancelled;
206	unsigned long max_pfn, start_info_mfn;
207
208	EVENTHANDLER_INVOKE(power_suspend);
209
210#ifdef SMP
211	struct thread *td;
212	cpuset_t map;
213	u_int cpuid;
214
215	/*
216	 * Bind us to CPU 0 and stop any other VCPUs.
217	 */
218	td = curthread;
219	thread_lock(td);
220	sched_bind(td, 0);
221	thread_unlock(td);
222	cpuid = PCPU_GET(cpuid);
223	KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0"));
224
225	map = all_cpus;
226	CPU_CLR(cpuid, &map);
227	CPU_NAND(&map, &stopped_cpus);
228	if (!CPU_EMPTY(&map))
229		stop_cpus(map);
230#endif
231
232	/*
233	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
234	 * drivers need this.
235	 */
236	mtx_lock(&Giant);
237	if (DEVICE_SUSPEND(root_bus) != 0) {
238		mtx_unlock(&Giant);
239		printf("%s: device_suspend failed\n", __func__);
240#ifdef SMP
241		if (!CPU_EMPTY(&map))
242			restart_cpus(map);
243#endif
244		return;
245	}
246	mtx_unlock(&Giant);
247
248	local_irq_disable();
249
250	xencons_suspend();
251	gnttab_suspend();
252	intr_suspend();
253
254	max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
255
256	void *shared_info = HYPERVISOR_shared_info;
257	HYPERVISOR_shared_info = NULL;
258	pmap_kremove((vm_offset_t) shared_info);
259	PT_UPDATES_FLUSH();
260
261	xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
262	xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
263
264	/*
265	 * We'll stop somewhere inside this hypercall. When it returns,
266	 * we'll start resuming after the restore.
267	 */
268	start_info_mfn = VTOMFN(xen_start_info);
269	pmap_suspend();
270	suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
271	pmap_resume();
272
273	pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
274	HYPERVISOR_shared_info = shared_info;
275
276	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
277		VTOMFN(xen_pfn_to_mfn_frame_list_list);
278
279	fpp = PAGE_SIZE/sizeof(unsigned long);
280	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
281		if ((j % fpp) == 0) {
282			k++;
283			xen_pfn_to_mfn_frame_list_list[k] =
284				VTOMFN(xen_pfn_to_mfn_frame_list[k]);
285			j = 0;
286		}
287		xen_pfn_to_mfn_frame_list[k][j] =
288			VTOMFN(&xen_phys_machine[i]);
289	}
290	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
291
292	gnttab_resume();
293	intr_resume(suspend_cancelled != 0);
294	local_irq_enable();
295	xencons_resume();
296
297#ifdef CONFIG_SMP
298	for_each_cpu(i)
299		vcpu_prepare(i);
300
301#endif
302
303	/*
304	 * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
305	 * the VCPU hotplug callback can race with our vcpu_prepare
306	 */
307	mtx_lock(&Giant);
308	DEVICE_RESUME(root_bus);
309	mtx_unlock(&Giant);
310
311#ifdef SMP
312	thread_lock(curthread);
313	sched_unbind(curthread);
314	thread_unlock(curthread);
315	if (!CPU_EMPTY(&map))
316		restart_cpus(map);
317#endif
318	EVENTHANDLER_INVOKE(power_resume);
319}
320
321static void
322xen_pv_shutdown_final(void *arg, int howto)
323{
324	/*
325	 * Inform the hypervisor that shutdown is complete.
326	 * This is not necessary in HVM domains since Xen
327	 * emulates ACPI in that mode and FreeBSD's ACPI
328	 * support will request this transition.
329	 */
330	if (howto & (RB_HALT | RB_POWEROFF))
331		HYPERVISOR_shutdown(SHUTDOWN_poweroff);
332	else
333		HYPERVISOR_shutdown(SHUTDOWN_reboot);
334}
335
336#else
337
338/* HVM mode suspension. */
339static void
340xctrl_suspend()
341{
342#ifdef SMP
343	cpuset_t cpu_suspend_map;
344#endif
345
346	EVENTHANDLER_INVOKE(power_suspend_early);
347	xs_lock();
348	stop_all_proc();
349	xs_unlock();
350	EVENTHANDLER_INVOKE(power_suspend);
351
352	if (smp_started) {
353		thread_lock(curthread);
354		sched_bind(curthread, 0);
355		thread_unlock(curthread);
356	}
357	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
358
359	/*
360	 * Clear our XenStore node so the toolstack knows we are
361	 * responding to the suspend request.
362	 */
363	xs_write(XST_NIL, "control", "shutdown", "");
364
365	/*
366	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
367	 * drivers need this.
368	 */
369	mtx_lock(&Giant);
370	if (DEVICE_SUSPEND(root_bus) != 0) {
371		mtx_unlock(&Giant);
372		printf("%s: device_suspend failed\n", __func__);
373		return;
374	}
375	mtx_unlock(&Giant);
376
377#ifdef SMP
378	CPU_ZERO(&cpu_suspend_map);	/* silence gcc */
379	if (smp_started) {
380		/*
381		 * Suspend other CPUs. This prevents IPIs while we
382		 * are resuming, and will allow us to reset per-cpu
383		 * vcpu_info on resume.
384		 */
385		cpu_suspend_map = all_cpus;
386		CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
387		if (!CPU_EMPTY(&cpu_suspend_map))
388			suspend_cpus(cpu_suspend_map);
389	}
390#endif
391
392	/*
393	 * Prevent any races with evtchn_interrupt() handler.
394	 */
395	disable_intr();
396	intr_suspend();
397	xen_hvm_suspend();
398
399	xen_suspend_cancelled = !!HYPERVISOR_suspend(0);
400
401	if (!xen_suspend_cancelled) {
402		xen_hvm_resume(false);
403	}
404	intr_resume(xen_suspend_cancelled != 0);
405	enable_intr();
406
407	/*
408	 * Reset grant table info.
409	 */
410	if (!xen_suspend_cancelled) {
411		gnttab_resume();
412	}
413
414#ifdef SMP
415	/* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
416	lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
417	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
418		/*
419		 * Now that event channels have been initialized,
420		 * resume CPUs.
421		 */
422		resume_cpus(cpu_suspend_map);
423	}
424#endif
425
426	/*
427	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
428	 * similar.
429	 */
430	mtx_lock(&Giant);
431	DEVICE_RESUME(root_bus);
432	mtx_unlock(&Giant);
433
434	if (smp_started) {
435		thread_lock(curthread);
436		sched_unbind(curthread);
437		thread_unlock(curthread);
438	}
439
440	resume_all_proc();
441
442	EVENTHANDLER_INVOKE(power_resume);
443
444	if (bootverbose)
445		printf("System resumed after suspension\n");
446
447}
448#endif
449
/**
 * \brief Handle a "crash" request from the control domain.
 *
 * Panics the kernel with a message identifying Xen as the origin.
 */
static void
xctrl_crash(void)
{
	panic("Xen directed crash");
}
455
456/*------------------------------ Event Reception -----------------------------*/
457static void
458xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len)
459{
460	const struct xctrl_shutdown_reason *reason;
461	const struct xctrl_shutdown_reason *last_reason;
462	char *result;
463	int   error;
464	int   result_len;
465
466	error = xs_read(XST_NIL, "control", "shutdown",
467			&result_len, (void **)&result);
468	if (error != 0)
469		return;
470
471	reason = xctrl_shutdown_reasons;
472	last_reason = reason + nitems(xctrl_shutdown_reasons);
473	while (reason < last_reason) {
474
475		if (!strcmp(result, reason->name)) {
476			reason->handler();
477			break;
478		}
479		reason++;
480	}
481
482	free(result, M_XENSTORE);
483}
484
485/*------------------ Private Device Attachment Functions  --------------------*/
486/**
487 * \brief Identify instances of this device type in the system.
488 *
489 * \param driver  The driver performing this identify action.
490 * \param parent  The NewBus parent device for any devices this method adds.
491 */
492static void
493xctrl_identify(driver_t *driver __unused, device_t parent)
494{
495	/*
496	 * A single device instance for our driver is always present
497	 * in a system operating under Xen.
498	 */
499	BUS_ADD_CHILD(parent, 0, driver->name, 0);
500}
501
502/**
503 * \brief Probe for the existance of the Xen Control device
504 *
505 * \param dev  NewBus device_t for this Xen control instance.
506 *
507 * \return  Always returns 0 indicating success.
508 */
509static int
510xctrl_probe(device_t dev)
511{
512	device_set_desc(dev, "Xen Control Device");
513
514	return (0);
515}
516
517/**
518 * \brief Attach the Xen control device.
519 *
520 * \param dev  NewBus device_t for this Xen control instance.
521 *
522 * \return  On success, 0. Otherwise an errno value indicating the
523 *          type of failure.
524 */
525static int
526xctrl_attach(device_t dev)
527{
528	struct xctrl_softc *xctrl;
529
530	xctrl = device_get_softc(dev);
531
532	/* Activate watch */
533	xctrl->xctrl_watch.node = "control/shutdown";
534	xctrl->xctrl_watch.callback = xctrl_on_watch_event;
535	xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl;
536	xs_register_watch(&xctrl->xctrl_watch);
537
538#ifndef XENHVM
539	EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL,
540			      SHUTDOWN_PRI_LAST);
541#endif
542
543	return (0);
544}
545
546/**
547 * \brief Detach the Xen control device.
548 *
549 * \param dev  NewBus device_t for this Xen control device instance.
550 *
551 * \return  On success, 0. Otherwise an errno value indicating the
552 *          type of failure.
553 */
554static int
555xctrl_detach(device_t dev)
556{
557	struct xctrl_softc *xctrl;
558
559	xctrl = device_get_softc(dev);
560
561	/* Release watch */
562	xs_unregister_watch(&xctrl->xctrl_watch);
563
564	return (0);
565}
566
567/*-------------------- Private Device Attachment Data  -----------------------*/
568static device_method_t xctrl_methods[] = {
569	/* Device interface */
570	DEVMETHOD(device_identify,	xctrl_identify),
571	DEVMETHOD(device_probe,         xctrl_probe),
572	DEVMETHOD(device_attach,        xctrl_attach),
573	DEVMETHOD(device_detach,        xctrl_detach),
574
575	DEVMETHOD_END
576};
577
578DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc));
579devclass_t xctrl_devclass;
580
581DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, NULL, NULL);
582