hv_snapshot.c revision 310735
1/*-
2 * Copyright (c) 2016 Microsoft Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/utilities/hv_snapshot.c 310735 2016-12-29 05:32:34Z sephe $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/conf.h>
33#include <sys/uio.h>
34#include <sys/bus.h>
35#include <sys/malloc.h>
36#include <sys/mbuf.h>
37#include <sys/module.h>
38#include <sys/lock.h>
39#include <sys/taskqueue.h>
40#include <sys/selinfo.h>
41#include <sys/sysctl.h>
42#include <sys/poll.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/kthread.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/un.h>
49#include <sys/endian.h>
50#include <sys/sema.h>
51#include <sys/signal.h>
52#include <sys/syslog.h>
53#include <sys/systm.h>
54#include <sys/mutex.h>
55#include <sys/callout.h>
56
57#include <dev/hyperv/include/hyperv.h>
58#include <dev/hyperv/utilities/hv_utilreg.h>
59#include <dev/hyperv/utilities/vmbus_icreg.h>
60
61#include "hv_util.h"
62#include "hv_snapshot.h"
63#include "vmbus_if.h"
64
/*
 * VSS integration-component protocol versions negotiated with the host:
 * message version 5.0 and framework version 3.0.
 */
#define VSS_MAJOR		5
#define VSS_MINOR		0
#define VSS_MSGVER		VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR)

#define VSS_FWVER_MAJOR		3
#define VSS_FWVER		VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0)

/* Seconds to wait for a daemon/app response before failing a request. */
#define TIMEOUT_LIMIT		(15)
/*
 * Operation codes carried in hv_vss_hdr.operation.
 */
enum hv_vss_op {
	VSS_OP_CREATE = 0,
	VSS_OP_DELETE,
	VSS_OP_HOT_BACKUP,
	VSS_OP_GET_DM_INFO,
	VSS_OP_BU_COMPLETE,
	/*
	 * Following operations are only supported with IC version >= 5.0
	 */
	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
	VSS_OP_THAW, /* Unfreeze the file systems */
	VSS_OP_AUTO_RECOVER,
	VSS_OP_COUNT /* Number of operations, must be last */
};
87
/*
 * Header for all VSS messages.
 */
struct hv_vss_hdr {
	struct vmbus_icmsg_hdr	ic_hdr;		/* common IC message header */
	uint8_t			operation;	/* enum hv_vss_op */
	uint8_t			reserved[7];
} __packed;


/*
 * Flag values for the hv_vss_check_feature. Here supports only
 * one value.
 */
#define VSS_HBU_NO_AUTO_RECOVERY		0x00000005

/* Feature flags reported to the host for VSS_OP_HOT_BACKUP. */
struct hv_vss_check_feature {
	uint32_t flags;
} __packed;

/* Flags reported to the host for VSS_OP_GET_DM_INFO. */
struct hv_vss_check_dm_info {
	uint32_t flags;
} __packed;

/* Wire format of a complete VSS message exchanged with the host. */
struct hv_vss_msg {
	union {
		struct hv_vss_hdr vss_hdr;
	} hdr;
	union {
		struct hv_vss_check_feature vss_cf;
		struct hv_vss_check_dm_info dm_info;
	} body;
} __packed;

/* One VSS transaction: user-facing request plus the raw host message. */
struct hv_vss_req {
	struct hv_vss_opt_msg	opt_msg;	/* used to communicate with daemon */
	struct hv_vss_msg	msg;		/* used to communicate with host */
} __packed;
126
/* hv_vss debug control: 0 = quiet, 1 = errors, >1 = errors + info */
static int hv_vss_log = 0;

#define	hv_vss_log_error(...)	do {				\
	if (hv_vss_log > 0)					\
		log(LOG_ERR, "hv_vss: " __VA_ARGS__);		\
} while (0)

#define	hv_vss_log_info(...) do {				\
	if (hv_vss_log > 1)					\
		log(LOG_INFO, "hv_vss: " __VA_ARGS__);		\
} while (0)
139
/* VMBus channel GUID identifying the Hyper-V VSS service. */
static const struct vmbus_ic_desc vmbus_vss_descs[] = {
	{
		.ic_guid = { .hv_guid = {
		    0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
		    0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4,  0x40} },
		.ic_desc = "Hyper-V VSS"
	},
	VMBUS_IC_DESC_END
};

/* Printable names indexed by the HV_VSS_* codes stored in opt_msg.opt. */
static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"};
151
/* character device prototypes */
static d_open_t		hv_vss_dev_open;
static d_close_t	hv_vss_dev_close;
static d_poll_t		hv_vss_dev_daemon_poll;
static d_ioctl_t	hv_vss_dev_daemon_ioctl;

static d_open_t		hv_appvss_dev_open;
static d_close_t	hv_appvss_dev_close;
static d_poll_t		hv_appvss_dev_poll;
static d_ioctl_t	hv_appvss_dev_ioctl;

/* hv_vss character device structure (filesystem freeze/thaw daemon) */
static struct cdevsw hv_vss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_vss_dev_open,
	.d_close	= hv_vss_dev_close,
	.d_poll		= hv_vss_dev_daemon_poll,
	.d_ioctl	= hv_vss_dev_daemon_ioctl,
	.d_name		= FS_VSS_DEV_NAME,
};

/* character device for the application-level freeze/thaw agent */
static struct cdevsw hv_appvss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_appvss_dev_open,
	.d_close	= hv_appvss_dev_close,
	.d_poll		= hv_appvss_dev_poll,
	.d_ioctl	= hv_appvss_dev_ioctl,
	.d_name		= APP_VSS_DEV_NAME,
};
183
struct hv_vss_sc;
/*
 * Per-cdev state: one instance serves the filesystem (daemon) device,
 * another serves the application device.
 */
struct hv_vss_dev_sc {
	/*
	 * msg was transferred from host to notify queue, and
	 * ack queue. Finally, it was recycled to free list.
	 */
	STAILQ_HEAD(, hv_vss_req_internal) 	to_notify_queue;	/* awaiting user read */
	STAILQ_HEAD(, hv_vss_req_internal) 	to_ack_queue;		/* awaiting user write-back */
	struct hv_vss_sc			*sc;		/* back-pointer to driver softc */
	struct proc				*proc_task;	/* user process bound to this cdev */
	struct selinfo				hv_vss_selinfo;	/* poll/select wait state */
};
/*
 * Global state to track and synchronize the transaction requests from the host.
 * The VSS allows user to register their function to do freeze/thaw for application.
 * VSS kernel will notify both vss daemon and user application if it is registered.
 * The implementation state transition is illustrated by:
 * https://clovertrail.github.io/assets/vssdot.png
 */
typedef struct hv_vss_sc {
	struct hv_util_sc			util_sc;
	device_t				dev;

	struct task				task;	/* runs hv_vss_process_request() */

	/*
	 * mutex is used to protect access of list/queue,
	 * callout in request is also used this mutex.
	 */
	struct mtx				pending_mutex;
	/*
	 * req_free_list contains all free items
	 */
	LIST_HEAD(, hv_vss_req_internal)	req_free_list;

	/* Indicates if daemon registered with driver */
	boolean_t				register_done;

	boolean_t				app_register_done;

	/* cdev for file system freeze/thaw */
	struct cdev				*hv_vss_dev;
	/* cdev for application freeze/thaw */
	struct cdev				*hv_appvss_dev;

	/* sc for app */
	struct hv_vss_dev_sc			app_sc;
	/* sc for daemon */
	struct hv_vss_dev_sc			daemon_sc;
} hv_vss_sc;

/* One in-flight VSS transaction, recycled through req_free_list. */
typedef struct hv_vss_req_internal {
	LIST_ENTRY(hv_vss_req_internal)		link;	/* linkage on req_free_list */
	STAILQ_ENTRY(hv_vss_req_internal)	slink;	/* linkage on notify/ack queues */
	struct hv_vss_req			vss_req;

	/* Rcv buffer for communicating with the host*/
	uint8_t					*rcv_buf;
	/* Length of host message */
	uint32_t				host_msg_len;
	/* Host message id */
	uint64_t				host_msg_id;

	hv_vss_sc				*sc;

	/* Timeout timer, initialized on pending_mutex via callout_init_mtx() */
	struct callout				callout;
} hv_vss_req_internal;
254
/*
 * Search "queue" for the request whose opt_msg.msgid equals "id" and,
 * if found, unlink it.  On exit "reqp" points at the match, or is NULL
 * when nothing matched (STAILQ_FOREACH_SAFE leaves it NULL at loop end).
 * Caller must hold sc->pending_mutex.
 */
#define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id)		\
	do {								\
		STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) {		\
			if (reqp->vss_req.opt_msg.msgid == id) {	\
				STAILQ_REMOVE(queue,			\
				    reqp, hv_vss_req_internal, link);	\
				break;					\
			}						\
		}							\
	} while (0)
265
266static bool
267hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc)
268{
269	return (!sc->register_done && sc->daemon_sc.proc_task);
270}
271
272/*
273 * Callback routine that gets called whenever there is a message from host
274 */
275static void
276hv_vss_callback(struct vmbus_channel *chan __unused, void *context)
277{
278	hv_vss_sc *sc = (hv_vss_sc*)context;
279	if (hv_vss_is_daemon_killed_after_launch(sc))
280		hv_vss_log_info("%s: daemon was killed!\n", __func__);
281	if (sc->register_done || sc->daemon_sc.proc_task) {
282		hv_vss_log_info("%s: Queuing work item\n", __func__);
283		if (hv_vss_is_daemon_killed_after_launch(sc))
284			hv_vss_log_info("%s: daemon was killed!\n", __func__);
285		taskqueue_enqueue(taskqueue_thread, &sc->task);
286	} else {
287		hv_vss_log_info("%s: daemon has never been registered\n", __func__);
288	}
289	hv_vss_log_info("%s: received msg from host\n", __func__);
290}
291/*
292 * Send the response back to the host.
293 */
294static void
295hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch,
296    uint32_t recvlen, uint64_t requestid, uint32_t error)
297{
298	struct vmbus_icmsg_hdr *hv_icmsg_hdrp;
299
300	hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf;
301
302	hv_icmsg_hdrp->ic_status = error;
303	hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
304
305	error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0,
306	    rcv_buf, recvlen, requestid);
307	if (error)
308		hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n",
309		    __func__, error);
310}
311
/*
 * Complete a transaction: send "status" for "reqp" back to the host and
 * return the request to the free list.  Caller holds sc->pending_mutex
 * (the free list is protected by it).
 */
static void
hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status)
{
	struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf;
	hv_vss_sc *sc = reqp->sc;
	if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) {
		/* HOT_BACKUP replies must carry the supported-feature flags. */
		msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
	}
	hv_vss_log_info("%s, %s response %s to host\n", __func__,
	    vss_opt_name[reqp->vss_req.opt_msg.opt],
	    status == HV_S_OK ? "Success" : "Fail");
	hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev),
	    reqp->host_msg_len, reqp->host_msg_id, status);
	/* recycle the request */
	LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
}
328
329static void
330hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status)
331{
332	mtx_lock(&reqp->sc->pending_mutex);
333	hv_vss_notify_host_result_locked(reqp, status);
334	mtx_unlock(&reqp->sc->pending_mutex);
335}
336
/*
 * Translate the host operation code of "reqp" into the HV_VSS_* code and
 * copy the resulting opt_msg out to "userdata" for the daemon/app.
 * Unrecognized operations are reported as HV_VSS_NONE.
 */
static void
hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp,
    struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req *hv_vss_dev_buf;
	hv_vss_dev_buf = &reqp->vss_req;
	hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE;
	switch (reqp->vss_req.msg.hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	*userdata = hv_vss_dev_buf->opt_msg;
	/* NOTE(review): this copies data *to* userland; log text is misleading. */
	hv_vss_log_info("%s, read data from user for "
	    "%s (%ju) \n", __func__, vss_opt_name[userdata->opt],
	    (uintmax_t)userdata->msgid);
}
360
/**
 * Remove the request id from app notify or ack queue,
 * and recycle the request by inserting it to free list.
 *
 * When app was notified but not yet sending ack, the request
 * should locate in either notify queue or ack queue.
 *
 * Returns the removed request, or NULL if no queue held it.
 * Caller must hold sc->pending_mutex.
 */
static struct hv_vss_req_internal*
hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id)
{
	struct hv_vss_req_internal *reqp, *tmp;
	/* Try each of the four queues in turn until one yields a match. */
	SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue,
	    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink,
		    tmp, req_id);
	return (reqp);
}
/**
 * Actions for daemon who has been notified.
 *
 * IOCHVVSSREAD handler: pop the oldest request off the notify queue,
 * copy it to userland, and park it on the ack queue until the user
 * writes back a result.  Sets userdata->status to VSS_FAIL when the
 * notify queue is empty (e.g. the request already timed out).
 */
static void
hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal *reqp;
	mtx_lock(&dev_sc->sc->pending_mutex);
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) {
		reqp = STAILQ_FIRST(&dev_sc->to_notify_queue);
		hv_vss_cp_vssreq_to_user(reqp, userdata);
		STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink);
		/* insert the msg to queue for write */
		STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink);
		userdata->status = VSS_SUCCESS;
	} else {
		/* Timeout occur, thus request was removed from queue. */
		hv_vss_log_info("%s: notify queue is empty!\n", __func__);
		userdata->status = VSS_FAIL;
	}
	mtx_unlock(&dev_sc->sc->pending_mutex);
}
407
/*
 * Queue "reqp" on the device's notify queue and wake any poll(2) sleeper
 * so the daemon/app picks it up.  selwakeup() runs after the mutex is
 * dropped, as in the rest of this driver.
 */
static void
hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp)
{
	uint32_t opt = reqp->vss_req.opt_msg.opt;
	mtx_lock(&dev_sc->sc->pending_mutex);
	STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink);
	hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__,
	    vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid,
	    &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon");
	mtx_unlock(&dev_sc->sc->pending_mutex);
	selwakeup(&dev_sc->hv_vss_selinfo);
}
420
/**
 * Actions for daemon who has acknowledged.
 *
 * IOCHVVSSWRITE handler for the filesystem daemon: look up the acked
 * request on the ack queue, then either complete it to the host
 * (freeze/check, or thaw with no app registered) or forward it to the
 * application device (thaw with an app registered).  Sets
 * userdata->status to VSS_FAIL if the request is no longer queued
 * (already timed out).
 */
static void
hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal	*reqp, *tmp;
	uint64_t			req_id;
	int				opt;
	uint32_t			status;

	opt = userdata->opt;
	req_id = userdata->msgid;
	status = userdata->status;
	/* make sure the reserved fields are all zeros. */
	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
	    __offsetof(struct hv_vss_opt_msg, reserved));
	mtx_lock(&dev_sc->sc->pending_mutex);
	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	if (reqp == NULL) {
		hv_vss_log_info("%s Timeout: fail to find daemon ack request\n",
		    __func__);
		userdata->status = VSS_FAIL;
		return;
	}
	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
	hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__,
	    status, vss_opt_name[opt], (uintmax_t)req_id);
	switch (opt) {
	case HV_VSS_CHECK:
	case HV_VSS_FREEZE:
		/* Daemon is the last hop for freeze/check; finish the request. */
		callout_drain(&reqp->callout);
		hv_vss_notify_host_result(reqp,
		    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		break;
	case HV_VSS_THAW:
		if (dev_sc->sc->app_register_done) {
			if (status == VSS_SUCCESS) {
				/* Thaw continues: daemon(fs) -> app. */
				hv_vss_notify(&dev_sc->sc->app_sc, reqp);
			} else {
				/* handle error */
				callout_drain(&reqp->callout);
				hv_vss_notify_host_result(reqp, HV_E_FAIL);
			}
		} else {
			callout_drain(&reqp->callout);
			hv_vss_notify_host_result(reqp,
			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		}
		break;
	}
}
474
475/**
476 * Actions for app who has acknowledged.
477 */
478static void
479hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
480{
481	struct hv_vss_req_internal	*reqp, *tmp;
482	uint64_t			req_id;
483	int				opt;
484	uint8_t				status;
485
486	opt = userdata->opt;
487	req_id = userdata->msgid;
488	status = userdata->status;
489	/* make sure the reserved fields are all zeros. */
490	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
491	    __offsetof(struct hv_vss_opt_msg, reserved));
492	mtx_lock(&dev_sc->sc->pending_mutex);
493	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
494	mtx_unlock(&dev_sc->sc->pending_mutex);
495	if (reqp == NULL) {
496		hv_vss_log_info("%s Timeout: fail to find app ack request\n",
497		    __func__);
498		userdata->status = VSS_FAIL;
499		return;
500	}
501	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
502	hv_vss_log_info("%s, get response %d from app for %s (%ju) \n",
503	    __func__, status, vss_opt_name[opt], (uintmax_t)req_id);
504	if (dev_sc->sc->register_done) {
505		switch (opt) {
506		case HV_VSS_CHECK:
507		case HV_VSS_FREEZE:
508			if (status == VSS_SUCCESS) {
509				hv_vss_notify(&dev_sc->sc->daemon_sc, reqp);
510			} else {
511				/* handle error */
512				callout_drain(&reqp->callout);
513				hv_vss_notify_host_result(reqp, HV_E_FAIL);
514			}
515			break;
516		case HV_VSS_THAW:
517			callout_drain(&reqp->callout);
518			hv_vss_notify_host_result(reqp,
519			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
520			break;
521		}
522	} else {
523		hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__);
524	}
525}
526
/*
 * Open handler for the filesystem VSS daemon device.  Only a single
 * daemon may register at a time (EBUSY otherwise).  Registration also
 * invokes hv_vss_callback() once so a message that arrived before the
 * daemon attached is picked up.
 */
static int
hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct proc     *td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
	hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);

	if (dev_sc->sc->register_done)
		return (EBUSY);

	dev_sc->sc->register_done = true;
	/* Replay any host message that arrived before the daemon attached. */
	hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc);

	dev_sc->proc_task = curproc;
	return (0);
}
546
547static int
548hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
549				 struct thread *td)
550{
551	struct proc     *td_proc;
552	td_proc = td->td_proc;
553
554	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
555
556	hv_vss_log_info("%s: %s closes device \"%s\"\n",
557	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);
558	dev_sc->sc->register_done = false;
559	return (0);
560}
561
562static int
563hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
564    struct thread *td)
565{
566	struct proc			*td_proc;
567	struct hv_vss_dev_sc		*sc;
568
569	td_proc = td->td_proc;
570	sc = (struct hv_vss_dev_sc*)dev->si_drv1;
571
572	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
573
574	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
575	switch(cmd) {
576	case IOCHVVSSREAD:
577		hv_vss_notified(sc, userdata);
578		break;
579	case IOCHVVSSWRITE:
580		hv_vss_daemon_acked(sc, userdata);
581		break;
582	}
583	return (0);
584}
585
/*
 * hv_vss_daemon poll invokes this function to check if data is available
 * for daemon to read.  Returns POLLIN when the notify queue holds a
 * request; otherwise records the thread for selwakeup().
 * (Identical logic to hv_appvss_dev_poll(), on the daemon device.)
 */
static int
hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/**
	 * if there is data ready, inform daemon's poll
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}
608
609static int
610hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
611{
612	struct proc     *td_proc;
613	td_proc = td->td_proc;
614
615	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
616	hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n",
617	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
618
619	if (dev_sc->sc->app_register_done)
620		return (EBUSY);
621
622	dev_sc->sc->app_register_done = true;
623	dev_sc->proc_task = curproc;
624	return (0);
625}
626
627static int
628hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
629				 struct thread *td)
630{
631	struct proc     *td_proc;
632	td_proc = td->td_proc;
633
634	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
635
636	hv_vss_log_info("%s: %s closes device \"%s\".\n",
637	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
638	dev_sc->sc->app_register_done = false;
639	return (0);
640}
641
642static int
643hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
644    struct thread *td)
645{
646	struct proc			*td_proc;
647	struct hv_vss_dev_sc		*dev_sc;
648
649	td_proc = td->td_proc;
650	dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
651
652	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);
653
654	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
655	switch(cmd) {
656	case IOCHVVSSREAD:
657		hv_vss_notified(dev_sc, userdata);
658		break;
659	case IOCHVVSSWRITE:
660		hv_vss_app_acked(dev_sc, userdata);
661		break;
662	}
663	return (0);
664}
665
/*
 * The application's poll(2) on the appvss device invokes this function
 * to check if notification data is available for the application to read.
 */
static int
hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/**
	 * if there is data ready, inform the application's poll;
	 * otherwise record the thread for a later selwakeup().
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}
688
/*
 * Callout handler fired when neither daemon nor app responded within
 * TIMEOUT_LIMIT seconds.  The callout was set up with callout_init_mtx()
 * on pending_mutex, so it runs with that mutex held: pull the request
 * off whichever queue it sits on and fail it back to the host.
 */
static void
hv_vss_timeout(void *arg)
{
	hv_vss_req_internal *reqp = arg;
	hv_vss_req_internal *request;	/* only consumed by the KASSERT below */
	hv_vss_sc* sc = reqp->sc;
	uint64_t req_id = reqp->vss_req.opt_msg.msgid;
	/* This thread is locked */
	KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!"));
	request = hv_vss_drain_req_queue_locked(sc, req_id);
	KASSERT(request != NULL, ("timeout but fail to find request"));
	hv_vss_notify_host_result_locked(reqp, HV_E_FAIL);
}
702
/*
 * Initialize an internal request structure from a freeze/thaw/check
 * message just received from the host.
 */
static void
hv_vss_init_req(hv_vss_req_internal *reqp,
    uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc)
{
	struct timespec vm_ts;
	struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;

	/* Clear everything up to (but not including) the embedded callout. */
	memset(reqp, 0, __offsetof(hv_vss_req_internal, callout));
	reqp->host_msg_len = recvlen;
	reqp->host_msg_id = requestid;
	reqp->rcv_buf = vss_buf;
	reqp->sc = sc;
	/* Keep a private copy of the host message for the user-side request. */
	memcpy(&reqp->vss_req.msg,
	    (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg));
	/* set the opt for users */
	switch (msg->hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		reqp->vss_req.opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		reqp->vss_req.opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	/* Use a timestamp as msg request ID */
	nanotime(&vm_ts);
	reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec;
}
736
737static hv_vss_req_internal*
738hv_vss_get_new_req_locked(hv_vss_sc *sc)
739{
740	hv_vss_req_internal *reqp;
741	if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) ||
742	    !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) ||
743	    !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) ||
744	    !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
745		/*
746		 * There is request coming from host before
747		 * finishing previous requests
748		 */
749		hv_vss_log_info("%s: Warning: there is new request "
750		    "coming before finishing previous requests\n", __func__);
751		return (NULL);
752	}
753	if (LIST_EMPTY(&sc->req_free_list)) {
754		/* TODO Error: no buffer */
755		hv_vss_log_info("Error: No buffer\n");
756		return (NULL);
757	}
758	reqp = LIST_FIRST(&sc->req_free_list);
759	LIST_REMOVE(reqp, link);
760	return (reqp);
761}
762
/*
 * Dispatch a fresh host request to the first hop in its notification
 * chain and arm the per-request timeout callout.
 */
static void
hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt)
{
	hv_vss_sc *sc = reqp->sc;
	/*
	 * Freeze/Check notification sequence: kernel -> app -> daemon(fs)
	 * Thaw notification sequence:         kernel -> daemon(fs) -> app
	 *
	 * We should wake up the daemon, in case it's doing poll().
	 * The response should be received within TIMEOUT_LIMIT (15)
	 * seconds, otherwise, trigger timeout.
	 */
	switch (opt) {
	case VSS_OP_FREEZE:
	case VSS_OP_HOT_BACKUP:
		if (sc->app_register_done)
			hv_vss_notify(&sc->app_sc, reqp);
		else
			hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	case VSS_OP_THAW:
		hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	}
}
791
/*
 * Function to read the vss request buffer from host
 * and interact with daemon
 */
static void
hv_vss_process_request(void *context, int pending __unused)
{
	uint8_t *vss_buf;
	struct vmbus_channel *channel;
	uint32_t recvlen = 0;
	uint64_t requestid;
	struct vmbus_icmsg_hdr *icmsghdrp;
	int ret = 0;
	hv_vss_sc *sc;
	hv_vss_req_internal *reqp;

	hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__);

	sc = (hv_vss_sc*)context;
	vss_buf = sc->util_sc.receive_buffer;
	channel = vmbus_get_channel(sc->dev);

	recvlen = sc->util_sc.ic_buflen;
	ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
	KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
	/* XXX check recvlen to make sure that it contains enough data */

	/* Drain every message currently queued on the channel. */
	while ((ret == 0) && (recvlen > 0)) {
		icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf;

		if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) {
			/* Version negotiation is answered in place. */
			ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf,
			    &recvlen, VSS_FWVER, VSS_MSGVER);
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, ret);
			hv_vss_log_info("%s: version negotiated\n", __func__);
		} else if (!hv_vss_is_daemon_killed_after_launch(sc)) {
			/* A live daemon is registered: start a transaction. */
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch(msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
			case VSS_OP_THAW:
			case VSS_OP_HOT_BACKUP:
				mtx_lock(&sc->pending_mutex);
				reqp = hv_vss_get_new_req_locked(sc);
				mtx_unlock(&sc->pending_mutex);
				if (reqp == NULL) {
					/* ignore this request from host */
					break;
				}
				hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc);
				hv_vss_log_info("%s: receive %s (%ju) from host\n",
				    __func__,
				    vss_opt_name[reqp->vss_req.opt_msg.opt],
				    (uintmax_t)reqp->vss_req.opt_msg.msgid);
				hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation);
				break;
			case VSS_OP_GET_DM_INFO:
				/* Answered directly without userland help. */
				hv_vss_log_info("%s: receive GET_DM_INFO from host\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
				    recvlen, requestid, HV_S_OK);
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
		} else {
			/* daemon was killed for some reason after it was launched */
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch(msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
				hv_vss_log_info("%s: response fail for FREEZE\n",
				    __func__);
				break;
			case VSS_OP_THAW:
				hv_vss_log_info("%s: response fail for THAW\n",
				    __func__);
				break;
			case VSS_OP_HOT_BACKUP:
				hv_vss_log_info("%s: response fail for HOT_BACKUP\n",
				    __func__);
				msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
				break;
			case VSS_OP_GET_DM_INFO:
				hv_vss_log_info("%s: response fail for GET_DM_INFO\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
			/* No daemon to serve the request; fail it to the host. */
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, HV_E_FAIL);
		}
		/*
		 * Try reading next buffer
		 */
		recvlen = sc->util_sc.ic_buflen;
		ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
		KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
		/* XXX check recvlen to make sure that it contains enough data */

		hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
		    __func__, context, ret, recvlen);
	}
}
902
903static int
904hv_vss_probe(device_t dev)
905{
906	return (vmbus_ic_probe(dev, vmbus_vss_descs));
907}
908
/*
 * Initialize all request queues and pre-allocate a small pool of
 * request structures on the free list.  Each request's timeout callout
 * is bound to pending_mutex.  Always returns 0 (M_WAITOK cannot fail).
 */
static int
hv_vss_init_send_receive_queue(device_t dev)
{
	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
	int i;
	const int max_list = 4; /* It is big enough for the list */
	struct hv_vss_req_internal* reqp;

	LIST_INIT(&sc->req_free_list);
	STAILQ_INIT(&sc->daemon_sc.to_notify_queue);
	STAILQ_INIT(&sc->daemon_sc.to_ack_queue);
	STAILQ_INIT(&sc->app_sc.to_notify_queue);
	STAILQ_INIT(&sc->app_sc.to_ack_queue);

	for (i = 0; i < max_list; i++) {
		reqp = malloc(sizeof(struct hv_vss_req_internal),
		    M_DEVBUF, M_WAITOK|M_ZERO);
		LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
		callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0);
	}
	return (0);
}
931
932static int
933hv_vss_destroy_send_receive_queue(device_t dev)
934{
935	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
936	hv_vss_req_internal* reqp;
937
938	while (!LIST_EMPTY(&sc->req_free_list)) {
939		reqp = LIST_FIRST(&sc->req_free_list);
940		LIST_REMOVE(reqp, link);
941		free(reqp, M_DEVBUF);
942	}
943
944	while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) {
945		reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue);
946		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink);
947		free(reqp, M_DEVBUF);
948	}
949
950	while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) {
951		reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue);
952		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink);
953		free(reqp, M_DEVBUF);
954	}
955
956	while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) {
957		reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue);
958		STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink);
959		free(reqp, M_DEVBUF);
960	}
961
962	while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
963		reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue);
964		STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink);
965		free(reqp, M_DEVBUF);
966	}
967	return (0);
968}
969
970static int
971hv_vss_attach(device_t dev)
972{
973	int error;
974	struct sysctl_oid_list *child;
975	struct sysctl_ctx_list *ctx;
976
977	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
978
979	sc->dev = dev;
980	mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF);
981
982	ctx = device_get_sysctl_ctx(dev);
983	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
984
985	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log",
986	    CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level");
987
988	TASK_INIT(&sc->task, 0, hv_vss_process_request, sc);
989	hv_vss_init_send_receive_queue(dev);
990	/* create character device for file system freeze/thaw */
991	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
992		    &sc->hv_vss_dev,
993		    &hv_vss_cdevsw,
994		    0,
995		    UID_ROOT,
996		    GID_WHEEL,
997		    0640,
998		    FS_VSS_DEV_NAME);
999
1000	if (error != 0) {
1001		hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error);
1002		return (error);
1003	}
1004	sc->hv_vss_dev->si_drv1 = &sc->daemon_sc;
1005	sc->daemon_sc.sc = sc;
1006	/* create character device for application freeze/thaw */
1007	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
1008		    &sc->hv_appvss_dev,
1009		    &hv_appvss_cdevsw,
1010		    0,
1011		    UID_ROOT,
1012		    GID_WHEEL,
1013		    0640,
1014		    APP_VSS_DEV_NAME);
1015
1016	if (error != 0) {
1017		hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error);
1018		return (error);
1019	}
1020	sc->hv_appvss_dev->si_drv1 = &sc->app_sc;
1021	sc->app_sc.sc = sc;
1022
1023	return hv_util_attach(dev, hv_vss_callback);
1024}
1025
1026static int
1027hv_vss_detach(device_t dev)
1028{
1029	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
1030	mtx_destroy(&sc->pending_mutex);
1031	if (sc->daemon_sc.proc_task != NULL) {
1032		PROC_LOCK(sc->daemon_sc.proc_task);
1033		kern_psignal(sc->daemon_sc.proc_task, SIGKILL);
1034		PROC_UNLOCK(sc->daemon_sc.proc_task);
1035	}
1036	if (sc->app_sc.proc_task != NULL) {
1037		PROC_LOCK(sc->app_sc.proc_task);
1038		kern_psignal(sc->app_sc.proc_task, SIGKILL);
1039		PROC_UNLOCK(sc->app_sc.proc_task);
1040	}
1041	hv_vss_destroy_send_receive_queue(dev);
1042	destroy_dev(sc->hv_vss_dev);
1043	destroy_dev(sc->hv_appvss_dev);
1044	return hv_util_detach(dev);
1045}
1046
/* Newbus device methods for the hvvss driver. */
static device_method_t vss_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, hv_vss_probe),
	DEVMETHOD(device_attach, hv_vss_attach),
	DEVMETHOD(device_detach, hv_vss_detach),
	{ 0, 0 }
};

static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)};

static devclass_t vss_devclass;

/* Attach under the vmbus bus; the module depends on vmbus. */
DRIVER_MODULE(hv_vss, vmbus, vss_driver, vss_devclass, NULL, NULL);
MODULE_VERSION(hv_vss, 1);
MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1);
1062