xenstore.c revision 315675
1/******************************************************************************
2 * xenstore.c
3 *
4 * Low-level kernel interface to the XenStore.
5 *
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 * Copyright (C) 2009,2010 Spectra Logic Corporation
8 *
9 * This file may be distributed separately from the Linux kernel, or
10 * incorporated into other software packages, subject to the following license:
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/xen/xenstore/xenstore.c 315675 2017-03-21 09:27:24Z royger $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sx.h>
42#include <sys/syslog.h>
43#include <sys/malloc.h>
44#include <sys/systm.h>
45#include <sys/proc.h>
46#include <sys/kthread.h>
47#include <sys/sbuf.h>
48#include <sys/sysctl.h>
49#include <sys/uio.h>
50#include <sys/unistd.h>
51
52#include <machine/stdarg.h>
53
54#include <xen/xen-os.h>
55#include <xen/gnttab.h>
56#include <xen/hypervisor.h>
57#include <xen/xen_intr.h>
58
59#include <xen/interface/hvm/params.h>
60#include <xen/hvm.h>
61
62#include <xen/xenstore/xenstorevar.h>
63#include <xen/xenstore/xenstore_internal.h>
64
65#include <vm/vm.h>
66#include <vm/pmap.h>
67
68/**
69 * \file xenstore.c
70 * \brief XenStore interface
71 *
72 * The XenStore interface is a simple storage system that is a means of
73 * communicating state and configuration data between the Xen Domain 0
74 * and the various guest domains.  All configuration data other than
75 * a small amount of essential information required during the early
76 * boot process of launching a Xen aware guest, is managed using the
77 * XenStore.
78 *
79 * The XenStore is ASCII string based, and has a structure and semantics
80 * similar to a filesystem.  There are files and directories, the directories
 81 * able to contain files or other directories.  The depth of the hierarchy
82 * is only limited by the XenStore's maximum path length.
83 *
84 * The communication channel between the XenStore service and other
85 * domains is via two, guest specific, ring buffers in a shared memory
86 * area.  One ring buffer is used for communicating in each direction.
87 * The grant table references for this shared memory are given to the
88 * guest either via the xen_start_info structure for a fully para-
89 * virtualized guest, or via HVM hypercalls for a hardware virtualized
90 * guest.
91 *
92 * The XenStore communication relies on an event channel and thus
93 * interrupts.  For this reason, the attachment of the XenStore
94 * relies on an interrupt driven configuration hook to hold off
95 * boot processing until communication with the XenStore service
96 * can be established.
97 *
98 * Several Xen services depend on the XenStore, most notably the
99 * XenBus used to discover and manage Xen devices.  These services
100 * are implemented as NewBus child attachments to a bus exported
101 * by this XenStore driver.
102 */
103
104static struct xs_watch *find_watch(const char *token);
105
106MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
107
108/**
109 * Pointer to shared memory communication structures allowing us
110 * to communicate with the XenStore service.
111 *
112 * When operating in full PV mode, this pointer is set early in kernel
113 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
114 * to get the guest frame number for the shared page and then map it
115 * into kva.  See xs_init() for details.
116 */
117struct xenstore_domain_interface *xen_store;
118
119/*-------------------------- Private Data Structures ------------------------*/
120
/**
 * Structure capturing messages received from the XenStore service.
 *
 * Every message read off the response ring by xs_rcv_thread() is wrapped
 * in one of these and queued on either the reply list (request replies)
 * or the watch event list (asynchronous XS_WATCH_EVENT messages).
 */
struct xs_stored_msg {
	/* Linkage on xs.reply_list or xs.watch_events. */
	TAILQ_ENTRY(xs_stored_msg) list;

	/* Wire header exactly as received from the XenStore service. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/*
			 * NUL terminated reply body, allocated from
			 * M_XENSTORE; ownership passes to the consumer
			 * in xs_read_reply().
			 */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/* Registered watch matching this event's token. */
			struct xs_watch *handle;
			/* String vector produced by split(). */
			const char **vec;
			/* Number of entries in vec. */
			u_int vec_size;
		} watch;
	} u;
};
TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
144
/**
 * Container for all XenStore related state.
 */
struct xs_softc {
	/** Newbus device for the XenStore. */
	device_t xs_dev;

	/**
	 * Lock serializing access to ring producer/consumer
	 * indexes.  Use of this lock guarantees that wakeups
	 * of blocking readers/writers are not missed due to
	 * races with the XenStore service.
	 */
	struct mtx ring_lock;

	/*
	 * Mutex used to ensure exclusive access to the outgoing
	 * communication ring.  We use a lock type that can be
	 * held while sleeping so that xs_write() can block waiting
	 * for space in the ring to free up, without allowing another
	 * writer to come in and corrupt a partial message write.
	 */
	struct sx request_mutex;

	/**
	 * A list of replies to our requests.
	 *
	 * The reply list is filled by xs_rcv_thread().  It
	 * is consumed by the context that issued the request
	 * to which a reply is made.  The requester blocks in
	 * xs_read_reply().
	 *
	 * \note Only one requesting context can be active at a time.
	 *       This is guaranteed by the request_mutex and ensures
	 *	 that the requester sees replies matching the order
	 *	 of its requests.
	 */
	struct xs_stored_msg_list reply_list;

	/** Lock protecting the reply list. */
	struct mtx reply_lock;

	/**
	 * List of registered watches.
	 */
	struct xs_watch_list  registered_watches;

	/** Lock protecting the registered watches list. */
	struct mtx registered_watches_lock;

	/**
	 * List of pending watch callback events.
	 */
	struct xs_stored_msg_list watch_events;

	/** Lock protecting the watch callback list. */
	struct mtx watch_events_lock;

	/**
	 * Sleepable lock used to prevent VM suspension while a
	 * xenstore transaction is outstanding.
	 *
	 * Each active transaction holds a shared lock on the
	 * suspend mutex.  Our suspend method blocks waiting
	 * to acquire an exclusive lock.  This guarantees that
	 * suspend processing will only proceed once all active
	 * transactions have been retired.
	 */
	struct sx suspend_mutex;

	/**
	 * The processid of the xenwatch thread.
	 */
	pid_t xenwatch_pid;

	/**
	 * Sleepable mutex used to gate the execution of XenStore
	 * watch event callbacks.
	 *
	 * xenwatch_thread holds an exclusive lock on this mutex
	 * while delivering event callbacks, and xenstore_unregister_watch()
	 * uses an exclusive lock of this mutex to guarantee that no
	 * callbacks of the just unregistered watch are pending
	 * before returning to its caller.
	 */
	struct sx xenwatch_mutex;

#ifdef XENHVM
	/**
	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
	 * of the true machine frame number into our "physical address space".
	 */
	unsigned long gpfn;
#endif

	/**
	 * The event channel for communicating with the
	 * XenStore service.
	 */
	int evtchn;

	/** Handle for XenStore interrupts. */
	xen_intr_handle_t xen_intr_handle;

	/**
	 * Interrupt driven config hook allowing us to defer
	 * attaching children until interrupts (and thus communication
	 * with the XenStore service) are available.
	 */
	struct intr_config_hook xs_attachcb;
};
256
257/*-------------------------------- Global Data ------------------------------*/
258static struct xs_softc xs;
259
260/*------------------------- Private Utility Functions -----------------------*/
261
262/**
263 * Count and optionally record pointers to a number of NUL terminated
264 * strings in a buffer.
265 *
266 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
267 * \param dest	   An array to store pointers to each string found in strings.
268 * \param len	   The length of the buffer pointed to by strings.
269 *
270 * \return  A count of the number of strings found.
271 */
272static u_int
273extract_strings(const char *strings, const char **dest, u_int len)
274{
275	u_int num;
276	const char *p;
277
278	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) {
279		if (dest != NULL)
280			*dest++ = p;
281		num++;
282	}
283
284	return (num);
285}
286
287/**
288 * Convert a contiguous buffer containing a series of NUL terminated
289 * strings into an array of pointers to strings.
290 *
291 * The returned pointer references the array of string pointers which
292 * is followed by the storage for the string data.  It is the client's
293 * responsibility to free this storage.
294 *
295 * The storage addressed by strings is free'd prior to split returning.
296 *
297 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
298 * \param len	   The length of the buffer pointed to by strings.
299 * \param num	   The number of strings found and returned in the strings
300 *                 array.
301 *
302 * \return  An array of pointers to the strings found in the input buffer.
303 */
304static const char **
305split(char *strings, u_int len, u_int *num)
306{
307	const char **ret;
308
309	/* Protect against unterminated buffers. */
310	if (len > 0)
311		strings[len - 1] = '\0';
312
313	/* Count the strings. */
314	*num = extract_strings(strings, /*dest*/NULL, len);
315
316	/* Transfer to one big alloc for easy freeing by the caller. */
317	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
318	memcpy(&ret[*num], strings, len);
319	free(strings, M_XENSTORE);
320
321	/* Extract pointers to newly allocated array. */
322	strings = (char *)&ret[*num];
323	(void)extract_strings(strings, /*dest*/ret, len);
324
325	return (ret);
326}
327
328/*------------------------- Public Utility Functions -------------------------*/
329/*------- API comments for these methods can be found in xenstorevar.h -------*/
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path;

	/* Build "dir" or "dir/name"; an empty name yields just "dir". */
	path = sbuf_new_auto();
	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
345
346/*-------------------- Low Level Communication Management --------------------*/
347/**
348 * Interrupt handler for the XenStore event channel.
349 *
350 * XenStore reads and writes block on "xen_store" for buffer
351 * space.  Wakeup any blocking operations when the XenStore
352 * service has modified the queues.
353 */
354static void
355xs_intr(void * arg __unused /*__attribute__((unused))*/)
356{
357
358	/*
359	 * Hold ring lock across wakeup so that clients
360	 * cannot miss a wakeup.
361	 */
362	mtx_lock(&xs.ring_lock);
363	wakeup(xen_store);
364	mtx_unlock(&xs.ring_lock);
365}
366
367/**
368 * Verify that the indexes for a ring are valid.
369 *
370 * The difference between the producer and consumer cannot
371 * exceed the size of the ring.
372 *
373 * \param cons  The consumer index for the ring to test.
374 * \param prod  The producer index for the ring to test.
375 *
376 * \retval 1  If indexes are in range.
377 * \retval 0  If the indexes are out of range.
378 */
379static int
380xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
381{
382
383	return ((prod - cons) <= XENSTORE_RING_SIZE);
384}
385
386/**
387 * Return a pointer to, and the length of, the contiguous
388 * free region available for output in a ring buffer.
389 *
390 * \param cons  The consumer index for the ring.
391 * \param prod  The producer index for the ring.
392 * \param buf   The base address of the ring's storage.
393 * \param len   The amount of contiguous storage available.
394 *
395 * \return  A pointer to the start location of the free region.
396 */
397static void *
398xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
399    char *buf, uint32_t *len)
400{
401
402	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
403	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
404		*len = XENSTORE_RING_SIZE - (prod - cons);
405	return (buf + MASK_XENSTORE_IDX(prod));
406}
407
408/**
409 * Return a pointer to, and the length of, the contiguous
410 * data available to read from a ring buffer.
411 *
412 * \param cons  The consumer index for the ring.
413 * \param prod  The producer index for the ring.
414 * \param buf   The base address of the ring's storage.
415 * \param len   The amount of contiguous data available to read.
416 *
417 * \return  A pointer to the start location of the available data.
418 */
419static const void *
420xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
421    const char *buf, uint32_t *len)
422{
423
424	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
425	if ((prod - cons) < *len)
426		*len = prod - cons;
427	return (buf + MASK_XENSTORE_IDX(cons));
428}
429
/**
 * Transmit data to the XenStore service.
 *
 * \param tdata  A pointer to the contiguous data to send.
 * \param len    The amount of data to send.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure (EINTR/ERESTART if interrupted by a
 *          signal, EIO if the ring indexes are found corrupt).
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 * \invariant  xs.request_mutex exclusively locked.
 */
static int
xs_write_store(const void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;
	int error;

	sx_assert(&xs.request_mutex, SX_XLOCKED);
	while (len != 0) {
		void *dst;
		u_int avail;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->req_cons;
		prod = xen_store->req_prod;
		if ((prod - cons) == XENSTORE_RING_SIZE) {
			/*
			 * Output ring is full. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			     "xbwrite", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);

			/* Try again. */
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/*
		 * Verify queue sanity.  On corruption, reset both indexes
		 * and fail rather than risk an out of bounds access.
		 */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->req_cons = xen_store->req_prod = 0;
			return (EIO);
		}

		/* Copy at most the contiguous free region this pass. */
		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/*
		 * The store to the producer index, which indicates
		 * to the other side that new data has arrived, must
		 * be visible only after our copy of the data into the
		 * ring has completed.
		 */
		wmb();
		xen_store->req_prod += avail;

		/*
		 * xen_intr_signal() implies mb(). The other side will see
		 * the change to req_prod at the time of the interrupt.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}
515
/**
 * Receive data from the XenStore service.
 *
 * \param tdata  A pointer to the contiguous buffer to receive the data.
 * \param len    The amount of data to receive.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure (EINTR/ERESTART if interrupted by a
 *          signal, EIO if the ring indexes are found corrupt).
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 *
 * \note xs_read does not perform any internal locking to guarantee
 *       serial access to the incoming ring buffer.  However, there
 *	 is only one context processing reads: xs_rcv_thread().
 */
static int
xs_read_store(void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	char *data = (char *)tdata;
	int error;

	while (len != 0) {
		u_int avail;
		const char *src;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->rsp_cons;
		prod = xen_store->rsp_prod;
		if (cons == prod) {
			/*
			 * Nothing to read. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbread", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/*
		 * Verify queue sanity.  On corruption, reset both indexes
		 * and fail rather than risk an out of bounds access.
		 */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->rsp_cons = xen_store->rsp_prod = 0;
			return (EIO);
		}

		/* Copy at most the contiguous available region this pass. */
		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
		if (avail > len)
			avail = len;

		/*
		 * Ensure the data we read is related to the indexes
		 * we read above.
		 */
		rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/*
		 * Ensure that the producer of this ring does not see
		 * the ring space as free until after we have copied it
		 * out.
		 */
		mb();
		xen_store->rsp_cons += avail;

		/*
		 * xen_intr_signal() implies mb(). The producer will see
		 * the updated consumer index when the event is delivered.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}
606
607/*----------------------- Received Message Processing ------------------------*/
/**
 * Block reading the next message from the XenStore service and
 * process the result.
 *
 * Replies are queued on xs.reply_list for consumption by
 * xs_read_reply().  Watch events with a matching registered watch are
 * queued on xs.watch_events for delivery by xenwatch_thread(); events
 * whose watch has been unregistered are discarded.
 *
 * \param type  The returned type of the XenStore message received.
 *
 * \return  0 on success.  Otherwise an errno value indicating the
 *          type of failure encountered.
 */
static int
xs_process_msg(enum xsd_sockmsg_type *type)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
	if (error) {
		free(msg, M_XENSTORE);
		return (error);
	}

	/*
	 * NOTE(review): hdr.len comes off the shared ring and is not
	 * validated against any upper bound before this allocation —
	 * confirm the XenStore backend is trusted to keep it sane.
	 */
	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
	error = xs_read_store(body, msg->hdr.len);
	if (error) {
		free(body, M_XENSTORE);
		free(msg, M_XENSTORE);
		return (error);
	}
	/* Terminate so reply bodies can be handled as C strings. */
	body[msg->hdr.len] = '\0';

	*type = msg->hdr.type;
	if (msg->hdr.type == XS_WATCH_EVENT) {
		/* split() consumes (frees) body. */
		msg->u.watch.vec = split(body, msg->hdr.len,
		    &msg->u.watch.vec_size);

		/*
		 * Hold the watch list lock across the lookup and enqueue
		 * so the watch cannot be unregistered underneath us.
		 */
		mtx_lock(&xs.registered_watches_lock);
		msg->u.watch.handle = find_watch(
		    msg->u.watch.vec[XS_WATCH_TOKEN]);
		if (msg->u.watch.handle != NULL) {
			mtx_lock(&xs.watch_events_lock);
			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
			wakeup(&xs.watch_events);
			mtx_unlock(&xs.watch_events_lock);
		} else {
			/* No registered watch: drop the event. */
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}
		mtx_unlock(&xs.registered_watches_lock);
	} else {
		/* A reply: ownership of body passes to the requester. */
		msg->u.reply.body = body;
		mtx_lock(&xs.reply_lock);
		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
		wakeup(&xs.reply_list);
		mtx_unlock(&xs.reply_lock);
	}

	return (0);
}
668
669/**
670 * Thread body of the XenStore receive thread.
671 *
672 * This thread blocks waiting for data from the XenStore service
673 * and processes and received messages.
674 */
675static void
676xs_rcv_thread(void *arg __unused)
677{
678	int error;
679	enum xsd_sockmsg_type type;
680
681	for (;;) {
682		error = xs_process_msg(&type);
683		if (error)
684			printf("XENSTORE error %d while reading message\n",
685			    error);
686	}
687}
688
689/*---------------- XenStore Message Request/Reply Processing -----------------*/
690/**
691 * Filter invoked before transmitting any message to the XenStore service.
692 *
693 * The role of the filter may expand, but currently serves to manage
694 * the interactions of messages with transaction state.
695 *
696 * \param request_msg_type  The message type for the request.
697 */
698static inline void
699xs_request_filter(uint32_t request_msg_type)
700{
701	if (request_msg_type == XS_TRANSACTION_START)
702		sx_slock(&xs.suspend_mutex);
703}
704
705/**
706 * Filter invoked after transmitting any message to the XenStore service.
707 *
708 * The role of the filter may expand, but currently serves to manage
709 * the interactions of messages with transaction state.
710 *
711 * \param request_msg_type     The message type for the original request.
712 * \param reply_msg_type       The message type for any received reply.
713 * \param request_reply_error  The error status from the attempt to send
714 *                             the request or retrieve the reply.
715 */
716static inline void
717xs_reply_filter(uint32_t request_msg_type,
718    uint32_t reply_msg_type, int request_reply_error)
719{
720	/*
721	 * The count of transactions drops if we attempted
722	 * to end a transaction (even if that attempt fails
723	 * in error), we receive a transaction end acknowledgement,
724	 * or if our attempt to begin a transaction fails.
725	 */
726	if (request_msg_type == XS_TRANSACTION_END
727	 || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
728	 || (request_msg_type == XS_TRANSACTION_START
729	  && (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
730		sx_sunlock(&xs.suspend_mutex);
731
732}
733
734#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
735
736/**
737 * Convert a XenStore error string into an errno number.
738 *
739 * \param errorstring  The error string to convert.
740 *
741 * \return  The errno best matching the input string.
742 *
743 * \note Unknown error strings are converted to EINVAL.
744 */
745static int
746xs_get_error(const char *errorstring)
747{
748	u_int i;
749
750	for (i = 0; i < xsd_error_count; i++) {
751		if (!strcmp(errorstring, xsd_errors[i].errstring))
752			return (xsd_errors[i].errnum);
753	}
754	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
755	    errorstring);
756	return (EINVAL);
757}
758
759/**
760 * Block waiting for a reply to a message request.
761 *
762 * \param type	  The returned type of the reply.
763 * \param len	  The returned body length of the reply.
764 * \param result  The returned body of the reply.
765 *
766 * \return  0 on success.  Otherwise an errno indicating the
767 *          cause of failure.
768 */
769static int
770xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
771{
772	struct xs_stored_msg *msg;
773	char *body;
774	int error;
775
776	mtx_lock(&xs.reply_lock);
777	while (TAILQ_EMPTY(&xs.reply_list)) {
778		error = mtx_sleep(&xs.reply_list, &xs.reply_lock,
779		    PCATCH, "xswait", hz/10);
780		if (error && error != EWOULDBLOCK) {
781			mtx_unlock(&xs.reply_lock);
782			return (error);
783		}
784	}
785	msg = TAILQ_FIRST(&xs.reply_list);
786	TAILQ_REMOVE(&xs.reply_list, msg, list);
787	mtx_unlock(&xs.reply_lock);
788
789	*type = msg->hdr.type;
790	if (len)
791		*len = msg->hdr.len;
792	body = msg->u.reply.body;
793
794	free(msg, M_XENSTORE);
795	*result = body;
796	return (0);
797}
798
799/**
800 * Pass-thru interface for XenStore access by userland processes
801 * via the XenStore device.
802 *
803 * Reply type and length data are returned by overwriting these
804 * fields in the passed in request message.
805 *
806 * \param msg	  A properly formatted message to transmit to
807 *		  the XenStore service.
808 * \param result  The returned body of the reply.
809 *
810 * \return  0 on success.  Otherwise an errno indicating the cause
811 *          of failure.
812 *
813 * \note The returned result is provided in malloced storage and thus
814 *       must be free'd by the caller with 'free(result, M_XENSTORE);
815 */
816int
817xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
818{
819	uint32_t request_type;
820	int error;
821
822	request_type = msg->type;
823	xs_request_filter(request_type);
824
825	sx_xlock(&xs.request_mutex);
826	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
827		error = xs_read_reply(&msg->type, &msg->len, result);
828	sx_xunlock(&xs.request_mutex);
829
830	xs_reply_filter(request_type, msg->type, error);
831
832	return (error);
833}
834
/**
 * Send a message with an optionally multi-part body to the XenStore service.
 *
 * \param t              The transaction to use for this request.
 * \param request_type   The type of message to send.
 * \param iovec          Pointers to the body sections of the request.
 * \param num_vecs       The number of body sections in the request.
 * \param len            The returned length of the reply (may be NULL).
 * \param result         The returned body of the reply (may be NULL, in
 *                       which case the reply body is freed internally).
 *
 * \return  0 on success.  Otherwise an errno indicating
 *          the cause of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(*result, M_XENSTORE);
 */
static int
xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
{
	struct xsd_sockmsg msg;
	void *ret = NULL;
	u_int i;
	int error;

	msg.tx_id = t.id;
	msg.req_id = 0;
	msg.type = request_type;
	/* The wire length is the sum of all body sections. */
	msg.len = 0;
	for (i = 0; i < num_vecs; i++)
		msg.len += iovec[i].iov_len;

	xs_request_filter(request_type);

	/* Serialize with all other requesters while on the ring. */
	sx_xlock(&xs.request_mutex);
	error = xs_write_store(&msg, sizeof(msg));
	if (error) {
		printf("xs_talkv failed %d\n", error);
		goto error_lock_held;
	}

	for (i = 0; i < num_vecs; i++) {
		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
		if (error) {
			printf("xs_talkv failed %d\n", error);
			goto error_lock_held;
		}
	}

	error = xs_read_reply(&msg.type, len, &ret);

error_lock_held:
	sx_xunlock(&xs.request_mutex);
	/* The reply filter must run on error paths too. */
	xs_reply_filter(request_type, msg.type, error);
	if (error)
		return (error);

	if (msg.type == XS_ERROR) {
		error = xs_get_error(ret);
		free(ret, M_XENSTORE);
		return (error);
	}

	/* Reply is either error or an echo of our request message type. */
	KASSERT(msg.type == request_type, ("bad xenstore message type"));

	if (result)
		*result = ret;
	else
		free(ret, M_XENSTORE);

	return (0);
}
908
909/**
910 * Wrapper for xs_talkv allowing easy transmission of a message with
911 * a single, contiguous, message body.
912 *
913 * \param t              The transaction to use for this request.
914 * \param request_type   The type of message to send.
915 * \param body           The body of the request.
916 * \param len            The returned length of the reply.
917 * \param result         The returned body of the reply.
918 *
919 * \return  0 on success.  Otherwise an errno indicating
920 *          the cause of failure.
921 *
922 * \note The returned result is provided in malloced storage and thus
923 *       must be free'd by the caller with 'free(*result, M_XENSTORE);
924 */
925static int
926xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
927    const char *body, u_int *len, void **result)
928{
929	struct iovec iovec;
930
931	iovec.iov_base = (void *)(uintptr_t)body;
932	iovec.iov_len = strlen(body) + 1;
933
934	return (xs_talkv(t, request_type, &iovec, 1, len, result));
935}
936
937/*------------------------- XenStore Watch Support ---------------------------*/
938/**
939 * Transmit a watch request to the XenStore service.
940 *
941 * \param path    The path in the XenStore to watch.
942 * \param tocken  A unique identifier for this watch.
943 *
944 * \return  0 on success.  Otherwise an errno indicating the
945 *          cause of failure.
946 */
947static int
948xs_watch(const char *path, const char *token)
949{
950	struct iovec iov[2];
951
952	iov[0].iov_base = (void *)(uintptr_t) path;
953	iov[0].iov_len = strlen(path) + 1;
954	iov[1].iov_base = (void *)(uintptr_t) token;
955	iov[1].iov_len = strlen(token) + 1;
956
957	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
958}
959
960/**
961 * Transmit an uwatch request to the XenStore service.
962 *
963 * \param path    The path in the XenStore to watch.
964 * \param tocken  A unique identifier for this watch.
965 *
966 * \return  0 on success.  Otherwise an errno indicating the
967 *          cause of failure.
968 */
969static int
970xs_unwatch(const char *path, const char *token)
971{
972	struct iovec iov[2];
973
974	iov[0].iov_base = (void *)(uintptr_t) path;
975	iov[0].iov_len = strlen(path) + 1;
976	iov[1].iov_base = (void *)(uintptr_t) token;
977	iov[1].iov_len = strlen(token) + 1;
978
979	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
980}
981
982/**
983 * Convert from watch token (unique identifier) to the associated
984 * internal tracking structure for this watch.
985 *
986 * \param tocken  The unique identifier for the watch to find.
987 *
988 * \return  A pointer to the found watch structure or NULL.
989 */
990static struct xs_watch *
991find_watch(const char *token)
992{
993	struct xs_watch *i, *cmp;
994
995	cmp = (void *)strtoul(token, NULL, 16);
996
997	LIST_FOREACH(i, &xs.registered_watches, list)
998		if (i == cmp)
999			return (i);
1000
1001	return (NULL);
1002}
1003
/**
 * Thread body of the XenStore watch event dispatch thread.
 *
 * Dequeues events from xs.watch_events and invokes each event's
 * registered callback while holding xs.xenwatch_mutex, which is
 * what unregister code locks against to drain pending callbacks.
 */
static void
xenwatch_thread(void *unused)
{
	struct xs_stored_msg *msg;

	for (;;) {

		/* Poll for work; EWOULDBLOCK from the timeout is benign. */
		mtx_lock(&xs.watch_events_lock);
		while (TAILQ_EMPTY(&xs.watch_events))
			mtx_sleep(&xs.watch_events,
			    &xs.watch_events_lock,
			    PWAIT | PCATCH, "waitev", hz/10);

		/*
		 * Drop the event lock before taking the sleepable
		 * xenwatch mutex, then re-take it to dequeue.  The
		 * queue may have changed in the window, hence the
		 * NULL check below.
		 */
		mtx_unlock(&xs.watch_events_lock);
		sx_xlock(&xs.xenwatch_mutex);

		mtx_lock(&xs.watch_events_lock);
		msg = TAILQ_FIRST(&xs.watch_events);
		if (msg)
			TAILQ_REMOVE(&xs.watch_events, msg, list);
		mtx_unlock(&xs.watch_events_lock);

		if (msg != NULL) {
			/*
			 * XXX There are messages coming in with a NULL
			 * XXX callback.  This deserves further investigation;
			 * XXX the workaround here simply prevents the kernel
			 * XXX from panic'ing on startup.
			 */
			if (msg->u.watch.handle->callback != NULL)
				msg->u.watch.handle->callback(
					msg->u.watch.handle,
					(const char **)msg->u.watch.vec,
					msg->u.watch.vec_size);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}

		sx_xunlock(&xs.xenwatch_mutex);
	}
}
1048
1049/*----------- XenStore Configuration, Initialization, and Control ------------*/
1050/**
1051 * Setup communication channels with the XenStore service.
1052 *
1053 * \return  On success, 0. Otherwise an errno value indicating the
1054 *          type of failure.
1055 */
1056static int
1057xs_init_comms(void)
1058{
1059	int error;
1060
1061	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1062		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1063		    "(%08x:%08x): fixing up\n",
1064		    xen_store->rsp_cons, xen_store->rsp_prod);
1065		xen_store->rsp_cons = xen_store->rsp_prod;
1066	}
1067
1068	xen_intr_unbind(&xs.xen_intr_handle);
1069
1070	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1071	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1072	    &xs.xen_intr_handle);
1073	if (error) {
1074		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1075		return (error);
1076	}
1077
1078	return (0);
1079}
1080
1081/*------------------ Private Device Attachment Functions  --------------------*/
/**
 * NewBus identify method: add the XenStore device as a child of the
 * given parent bus.
 *
 * \param driver  This device driver's driver_t (unused).
 * \param parent  The NewBus parent to add the child to.
 */
static void
xs_identify(driver_t *driver, device_t parent)
{

	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
}
1088
1089/**
1090 * Probe for the existance of the XenStore.
1091 *
1092 * \param dev
1093 */
1094static int
1095xs_probe(device_t dev)
1096{
1097	/*
1098	 * We are either operating within a PV kernel or being probed
1099	 * as the child of the successfully attached xenpci device.
1100	 * Thus we are in a Xen environment and there will be a XenStore.
1101	 * Unconditionally return success.
1102	 */
1103	device_set_desc(dev, "XenStore");
1104	return (0);
1105}
1106
/**
 * Deferred (post interrupt configuration) portion of XenStore
 * attachment.
 *
 * Runs xs_dev_init() and then probes/attaches all child devices that
 * depend on the XenStore, finally tearing down the config_intrhook
 * that scheduled this callback.
 *
 * \param arg  Unused (registered as NULL in xs_attach()).
 */
static void
xs_attach_deferred(void *arg)
{
	xs_dev_init();

	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);

	config_intrhook_disestablish(&xs.xs_attachcb);
}
1117
1118/**
1119 * Attach to the XenStore.
1120 *
1121 * This routine also prepares for the probe/attach of drivers that rely
1122 * on the XenStore.
1123 */
1124static int
1125xs_attach(device_t dev)
1126{
1127	int error;
1128
1129	/* Allow us to get device_t from softc and vice-versa. */
1130	xs.xs_dev = dev;
1131	device_set_softc(dev, &xs);
1132
1133	/*
1134	 * This seems to be a layering violation.  The XenStore is just
1135	 * one of many clients of the Grant Table facility.  It happens
1136	 * to be the first and a gating consumer to all other devices,
1137	 * so this does work.  A better place would be in the PV support
1138	 * code for fully PV kernels and the xenpci driver for HVM kernels.
1139	 */
1140	error = gnttab_init();
1141	if (error != 0) {
1142		log(LOG_WARNING,
1143		    "XENSTORE: Error initializing grant tables: %d\n", error);
1144		return (ENXIO);
1145	}
1146
1147	/* Initialize the interface to xenstore. */
1148	struct proc *p;
1149
1150#ifdef XENHVM
1151	xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
1152	xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
1153	xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE);
1154#else
1155	xs.evtchn = xen_start_info->store_evtchn;
1156#endif
1157
1158	TAILQ_INIT(&xs.reply_list);
1159	TAILQ_INIT(&xs.watch_events);
1160
1161	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1162	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1163	sx_init(&xs.xenwatch_mutex, "xenwatch");
1164	sx_init(&xs.request_mutex, "xenstore request");
1165	sx_init(&xs.suspend_mutex, "xenstore suspend");
1166	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1167	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1168
1169	/* Initialize the shared memory rings to talk to xenstored */
1170	error = xs_init_comms();
1171	if (error)
1172		return (error);
1173
1174	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1175	    0, "xenwatch");
1176	if (error)
1177		return (error);
1178	xs.xenwatch_pid = p->p_pid;
1179
1180	error = kproc_create(xs_rcv_thread, NULL, NULL,
1181	    RFHIGHPID, 0, "xenstore_rcv");
1182
1183	xs.xs_attachcb.ich_func = xs_attach_deferred;
1184	xs.xs_attachcb.ich_arg = NULL;
1185	config_intrhook_establish(&xs.xs_attachcb);
1186
1187	return (error);
1188}
1189
1190/**
1191 * Prepare for suspension of this VM by halting XenStore access after
1192 * all transactions and individual requests have completed.
1193 */
1194static int
1195xs_suspend(device_t dev)
1196{
1197	int error;
1198
1199	/* Suspend child Xen devices. */
1200	error = bus_generic_suspend(dev);
1201	if (error != 0)
1202		return (error);
1203
1204	sx_xlock(&xs.suspend_mutex);
1205	sx_xlock(&xs.request_mutex);
1206
1207	return (0);
1208}
1209
1210/**
1211 * Resume XenStore operations after this VM is resumed.
1212 */
1213static int
1214xs_resume(device_t dev __unused)
1215{
1216	struct xs_watch *watch;
1217	char token[sizeof(watch) * 2 + 1];
1218
1219	xs_init_comms();
1220
1221	sx_xunlock(&xs.request_mutex);
1222
1223	/*
1224	 * No need for registered_watches_lock: the suspend_mutex
1225	 * is sufficient.
1226	 */
1227	LIST_FOREACH(watch, &xs.registered_watches, list) {
1228		sprintf(token, "%lX", (long)watch);
1229		xs_watch(watch->node, token);
1230	}
1231
1232	sx_xunlock(&xs.suspend_mutex);
1233
1234	/* Resume child Xen devices. */
1235	bus_generic_resume(dev);
1236
1237	return (0);
1238}
1239
1240/*-------------------- Private Device Attachment Data  -----------------------*/
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface: delegate to the generic bus implementations. */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};
1260
DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
static devclass_t xenstore_devclass;

#ifdef XENHVM
/* HVM kernels: the XenStore attaches below the xenpci device. */
DRIVER_MODULE(xenstore, xenpci, xenstore_driver, xenstore_devclass, 0, 0);
#else
/* PV kernels: the XenStore attaches directly to nexus. */
DRIVER_MODULE(xenstore, nexus, xenstore_driver, xenstore_devclass, 0, 0);
#endif
1269
1270/*------------------------------- Sysctl Data --------------------------------*/
1271/* XXX Shouldn't the node be somewhere else? */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
/* Event channel port bound for xenstored notifications (xs.evtchn). */
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
/* Kernel virtual address of the shared XenStore ring page. */
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1275
1276/*-------------------------------- Public API --------------------------------*/
1277/*------- API comments for these methods can be found in xenstorevar.h -------*/
1278int
1279xs_directory(struct xs_transaction t, const char *dir, const char *node,
1280    u_int *num, const char ***result)
1281{
1282	struct sbuf *path;
1283	char *strings;
1284	u_int len = 0;
1285	int error;
1286
1287	path = xs_join(dir, node);
1288	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1289	    (void **)&strings);
1290	sbuf_delete(path);
1291	if (error)
1292		return (error);
1293
1294	*result = split(strings, len, num);
1295
1296	return (0);
1297}
1298
1299int
1300xs_exists(struct xs_transaction t, const char *dir, const char *node)
1301{
1302	const char **d;
1303	int error, dir_n;
1304
1305	error = xs_directory(t, dir, node, &dir_n, &d);
1306	if (error)
1307		return (0);
1308	free(d, M_XENSTORE);
1309	return (1);
1310}
1311
1312int
1313xs_read(struct xs_transaction t, const char *dir, const char *node,
1314    u_int *len, void **result)
1315{
1316	struct sbuf *path;
1317	void *ret;
1318	int error;
1319
1320	path = xs_join(dir, node);
1321	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1322	sbuf_delete(path);
1323	if (error)
1324		return (error);
1325	*result = ret;
1326	return (0);
1327}
1328
1329int
1330xs_write(struct xs_transaction t, const char *dir, const char *node,
1331    const char *string)
1332{
1333	struct sbuf *path;
1334	struct iovec iovec[2];
1335	int error;
1336
1337	path = xs_join(dir, node);
1338
1339	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1340	iovec[0].iov_len = sbuf_len(path) + 1;
1341	iovec[1].iov_base = (void *)(uintptr_t) string;
1342	iovec[1].iov_len = strlen(string);
1343
1344	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1345	sbuf_delete(path);
1346
1347	return (error);
1348}
1349
1350int
1351xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1352{
1353	struct sbuf *path;
1354	int ret;
1355
1356	path = xs_join(dir, node);
1357	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1358	sbuf_delete(path);
1359
1360	return (ret);
1361}
1362
1363int
1364xs_rm(struct xs_transaction t, const char *dir, const char *node)
1365{
1366	struct sbuf *path;
1367	int ret;
1368
1369	path = xs_join(dir, node);
1370	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1371	sbuf_delete(path);
1372
1373	return (ret);
1374}
1375
/**
 * Recursively remove the XenStore directory tree rooted at base/node.
 *
 * The tree is walked iteratively: when removing an entry fails with
 * ENOTEMPTY we descend into it, and once a directory is empty we
 * remove it and pop back to its parent by truncating the current path
 * at the last '/'.  If the caller did not supply a transaction
 * (xbt.id == 0), the whole removal runs in a local transaction that
 * is retried from scratch on EAGAIN.
 */
int
xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
{
	struct xs_transaction local_xbt;
	struct sbuf *root_path_sbuf;
	struct sbuf *cur_path_sbuf;
	char *root_path;
	char *cur_path;
	const char **dir;
	int error;
	int empty;

retry:
	root_path_sbuf = xs_join(base, node);
	cur_path_sbuf  = xs_join(base, node);
	root_path      = sbuf_data(root_path_sbuf);
	cur_path       = sbuf_data(cur_path_sbuf);
	dir            = NULL;
	local_xbt.id   = 0;

	/* Start a private transaction if the caller did not pass one. */
	if (xbt.id == 0) {
		error = xs_transaction_start(&local_xbt);
		if (error != 0)
			goto out;
		xbt = local_xbt;
	}

	empty = 0;
	while (1) {
		u_int count;
		u_int i;

		error = xs_directory(xbt, cur_path, "", &count, &dir);
		if (error)
			goto out;

		for (i = 0; i < count; i++) {
			error = xs_rm(xbt, cur_path, dir[i]);
			if (error == ENOTEMPTY) {
				struct sbuf *push_dir;

				/*
				 * Descend to clear out this sub directory.
				 * We'll return to cur_dir once push_dir
				 * is empty.
				 */
				push_dir = xs_join(cur_path, dir[i]);
				sbuf_delete(cur_path_sbuf);
				cur_path_sbuf = push_dir;
				cur_path = sbuf_data(cur_path_sbuf);
				break;
			} else if (error != 0) {
				goto out;
			}
		}

		free(dir, M_XENSTORE);
		dir = NULL;

		/* i == count means we did not descend on this pass. */
		if (i == count) {
			char *last_slash;

			/* Directory is empty.  It is now safe to remove. */
			error = xs_rm(xbt, cur_path, "");
			if (error != 0)
				goto out;

			if (!strcmp(cur_path, root_path))
				break;

			/* Return to processing the parent directory. */
			last_slash = strrchr(cur_path, '/');
			KASSERT(last_slash != NULL,
				("xs_rm_tree: mangled path %s", cur_path));
			*last_slash = '\0';
		}
	}

out:
	sbuf_delete(cur_path_sbuf);
	sbuf_delete(root_path_sbuf);
	if (dir != NULL)
		free(dir, M_XENSTORE);

	/* End (abort on error) a transaction we started ourselves. */
	if (local_xbt.id != 0) {
		int terror;

		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
		xbt.id = 0;
		if (terror == EAGAIN && error == 0)
			goto retry;
	}
	return (error);
}
1470
1471int
1472xs_transaction_start(struct xs_transaction *t)
1473{
1474	char *id_str;
1475	int error;
1476
1477	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1478	    (void **)&id_str);
1479	if (error == 0) {
1480		t->id = strtoul(id_str, NULL, 0);
1481		free(id_str, M_XENSTORE);
1482	}
1483	return (error);
1484}
1485
1486int
1487xs_transaction_end(struct xs_transaction t, int abort)
1488{
1489	char abortstr[2];
1490
1491	if (abort)
1492		strcpy(abortstr, "F");
1493	else
1494		strcpy(abortstr, "T");
1495
1496	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1497}
1498
1499int
1500xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1501     int *scancountp, const char *fmt, ...)
1502{
1503	va_list ap;
1504	int error, ns;
1505	char *val;
1506
1507	error = xs_read(t, dir, node, NULL, (void **) &val);
1508	if (error)
1509		return (error);
1510
1511	va_start(ap, fmt);
1512	ns = vsscanf(val, fmt, ap);
1513	va_end(ap);
1514	free(val, M_XENSTORE);
1515	/* Distinctive errno. */
1516	if (ns == 0)
1517		return (ERANGE);
1518	if (scancountp)
1519		*scancountp = ns;
1520	return (0);
1521}
1522
1523int
1524xs_vprintf(struct xs_transaction t,
1525    const char *dir, const char *node, const char *fmt, va_list ap)
1526{
1527	struct sbuf *sb;
1528	int error;
1529
1530	sb = sbuf_new_auto();
1531	sbuf_vprintf(sb, fmt, ap);
1532	sbuf_finish(sb);
1533	error = xs_write(t, dir, node, sbuf_data(sb));
1534	sbuf_delete(sb);
1535
1536	return (error);
1537}
1538
1539int
1540xs_printf(struct xs_transaction t, const char *dir, const char *node,
1541     const char *fmt, ...)
1542{
1543	va_list ap;
1544	int error;
1545
1546	va_start(ap, fmt);
1547	error = xs_vprintf(t, dir, node, fmt, ap);
1548	va_end(ap);
1549
1550	return (error);
1551}
1552
int
xs_gather(struct xs_transaction t, const char *dir, ...)
{
	va_list ap;
	const char *name;
	int error;

	va_start(ap, dir);
	error = 0;
	/*
	 * Arguments come in (node-name, scanf-format, result-pointer)
	 * triples, terminated by a NULL name.  Stop at the first
	 * failing read or parse.
	 */
	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
		const char *fmt = va_arg(ap, char *);
		void *result = va_arg(ap, void *);
		char *p;

		error = xs_read(t, dir, name, NULL, (void **) &p);
		if (error)
			break;

		if (fmt) {
			if (sscanf(p, fmt, result) == 0)
				error = EINVAL;
			free(p, M_XENSTORE);
		} else
			*(char **)result = p;	/* Caller owns the string. */
	}
	va_end(ap);

	return (error);
}
1582
/**
 * Register a XenStore watch, adding it to our local tracking list
 * and asking xenstored to begin generating events for watch->node.
 *
 * \param watch  The watch to register.
 *
 * \return  0 on success; otherwise an errno indicating the failure.
 */
int
xs_register_watch(struct xs_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof(watch) * 2 + 1];
	int error;

	sprintf(token, "%lX", (long)watch);

	/* Exclude suspend processing while manipulating watch state. */
	sx_slock(&xs.suspend_mutex);

	/*
	 * Publish the watch locally before telling xenstored, so an
	 * event arriving immediately can be matched by find_watch().
	 */
	mtx_lock(&xs.registered_watches_lock);
	KASSERT(find_watch(token) == NULL, ("watch already registered"));
	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if (error == EEXIST)
		error = 0;

	/* Roll back the local registration on failure. */
	if (error != 0) {
		mtx_lock(&xs.registered_watches_lock);
		LIST_REMOVE(watch, list);
		mtx_unlock(&xs.registered_watches_lock);
	}

	sx_sunlock(&xs.suspend_mutex);

	return (error);
}
1615
/**
 * Remove a previously-registered XenStore watch and discard any of
 * its events still queued for delivery.
 *
 * \param watch  The watch to remove.
 */
void
xs_unregister_watch(struct xs_watch *watch)
{
	struct xs_stored_msg *msg, *tmp;
	char token[sizeof(watch) * 2 + 1];
	int error;

	/* The token is the ASCII hex rendering of the watch pointer. */
	sprintf(token, "%lX", (long)watch);

	sx_slock(&xs.suspend_mutex);

	mtx_lock(&xs.registered_watches_lock);
	if (find_watch(token) == NULL) {
		/* Never registered (or already removed): nothing to do. */
		mtx_unlock(&xs.registered_watches_lock);
		sx_sunlock(&xs.suspend_mutex);
		return;
	}
	LIST_REMOVE(watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	/* Ask xenstored to stop generating events for this path. */
	error = xs_unwatch(watch->node, token);
	if (error)
		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
		    watch->node, error);

	sx_sunlock(&xs.suspend_mutex);

	/* Cancel pending watch events. */
	mtx_lock(&xs.watch_events_lock);
	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
		if (msg->u.watch.handle != watch)
			continue;
		TAILQ_REMOVE(&xs.watch_events, msg, list);
		free(msg->u.watch.vec, M_XENSTORE);
		free(msg, M_XENSTORE);
	}
	mtx_unlock(&xs.watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (curproc->p_pid != xs.xenwatch_pid) {
		sx_xlock(&xs.xenwatch_mutex);
		sx_xunlock(&xs.xenwatch_mutex);
	}
}
1660
1661void
1662xs_lock(void)
1663{
1664
1665	sx_xlock(&xs.request_mutex);
1666	return;
1667}
1668
1669void
1670xs_unlock(void)
1671{
1672
1673	sx_xunlock(&xs.request_mutex);
1674	return;
1675}
1676
1677