/* xenstore.c revision 316722 */
1/******************************************************************************
2 * xenstore.c
3 *
4 * Low-level kernel interface to the XenStore.
5 *
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 * Copyright (C) 2009,2010 Spectra Logic Corporation
8 *
9 * This file may be distributed separately from the Linux kernel, or
10 * incorporated into other software packages, subject to the following license:
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: releng/11.0/sys/dev/xen/xenstore/xenstore.c 316722 2017-04-12 06:24:35Z delphij $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sx.h>
42#include <sys/syslog.h>
43#include <sys/malloc.h>
44#include <sys/systm.h>
45#include <sys/proc.h>
46#include <sys/kthread.h>
47#include <sys/sbuf.h>
48#include <sys/sysctl.h>
49#include <sys/uio.h>
50#include <sys/unistd.h>
51#include <sys/queue.h>
52#include <sys/taskqueue.h>
53
54#include <machine/stdarg.h>
55
56#include <xen/xen-os.h>
57#include <xen/hypervisor.h>
58#include <xen/xen_intr.h>
59
60#include <xen/interface/hvm/params.h>
61#include <xen/hvm.h>
62
63#include <xen/xenstore/xenstorevar.h>
64#include <xen/xenstore/xenstore_internal.h>
65
66#include <vm/vm.h>
67#include <vm/pmap.h>
68
69/**
70 * \file xenstore.c
71 * \brief XenStore interface
72 *
73 * The XenStore interface is a simple storage system that is a means of
74 * communicating state and configuration data between the Xen Domain 0
75 * and the various guest domains.  All configuration data other than
76 * a small amount of essential information required during the early
77 * boot process of launching a Xen aware guest, is managed using the
78 * XenStore.
79 *
80 * The XenStore is ASCII string based, and has a structure and semantics
81 * similar to a filesystem.  There are files and directories, the directories
82 * able to contain files or other directories.  The depth of the hierarchy
83 * is only limited by the XenStore's maximum path length.
84 *
85 * The communication channel between the XenStore service and other
86 * domains is via two, guest specific, ring buffers in a shared memory
87 * area.  One ring buffer is used for communicating in each direction.
88 * The grant table references for this shared memory are given to the
89 * guest either via the xen_start_info structure for a fully para-
90 * virtualized guest, or via HVM hypercalls for a hardware virtualized
91 * guest.
92 *
93 * The XenStore communication relies on an event channel and thus
94 * interrupts.  For this reason, the attachment of the XenStore
95 * relies on an interrupt driven configuration hook to hold off
96 * boot processing until communication with the XenStore service
97 * can be established.
98 *
99 * Several Xen services depend on the XenStore, most notably the
100 * XenBus used to discover and manage Xen devices.  These services
101 * are implemented as NewBus child attachments to a bus exported
102 * by this XenStore driver.
103 */
104
/* Forward declaration: maps a watch token back to its tracking structure. */
static struct xs_watch *find_watch(const char *token);

/* Malloc type under which all XenStore allocations are accounted. */
MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
108
109/**
110 * Pointer to shared memory communication structures allowing us
111 * to communicate with the XenStore service.
112 *
113 * When operating in full PV mode, this pointer is set early in kernel
114 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
115 * to get the guest frame number for the shared page and then map it
116 * into kva.  See xs_init() for details.
117 */
118struct xenstore_domain_interface *xen_store;
119
120/*-------------------------- Private Data Structures ------------------------*/
121
122/**
123 * Structure capturing messages received from the XenStore service.
124 */
125struct xs_stored_msg {
126	TAILQ_ENTRY(xs_stored_msg) list;
127
128	struct xsd_sockmsg hdr;
129
130	union {
131		/* Queued replies. */
132		struct {
133			char *body;
134		} reply;
135
136		/* Queued watch events. */
137		struct {
138			struct xs_watch *handle;
139			const char **vec;
140			u_int vec_size;
141		} watch;
142	} u;
143};
144TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
145
146/**
147 * Container for all XenStore related state.
148 */
149struct xs_softc {
150	/** Newbus device for the XenStore. */
151	device_t xs_dev;
152
153	/**
154	 * Lock serializing access to ring producer/consumer
155	 * indexes.  Use of this lock guarantees that wakeups
156	 * of blocking readers/writers are not missed due to
157	 * races with the XenStore service.
158	 */
159	struct mtx ring_lock;
160
161	/*
162	 * Mutex used to insure exclusive access to the outgoing
163	 * communication ring.  We use a lock type that can be
164	 * held while sleeping so that xs_write() can block waiting
165	 * for space in the ring to free up, without allowing another
166	 * writer to come in and corrupt a partial message write.
167	 */
168	struct sx request_mutex;
169
170	/**
171	 * A list of replies to our requests.
172	 *
173	 * The reply list is filled by xs_rcv_thread().  It
174	 * is consumed by the context that issued the request
175	 * to which a reply is made.  The requester blocks in
176	 * xs_read_reply().
177	 *
178	 * /note Only one requesting context can be active at a time.
179	 *       This is guaranteed by the request_mutex and insures
180	 *	 that the requester sees replies matching the order
181	 *	 of its requests.
182	 */
183	struct xs_stored_msg_list reply_list;
184
185	/** Lock protecting the reply list. */
186	struct mtx reply_lock;
187
188	/**
189	 * List of registered watches.
190	 */
191	struct xs_watch_list  registered_watches;
192
193	/** Lock protecting the registered watches list. */
194	struct mtx registered_watches_lock;
195
196	/**
197	 * List of pending watch callback events.
198	 */
199	struct xs_stored_msg_list watch_events;
200
201	/** Lock protecting the watch calback list. */
202	struct mtx watch_events_lock;
203
204	/**
205	 * Sleepable lock used to prevent VM suspension while a
206	 * xenstore transaction is outstanding.
207	 *
208	 * Each active transaction holds a shared lock on the
209	 * suspend mutex.  Our suspend method blocks waiting
210	 * to acquire an exclusive lock.  This guarantees that
211	 * suspend processing will only proceed once all active
212	 * transactions have been retired.
213	 */
214	struct sx suspend_mutex;
215
216	/**
217	 * The processid of the xenwatch thread.
218	 */
219	pid_t xenwatch_pid;
220
221	/**
222	 * Sleepable mutex used to gate the execution of XenStore
223	 * watch event callbacks.
224	 *
225	 * xenwatch_thread holds an exclusive lock on this mutex
226	 * while delivering event callbacks, and xenstore_unregister_watch()
227	 * uses an exclusive lock of this mutex to guarantee that no
228	 * callbacks of the just unregistered watch are pending
229	 * before returning to its caller.
230	 */
231	struct sx xenwatch_mutex;
232
233	/**
234	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
235	 * of the true machine frame number into our "physical address space".
236	 */
237	unsigned long gpfn;
238
239	/**
240	 * The event channel for communicating with the
241	 * XenStore service.
242	 */
243	int evtchn;
244
245	/** Handle for XenStore interrupts. */
246	xen_intr_handle_t xen_intr_handle;
247
248	/**
249	 * Interrupt driven config hook allowing us to defer
250	 * attaching children until interrupts (and thus communication
251	 * with the XenStore service) are available.
252	 */
253	struct intr_config_hook xs_attachcb;
254
255	/**
256	 * Xenstore is a user-space process that usually runs in Dom0,
257	 * so if this domain is booting as Dom0, xenstore wont we accessible,
258	 * and we have to defer the initialization of xenstore related
259	 * devices to later (when xenstore is started).
260	 */
261	bool initialized;
262
263	/**
264	 * Task to run when xenstore is initialized (Dom0 only), will
265	 * take care of attaching xenstore related devices.
266	 */
267	struct task xs_late_init;
268};
269
270/*-------------------------------- Global Data ------------------------------*/
271static struct xs_softc xs;
272
273/*------------------------- Private Utility Functions -----------------------*/
274
275/**
276 * Count and optionally record pointers to a number of NUL terminated
277 * strings in a buffer.
278 *
279 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
280 * \param dest	   An array to store pointers to each string found in strings.
281 * \param len	   The length of the buffer pointed to by strings.
282 *
283 * \return  A count of the number of strings found.
284 */
285static u_int
286extract_strings(const char *strings, const char **dest, u_int len)
287{
288	u_int num;
289	const char *p;
290
291	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) {
292		if (dest != NULL)
293			*dest++ = p;
294		num++;
295	}
296
297	return (num);
298}
299
300/**
301 * Convert a contiguous buffer containing a series of NUL terminated
302 * strings into an array of pointers to strings.
303 *
304 * The returned pointer references the array of string pointers which
305 * is followed by the storage for the string data.  It is the client's
306 * responsibility to free this storage.
307 *
308 * The storage addressed by strings is free'd prior to split returning.
309 *
310 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
311 * \param len	   The length of the buffer pointed to by strings.
312 * \param num	   The number of strings found and returned in the strings
313 *                 array.
314 *
315 * \return  An array of pointers to the strings found in the input buffer.
316 */
317static const char **
318split(char *strings, u_int len, u_int *num)
319{
320	const char **ret;
321
322	/* Protect against unterminated buffers. */
323	if (len > 0)
324		strings[len - 1] = '\0';
325
326	/* Count the strings. */
327	*num = extract_strings(strings, /*dest*/NULL, len);
328
329	/* Transfer to one big alloc for easy freeing by the caller. */
330	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
331	memcpy(&ret[*num], strings, len);
332	free(strings, M_XENSTORE);
333
334	/* Extract pointers to newly allocated array. */
335	strings = (char *)&ret[*num];
336	(void)extract_strings(strings, /*dest*/ret, len);
337
338	return (ret);
339}
340
341/*------------------------- Public Utility Functions -------------------------*/
342/*------- API comments for these methods can be found in xenstorevar.h -------*/
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path;

	/* Build "<dir>/<name>", or just "<dir>" when name is empty. */
	path = sbuf_new_auto();
	sbuf_cat(path, dir);
	if (name[0] != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
358
359/*-------------------- Low Level Communication Management --------------------*/
360/**
361 * Interrupt handler for the XenStore event channel.
362 *
363 * XenStore reads and writes block on "xen_store" for buffer
364 * space.  Wakeup any blocking operations when the XenStore
365 * service has modified the queues.
366 */
static void
xs_intr(void * arg __unused /*__attribute__((unused))*/)
{

	/*
	 * An event on this channel implies the XenStore service is
	 * running.  If xenstore has not been initialized, initialize
	 * it now: this covers the Dom0 case, where attachment of
	 * xenstore devices was deferred until the xenstore daemon
	 * was started (see xs_softc.initialized).
	 */
	if (!xs.initialized) {
		xs.initialized = true;
		/*
		 * Since this task is probing and attaching devices we
		 * have to hold the Giant lock.
		 */
		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
	}

	/*
	 * Hold ring lock across wakeup so that clients
	 * cannot miss a wakeup.
	 */
	mtx_lock(&xs.ring_lock);
	wakeup(xen_store);
	mtx_unlock(&xs.ring_lock);
}
389
390/**
391 * Verify that the indexes for a ring are valid.
392 *
393 * The difference between the producer and consumer cannot
394 * exceed the size of the ring.
395 *
396 * \param cons  The consumer index for the ring to test.
397 * \param prod  The producer index for the ring to test.
398 *
399 * \retval 1  If indexes are in range.
400 * \retval 0  If the indexes are out of range.
401 */
402static int
403xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
404{
405
406	return ((prod - cons) <= XENSTORE_RING_SIZE);
407}
408
409/**
410 * Return a pointer to, and the length of, the contiguous
411 * free region available for output in a ring buffer.
412 *
413 * \param cons  The consumer index for the ring.
414 * \param prod  The producer index for the ring.
415 * \param buf   The base address of the ring's storage.
416 * \param len   The amount of contiguous storage available.
417 *
418 * \return  A pointer to the start location of the free region.
419 */
420static void *
421xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
422    char *buf, uint32_t *len)
423{
424
425	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
426	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
427		*len = XENSTORE_RING_SIZE - (prod - cons);
428	return (buf + MASK_XENSTORE_IDX(prod));
429}
430
431/**
432 * Return a pointer to, and the length of, the contiguous
433 * data available to read from a ring buffer.
434 *
435 * \param cons  The consumer index for the ring.
436 * \param prod  The producer index for the ring.
437 * \param buf   The base address of the ring's storage.
438 * \param len   The amount of contiguous data available to read.
439 *
440 * \return  A pointer to the start location of the available data.
441 */
442static const void *
443xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
444    const char *buf, uint32_t *len)
445{
446
447	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
448	if ((prod - cons) < *len)
449		*len = prod - cons;
450	return (buf + MASK_XENSTORE_IDX(cons));
451}
452
453/**
454 * Transmit data to the XenStore service.
455 *
456 * \param tdata  A pointer to the contiguous data to send.
457 * \param len    The amount of data to send.
458 *
459 * \return  On success 0, otherwise an errno value indicating the
460 *          cause of failure.
461 *
462 * \invariant  Called from thread context.
463 * \invariant  The buffer pointed to by tdata is at least len bytes
464 *             in length.
465 * \invariant  xs.request_mutex exclusively locked.
466 */
static int
xs_write_store(const void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;
	int error;

	sx_assert(&xs.request_mutex, SX_XLOCKED);
	while (len != 0) {
		void *dst;
		u_int avail;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->req_cons;
		prod = xen_store->req_prod;
		if ((prod - cons) == XENSTORE_RING_SIZE) {
			/*
			 * Output ring is full. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			     "xbwrite", /*timeout*/0);
			/*
			 * No timeout is armed, so EWOULDBLOCK is only
			 * tolerated defensively; a real error (e.g.
			 * EINTR via PCATCH) aborts the write.
			 */
			if (error && error != EWOULDBLOCK)
				return (error);

			/* Try again. */
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			/* Indexes are corrupt: reset the ring and fail. */
			xen_store->req_cons = xen_store->req_prod = 0;
			return (EIO);
		}

		/* Copy as much as fits in the contiguous free region. */
		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/*
		 * The store to the producer index, which indicates
		 * to the other side that new data has arrived, must
		 * be visible only after our copy of the data into the
		 * ring has completed.
		 */
		wmb();
		xen_store->req_prod += avail;

		/*
		 * xen_intr_signal() implies mb(). The other side will see
		 * the change to req_prod at the time of the interrupt.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}
538
539/**
540 * Receive data from the XenStore service.
541 *
542 * \param tdata  A pointer to the contiguous buffer to receive the data.
543 * \param len    The amount of data to receive.
544 *
545 * \return  On success 0, otherwise an errno value indicating the
546 *          cause of failure.
547 *
548 * \invariant  Called from thread context.
549 * \invariant  The buffer pointed to by tdata is at least len bytes
550 *             in length.
551 *
552 * \note xs_read does not perform any internal locking to guarantee
553 *       serial access to the incoming ring buffer.  However, there
554 *	 is only one context processing reads: xs_rcv_thread().
555 */
static int
xs_read_store(void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	char *data = (char *)tdata;
	int error;

	while (len != 0) {
		u_int avail;
		const char *src;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->rsp_cons;
		prod = xen_store->rsp_prod;
		if (cons == prod) {
			/*
			 * Nothing to read. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbread", /*timeout*/0);
			/*
			 * No timeout is armed, so EWOULDBLOCK is only
			 * tolerated defensively; a real error (e.g.
			 * EINTR via PCATCH) aborts the read.
			 */
			if (error && error != EWOULDBLOCK)
				return (error);
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			/* Indexes are corrupt: reset the ring and fail. */
			xen_store->rsp_cons = xen_store->rsp_prod = 0;
			return (EIO);
		}

		/* Copy as much as is contiguously available. */
		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
		if (avail > len)
			avail = len;

		/*
		 * Ensure the data we read is related to the indexes
		 * we read above.
		 */
		rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/*
		 * Ensure that the producer of this ring does not see
		 * the ring space as free until after we have copied it
		 * out.
		 */
		mb();
		xen_store->rsp_cons += avail;

		/*
		 * xen_intr_signal() implies mb(). The producer will see
		 * the updated consumer index when the event is delivered.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}
629
630/*----------------------- Received Message Processing ------------------------*/
631/**
632 * Block reading the next message from the XenStore service and
633 * process the result.
634 *
635 * \param type  The returned type of the XenStore message received.
636 *
637 * \return  0 on success.  Otherwise an errno value indicating the
638 *          type of failure encountered.
639 */
static int
xs_process_msg(enum xsd_sockmsg_type *type)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	/* Read the fixed-size header first; it carries the body length. */
	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
	if (error) {
		free(msg, M_XENSTORE);
		return (error);
	}

	/* Read and NUL terminate the variable-length body. */
	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
	error = xs_read_store(body, msg->hdr.len);
	if (error) {
		free(body, M_XENSTORE);
		free(msg, M_XENSTORE);
		return (error);
	}
	body[msg->hdr.len] = '\0';

	*type = msg->hdr.type;
	if (msg->hdr.type == XS_WATCH_EVENT) {
		/*
		 * Watch event: split the body into its component strings
		 * (split() consumes and frees body) and route the event
		 * to its owner, if that watch is still registered.
		 */
		msg->u.watch.vec = split(body, msg->hdr.len,
		    &msg->u.watch.vec_size);

		mtx_lock(&xs.registered_watches_lock);
		msg->u.watch.handle = find_watch(
		    msg->u.watch.vec[XS_WATCH_TOKEN]);
		if (msg->u.watch.handle != NULL) {
			/* Queue for delivery by xenwatch_thread(). */
			mtx_lock(&xs.watch_events_lock);
			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
			wakeup(&xs.watch_events);
			mtx_unlock(&xs.watch_events_lock);
		} else {
			/* Watch is no longer registered; drop the event. */
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}
		mtx_unlock(&xs.registered_watches_lock);
	} else {
		/* Reply: queue it for the requester in xs_read_reply(). */
		msg->u.reply.body = body;
		mtx_lock(&xs.reply_lock);
		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
		wakeup(&xs.reply_list);
		mtx_unlock(&xs.reply_lock);
	}

	return (0);
}
691
692/**
693 * Thread body of the XenStore receive thread.
694 *
695 * This thread blocks waiting for data from the XenStore service
 * and processes any received messages.
697 */
698static void
699xs_rcv_thread(void *arg __unused)
700{
701	int error;
702	enum xsd_sockmsg_type type;
703
704	for (;;) {
705		error = xs_process_msg(&type);
706		if (error)
707			printf("XENSTORE error %d while reading message\n",
708			    error);
709	}
710}
711
712/*---------------- XenStore Message Request/Reply Processing -----------------*/
713/**
714 * Filter invoked before transmitting any message to the XenStore service.
715 *
716 * The role of the filter may expand, but currently serves to manage
717 * the interactions of messages with transaction state.
718 *
719 * \param request_msg_type  The message type for the request.
720 */
721static inline void
722xs_request_filter(uint32_t request_msg_type)
723{
724	if (request_msg_type == XS_TRANSACTION_START)
725		sx_slock(&xs.suspend_mutex);
726}
727
728/**
729 * Filter invoked after transmitting any message to the XenStore service.
730 *
731 * The role of the filter may expand, but currently serves to manage
732 * the interactions of messages with transaction state.
733 *
734 * \param request_msg_type     The message type for the original request.
735 * \param reply_msg_type       The message type for any received reply.
736 * \param request_reply_error  The error status from the attempt to send
737 *                             the request or retrieve the reply.
738 */
static inline void
xs_reply_filter(uint32_t request_msg_type,
    uint32_t reply_msg_type, int request_reply_error)
{
	/*
	 * Release the shared hold on the suspend mutex taken by
	 * xs_request_filter() when a transaction was started.
	 *
	 * The count of transactions drops if we attempted
	 * to end a transaction (even if that attempt fails
	 * in error), we receive a transaction end acknowledgement,
	 * or if our attempt to begin a transaction fails.
	 */
	if (request_msg_type == XS_TRANSACTION_END
	 || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
	 || (request_msg_type == XS_TRANSACTION_START
	  && (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
		sx_sunlock(&xs.suspend_mutex);

}
756
757#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
758
759/**
760 * Convert a XenStore error string into an errno number.
761 *
762 * \param errorstring  The error string to convert.
763 *
764 * \return  The errno best matching the input string.
765 *
766 * \note Unknown error strings are converted to EINVAL.
767 */
768static int
769xs_get_error(const char *errorstring)
770{
771	u_int i;
772
773	for (i = 0; i < xsd_error_count; i++) {
774		if (!strcmp(errorstring, xsd_errors[i].errstring))
775			return (xsd_errors[i].errnum);
776	}
777	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
778	    errorstring);
779	return (EINVAL);
780}
781
782/**
783 * Block waiting for a reply to a message request.
784 *
785 * \param type	  The returned type of the reply.
786 * \param len	  The returned body length of the reply.
787 * \param result  The returned body of the reply.
788 *
789 * \return  0 on success.  Otherwise an errno indicating the
790 *          cause of failure.
791 */
static int
xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	mtx_lock(&xs.reply_lock);
	while (TAILQ_EMPTY(&xs.reply_list)) {
		/*
		 * Wait for xs_rcv_thread() to queue a reply.  The hz/10
		 * timeout re-checks the list periodically rather than
		 * relying solely on the wakeup; EWOULDBLOCK (timeout)
		 * simply re-enters the loop.
		 */
		error = mtx_sleep(&xs.reply_list, &xs.reply_lock,
		    PCATCH, "xswait", hz/10);
		if (error && error != EWOULDBLOCK) {
			mtx_unlock(&xs.reply_lock);
			return (error);
		}
	}
	/*
	 * Dequeue the first reply.  Only one request can be outstanding
	 * (request_mutex), so this is the reply to our request.
	 */
	msg = TAILQ_FIRST(&xs.reply_list);
	TAILQ_REMOVE(&xs.reply_list, msg, list);
	mtx_unlock(&xs.reply_lock);

	*type = msg->hdr.type;
	if (len)
		*len = msg->hdr.len;
	/* Ownership of the malloced body transfers to the caller. */
	body = msg->u.reply.body;

	free(msg, M_XENSTORE);
	*result = body;
	return (0);
}
821
822/**
823 * Pass-thru interface for XenStore access by userland processes
824 * via the XenStore device.
825 *
826 * Reply type and length data are returned by overwriting these
827 * fields in the passed in request message.
828 *
829 * \param msg	  A properly formatted message to transmit to
830 *		  the XenStore service.
831 * \param result  The returned body of the reply.
832 *
833 * \return  0 on success.  Otherwise an errno indicating the cause
834 *          of failure.
835 *
836 * \note The returned result is provided in malloced storage and thus
837 *       must be free'd by the caller with 'free(result, M_XENSTORE);
838 */
839int
840xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
841{
842	uint32_t request_type;
843	int error;
844
845	request_type = msg->type;
846	xs_request_filter(request_type);
847
848	sx_xlock(&xs.request_mutex);
849	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
850		error = xs_read_reply(&msg->type, &msg->len, result);
851	sx_xunlock(&xs.request_mutex);
852
853	xs_reply_filter(request_type, msg->type, error);
854
855	return (error);
856}
857
858/**
 * Send a message with an optional multi-part body to the XenStore service.
860 *
861 * \param t              The transaction to use for this request.
862 * \param request_type   The type of message to send.
863 * \param iovec          Pointers to the body sections of the request.
864 * \param num_vecs       The number of body sections in the request.
865 * \param len            The returned length of the reply.
866 * \param result         The returned body of the reply.
867 *
868 * \return  0 on success.  Otherwise an errno indicating
869 *          the cause of failure.
870 *
871 * \note The returned result is provided in malloced storage and thus
872 *       must be free'd by the caller with 'free(*result, M_XENSTORE);
873 */
static int
xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
{
	struct xsd_sockmsg msg;
	void *ret = NULL;
	u_int i;
	int error;

	/* Build the wire header; body length is the sum of all sections. */
	msg.tx_id = t.id;
	msg.req_id = 0;
	msg.type = request_type;
	msg.len = 0;
	for (i = 0; i < num_vecs; i++)
		msg.len += iovec[i].iov_len;

	xs_request_filter(request_type);

	/*
	 * Hold the request mutex across header, body, and reply so no
	 * other writer can interleave with our multi-part message.
	 */
	sx_xlock(&xs.request_mutex);
	error = xs_write_store(&msg, sizeof(msg));
	if (error) {
		printf("xs_talkv failed %d\n", error);
		goto error_lock_held;
	}

	/* Transmit each body section in order. */
	for (i = 0; i < num_vecs; i++) {
		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
		if (error) {
			printf("xs_talkv failed %d\n", error);
			goto error_lock_held;
		}
	}

	error = xs_read_reply(&msg.type, len, &ret);

error_lock_held:
	sx_xunlock(&xs.request_mutex);
	xs_reply_filter(request_type, msg.type, error);
	if (error)
		return (error);

	if (msg.type == XS_ERROR) {
		/* Service replied with an error string; map it to errno. */
		error = xs_get_error(ret);
		free(ret, M_XENSTORE);
		return (error);
	}

	/* Reply is either error or an echo of our request message type. */
	KASSERT(msg.type == request_type, ("bad xenstore message type"));

	if (result)
		*result = ret;
	else
		free(ret, M_XENSTORE);

	return (0);
}
931
932/**
933 * Wrapper for xs_talkv allowing easy transmission of a message with
934 * a single, contiguous, message body.
935 *
936 * \param t              The transaction to use for this request.
937 * \param request_type   The type of message to send.
938 * \param body           The body of the request.
939 * \param len            The returned length of the reply.
940 * \param result         The returned body of the reply.
941 *
942 * \return  0 on success.  Otherwise an errno indicating
943 *          the cause of failure.
944 *
945 * \note The returned result is provided in malloced storage and thus
946 *       must be free'd by the caller with 'free(*result, M_XENSTORE);
947 */
948static int
949xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
950    const char *body, u_int *len, void **result)
951{
952	struct iovec iovec;
953
954	iovec.iov_base = (void *)(uintptr_t)body;
955	iovec.iov_len = strlen(body) + 1;
956
957	return (xs_talkv(t, request_type, &iovec, 1, len, result));
958}
959
960/*------------------------- XenStore Watch Support ---------------------------*/
961/**
962 * Transmit a watch request to the XenStore service.
963 *
964 * \param path    The path in the XenStore to watch.
 * \param token   A unique identifier for this watch.
966 *
967 * \return  0 on success.  Otherwise an errno indicating the
968 *          cause of failure.
969 */
970static int
971xs_watch(const char *path, const char *token)
972{
973	struct iovec iov[2];
974
975	iov[0].iov_base = (void *)(uintptr_t) path;
976	iov[0].iov_len = strlen(path) + 1;
977	iov[1].iov_base = (void *)(uintptr_t) token;
978	iov[1].iov_len = strlen(token) + 1;
979
980	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
981}
982
983/**
 * Transmit an unwatch request to the XenStore service.
 *
 * \param path    The path in the XenStore to stop watching.
 * \param token   The unique identifier used when the watch was set.
988 *
989 * \return  0 on success.  Otherwise an errno indicating the
990 *          cause of failure.
991 */
992static int
993xs_unwatch(const char *path, const char *token)
994{
995	struct iovec iov[2];
996
997	iov[0].iov_base = (void *)(uintptr_t) path;
998	iov[0].iov_len = strlen(path) + 1;
999	iov[1].iov_base = (void *)(uintptr_t) token;
1000	iov[1].iov_len = strlen(token) + 1;
1001
1002	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
1003}
1004
1005/**
1006 * Convert from watch token (unique identifier) to the associated
1007 * internal tracking structure for this watch.
1008 *
 * \param token  The unique identifier for the watch to find.
1010 *
1011 * \return  A pointer to the found watch structure or NULL.
1012 */
1013static struct xs_watch *
1014find_watch(const char *token)
1015{
1016	struct xs_watch *i, *cmp;
1017
1018	cmp = (void *)strtoul(token, NULL, 16);
1019
1020	LIST_FOREACH(i, &xs.registered_watches, list)
1021		if (i == cmp)
1022			return (i);
1023
1024	return (NULL);
1025}
1026
1027/**
1028 * Thread body of the XenStore watch event dispatch thread.
1029 */
static void
xenwatch_thread(void *unused)
{
	struct xs_stored_msg *msg;

	for (;;) {

		/* Sleep until xs_process_msg() queues an event. */
		mtx_lock(&xs.watch_events_lock);
		while (TAILQ_EMPTY(&xs.watch_events))
			mtx_sleep(&xs.watch_events,
			    &xs.watch_events_lock,
			    PWAIT | PCATCH, "waitev", hz/10);

		/*
		 * Drop the (non-sleepable) event lock before acquiring
		 * the sleepable xenwatch_mutex, then re-take it to
		 * dequeue.  The list may have been drained in between,
		 * so msg can legitimately be NULL below.
		 */
		mtx_unlock(&xs.watch_events_lock);
		sx_xlock(&xs.xenwatch_mutex);

		mtx_lock(&xs.watch_events_lock);
		msg = TAILQ_FIRST(&xs.watch_events);
		if (msg)
			TAILQ_REMOVE(&xs.watch_events, msg, list);
		mtx_unlock(&xs.watch_events_lock);

		if (msg != NULL) {
			/*
			 * XXX There are messages coming in with a NULL
			 * XXX callback.  This deserves further investigation;
			 * XXX the workaround here simply prevents the kernel
			 * XXX from panic'ing on startup.
			 */
			if (msg->u.watch.handle->callback != NULL)
				msg->u.watch.handle->callback(
					msg->u.watch.handle,
					(const char **)msg->u.watch.vec,
					msg->u.watch.vec_size);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}

		/* Callback delivery done; see xenwatch_mutex contract. */
		sx_xunlock(&xs.xenwatch_mutex);
	}
}
1071
1072/*----------- XenStore Configuration, Initialization, and Control ------------*/
1073/**
1074 * Setup communication channels with the XenStore service.
1075 *
1076 * \return  On success, 0. Otherwise an errno value indicating the
1077 *          type of failure.
1078 */
1079static int
1080xs_init_comms(void)
1081{
1082	int error;
1083
1084	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1085		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1086		    "(%08x:%08x): fixing up\n",
1087		    xen_store->rsp_cons, xen_store->rsp_prod);
1088		xen_store->rsp_cons = xen_store->rsp_prod;
1089	}
1090
1091	xen_intr_unbind(&xs.xen_intr_handle);
1092
1093	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1094	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1095	    &xs.xen_intr_handle);
1096	if (error) {
1097		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1098		return (error);
1099	}
1100
1101	return (0);
1102}
1103
1104/*------------------ Private Device Attachment Functions  --------------------*/
/**
 * NewBus identify method: add a "xenstore" child to the given parent
 * so that xs_probe()/xs_attach() will later run against it.
 *
 * \param driver  The driver performing this identify action (unused).
 * \param parent  The bus device to gain the new child.
 */
static void
xs_identify(driver_t *driver, device_t parent)
{

	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
}
1111
1112/**
1113 * Probe for the existence of the XenStore.
1114 *
1115 * \param dev
1116 */
1117static int
1118xs_probe(device_t dev)
1119{
1120	/*
1121	 * We are either operating within a PV kernel or being probed
1122	 * as the child of the successfully attached xenpci device.
1123	 * Thus we are in a Xen environment and there will be a XenStore.
1124	 * Unconditionally return success.
1125	 */
1126	device_set_desc(dev, "XenStore");
1127	return (BUS_PROBE_NOWILDCARD);
1128}
1129
/**
 * Deferred (config_intrhook) portion of attach: probe and attach the
 * child devices that depend on a functioning XenStore, then remove
 * the hook that scheduled this work.
 *
 * \param arg  Unused hook argument.
 */
static void
xs_attach_deferred(void *arg)
{

	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);

	config_intrhook_disestablish(&xs.xs_attachcb);
}
1139
/**
 * Task handler used to probe/attach XenStore-dependent children in the
 * case where the store was not yet initialized at xs_attach() time
 * (see the !xs.initialized path in xs_attach()).
 *
 * \param arg      Unused task argument.
 * \param pending  Number of times the task was enqueued; must be 1.
 */
static void
xs_attach_late(void *arg, int pending)
{

	KASSERT((pending == 1), ("xs late attach queued several times"));
	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);
}
1148
1149/**
1150 * Attach to the XenStore.
1151 *
1152 * This routine also prepares for the probe/attach of drivers that rely
1153 * on the XenStore.
1154 */
1155static int
1156xs_attach(device_t dev)
1157{
1158	int error;
1159
1160	/* Allow us to get device_t from softc and vice-versa. */
1161	xs.xs_dev = dev;
1162	device_set_softc(dev, &xs);
1163
1164	/* Initialize the interface to xenstore. */
1165	struct proc *p;
1166
1167	xs.initialized = false;
1168	if (xen_hvm_domain()) {
1169		xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
1170		xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
1171		xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE);
1172		xs.initialized = true;
1173	} else if (xen_pv_domain()) {
1174		if (HYPERVISOR_start_info->store_evtchn == 0) {
1175			struct evtchn_alloc_unbound alloc_unbound;
1176
1177			/* Allocate a local event channel for xenstore */
1178			alloc_unbound.dom = DOMID_SELF;
1179			alloc_unbound.remote_dom = DOMID_SELF;
1180			error = HYPERVISOR_event_channel_op(
1181			    EVTCHNOP_alloc_unbound, &alloc_unbound);
1182			if (error != 0)
1183				panic(
1184				   "unable to alloc event channel for Dom0: %d",
1185				    error);
1186
1187			HYPERVISOR_start_info->store_evtchn =
1188			    alloc_unbound.port;
1189			xs.evtchn = alloc_unbound.port;
1190
1191			/* Allocate memory for the xs shared ring */
1192			xen_store = malloc(PAGE_SIZE, M_XENSTORE,
1193			    M_WAITOK | M_ZERO);
1194		} else {
1195			xs.evtchn = HYPERVISOR_start_info->store_evtchn;
1196			xs.initialized = true;
1197		}
1198	} else {
1199		panic("Unknown domain type, cannot initialize xenstore.");
1200	}
1201
1202	TAILQ_INIT(&xs.reply_list);
1203	TAILQ_INIT(&xs.watch_events);
1204
1205	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1206	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1207	sx_init(&xs.xenwatch_mutex, "xenwatch");
1208	sx_init(&xs.request_mutex, "xenstore request");
1209	sx_init(&xs.suspend_mutex, "xenstore suspend");
1210	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1211	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1212
1213	/* Initialize the shared memory rings to talk to xenstored */
1214	error = xs_init_comms();
1215	if (error)
1216		return (error);
1217
1218	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1219	    0, "xenwatch");
1220	if (error)
1221		return (error);
1222	xs.xenwatch_pid = p->p_pid;
1223
1224	error = kproc_create(xs_rcv_thread, NULL, NULL,
1225	    RFHIGHPID, 0, "xenstore_rcv");
1226
1227	xs.xs_attachcb.ich_func = xs_attach_deferred;
1228	xs.xs_attachcb.ich_arg = NULL;
1229	if (xs.initialized) {
1230		config_intrhook_establish(&xs.xs_attachcb);
1231	} else {
1232		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1233	}
1234
1235	return (error);
1236}
1237
1238/**
1239 * Prepare for suspension of this VM by halting XenStore access after
1240 * all transactions and individual requests have completed.
1241 */
1242static int
1243xs_suspend(device_t dev)
1244{
1245	int error;
1246
1247	/* Suspend child Xen devices. */
1248	error = bus_generic_suspend(dev);
1249	if (error != 0)
1250		return (error);
1251
1252	sx_xlock(&xs.suspend_mutex);
1253	sx_xlock(&xs.request_mutex);
1254
1255	return (0);
1256}
1257
1258/**
1259 * Resume XenStore operations after this VM is resumed.
1260 */
1261static int
1262xs_resume(device_t dev __unused)
1263{
1264	struct xs_watch *watch;
1265	char token[sizeof(watch) * 2 + 1];
1266
1267	xs_init_comms();
1268
1269	sx_xunlock(&xs.request_mutex);
1270
1271	/*
1272	 * No need for registered_watches_lock: the suspend_mutex
1273	 * is sufficient.
1274	 */
1275	LIST_FOREACH(watch, &xs.registered_watches, list) {
1276		sprintf(token, "%lX", (long)watch);
1277		xs_watch(watch->node, token);
1278	}
1279
1280	sx_xunlock(&xs.suspend_mutex);
1281
1282	/* Resume child Xen devices. */
1283	bus_generic_resume(dev);
1284
1285	return (0);
1286}
1287
1288/*-------------------- Private Device Attachment Data  -----------------------*/
/* NewBus method table mapping device/bus operations to this driver. */
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};

/* Driver class with no softc allocation (softc is set in xs_attach()). */
DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
static devclass_t xenstore_devclass;

/* Register the driver as a child of the xenpv bus. */
DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);
1313
1314/*------------------------------- Sysctl Data --------------------------------*/
/* XXX Shouldn't the node be somewhere else? */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
/* Read-only views of the store event channel port and ring address. */
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1319
1320/*-------------------------------- Public API --------------------------------*/
1321/*------- API comments for these methods can be found in xenstorevar.h -------*/
1322int
1323xs_directory(struct xs_transaction t, const char *dir, const char *node,
1324    u_int *num, const char ***result)
1325{
1326	struct sbuf *path;
1327	char *strings;
1328	u_int len = 0;
1329	int error;
1330
1331	path = xs_join(dir, node);
1332	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1333	    (void **)&strings);
1334	sbuf_delete(path);
1335	if (error)
1336		return (error);
1337
1338	*result = split(strings, len, num);
1339
1340	return (0);
1341}
1342
1343int
1344xs_exists(struct xs_transaction t, const char *dir, const char *node)
1345{
1346	const char **d;
1347	int error, dir_n;
1348
1349	error = xs_directory(t, dir, node, &dir_n, &d);
1350	if (error)
1351		return (0);
1352	free(d, M_XENSTORE);
1353	return (1);
1354}
1355
1356int
1357xs_read(struct xs_transaction t, const char *dir, const char *node,
1358    u_int *len, void **result)
1359{
1360	struct sbuf *path;
1361	void *ret;
1362	int error;
1363
1364	path = xs_join(dir, node);
1365	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1366	sbuf_delete(path);
1367	if (error)
1368		return (error);
1369	*result = ret;
1370	return (0);
1371}
1372
1373int
1374xs_write(struct xs_transaction t, const char *dir, const char *node,
1375    const char *string)
1376{
1377	struct sbuf *path;
1378	struct iovec iovec[2];
1379	int error;
1380
1381	path = xs_join(dir, node);
1382
1383	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1384	iovec[0].iov_len = sbuf_len(path) + 1;
1385	iovec[1].iov_base = (void *)(uintptr_t) string;
1386	iovec[1].iov_len = strlen(string);
1387
1388	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1389	sbuf_delete(path);
1390
1391	return (error);
1392}
1393
1394int
1395xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1396{
1397	struct sbuf *path;
1398	int ret;
1399
1400	path = xs_join(dir, node);
1401	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1402	sbuf_delete(path);
1403
1404	return (ret);
1405}
1406
1407int
1408xs_rm(struct xs_transaction t, const char *dir, const char *node)
1409{
1410	struct sbuf *path;
1411	int ret;
1412
1413	path = xs_join(dir, node);
1414	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1415	sbuf_delete(path);
1416
1417	return (ret);
1418}
1419
/**
 * Recursively remove the XenStore subtree rooted at base/node.
 *
 * Performs an iterative depth-first deletion, descending into any
 * directory that reports ENOTEMPTY on removal.  If the caller passes
 * a nil transaction (xbt.id == 0), a local transaction is used and
 * the whole operation is retried when it ends with EAGAIN.
 */
int
xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
{
	struct xs_transaction local_xbt;
	struct sbuf *root_path_sbuf;
	struct sbuf *cur_path_sbuf;
	char *root_path;
	char *cur_path;
	const char **dir;
	int error;

retry:
	root_path_sbuf = xs_join(base, node);
	cur_path_sbuf  = xs_join(base, node);
	root_path      = sbuf_data(root_path_sbuf);
	cur_path       = sbuf_data(cur_path_sbuf);
	dir            = NULL;
	local_xbt.id   = 0;

	if (xbt.id == 0) {
		error = xs_transaction_start(&local_xbt);
		if (error != 0)
			goto out;
		xbt = local_xbt;
	}

	while (1) {
		u_int count;
		u_int i;

		error = xs_directory(xbt, cur_path, "", &count, &dir);
		if (error)
			goto out;

		for (i = 0; i < count; i++) {
			error = xs_rm(xbt, cur_path, dir[i]);
			if (error == ENOTEMPTY) {
				struct sbuf *push_dir;

				/*
				 * Descend to clear out this sub directory.
				 * We'll return to cur_dir once push_dir
				 * is empty.
				 */
				push_dir = xs_join(cur_path, dir[i]);
				sbuf_delete(cur_path_sbuf);
				cur_path_sbuf = push_dir;
				cur_path = sbuf_data(cur_path_sbuf);
				break;
			} else if (error != 0) {
				goto out;
			}
		}

		free(dir, M_XENSTORE);
		dir = NULL;

		/* i == count means no descent occurred above. */
		if (i == count) {
			char *last_slash;

			/* Directory is empty.  It is now safe to remove. */
			error = xs_rm(xbt, cur_path, "");
			if (error != 0)
				goto out;

			if (!strcmp(cur_path, root_path))
				break;

			/* Return to processing the parent directory. */
			last_slash = strrchr(cur_path, '/');
			KASSERT(last_slash != NULL,
				("xs_rm_tree: mangled path %s", cur_path));
			*last_slash = '\0';
		}
	}

out:
	sbuf_delete(cur_path_sbuf);
	sbuf_delete(root_path_sbuf);
	if (dir != NULL)
		free(dir, M_XENSTORE);

	/* End (abort on error) any transaction we started ourselves. */
	if (local_xbt.id != 0) {
		int terror;

		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
		xbt.id = 0;
		if (terror == EAGAIN && error == 0)
			goto retry;
	}
	return (error);
}
1512
1513int
1514xs_transaction_start(struct xs_transaction *t)
1515{
1516	char *id_str;
1517	int error;
1518
1519	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1520	    (void **)&id_str);
1521	if (error == 0) {
1522		t->id = strtoul(id_str, NULL, 0);
1523		free(id_str, M_XENSTORE);
1524	}
1525	return (error);
1526}
1527
1528int
1529xs_transaction_end(struct xs_transaction t, int abort)
1530{
1531	char abortstr[2];
1532
1533	if (abort)
1534		strcpy(abortstr, "F");
1535	else
1536		strcpy(abortstr, "T");
1537
1538	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1539}
1540
1541int
1542xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1543     int *scancountp, const char *fmt, ...)
1544{
1545	va_list ap;
1546	int error, ns;
1547	char *val;
1548
1549	error = xs_read(t, dir, node, NULL, (void **) &val);
1550	if (error)
1551		return (error);
1552
1553	va_start(ap, fmt);
1554	ns = vsscanf(val, fmt, ap);
1555	va_end(ap);
1556	free(val, M_XENSTORE);
1557	/* Distinctive errno. */
1558	if (ns == 0)
1559		return (ERANGE);
1560	if (scancountp)
1561		*scancountp = ns;
1562	return (0);
1563}
1564
1565int
1566xs_vprintf(struct xs_transaction t,
1567    const char *dir, const char *node, const char *fmt, va_list ap)
1568{
1569	struct sbuf *sb;
1570	int error;
1571
1572	sb = sbuf_new_auto();
1573	sbuf_vprintf(sb, fmt, ap);
1574	sbuf_finish(sb);
1575	error = xs_write(t, dir, node, sbuf_data(sb));
1576	sbuf_delete(sb);
1577
1578	return (error);
1579}
1580
1581int
1582xs_printf(struct xs_transaction t, const char *dir, const char *node,
1583     const char *fmt, ...)
1584{
1585	va_list ap;
1586	int error;
1587
1588	va_start(ap, fmt);
1589	error = xs_vprintf(t, dir, node, fmt, ap);
1590	va_end(ap);
1591
1592	return (error);
1593}
1594
/**
 * Read and convert a series of XenStore values in a single call.
 *
 * The variable arguments are consumed as triples of
 * (node name, scanf-style format or NULL, result pointer), terminated
 * by a NULL node name.  With a NULL format the raw string is returned
 * through the result pointer and the caller must free it with
 * M_XENSTORE; otherwise the value is parsed via sscanf() and freed
 * here.  Processing stops at the first error.
 */
int
xs_gather(struct xs_transaction t, const char *dir, ...)
{
	va_list ap;
	const char *name;
	int error;

	va_start(ap, dir);
	error = 0;
	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
		const char *fmt = va_arg(ap, char *);
		void *result = va_arg(ap, void *);
		char *p;

		error = xs_read(t, dir, name, NULL, (void **) &p);
		if (error)
			break;

		if (fmt) {
			if (sscanf(p, fmt, result) == 0)
				error = EINVAL;
			free(p, M_XENSTORE);
		} else
			*(char **)result = p;
	}
	va_end(ap);

	return (error);
}
1624
/**
 * Register a XenStore watch.
 *
 * The watch structure's address printed in hex serves as the token
 * that ties incoming watch events back to the structure (see
 * find_watch()).  The suspend_mutex is shared-locked so registration
 * cannot race with a suspend/resume cycle.
 */
int
xs_register_watch(struct xs_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof(watch) * 2 + 1];
	int error;

	sprintf(token, "%lX", (long)watch);

	sx_slock(&xs.suspend_mutex);

	mtx_lock(&xs.registered_watches_lock);
	KASSERT(find_watch(token) == NULL, ("watch already registered"));
	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if (error == EEXIST)
		error = 0;

	if (error != 0) {
		/* Roll back the local registration on failure. */
		mtx_lock(&xs.registered_watches_lock);
		LIST_REMOVE(watch, list);
		mtx_unlock(&xs.registered_watches_lock);
	}

	sx_sunlock(&xs.suspend_mutex);

	return (error);
}
1657
/**
 * Unregister a XenStore watch.
 *
 * Removes the watch from the local registry, asks the XenStore service
 * to drop it, discards any of its events still queued for dispatch,
 * and finally waits out any callback currently executing in the
 * xenwatch thread (unless we *are* that thread).
 */
void
xs_unregister_watch(struct xs_watch *watch)
{
	struct xs_stored_msg *msg, *tmp;
	char token[sizeof(watch) * 2 + 1];
	int error;

	/* Token format must match the one built by xs_register_watch(). */
	sprintf(token, "%lX", (long)watch);

	sx_slock(&xs.suspend_mutex);

	mtx_lock(&xs.registered_watches_lock);
	if (find_watch(token) == NULL) {
		/* Not registered; nothing to do. */
		mtx_unlock(&xs.registered_watches_lock);
		sx_sunlock(&xs.suspend_mutex);
		return;
	}
	LIST_REMOVE(watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_unwatch(watch->node, token);
	if (error)
		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
		    watch->node, error);

	sx_sunlock(&xs.suspend_mutex);

	/* Cancel pending watch events. */
	mtx_lock(&xs.watch_events_lock);
	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
		if (msg->u.watch.handle != watch)
			continue;
		TAILQ_REMOVE(&xs.watch_events, msg, list);
		free(msg->u.watch.vec, M_XENSTORE);
		free(msg, M_XENSTORE);
	}
	mtx_unlock(&xs.watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (curproc->p_pid != xs.xenwatch_pid) {
		/* Acquiring xenwatch_mutex waits for the dispatch thread
		 * to finish the callback it may be running. */
		sx_xlock(&xs.xenwatch_mutex);
		sx_xunlock(&xs.xenwatch_mutex);
	}
}
1702
1703void
1704xs_lock(void)
1705{
1706
1707	sx_xlock(&xs.request_mutex);
1708	return;
1709}
1710
1711void
1712xs_unlock(void)
1713{
1714
1715	sx_xunlock(&xs.request_mutex);
1716	return;
1717}
1718
1719