1/*
2 * xenstore_dev.c
3 *
4 * Driver giving user-space access to the kernel's connection to the
5 * XenStore service.
6 *
7 * Copyright (c) 2005, Christian Limpach
8 * Copyright (c) 2005, Rusty Russell, IBM Corporation
9 *
10 * This file may be distributed separately from the Linux kernel, or
11 * incorporated into other software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
31
32#include <sys/types.h>
33#include <sys/cdefs.h>
34#include <sys/errno.h>
35#include <sys/uio.h>
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/proc.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/conf.h>
42#include <sys/module.h>
43#include <sys/selinfo.h>
44#include <sys/sysctl.h>
45#include <sys/poll.h>
46
47#include <xen/xen-os.h>
48
49#include <xen/hypervisor.h>
50#include <xen/xenstore/xenstorevar.h>
51#include <xen/xenstore/xenstore_internal.h>
52
/*
 * Value copied into the max_pending field of every watch registered by
 * xs_dev_write().  Exposed read/write through the
 * "max_pending_watch_events" sysctl added in xs_dev_attach(); changing it
 * only affects watches registered afterwards, since the value is copied at
 * registration time.
 */
static unsigned int max_pending_watches = 1000;
54
55struct xs_dev_transaction {
56	LIST_ENTRY(xs_dev_transaction) list;
57	struct xs_transaction handle;
58};
59
60struct xs_dev_watch {
61	LIST_ENTRY(xs_dev_watch) list;
62	struct xs_watch watch;
63	char *token;
64	struct xs_dev_data *user;
65};
66
67struct xs_dev_data {
68	/* In-progress transaction. */
69	LIST_HEAD(, xs_dev_transaction) transactions;
70
71	/* Active watches. */
72	LIST_HEAD(, xs_dev_watch) watches;
73
74	/* Partial request. */
75	unsigned int len;
76	union {
77		struct xsd_sockmsg msg;
78		char buffer[PAGE_SIZE];
79	} u;
80
81	/* Response queue. */
82#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
83	char read_buffer[PAGE_SIZE];
84	unsigned int read_cons, read_prod;
85
86	/* Serializes writes to the read buffer. */
87	struct mtx lock;
88
89	/* Polling structure (for reads only ATM). */
90	struct selinfo ev_rsel;
91};
92
93static void
94xs_queue_reply(struct xs_dev_data *u, const char *data, unsigned int len)
95{
96	unsigned int i;
97
98	for (i = 0; i < len; i++, u->read_prod++)
99		u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
100
101	KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer),
102	    ("xenstore reply too big"));
103
104	wakeup(u);
105	selwakeup(&u->ev_rsel);
106}
107
108static const char *
109xs_dev_error_to_string(int error)
110{
111	unsigned int i;
112
113	for (i = 0; i < nitems(xsd_errors); i++)
114		if (xsd_errors[i].errnum == error)
115			return (xsd_errors[i].errstring);
116
117	return (NULL);
118}
119
120static void
121xs_dev_return_error(struct xs_dev_data *u, int error, int req_id, int tx_id)
122{
123	struct xsd_sockmsg msg;
124	const char *payload;
125
126	msg.type = XS_ERROR;
127	msg.req_id = req_id;
128	msg.tx_id = tx_id;
129	payload = NULL;
130
131	payload = xs_dev_error_to_string(error);
132	if (payload == NULL)
133		payload = xs_dev_error_to_string(EINVAL);
134	KASSERT(payload != NULL, ("Unable to find string for EINVAL errno"));
135
136	msg.len = strlen(payload) + 1;
137
138	mtx_lock(&u->lock);
139	xs_queue_reply(u, (char *)&msg, sizeof(msg));
140	xs_queue_reply(u, payload, msg.len);
141	mtx_unlock(&u->lock);
142}
143
144static int
145xs_dev_watch_message_parse_string(const char **p, const char *end,
146    const char **string_r)
147{
148	const char *nul;
149
150	nul = memchr(*p, 0, end - *p);
151	if (!nul)
152		return (EINVAL);
153
154	*string_r = *p;
155	*p = nul+1;
156
157	return (0);
158}
159
160static int
161xs_dev_watch_message_parse(const struct xsd_sockmsg *msg, const char **path_r,
162    const char **token_r)
163{
164	const char *p, *end;
165	int error;
166
167	p = (const char *)msg + sizeof(*msg);
168	end = p + msg->len;
169	KASSERT(p <= end, ("payload overflow"));
170
171	error = xs_dev_watch_message_parse_string(&p, end, path_r);
172	if (error)
173		return (error);
174	error = xs_dev_watch_message_parse_string(&p, end, token_r);
175	if (error)
176		return (error);
177
178	return (0);
179}
180
181static struct xs_dev_watch *
182xs_dev_find_watch(struct xs_dev_data *u, const char *token)
183{
184	struct xs_dev_watch *watch;
185
186	LIST_FOREACH(watch, &u->watches, list)
187		if (strcmp(watch->token, token) == 0)
188			return (watch);
189
190	return (NULL);
191}
192
193static void
194xs_dev_watch_cb(struct xs_watch *watch, const char **vec, unsigned int len)
195{
196	struct xs_dev_watch *dwatch;
197	struct xsd_sockmsg msg;
198	char *payload;
199
200	dwatch = (struct xs_dev_watch *)watch->callback_data;
201	msg.type = XS_WATCH_EVENT;
202	msg.req_id = msg.tx_id = 0;
203	msg.len = strlen(vec[XS_WATCH_PATH]) + strlen(dwatch->token) + 2;
204
205	payload = malloc(msg.len, M_XENSTORE, M_WAITOK);
206	strcpy(payload, vec[XS_WATCH_PATH]);
207	strcpy(&payload[strlen(vec[XS_WATCH_PATH]) + 1], dwatch->token);
208	mtx_lock(&dwatch->user->lock);
209	xs_queue_reply(dwatch->user, (char *)&msg, sizeof(msg));
210	xs_queue_reply(dwatch->user, payload, msg.len);
211	mtx_unlock(&dwatch->user->lock);
212	free(payload, M_XENSTORE);
213}
214
215static struct xs_dev_transaction *
216xs_dev_find_transaction(struct xs_dev_data *u, uint32_t tx_id)
217{
218	struct xs_dev_transaction *trans;
219
220	LIST_FOREACH(trans, &u->transactions, list)
221		if (trans->handle.id == tx_id)
222			return (trans);
223
224	return (NULL);
225}
226
227static int
228xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
229{
230	int error;
231	struct xs_dev_data *u;
232
233	error = devfs_get_cdevpriv((void **)&u);
234	if (error != 0)
235		return (error);
236
237	while (u->read_prod == u->read_cons) {
238		error = tsleep(u, PCATCH, "xsdread", hz/10);
239		if (error && error != EWOULDBLOCK)
240			return (error);
241	}
242
243	while (uio->uio_resid > 0) {
244		if (u->read_cons == u->read_prod)
245			break;
246		error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
247		    1, uio);
248		if (error)
249			return (error);
250		u->read_cons++;
251	}
252	return (0);
253}
254
255static int
256xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
257{
258	int error;
259	const char *wpath, *wtoken;
260	struct xs_dev_data *u;
261	struct xs_dev_transaction *trans;
262	struct xs_dev_watch *watch;
263	void *reply;
264	static const char *ok = "OK";
265	int len = uio->uio_resid;
266
267	error = devfs_get_cdevpriv((void **)&u);
268	if (error != 0)
269		return (error);
270
271	if ((len + u->len) > sizeof(u->u.buffer))
272		return (EINVAL);
273
274	error = uiomove(u->u.buffer + u->len, len, uio);
275	if (error)
276		return (error);
277
278	u->len += len;
279	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
280		return (0);
281
282	switch (u->u.msg.type) {
283	case XS_TRANSACTION_START:
284	case XS_TRANSACTION_END:
285	case XS_DIRECTORY:
286	case XS_READ:
287	case XS_GET_PERMS:
288	case XS_RELEASE:
289	case XS_GET_DOMAIN_PATH:
290	case XS_WRITE:
291	case XS_MKDIR:
292	case XS_RM:
293	case XS_SET_PERMS:
294		/* Check that this transaction id is not hijacked. */
295		if (u->u.msg.tx_id != 0 &&
296		    xs_dev_find_transaction(u, u->u.msg.tx_id) == NULL) {
297			error = EINVAL;
298			break;
299		}
300		error = xs_dev_request_and_reply(&u->u.msg, &reply);
301		if (!error) {
302			if (u->u.msg.type == XS_TRANSACTION_START) {
303				trans = malloc(sizeof(*trans), M_XENSTORE,
304				    M_WAITOK);
305				trans->handle.id = strtoul(reply, NULL, 0);
306				LIST_INSERT_HEAD(&u->transactions, trans, list);
307			} else if (u->u.msg.type == XS_TRANSACTION_END) {
308				trans = xs_dev_find_transaction(u,
309				    u->u.msg.tx_id);
310				KASSERT(trans != NULL,
311				    ("Unable to find transaction"));
312				LIST_REMOVE(trans, list);
313				free(trans, M_XENSTORE);
314			}
315			mtx_lock(&u->lock);
316			xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
317			xs_queue_reply(u, (char *)reply, u->u.msg.len);
318			mtx_unlock(&u->lock);
319			free(reply, M_XENSTORE);
320		}
321		break;
322	case XS_WATCH:
323		u->u.msg.tx_id = 0;
324		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
325		if (error)
326			break;
327		if (xs_dev_find_watch(u, wtoken) != NULL) {
328			error = EINVAL;
329			break;
330		}
331
332		watch = malloc(sizeof(*watch), M_XENSTORE, M_WAITOK);
333		watch->watch.node = strdup(wpath, M_XENSTORE);
334		watch->watch.callback = xs_dev_watch_cb;
335		watch->watch.callback_data = (uintptr_t)watch;
336		watch->watch.max_pending = max_pending_watches;
337		watch->token = strdup(wtoken, M_XENSTORE);
338		watch->user = u;
339
340		error = xs_register_watch(&watch->watch);
341		if (error != 0) {
342			free(watch->token, M_XENSTORE);
343			free(watch->watch.node, M_XENSTORE);
344			free(watch, M_XENSTORE);
345			break;
346		}
347
348		LIST_INSERT_HEAD(&u->watches, watch, list);
349		u->u.msg.len = sizeof(ok);
350		mtx_lock(&u->lock);
351		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
352		xs_queue_reply(u, ok, sizeof(ok));
353		mtx_unlock(&u->lock);
354		break;
355	case XS_UNWATCH:
356		u->u.msg.tx_id = 0;
357		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
358		if (error)
359			break;
360		watch = xs_dev_find_watch(u, wtoken);
361		if (watch == NULL) {
362			error = EINVAL;
363			break;
364		}
365
366		LIST_REMOVE(watch, list);
367		xs_unregister_watch(&watch->watch);
368		free(watch->watch.node, M_XENSTORE);
369		free(watch->token, M_XENSTORE);
370		free(watch, M_XENSTORE);
371		u->u.msg.len = sizeof(ok);
372		mtx_lock(&u->lock);
373		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
374		xs_queue_reply(u, ok, sizeof(ok));
375		mtx_unlock(&u->lock);
376		break;
377	default:
378		error = EINVAL;
379		break;
380	}
381
382	if (error != 0)
383		xs_dev_return_error(u, error, u->u.msg.req_id, u->u.msg.tx_id);
384
385	/* Reset the write buffer. */
386	u->len = 0;
387
388	return (0);
389}
390
391static int
392xs_dev_poll(struct cdev *dev, int events, struct thread *td)
393{
394	struct xs_dev_data *u;
395	int error, mask;
396
397	error = devfs_get_cdevpriv((void **)&u);
398	if (error != 0)
399		return (POLLERR);
400
401	/* we can always write */
402	mask = events & (POLLOUT | POLLWRNORM);
403
404	if (events & (POLLIN | POLLRDNORM)) {
405		if (u->read_cons != u->read_prod) {
406			mask |= events & (POLLIN | POLLRDNORM);
407		} else {
408			/* Record that someone is waiting */
409			selrecord(td, &u->ev_rsel);
410		}
411	}
412
413	return (mask);
414}
415
416static void
417xs_dev_dtor(void *arg)
418{
419	struct xs_dev_data *u = arg;
420	struct xs_dev_transaction *trans, *tmpt;
421	struct xs_dev_watch *watch, *tmpw;
422
423	seldrain(&u->ev_rsel);
424
425	LIST_FOREACH_SAFE(trans, &u->transactions, list, tmpt) {
426		xs_transaction_end(trans->handle, 1);
427		LIST_REMOVE(trans, list);
428		free(trans, M_XENSTORE);
429	}
430
431	LIST_FOREACH_SAFE(watch, &u->watches, list, tmpw) {
432		LIST_REMOVE(watch, list);
433		xs_unregister_watch(&watch->watch);
434		free(watch->watch.node, M_XENSTORE);
435		free(watch->token, M_XENSTORE);
436		free(watch, M_XENSTORE);
437	}
438	mtx_destroy(&u->lock);
439
440	free(u, M_XENSTORE);
441}
442
443static int
444xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
445{
446	struct xs_dev_data *u;
447	int error;
448
449	u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO);
450	mtx_init(&u->lock, "xsdev_lock", NULL, MTX_DEF);
451	LIST_INIT(&u->transactions);
452	LIST_INIT(&u->watches);
453	error = devfs_set_cdevpriv(u, xs_dev_dtor);
454	if (error != 0)
455		free(u, M_XENSTORE);
456
457	return (error);
458}
459
460static struct cdevsw xs_dev_cdevsw = {
461	.d_version = D_VERSION,
462	.d_read = xs_dev_read,
463	.d_write = xs_dev_write,
464	.d_open = xs_dev_open,
465	.d_poll = xs_dev_poll,
466	.d_name = "xs_dev",
467};
468
469/*------------------ Private Device Attachment Functions  --------------------*/
470/**
471 * \brief Identify instances of this device type in the system.
472 *
473 * \param driver  The driver performing this identify action.
474 * \param parent  The NewBus parent device for any devices this method adds.
475 */
476static void
477xs_dev_identify(driver_t *driver, device_t parent)
478{
479	/*
480	 * A single device instance for our driver is always present
481	 * in a system operating under Xen.
482	 */
483	BUS_ADD_CHILD(parent, 0, driver->name, 0);
484}
485
486/**
487 * \brief Probe for the existence of the Xenstore device
488 *
489 * \param dev  NewBus device_t for this instance.
490 *
491 * \return  Always returns 0 indicating success.
492 */
493static int
494xs_dev_probe(device_t dev)
495{
496
497	device_set_desc(dev, "Xenstore user-space device");
498	return (0);
499}
500
501/**
502 * \brief Attach the Xenstore device.
503 *
504 * \param dev  NewBus device_t for this instance.
505 *
506 * \return  On success, 0. Otherwise an errno value indicating the
507 *          type of failure.
508 */
509static int
510xs_dev_attach(device_t dev)
511{
512	struct cdev *xs_cdev;
513	struct sysctl_ctx_list *sysctl_ctx;
514	struct sysctl_oid *sysctl_tree;
515
516	sysctl_ctx = device_get_sysctl_ctx(dev);
517	sysctl_tree = device_get_sysctl_tree(dev);
518	if (sysctl_ctx == NULL || sysctl_tree == NULL)
519	    return (EINVAL);
520
521	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
522	    "max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0,
523	    "maximum amount of pending watch events to be delivered");
524
525	xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
526	    UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
527	if (xs_cdev == NULL)
528		return (EINVAL);
529
530	return (0);
531}
532
533/*-------------------- Private Device Attachment Data  -----------------------*/
534static device_method_t xs_dev_methods[] = {
535	/* Device interface */
536	DEVMETHOD(device_identify,	xs_dev_identify),
537	DEVMETHOD(device_probe,         xs_dev_probe),
538	DEVMETHOD(device_attach,        xs_dev_attach),
539
540	DEVMETHOD_END
541};
542
543DEFINE_CLASS_0(xs_dev, xs_dev_driver, xs_dev_methods, 0);
544
545DRIVER_MODULE(xs_dev, xenstore, xs_dev_driver, NULL, NULL);
546