1/*
2 * xenstore_dev.c
3 *
4 * Driver giving user-space access to the kernel's connection to the
5 * XenStore service.
6 *
7 * Copyright (c) 2005, Christian Limpach
8 * Copyright (c) 2005, Rusty Russell, IBM Corporation
9 *
10 * This file may be distributed separately from the Linux kernel, or
11 * incorporated into other software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/types.h>
36#include <sys/cdefs.h>
37#include <sys/errno.h>
38#include <sys/uio.h>
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/proc.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/conf.h>
45#include <sys/module.h>
46#include <sys/selinfo.h>
47#include <sys/sysctl.h>
48#include <sys/poll.h>
49
50#include <xen/xen-os.h>
51
52#include <xen/hypervisor.h>
53#include <xen/xenstore/xenstorevar.h>
54#include <xen/xenstore/xenstore_internal.h>
55
/*
 * Value copied into the max_pending field of every watch registered through
 * this device.  Adjustable at run time via the "max_pending_watch_events"
 * sysctl added in xs_dev_attach().
 */
static unsigned int max_pending_watches = 1000;
57
/*
 * Book-keeping for one XenStore transaction started through this device.
 * Entries live on the transactions list of the owning xs_dev_data.
 */
struct xs_dev_transaction {
	/* Linkage in xs_dev_data.transactions. */
	LIST_ENTRY(xs_dev_transaction) list;
	/* Transaction handle; handle.id is matched against request tx_ids. */
	struct xs_transaction handle;
};
62
/*
 * Book-keeping for one watch registered through this device.
 * Entries live on the watches list of the owning xs_dev_data.
 */
struct xs_dev_watch {
	/* Linkage in xs_dev_data.watches. */
	LIST_ENTRY(xs_dev_watch) list;
	/* Watch object passed to xs_register_watch()/xs_unregister_watch(). */
	struct xs_watch watch;
	/* Private copy of the user-supplied token (M_XENSTORE allocation). */
	char *token;
	/* Per-open state whose response queue receives the watch events. */
	struct xs_dev_data *user;
};
69
/*
 * Per-open state of the device.  Allocated in xs_dev_open(), attached to the
 * file via devfs_set_cdevpriv() and torn down by xs_dev_dtor().
 */
struct xs_dev_data {
	/* In-progress transaction. */
	LIST_HEAD(, xs_dev_transaction) transactions;

	/* Active watches. */
	LIST_HEAD(, xs_dev_watch) watches;

	/*
	 * Partial request.  len is the number of bytes accumulated so far in
	 * u.buffer by xs_dev_write(); u.msg overlays the message header found
	 * at the start of that buffer.
	 */
	unsigned int len;
	union {
		struct xsd_sockmsg msg;
		char buffer[PAGE_SIZE];
	} u;

	/*
	 * Response queue.  read_buffer is used as a ring: read_prod and
	 * read_cons are free-running counters that are reduced to a buffer
	 * offset with MASK_READ_IDX().  xs_queue_reply() advances read_prod,
	 * xs_dev_read() advances read_cons.
	 */
#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
	char read_buffer[PAGE_SIZE];
	unsigned int read_cons, read_prod;

	/* Serializes writes to the read buffer. */
	struct mtx lock;

	/* Polling structure (for reads only ATM). */
	struct selinfo ev_rsel;
};
95
96static void
97xs_queue_reply(struct xs_dev_data *u, const char *data, unsigned int len)
98{
99	unsigned int i;
100
101	for (i = 0; i < len; i++, u->read_prod++)
102		u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
103
104	KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer),
105	    ("xenstore reply too big"));
106
107	wakeup(u);
108	selwakeup(&u->ev_rsel);
109}
110
111static const char *
112xs_dev_error_to_string(int error)
113{
114	unsigned int i;
115
116	for (i = 0; i < nitems(xsd_errors); i++)
117		if (xsd_errors[i].errnum == error)
118			return (xsd_errors[i].errstring);
119
120	return (NULL);
121}
122
123static void
124xs_dev_return_error(struct xs_dev_data *u, int error, int req_id, int tx_id)
125{
126	struct xsd_sockmsg msg;
127	const char *payload;
128
129	msg.type = XS_ERROR;
130	msg.req_id = req_id;
131	msg.tx_id = tx_id;
132	payload = NULL;
133
134	payload = xs_dev_error_to_string(error);
135	if (payload == NULL)
136		payload = xs_dev_error_to_string(EINVAL);
137	KASSERT(payload != NULL, ("Unable to find string for EINVAL errno"));
138
139	msg.len = strlen(payload) + 1;
140
141	mtx_lock(&u->lock);
142	xs_queue_reply(u, (char *)&msg, sizeof(msg));
143	xs_queue_reply(u, payload, msg.len);
144	mtx_unlock(&u->lock);
145}
146
/*
 * Extract one NUL-terminated string from the byte range [*p, end).
 *
 * On success returns 0, stores the start of the string in *string_r and
 * advances *p to the byte following the terminator.  When no NUL is found
 * before `end', returns EINVAL and leaves *p and *string_r untouched.
 */
static int
xs_dev_watch_message_parse_string(const char **p, const char *end,
    const char **string_r)
{
	const char *start, *terminator;

	start = *p;
	terminator = memchr(start, '\0', end - start);
	if (terminator == NULL)
		return (EINVAL);

	*string_r = start;
	*p = terminator + 1;

	return (0);
}
162
163static int
164xs_dev_watch_message_parse(const struct xsd_sockmsg *msg, const char **path_r,
165    const char **token_r)
166{
167	const char *p, *end;
168	int error;
169
170	p = (const char *)msg + sizeof(*msg);
171	end = p + msg->len;
172	KASSERT(p <= end, ("payload overflow"));
173
174	error = xs_dev_watch_message_parse_string(&p, end, path_r);
175	if (error)
176		return (error);
177	error = xs_dev_watch_message_parse_string(&p, end, token_r);
178	if (error)
179		return (error);
180
181	return (0);
182}
183
184static struct xs_dev_watch *
185xs_dev_find_watch(struct xs_dev_data *u, const char *token)
186{
187	struct xs_dev_watch *watch;
188
189	LIST_FOREACH(watch, &u->watches, list)
190		if (strcmp(watch->token, token) == 0)
191			return (watch);
192
193	return (NULL);
194}
195
196static void
197xs_dev_watch_cb(struct xs_watch *watch, const char **vec, unsigned int len)
198{
199	struct xs_dev_watch *dwatch;
200	struct xsd_sockmsg msg;
201	char *payload;
202
203	dwatch = (struct xs_dev_watch *)watch->callback_data;
204	msg.type = XS_WATCH_EVENT;
205	msg.req_id = msg.tx_id = 0;
206	msg.len = strlen(vec[XS_WATCH_PATH]) + strlen(dwatch->token) + 2;
207
208	payload = malloc(msg.len, M_XENSTORE, M_WAITOK);
209	strcpy(payload, vec[XS_WATCH_PATH]);
210	strcpy(&payload[strlen(vec[XS_WATCH_PATH]) + 1], dwatch->token);
211	mtx_lock(&dwatch->user->lock);
212	xs_queue_reply(dwatch->user, (char *)&msg, sizeof(msg));
213	xs_queue_reply(dwatch->user, payload, msg.len);
214	mtx_unlock(&dwatch->user->lock);
215	free(payload, M_XENSTORE);
216}
217
218static struct xs_dev_transaction *
219xs_dev_find_transaction(struct xs_dev_data *u, uint32_t tx_id)
220{
221	struct xs_dev_transaction *trans;
222
223	LIST_FOREACH(trans, &u->transactions, list)
224		if (trans->handle.id == tx_id)
225			return (trans);
226
227	return (NULL);
228}
229
230static int
231xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
232{
233	int error;
234	struct xs_dev_data *u;
235
236	error = devfs_get_cdevpriv((void **)&u);
237	if (error != 0)
238		return (error);
239
240	while (u->read_prod == u->read_cons) {
241		error = tsleep(u, PCATCH, "xsdread", hz/10);
242		if (error && error != EWOULDBLOCK)
243			return (error);
244	}
245
246	while (uio->uio_resid > 0) {
247		if (u->read_cons == u->read_prod)
248			break;
249		error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
250		    1, uio);
251		if (error)
252			return (error);
253		u->read_cons++;
254	}
255	return (0);
256}
257
258static int
259xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
260{
261	int error;
262	const char *wpath, *wtoken;
263	struct xs_dev_data *u;
264	struct xs_dev_transaction *trans;
265	struct xs_dev_watch *watch;
266	void *reply;
267	static const char *ok = "OK";
268	int len = uio->uio_resid;
269
270	error = devfs_get_cdevpriv((void **)&u);
271	if (error != 0)
272		return (error);
273
274	if ((len + u->len) > sizeof(u->u.buffer))
275		return (EINVAL);
276
277	error = uiomove(u->u.buffer + u->len, len, uio);
278	if (error)
279		return (error);
280
281	u->len += len;
282	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
283		return (0);
284
285	switch (u->u.msg.type) {
286	case XS_TRANSACTION_START:
287	case XS_TRANSACTION_END:
288	case XS_DIRECTORY:
289	case XS_READ:
290	case XS_GET_PERMS:
291	case XS_RELEASE:
292	case XS_GET_DOMAIN_PATH:
293	case XS_WRITE:
294	case XS_MKDIR:
295	case XS_RM:
296	case XS_SET_PERMS:
297		/* Check that this transaction id is not hijacked. */
298		if (u->u.msg.tx_id != 0 &&
299		    xs_dev_find_transaction(u, u->u.msg.tx_id) == NULL) {
300			error = EINVAL;
301			break;
302		}
303		error = xs_dev_request_and_reply(&u->u.msg, &reply);
304		if (!error) {
305			if (u->u.msg.type == XS_TRANSACTION_START) {
306				trans = malloc(sizeof(*trans), M_XENSTORE,
307				    M_WAITOK);
308				trans->handle.id = strtoul(reply, NULL, 0);
309				LIST_INSERT_HEAD(&u->transactions, trans, list);
310			} else if (u->u.msg.type == XS_TRANSACTION_END) {
311				trans = xs_dev_find_transaction(u,
312				    u->u.msg.tx_id);
313				KASSERT(trans != NULL,
314				    ("Unable to find transaction"));
315				LIST_REMOVE(trans, list);
316				free(trans, M_XENSTORE);
317			}
318			mtx_lock(&u->lock);
319			xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
320			xs_queue_reply(u, (char *)reply, u->u.msg.len);
321			mtx_unlock(&u->lock);
322			free(reply, M_XENSTORE);
323		}
324		break;
325	case XS_WATCH:
326		u->u.msg.tx_id = 0;
327		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
328		if (error)
329			break;
330		if (xs_dev_find_watch(u, wtoken) != NULL) {
331			error = EINVAL;
332			break;
333		}
334
335		watch = malloc(sizeof(*watch), M_XENSTORE, M_WAITOK);
336		watch->watch.node = strdup(wpath, M_XENSTORE);
337		watch->watch.callback = xs_dev_watch_cb;
338		watch->watch.callback_data = (uintptr_t)watch;
339		watch->watch.max_pending = max_pending_watches;
340		watch->token = strdup(wtoken, M_XENSTORE);
341		watch->user = u;
342
343		error = xs_register_watch(&watch->watch);
344		if (error != 0) {
345			free(watch->token, M_XENSTORE);
346			free(watch->watch.node, M_XENSTORE);
347			free(watch, M_XENSTORE);
348			break;
349		}
350
351		LIST_INSERT_HEAD(&u->watches, watch, list);
352		u->u.msg.len = sizeof(ok);
353		mtx_lock(&u->lock);
354		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
355		xs_queue_reply(u, ok, sizeof(ok));
356		mtx_unlock(&u->lock);
357		break;
358	case XS_UNWATCH:
359		u->u.msg.tx_id = 0;
360		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
361		if (error)
362			break;
363		watch = xs_dev_find_watch(u, wtoken);
364		if (watch == NULL) {
365			error = EINVAL;
366			break;
367		}
368
369		LIST_REMOVE(watch, list);
370		xs_unregister_watch(&watch->watch);
371		free(watch->watch.node, M_XENSTORE);
372		free(watch->token, M_XENSTORE);
373		free(watch, M_XENSTORE);
374		u->u.msg.len = sizeof(ok);
375		mtx_lock(&u->lock);
376		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
377		xs_queue_reply(u, ok, sizeof(ok));
378		mtx_unlock(&u->lock);
379		break;
380	default:
381		error = EINVAL;
382		break;
383	}
384
385	if (error != 0)
386		xs_dev_return_error(u, error, u->u.msg.req_id, u->u.msg.tx_id);
387
388	/* Reset the write buffer. */
389	u->len = 0;
390
391	return (0);
392}
393
394static int
395xs_dev_poll(struct cdev *dev, int events, struct thread *td)
396{
397	struct xs_dev_data *u;
398	int error, mask;
399
400	error = devfs_get_cdevpriv((void **)&u);
401	if (error != 0)
402		return (POLLERR);
403
404	/* we can always write */
405	mask = events & (POLLOUT | POLLWRNORM);
406
407	if (events & (POLLIN | POLLRDNORM)) {
408		if (u->read_cons != u->read_prod) {
409			mask |= events & (POLLIN | POLLRDNORM);
410		} else {
411			/* Record that someone is waiting */
412			selrecord(td, &u->ev_rsel);
413		}
414	}
415
416	return (mask);
417}
418
419static void
420xs_dev_dtor(void *arg)
421{
422	struct xs_dev_data *u = arg;
423	struct xs_dev_transaction *trans, *tmpt;
424	struct xs_dev_watch *watch, *tmpw;
425
426	seldrain(&u->ev_rsel);
427
428	LIST_FOREACH_SAFE(trans, &u->transactions, list, tmpt) {
429		xs_transaction_end(trans->handle, 1);
430		LIST_REMOVE(trans, list);
431		free(trans, M_XENSTORE);
432	}
433
434	LIST_FOREACH_SAFE(watch, &u->watches, list, tmpw) {
435		LIST_REMOVE(watch, list);
436		xs_unregister_watch(&watch->watch);
437		free(watch->watch.node, M_XENSTORE);
438		free(watch->token, M_XENSTORE);
439		free(watch, M_XENSTORE);
440	}
441	mtx_destroy(&u->lock);
442
443	free(u, M_XENSTORE);
444}
445
446static int
447xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
448{
449	struct xs_dev_data *u;
450	int error;
451
452	u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO);
453	mtx_init(&u->lock, "xsdev_lock", NULL, MTX_DEF);
454	LIST_INIT(&u->transactions);
455	LIST_INIT(&u->watches);
456	error = devfs_set_cdevpriv(u, xs_dev_dtor);
457	if (error != 0)
458		free(u, M_XENSTORE);
459
460	return (error);
461}
462
/*
 * Character device entry points.  Per-open state is attached in the open
 * handler via devfs_set_cdevpriv(), and its destructor does the teardown,
 * which is why no d_close entry is listed.
 */
static struct cdevsw xs_dev_cdevsw = {
	.d_version = D_VERSION,
	.d_read = xs_dev_read,
	.d_write = xs_dev_write,
	.d_open = xs_dev_open,
	.d_poll = xs_dev_poll,
	.d_name = "xs_dev",
};
471
472/*------------------ Private Device Attachment Functions  --------------------*/
473/**
474 * \brief Identify instances of this device type in the system.
475 *
476 * \param driver  The driver performing this identify action.
477 * \param parent  The NewBus parent device for any devices this method adds.
478 */
479static void
480xs_dev_identify(driver_t *driver __unused, device_t parent)
481{
482	/*
483	 * A single device instance for our driver is always present
484	 * in a system operating under Xen.
485	 */
486	BUS_ADD_CHILD(parent, 0, driver->name, 0);
487}
488
489/**
490 * \brief Probe for the existence of the Xenstore device
491 *
492 * \param dev  NewBus device_t for this instance.
493 *
494 * \return  Always returns 0 indicating success.
495 */
496static int
497xs_dev_probe(device_t dev)
498{
499
500	device_set_desc(dev, "Xenstore user-space device");
501	return (0);
502}
503
504/**
505 * \brief Attach the Xenstore device.
506 *
507 * \param dev  NewBus device_t for this instance.
508 *
509 * \return  On success, 0. Otherwise an errno value indicating the
510 *          type of failure.
511 */
512static int
513xs_dev_attach(device_t dev)
514{
515	struct cdev *xs_cdev;
516	struct sysctl_ctx_list *sysctl_ctx;
517	struct sysctl_oid *sysctl_tree;
518
519	sysctl_ctx = device_get_sysctl_ctx(dev);
520	sysctl_tree = device_get_sysctl_tree(dev);
521	if (sysctl_ctx == NULL || sysctl_tree == NULL)
522	    return (EINVAL);
523
524	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
525	    "max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0,
526	    "maximum amount of pending watch events to be delivered");
527
528	xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
529	    UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
530	if (xs_cdev == NULL)
531		return (EINVAL);
532
533	return (0);
534}
535
536/*-------------------- Private Device Attachment Data  -----------------------*/
/* NewBus method table: maps the device interface onto the handlers above. */
static device_method_t xs_dev_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_dev_identify),
	DEVMETHOD(device_probe,         xs_dev_probe),
	DEVMETHOD(device_attach,        xs_dev_attach),

	DEVMETHOD_END
};
545
/*
 * Driver class and module registration.  The final DEFINE_CLASS_0 argument
 * is 0 (presumably the per-device softc size; confirm against the macro
 * definition), and DRIVER_MODULE names "xenstore" as its second argument
 * (presumably the parent bus this driver attaches under).
 */
DEFINE_CLASS_0(xs_dev, xs_dev_driver, xs_dev_methods, 0);
devclass_t xs_dev_devclass;

DRIVER_MODULE(xs_dev, xenstore, xs_dev_driver, xs_dev_devclass,
    NULL, NULL);
551