1/******************************************************************************
2 * gnttab.c
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2005, Christopher Clark
10 * Copyright (c) 2004, K A Fraser
11 */
12
13#include <sys/param.h>
14#include <sys/systm.h>
15#include <sys/bus.h>
16#include <sys/conf.h>
17#include <sys/module.h>
18#include <sys/kernel.h>
19#include <sys/lock.h>
20#include <sys/malloc.h>
21#include <sys/mman.h>
22#include <sys/limits.h>
23#include <sys/rman.h>
24#include <machine/resource.h>
25#include <machine/cpu.h>
26
27#include <xen/xen-os.h>
28#include <xen/hypervisor.h>
29#include <xen/gnttab.h>
30
31#include <vm/vm.h>
32#include <vm/vm_kern.h>
33#include <vm/vm_extern.h>
34#include <vm/pmap.h>
35
36/* External tools reserve first few grant table entries. */
37#define NR_RESERVED_ENTRIES 8
38#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t))
39
40static grant_ref_t **gnttab_list;
41static unsigned int nr_grant_frames;
42static unsigned int boot_max_nr_grant_frames;
43static int gnttab_free_count;
44static grant_ref_t gnttab_free_head;
45static struct mtx gnttab_list_lock;
46
47/*
48 * Resource representing allocated physical address space
49 * for the grant table metainfo
50 */
51static struct resource *gnttab_pseudo_phys_res;
52
53/* Resource id for allocated physical address space. */
54static int gnttab_pseudo_phys_res_id;
55
56static grant_entry_v1_t *shared;
57
58static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
59
60static int gnttab_expand(unsigned int req_entries);
61
62#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
63#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
64
65static int
66get_free_entries(int count, int *entries)
67{
68	int ref, error;
69	grant_ref_t head;
70
71	mtx_lock(&gnttab_list_lock);
72	if ((gnttab_free_count < count) &&
73	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
74		mtx_unlock(&gnttab_list_lock);
75		return (error);
76	}
77	ref = head = gnttab_free_head;
78	gnttab_free_count -= count;
79	while (count-- > 1)
80		head = gnttab_entry(head);
81	gnttab_free_head = gnttab_entry(head);
82	gnttab_entry(head) = GNTTAB_LIST_END;
83	mtx_unlock(&gnttab_list_lock);
84
85	*entries = ref;
86	return (0);
87}
88
89static void
90do_free_callbacks(void)
91{
92	struct gnttab_free_callback *callback, *next;
93
94	callback = gnttab_free_callback_list;
95	gnttab_free_callback_list = NULL;
96
97	while (callback != NULL) {
98		next = callback->next;
99		if (gnttab_free_count >= callback->count) {
100			callback->next = NULL;
101			callback->fn(callback->arg);
102		} else {
103			callback->next = gnttab_free_callback_list;
104			gnttab_free_callback_list = callback;
105		}
106		callback = next;
107	}
108}
109
110static inline void
111check_free_callbacks(void)
112{
113	if (__predict_false(gnttab_free_callback_list != NULL))
114		do_free_callbacks();
115}
116
117static void
118put_free_entry(grant_ref_t ref)
119{
120
121	mtx_lock(&gnttab_list_lock);
122	gnttab_entry(ref) = gnttab_free_head;
123	gnttab_free_head = ref;
124	gnttab_free_count++;
125	check_free_callbacks();
126	mtx_unlock(&gnttab_list_lock);
127}
128
129/*
130 * Public grant-issuing interface functions
131 */
132
133int
134gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
135	grant_ref_t *result)
136{
137	int error, ref;
138
139	error = get_free_entries(1, &ref);
140
141	if (__predict_false(error))
142		return (error);
143
144	shared[ref].frame = frame;
145	shared[ref].domid = domid;
146	wmb();
147	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
148
149	if (result)
150		*result = ref;
151
152	return (0);
153}
154
155void
156gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
157				unsigned long frame, int readonly)
158{
159
160	shared[ref].frame = frame;
161	shared[ref].domid = domid;
162	wmb();
163	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
164}
165
166int
167gnttab_query_foreign_access(grant_ref_t ref)
168{
169	uint16_t nflags;
170
171	nflags = shared[ref].flags;
172
173	return (nflags & (GTF_reading|GTF_writing));
174}
175
176int
177gnttab_end_foreign_access_ref(grant_ref_t ref)
178{
179	uint16_t flags;
180
181	while (!((flags = atomic_load_16(&shared[ref].flags)) &
182	    (GTF_reading|GTF_writing)))
183		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
184			return (1);
185
186	printf("%s: WARNING: g.e. still in use!\n", __func__);
187	return (0);
188}
189
190void
191gnttab_end_foreign_access(grant_ref_t ref, void *page)
192{
193	if (gnttab_end_foreign_access_ref(ref)) {
194		put_free_entry(ref);
195		if (page != NULL) {
196			free(page, M_DEVBUF);
197		}
198	}
199	else {
200		/* XXX This needs to be fixed so that the ref and page are
201		   placed on a list to be freed up later. */
202		printf("%s: WARNING: leaking g.e. and page still in use!\n",
203		       __func__);
204	}
205}
206
207void
208gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
209{
210	grant_ref_t *last_ref;
211	grant_ref_t  head;
212	grant_ref_t  tail;
213
214	head = GNTTAB_LIST_END;
215	tail = *refs;
216	last_ref = refs + count;
217	while (refs != last_ref) {
218		if (gnttab_end_foreign_access_ref(*refs)) {
219			gnttab_entry(*refs) = head;
220			head = *refs;
221		} else {
222			/*
223			 * XXX This needs to be fixed so that the ref
224			 * is placed on a list to be freed up later.
225			 */
226			printf("%s: WARNING: leaking g.e. still in use!\n",
227			       __func__);
228			count--;
229		}
230		refs++;
231	}
232
233	if (count != 0) {
234		mtx_lock(&gnttab_list_lock);
235		gnttab_free_count += count;
236		gnttab_entry(tail) = gnttab_free_head;
237		gnttab_free_head = head;
238		check_free_callbacks();
239		mtx_unlock(&gnttab_list_lock);
240	}
241}
242
243int
244gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
245    grant_ref_t *result)
246{
247	int error, ref;
248
249	error = get_free_entries(1, &ref);
250	if (__predict_false(error))
251		return (error);
252
253	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
254
255	*result = ref;
256	return (0);
257}
258
259void
260gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
261	unsigned long pfn)
262{
263	shared[ref].frame = pfn;
264	shared[ref].domid = domid;
265	wmb();
266	shared[ref].flags = GTF_accept_transfer;
267}
268
269unsigned long
270gnttab_end_foreign_transfer_ref(grant_ref_t ref)
271{
272	unsigned long frame;
273	uint16_t      flags;
274
275	/*
276         * If a transfer is not even yet started, try to reclaim the grant
277         * reference and return failure (== 0).
278	 *
279	 * NOTE: This is a loop since the atomic cmpset can fail multiple
280	 * times.  In normal operation it will be rare to execute more than
281	 * twice.  Attempting an attack would consume a great deal of
282	 * attacker resources and be unlikely to prolong the loop very much.
283         */
284	while (!((flags = atomic_load_16(&shared[ref].flags)) &
285	    GTF_transfer_committed))
286		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
287			return (0);
288
289	/* If a transfer is in progress then wait until it is completed. */
290	while (!(flags & GTF_transfer_completed)) {
291		cpu_spinwait();
292		flags = atomic_load_16(&shared[ref].flags);
293	}
294
295	/* Read the frame number /after/ reading completion status. */
296	rmb();
297	frame = shared[ref].frame;
298	KASSERT(frame != 0, ("grant table inconsistent"));
299
300	return (frame);
301}
302
303unsigned long
304gnttab_end_foreign_transfer(grant_ref_t ref)
305{
306	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
307
308	put_free_entry(ref);
309	return (frame);
310}
311
312void
313gnttab_free_grant_reference(grant_ref_t ref)
314{
315
316	put_free_entry(ref);
317}
318
319void
320gnttab_free_grant_references(grant_ref_t head)
321{
322	grant_ref_t ref;
323	int count = 1;
324
325	if (head == GNTTAB_LIST_END)
326		return;
327
328	ref = head;
329	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
330		ref = gnttab_entry(ref);
331		count++;
332	}
333	mtx_lock(&gnttab_list_lock);
334	gnttab_entry(ref) = gnttab_free_head;
335	gnttab_free_head = head;
336	gnttab_free_count += count;
337	check_free_callbacks();
338	mtx_unlock(&gnttab_list_lock);
339}
340
341int
342gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
343{
344	int ref, error;
345
346	error = get_free_entries(count, &ref);
347	if (__predict_false(error))
348		return (error);
349
350	*head = ref;
351	return (0);
352}
353
354int
355gnttab_empty_grant_references(const grant_ref_t *private_head)
356{
357
358	return (*private_head == GNTTAB_LIST_END);
359}
360
361int
362gnttab_claim_grant_reference(grant_ref_t *private_head)
363{
364	grant_ref_t g = *private_head;
365
366	if (__predict_false(g == GNTTAB_LIST_END))
367		return (g);
368	*private_head = gnttab_entry(g);
369	return (g);
370}
371
372void
373gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
374{
375
376	gnttab_entry(release) = *private_head;
377	*private_head = release;
378}
379
380void
381gnttab_request_free_callback(struct gnttab_free_callback *callback,
382    void (*fn)(void *), void *arg, uint16_t count)
383{
384
385	mtx_lock(&gnttab_list_lock);
386	if (callback->next)
387		goto out;
388	callback->fn = fn;
389	callback->arg = arg;
390	callback->count = count;
391	callback->next = gnttab_free_callback_list;
392	gnttab_free_callback_list = callback;
393	check_free_callbacks();
394 out:
395	mtx_unlock(&gnttab_list_lock);
396
397}
398
399void
400gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
401{
402	struct gnttab_free_callback **pcb;
403
404	mtx_lock(&gnttab_list_lock);
405	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
406		if (*pcb == callback) {
407			*pcb = callback->next;
408			break;
409		}
410	}
411	mtx_unlock(&gnttab_list_lock);
412}
413
414static int
415grow_gnttab_list(unsigned int more_frames)
416{
417	unsigned int new_nr_grant_frames, extra_entries, i;
418
419	new_nr_grant_frames = nr_grant_frames + more_frames;
420	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
421
422	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
423	{
424		gnttab_list[i] = (grant_ref_t *)
425			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
426
427		if (!gnttab_list[i])
428			goto grow_nomem;
429	}
430
431	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
432	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
433		gnttab_entry(i) = i + 1;
434
435	gnttab_entry(i) = gnttab_free_head;
436	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
437	gnttab_free_count += extra_entries;
438
439	nr_grant_frames = new_nr_grant_frames;
440
441	check_free_callbacks();
442
443	return (0);
444
445grow_nomem:
446	for ( ; i >= nr_grant_frames; i--)
447		free(gnttab_list[i], M_DEVBUF);
448	return (ENOMEM);
449}
450
451static unsigned int
452__max_nr_grant_frames(void)
453{
454	struct gnttab_query_size query;
455	int rc;
456
457	query.dom = DOMID_SELF;
458
459	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
460	if ((rc < 0) || (query.status != GNTST_okay))
461		return (4); /* Legacy max supported number of frames */
462
463	return (query.max_nr_frames);
464}
465
466static inline
467unsigned int max_nr_grant_frames(void)
468{
469
470	return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
471}
472
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Disabled code: compiled only when "notyet" is defined.
 */

/*
 * Install a kernel mapping for the next machine frame in the cursor passed
 * through 'data', then advance the cursor.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **cursor = (unsigned long **)data;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*cursor)[0], PAGE_KERNEL));
	(*cursor)++;

	return (0);
}

/* Clear the page table entry at 'addr'. */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{

	set_pte_at(&init_mm, addr, pte, __pte(0));

	return (0);
}
#endif
498
499static vm_paddr_t resume_frames;
500
501static void
502gnttab_map(unsigned int start_idx, unsigned int end_idx)
503{
504	struct xen_add_to_physmap xatp;
505	unsigned int i = end_idx;
506
507	/*
508	 * Loop backwards, so that the first hypercall has the largest index,
509	 * ensuring that the table will grow only once.
510	 */
511	do {
512		xatp.domid = DOMID_SELF;
513		xatp.idx = i;
514		xatp.space = XENMAPSPACE_grant_table;
515		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
516		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
517			panic("HYPERVISOR_memory_op failed to map gnttab");
518	} while (i-- > start_idx);
519}
520
521int
522gnttab_resume(device_t dev)
523{
524	unsigned int max_nr_gframes, nr_gframes;
525
526	nr_gframes = nr_grant_frames;
527	max_nr_gframes = max_nr_grant_frames();
528	if (max_nr_gframes < nr_gframes)
529		return (ENOSYS);
530
531	if (!resume_frames) {
532		KASSERT(dev != NULL,
533		    ("No resume frames and no device provided"));
534
535		gnttab_pseudo_phys_res = xenmem_alloc(dev,
536		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
537		if (gnttab_pseudo_phys_res == NULL)
538			panic("Unable to reserve physical memory for gnttab");
539		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
540		shared = rman_get_virtual(gnttab_pseudo_phys_res);
541	}
542	gnttab_map(0, nr_gframes - 1);
543
544	return (0);
545}
546
547static int
548gnttab_expand(unsigned int req_entries)
549{
550	unsigned int cur, extra;
551
552	cur = nr_grant_frames;
553	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
554	if (cur + extra > max_nr_grant_frames())
555		return (ENOSPC);
556
557	gnttab_map(cur, cur + extra - 1);
558
559	return (grow_gnttab_list(extra));
560}
561
562MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
563
564/*------------------ Private Device Attachment Functions  --------------------*/
565/**
566 * \brief Identify instances of this device type in the system.
567 *
568 * \param driver  The driver performing this identify action.
569 * \param parent  The NewBus parent device for any devices this method adds.
570 */
571static void
572granttable_identify(driver_t *driver, device_t parent)
573{
574
575	KASSERT(xen_domain(),
576	    ("Trying to attach grant-table device on non Xen domain"));
577	/*
578	 * A single device instance for our driver is always present
579	 * in a system operating under Xen.
580	 */
581	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
582		panic("unable to attach Xen Grant-table device");
583}
584
585/**
586 * \brief Probe for the existence of the Xen Grant-table device
587 *
588 * \param dev  NewBus device_t for this instance.
589 *
590 * \return  Always returns 0 indicating success.
591 */
592static int
593granttable_probe(device_t dev)
594{
595
596	device_set_desc(dev, "Xen Grant-table Device");
597	return (BUS_PROBE_NOWILDCARD);
598}
599
600/**
601 * \brief Attach the Xen Grant-table device.
602 *
603 * \param dev  NewBus device_t for this instance.
604 *
605 * \return  On success, 0. Otherwise an errno value indicating the
606 *          type of failure.
607 */
608static int
609granttable_attach(device_t dev)
610{
611	int i;
612	unsigned int nr_init_grefs;
613
614	nr_grant_frames = 1;
615	boot_max_nr_grant_frames = __max_nr_grant_frames();
616
617	gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *),
618	    M_DEVBUF, M_NOWAIT);
619
620	if (gnttab_list == NULL)
621		return (ENOMEM);
622
623	for (i = 0; i < nr_grant_frames; i++) {
624		gnttab_list[i] = (grant_ref_t *)
625			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
626		if (gnttab_list[i] == NULL)
627			goto ini_nomem;
628	}
629
630	if (gnttab_resume(dev))
631		return (ENODEV);
632
633	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
634
635	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
636		gnttab_entry(i) = i + 1;
637
638	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
639	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
640	gnttab_free_head  = NR_RESERVED_ENTRIES;
641
642	if (bootverbose)
643		printf("Grant table initialized\n");
644
645	return (0);
646
647ini_nomem:
648	for (i--; i >= 0; i--)
649		free(gnttab_list[i], M_DEVBUF);
650	free(gnttab_list, M_DEVBUF);
651	return (ENOMEM);
652}
653
654/*-------------------- Private Device Attachment Data  -----------------------*/
655static device_method_t granttable_methods[] = {
656	/* Device interface */
657	DEVMETHOD(device_identify,	granttable_identify),
658	DEVMETHOD(device_probe,         granttable_probe),
659	DEVMETHOD(device_attach,        granttable_attach),
660
661	DEVMETHOD_END
662};
663
664DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
665
666DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL,
667    SI_ORDER_FIRST);
668