1/****************************************************************************** 2 * gnttab.c 3 * 4 * Two sets of functionality: 5 * 1. Granting foreign access to our memory reservation. 6 * 2. Accessing others' memory reservations via grant references. 7 * (i.e., mechanisms for both sender and recipient of grant references) 8 * 9 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2004, K A Fraser 11 */ 12 13#include <sys/param.h> 14#include <sys/systm.h> 15#include <sys/bus.h> 16#include <sys/conf.h> 17#include <sys/module.h> 18#include <sys/kernel.h> 19#include <sys/lock.h> 20#include <sys/malloc.h> 21#include <sys/mman.h> 22#include <sys/limits.h> 23#include <sys/rman.h> 24#include <machine/resource.h> 25#include <machine/cpu.h> 26 27#include <xen/xen-os.h> 28#include <xen/hypervisor.h> 29#include <xen/gnttab.h> 30 31#include <vm/vm.h> 32#include <vm/vm_kern.h> 33#include <vm/vm_extern.h> 34#include <vm/pmap.h> 35 36/* External tools reserve first few grant table entries. */ 37#define NR_RESERVED_ENTRIES 8 38#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) 39 40static grant_ref_t **gnttab_list; 41static unsigned int nr_grant_frames; 42static unsigned int boot_max_nr_grant_frames; 43static int gnttab_free_count; 44static grant_ref_t gnttab_free_head; 45static struct mtx gnttab_list_lock; 46 47/* 48 * Resource representing allocated physical address space 49 * for the grant table metainfo 50 */ 51static struct resource *gnttab_pseudo_phys_res; 52 53/* Resource id for allocated physical address space. */ 54static int gnttab_pseudo_phys_res_id; 55 56static grant_entry_v1_t *shared; 57 58static struct gnttab_free_callback *gnttab_free_callback_list = NULL; 59 60static int gnttab_expand(unsigned int req_entries); 61 62#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 63#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) 64 65static int 66get_free_entries(int count, int *entries) 67{ 68 int ref, error; 69 grant_ref_t head; 70 71 mtx_lock(&gnttab_list_lock); 72 if ((gnttab_free_count < count) && 73 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { 74 mtx_unlock(&gnttab_list_lock); 75 return (error); 76 } 77 ref = head = gnttab_free_head; 78 gnttab_free_count -= count; 79 while (count-- > 1) 80 head = gnttab_entry(head); 81 gnttab_free_head = gnttab_entry(head); 82 gnttab_entry(head) = GNTTAB_LIST_END; 83 mtx_unlock(&gnttab_list_lock); 84 85 *entries = ref; 86 return (0); 87} 88 89static void 90do_free_callbacks(void) 91{ 92 struct gnttab_free_callback *callback, *next; 93 94 callback = gnttab_free_callback_list; 95 gnttab_free_callback_list = NULL; 96 97 while (callback != NULL) { 98 next = callback->next; 99 if (gnttab_free_count >= callback->count) { 100 callback->next = NULL; 101 callback->fn(callback->arg); 102 } else { 103 callback->next = gnttab_free_callback_list; 104 gnttab_free_callback_list = callback; 105 } 106 callback = next; 107 } 108} 109 110static inline void 111check_free_callbacks(void) 112{ 113 if (__predict_false(gnttab_free_callback_list != NULL)) 114 do_free_callbacks(); 115} 116 117static void 118put_free_entry(grant_ref_t ref) 119{ 120 121 mtx_lock(&gnttab_list_lock); 122 gnttab_entry(ref) = gnttab_free_head; 123 gnttab_free_head = ref; 124 gnttab_free_count++; 125 check_free_callbacks(); 126 mtx_unlock(&gnttab_list_lock); 127} 128 129/* 130 * Public grant-issuing interface functions 131 */ 132 133int 134gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, 135 grant_ref_t *result) 136{ 137 int error, ref; 138 139 error = get_free_entries(1, &ref); 140 141 if (__predict_false(error)) 142 return (error); 143 144 shared[ref].frame = frame; 145 shared[ref].domid = domid; 146 wmb(); 147 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 148 149 if (result) 150 *result = ref; 151 152 return (0); 153} 154 155void 156gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 157 unsigned long frame, int readonly) 158{ 159 160 shared[ref].frame = frame; 161 shared[ref].domid = domid; 162 wmb(); 163 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 164} 165 166int 167gnttab_query_foreign_access(grant_ref_t ref) 168{ 169 uint16_t nflags; 170 171 nflags = shared[ref].flags; 172 173 return (nflags & (GTF_reading|GTF_writing)); 174} 175 176int 177gnttab_end_foreign_access_ref(grant_ref_t ref) 178{ 179 uint16_t flags; 180 181 while (!((flags = atomic_load_16(&shared[ref].flags)) & 182 (GTF_reading|GTF_writing))) 183 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 184 return (1); 185 186 printf("%s: WARNING: g.e. still in use!\n", __func__); 187 return (0); 188} 189 190void 191gnttab_end_foreign_access(grant_ref_t ref, void *page) 192{ 193 if (gnttab_end_foreign_access_ref(ref)) { 194 put_free_entry(ref); 195 if (page != NULL) { 196 free(page, M_DEVBUF); 197 } 198 } 199 else { 200 /* XXX This needs to be fixed so that the ref and page are 201 placed on a list to be freed up later. */ 202 printf("%s: WARNING: leaking g.e. and page still in use!\n", 203 __func__); 204 } 205} 206 207void 208gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) 209{ 210 grant_ref_t *last_ref; 211 grant_ref_t head; 212 grant_ref_t tail; 213 214 head = GNTTAB_LIST_END; 215 tail = *refs; 216 last_ref = refs + count; 217 while (refs != last_ref) { 218 if (gnttab_end_foreign_access_ref(*refs)) { 219 gnttab_entry(*refs) = head; 220 head = *refs; 221 } else { 222 /* 223 * XXX This needs to be fixed so that the ref 224 * is placed on a list to be freed up later. 225 */ 226 printf("%s: WARNING: leaking g.e. still in use!\n", 227 __func__); 228 count--; 229 } 230 refs++; 231 } 232 233 if (count != 0) { 234 mtx_lock(&gnttab_list_lock); 235 gnttab_free_count += count; 236 gnttab_entry(tail) = gnttab_free_head; 237 gnttab_free_head = head; 238 check_free_callbacks(); 239 mtx_unlock(&gnttab_list_lock); 240 } 241} 242 243int 244gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, 245 grant_ref_t *result) 246{ 247 int error, ref; 248 249 error = get_free_entries(1, &ref); 250 if (__predict_false(error)) 251 return (error); 252 253 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 254 255 *result = ref; 256 return (0); 257} 258 259void 260gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 261 unsigned long pfn) 262{ 263 shared[ref].frame = pfn; 264 shared[ref].domid = domid; 265 wmb(); 266 shared[ref].flags = GTF_accept_transfer; 267} 268 269unsigned long 270gnttab_end_foreign_transfer_ref(grant_ref_t ref) 271{ 272 unsigned long frame; 273 uint16_t flags; 274 275 /* 276 * If a transfer is not even yet started, try to reclaim the grant 277 * reference and return failure (== 0). 278 * 279 * NOTE: This is a loop since the atomic cmpset can fail multiple 280 * times. In normal operation it will be rare to execute more than 281 * twice. Attempting an attack would consume a great deal of 282 * attacker resources and be unlikely to prolong the loop very much. 283 */ 284 while (!((flags = atomic_load_16(&shared[ref].flags)) & 285 GTF_transfer_committed)) 286 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 287 return (0); 288 289 /* If a transfer is in progress then wait until it is completed. */ 290 while (!(flags & GTF_transfer_completed)) { 291 cpu_spinwait(); 292 flags = atomic_load_16(&shared[ref].flags); 293 } 294 295 /* Read the frame number /after/ reading completion status. */ 296 rmb(); 297 frame = shared[ref].frame; 298 KASSERT(frame != 0, ("grant table inconsistent")); 299 300 return (frame); 301} 302 303unsigned long 304gnttab_end_foreign_transfer(grant_ref_t ref) 305{ 306 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 307 308 put_free_entry(ref); 309 return (frame); 310} 311 312void 313gnttab_free_grant_reference(grant_ref_t ref) 314{ 315 316 put_free_entry(ref); 317} 318 319void 320gnttab_free_grant_references(grant_ref_t head) 321{ 322 grant_ref_t ref; 323 int count = 1; 324 325 if (head == GNTTAB_LIST_END) 326 return; 327 328 ref = head; 329 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 330 ref = gnttab_entry(ref); 331 count++; 332 } 333 mtx_lock(&gnttab_list_lock); 334 gnttab_entry(ref) = gnttab_free_head; 335 gnttab_free_head = head; 336 gnttab_free_count += count; 337 check_free_callbacks(); 338 mtx_unlock(&gnttab_list_lock); 339} 340 341int 342gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) 343{ 344 int ref, error; 345 346 error = get_free_entries(count, &ref); 347 if (__predict_false(error)) 348 return (error); 349 350 *head = ref; 351 return (0); 352} 353 354int 355gnttab_empty_grant_references(const grant_ref_t *private_head) 356{ 357 358 return (*private_head == GNTTAB_LIST_END); 359} 360 361int 362gnttab_claim_grant_reference(grant_ref_t *private_head) 363{ 364 grant_ref_t g = *private_head; 365 366 if (__predict_false(g == GNTTAB_LIST_END)) 367 return (g); 368 *private_head = gnttab_entry(g); 369 return (g); 370} 371 372void 373gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) 374{ 375 376 gnttab_entry(release) = *private_head; 377 *private_head = release; 378} 379 380void 381gnttab_request_free_callback(struct gnttab_free_callback *callback, 382 void (*fn)(void *), void *arg, uint16_t count) 383{ 384 385 mtx_lock(&gnttab_list_lock); 386 if (callback->next) 387 goto out; 388 callback->fn = fn; 389 callback->arg = arg; 390 callback->count = count; 391 callback->next = gnttab_free_callback_list; 392 gnttab_free_callback_list = callback; 393 check_free_callbacks(); 394 out: 395 mtx_unlock(&gnttab_list_lock); 396 397} 398 399void 400gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 401{ 402 struct gnttab_free_callback **pcb; 403 404 mtx_lock(&gnttab_list_lock); 405 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 406 if (*pcb == callback) { 407 *pcb = callback->next; 408 break; 409 } 410 } 411 mtx_unlock(&gnttab_list_lock); 412} 413 414static int 415grow_gnttab_list(unsigned int more_frames) 416{ 417 unsigned int new_nr_grant_frames, extra_entries, i; 418 419 new_nr_grant_frames = nr_grant_frames + more_frames; 420 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 421 422 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) 423 { 424 gnttab_list[i] = (grant_ref_t *) 425 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 426 427 if (!gnttab_list[i]) 428 goto grow_nomem; 429 } 430 431 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 432 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 433 gnttab_entry(i) = i + 1; 434 435 gnttab_entry(i) = gnttab_free_head; 436 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 437 gnttab_free_count += extra_entries; 438 439 nr_grant_frames = new_nr_grant_frames; 440 441 check_free_callbacks(); 442 443 return (0); 444 445grow_nomem: 446 for ( ; i >= nr_grant_frames; i--) 447 free(gnttab_list[i], M_DEVBUF); 448 return (ENOMEM); 449} 450 451static unsigned int 452__max_nr_grant_frames(void) 453{ 454 struct gnttab_query_size query; 455 int rc; 456 457 query.dom = DOMID_SELF; 458 459 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 460 if ((rc < 0) || (query.status != GNTST_okay)) 461 return (4); /* Legacy max supported number of frames */ 462 463 return (query.max_nr_frames); 464} 465 466static inline 467unsigned int max_nr_grant_frames(void) 468{ 469 470 return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); 471} 472 473#ifdef notyet 474/* 475 * XXX needed for backend support 476 * 477 */ 478static int 479map_pte_fn(pte_t *pte, struct page *pmd_page, 480 unsigned long addr, void *data) 481{ 482 unsigned long **frames = (unsigned long **)data; 483 484 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); 485 (*frames)++; 486 return 0; 487} 488 489static int 490unmap_pte_fn(pte_t *pte, struct page *pmd_page, 491 unsigned long addr, void *data) 492{ 493 494 set_pte_at(&init_mm, addr, pte, __pte(0)); 495 return 0; 496} 497#endif 498 499static vm_paddr_t resume_frames; 500 501static void 502gnttab_map(unsigned int start_idx, unsigned int end_idx) 503{ 504 struct xen_add_to_physmap xatp; 505 unsigned int i = end_idx; 506 507 /* 508 * Loop backwards, so that the first hypercall has the largest index, 509 * ensuring that the table will grow only once. 510 */ 511 do { 512 xatp.domid = DOMID_SELF; 513 xatp.idx = i; 514 xatp.space = XENMAPSPACE_grant_table; 515 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; 516 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 517 panic("HYPERVISOR_memory_op failed to map gnttab"); 518 } while (i-- > start_idx); 519} 520 521int 522gnttab_resume(device_t dev) 523{ 524 unsigned int max_nr_gframes, nr_gframes; 525 526 nr_gframes = nr_grant_frames; 527 max_nr_gframes = max_nr_grant_frames(); 528 if (max_nr_gframes < nr_gframes) 529 return (ENOSYS); 530 531 if (!resume_frames) { 532 KASSERT(dev != NULL, 533 ("No resume frames and no device provided")); 534 535 gnttab_pseudo_phys_res = xenmem_alloc(dev, 536 &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); 537 if (gnttab_pseudo_phys_res == NULL) 538 panic("Unable to reserve physical memory for gnttab"); 539 resume_frames = rman_get_start(gnttab_pseudo_phys_res); 540 shared = rman_get_virtual(gnttab_pseudo_phys_res); 541 } 542 gnttab_map(0, nr_gframes - 1); 543 544 return (0); 545} 546 547static int 548gnttab_expand(unsigned int req_entries) 549{ 550 unsigned int cur, extra; 551 552 cur = nr_grant_frames; 553 extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); 554 if (cur + extra > max_nr_grant_frames()) 555 return (ENOSPC); 556 557 gnttab_map(cur, cur + extra - 1); 558 559 return (grow_gnttab_list(extra)); 560} 561 562MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); 563 564/*------------------ Private Device Attachment Functions --------------------*/ 565/** 566 * \brief Identify instances of this device type in the system. 567 * 568 * \param driver The driver performing this identify action. 569 * \param parent The NewBus parent device for any devices this method adds. 570 */ 571static void 572granttable_identify(driver_t *driver, device_t parent) 573{ 574 575 KASSERT(xen_domain(), 576 ("Trying to attach grant-table device on non Xen domain")); 577 /* 578 * A single device instance for our driver is always present 579 * in a system operating under Xen. 580 */ 581 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) 582 panic("unable to attach Xen Grant-table device"); 583} 584 585/** 586 * \brief Probe for the existence of the Xen Grant-table device 587 * 588 * \param dev NewBus device_t for this instance. 589 * 590 * \return Always returns 0 indicating success. 591 */ 592static int 593granttable_probe(device_t dev) 594{ 595 596 device_set_desc(dev, "Xen Grant-table Device"); 597 return (BUS_PROBE_NOWILDCARD); 598} 599 600/** 601 * \brief Attach the Xen Grant-table device. 602 * 603 * \param dev NewBus device_t for this instance. 604 * 605 * \return On success, 0. Otherwise an errno value indicating the 606 * type of failure. 607 */ 608static int 609granttable_attach(device_t dev) 610{ 611 int i; 612 unsigned int nr_init_grefs; 613 614 nr_grant_frames = 1; 615 boot_max_nr_grant_frames = __max_nr_grant_frames(); 616 617 gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *), 618 M_DEVBUF, M_NOWAIT); 619 620 if (gnttab_list == NULL) 621 return (ENOMEM); 622 623 for (i = 0; i < nr_grant_frames; i++) { 624 gnttab_list[i] = (grant_ref_t *) 625 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 626 if (gnttab_list[i] == NULL) 627 goto ini_nomem; 628 } 629 630 if (gnttab_resume(dev)) 631 return (ENODEV); 632 633 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 634 635 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 636 gnttab_entry(i) = i + 1; 637 638 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 639 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 640 gnttab_free_head = NR_RESERVED_ENTRIES; 641 642 if (bootverbose) 643 printf("Grant table initialized\n"); 644 645 return (0); 646 647ini_nomem: 648 for (i--; i >= 0; i--) 649 free(gnttab_list[i], M_DEVBUF); 650 free(gnttab_list, M_DEVBUF); 651 return (ENOMEM); 652} 653 654/*-------------------- Private Device Attachment Data -----------------------*/ 655static device_method_t granttable_methods[] = { 656 /* Device interface */ 657 DEVMETHOD(device_identify, granttable_identify), 658 DEVMETHOD(device_probe, granttable_probe), 659 DEVMETHOD(device_attach, granttable_attach), 660 661 DEVMETHOD_END 662}; 663 664DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); 665 666DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, 667 SI_ORDER_FIRST); 668