blkfront.c revision 315676
1/* 2 * XenBSD block device driver 3 * 4 * Copyright (c) 2010-2013 Spectra Logic Corporation 5 * Copyright (c) 2009 Scott Long, Yahoo! 6 * Copyright (c) 2009 Frank Suchomel, Citrix 7 * Copyright (c) 2009 Doug F. Rabson, Citrix 8 * Copyright (c) 2005 Kip Macy 9 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 10 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 11 * 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this software and associated documentation files (the "Software"), to 15 * deal in the Software without restriction, including without limitation the 16 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 17 * sell copies of the Software, and to permit persons to whom the Software is 18 * furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 * DEALINGS IN THE SOFTWARE. 29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkfront/blkfront.c 315676 2017-03-21 09:38:59Z royger $"); 33 34#include <sys/param.h> 35#include <sys/systm.h> 36#include <sys/malloc.h> 37#include <sys/kernel.h> 38#include <vm/vm.h> 39#include <vm/pmap.h> 40 41#include <sys/bio.h> 42#include <sys/bus.h> 43#include <sys/conf.h> 44#include <sys/module.h> 45#include <sys/sysctl.h> 46 47#include <machine/bus.h> 48#include <sys/rman.h> 49#include <machine/resource.h> 50#include <machine/intr_machdep.h> 51#include <machine/vmparam.h> 52#include <sys/bus_dma.h> 53 54#include <xen/xen-os.h> 55#include <xen/hypervisor.h> 56#include <xen/xen_intr.h> 57#include <xen/gnttab.h> 58#include <xen/interface/grant_table.h> 59#include <xen/interface/io/protocols.h> 60#include <xen/xenbus/xenbusvar.h> 61 62#include <machine/_inttypes.h> 63#include <machine/xen/xenvar.h> 64 65#include <geom/geom_disk.h> 66 67#include <dev/xen/blkfront/block.h> 68 69#include "xenbus_if.h" 70 71/*--------------------------- Forward Declarations ---------------------------*/ 72static void xbd_closing(device_t); 73static void xbd_startio(struct xbd_softc *sc); 74 75/*---------------------------------- Macros ----------------------------------*/ 76#if 0 77#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) 78#else 79#define DPRINTK(fmt, args...) 80#endif 81 82#define XBD_SECTOR_SHFT 9 83 84/*---------------------------- Global Static Data ----------------------------*/ 85static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); 86 87static int xbd_enable_indirect = 1; 88SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters"); 89SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN, 90 &xbd_enable_indirect, 0, "Enable xbd indirect segments"); 91 92/*---------------------------- Command Processing ----------------------------*/ 93static void 94xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) 95{ 96 if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) 97 return; 98 99 sc->xbd_flags |= xbd_flag; 100 sc->xbd_qfrozen_cnt++; 101} 102 103static void 104xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) 105{ 106 if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) 107 return; 108 109 if (sc->xbd_qfrozen_cnt == 0) 110 panic("%s: Thaw with flag 0x%x while not frozen.", 111 __func__, xbd_flag); 112 113 sc->xbd_flags &= ~xbd_flag; 114 sc->xbd_qfrozen_cnt--; 115} 116 117static void 118xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) 119{ 120 if ((cm->cm_flags & XBDCF_FROZEN) != 0) 121 return; 122 123 cm->cm_flags |= XBDCF_FROZEN|cm_flag; 124 xbd_freeze(sc, XBDF_NONE); 125} 126 127static void 128xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) 129{ 130 if ((cm->cm_flags & XBDCF_FROZEN) == 0) 131 return; 132 133 cm->cm_flags &= ~XBDCF_FROZEN; 134 xbd_thaw(sc, XBDF_NONE); 135} 136 137static inline void 138xbd_flush_requests(struct xbd_softc *sc) 139{ 140 int notify; 141 142 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); 143 144 if (notify) 145 xen_intr_signal(sc->xen_intr_handle); 146} 147 148static void 149xbd_free_command(struct xbd_command *cm) 150{ 151 152 KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, 153 ("Freeing command that is still on queue %d.", 154 cm->cm_flags & XBDCF_Q_MASK)); 155 156 cm->cm_flags = XBDCF_INITIALIZER; 157 cm->cm_bp = NULL; 158 cm->cm_complete = NULL; 159 xbd_enqueue_cm(cm, XBD_Q_FREE); 160 xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); 161} 162 163static void 164xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, 165 grant_ref_t * gref_head, int otherend_id, int readonly, 166 grant_ref_t * sg_ref, blkif_request_segment_t * sg) 167{ 168 struct blkif_request_segment *last_block_sg = sg + nsegs; 169 vm_paddr_t buffer_ma; 170 uint64_t fsect, lsect; 171 int ref; 172 173 while (sg < last_block_sg) { 174 buffer_ma = segs->ds_addr; 175 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 176 lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; 177 178 KASSERT(lsect <= 7, ("XEN disk driver data cannot " 179 "cross a page boundary")); 180 181 /* install a grant reference. */ 182 ref = gnttab_claim_grant_reference(gref_head); 183 184 /* 185 * GNTTAB_LIST_END == 0xffffffff, but it is private 186 * to gnttab.c. 187 */ 188 KASSERT(ref != ~0, ("grant_reference failed")); 189 190 gnttab_grant_foreign_access_ref( 191 ref, 192 otherend_id, 193 buffer_ma >> PAGE_SHIFT, 194 readonly); 195 196 *sg_ref = ref; 197 *sg = (struct blkif_request_segment) { 198 .gref = ref, 199 .first_sect = fsect, 200 .last_sect = lsect 201 }; 202 sg++; 203 sg_ref++; 204 segs++; 205 } 206} 207 208static void 209xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 210{ 211 struct xbd_softc *sc; 212 struct xbd_command *cm; 213 int op; 214 215 cm = arg; 216 sc = cm->cm_sc; 217 218 if (error) { 219 cm->cm_bp->bio_error = EIO; 220 biodone(cm->cm_bp); 221 xbd_free_command(cm); 222 return; 223 } 224 225 KASSERT(nsegs <= sc->xbd_max_request_segments, 226 ("Too many segments in a blkfront I/O")); 227 228 if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) { 229 blkif_request_t *ring_req; 230 231 /* Fill out a blkif_request_t structure. */ 232 ring_req = (blkif_request_t *) 233 RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); 234 sc->xbd_ring.req_prod_pvt++; 235 ring_req->id = cm->cm_id; 236 ring_req->operation = cm->cm_operation; 237 ring_req->sector_number = cm->cm_sector_number; 238 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; 239 ring_req->nr_segments = nsegs; 240 cm->cm_nseg = nsegs; 241 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, 242 xenbus_get_otherend_id(sc->xbd_dev), 243 cm->cm_operation == BLKIF_OP_WRITE, 244 cm->cm_sg_refs, ring_req->seg); 245 } else { 246 blkif_request_indirect_t *ring_req; 247 248 /* Fill out a blkif_request_indirect_t structure. */ 249 ring_req = (blkif_request_indirect_t *) 250 RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); 251 sc->xbd_ring.req_prod_pvt++; 252 ring_req->id = cm->cm_id; 253 ring_req->operation = BLKIF_OP_INDIRECT; 254 ring_req->indirect_op = cm->cm_operation; 255 ring_req->sector_number = cm->cm_sector_number; 256 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; 257 ring_req->nr_segments = nsegs; 258 cm->cm_nseg = nsegs; 259 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, 260 xenbus_get_otherend_id(sc->xbd_dev), 261 cm->cm_operation == BLKIF_OP_WRITE, 262 cm->cm_sg_refs, cm->cm_indirectionpages); 263 memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs, 264 sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages); 265 } 266 267 if (cm->cm_operation == BLKIF_OP_READ) 268 op = BUS_DMASYNC_PREREAD; 269 else if (cm->cm_operation == BLKIF_OP_WRITE) 270 op = BUS_DMASYNC_PREWRITE; 271 else 272 op = 0; 273 bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); 274 275 gnttab_free_grant_references(cm->cm_gref_head); 276 277 xbd_enqueue_cm(cm, XBD_Q_BUSY); 278 279 /* 280 * If bus dma had to asynchronously call us back to dispatch 281 * this command, we are no longer executing in the context of 282 * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to 283 * xbd_flush_requests() to publish this command to the backend 284 * along with any other commands that it could batch. 285 */ 286 if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) 287 xbd_flush_requests(sc); 288 289 return; 290} 291 292static int 293xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) 294{ 295 int error; 296 297 error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, 298 cm->cm_datalen, xbd_queue_cb, cm, 0); 299 if (error == EINPROGRESS) { 300 /* 301 * Maintain queuing order by freezing the queue. The next 302 * command may not require as many resources as the command 303 * we just attempted to map, so we can't rely on bus dma 304 * blocking for it too. 305 */ 306 xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); 307 return (0); 308 } 309 310 return (error); 311} 312 313static void 314xbd_restart_queue_callback(void *arg) 315{ 316 struct xbd_softc *sc = arg; 317 318 mtx_lock(&sc->xbd_io_lock); 319 320 xbd_thaw(sc, XBDF_GNT_SHORTAGE); 321 322 xbd_startio(sc); 323 324 mtx_unlock(&sc->xbd_io_lock); 325} 326 327static struct xbd_command * 328xbd_bio_command(struct xbd_softc *sc) 329{ 330 struct xbd_command *cm; 331 struct bio *bp; 332 333 if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) 334 return (NULL); 335 336 bp = xbd_dequeue_bio(sc); 337 if (bp == NULL) 338 return (NULL); 339 340 if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { 341 xbd_freeze(sc, XBDF_CM_SHORTAGE); 342 xbd_requeue_bio(sc, bp); 343 return (NULL); 344 } 345 346 if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, 347 &cm->cm_gref_head) != 0) { 348 gnttab_request_free_callback(&sc->xbd_callback, 349 xbd_restart_queue_callback, sc, 350 sc->xbd_max_request_segments); 351 xbd_freeze(sc, XBDF_GNT_SHORTAGE); 352 xbd_requeue_bio(sc, bp); 353 xbd_enqueue_cm(cm, XBD_Q_FREE); 354 return (NULL); 355 } 356 357 cm->cm_bp = bp; 358 cm->cm_data = bp->bio_data; 359 cm->cm_datalen = bp->bio_bcount; 360 cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; 361 362 switch (bp->bio_cmd) { 363 case BIO_READ: 364 cm->cm_operation = BLKIF_OP_READ; 365 break; 366 case BIO_WRITE: 367 cm->cm_operation = BLKIF_OP_WRITE; 368 if ((bp->bio_flags & BIO_ORDERED) != 0) { 369 if ((sc->xbd_flags & XBDF_BARRIER) != 0) { 370 cm->cm_operation = BLKIF_OP_WRITE_BARRIER; 371 } else { 372 /* 373 * Single step this command. 374 */ 375 cm->cm_flags |= XBDCF_Q_FREEZE; 376 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 377 /* 378 * Wait for in-flight requests to 379 * finish. 380 */ 381 xbd_freeze(sc, XBDF_WAIT_IDLE); 382 xbd_requeue_cm(cm, XBD_Q_READY); 383 return (NULL); 384 } 385 } 386 } 387 break; 388 case BIO_FLUSH: 389 if ((sc->xbd_flags & XBDF_FLUSH) != 0) 390 cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; 391 else if ((sc->xbd_flags & XBDF_BARRIER) != 0) 392 cm->cm_operation = BLKIF_OP_WRITE_BARRIER; 393 else 394 panic("flush request, but no flush support available"); 395 break; 396 default: 397 panic("unknown bio command %d", bp->bio_cmd); 398 } 399 400 return (cm); 401} 402 403/* 404 * Dequeue buffers and place them in the shared communication ring. 405 * Return when no more requests can be accepted or all buffers have 406 * been queued. 407 * 408 * Signal XEN once the ring has been filled out. 409 */ 410static void 411xbd_startio(struct xbd_softc *sc) 412{ 413 struct xbd_command *cm; 414 int error, queued = 0; 415 416 mtx_assert(&sc->xbd_io_lock, MA_OWNED); 417 418 if (sc->xbd_state != XBD_STATE_CONNECTED) 419 return; 420 421 while (!RING_FULL(&sc->xbd_ring)) { 422 423 if (sc->xbd_qfrozen_cnt != 0) 424 break; 425 426 cm = xbd_dequeue_cm(sc, XBD_Q_READY); 427 428 if (cm == NULL) 429 cm = xbd_bio_command(sc); 430 431 if (cm == NULL) 432 break; 433 434 if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { 435 /* 436 * Single step command. Future work is 437 * held off until this command completes. 438 */ 439 xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); 440 } 441 442 if ((error = xbd_queue_request(sc, cm)) != 0) { 443 printf("xbd_queue_request returned %d\n", error); 444 break; 445 } 446 queued++; 447 } 448 449 if (queued != 0) 450 xbd_flush_requests(sc); 451} 452 453static void 454xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) 455{ 456 struct bio *bp; 457 458 bp = cm->cm_bp; 459 460 if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { 461 disk_err(bp, "disk error" , -1, 0); 462 printf(" status: %x\n", cm->cm_status); 463 bp->bio_flags |= BIO_ERROR; 464 } 465 466 if (bp->bio_flags & BIO_ERROR) 467 bp->bio_error = EIO; 468 else 469 bp->bio_resid = 0; 470 471 xbd_free_command(cm); 472 biodone(bp); 473} 474 475static void 476xbd_int(void *xsc) 477{ 478 struct xbd_softc *sc = xsc; 479 struct xbd_command *cm; 480 blkif_response_t *bret; 481 RING_IDX i, rp; 482 int op; 483 484 mtx_lock(&sc->xbd_io_lock); 485 486 if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { 487 mtx_unlock(&sc->xbd_io_lock); 488 return; 489 } 490 491 again: 492 rp = sc->xbd_ring.sring->rsp_prod; 493 rmb(); /* Ensure we see queued responses up to 'rp'. */ 494 495 for (i = sc->xbd_ring.rsp_cons; i != rp;) { 496 bret = RING_GET_RESPONSE(&sc->xbd_ring, i); 497 cm = &sc->xbd_shadow[bret->id]; 498 499 xbd_remove_cm(cm, XBD_Q_BUSY); 500 gnttab_end_foreign_access_references(cm->cm_nseg, 501 cm->cm_sg_refs); 502 i++; 503 504 if (cm->cm_operation == BLKIF_OP_READ) 505 op = BUS_DMASYNC_POSTREAD; 506 else if (cm->cm_operation == BLKIF_OP_WRITE || 507 cm->cm_operation == BLKIF_OP_WRITE_BARRIER) 508 op = BUS_DMASYNC_POSTWRITE; 509 else 510 op = 0; 511 bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); 512 bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); 513 514 /* 515 * Release any hold this command has on future command 516 * dispatch. 517 */ 518 xbd_cm_thaw(sc, cm); 519 520 /* 521 * Directly call the i/o complete routine to save an 522 * an indirection in the common case. 523 */ 524 cm->cm_status = bret->status; 525 if (cm->cm_bp) 526 xbd_bio_complete(sc, cm); 527 else if (cm->cm_complete != NULL) 528 cm->cm_complete(cm); 529 else 530 xbd_free_command(cm); 531 } 532 533 sc->xbd_ring.rsp_cons = i; 534 535 if (i != sc->xbd_ring.req_prod_pvt) { 536 int more_to_do; 537 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); 538 if (more_to_do) 539 goto again; 540 } else { 541 sc->xbd_ring.sring->rsp_event = i + 1; 542 } 543 544 if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) 545 xbd_thaw(sc, XBDF_WAIT_IDLE); 546 547 xbd_startio(sc); 548 549 if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) 550 wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); 551 552 mtx_unlock(&sc->xbd_io_lock); 553} 554 555/*------------------------------- Dump Support -------------------------------*/ 556/** 557 * Quiesce the disk writes for a dump file before allowing the next buffer. 558 */ 559static void 560xbd_quiesce(struct xbd_softc *sc) 561{ 562 int mtd; 563 564 // While there are outstanding requests 565 while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 566 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); 567 if (mtd) { 568 /* Recieved request completions, update queue. */ 569 xbd_int(sc); 570 } 571 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 572 /* 573 * Still pending requests, wait for the disk i/o 574 * to complete. 575 */ 576 HYPERVISOR_yield(); 577 } 578 } 579} 580 581/* Kernel dump function for a paravirtualized disk device */ 582static void 583xbd_dump_complete(struct xbd_command *cm) 584{ 585 586 xbd_enqueue_cm(cm, XBD_Q_COMPLETE); 587} 588 589static int 590xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, 591 size_t length) 592{ 593 struct disk *dp = arg; 594 struct xbd_softc *sc = dp->d_drv1; 595 struct xbd_command *cm; 596 size_t chunk; 597 int sbp; 598 int rc = 0; 599 600 if (length <= 0) 601 return (rc); 602 603 xbd_quiesce(sc); /* All quiet on the western front. */ 604 605 /* 606 * If this lock is held, then this module is failing, and a 607 * successful kernel dump is highly unlikely anyway. 608 */ 609 mtx_lock(&sc->xbd_io_lock); 610 611 /* Split the 64KB block as needed */ 612 for (sbp=0; length > 0; sbp++) { 613 cm = xbd_dequeue_cm(sc, XBD_Q_FREE); 614 if (cm == NULL) { 615 mtx_unlock(&sc->xbd_io_lock); 616 device_printf(sc->xbd_dev, "dump: no more commands?\n"); 617 return (EBUSY); 618 } 619 620 if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, 621 &cm->cm_gref_head) != 0) { 622 xbd_free_command(cm); 623 mtx_unlock(&sc->xbd_io_lock); 624 device_printf(sc->xbd_dev, "no more grant allocs?\n"); 625 return (EBUSY); 626 } 627 628 chunk = length > sc->xbd_max_request_size ? 629 sc->xbd_max_request_size : length; 630 cm->cm_data = virtual; 631 cm->cm_datalen = chunk; 632 cm->cm_operation = BLKIF_OP_WRITE; 633 cm->cm_sector_number = offset / dp->d_sectorsize; 634 cm->cm_complete = xbd_dump_complete; 635 636 xbd_enqueue_cm(cm, XBD_Q_READY); 637 638 length -= chunk; 639 offset += chunk; 640 virtual = (char *) virtual + chunk; 641 } 642 643 /* Tell DOM0 to do the I/O */ 644 xbd_startio(sc); 645 mtx_unlock(&sc->xbd_io_lock); 646 647 /* Poll for the completion. */ 648 xbd_quiesce(sc); /* All quite on the eastern front */ 649 650 /* If there were any errors, bail out... */ 651 while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { 652 if (cm->cm_status != BLKIF_RSP_OKAY) { 653 device_printf(sc->xbd_dev, 654 "Dump I/O failed at sector %jd\n", 655 cm->cm_sector_number); 656 rc = EIO; 657 } 658 xbd_free_command(cm); 659 } 660 661 return (rc); 662} 663 664/*----------------------------- Disk Entrypoints -----------------------------*/ 665static int 666xbd_open(struct disk *dp) 667{ 668 struct xbd_softc *sc = dp->d_drv1; 669 670 if (sc == NULL) { 671 printf("xb%d: not found", sc->xbd_unit); 672 return (ENXIO); 673 } 674 675 sc->xbd_flags |= XBDF_OPEN; 676 sc->xbd_users++; 677 return (0); 678} 679 680static int 681xbd_close(struct disk *dp) 682{ 683 struct xbd_softc *sc = dp->d_drv1; 684 685 if (sc == NULL) 686 return (ENXIO); 687 sc->xbd_flags &= ~XBDF_OPEN; 688 if (--(sc->xbd_users) == 0) { 689 /* 690 * Check whether we have been instructed to close. We will 691 * have ignored this request initially, as the device was 692 * still mounted. 693 */ 694 if (xenbus_get_otherend_state(sc->xbd_dev) == 695 XenbusStateClosing) 696 xbd_closing(sc->xbd_dev); 697 } 698 return (0); 699} 700 701static int 702xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 703{ 704 struct xbd_softc *sc = dp->d_drv1; 705 706 if (sc == NULL) 707 return (ENXIO); 708 709 return (ENOTTY); 710} 711 712/* 713 * Read/write routine for a buffer. Finds the proper unit, place it on 714 * the sortq and kick the controller. 715 */ 716static void 717xbd_strategy(struct bio *bp) 718{ 719 struct xbd_softc *sc = bp->bio_disk->d_drv1; 720 721 /* bogus disk? */ 722 if (sc == NULL) { 723 bp->bio_error = EINVAL; 724 bp->bio_flags |= BIO_ERROR; 725 bp->bio_resid = bp->bio_bcount; 726 biodone(bp); 727 return; 728 } 729 730 /* 731 * Place it in the queue of disk activities for this disk 732 */ 733 mtx_lock(&sc->xbd_io_lock); 734 735 xbd_enqueue_bio(sc, bp); 736 xbd_startio(sc); 737 738 mtx_unlock(&sc->xbd_io_lock); 739 return; 740} 741 742/*------------------------------ Ring Management -----------------------------*/ 743static int 744xbd_alloc_ring(struct xbd_softc *sc) 745{ 746 blkif_sring_t *sring; 747 uintptr_t sring_page_addr; 748 int error; 749 int i; 750 751 sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, 752 M_NOWAIT|M_ZERO); 753 if (sring == NULL) { 754 xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); 755 return (ENOMEM); 756 } 757 SHARED_RING_INIT(sring); 758 FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); 759 760 for (i = 0, sring_page_addr = (uintptr_t)sring; 761 i < sc->xbd_ring_pages; 762 i++, sring_page_addr += PAGE_SIZE) { 763 764 error = xenbus_grant_ring(sc->xbd_dev, 765 (vtomach(sring_page_addr) >> PAGE_SHIFT), 766 &sc->xbd_ring_ref[i]); 767 if (error) { 768 xenbus_dev_fatal(sc->xbd_dev, error, 769 "granting ring_ref(%d)", i); 770 return (error); 771 } 772 } 773 if (sc->xbd_ring_pages == 1) { 774 error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), 775 "ring-ref", "%u", sc->xbd_ring_ref[0]); 776 if (error) { 777 xenbus_dev_fatal(sc->xbd_dev, error, 778 "writing %s/ring-ref", 779 xenbus_get_node(sc->xbd_dev)); 780 return (error); 781 } 782 } else { 783 for (i = 0; i < sc->xbd_ring_pages; i++) { 784 char ring_ref_name[]= "ring_refXX"; 785 786 snprintf(ring_ref_name, sizeof(ring_ref_name), 787 "ring-ref%u", i); 788 error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), 789 ring_ref_name, "%u", sc->xbd_ring_ref[i]); 790 if (error) { 791 xenbus_dev_fatal(sc->xbd_dev, error, 792 "writing %s/%s", 793 xenbus_get_node(sc->xbd_dev), 794 ring_ref_name); 795 return (error); 796 } 797 } 798 } 799 800 error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, 801 xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, 802 INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); 803 if (error) { 804 xenbus_dev_fatal(sc->xbd_dev, error, 805 "xen_intr_alloc_and_bind_local_port failed"); 806 return (error); 807 } 808 809 return (0); 810} 811 812static void 813xbd_free_ring(struct xbd_softc *sc) 814{ 815 int i; 816 817 if (sc->xbd_ring.sring == NULL) 818 return; 819 820 for (i = 0; i < sc->xbd_ring_pages; i++) { 821 if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { 822 gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); 823 sc->xbd_ring_ref[i] = GRANT_REF_INVALID; 824 } 825 } 826 free(sc->xbd_ring.sring, M_XENBLOCKFRONT); 827 sc->xbd_ring.sring = NULL; 828} 829 830/*-------------------------- Initialization/Teardown -------------------------*/ 831static int 832xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) 833{ 834 struct sbuf sb; 835 int feature_cnt; 836 837 sbuf_new(&sb, features, len, SBUF_FIXEDLEN); 838 839 feature_cnt = 0; 840 if ((sc->xbd_flags & XBDF_FLUSH) != 0) { 841 sbuf_printf(&sb, "flush"); 842 feature_cnt++; 843 } 844 845 if ((sc->xbd_flags & XBDF_BARRIER) != 0) { 846 if (feature_cnt != 0) 847 sbuf_printf(&sb, ", "); 848 sbuf_printf(&sb, "write_barrier"); 849 feature_cnt++; 850 } 851 852 if ((sc->xbd_flags & XBDF_DISCARD) != 0) { 853 if (feature_cnt != 0) 854 sbuf_printf(&sb, ", "); 855 sbuf_printf(&sb, "discard"); 856 feature_cnt++; 857 } 858 859 if ((sc->xbd_flags & XBDF_PERSISTENT) != 0) { 860 if (feature_cnt != 0) 861 sbuf_printf(&sb, ", "); 862 sbuf_printf(&sb, "persistent_grants"); 863 feature_cnt++; 864 } 865 866 (void) sbuf_finish(&sb); 867 return (sbuf_len(&sb)); 868} 869 870static int 871xbd_sysctl_features(SYSCTL_HANDLER_ARGS) 872{ 873 char features[80]; 874 struct xbd_softc *sc = arg1; 875 int error; 876 int len; 877 878 error = sysctl_wire_old_buffer(req, 0); 879 if (error != 0) 880 return (error); 881 882 len = xbd_feature_string(sc, features, sizeof(features)); 883 884 /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */ 885 return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); 886} 887 888static void 889xbd_setup_sysctl(struct xbd_softc *xbd) 890{ 891 struct sysctl_ctx_list *sysctl_ctx = NULL; 892 struct sysctl_oid *sysctl_tree = NULL; 893 struct sysctl_oid_list *children; 894 895 sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); 896 if (sysctl_ctx == NULL) 897 return; 898 899 sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); 900 if (sysctl_tree == NULL) 901 return; 902 903 children = SYSCTL_CHILDREN(sysctl_tree); 904 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 905 "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, 906 "maximum outstanding requests (negotiated)"); 907 908 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 909 "max_request_segments", CTLFLAG_RD, 910 &xbd->xbd_max_request_segments, 0, 911 "maximum number of pages per requests (negotiated)"); 912 913 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 914 "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, 915 "maximum size in bytes of a request (negotiated)"); 916 917 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 918 "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, 919 "communication channel pages (negotiated)"); 920 921 SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, 922 "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, 923 xbd_sysctl_features, "A", "protocol features (negotiated)"); 924} 925 926/* 927 * Translate Linux major/minor to an appropriate name and unit 928 * number. For HVM guests, this allows us to use the same drive names 929 * with blkfront as the emulated drives, easing transition slightly. 930 */ 931static void 932xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) 933{ 934 static struct vdev_info { 935 int major; 936 int shift; 937 int base; 938 const char *name; 939 } info[] = { 940 {3, 6, 0, "ada"}, /* ide0 */ 941 {22, 6, 2, "ada"}, /* ide1 */ 942 {33, 6, 4, "ada"}, /* ide2 */ 943 {34, 6, 6, "ada"}, /* ide3 */ 944 {56, 6, 8, "ada"}, /* ide4 */ 945 {57, 6, 10, "ada"}, /* ide5 */ 946 {88, 6, 12, "ada"}, /* ide6 */ 947 {89, 6, 14, "ada"}, /* ide7 */ 948 {90, 6, 16, "ada"}, /* ide8 */ 949 {91, 6, 18, "ada"}, /* ide9 */ 950 951 {8, 4, 0, "da"}, /* scsi disk0 */ 952 {65, 4, 16, "da"}, /* scsi disk1 */ 953 {66, 4, 32, "da"}, /* scsi disk2 */ 954 {67, 4, 48, "da"}, /* scsi disk3 */ 955 {68, 4, 64, "da"}, /* scsi disk4 */ 956 {69, 4, 80, "da"}, /* scsi disk5 */ 957 {70, 4, 96, "da"}, /* scsi disk6 */ 958 {71, 4, 112, "da"}, /* scsi disk7 */ 959 {128, 4, 128, "da"}, /* scsi disk8 */ 960 {129, 4, 144, "da"}, /* scsi disk9 */ 961 {130, 4, 160, "da"}, /* scsi disk10 */ 962 {131, 4, 176, "da"}, /* scsi disk11 */ 963 {132, 4, 192, "da"}, /* scsi disk12 */ 964 {133, 4, 208, "da"}, /* scsi disk13 */ 965 {134, 4, 224, "da"}, /* scsi disk14 */ 966 {135, 4, 240, "da"}, /* scsi disk15 */ 967 968 {202, 4, 0, "xbd"}, /* xbd */ 969 970 {0, 0, 0, NULL}, 971 }; 972 int major = vdevice >> 8; 973 int minor = vdevice & 0xff; 974 int i; 975 976 if (vdevice & (1 << 28)) { 977 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 978 *name = "xbd"; 979 return; 980 } 981 982 for (i = 0; info[i].major; i++) { 983 if (info[i].major == major) { 984 *unit = info[i].base + (minor >> info[i].shift); 985 *name = info[i].name; 986 return; 987 } 988 } 989 990 *unit = minor >> 4; 991 *name = "xbd"; 992} 993 994int 995xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, 996 int vdevice, uint16_t vdisk_info, unsigned long sector_size, 997 unsigned long phys_sector_size) 998{ 999 char features[80]; 1000 int unit, error = 0; 1001 const char *name; 1002 1003 xbd_vdevice_to_unit(vdevice, &unit, &name); 1004 1005 sc->xbd_unit = unit; 1006 1007 if (strcmp(name, "xbd") != 0) 1008 device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); 1009 1010 if (xbd_feature_string(sc, features, sizeof(features)) > 0) { 1011 device_printf(sc->xbd_dev, "features: %s\n", 1012 features); 1013 } 1014 1015 sc->xbd_disk = disk_alloc(); 1016 sc->xbd_disk->d_unit = sc->xbd_unit; 1017 sc->xbd_disk->d_open = xbd_open; 1018 sc->xbd_disk->d_close = xbd_close; 1019 sc->xbd_disk->d_ioctl = xbd_ioctl; 1020 sc->xbd_disk->d_strategy = xbd_strategy; 1021 sc->xbd_disk->d_dump = xbd_dump; 1022 sc->xbd_disk->d_name = name; 1023 sc->xbd_disk->d_drv1 = sc; 1024 sc->xbd_disk->d_sectorsize = sector_size; 1025 sc->xbd_disk->d_stripesize = phys_sector_size; 1026 sc->xbd_disk->d_stripeoffset = 0; 1027 1028 sc->xbd_disk->d_mediasize = sectors * sector_size; 1029 sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; 1030 sc->xbd_disk->d_flags = 0; 1031 if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { 1032 sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; 1033 device_printf(sc->xbd_dev, 1034 "synchronize cache commands enabled.\n"); 1035 } 1036 disk_create(sc->xbd_disk, DISK_VERSION); 1037 1038 return error; 1039} 1040 1041static void 1042xbd_free(struct xbd_softc *sc) 1043{ 1044 int i; 1045 1046 /* Prevent new requests being issued until we fix things up. */ 1047 mtx_lock(&sc->xbd_io_lock); 1048 sc->xbd_state = XBD_STATE_DISCONNECTED; 1049 mtx_unlock(&sc->xbd_io_lock); 1050 1051 /* Free resources associated with old device channel. */ 1052 xbd_free_ring(sc); 1053 if (sc->xbd_shadow) { 1054 1055 for (i = 0; i < sc->xbd_max_requests; i++) { 1056 struct xbd_command *cm; 1057 1058 cm = &sc->xbd_shadow[i]; 1059 if (cm->cm_sg_refs != NULL) { 1060 free(cm->cm_sg_refs, M_XENBLOCKFRONT); 1061 cm->cm_sg_refs = NULL; 1062 } 1063 1064 if (cm->cm_indirectionpages != NULL) { 1065 gnttab_end_foreign_access_references( 1066 sc->xbd_max_request_indirectpages, 1067 &cm->cm_indirectionrefs[0]); 1068 contigfree(cm->cm_indirectionpages, PAGE_SIZE * 1069 sc->xbd_max_request_indirectpages, 1070 M_XENBLOCKFRONT); 1071 cm->cm_indirectionpages = NULL; 1072 } 1073 1074 bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); 1075 } 1076 free(sc->xbd_shadow, M_XENBLOCKFRONT); 1077 sc->xbd_shadow = NULL; 1078 1079 bus_dma_tag_destroy(sc->xbd_io_dmat); 1080 1081 xbd_initq_cm(sc, XBD_Q_FREE); 1082 xbd_initq_cm(sc, XBD_Q_READY); 1083 xbd_initq_cm(sc, XBD_Q_COMPLETE); 1084 } 1085 1086 xen_intr_unbind(&sc->xen_intr_handle); 1087 1088} 1089 1090/*--------------------------- State Change Handlers --------------------------*/ 1091static void 1092xbd_initialize(struct xbd_softc *sc) 1093{ 1094 const char *otherend_path; 1095 const char *node_path; 1096 uint32_t max_ring_page_order; 1097 int error; 1098 1099 if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { 1100 /* Initialization has already been performed. */ 1101 return; 1102 } 1103 1104 /* 1105 * Protocol defaults valid even if negotiation for a 1106 * setting fails. 1107 */ 1108 max_ring_page_order = 0; 1109 sc->xbd_ring_pages = 1; 1110 1111 /* 1112 * Protocol negotiation. 1113 * 1114 * \note xs_gather() returns on the first encountered error, so 1115 * we must use independant calls in order to guarantee 1116 * we don't miss information in a sparsly populated back-end 1117 * tree. 1118 * 1119 * \note xs_scanf() does not update variables for unmatched 1120 * fields. 1121 */ 1122 otherend_path = xenbus_get_otherend_path(sc->xbd_dev); 1123 node_path = xenbus_get_node(sc->xbd_dev); 1124 1125 /* Support both backend schemes for relaying ring page limits. */ 1126 (void)xs_scanf(XST_NIL, otherend_path, 1127 "max-ring-page-order", NULL, "%" PRIu32, 1128 &max_ring_page_order); 1129 sc->xbd_ring_pages = 1 << max_ring_page_order; 1130 (void)xs_scanf(XST_NIL, otherend_path, 1131 "max-ring-pages", NULL, "%" PRIu32, 1132 &sc->xbd_ring_pages); 1133 if (sc->xbd_ring_pages < 1) 1134 sc->xbd_ring_pages = 1; 1135 1136 if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { 1137 device_printf(sc->xbd_dev, 1138 "Back-end specified ring-pages of %u " 1139 "limited to front-end limit of %u.\n", 1140 sc->xbd_ring_pages, XBD_MAX_RING_PAGES); 1141 sc->xbd_ring_pages = XBD_MAX_RING_PAGES; 1142 } 1143 1144 if (powerof2(sc->xbd_ring_pages) == 0) { 1145 uint32_t new_page_limit; 1146 1147 new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); 1148 device_printf(sc->xbd_dev, 1149 "Back-end specified ring-pages of %u " 1150 "is not a power of 2. Limited to %u.\n", 1151 sc->xbd_ring_pages, new_page_limit); 1152 sc->xbd_ring_pages = new_page_limit; 1153 } 1154 1155 sc->xbd_max_requests = 1156 BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); 1157 if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { 1158 device_printf(sc->xbd_dev, 1159 "Back-end specified max_requests of %u " 1160 "limited to front-end limit of %zu.\n", 1161 sc->xbd_max_requests, XBD_MAX_REQUESTS); 1162 sc->xbd_max_requests = XBD_MAX_REQUESTS; 1163 } 1164 1165 if (xbd_alloc_ring(sc) != 0) 1166 return; 1167 1168 /* Support both backend schemes for relaying ring page limits. */ 1169 if (sc->xbd_ring_pages > 1) { 1170 error = xs_printf(XST_NIL, node_path, 1171 "num-ring-pages","%u", 1172 sc->xbd_ring_pages); 1173 if (error) { 1174 xenbus_dev_fatal(sc->xbd_dev, error, 1175 "writing %s/num-ring-pages", 1176 node_path); 1177 return; 1178 } 1179 1180 error = xs_printf(XST_NIL, node_path, 1181 "ring-page-order", "%u", 1182 fls(sc->xbd_ring_pages) - 1); 1183 if (error) { 1184 xenbus_dev_fatal(sc->xbd_dev, error, 1185 "writing %s/ring-page-order", 1186 node_path); 1187 return; 1188 } 1189 } 1190 1191 error = xs_printf(XST_NIL, node_path, "event-channel", 1192 "%u", xen_intr_port(sc->xen_intr_handle)); 1193 if (error) { 1194 xenbus_dev_fatal(sc->xbd_dev, error, 1195 "writing %s/event-channel", 1196 node_path); 1197 return; 1198 } 1199 1200 error = xs_printf(XST_NIL, node_path, "protocol", 1201 "%s", XEN_IO_PROTO_ABI_NATIVE); 1202 if (error) { 1203 xenbus_dev_fatal(sc->xbd_dev, error, 1204 "writing %s/protocol", 1205 node_path); 1206 return; 1207 } 1208 1209 xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); 1210} 1211 1212/* 1213 * Invoked when the backend is finally 'ready' (and has published 1214 * the details about the physical device - #sectors, size, etc). 1215 */ 1216static void 1217xbd_connect(struct xbd_softc *sc) 1218{ 1219 device_t dev = sc->xbd_dev; 1220 unsigned long sectors, sector_size, phys_sector_size; 1221 unsigned int binfo; 1222 int err, feature_barrier, feature_flush; 1223 int i, j; 1224 1225 if (sc->xbd_state == XBD_STATE_CONNECTED || 1226 sc->xbd_state == XBD_STATE_SUSPENDED) 1227 return; 1228 1229 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 1230 1231 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1232 "sectors", "%lu", §ors, 1233 "info", "%u", &binfo, 1234 "sector-size", "%lu", §or_size, 1235 NULL); 1236 if (err) { 1237 xenbus_dev_fatal(dev, err, 1238 "reading backend fields at %s", 1239 xenbus_get_otherend_path(dev)); 1240 return; 1241 } 1242 if ((sectors == 0) || (sector_size == 0)) { 1243 xenbus_dev_fatal(dev, 0, 1244 "invalid parameters from %s:" 1245 " sectors = %lu, sector_size = %lu", 1246 xenbus_get_otherend_path(dev), 1247 sectors, sector_size); 1248 return; 1249 } 1250 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1251 "physical-sector-size", "%lu", &phys_sector_size, 1252 NULL); 1253 if (err || phys_sector_size <= sector_size) 1254 phys_sector_size = 0; 1255 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1256 "feature-barrier", "%d", &feature_barrier, 1257 NULL); 1258 if (err == 0 && feature_barrier != 0) 1259 sc->xbd_flags |= XBDF_BARRIER; 1260 1261 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1262 "feature-flush-cache", "%d", &feature_flush, 1263 NULL); 1264 if (err == 0 && feature_flush != 0) 1265 sc->xbd_flags |= XBDF_FLUSH; 1266 1267 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1268 "feature-max-indirect-segments", "%" PRIu32, 1269 &sc->xbd_max_request_segments, NULL); 1270 if ((err != 0) || (xbd_enable_indirect == 0)) 1271 sc->xbd_max_request_segments = 0; 1272 if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS) 1273 sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS; 1274 if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS)) 1275 sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS); 1276 sc->xbd_max_request_indirectpages = 1277 XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments); 1278 if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) 1279 sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1280 sc->xbd_max_request_size = 1281 XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); 1282 1283 /* Allocate datastructures based on negotiated values. */ 1284 err = bus_dma_tag_create( 1285 bus_get_dma_tag(sc->xbd_dev), /* parent */ 1286 512, PAGE_SIZE, /* algnmnt, boundary */ 1287 BUS_SPACE_MAXADDR, /* lowaddr */ 1288 BUS_SPACE_MAXADDR, /* highaddr */ 1289 NULL, NULL, /* filter, filterarg */ 1290 sc->xbd_max_request_size, 1291 sc->xbd_max_request_segments, 1292 PAGE_SIZE, /* maxsegsize */ 1293 BUS_DMA_ALLOCNOW, /* flags */ 1294 busdma_lock_mutex, /* lockfunc */ 1295 &sc->xbd_io_lock, /* lockarg */ 1296 &sc->xbd_io_dmat); 1297 if (err != 0) { 1298 xenbus_dev_fatal(sc->xbd_dev, err, 1299 "Cannot allocate parent DMA tag\n"); 1300 return; 1301 } 1302 1303 /* Per-transaction data allocation. */ 1304 sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, 1305 M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); 1306 if (sc->xbd_shadow == NULL) { 1307 bus_dma_tag_destroy(sc->xbd_io_dmat); 1308 xenbus_dev_fatal(sc->xbd_dev, ENOMEM, 1309 "Cannot allocate request structures\n"); 1310 return; 1311 } 1312 1313 for (i = 0; i < sc->xbd_max_requests; i++) { 1314 struct xbd_command *cm; 1315 void * indirectpages; 1316 1317 cm = &sc->xbd_shadow[i]; 1318 cm->cm_sg_refs = malloc( 1319 sizeof(grant_ref_t) * sc->xbd_max_request_segments, 1320 M_XENBLOCKFRONT, M_NOWAIT); 1321 if (cm->cm_sg_refs == NULL) 1322 break; 1323 cm->cm_id = i; 1324 cm->cm_flags = XBDCF_INITIALIZER; 1325 cm->cm_sc = sc; 1326 if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) 1327 break; 1328 if (sc->xbd_max_request_indirectpages > 0) { 1329 indirectpages = contigmalloc( 1330 PAGE_SIZE * sc->xbd_max_request_indirectpages, 1331 M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0); 1332 } else { 1333 indirectpages = NULL; 1334 } 1335 for (j = 0; j < sc->xbd_max_request_indirectpages; j++) { 1336 if (gnttab_grant_foreign_access( 1337 xenbus_get_otherend_id(sc->xbd_dev), 1338 (vtomach(indirectpages) >> PAGE_SHIFT) + j, 1339 1 /* grant read-only access */, 1340 &cm->cm_indirectionrefs[j])) 1341 break; 1342 } 1343 if (j < sc->xbd_max_request_indirectpages) 1344 break; 1345 cm->cm_indirectionpages = indirectpages; 1346 xbd_free_command(cm); 1347 } 1348 1349 if (sc->xbd_disk == NULL) { 1350 device_printf(dev, "%juMB <%s> at %s", 1351 (uintmax_t) sectors / (1048576 / sector_size), 1352 device_get_desc(dev), 1353 xenbus_get_node(dev)); 1354 bus_print_child_footer(device_get_parent(dev), dev); 1355 1356 xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, 1357 sector_size, phys_sector_size); 1358 } 1359 1360 (void)xenbus_set_state(dev, XenbusStateConnected); 1361 1362 /* Kick pending requests. */ 1363 mtx_lock(&sc->xbd_io_lock); 1364 sc->xbd_state = XBD_STATE_CONNECTED; 1365 xbd_startio(sc); 1366 sc->xbd_flags |= XBDF_READY; 1367 mtx_unlock(&sc->xbd_io_lock); 1368} 1369 1370/** 1371 * Handle the change of state of the backend to Closing. We must delete our 1372 * device-layer structures now, to ensure that writes are flushed through to 1373 * the backend. Once this is done, we can switch to Closed in 1374 * acknowledgement. 1375 */ 1376static void 1377xbd_closing(device_t dev) 1378{ 1379 struct xbd_softc *sc = device_get_softc(dev); 1380 1381 xenbus_set_state(dev, XenbusStateClosing); 1382 1383 DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); 1384 1385 if (sc->xbd_disk != NULL) { 1386 disk_destroy(sc->xbd_disk); 1387 sc->xbd_disk = NULL; 1388 } 1389 1390 xenbus_set_state(dev, XenbusStateClosed); 1391} 1392 1393/*---------------------------- NewBus Entrypoints ----------------------------*/ 1394static int 1395xbd_probe(device_t dev) 1396{ 1397 if (strcmp(xenbus_get_type(dev), "vbd") != 0) 1398 return (ENXIO); 1399 1400#ifdef XENHVM 1401 if (xen_disable_pv_disks != 0) 1402 return (ENXIO); 1403#endif 1404 1405 if (xen_hvm_domain()) { 1406 int error; 1407 char *type; 1408 1409 /* 1410 * When running in an HVM domain, IDE disk emulation is 1411 * disabled early in boot so that native drivers will 1412 * not see emulated hardware. However, CDROM device 1413 * emulation cannot be disabled. 1414 * 1415 * Through use of FreeBSD's vm_guest and xen_hvm_domain() 1416 * APIs, we could modify the native CDROM driver to fail its 1417 * probe when running under Xen. Unfortunatlely, the PV 1418 * CDROM support in XenServer (up through at least version 1419 * 6.2) isn't functional, so we instead rely on the emulated 1420 * CDROM instance, and fail to attach the PV one here in 1421 * the blkfront driver. 1422 */ 1423 error = xs_read(XST_NIL, xenbus_get_node(dev), 1424 "device-type", NULL, (void **) &type); 1425 if (error) 1426 return (ENXIO); 1427 1428 if (strncmp(type, "cdrom", 5) == 0) { 1429 free(type, M_XENSTORE); 1430 return (ENXIO); 1431 } 1432 free(type, M_XENSTORE); 1433 } 1434 1435 device_set_desc(dev, "Virtual Block Device"); 1436 device_quiet(dev); 1437 return (0); 1438} 1439 1440/* 1441 * Setup supplies the backend dir, virtual device. We place an event 1442 * channel and shared frame entries. We watch backend to wait if it's 1443 * ok. 1444 */ 1445static int 1446xbd_attach(device_t dev) 1447{ 1448 struct xbd_softc *sc; 1449 const char *name; 1450 uint32_t vdevice; 1451 int error; 1452 int i; 1453 int unit; 1454 1455 /* FIXME: Use dynamic device id if this is not set. */ 1456 error = xs_scanf(XST_NIL, xenbus_get_node(dev), 1457 "virtual-device", NULL, "%" PRIu32, &vdevice); 1458 if (error) 1459 error = xs_scanf(XST_NIL, xenbus_get_node(dev), 1460 "virtual-device-ext", NULL, "%" PRIu32, &vdevice); 1461 if (error) { 1462 xenbus_dev_fatal(dev, error, "reading virtual-device"); 1463 device_printf(dev, "Couldn't determine virtual device.\n"); 1464 return (error); 1465 } 1466 1467 xbd_vdevice_to_unit(vdevice, &unit, &name); 1468 if (!strcmp(name, "xbd")) 1469 device_set_unit(dev, unit); 1470 1471 sc = device_get_softc(dev); 1472 mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); 1473 xbd_initqs(sc); 1474 for (i = 0; i < XBD_MAX_RING_PAGES; i++) 1475 sc->xbd_ring_ref[i] = GRANT_REF_INVALID; 1476 1477 sc->xbd_dev = dev; 1478 sc->xbd_vdevice = vdevice; 1479 sc->xbd_state = XBD_STATE_DISCONNECTED; 1480 1481 xbd_setup_sysctl(sc); 1482 1483 /* Wait for backend device to publish its protocol capabilities. */ 1484 xenbus_set_state(dev, XenbusStateInitialising); 1485 1486 return (0); 1487} 1488 1489static int 1490xbd_detach(device_t dev) 1491{ 1492 struct xbd_softc *sc = device_get_softc(dev); 1493 1494 DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); 1495 1496 xbd_free(sc); 1497 mtx_destroy(&sc->xbd_io_lock); 1498 1499 return 0; 1500} 1501 1502static int 1503xbd_suspend(device_t dev) 1504{ 1505 struct xbd_softc *sc = device_get_softc(dev); 1506 int retval; 1507 int saved_state; 1508 1509 /* Prevent new requests being issued until we fix things up. */ 1510 mtx_lock(&sc->xbd_io_lock); 1511 saved_state = sc->xbd_state; 1512 sc->xbd_state = XBD_STATE_SUSPENDED; 1513 1514 /* Wait for outstanding I/O to drain. */ 1515 retval = 0; 1516 while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 1517 if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, 1518 PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { 1519 retval = EBUSY; 1520 break; 1521 } 1522 } 1523 mtx_unlock(&sc->xbd_io_lock); 1524 1525 if (retval != 0) 1526 sc->xbd_state = saved_state; 1527 1528 return (retval); 1529} 1530 1531static int 1532xbd_resume(device_t dev) 1533{ 1534 struct xbd_softc *sc = device_get_softc(dev); 1535 1536 if (xen_suspend_cancelled) { 1537 sc->xbd_state = XBD_STATE_CONNECTED; 1538 return (0); 1539 } 1540 1541 DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); 1542 1543 xbd_free(sc); 1544 xbd_initialize(sc); 1545 return (0); 1546} 1547 1548/** 1549 * Callback received when the backend's state changes. 1550 */ 1551static void 1552xbd_backend_changed(device_t dev, XenbusState backend_state) 1553{ 1554 struct xbd_softc *sc = device_get_softc(dev); 1555 1556 DPRINTK("backend_state=%d\n", backend_state); 1557 1558 switch (backend_state) { 1559 case XenbusStateUnknown: 1560 case XenbusStateInitialising: 1561 case XenbusStateReconfigured: 1562 case XenbusStateReconfiguring: 1563 case XenbusStateClosed: 1564 break; 1565 1566 case XenbusStateInitWait: 1567 case XenbusStateInitialised: 1568 xbd_initialize(sc); 1569 break; 1570 1571 case XenbusStateConnected: 1572 xbd_initialize(sc); 1573 xbd_connect(sc); 1574 break; 1575 1576 case XenbusStateClosing: 1577 if (sc->xbd_users > 0) 1578 xenbus_dev_error(dev, -EBUSY, 1579 "Device in use; refusing to close"); 1580 else 1581 xbd_closing(dev); 1582 break; 1583 } 1584} 1585 1586/*---------------------------- NewBus Registration ---------------------------*/ 1587static device_method_t xbd_methods[] = { 1588 /* Device interface */ 1589 DEVMETHOD(device_probe, xbd_probe), 1590 DEVMETHOD(device_attach, xbd_attach), 1591 DEVMETHOD(device_detach, xbd_detach), 1592 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1593 DEVMETHOD(device_suspend, xbd_suspend), 1594 DEVMETHOD(device_resume, xbd_resume), 1595 1596 /* Xenbus interface */ 1597 DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), 1598 1599 { 0, 0 } 1600}; 1601 1602static driver_t xbd_driver = { 1603 "xbd", 1604 xbd_methods, 1605 sizeof(struct xbd_softc), 1606}; 1607devclass_t xbd_devclass; 1608 1609DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); 1610