blkfront.c revision 287802
1/* 2 * XenBSD block device driver 3 * 4 * Copyright (c) 2010-2013 Spectra Logic Corporation 5 * Copyright (c) 2009 Scott Long, Yahoo! 6 * Copyright (c) 2009 Frank Suchomel, Citrix 7 * Copyright (c) 2009 Doug F. Rabson, Citrix 8 * Copyright (c) 2005 Kip Macy 9 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 10 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 11 * 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this software and associated documentation files (the "Software"), to 15 * deal in the Software without restriction, including without limitation the 16 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 17 * sell copies of the Software, and to permit persons to whom the Software is 18 * furnished to do so, subject to the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 * DEALINGS IN THE SOFTWARE. 29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: stable/10/sys/dev/xen/blkfront/blkfront.c 287802 2015-09-14 19:37:51Z cperciva $"); 33 34#include <sys/param.h> 35#include <sys/systm.h> 36#include <sys/malloc.h> 37#include <sys/kernel.h> 38#include <vm/vm.h> 39#include <vm/pmap.h> 40 41#include <sys/bio.h> 42#include <sys/bus.h> 43#include <sys/conf.h> 44#include <sys/module.h> 45#include <sys/sysctl.h> 46 47#include <machine/bus.h> 48#include <sys/rman.h> 49#include <machine/resource.h> 50#include <machine/intr_machdep.h> 51#include <machine/vmparam.h> 52#include <sys/bus_dma.h> 53 54#include <xen/xen-os.h> 55#include <xen/hypervisor.h> 56#include <xen/xen_intr.h> 57#include <xen/gnttab.h> 58#include <xen/interface/grant_table.h> 59#include <xen/interface/io/protocols.h> 60#include <xen/xenbus/xenbusvar.h> 61 62#include <machine/_inttypes.h> 63#include <machine/xen/xenvar.h> 64 65#include <geom/geom_disk.h> 66 67#include <dev/xen/blkfront/block.h> 68 69#include "xenbus_if.h" 70 71/*--------------------------- Forward Declarations ---------------------------*/ 72static void xbd_closing(device_t); 73static void xbd_startio(struct xbd_softc *sc); 74 75/*---------------------------------- Macros ----------------------------------*/ 76#if 0 77#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) 78#else 79#define DPRINTK(fmt, args...) 80#endif 81 82#define XBD_SECTOR_SHFT 9 83 84/*---------------------------- Global Static Data ----------------------------*/ 85static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); 86 87static int xbd_enable_indirect = 1; 88SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters"); 89SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN, 90 &xbd_enable_indirect, 0, "Enable xbd indirect segments"); 91 92/*---------------------------- Command Processing ----------------------------*/ 93static void 94xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) 95{ 96 if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) 97 return; 98 99 sc->xbd_flags |= xbd_flag; 100 sc->xbd_qfrozen_cnt++; 101} 102 103static void 104xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) 105{ 106 if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) 107 return; 108 109 if (sc->xbd_qfrozen_cnt == 0) 110 panic("%s: Thaw with flag 0x%x while not frozen.", 111 __func__, xbd_flag); 112 113 sc->xbd_flags &= ~xbd_flag; 114 sc->xbd_qfrozen_cnt--; 115} 116 117static void 118xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) 119{ 120 if ((cm->cm_flags & XBDCF_FROZEN) != 0) 121 return; 122 123 cm->cm_flags |= XBDCF_FROZEN|cm_flag; 124 xbd_freeze(sc, XBDF_NONE); 125} 126 127static void 128xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) 129{ 130 if ((cm->cm_flags & XBDCF_FROZEN) == 0) 131 return; 132 133 cm->cm_flags &= ~XBDCF_FROZEN; 134 xbd_thaw(sc, XBDF_NONE); 135} 136 137static inline void 138xbd_flush_requests(struct xbd_softc *sc) 139{ 140 int notify; 141 142 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); 143 144 if (notify) 145 xen_intr_signal(sc->xen_intr_handle); 146} 147 148static void 149xbd_free_command(struct xbd_command *cm) 150{ 151 152 KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, 153 ("Freeing command that is still on queue %d.", 154 cm->cm_flags & XBDCF_Q_MASK)); 155 156 cm->cm_flags = XBDCF_INITIALIZER; 157 cm->cm_bp = NULL; 158 cm->cm_complete = NULL; 159 xbd_enqueue_cm(cm, XBD_Q_FREE); 160 xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); 161} 162 163static void 164xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, 165 grant_ref_t * gref_head, int otherend_id, int readonly, 166 grant_ref_t * sg_ref, blkif_request_segment_t * sg) 167{ 168 struct blkif_request_segment *last_block_sg = sg + nsegs; 169 vm_paddr_t buffer_ma; 170 uint64_t fsect, lsect; 171 int ref; 172 173 while (sg < last_block_sg) { 174 buffer_ma = segs->ds_addr; 175 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; 176 lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; 177 178 KASSERT(lsect <= 7, ("XEN disk driver data cannot " 179 "cross a page boundary")); 180 181 /* install a grant reference. */ 182 ref = gnttab_claim_grant_reference(gref_head); 183 184 /* 185 * GNTTAB_LIST_END == 0xffffffff, but it is private 186 * to gnttab.c. 187 */ 188 KASSERT(ref != ~0, ("grant_reference failed")); 189 190 gnttab_grant_foreign_access_ref( 191 ref, 192 otherend_id, 193 buffer_ma >> PAGE_SHIFT, 194 readonly); 195 196 *sg_ref = ref; 197 *sg = (struct blkif_request_segment) { 198 .gref = ref, 199 .first_sect = fsect, 200 .last_sect = lsect 201 }; 202 sg++; 203 sg_ref++; 204 segs++; 205 } 206} 207 208static void 209xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 210{ 211 struct xbd_softc *sc; 212 struct xbd_command *cm; 213 int op; 214 215 cm = arg; 216 sc = cm->cm_sc; 217 218 if (error) { 219 cm->cm_bp->bio_error = EIO; 220 biodone(cm->cm_bp); 221 xbd_free_command(cm); 222 return; 223 } 224 225 KASSERT(nsegs <= sc->xbd_max_request_segments, 226 ("Too many segments in a blkfront I/O")); 227 228 if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) { 229 blkif_request_t *ring_req; 230 231 /* Fill out a blkif_request_t structure. */ 232 ring_req = (blkif_request_t *) 233 RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); 234 sc->xbd_ring.req_prod_pvt++; 235 ring_req->id = cm->cm_id; 236 ring_req->operation = cm->cm_operation; 237 ring_req->sector_number = cm->cm_sector_number; 238 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; 239 ring_req->nr_segments = nsegs; 240 cm->cm_nseg = nsegs; 241 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, 242 xenbus_get_otherend_id(sc->xbd_dev), 243 cm->cm_operation == BLKIF_OP_WRITE, 244 cm->cm_sg_refs, ring_req->seg); 245 } else { 246 blkif_request_indirect_t *ring_req; 247 248 /* Fill out a blkif_request_indirect_t structure. */ 249 ring_req = (blkif_request_indirect_t *) 250 RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); 251 sc->xbd_ring.req_prod_pvt++; 252 ring_req->id = cm->cm_id; 253 ring_req->operation = BLKIF_OP_INDIRECT; 254 ring_req->indirect_op = cm->cm_operation; 255 ring_req->sector_number = cm->cm_sector_number; 256 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; 257 ring_req->nr_segments = nsegs; 258 cm->cm_nseg = nsegs; 259 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, 260 xenbus_get_otherend_id(sc->xbd_dev), 261 cm->cm_operation == BLKIF_OP_WRITE, 262 cm->cm_sg_refs, cm->cm_indirectionpages); 263 memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs, 264 sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages); 265 } 266 267 if (cm->cm_operation == BLKIF_OP_READ) 268 op = BUS_DMASYNC_PREREAD; 269 else if (cm->cm_operation == BLKIF_OP_WRITE) 270 op = BUS_DMASYNC_PREWRITE; 271 else 272 op = 0; 273 bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); 274 275 gnttab_free_grant_references(cm->cm_gref_head); 276 277 xbd_enqueue_cm(cm, XBD_Q_BUSY); 278 279 /* 280 * If bus dma had to asynchronously call us back to dispatch 281 * this command, we are no longer executing in the context of 282 * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to 283 * xbd_flush_requests() to publish this command to the backend 284 * along with any other commands that it could batch. 285 */ 286 if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) 287 xbd_flush_requests(sc); 288 289 return; 290} 291 292static int 293xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) 294{ 295 int error; 296 297 error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, 298 cm->cm_datalen, xbd_queue_cb, cm, 0); 299 if (error == EINPROGRESS) { 300 /* 301 * Maintain queuing order by freezing the queue. The next 302 * command may not require as many resources as the command 303 * we just attempted to map, so we can't rely on bus dma 304 * blocking for it too. 305 */ 306 xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); 307 return (0); 308 } 309 310 return (error); 311} 312 313static void 314xbd_restart_queue_callback(void *arg) 315{ 316 struct xbd_softc *sc = arg; 317 318 mtx_lock(&sc->xbd_io_lock); 319 320 xbd_thaw(sc, XBDF_GNT_SHORTAGE); 321 322 xbd_startio(sc); 323 324 mtx_unlock(&sc->xbd_io_lock); 325} 326 327static struct xbd_command * 328xbd_bio_command(struct xbd_softc *sc) 329{ 330 struct xbd_command *cm; 331 struct bio *bp; 332 333 if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) 334 return (NULL); 335 336 bp = xbd_dequeue_bio(sc); 337 if (bp == NULL) 338 return (NULL); 339 340 if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { 341 xbd_freeze(sc, XBDF_CM_SHORTAGE); 342 xbd_requeue_bio(sc, bp); 343 return (NULL); 344 } 345 346 if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, 347 &cm->cm_gref_head) != 0) { 348 gnttab_request_free_callback(&sc->xbd_callback, 349 xbd_restart_queue_callback, sc, 350 sc->xbd_max_request_segments); 351 xbd_freeze(sc, XBDF_GNT_SHORTAGE); 352 xbd_requeue_bio(sc, bp); 353 xbd_enqueue_cm(cm, XBD_Q_FREE); 354 return (NULL); 355 } 356 357 cm->cm_bp = bp; 358 cm->cm_data = bp->bio_data; 359 cm->cm_datalen = bp->bio_bcount; 360 cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; 361 362 switch (bp->bio_cmd) { 363 case BIO_READ: 364 cm->cm_operation = BLKIF_OP_READ; 365 break; 366 case BIO_WRITE: 367 cm->cm_operation = BLKIF_OP_WRITE; 368 if ((bp->bio_flags & BIO_ORDERED) != 0) { 369 if ((sc->xbd_flags & XBDF_BARRIER) != 0) { 370 cm->cm_operation = BLKIF_OP_WRITE_BARRIER; 371 } else { 372 /* 373 * Single step this command. 374 */ 375 cm->cm_flags |= XBDCF_Q_FREEZE; 376 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 377 /* 378 * Wait for in-flight requests to 379 * finish. 380 */ 381 xbd_freeze(sc, XBDF_WAIT_IDLE); 382 xbd_requeue_cm(cm, XBD_Q_READY); 383 return (NULL); 384 } 385 } 386 } 387 break; 388 case BIO_FLUSH: 389 if ((sc->xbd_flags & XBDF_FLUSH) != 0) 390 cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; 391 else if ((sc->xbd_flags & XBDF_BARRIER) != 0) 392 cm->cm_operation = BLKIF_OP_WRITE_BARRIER; 393 else 394 panic("flush request, but no flush support available"); 395 break; 396 default: 397 panic("unknown bio command %d", bp->bio_cmd); 398 } 399 400 return (cm); 401} 402 403/* 404 * Dequeue buffers and place them in the shared communication ring. 405 * Return when no more requests can be accepted or all buffers have 406 * been queued. 407 * 408 * Signal XEN once the ring has been filled out. 409 */ 410static void 411xbd_startio(struct xbd_softc *sc) 412{ 413 struct xbd_command *cm; 414 int error, queued = 0; 415 416 mtx_assert(&sc->xbd_io_lock, MA_OWNED); 417 418 if (sc->xbd_state != XBD_STATE_CONNECTED) 419 return; 420 421 while (!RING_FULL(&sc->xbd_ring)) { 422 423 if (sc->xbd_qfrozen_cnt != 0) 424 break; 425 426 cm = xbd_dequeue_cm(sc, XBD_Q_READY); 427 428 if (cm == NULL) 429 cm = xbd_bio_command(sc); 430 431 if (cm == NULL) 432 break; 433 434 if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { 435 /* 436 * Single step command. Future work is 437 * held off until this command completes. 438 */ 439 xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); 440 } 441 442 if ((error = xbd_queue_request(sc, cm)) != 0) { 443 printf("xbd_queue_request returned %d\n", error); 444 break; 445 } 446 queued++; 447 } 448 449 if (queued != 0) 450 xbd_flush_requests(sc); 451} 452 453static void 454xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) 455{ 456 struct bio *bp; 457 458 bp = cm->cm_bp; 459 460 if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { 461 disk_err(bp, "disk error" , -1, 0); 462 printf(" status: %x\n", cm->cm_status); 463 bp->bio_flags |= BIO_ERROR; 464 } 465 466 if (bp->bio_flags & BIO_ERROR) 467 bp->bio_error = EIO; 468 else 469 bp->bio_resid = 0; 470 471 xbd_free_command(cm); 472 biodone(bp); 473} 474 475static void 476xbd_int(void *xsc) 477{ 478 struct xbd_softc *sc = xsc; 479 struct xbd_command *cm; 480 blkif_response_t *bret; 481 RING_IDX i, rp; 482 int op; 483 484 mtx_lock(&sc->xbd_io_lock); 485 486 if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { 487 mtx_unlock(&sc->xbd_io_lock); 488 return; 489 } 490 491 again: 492 rp = sc->xbd_ring.sring->rsp_prod; 493 rmb(); /* Ensure we see queued responses up to 'rp'. */ 494 495 for (i = sc->xbd_ring.rsp_cons; i != rp;) { 496 bret = RING_GET_RESPONSE(&sc->xbd_ring, i); 497 cm = &sc->xbd_shadow[bret->id]; 498 499 xbd_remove_cm(cm, XBD_Q_BUSY); 500 gnttab_end_foreign_access_references(cm->cm_nseg, 501 cm->cm_sg_refs); 502 i++; 503 504 if (cm->cm_operation == BLKIF_OP_READ) 505 op = BUS_DMASYNC_POSTREAD; 506 else if (cm->cm_operation == BLKIF_OP_WRITE || 507 cm->cm_operation == BLKIF_OP_WRITE_BARRIER) 508 op = BUS_DMASYNC_POSTWRITE; 509 else 510 op = 0; 511 bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); 512 bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); 513 514 /* 515 * Release any hold this command has on future command 516 * dispatch. 517 */ 518 xbd_cm_thaw(sc, cm); 519 520 /* 521 * Directly call the i/o complete routine to save an 522 * an indirection in the common case. 523 */ 524 cm->cm_status = bret->status; 525 if (cm->cm_bp) 526 xbd_bio_complete(sc, cm); 527 else if (cm->cm_complete != NULL) 528 cm->cm_complete(cm); 529 else 530 xbd_free_command(cm); 531 } 532 533 sc->xbd_ring.rsp_cons = i; 534 535 if (i != sc->xbd_ring.req_prod_pvt) { 536 int more_to_do; 537 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); 538 if (more_to_do) 539 goto again; 540 } else { 541 sc->xbd_ring.sring->rsp_event = i + 1; 542 } 543 544 if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) 545 xbd_thaw(sc, XBDF_WAIT_IDLE); 546 547 xbd_startio(sc); 548 549 if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) 550 wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); 551 552 mtx_unlock(&sc->xbd_io_lock); 553} 554 555/*------------------------------- Dump Support -------------------------------*/ 556/** 557 * Quiesce the disk writes for a dump file before allowing the next buffer. 558 */ 559static void 560xbd_quiesce(struct xbd_softc *sc) 561{ 562 int mtd; 563 564 // While there are outstanding requests 565 while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 566 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); 567 if (mtd) { 568 /* Recieved request completions, update queue. */ 569 xbd_int(sc); 570 } 571 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 572 /* 573 * Still pending requests, wait for the disk i/o 574 * to complete. 575 */ 576 HYPERVISOR_yield(); 577 } 578 } 579} 580 581/* Kernel dump function for a paravirtualized disk device */ 582static void 583xbd_dump_complete(struct xbd_command *cm) 584{ 585 586 xbd_enqueue_cm(cm, XBD_Q_COMPLETE); 587} 588 589static int 590xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, 591 size_t length) 592{ 593 struct disk *dp = arg; 594 struct xbd_softc *sc = dp->d_drv1; 595 struct xbd_command *cm; 596 size_t chunk; 597 int sbp; 598 int rc = 0; 599 600 if (length <= 0) 601 return (rc); 602 603 xbd_quiesce(sc); /* All quiet on the western front. */ 604 605 /* 606 * If this lock is held, then this module is failing, and a 607 * successful kernel dump is highly unlikely anyway. 608 */ 609 mtx_lock(&sc->xbd_io_lock); 610 611 /* Split the 64KB block as needed */ 612 for (sbp=0; length > 0; sbp++) { 613 cm = xbd_dequeue_cm(sc, XBD_Q_FREE); 614 if (cm == NULL) { 615 mtx_unlock(&sc->xbd_io_lock); 616 device_printf(sc->xbd_dev, "dump: no more commands?\n"); 617 return (EBUSY); 618 } 619 620 if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, 621 &cm->cm_gref_head) != 0) { 622 xbd_free_command(cm); 623 mtx_unlock(&sc->xbd_io_lock); 624 device_printf(sc->xbd_dev, "no more grant allocs?\n"); 625 return (EBUSY); 626 } 627 628 chunk = length > sc->xbd_max_request_size ? 629 sc->xbd_max_request_size : length; 630 cm->cm_data = virtual; 631 cm->cm_datalen = chunk; 632 cm->cm_operation = BLKIF_OP_WRITE; 633 cm->cm_sector_number = offset / dp->d_sectorsize; 634 cm->cm_complete = xbd_dump_complete; 635 636 xbd_enqueue_cm(cm, XBD_Q_READY); 637 638 length -= chunk; 639 offset += chunk; 640 virtual = (char *) virtual + chunk; 641 } 642 643 /* Tell DOM0 to do the I/O */ 644 xbd_startio(sc); 645 mtx_unlock(&sc->xbd_io_lock); 646 647 /* Poll for the completion. */ 648 xbd_quiesce(sc); /* All quite on the eastern front */ 649 650 /* If there were any errors, bail out... */ 651 while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { 652 if (cm->cm_status != BLKIF_RSP_OKAY) { 653 device_printf(sc->xbd_dev, 654 "Dump I/O failed at sector %jd\n", 655 cm->cm_sector_number); 656 rc = EIO; 657 } 658 xbd_free_command(cm); 659 } 660 661 return (rc); 662} 663 664/*----------------------------- Disk Entrypoints -----------------------------*/ 665static int 666xbd_open(struct disk *dp) 667{ 668 struct xbd_softc *sc = dp->d_drv1; 669 670 if (sc == NULL) { 671 printf("xb%d: not found", sc->xbd_unit); 672 return (ENXIO); 673 } 674 675 sc->xbd_flags |= XBDF_OPEN; 676 sc->xbd_users++; 677 return (0); 678} 679 680static int 681xbd_close(struct disk *dp) 682{ 683 struct xbd_softc *sc = dp->d_drv1; 684 685 if (sc == NULL) 686 return (ENXIO); 687 sc->xbd_flags &= ~XBDF_OPEN; 688 if (--(sc->xbd_users) == 0) { 689 /* 690 * Check whether we have been instructed to close. We will 691 * have ignored this request initially, as the device was 692 * still mounted. 693 */ 694 if (xenbus_get_otherend_state(sc->xbd_dev) == 695 XenbusStateClosing) 696 xbd_closing(sc->xbd_dev); 697 } 698 return (0); 699} 700 701static int 702xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) 703{ 704 struct xbd_softc *sc = dp->d_drv1; 705 706 if (sc == NULL) 707 return (ENXIO); 708 709 return (ENOTTY); 710} 711 712/* 713 * Read/write routine for a buffer. Finds the proper unit, place it on 714 * the sortq and kick the controller. 715 */ 716static void 717xbd_strategy(struct bio *bp) 718{ 719 struct xbd_softc *sc = bp->bio_disk->d_drv1; 720 721 /* bogus disk? */ 722 if (sc == NULL) { 723 bp->bio_error = EINVAL; 724 bp->bio_flags |= BIO_ERROR; 725 bp->bio_resid = bp->bio_bcount; 726 biodone(bp); 727 return; 728 } 729 730 /* 731 * Place it in the queue of disk activities for this disk 732 */ 733 mtx_lock(&sc->xbd_io_lock); 734 735 xbd_enqueue_bio(sc, bp); 736 xbd_startio(sc); 737 738 mtx_unlock(&sc->xbd_io_lock); 739 return; 740} 741 742/*------------------------------ Ring Management -----------------------------*/ 743static int 744xbd_alloc_ring(struct xbd_softc *sc) 745{ 746 blkif_sring_t *sring; 747 uintptr_t sring_page_addr; 748 int error; 749 int i; 750 751 sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, 752 M_NOWAIT|M_ZERO); 753 if (sring == NULL) { 754 xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); 755 return (ENOMEM); 756 } 757 SHARED_RING_INIT(sring); 758 FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); 759 760 for (i = 0, sring_page_addr = (uintptr_t)sring; 761 i < sc->xbd_ring_pages; 762 i++, sring_page_addr += PAGE_SIZE) { 763 764 error = xenbus_grant_ring(sc->xbd_dev, 765 (vtomach(sring_page_addr) >> PAGE_SHIFT), 766 &sc->xbd_ring_ref[i]); 767 if (error) { 768 xenbus_dev_fatal(sc->xbd_dev, error, 769 "granting ring_ref(%d)", i); 770 return (error); 771 } 772 } 773 if (sc->xbd_ring_pages == 1) { 774 error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), 775 "ring-ref", "%u", sc->xbd_ring_ref[0]); 776 if (error) { 777 xenbus_dev_fatal(sc->xbd_dev, error, 778 "writing %s/ring-ref", 779 xenbus_get_node(sc->xbd_dev)); 780 return (error); 781 } 782 } else { 783 for (i = 0; i < sc->xbd_ring_pages; i++) { 784 char ring_ref_name[]= "ring_refXX"; 785 786 snprintf(ring_ref_name, sizeof(ring_ref_name), 787 "ring-ref%u", i); 788 error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), 789 ring_ref_name, "%u", sc->xbd_ring_ref[i]); 790 if (error) { 791 xenbus_dev_fatal(sc->xbd_dev, error, 792 "writing %s/%s", 793 xenbus_get_node(sc->xbd_dev), 794 ring_ref_name); 795 return (error); 796 } 797 } 798 } 799 800 error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, 801 xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, 802 INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); 803 if (error) { 804 xenbus_dev_fatal(sc->xbd_dev, error, 805 "xen_intr_alloc_and_bind_local_port failed"); 806 return (error); 807 } 808 809 return (0); 810} 811 812static void 813xbd_free_ring(struct xbd_softc *sc) 814{ 815 int i; 816 817 if (sc->xbd_ring.sring == NULL) 818 return; 819 820 for (i = 0; i < sc->xbd_ring_pages; i++) { 821 if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { 822 gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); 823 sc->xbd_ring_ref[i] = GRANT_REF_INVALID; 824 } 825 } 826 free(sc->xbd_ring.sring, M_XENBLOCKFRONT); 827 sc->xbd_ring.sring = NULL; 828} 829 830/*-------------------------- Initialization/Teardown -------------------------*/ 831static int 832xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) 833{ 834 struct sbuf sb; 835 int feature_cnt; 836 837 sbuf_new(&sb, features, len, SBUF_FIXEDLEN); 838 839 feature_cnt = 0; 840 if ((sc->xbd_flags & XBDF_FLUSH) != 0) { 841 sbuf_printf(&sb, "flush"); 842 feature_cnt++; 843 } 844 845 if ((sc->xbd_flags & XBDF_BARRIER) != 0) { 846 if (feature_cnt != 0) 847 sbuf_printf(&sb, ", "); 848 sbuf_printf(&sb, "write_barrier"); 849 feature_cnt++; 850 } 851 852 (void) sbuf_finish(&sb); 853 return (sbuf_len(&sb)); 854} 855 856static int 857xbd_sysctl_features(SYSCTL_HANDLER_ARGS) 858{ 859 char features[80]; 860 struct xbd_softc *sc = arg1; 861 int error; 862 int len; 863 864 error = sysctl_wire_old_buffer(req, 0); 865 if (error != 0) 866 return (error); 867 868 len = xbd_feature_string(sc, features, sizeof(features)); 869 870 /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */ 871 return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); 872} 873 874static void 875xbd_setup_sysctl(struct xbd_softc *xbd) 876{ 877 struct sysctl_ctx_list *sysctl_ctx = NULL; 878 struct sysctl_oid *sysctl_tree = NULL; 879 struct sysctl_oid_list *children; 880 881 sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); 882 if (sysctl_ctx == NULL) 883 return; 884 885 sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); 886 if (sysctl_tree == NULL) 887 return; 888 889 children = SYSCTL_CHILDREN(sysctl_tree); 890 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 891 "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, 892 "maximum outstanding requests (negotiated)"); 893 894 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 895 "max_request_segments", CTLFLAG_RD, 896 &xbd->xbd_max_request_segments, 0, 897 "maximum number of pages per requests (negotiated)"); 898 899 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 900 "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, 901 "maximum size in bytes of a request (negotiated)"); 902 903 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, 904 "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, 905 "communication channel pages (negotiated)"); 906 907 SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, 908 "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, 909 xbd_sysctl_features, "A", "protocol features (negotiated)"); 910} 911 912/* 913 * Translate Linux major/minor to an appropriate name and unit 914 * number. For HVM guests, this allows us to use the same drive names 915 * with blkfront as the emulated drives, easing transition slightly. 916 */ 917static void 918xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) 919{ 920 static struct vdev_info { 921 int major; 922 int shift; 923 int base; 924 const char *name; 925 } info[] = { 926 {3, 6, 0, "ada"}, /* ide0 */ 927 {22, 6, 2, "ada"}, /* ide1 */ 928 {33, 6, 4, "ada"}, /* ide2 */ 929 {34, 6, 6, "ada"}, /* ide3 */ 930 {56, 6, 8, "ada"}, /* ide4 */ 931 {57, 6, 10, "ada"}, /* ide5 */ 932 {88, 6, 12, "ada"}, /* ide6 */ 933 {89, 6, 14, "ada"}, /* ide7 */ 934 {90, 6, 16, "ada"}, /* ide8 */ 935 {91, 6, 18, "ada"}, /* ide9 */ 936 937 {8, 4, 0, "da"}, /* scsi disk0 */ 938 {65, 4, 16, "da"}, /* scsi disk1 */ 939 {66, 4, 32, "da"}, /* scsi disk2 */ 940 {67, 4, 48, "da"}, /* scsi disk3 */ 941 {68, 4, 64, "da"}, /* scsi disk4 */ 942 {69, 4, 80, "da"}, /* scsi disk5 */ 943 {70, 4, 96, "da"}, /* scsi disk6 */ 944 {71, 4, 112, "da"}, /* scsi disk7 */ 945 {128, 4, 128, "da"}, /* scsi disk8 */ 946 {129, 4, 144, "da"}, /* scsi disk9 */ 947 {130, 4, 160, "da"}, /* scsi disk10 */ 948 {131, 4, 176, "da"}, /* scsi disk11 */ 949 {132, 4, 192, "da"}, /* scsi disk12 */ 950 {133, 4, 208, "da"}, /* scsi disk13 */ 951 {134, 4, 224, "da"}, /* scsi disk14 */ 952 {135, 4, 240, "da"}, /* scsi disk15 */ 953 954 {202, 4, 0, "xbd"}, /* xbd */ 955 956 {0, 0, 0, NULL}, 957 }; 958 int major = vdevice >> 8; 959 int minor = vdevice & 0xff; 960 int i; 961 962 if (vdevice & (1 << 28)) { 963 *unit = (vdevice & ((1 << 28) - 1)) >> 8; 964 *name = "xbd"; 965 return; 966 } 967 968 for (i = 0; info[i].major; i++) { 969 if (info[i].major == major) { 970 *unit = info[i].base + (minor >> info[i].shift); 971 *name = info[i].name; 972 return; 973 } 974 } 975 976 *unit = minor >> 4; 977 *name = "xbd"; 978} 979 980int 981xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, 982 int vdevice, uint16_t vdisk_info, unsigned long sector_size) 983{ 984 char features[80]; 985 int unit, error = 0; 986 const char *name; 987 988 xbd_vdevice_to_unit(vdevice, &unit, &name); 989 990 sc->xbd_unit = unit; 991 992 if (strcmp(name, "xbd") != 0) 993 device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); 994 995 if (xbd_feature_string(sc, features, sizeof(features)) > 0) { 996 device_printf(sc->xbd_dev, "features: %s\n", 997 features); 998 } 999 1000 sc->xbd_disk = disk_alloc(); 1001 sc->xbd_disk->d_unit = sc->xbd_unit; 1002 sc->xbd_disk->d_open = xbd_open; 1003 sc->xbd_disk->d_close = xbd_close; 1004 sc->xbd_disk->d_ioctl = xbd_ioctl; 1005 sc->xbd_disk->d_strategy = xbd_strategy; 1006 sc->xbd_disk->d_dump = xbd_dump; 1007 sc->xbd_disk->d_name = name; 1008 sc->xbd_disk->d_drv1 = sc; 1009 sc->xbd_disk->d_sectorsize = sector_size; 1010 1011 sc->xbd_disk->d_mediasize = sectors * sector_size; 1012 sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; 1013 sc->xbd_disk->d_flags = 0; 1014 if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { 1015 sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; 1016 device_printf(sc->xbd_dev, 1017 "synchronize cache commands enabled.\n"); 1018 } 1019 disk_create(sc->xbd_disk, DISK_VERSION); 1020 1021 return error; 1022} 1023 1024static void 1025xbd_free(struct xbd_softc *sc) 1026{ 1027 int i; 1028 1029 /* Prevent new requests being issued until we fix things up. */ 1030 mtx_lock(&sc->xbd_io_lock); 1031 sc->xbd_state = XBD_STATE_DISCONNECTED; 1032 mtx_unlock(&sc->xbd_io_lock); 1033 1034 /* Free resources associated with old device channel. */ 1035 xbd_free_ring(sc); 1036 if (sc->xbd_shadow) { 1037 1038 for (i = 0; i < sc->xbd_max_requests; i++) { 1039 struct xbd_command *cm; 1040 1041 cm = &sc->xbd_shadow[i]; 1042 if (cm->cm_sg_refs != NULL) { 1043 free(cm->cm_sg_refs, M_XENBLOCKFRONT); 1044 cm->cm_sg_refs = NULL; 1045 } 1046 1047 if (cm->cm_indirectionpages != NULL) { 1048 gnttab_end_foreign_access_references( 1049 sc->xbd_max_request_indirectpages, 1050 &cm->cm_indirectionrefs[0]); 1051 contigfree(cm->cm_indirectionpages, PAGE_SIZE * 1052 sc->xbd_max_request_indirectpages, 1053 M_XENBLOCKFRONT); 1054 cm->cm_indirectionpages = NULL; 1055 } 1056 1057 bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); 1058 } 1059 free(sc->xbd_shadow, M_XENBLOCKFRONT); 1060 sc->xbd_shadow = NULL; 1061 1062 bus_dma_tag_destroy(sc->xbd_io_dmat); 1063 1064 xbd_initq_cm(sc, XBD_Q_FREE); 1065 xbd_initq_cm(sc, XBD_Q_READY); 1066 xbd_initq_cm(sc, XBD_Q_COMPLETE); 1067 } 1068 1069 xen_intr_unbind(&sc->xen_intr_handle); 1070 1071} 1072 1073/*--------------------------- State Change Handlers --------------------------*/ 1074static void 1075xbd_initialize(struct xbd_softc *sc) 1076{ 1077 const char *otherend_path; 1078 const char *node_path; 1079 uint32_t max_ring_page_order; 1080 int error; 1081 1082 if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { 1083 /* Initialization has already been performed. */ 1084 return; 1085 } 1086 1087 /* 1088 * Protocol defaults valid even if negotiation for a 1089 * setting fails. 1090 */ 1091 max_ring_page_order = 0; 1092 sc->xbd_ring_pages = 1; 1093 1094 /* 1095 * Protocol negotiation. 1096 * 1097 * \note xs_gather() returns on the first encountered error, so 1098 * we must use independant calls in order to guarantee 1099 * we don't miss information in a sparsly populated back-end 1100 * tree. 1101 * 1102 * \note xs_scanf() does not update variables for unmatched 1103 * fields. 1104 */ 1105 otherend_path = xenbus_get_otherend_path(sc->xbd_dev); 1106 node_path = xenbus_get_node(sc->xbd_dev); 1107 1108 /* Support both backend schemes for relaying ring page limits. */ 1109 (void)xs_scanf(XST_NIL, otherend_path, 1110 "max-ring-page-order", NULL, "%" PRIu32, 1111 &max_ring_page_order); 1112 sc->xbd_ring_pages = 1 << max_ring_page_order; 1113 (void)xs_scanf(XST_NIL, otherend_path, 1114 "max-ring-pages", NULL, "%" PRIu32, 1115 &sc->xbd_ring_pages); 1116 if (sc->xbd_ring_pages < 1) 1117 sc->xbd_ring_pages = 1; 1118 1119 if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { 1120 device_printf(sc->xbd_dev, 1121 "Back-end specified ring-pages of %u " 1122 "limited to front-end limit of %u.\n", 1123 sc->xbd_ring_pages, XBD_MAX_RING_PAGES); 1124 sc->xbd_ring_pages = XBD_MAX_RING_PAGES; 1125 } 1126 1127 if (powerof2(sc->xbd_ring_pages) == 0) { 1128 uint32_t new_page_limit; 1129 1130 new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); 1131 device_printf(sc->xbd_dev, 1132 "Back-end specified ring-pages of %u " 1133 "is not a power of 2. Limited to %u.\n", 1134 sc->xbd_ring_pages, new_page_limit); 1135 sc->xbd_ring_pages = new_page_limit; 1136 } 1137 1138 sc->xbd_max_requests = 1139 BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); 1140 if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { 1141 device_printf(sc->xbd_dev, 1142 "Back-end specified max_requests of %u " 1143 "limited to front-end limit of %zu.\n", 1144 sc->xbd_max_requests, XBD_MAX_REQUESTS); 1145 sc->xbd_max_requests = XBD_MAX_REQUESTS; 1146 } 1147 1148 if (xbd_alloc_ring(sc) != 0) 1149 return; 1150 1151 /* Support both backend schemes for relaying ring page limits. */ 1152 if (sc->xbd_ring_pages > 1) { 1153 error = xs_printf(XST_NIL, node_path, 1154 "num-ring-pages","%u", 1155 sc->xbd_ring_pages); 1156 if (error) { 1157 xenbus_dev_fatal(sc->xbd_dev, error, 1158 "writing %s/num-ring-pages", 1159 node_path); 1160 return; 1161 } 1162 1163 error = xs_printf(XST_NIL, node_path, 1164 "ring-page-order", "%u", 1165 fls(sc->xbd_ring_pages) - 1); 1166 if (error) { 1167 xenbus_dev_fatal(sc->xbd_dev, error, 1168 "writing %s/ring-page-order", 1169 node_path); 1170 return; 1171 } 1172 } 1173 1174 error = xs_printf(XST_NIL, node_path, "event-channel", 1175 "%u", xen_intr_port(sc->xen_intr_handle)); 1176 if (error) { 1177 xenbus_dev_fatal(sc->xbd_dev, error, 1178 "writing %s/event-channel", 1179 node_path); 1180 return; 1181 } 1182 1183 error = xs_printf(XST_NIL, node_path, "protocol", 1184 "%s", XEN_IO_PROTO_ABI_NATIVE); 1185 if (error) { 1186 xenbus_dev_fatal(sc->xbd_dev, error, 1187 "writing %s/protocol", 1188 node_path); 1189 return; 1190 } 1191 1192 xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); 1193} 1194 1195/* 1196 * Invoked when the backend is finally 'ready' (and has published 1197 * the details about the physical device - #sectors, size, etc). 1198 */ 1199static void 1200xbd_connect(struct xbd_softc *sc) 1201{ 1202 device_t dev = sc->xbd_dev; 1203 unsigned long sectors, sector_size; 1204 unsigned int binfo; 1205 int err, feature_barrier, feature_flush; 1206 int i, j; 1207 1208 if (sc->xbd_state == XBD_STATE_CONNECTED || 1209 sc->xbd_state == XBD_STATE_SUSPENDED) 1210 return; 1211 1212 DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); 1213 1214 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1215 "sectors", "%lu", §ors, 1216 "info", "%u", &binfo, 1217 "sector-size", "%lu", §or_size, 1218 NULL); 1219 if (err) { 1220 xenbus_dev_fatal(dev, err, 1221 "reading backend fields at %s", 1222 xenbus_get_otherend_path(dev)); 1223 return; 1224 } 1225 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1226 "feature-barrier", "%lu", &feature_barrier, 1227 NULL); 1228 if (err == 0 && feature_barrier != 0) 1229 sc->xbd_flags |= XBDF_BARRIER; 1230 1231 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1232 "feature-flush-cache", "%lu", &feature_flush, 1233 NULL); 1234 if (err == 0 && feature_flush != 0) 1235 sc->xbd_flags |= XBDF_FLUSH; 1236 1237 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 1238 "feature-max-indirect-segments", "%" PRIu32, 1239 &sc->xbd_max_request_segments, NULL); 1240 if ((err != 0) || (xbd_enable_indirect == 0)) 1241 sc->xbd_max_request_segments = 0; 1242 if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS) 1243 sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS; 1244 if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS)) 1245 sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS); 1246 sc->xbd_max_request_indirectpages = 1247 XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments); 1248 if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) 1249 sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1250 sc->xbd_max_request_size = 1251 XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); 1252 1253 /* Allocate datastructures based on negotiated values. */ 1254 err = bus_dma_tag_create( 1255 bus_get_dma_tag(sc->xbd_dev), /* parent */ 1256 512, PAGE_SIZE, /* algnmnt, boundary */ 1257 BUS_SPACE_MAXADDR, /* lowaddr */ 1258 BUS_SPACE_MAXADDR, /* highaddr */ 1259 NULL, NULL, /* filter, filterarg */ 1260 sc->xbd_max_request_size, 1261 sc->xbd_max_request_segments, 1262 PAGE_SIZE, /* maxsegsize */ 1263 BUS_DMA_ALLOCNOW, /* flags */ 1264 busdma_lock_mutex, /* lockfunc */ 1265 &sc->xbd_io_lock, /* lockarg */ 1266 &sc->xbd_io_dmat); 1267 if (err != 0) { 1268 xenbus_dev_fatal(sc->xbd_dev, err, 1269 "Cannot allocate parent DMA tag\n"); 1270 return; 1271 } 1272 1273 /* Per-transaction data allocation. */ 1274 sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, 1275 M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); 1276 if (sc->xbd_shadow == NULL) { 1277 bus_dma_tag_destroy(sc->xbd_io_dmat); 1278 xenbus_dev_fatal(sc->xbd_dev, ENOMEM, 1279 "Cannot allocate request structures\n"); 1280 return; 1281 } 1282 1283 for (i = 0; i < sc->xbd_max_requests; i++) { 1284 struct xbd_command *cm; 1285 void * indirectpages; 1286 1287 cm = &sc->xbd_shadow[i]; 1288 cm->cm_sg_refs = malloc( 1289 sizeof(grant_ref_t) * sc->xbd_max_request_segments, 1290 M_XENBLOCKFRONT, M_NOWAIT); 1291 if (cm->cm_sg_refs == NULL) 1292 break; 1293 cm->cm_id = i; 1294 cm->cm_flags = XBDCF_INITIALIZER; 1295 cm->cm_sc = sc; 1296 if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) 1297 break; 1298 if (sc->xbd_max_request_indirectpages > 0) { 1299 indirectpages = contigmalloc( 1300 PAGE_SIZE * sc->xbd_max_request_indirectpages, 1301 M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0); 1302 } else { 1303 indirectpages = NULL; 1304 } 1305 for (j = 0; j < sc->xbd_max_request_indirectpages; j++) { 1306 if (gnttab_grant_foreign_access( 1307 xenbus_get_otherend_id(sc->xbd_dev), 1308 (vtomach(indirectpages) >> PAGE_SHIFT) + j, 1309 1 /* grant read-only access */, 1310 &cm->cm_indirectionrefs[j])) 1311 break; 1312 } 1313 if (j < sc->xbd_max_request_indirectpages) 1314 break; 1315 cm->cm_indirectionpages = indirectpages; 1316 xbd_free_command(cm); 1317 } 1318 1319 if (sc->xbd_disk == NULL) { 1320 device_printf(dev, "%juMB <%s> at %s", 1321 (uintmax_t) sectors / (1048576 / sector_size), 1322 device_get_desc(dev), 1323 xenbus_get_node(dev)); 1324 bus_print_child_footer(device_get_parent(dev), dev); 1325 1326 xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, 1327 sector_size); 1328 } 1329 1330 (void)xenbus_set_state(dev, XenbusStateConnected); 1331 1332 /* Kick pending requests. */ 1333 mtx_lock(&sc->xbd_io_lock); 1334 sc->xbd_state = XBD_STATE_CONNECTED; 1335 xbd_startio(sc); 1336 sc->xbd_flags |= XBDF_READY; 1337 mtx_unlock(&sc->xbd_io_lock); 1338} 1339 1340/** 1341 * Handle the change of state of the backend to Closing. We must delete our 1342 * device-layer structures now, to ensure that writes are flushed through to 1343 * the backend. Once this is done, we can switch to Closed in 1344 * acknowledgement. 1345 */ 1346static void 1347xbd_closing(device_t dev) 1348{ 1349 struct xbd_softc *sc = device_get_softc(dev); 1350 1351 xenbus_set_state(dev, XenbusStateClosing); 1352 1353 DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); 1354 1355 if (sc->xbd_disk != NULL) { 1356 disk_destroy(sc->xbd_disk); 1357 sc->xbd_disk = NULL; 1358 } 1359 1360 xenbus_set_state(dev, XenbusStateClosed); 1361} 1362 1363/*---------------------------- NewBus Entrypoints ----------------------------*/ 1364static int 1365xbd_probe(device_t dev) 1366{ 1367 if (strcmp(xenbus_get_type(dev), "vbd") != 0) 1368 return (ENXIO); 1369 1370 if (xen_hvm_domain()) { 1371 int error; 1372 char *type; 1373 1374 /* 1375 * When running in an HVM domain, IDE disk emulation is 1376 * disabled early in boot so that native drivers will 1377 * not see emulated hardware. However, CDROM device 1378 * emulation cannot be disabled. 1379 * 1380 * Through use of FreeBSD's vm_guest and xen_hvm_domain() 1381 * APIs, we could modify the native CDROM driver to fail its 1382 * probe when running under Xen. Unfortunatlely, the PV 1383 * CDROM support in XenServer (up through at least version 1384 * 6.2) isn't functional, so we instead rely on the emulated 1385 * CDROM instance, and fail to attach the PV one here in 1386 * the blkfront driver. 1387 */ 1388 error = xs_read(XST_NIL, xenbus_get_node(dev), 1389 "device-type", NULL, (void **) &type); 1390 if (error) 1391 return (ENXIO); 1392 1393 if (strncmp(type, "cdrom", 5) == 0) { 1394 free(type, M_XENSTORE); 1395 return (ENXIO); 1396 } 1397 free(type, M_XENSTORE); 1398 } 1399 1400 device_set_desc(dev, "Virtual Block Device"); 1401 device_quiet(dev); 1402 return (0); 1403} 1404 1405/* 1406 * Setup supplies the backend dir, virtual device. We place an event 1407 * channel and shared frame entries. We watch backend to wait if it's 1408 * ok. 1409 */ 1410static int 1411xbd_attach(device_t dev) 1412{ 1413 struct xbd_softc *sc; 1414 const char *name; 1415 uint32_t vdevice; 1416 int error; 1417 int i; 1418 int unit; 1419 1420 /* FIXME: Use dynamic device id if this is not set. */ 1421 error = xs_scanf(XST_NIL, xenbus_get_node(dev), 1422 "virtual-device", NULL, "%" PRIu32, &vdevice); 1423 if (error) 1424 error = xs_scanf(XST_NIL, xenbus_get_node(dev), 1425 "virtual-device-ext", NULL, "%" PRIu32, &vdevice); 1426 if (error) { 1427 xenbus_dev_fatal(dev, error, "reading virtual-device"); 1428 device_printf(dev, "Couldn't determine virtual device.\n"); 1429 return (error); 1430 } 1431 1432 xbd_vdevice_to_unit(vdevice, &unit, &name); 1433 if (!strcmp(name, "xbd")) 1434 device_set_unit(dev, unit); 1435 1436 sc = device_get_softc(dev); 1437 mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); 1438 xbd_initqs(sc); 1439 for (i = 0; i < XBD_MAX_RING_PAGES; i++) 1440 sc->xbd_ring_ref[i] = GRANT_REF_INVALID; 1441 1442 sc->xbd_dev = dev; 1443 sc->xbd_vdevice = vdevice; 1444 sc->xbd_state = XBD_STATE_DISCONNECTED; 1445 1446 xbd_setup_sysctl(sc); 1447 1448 /* Wait for backend device to publish its protocol capabilities. */ 1449 xenbus_set_state(dev, XenbusStateInitialising); 1450 1451 return (0); 1452} 1453 1454static int 1455xbd_detach(device_t dev) 1456{ 1457 struct xbd_softc *sc = device_get_softc(dev); 1458 1459 DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); 1460 1461 xbd_free(sc); 1462 mtx_destroy(&sc->xbd_io_lock); 1463 1464 return 0; 1465} 1466 1467static int 1468xbd_suspend(device_t dev) 1469{ 1470 struct xbd_softc *sc = device_get_softc(dev); 1471 int retval; 1472 int saved_state; 1473 1474 /* Prevent new requests being issued until we fix things up. */ 1475 mtx_lock(&sc->xbd_io_lock); 1476 saved_state = sc->xbd_state; 1477 sc->xbd_state = XBD_STATE_SUSPENDED; 1478 1479 /* Wait for outstanding I/O to drain. */ 1480 retval = 0; 1481 while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { 1482 if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, 1483 PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { 1484 retval = EBUSY; 1485 break; 1486 } 1487 } 1488 mtx_unlock(&sc->xbd_io_lock); 1489 1490 if (retval != 0) 1491 sc->xbd_state = saved_state; 1492 1493 return (retval); 1494} 1495 1496static int 1497xbd_resume(device_t dev) 1498{ 1499 struct xbd_softc *sc = device_get_softc(dev); 1500 1501 DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); 1502 1503 xbd_free(sc); 1504 xbd_initialize(sc); 1505 return (0); 1506} 1507 1508/** 1509 * Callback received when the backend's state changes. 1510 */ 1511static void 1512xbd_backend_changed(device_t dev, XenbusState backend_state) 1513{ 1514 struct xbd_softc *sc = device_get_softc(dev); 1515 1516 DPRINTK("backend_state=%d\n", backend_state); 1517 1518 switch (backend_state) { 1519 case XenbusStateUnknown: 1520 case XenbusStateInitialising: 1521 case XenbusStateReconfigured: 1522 case XenbusStateReconfiguring: 1523 case XenbusStateClosed: 1524 break; 1525 1526 case XenbusStateInitWait: 1527 case XenbusStateInitialised: 1528 xbd_initialize(sc); 1529 break; 1530 1531 case XenbusStateConnected: 1532 xbd_initialize(sc); 1533 xbd_connect(sc); 1534 break; 1535 1536 case XenbusStateClosing: 1537 if (sc->xbd_users > 0) 1538 xenbus_dev_error(dev, -EBUSY, 1539 "Device in use; refusing to close"); 1540 else 1541 xbd_closing(dev); 1542 break; 1543 } 1544} 1545 1546/*---------------------------- NewBus Registration ---------------------------*/ 1547static device_method_t xbd_methods[] = { 1548 /* Device interface */ 1549 DEVMETHOD(device_probe, xbd_probe), 1550 DEVMETHOD(device_attach, xbd_attach), 1551 DEVMETHOD(device_detach, xbd_detach), 1552 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1553 DEVMETHOD(device_suspend, xbd_suspend), 1554 DEVMETHOD(device_resume, xbd_resume), 1555 1556 /* Xenbus interface */ 1557 DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), 1558 1559 { 0, 0 } 1560}; 1561 1562static driver_t xbd_driver = { 1563 "xbd", 1564 xbd_methods, 1565 sizeof(struct xbd_softc), 1566}; 1567devclass_t xbd_devclass; 1568 1569DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); 1570