/* linux_compat.c revision 271127 */
1/*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/malloc.h> 33#include <sys/kernel.h> 34#include <sys/sysctl.h> 35#include <sys/lock.h> 36#include <sys/mutex.h> 37#include <sys/bus.h> 38#include <sys/fcntl.h> 39#include <sys/file.h> 40#include <sys/filio.h> 41#include <sys/rwlock.h> 42 43#include <vm/vm.h> 44#include <vm/pmap.h> 45 46#include <machine/stdarg.h> 47#include <machine/pmap.h> 48 49#include <linux/kobject.h> 50#include <linux/device.h> 51#include <linux/slab.h> 52#include <linux/module.h> 53#include <linux/cdev.h> 54#include <linux/file.h> 55#include <linux/sysfs.h> 56#include <linux/mm.h> 57#include <linux/io.h> 58#include <linux/vmalloc.h> 59 60#include <vm/vm_pager.h> 61 62MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 63 64#include <linux/rbtree.h> 65/* Undo Linux compat changes. */ 66#undef RB_ROOT 67#undef file 68#undef cdev 69#define RB_ROOT(head) (head)->rbh_root 70#undef LIST_HEAD 71/* From sys/queue.h */ 72#define LIST_HEAD(name, type) \ 73struct name { \ 74 struct type *lh_first; /* first element */ \ 75} 76 77struct kobject class_root; 78struct device linux_rootdev; 79struct class miscclass; 80struct list_head pci_drivers; 81struct list_head pci_devices; 82spinlock_t pci_lock; 83 84int 85panic_cmp(struct rb_node *one, struct rb_node *two) 86{ 87 panic("no cmp"); 88} 89 90RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 91 92int 93kobject_set_name(struct kobject *kobj, const char *fmt, ...) 
94{ 95 va_list args; 96 int error; 97 98 va_start(args, fmt); 99 error = kobject_set_name_vargs(kobj, fmt, args); 100 va_end(args); 101 102 return (error); 103} 104 105static inline int 106kobject_add_complete(struct kobject *kobj, struct kobject *parent) 107{ 108 struct kobj_type *t; 109 int error; 110 111 kobj->parent = kobject_get(parent); 112 error = sysfs_create_dir(kobj); 113 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 114 struct attribute **attr; 115 t = kobj->ktype; 116 117 for (attr = t->default_attrs; *attr != NULL; attr++) { 118 error = sysfs_create_file(kobj, *attr); 119 if (error) 120 break; 121 } 122 if (error) 123 sysfs_remove_dir(kobj); 124 125 } 126 return (error); 127} 128 129int 130kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 131{ 132 va_list args; 133 int error; 134 135 va_start(args, fmt); 136 error = kobject_set_name_vargs(kobj, fmt, args); 137 va_end(args); 138 if (error) 139 return (error); 140 141 return kobject_add_complete(kobj, parent); 142} 143 144void 145kobject_release(struct kref *kref) 146{ 147 struct kobject *kobj; 148 char *name; 149 150 kobj = container_of(kref, struct kobject, kref); 151 sysfs_remove_dir(kobj); 152 if (kobj->parent) 153 kobject_put(kobj->parent); 154 kobj->parent = NULL; 155 name = kobj->name; 156 if (kobj->ktype && kobj->ktype->release) 157 kobj->ktype->release(kobj); 158 kfree(name); 159} 160 161static void 162kobject_kfree(struct kobject *kobj) 163{ 164 kfree(kobj); 165} 166 167static void 168kobject_kfree_name(struct kobject *kobj) 169{ 170 if (kobj) { 171 kfree(kobj->name); 172 } 173} 174 175struct kobj_type kfree_type = { .release = kobject_kfree }; 176 177struct device * 178device_create(struct class *class, struct device *parent, dev_t devt, 179 void *drvdata, const char *fmt, ...) 
180{ 181 struct device *dev; 182 va_list args; 183 184 dev = kzalloc(sizeof(*dev), M_WAITOK); 185 dev->parent = parent; 186 dev->class = class; 187 dev->devt = devt; 188 dev->driver_data = drvdata; 189 va_start(args, fmt); 190 kobject_set_name_vargs(&dev->kobj, fmt, args); 191 va_end(args); 192 device_register(dev); 193 194 return (dev); 195} 196 197int 198kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 199 struct kobject *parent, const char *fmt, ...) 200{ 201 va_list args; 202 int error; 203 204 kobject_init(kobj, ktype); 205 kobj->ktype = ktype; 206 kobj->parent = parent; 207 kobj->name = NULL; 208 209 va_start(args, fmt); 210 error = kobject_set_name_vargs(kobj, fmt, args); 211 va_end(args); 212 if (error) 213 return (error); 214 return kobject_add_complete(kobj, parent); 215} 216 217static void 218linux_file_dtor(void *cdp) 219{ 220 struct linux_file *filp; 221 222 filp = cdp; 223 filp->f_op->release(filp->f_vnode, filp); 224 vdrop(filp->f_vnode); 225 kfree(filp); 226} 227 228static int 229linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 230{ 231 struct linux_cdev *ldev; 232 struct linux_file *filp; 233 struct file *file; 234 int error; 235 236 file = curthread->td_fpop; 237 ldev = dev->si_drv1; 238 if (ldev == NULL) 239 return (ENODEV); 240 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 241 filp->f_dentry = &filp->f_dentry_store; 242 filp->f_op = ldev->ops; 243 filp->f_flags = file->f_flag; 244 vhold(file->f_vnode); 245 filp->f_vnode = file->f_vnode; 246 if (filp->f_op->open) { 247 error = -filp->f_op->open(file->f_vnode, filp); 248 if (error) { 249 kfree(filp); 250 return (error); 251 } 252 } 253 error = devfs_set_cdevpriv(filp, linux_file_dtor); 254 if (error) { 255 filp->f_op->release(file->f_vnode, filp); 256 kfree(filp); 257 return (error); 258 } 259 260 return 0; 261} 262 263static int 264linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 265{ 266 struct linux_cdev *ldev; 267 struct 
linux_file *filp; 268 struct file *file; 269 int error; 270 271 file = curthread->td_fpop; 272 ldev = dev->si_drv1; 273 if (ldev == NULL) 274 return (0); 275 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 276 return (error); 277 filp->f_flags = file->f_flag; 278 devfs_clear_cdevpriv(); 279 280 281 return (0); 282} 283 284static int 285linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 286 struct thread *td) 287{ 288 struct linux_cdev *ldev; 289 struct linux_file *filp; 290 struct file *file; 291 int error; 292 293 file = curthread->td_fpop; 294 ldev = dev->si_drv1; 295 if (ldev == NULL) 296 return (0); 297 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 298 return (error); 299 filp->f_flags = file->f_flag; 300 /* 301 * Linux does not have a generic ioctl copyin/copyout layer. All 302 * linux ioctls must be converted to void ioctls which pass a 303 * pointer to the address of the data. We want the actual user 304 * address so we dereference here. 305 */ 306 data = *(void **)data; 307 if (filp->f_op->unlocked_ioctl) 308 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 309 else 310 error = ENOTTY; 311 312 return (error); 313} 314 315static int 316linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 317{ 318 struct linux_cdev *ldev; 319 struct linux_file *filp; 320 struct file *file; 321 ssize_t bytes; 322 int error; 323 324 file = curthread->td_fpop; 325 ldev = dev->si_drv1; 326 if (ldev == NULL) 327 return (0); 328 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 329 return (error); 330 filp->f_flags = file->f_flag; 331 if (uio->uio_iovcnt != 1) 332 panic("linux_dev_read: uio %p iovcnt %d", 333 uio, uio->uio_iovcnt); 334 if (filp->f_op->read) { 335 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 336 uio->uio_iov->iov_len, &uio->uio_offset); 337 if (bytes >= 0) { 338 uio->uio_iov->iov_base += bytes; 339 uio->uio_iov->iov_len -= bytes; 340 uio->uio_resid -= bytes; 341 } else 342 error = -bytes; 343 } 
else 344 error = ENXIO; 345 346 return (error); 347} 348 349static int 350linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 351{ 352 struct linux_cdev *ldev; 353 struct linux_file *filp; 354 struct file *file; 355 ssize_t bytes; 356 int error; 357 358 file = curthread->td_fpop; 359 ldev = dev->si_drv1; 360 if (ldev == NULL) 361 return (0); 362 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 363 return (error); 364 filp->f_flags = file->f_flag; 365 if (uio->uio_iovcnt != 1) 366 panic("linux_dev_write: uio %p iovcnt %d", 367 uio, uio->uio_iovcnt); 368 if (filp->f_op->write) { 369 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 370 uio->uio_iov->iov_len, &uio->uio_offset); 371 if (bytes >= 0) { 372 uio->uio_iov->iov_base += bytes; 373 uio->uio_iov->iov_len -= bytes; 374 uio->uio_resid -= bytes; 375 } else 376 error = -bytes; 377 } else 378 error = ENXIO; 379 380 return (error); 381} 382 383static int 384linux_dev_poll(struct cdev *dev, int events, struct thread *td) 385{ 386 struct linux_cdev *ldev; 387 struct linux_file *filp; 388 struct file *file; 389 int revents; 390 int error; 391 392 file = curthread->td_fpop; 393 ldev = dev->si_drv1; 394 if (ldev == NULL) 395 return (0); 396 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 397 return (error); 398 filp->f_flags = file->f_flag; 399 if (filp->f_op->poll) 400 revents = filp->f_op->poll(filp, NULL) & events; 401 else 402 revents = 0; 403 404 return (revents); 405} 406 407static int 408linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 409 int nprot, vm_memattr_t *memattr) 410{ 411 412 /* XXX memattr not honored. 
*/ 413 *paddr = offset; 414 return (0); 415} 416 417static int 418linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 419 vm_size_t size, struct vm_object **object, int nprot) 420{ 421 struct linux_cdev *ldev; 422 struct linux_file *filp; 423 struct file *file; 424 struct vm_area_struct vma; 425 vm_paddr_t paddr; 426 vm_page_t m; 427 int error; 428 429 file = curthread->td_fpop; 430 ldev = dev->si_drv1; 431 if (ldev == NULL) 432 return (ENODEV); 433 if (size != PAGE_SIZE) 434 return (EINVAL); 435 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 436 return (error); 437 filp->f_flags = file->f_flag; 438 vma.vm_start = 0; 439 vma.vm_end = PAGE_SIZE; 440 vma.vm_pgoff = *offset / PAGE_SIZE; 441 vma.vm_pfn = 0; 442 vma.vm_page_prot = 0; 443 if (filp->f_op->mmap) { 444 error = -filp->f_op->mmap(filp, &vma); 445 if (error == 0) { 446 paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; 447 *offset = paddr; 448 m = PHYS_TO_VM_PAGE(paddr); 449 *object = vm_pager_allocate(OBJT_DEVICE, dev, 450 PAGE_SIZE, nprot, *offset, curthread->td_ucred); 451 if (*object == NULL) 452 return (EINVAL); 453 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) 454 pmap_page_set_memattr(m, vma.vm_page_prot); 455 } 456 } else 457 error = ENODEV; 458 459 return (error); 460} 461 462struct cdevsw linuxcdevsw = { 463 .d_version = D_VERSION, 464 .d_flags = D_TRACKCLOSE, 465 .d_open = linux_dev_open, 466 .d_close = linux_dev_close, 467 .d_read = linux_dev_read, 468 .d_write = linux_dev_write, 469 .d_ioctl = linux_dev_ioctl, 470 .d_mmap_single = linux_dev_mmap_single, 471 .d_mmap = linux_dev_mmap, 472 .d_poll = linux_dev_poll, 473}; 474 475static int 476linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 477 int flags, struct thread *td) 478{ 479 struct linux_file *filp; 480 ssize_t bytes; 481 int error; 482 483 error = 0; 484 filp = (struct linux_file *)file->f_data; 485 filp->f_flags = file->f_flag; 486 if (uio->uio_iovcnt != 1) 487 panic("linux_file_read: uio %p iovcnt 
%d", 488 uio, uio->uio_iovcnt); 489 if (filp->f_op->read) { 490 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 491 uio->uio_iov->iov_len, &uio->uio_offset); 492 if (bytes >= 0) { 493 uio->uio_iov->iov_base += bytes; 494 uio->uio_iov->iov_len -= bytes; 495 uio->uio_resid -= bytes; 496 } else 497 error = -bytes; 498 } else 499 error = ENXIO; 500 501 return (error); 502} 503 504static int 505linux_file_poll(struct file *file, int events, struct ucred *active_cred, 506 struct thread *td) 507{ 508 struct linux_file *filp; 509 int revents; 510 511 filp = (struct linux_file *)file->f_data; 512 filp->f_flags = file->f_flag; 513 if (filp->f_op->poll) 514 revents = filp->f_op->poll(filp, NULL) & events; 515 else 516 revents = 0; 517 518 return (0); 519} 520 521static int 522linux_file_close(struct file *file, struct thread *td) 523{ 524 struct linux_file *filp; 525 int error; 526 527 filp = (struct linux_file *)file->f_data; 528 filp->f_flags = file->f_flag; 529 error = -filp->f_op->release(NULL, filp); 530 funsetown(&filp->f_sigio); 531 kfree(filp); 532 533 return (error); 534} 535 536static int 537linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 538 struct thread *td) 539{ 540 struct linux_file *filp; 541 int error; 542 543 filp = (struct linux_file *)fp->f_data; 544 filp->f_flags = fp->f_flag; 545 error = 0; 546 547 switch (cmd) { 548 case FIONBIO: 549 break; 550 case FIOASYNC: 551 if (filp->f_op->fasync == NULL) 552 break; 553 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 554 break; 555 case FIOSETOWN: 556 error = fsetown(*(int *)data, &filp->f_sigio); 557 if (error == 0) 558 error = filp->f_op->fasync(0, filp, 559 fp->f_flag & FASYNC); 560 break; 561 case FIOGETOWN: 562 *(int *)data = fgetown(&filp->f_sigio); 563 break; 564 default: 565 error = ENOTTY; 566 break; 567 } 568 return (error); 569} 570 571struct fileops linuxfileops = { 572 .fo_read = linux_file_read, 573 .fo_poll = linux_file_poll, 574 .fo_close = 
linux_file_close, 575 .fo_ioctl = linux_file_ioctl, 576 .fo_chmod = invfo_chmod, 577 .fo_chown = invfo_chown, 578 .fo_sendfile = invfo_sendfile, 579}; 580 581/* 582 * Hash of vmmap addresses. This is infrequently accessed and does not 583 * need to be particularly large. This is done because we must store the 584 * caller's idea of the map size to properly unmap. 585 */ 586struct vmmap { 587 LIST_ENTRY(vmmap) vm_next; 588 void *vm_addr; 589 unsigned long vm_size; 590}; 591 592LIST_HEAD(vmmaphd, vmmap); 593#define VMMAP_HASH_SIZE 64 594#define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 595#define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 596static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 597static struct mtx vmmaplock; 598 599static void 600vmmap_add(void *addr, unsigned long size) 601{ 602 struct vmmap *vmmap; 603 604 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 605 mtx_lock(&vmmaplock); 606 vmmap->vm_size = size; 607 vmmap->vm_addr = addr; 608 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 609 mtx_unlock(&vmmaplock); 610} 611 612static struct vmmap * 613vmmap_remove(void *addr) 614{ 615 struct vmmap *vmmap; 616 617 mtx_lock(&vmmaplock); 618 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 619 if (vmmap->vm_addr == addr) 620 break; 621 if (vmmap) 622 LIST_REMOVE(vmmap, vm_next); 623 mtx_unlock(&vmmaplock); 624 625 return (vmmap); 626} 627 628void * 629_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 630{ 631 void *addr; 632 633 addr = pmap_mapdev_attr(phys_addr, size, attr); 634 if (addr == NULL) 635 return (NULL); 636 vmmap_add(addr, size); 637 638 return (addr); 639} 640 641void 642iounmap(void *addr) 643{ 644 struct vmmap *vmmap; 645 646 vmmap = vmmap_remove(addr); 647 if (vmmap == NULL) 648 return; 649 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 650 kfree(vmmap); 651} 652 653 654void * 655vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 656{ 657 vm_offset_t off; 658 
size_t size; 659 660 size = count * PAGE_SIZE; 661 off = kva_alloc(size); 662 if (off == 0) 663 return (NULL); 664 vmmap_add((void *)off, size); 665 pmap_qenter(off, pages, count); 666 667 return ((void *)off); 668} 669 670void 671vunmap(void *addr) 672{ 673 struct vmmap *vmmap; 674 675 vmmap = vmmap_remove(addr); 676 if (vmmap == NULL) 677 return; 678 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 679 kva_free((vm_offset_t)addr, vmmap->vm_size); 680 kfree(vmmap); 681} 682 683static void 684linux_compat_init(void) 685{ 686 struct sysctl_oid *rootoid; 687 int i; 688 689 rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), 690 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 691 kobject_init(&class_root, &class_ktype); 692 kobject_set_name(&class_root, "class"); 693 class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 694 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 695 kobject_init(&linux_rootdev.kobj, &dev_ktype); 696 kobject_set_name(&linux_rootdev.kobj, "device"); 697 linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, 698 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 699 "device"); 700 linux_rootdev.bsddev = root_bus; 701 miscclass.name = "misc"; 702 class_register(&miscclass); 703 INIT_LIST_HEAD(&pci_drivers); 704 INIT_LIST_HEAD(&pci_devices); 705 spin_lock_init(&pci_lock); 706 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 707 for (i = 0; i < VMMAP_HASH_SIZE; i++) 708 LIST_INIT(&vmmaphead[i]); 709} 710 711SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 712 713static void 714linux_compat_uninit(void) 715{ 716 kobject_kfree_name(&class_root); 717 kobject_kfree_name(&linux_rootdev.kobj); 718 kobject_kfree_name(&miscclass.kobj); 719} 720SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); 721