/* kernel.c revision 263393 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25#include <assert.h> 26#include <fcntl.h> 27#include <poll.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <string.h> 31#include <zlib.h> 32#include <sys/spa.h> 33#include <sys/stat.h> 34#include <sys/processor.h> 35#include <sys/zfs_context.h> 36#include <sys/rrwlock.h> 37#include <sys/zmod.h> 38#include <sys/utsname.h> 39#include <sys/systeminfo.h> 40 41/* 42 * Emulation of kernel services in userland. 
43 */ 44 45int aok; 46uint64_t physmem; 47vnode_t *rootdir = (vnode_t *)0xabcd1234; 48char hw_serial[HW_HOSTID_LEN]; 49#ifdef illumos 50kmutex_t cpu_lock; 51#endif 52 53struct utsname utsname = { 54 "userland", "libzpool", "1", "1", "na" 55}; 56 57/* this only exists to have its address taken */ 58struct proc p0; 59 60/* 61 * ========================================================================= 62 * threads 63 * ========================================================================= 64 */ 65/*ARGSUSED*/ 66kthread_t * 67zk_thread_create(void (*func)(), void *arg) 68{ 69 thread_t tid; 70 71 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 72 &tid) == 0); 73 74 return ((void *)(uintptr_t)tid); 75} 76 77/* 78 * ========================================================================= 79 * kstats 80 * ========================================================================= 81 */ 82/*ARGSUSED*/ 83kstat_t * 84kstat_create(char *module, int instance, char *name, char *class, 85 uchar_t type, ulong_t ndata, uchar_t ks_flag) 86{ 87 return (NULL); 88} 89 90/*ARGSUSED*/ 91void 92kstat_install(kstat_t *ksp) 93{} 94 95/*ARGSUSED*/ 96void 97kstat_delete(kstat_t *ksp) 98{} 99 100/* 101 * ========================================================================= 102 * mutexes 103 * ========================================================================= 104 */ 105void 106zmutex_init(kmutex_t *mp) 107{ 108 mp->m_owner = NULL; 109 mp->initialized = B_TRUE; 110 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 111} 112 113void 114zmutex_destroy(kmutex_t *mp) 115{ 116 ASSERT(mp->initialized == B_TRUE); 117 ASSERT(mp->m_owner == NULL); 118 (void) _mutex_destroy(&(mp)->m_lock); 119 mp->m_owner = (void *)-1UL; 120 mp->initialized = B_FALSE; 121} 122 123int 124zmutex_owned(kmutex_t *mp) 125{ 126 ASSERT(mp->initialized == B_TRUE); 127 128 return (mp->m_owner == curthread); 129} 130 131void 132mutex_enter(kmutex_t *mp) 133{ 134 ASSERT(mp->initialized == B_TRUE); 
135 ASSERT(mp->m_owner != (void *)-1UL); 136 ASSERT(mp->m_owner != curthread); 137 VERIFY(mutex_lock(&mp->m_lock) == 0); 138 ASSERT(mp->m_owner == NULL); 139 mp->m_owner = curthread; 140} 141 142int 143mutex_tryenter(kmutex_t *mp) 144{ 145 ASSERT(mp->initialized == B_TRUE); 146 ASSERT(mp->m_owner != (void *)-1UL); 147 if (0 == mutex_trylock(&mp->m_lock)) { 148 ASSERT(mp->m_owner == NULL); 149 mp->m_owner = curthread; 150 return (1); 151 } else { 152 return (0); 153 } 154} 155 156void 157mutex_exit(kmutex_t *mp) 158{ 159 ASSERT(mp->initialized == B_TRUE); 160 ASSERT(mutex_owner(mp) == curthread); 161 mp->m_owner = NULL; 162 VERIFY(mutex_unlock(&mp->m_lock) == 0); 163} 164 165void * 166mutex_owner(kmutex_t *mp) 167{ 168 ASSERT(mp->initialized == B_TRUE); 169 return (mp->m_owner); 170} 171 172/* 173 * ========================================================================= 174 * rwlocks 175 * ========================================================================= 176 */ 177/*ARGSUSED*/ 178void 179rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 180{ 181 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 182 rwlp->rw_owner = NULL; 183 rwlp->initialized = B_TRUE; 184 rwlp->rw_count = 0; 185} 186 187void 188rw_destroy(krwlock_t *rwlp) 189{ 190 ASSERT(rwlp->rw_count == 0); 191 rwlock_destroy(&rwlp->rw_lock); 192 rwlp->rw_owner = (void *)-1UL; 193 rwlp->initialized = B_FALSE; 194} 195 196void 197rw_enter(krwlock_t *rwlp, krw_t rw) 198{ 199 //ASSERT(!RW_LOCK_HELD(rwlp)); 200 ASSERT(rwlp->initialized == B_TRUE); 201 ASSERT(rwlp->rw_owner != (void *)-1UL); 202 ASSERT(rwlp->rw_owner != curthread); 203 204 if (rw == RW_READER) { 205 VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); 206 ASSERT(rwlp->rw_count >= 0); 207 atomic_add_int(&rwlp->rw_count, 1); 208 } else { 209 VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); 210 ASSERT(rwlp->rw_count == 0); 211 rwlp->rw_count = -1; 212 rwlp->rw_owner = curthread; 213 } 214} 215 216void 217rw_exit(krwlock_t *rwlp) 218{ 219 
ASSERT(rwlp->initialized == B_TRUE); 220 ASSERT(rwlp->rw_owner != (void *)-1UL); 221 222 if (rwlp->rw_owner == curthread) { 223 /* Write locked. */ 224 ASSERT(rwlp->rw_count == -1); 225 rwlp->rw_count = 0; 226 rwlp->rw_owner = NULL; 227 } else { 228 /* Read locked. */ 229 ASSERT(rwlp->rw_count > 0); 230 atomic_add_int(&rwlp->rw_count, -1); 231 } 232 VERIFY(rw_unlock(&rwlp->rw_lock) == 0); 233} 234 235int 236rw_tryenter(krwlock_t *rwlp, krw_t rw) 237{ 238 int rv; 239 240 ASSERT(rwlp->initialized == B_TRUE); 241 ASSERT(rwlp->rw_owner != (void *)-1UL); 242 ASSERT(rwlp->rw_owner != curthread); 243 244 if (rw == RW_READER) 245 rv = rw_tryrdlock(&rwlp->rw_lock); 246 else 247 rv = rw_trywrlock(&rwlp->rw_lock); 248 249 if (rv == 0) { 250 ASSERT(rwlp->rw_owner == NULL); 251 if (rw == RW_READER) { 252 ASSERT(rwlp->rw_count >= 0); 253 atomic_add_int(&rwlp->rw_count, 1); 254 } else { 255 ASSERT(rwlp->rw_count == 0); 256 rwlp->rw_count = -1; 257 rwlp->rw_owner = curthread; 258 } 259 return (1); 260 } 261 262 return (0); 263} 264 265/*ARGSUSED*/ 266int 267rw_tryupgrade(krwlock_t *rwlp) 268{ 269 ASSERT(rwlp->initialized == B_TRUE); 270 ASSERT(rwlp->rw_owner != (void *)-1UL); 271 272 return (0); 273} 274 275int 276rw_lock_held(krwlock_t *rwlp) 277{ 278 279 return (rwlp->rw_count != 0); 280} 281 282/* 283 * ========================================================================= 284 * condition variables 285 * ========================================================================= 286 */ 287/*ARGSUSED*/ 288void 289cv_init(kcondvar_t *cv, char *name, int type, void *arg) 290{ 291 VERIFY(cond_init(cv, name, NULL) == 0); 292} 293 294void 295cv_destroy(kcondvar_t *cv) 296{ 297 VERIFY(cond_destroy(cv) == 0); 298} 299 300void 301cv_wait(kcondvar_t *cv, kmutex_t *mp) 302{ 303 ASSERT(mutex_owner(mp) == curthread); 304 mp->m_owner = NULL; 305 int ret = cond_wait(cv, &mp->m_lock); 306 VERIFY(ret == 0 || ret == EINTR); 307 mp->m_owner = curthread; 308} 309 310clock_t 
311cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 312{ 313 int error; 314 struct timespec ts; 315 struct timeval tv; 316 clock_t delta; 317 318 abstime += ddi_get_lbolt(); 319top: 320 delta = abstime - ddi_get_lbolt(); 321 if (delta <= 0) 322 return (-1); 323 324 if (gettimeofday(&tv, NULL) != 0) 325 assert(!"gettimeofday() failed"); 326 327 ts.tv_sec = tv.tv_sec + delta / hz; 328 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); 329 ASSERT(ts.tv_nsec >= 0); 330 331 if (ts.tv_nsec >= NANOSEC) { 332 ts.tv_sec++; 333 ts.tv_nsec -= NANOSEC; 334 } 335 336 ASSERT(mutex_owner(mp) == curthread); 337 mp->m_owner = NULL; 338 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 339 mp->m_owner = curthread; 340 341 if (error == EINTR) 342 goto top; 343 344 if (error == ETIMEDOUT) 345 return (-1); 346 347 ASSERT(error == 0); 348 349 return (1); 350} 351 352/*ARGSUSED*/ 353clock_t 354cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, 355 int flag) 356{ 357 int error; 358 timestruc_t ts; 359 hrtime_t delta; 360 361 ASSERT(flag == 0); 362 363top: 364 delta = tim - gethrtime(); 365 if (delta <= 0) 366 return (-1); 367 368 ts.tv_sec = delta / NANOSEC; 369 ts.tv_nsec = delta % NANOSEC; 370 371 ASSERT(mutex_owner(mp) == curthread); 372 mp->m_owner = NULL; 373 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 374 mp->m_owner = curthread; 375 376 if (error == ETIMEDOUT) 377 return (-1); 378 379 if (error == EINTR) 380 goto top; 381 382 ASSERT(error == 0); 383 384 return (1); 385} 386 387void 388cv_signal(kcondvar_t *cv) 389{ 390 VERIFY(cond_signal(cv) == 0); 391} 392 393void 394cv_broadcast(kcondvar_t *cv) 395{ 396 VERIFY(cond_broadcast(cv) == 0); 397} 398 399/* 400 * ========================================================================= 401 * vnode operations 402 * ========================================================================= 403 */ 404/* 405 * Note: for the xxxat() versions of these functions, we assume that the 
406 * starting vp is always rootdir (which is true for spa_directory.c, the only 407 * ZFS consumer of these interfaces). We assert this is true, and then emulate 408 * them by adding '/' in front of the path. 409 */ 410 411/*ARGSUSED*/ 412int 413vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 414{ 415 int fd; 416 vnode_t *vp; 417 int old_umask; 418 char realpath[MAXPATHLEN]; 419 struct stat64 st; 420 421 /* 422 * If we're accessing a real disk from userland, we need to use 423 * the character interface to avoid caching. This is particularly 424 * important if we're trying to look at a real in-kernel storage 425 * pool from userland, e.g. via zdb, because otherwise we won't 426 * see the changes occurring under the segmap cache. 427 * On the other hand, the stupid character device returns zero 428 * for its size. So -- gag -- we open the block device to get 429 * its size, and remember it for subsequent VOP_GETATTR(). 430 */ 431 if (strncmp(path, "/dev/", 5) == 0) { 432 char *dsk; 433 fd = open64(path, O_RDONLY); 434 if (fd == -1) 435 return (errno); 436 if (fstat64(fd, &st) == -1) { 437 close(fd); 438 return (errno); 439 } 440 close(fd); 441 (void) sprintf(realpath, "%s", path); 442 dsk = strstr(path, "/dsk/"); 443 if (dsk != NULL) 444 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 445 dsk + 1); 446 } else { 447 (void) sprintf(realpath, "%s", path); 448 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 449 return (errno); 450 } 451 452 if (flags & FCREAT) 453 old_umask = umask(0); 454 455 /* 456 * The construct 'flags - FREAD' conveniently maps combinations of 457 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 
458 */ 459 fd = open64(realpath, flags - FREAD, mode); 460 461 if (flags & FCREAT) 462 (void) umask(old_umask); 463 464 if (fd == -1) 465 return (errno); 466 467 if (fstat64(fd, &st) == -1) { 468 close(fd); 469 return (errno); 470 } 471 472 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 473 474 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 475 476 vp->v_fd = fd; 477 vp->v_size = st.st_size; 478 vp->v_path = spa_strdup(path); 479 480 return (0); 481} 482 483/*ARGSUSED*/ 484int 485vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 486 int x3, vnode_t *startvp, int fd) 487{ 488 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 489 int ret; 490 491 ASSERT(startvp == rootdir); 492 (void) sprintf(realpath, "/%s", path); 493 494 /* fd ignored for now, need if want to simulate nbmand support */ 495 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 496 497 umem_free(realpath, strlen(path) + 2); 498 499 return (ret); 500} 501 502/*ARGSUSED*/ 503int 504vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 505 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 506{ 507 ssize_t iolen, split; 508 509 if (uio == UIO_READ) { 510 iolen = pread64(vp->v_fd, addr, len, offset); 511 } else { 512 /* 513 * To simulate partial disk writes, we split writes into two 514 * system calls so that the process can be killed in between. 515 */ 516 int sectors = len >> SPA_MINBLOCKSHIFT; 517 split = (sectors > 0 ? 
rand() % sectors : 0) << 518 SPA_MINBLOCKSHIFT; 519 iolen = pwrite64(vp->v_fd, addr, split, offset); 520 iolen += pwrite64(vp->v_fd, (char *)addr + split, 521 len - split, offset + split); 522 } 523 524 if (iolen == -1) 525 return (errno); 526 if (residp) 527 *residp = len - iolen; 528 else if (iolen != len) 529 return (EIO); 530 return (0); 531} 532 533void 534vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td) 535{ 536 close(vp->v_fd); 537 spa_strfree(vp->v_path); 538 umem_free(vp, sizeof (vnode_t)); 539} 540 541/* 542 * At a minimum we need to update the size since vdev_reopen() 543 * will no longer call vn_openat(). 544 */ 545int 546fop_getattr(vnode_t *vp, vattr_t *vap) 547{ 548 struct stat64 st; 549 550 if (fstat64(vp->v_fd, &st) == -1) { 551 close(vp->v_fd); 552 return (errno); 553 } 554 555 vap->va_size = st.st_size; 556 return (0); 557} 558 559#ifdef ZFS_DEBUG 560 561/* 562 * ========================================================================= 563 * Figure out which debugging statements to print 564 * ========================================================================= 565 */ 566 567static char *dprintf_string; 568static int dprintf_print_all; 569 570int 571dprintf_find_string(const char *string) 572{ 573 char *tmp_str = dprintf_string; 574 int len = strlen(string); 575 576 /* 577 * Find out if this is a string we want to print. 578 * String format: file1.c,function_name1,file2.c,file3.c 579 */ 580 581 while (tmp_str != NULL) { 582 if (strncmp(tmp_str, string, len) == 0 && 583 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 584 return (1); 585 tmp_str = strchr(tmp_str, ','); 586 if (tmp_str != NULL) 587 tmp_str++; /* Get rid of , */ 588 } 589 return (0); 590} 591 592void 593dprintf_setup(int *argc, char **argv) 594{ 595 int i, j; 596 597 /* 598 * Debugging can be specified two ways: by setting the 599 * environment variable ZFS_DEBUG, or by including a 600 * "debug=..." argument on the command line. 
The command 601 * line setting overrides the environment variable. 602 */ 603 604 for (i = 1; i < *argc; i++) { 605 int len = strlen("debug="); 606 /* First look for a command line argument */ 607 if (strncmp("debug=", argv[i], len) == 0) { 608 dprintf_string = argv[i] + len; 609 /* Remove from args */ 610 for (j = i; j < *argc; j++) 611 argv[j] = argv[j+1]; 612 argv[j] = NULL; 613 (*argc)--; 614 } 615 } 616 617 if (dprintf_string == NULL) { 618 /* Look for ZFS_DEBUG environment variable */ 619 dprintf_string = getenv("ZFS_DEBUG"); 620 } 621 622 /* 623 * Are we just turning on all debugging? 624 */ 625 if (dprintf_find_string("on")) 626 dprintf_print_all = 1; 627} 628 629int 630sysctl_handle_64(SYSCTL_HANDLER_ARGS) 631{ 632 return (0); 633} 634 635/* 636 * ========================================================================= 637 * debug printfs 638 * ========================================================================= 639 */ 640void 641__dprintf(const char *file, const char *func, int line, const char *fmt, ...) 642{ 643 const char *newfile; 644 va_list adx; 645 646 /* 647 * Get rid of annoying "../common/" prefix to filename. 
648 */ 649 newfile = strrchr(file, '/'); 650 if (newfile != NULL) { 651 newfile = newfile + 1; /* Get rid of leading / */ 652 } else { 653 newfile = file; 654 } 655 656 if (dprintf_print_all || 657 dprintf_find_string(newfile) || 658 dprintf_find_string(func)) { 659 /* Print out just the function name if requested */ 660 flockfile(stdout); 661 if (dprintf_find_string("pid")) 662 (void) printf("%d ", getpid()); 663 if (dprintf_find_string("tid")) 664 (void) printf("%ul ", thr_self()); 665#if 0 666 if (dprintf_find_string("cpu")) 667 (void) printf("%u ", getcpuid()); 668#endif 669 if (dprintf_find_string("time")) 670 (void) printf("%llu ", gethrtime()); 671 if (dprintf_find_string("long")) 672 (void) printf("%s, line %d: ", newfile, line); 673 (void) printf("%s: ", func); 674 va_start(adx, fmt); 675 (void) vprintf(fmt, adx); 676 va_end(adx); 677 funlockfile(stdout); 678 } 679} 680 681#endif /* ZFS_DEBUG */ 682 683/* 684 * ========================================================================= 685 * cmn_err() and panic() 686 * ========================================================================= 687 */ 688static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 689static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 690 691void 692vpanic(const char *fmt, va_list adx) 693{ 694 (void) fprintf(stderr, "error: "); 695 (void) vfprintf(stderr, fmt, adx); 696 (void) fprintf(stderr, "\n"); 697 698 abort(); /* think of it as a "user-level crash dump" */ 699} 700 701void 702panic(const char *fmt, ...) 
703{ 704 va_list adx; 705 706 va_start(adx, fmt); 707 vpanic(fmt, adx); 708 va_end(adx); 709} 710 711void 712vcmn_err(int ce, const char *fmt, va_list adx) 713{ 714 if (ce == CE_PANIC) 715 vpanic(fmt, adx); 716 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 717 (void) fprintf(stderr, "%s", ce_prefix[ce]); 718 (void) vfprintf(stderr, fmt, adx); 719 (void) fprintf(stderr, "%s", ce_suffix[ce]); 720 } 721} 722 723/*PRINTFLIKE2*/ 724void 725cmn_err(int ce, const char *fmt, ...) 726{ 727 va_list adx; 728 729 va_start(adx, fmt); 730 vcmn_err(ce, fmt, adx); 731 va_end(adx); 732} 733 734/* 735 * ========================================================================= 736 * kobj interfaces 737 * ========================================================================= 738 */ 739struct _buf * 740kobj_open_file(char *name) 741{ 742 struct _buf *file; 743 vnode_t *vp; 744 745 /* set vp as the _fd field of the file */ 746 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, 747 -1) != 0) 748 return ((void *)-1UL); 749 750 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 751 file->_fd = (intptr_t)vp; 752 return (file); 753} 754 755int 756kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 757{ 758 ssize_t resid; 759 760 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 761 UIO_SYSSPACE, 0, 0, 0, &resid); 762 763 return (size - resid); 764} 765 766void 767kobj_close_file(struct _buf *file) 768{ 769 vn_close((vnode_t *)file->_fd, 0, NULL, NULL); 770 umem_free(file, sizeof (struct _buf)); 771} 772 773int 774kobj_get_filesize(struct _buf *file, uint64_t *size) 775{ 776 struct stat64 st; 777 vnode_t *vp = (vnode_t *)file->_fd; 778 779 if (fstat64(vp->v_fd, &st) == -1) { 780 vn_close(vp, 0, NULL, NULL); 781 return (errno); 782 } 783 *size = st.st_size; 784 return (0); 785} 786 787/* 788 * ========================================================================= 789 * misc routines 790 * 
========================================================================= 791 */ 792 793void 794delay(clock_t ticks) 795{ 796 poll(0, 0, ticks * (1000 / hz)); 797} 798 799#if 0 800/* 801 * Find highest one bit set. 802 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 803 * High order bit is 31 (or 63 in _LP64 kernel). 804 */ 805int 806highbit(ulong_t i) 807{ 808 register int h = 1; 809 810 if (i == 0) 811 return (0); 812#ifdef _LP64 813 if (i & 0xffffffff00000000ul) { 814 h += 32; i >>= 32; 815 } 816#endif 817 if (i & 0xffff0000) { 818 h += 16; i >>= 16; 819 } 820 if (i & 0xff00) { 821 h += 8; i >>= 8; 822 } 823 if (i & 0xf0) { 824 h += 4; i >>= 4; 825 } 826 if (i & 0xc) { 827 h += 2; i >>= 2; 828 } 829 if (i & 0x2) { 830 h += 1; 831 } 832 return (h); 833} 834#endif 835 836static int random_fd = -1, urandom_fd = -1; 837 838static int 839random_get_bytes_common(uint8_t *ptr, size_t len, int fd) 840{ 841 size_t resid = len; 842 ssize_t bytes; 843 844 ASSERT(fd != -1); 845 846 while (resid != 0) { 847 bytes = read(fd, ptr, resid); 848 ASSERT3S(bytes, >=, 0); 849 ptr += bytes; 850 resid -= bytes; 851 } 852 853 return (0); 854} 855 856int 857random_get_bytes(uint8_t *ptr, size_t len) 858{ 859 return (random_get_bytes_common(ptr, len, random_fd)); 860} 861 862int 863random_get_pseudo_bytes(uint8_t *ptr, size_t len) 864{ 865 return (random_get_bytes_common(ptr, len, urandom_fd)); 866} 867 868int 869ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) 870{ 871 char *end; 872 873 *result = strtoul(hw_serial, &end, base); 874 if (*result == 0) 875 return (errno); 876 return (0); 877} 878 879int 880ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) 881{ 882 char *end; 883 884 *result = strtoull(str, &end, base); 885 if (*result == 0) 886 return (errno); 887 return (0); 888} 889 890#ifdef illumos 891/* ARGSUSED */ 892cyclic_id_t 893cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when) 894{ 895 return (1); 
896} 897 898/* ARGSUSED */ 899void 900cyclic_remove(cyclic_id_t id) 901{ 902} 903 904/* ARGSUSED */ 905int 906cyclic_reprogram(cyclic_id_t id, hrtime_t expiration) 907{ 908 return (1); 909} 910#endif 911 912/* 913 * ========================================================================= 914 * kernel emulation setup & teardown 915 * ========================================================================= 916 */ 917static int 918umem_out_of_memory(void) 919{ 920 char errmsg[] = "out of memory -- generating core dump\n"; 921 922 write(fileno(stderr), errmsg, sizeof (errmsg)); 923 abort(); 924 return (0); 925} 926 927void 928kernel_init(int mode) 929{ 930 extern uint_t rrw_tsd_key; 931 932 umem_nofail_callback(umem_out_of_memory); 933 934 physmem = sysconf(_SC_PHYS_PAGES); 935 936 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 937 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 938 939 (void) snprintf(hw_serial, sizeof (hw_serial), "%lu", 940 (mode & FWRITE) ? (unsigned long)gethostid() : 0); 941 942 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); 943 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); 944 945 system_taskq_init(); 946 947#ifdef illumos 948 mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL); 949#endif 950 951 spa_init(mode); 952 953 tsd_create(&rrw_tsd_key, rrw_tsd_destroy); 954} 955 956void 957kernel_fini(void) 958{ 959 spa_fini(); 960 961 system_taskq_fini(); 962 963 close(random_fd); 964 close(urandom_fd); 965 966 random_fd = -1; 967 urandom_fd = -1; 968} 969 970int 971z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) 972{ 973 int ret; 974 uLongf len = *dstlen; 975 976 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) 977 *dstlen = (size_t)len; 978 979 return (ret); 980} 981 982int 983z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, 984 int level) 985{ 986 int ret; 987 uLongf len = *dstlen; 988 989 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) 
990 *dstlen = (size_t)len; 991 992 return (ret); 993} 994 995uid_t 996crgetuid(cred_t *cr) 997{ 998 return (0); 999} 1000 1001uid_t 1002crgetruid(cred_t *cr) 1003{ 1004 return (0); 1005} 1006 1007gid_t 1008crgetgid(cred_t *cr) 1009{ 1010 return (0); 1011} 1012 1013int 1014crgetngroups(cred_t *cr) 1015{ 1016 return (0); 1017} 1018 1019gid_t * 1020crgetgroups(cred_t *cr) 1021{ 1022 return (NULL); 1023} 1024 1025int 1026zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) 1027{ 1028 return (0); 1029} 1030 1031int 1032zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) 1033{ 1034 return (0); 1035} 1036 1037int 1038zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) 1039{ 1040 return (0); 1041} 1042 1043ksiddomain_t * 1044ksid_lookupdomain(const char *dom) 1045{ 1046 ksiddomain_t *kd; 1047 1048 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); 1049 kd->kd_name = spa_strdup(dom); 1050 return (kd); 1051} 1052 1053void 1054ksiddomain_rele(ksiddomain_t *ksid) 1055{ 1056 spa_strfree(ksid->kd_name); 1057 umem_free(ksid, sizeof (ksiddomain_t)); 1058} 1059 1060/* 1061 * Do not change the length of the returned string; it must be freed 1062 * with strfree(). 1063 */ 1064char * 1065kmem_asprintf(const char *fmt, ...) 
1066{ 1067 int size; 1068 va_list adx; 1069 char *buf; 1070 1071 va_start(adx, fmt); 1072 size = vsnprintf(NULL, 0, fmt, adx) + 1; 1073 va_end(adx); 1074 1075 buf = kmem_alloc(size, KM_SLEEP); 1076 1077 va_start(adx, fmt); 1078 size = vsnprintf(buf, size, fmt, adx); 1079 va_end(adx); 1080 1081 return (buf); 1082} 1083 1084/* ARGSUSED */ 1085int 1086zfs_onexit_fd_hold(int fd, minor_t *minorp) 1087{ 1088 *minorp = 0; 1089 return (0); 1090} 1091 1092/* ARGSUSED */ 1093void 1094zfs_onexit_fd_rele(int fd) 1095{ 1096} 1097 1098/* ARGSUSED */ 1099int 1100zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, 1101 uint64_t *action_handle) 1102{ 1103 return (0); 1104} 1105 1106/* ARGSUSED */ 1107int 1108zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) 1109{ 1110 return (0); 1111} 1112 1113/* ARGSUSED */ 1114int 1115zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) 1116{ 1117 return (0); 1118} 1119 1120#ifdef __FreeBSD__ 1121/* ARGSUSED */ 1122int 1123zvol_create_minors(const char *name) 1124{ 1125 return (0); 1126} 1127#endif 1128 1129#ifdef illumos 1130void 1131bioinit(buf_t *bp) 1132{ 1133 bzero(bp, sizeof (buf_t)); 1134} 1135 1136void 1137biodone(buf_t *bp) 1138{ 1139 if (bp->b_iodone != NULL) { 1140 (*(bp->b_iodone))(bp); 1141 return; 1142 } 1143 ASSERT((bp->b_flags & B_DONE) == 0); 1144 bp->b_flags |= B_DONE; 1145} 1146 1147void 1148bioerror(buf_t *bp, int error) 1149{ 1150 ASSERT(bp != NULL); 1151 ASSERT(error >= 0); 1152 1153 if (error != 0) { 1154 bp->b_flags |= B_ERROR; 1155 } else { 1156 bp->b_flags &= ~B_ERROR; 1157 } 1158 bp->b_error = error; 1159} 1160 1161 1162int 1163geterror(struct buf *bp) 1164{ 1165 int error = 0; 1166 1167 if (bp->b_flags & B_ERROR) { 1168 error = bp->b_error; 1169 if (!error) 1170 error = EIO; 1171 } 1172 return (error); 1173} 1174#endif 1175