kernel.c revision 268653
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25 */
26
27#include <assert.h>
28#include <fcntl.h>
29#include <poll.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <zlib.h>
34#include <libgen.h>
35#include <sys/spa.h>
36#include <sys/stat.h>
37#include <sys/processor.h>
38#include <sys/zfs_context.h>
39#include <sys/rrwlock.h>
40#include <sys/zmod.h>
41#include <sys/utsname.h>
42#include <sys/systeminfo.h>
43
44/*
45 * Emulation of kernel services in userland.
46 */
47
/* NOTE(review): not referenced in this file; presumably consulted by assertion code elsewhere -- confirm. */
int aok;
/* Number of physical memory pages; assigned in kernel_init() from sysconf(). */
uint64_t physmem;
/*
 * Fake root vnode.  Within this file the value is only compared against
 * (see vn_openat()), never dereferenced.
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host id rendered as a decimal string; filled in by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];
#ifdef illumos
kmutex_t cpu_lock;
#endif

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed identification strings for the emulated "kernel". */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
65
66/*
67 * =========================================================================
68 * threads
69 * =========================================================================
70 */
71/*ARGSUSED*/
72kthread_t *
73zk_thread_create(void (*func)(), void *arg)
74{
75	thread_t tid;
76
77	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
78	    &tid) == 0);
79
80	return ((void *)(uintptr_t)tid);
81}
82
83/*
84 * =========================================================================
85 * kstats
86 * =========================================================================
87 */
88/*ARGSUSED*/
89kstat_t *
90kstat_create(char *module, int instance, char *name, char *class,
91    uchar_t type, ulong_t ndata, uchar_t ks_flag)
92{
93	return (NULL);
94}
95
96/*ARGSUSED*/
97void
98kstat_install(kstat_t *ksp)
99{}
100
101/*ARGSUSED*/
102void
103kstat_delete(kstat_t *ksp)
104{}
105
106/*
107 * =========================================================================
108 * mutexes
109 * =========================================================================
110 */
111void
112zmutex_init(kmutex_t *mp)
113{
114	mp->m_owner = NULL;
115	mp->initialized = B_TRUE;
116	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
117}
118
119void
120zmutex_destroy(kmutex_t *mp)
121{
122	ASSERT(mp->initialized == B_TRUE);
123	ASSERT(mp->m_owner == NULL);
124	(void) _mutex_destroy(&(mp)->m_lock);
125	mp->m_owner = (void *)-1UL;
126	mp->initialized = B_FALSE;
127}
128
129int
130zmutex_owned(kmutex_t *mp)
131{
132	ASSERT(mp->initialized == B_TRUE);
133
134	return (mp->m_owner == curthread);
135}
136
/*
 * Acquire mp, blocking until it is available, and record the calling
 * thread as owner.
 *
 * The assertions ahead of the lock reject an uninitialized mutex, a
 * destroyed one (zmutex_destroy() leaves m_owner at -1) and recursive
 * entry by the current owner.
 */
void
mutex_enter(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mp->m_owner != (void *)-1UL);
	ASSERT(mp->m_owner != curthread);
	VERIFY(mutex_lock(&mp->m_lock) == 0);
	/* The lock is ours now, so no owner may still be recorded. */
	ASSERT(mp->m_owner == NULL);
	mp->m_owner = curthread;
}
147
148int
149mutex_tryenter(kmutex_t *mp)
150{
151	ASSERT(mp->initialized == B_TRUE);
152	ASSERT(mp->m_owner != (void *)-1UL);
153	if (0 == mutex_trylock(&mp->m_lock)) {
154		ASSERT(mp->m_owner == NULL);
155		mp->m_owner = curthread;
156		return (1);
157	} else {
158		return (0);
159	}
160}
161
/*
 * Release mp.  Only the recorded owner may call this.
 */
void
mutex_exit(kmutex_t *mp)
{
	ASSERT(mp->initialized == B_TRUE);
	ASSERT(mutex_owner(mp) == curthread);
	/* Clear the owner while the lock is still held. */
	mp->m_owner = NULL;
	VERIFY(mutex_unlock(&mp->m_lock) == 0);
}
170
171void *
172mutex_owner(kmutex_t *mp)
173{
174	ASSERT(mp->initialized == B_TRUE);
175	return (mp->m_owner);
176}
177
178/*
179 * =========================================================================
180 * rwlocks
181 * =========================================================================
182 */
183/*ARGSUSED*/
184void
185rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
186{
187	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
188	rwlp->rw_owner = NULL;
189	rwlp->initialized = B_TRUE;
190	rwlp->rw_count = 0;
191}
192
193void
194rw_destroy(krwlock_t *rwlp)
195{
196	ASSERT(rwlp->rw_count == 0);
197	rwlock_destroy(&rwlp->rw_lock);
198	rwlp->rw_owner = (void *)-1UL;
199	rwlp->initialized = B_FALSE;
200}
201
202void
203rw_enter(krwlock_t *rwlp, krw_t rw)
204{
205	//ASSERT(!RW_LOCK_HELD(rwlp));
206	ASSERT(rwlp->initialized == B_TRUE);
207	ASSERT(rwlp->rw_owner != (void *)-1UL);
208	ASSERT(rwlp->rw_owner != curthread);
209
210	if (rw == RW_READER) {
211		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
212		ASSERT(rwlp->rw_count >= 0);
213		atomic_add_int(&rwlp->rw_count, 1);
214	} else {
215		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
216		ASSERT(rwlp->rw_count == 0);
217		rwlp->rw_count = -1;
218		rwlp->rw_owner = curthread;
219	}
220}
221
222void
223rw_exit(krwlock_t *rwlp)
224{
225	ASSERT(rwlp->initialized == B_TRUE);
226	ASSERT(rwlp->rw_owner != (void *)-1UL);
227
228	if (rwlp->rw_owner == curthread) {
229		/* Write locked. */
230		ASSERT(rwlp->rw_count == -1);
231		rwlp->rw_count = 0;
232		rwlp->rw_owner = NULL;
233	} else {
234		/* Read locked. */
235		ASSERT(rwlp->rw_count > 0);
236		atomic_add_int(&rwlp->rw_count, -1);
237	}
238	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
239}
240
241int
242rw_tryenter(krwlock_t *rwlp, krw_t rw)
243{
244	int rv;
245
246	ASSERT(rwlp->initialized == B_TRUE);
247	ASSERT(rwlp->rw_owner != (void *)-1UL);
248	ASSERT(rwlp->rw_owner != curthread);
249
250	if (rw == RW_READER)
251		rv = rw_tryrdlock(&rwlp->rw_lock);
252	else
253		rv = rw_trywrlock(&rwlp->rw_lock);
254
255	if (rv == 0) {
256		ASSERT(rwlp->rw_owner == NULL);
257		if (rw == RW_READER) {
258			ASSERT(rwlp->rw_count >= 0);
259			atomic_add_int(&rwlp->rw_count, 1);
260		} else {
261			ASSERT(rwlp->rw_count == 0);
262			rwlp->rw_count = -1;
263			rwlp->rw_owner = curthread;
264		}
265		return (1);
266	}
267
268	return (0);
269}
270
271/*ARGSUSED*/
272int
273rw_tryupgrade(krwlock_t *rwlp)
274{
275	ASSERT(rwlp->initialized == B_TRUE);
276	ASSERT(rwlp->rw_owner != (void *)-1UL);
277
278	return (0);
279}
280
281int
282rw_lock_held(krwlock_t *rwlp)
283{
284
285	return (rwlp->rw_count != 0);
286}
287
288/*
289 * =========================================================================
290 * condition variables
291 * =========================================================================
292 */
293/*ARGSUSED*/
294void
295cv_init(kcondvar_t *cv, char *name, int type, void *arg)
296{
297	VERIFY(cond_init(cv, name, NULL) == 0);
298}
299
300void
301cv_destroy(kcondvar_t *cv)
302{
303	VERIFY(cond_destroy(cv) == 0);
304}
305
/*
 * Wait on cv.  The caller must own mp.
 *
 * The recorded owner is cleared for the duration of the wait and
 * restored afterwards, because the underlying lock is handed to
 * cond_wait().  A return of EINTR from cond_wait() is tolerated; any
 * other non-zero result is fatal.
 */
void
cv_wait(kcondvar_t *cv, kmutex_t *mp)
{
	ASSERT(mutex_owner(mp) == curthread);
	mp->m_owner = NULL;
	int ret = cond_wait(cv, &mp->m_lock);
	VERIFY(ret == 0 || ret == EINTR);
	mp->m_owner = curthread;
}
315
/*
 * Wait on cv for a limited time.  The caller must own mp.
 *
 * Despite its name, abstime is used as a tick count relative to now:
 * it is first turned into an absolute lbolt deadline by adding
 * ddi_get_lbolt().
 *
 * Returns -1 when the deadline has passed or the wait timed out, and 1
 * when the wait returned without timing out.
 */
clock_t
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
{
	int error;
	struct timespec ts;
	struct timeval tv;
	clock_t delta;

	abstime += ddi_get_lbolt();
top:
	/* Remaining ticks; recomputed on every retry. */
	delta = abstime - ddi_get_lbolt();
	if (delta <= 0)
		return (-1);

	if (gettimeofday(&tv, NULL) != 0)
		assert(!"gettimeofday() failed");

	/* Wall-clock deadline = current time of day + remaining ticks. */
	ts.tv_sec = tv.tv_sec + delta / hz;
	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
	ASSERT(ts.tv_nsec >= 0);

	/* Carry nanosecond overflow into the seconds field. */
	if (ts.tv_nsec >= NANOSEC) {
		ts.tv_sec++;
		ts.tv_nsec -= NANOSEC;
	}

	ASSERT(mutex_owner(mp) == curthread);
	/* Owner is cleared while the lock is handed to the wait call. */
	mp->m_owner = NULL;
	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
	mp->m_owner = curthread;

	/* Interrupted: wait again for whatever time is left. */
	if (error == EINTR)
		goto top;

	if (error == ETIMEDOUT)
		return (-1);

	ASSERT(error == 0);

	return (1);
}
357
358/*ARGSUSED*/
359clock_t
360cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
361    int flag)
362{
363	int error;
364	timestruc_t ts;
365	hrtime_t delta;
366
367	ASSERT(flag == 0);
368
369top:
370	delta = tim - gethrtime();
371	if (delta <= 0)
372		return (-1);
373
374	ts.tv_sec = delta / NANOSEC;
375	ts.tv_nsec = delta % NANOSEC;
376
377	ASSERT(mutex_owner(mp) == curthread);
378	mp->m_owner = NULL;
379	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
380	mp->m_owner = curthread;
381
382	if (error == ETIMEDOUT)
383		return (-1);
384
385	if (error == EINTR)
386		goto top;
387
388	ASSERT(error == 0);
389
390	return (1);
391}
392
393void
394cv_signal(kcondvar_t *cv)
395{
396	VERIFY(cond_signal(cv) == 0);
397}
398
399void
400cv_broadcast(kcondvar_t *cv)
401{
402	VERIFY(cond_broadcast(cv) == 0);
403}
404
405/*
406 * =========================================================================
407 * vnode operations
408 * =========================================================================
409 */
410/*
411 * Note: for the xxxat() versions of these functions, we assume that the
412 * starting vp is always rootdir (which is true for spa_directory.c, the only
413 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
414 * them by adding '/' in front of the path.
415 */
416
417/*ARGSUSED*/
418int
419vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
420{
421	int fd;
422	int dump_fd;
423	vnode_t *vp;
424	int old_umask;
425	char realpath[MAXPATHLEN];
426	struct stat64 st;
427
428	/*
429	 * If we're accessing a real disk from userland, we need to use
430	 * the character interface to avoid caching.  This is particularly
431	 * important if we're trying to look at a real in-kernel storage
432	 * pool from userland, e.g. via zdb, because otherwise we won't
433	 * see the changes occurring under the segmap cache.
434	 * On the other hand, the stupid character device returns zero
435	 * for its size.  So -- gag -- we open the block device to get
436	 * its size, and remember it for subsequent VOP_GETATTR().
437	 */
438	if (strncmp(path, "/dev/", 5) == 0) {
439		char *dsk;
440		fd = open64(path, O_RDONLY);
441		if (fd == -1)
442			return (errno);
443		if (fstat64(fd, &st) == -1) {
444			close(fd);
445			return (errno);
446		}
447		close(fd);
448		(void) sprintf(realpath, "%s", path);
449		dsk = strstr(path, "/dsk/");
450		if (dsk != NULL)
451			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
452			    dsk + 1);
453	} else {
454		(void) sprintf(realpath, "%s", path);
455		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
456			return (errno);
457	}
458
459	if (flags & FCREAT)
460		old_umask = umask(0);
461
462	/*
463	 * The construct 'flags - FREAD' conveniently maps combinations of
464	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
465	 */
466	fd = open64(realpath, flags - FREAD, mode);
467
468	if (flags & FCREAT)
469		(void) umask(old_umask);
470
471	if (vn_dumpdir != NULL) {
472		char dumppath[MAXPATHLEN];
473		(void) snprintf(dumppath, sizeof (dumppath),
474		    "%s/%s", vn_dumpdir, basename(realpath));
475		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
476		if (dump_fd == -1)
477			return (errno);
478	} else {
479		dump_fd = -1;
480	}
481
482	if (fd == -1)
483		return (errno);
484
485	if (fstat64(fd, &st) == -1) {
486		close(fd);
487		return (errno);
488	}
489
490	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
491
492	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
493
494	vp->v_fd = fd;
495	vp->v_size = st.st_size;
496	vp->v_path = spa_strdup(path);
497	vp->v_dump_fd = dump_fd;
498
499	return (0);
500}
501
502/*ARGSUSED*/
503int
504vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
505    int x3, vnode_t *startvp, int fd)
506{
507	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
508	int ret;
509
510	ASSERT(startvp == rootdir);
511	(void) sprintf(realpath, "/%s", path);
512
513	/* fd ignored for now, need if want to simulate nbmand support */
514	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
515
516	umem_free(realpath, strlen(path) + 2);
517
518	return (ret);
519}
520
521/*ARGSUSED*/
522int
523vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
524	int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
525{
526	ssize_t iolen, split;
527
528	if (uio == UIO_READ) {
529		iolen = pread64(vp->v_fd, addr, len, offset);
530		if (vp->v_dump_fd != -1) {
531			int status =
532			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
533			ASSERT(status != -1);
534		}
535	} else {
536		/*
537		 * To simulate partial disk writes, we split writes into two
538		 * system calls so that the process can be killed in between.
539		 */
540		int sectors = len >> SPA_MINBLOCKSHIFT;
541		split = (sectors > 0 ? rand() % sectors : 0) <<
542		    SPA_MINBLOCKSHIFT;
543		iolen = pwrite64(vp->v_fd, addr, split, offset);
544		iolen += pwrite64(vp->v_fd, (char *)addr + split,
545		    len - split, offset + split);
546	}
547
548	if (iolen == -1)
549		return (errno);
550	if (residp)
551		*residp = len - iolen;
552	else if (iolen != len)
553		return (EIO);
554	return (0);
555}
556
557void
558vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
559{
560	close(vp->v_fd);
561	if (vp->v_dump_fd != -1)
562		close(vp->v_dump_fd);
563	spa_strfree(vp->v_path);
564	umem_free(vp, sizeof (vnode_t));
565}
566
567/*
568 * At a minimum we need to update the size since vdev_reopen()
569 * will no longer call vn_openat().
570 */
571int
572fop_getattr(vnode_t *vp, vattr_t *vap)
573{
574	struct stat64 st;
575
576	if (fstat64(vp->v_fd, &st) == -1) {
577		close(vp->v_fd);
578		return (errno);
579	}
580
581	vap->va_size = st.st_size;
582	return (0);
583}
584
585#ifdef ZFS_DEBUG
586
587/*
588 * =========================================================================
589 * Figure out which debugging statements to print
590 * =========================================================================
591 */
592
/* Comma-separated selector list; see dprintf_find_string(). */
static char *dprintf_string;
/* Non-zero when the selector list contains "on". */
static int dprintf_print_all;

/*
 * Return 1 when string is one of the comma-separated entries of the
 * selector list, 0 otherwise (including when no list is set).
 * Entries must match in full, not just as a prefix.
 */
int
dprintf_find_string(const char *string)
{
	char *tmp_str = dprintf_string;
	int len = strlen(string);

	/*
	 * Find out if this is a string we want to print.
	 * String format: file1.c,function_name1,file2.c,file3.c
	 */

	while (tmp_str != NULL) {
		if (strncmp(tmp_str, string, len) == 0 &&
		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
			return (1);
		tmp_str = strchr(tmp_str, ',');
		if (tmp_str != NULL)
			tmp_str++; /* Get rid of , */
	}
	return (0);
}

/*
 * Pick up the debug selector list and strip it from the command line.
 *
 * Every "debug=..." argument is removed from argv (and *argc reduced
 * accordingly); the last one seen supplies the selector list.  argv is
 * left NULL-terminated at argv[*argc].
 */
void
dprintf_setup(int *argc, char **argv)
{
	const int len = strlen("debug=");
	int i, j;

	/*
	 * Debugging can be specified two ways: by setting the
	 * environment variable ZFS_DEBUG, or by including a
	 * "debug=..."  argument on the command line.  The command
	 * line setting overrides the environment variable.
	 */

	i = 1;
	while (i < *argc) {
		/* First look for a command line argument */
		if (strncmp("debug=", argv[i], len) != 0) {
			i++;
			continue;
		}

		dprintf_string = argv[i] + len;

		/* Remove from args */
		for (j = i; j < *argc - 1; j++)
			argv[j] = argv[j + 1];
		(*argc)--;
		argv[*argc] = NULL;

		/*
		 * Do not advance i: the slot now holds the next argument,
		 * which has not been examined yet.
		 */
	}

	if (dprintf_string == NULL) {
		/* Look for ZFS_DEBUG environment variable */
		dprintf_string = getenv("ZFS_DEBUG");
	}

	/*
	 * Are we just turning on all debugging?
	 */
	if (dprintf_find_string("on"))
		dprintf_print_all = 1;
}
654
/* Userland stub for the sysctl handler: does nothing and reports success. */
/*ARGSUSED*/
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	const int ok = 0;

	return (ok);
}
660
661/*
662 * =========================================================================
663 * debug printfs
664 * =========================================================================
665 */
666void
667__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
668{
669	const char *newfile;
670	va_list adx;
671
672	/*
673	 * Get rid of annoying "../common/" prefix to filename.
674	 */
675	newfile = strrchr(file, '/');
676	if (newfile != NULL) {
677		newfile = newfile + 1; /* Get rid of leading / */
678	} else {
679		newfile = file;
680	}
681
682	if (dprintf_print_all ||
683	    dprintf_find_string(newfile) ||
684	    dprintf_find_string(func)) {
685		/* Print out just the function name if requested */
686		flockfile(stdout);
687		if (dprintf_find_string("pid"))
688			(void) printf("%d ", getpid());
689		if (dprintf_find_string("tid"))
690			(void) printf("%ul ", thr_self());
691#if 0
692		if (dprintf_find_string("cpu"))
693			(void) printf("%u ", getcpuid());
694#endif
695		if (dprintf_find_string("time"))
696			(void) printf("%llu ", gethrtime());
697		if (dprintf_find_string("long"))
698			(void) printf("%s, line %d: ", newfile, line);
699		(void) printf("%s: ", func);
700		va_start(adx, fmt);
701		(void) vprintf(fmt, adx);
702		va_end(adx);
703		funlockfile(stdout);
704	}
705}
706
707#endif /* ZFS_DEBUG */
708
709/*
710 * =========================================================================
711 * cmn_err() and panic()
712 * =========================================================================
713 */
/*
 * Text printed before and after a cmn_err() message, indexed by the
 * severity passed as 'ce'.  Both tables have CE_IGNORE rows, so valid
 * indices are 0 .. CE_IGNORE - 1.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
716
/*
 * Print "error: " followed by the formatted message and a newline on
 * stderr, then abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
726
/* Variadic front end for vpanic(); does not return. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
736
737void
738vcmn_err(int ce, const char *fmt, va_list adx)
739{
740	if (ce == CE_PANIC)
741		vpanic(fmt, adx);
742	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
743		(void) fprintf(stderr, "%s", ce_prefix[ce]);
744		(void) vfprintf(stderr, fmt, adx);
745		(void) fprintf(stderr, "%s", ce_suffix[ce]);
746	}
747}
748
/* Variadic front end for vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
759
760/*
761 * =========================================================================
762 * kobj interfaces
763 * =========================================================================
764 */
765struct _buf *
766kobj_open_file(char *name)
767{
768	struct _buf *file;
769	vnode_t *vp;
770
771	/* set vp as the _fd field of the file */
772	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
773	    -1) != 0)
774		return ((void *)-1UL);
775
776	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
777	file->_fd = (intptr_t)vp;
778	return (file);
779}
780
781int
782kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
783{
784	ssize_t resid;
785
786	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
787	    UIO_SYSSPACE, 0, 0, 0, &resid);
788
789	return (size - resid);
790}
791
792void
793kobj_close_file(struct _buf *file)
794{
795	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
796	umem_free(file, sizeof (struct _buf));
797}
798
799int
800kobj_get_filesize(struct _buf *file, uint64_t *size)
801{
802	struct stat64 st;
803	vnode_t *vp = (vnode_t *)file->_fd;
804
805	if (fstat64(vp->v_fd, &st) == -1) {
806		vn_close(vp, 0, NULL, NULL);
807		return (errno);
808	}
809	*size = st.st_size;
810	return (0);
811}
812
813/*
814 * =========================================================================
815 * misc routines
816 * =========================================================================
817 */
818
819void
820delay(clock_t ticks)
821{
822	poll(0, 0, ticks * (1000 / hz));
823}
824
825#if 0
/*
 * Locate the most significant set bit of i.
 *	Returns the 1-based position of that bit (1 for the least
 *	significant bit, 64 for the most significant), or 0 when i is 0.
 */
int
highbit64(uint64_t i)
{
	int bitpos = 1;
	int shift;

	if (i == 0)
		return (0);

	/* Binary search: halve the window (32, 16, 8, 4, 2, 1) each round. */
	for (shift = 32; shift > 0; shift >>= 1) {
		if ((i >> shift) != 0) {
			bitpos += shift;
			i >>= shift;
		}
	}
	return (bitpos);
}
857#endif
858
/* Descriptors for /dev/random and /dev/urandom; opened by kernel_init(). */
static int random_fd = -1, urandom_fd = -1;

/*
 * Fill ptr[0 .. len) with bytes read from fd, looping over short reads.
 *
 * An interrupted read is retried.  Any other read failure, or end of
 * file, is fatal: otherwise the loop would spin forever on EOF or walk
 * ptr backwards on -1.  Always returns 0.
 */
static int
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
{
	size_t resid = len;
	ssize_t bytes;

	ASSERT(fd != -1);

	while (resid != 0) {
		bytes = read(fd, ptr, resid);
		if (bytes == -1 && errno == EINTR)
			continue;
		VERIFY(bytes > 0);
		ptr += bytes;
		resid -= bytes;
	}

	return (0);
}
878
879int
880random_get_bytes(uint8_t *ptr, size_t len)
881{
882	return (random_get_bytes_common(ptr, len, random_fd));
883}
884
885int
886random_get_pseudo_bytes(uint8_t *ptr, size_t len)
887{
888	return (random_get_bytes_common(ptr, len, urandom_fd));
889}
890
/*
 * Convert the leading number in hw_serial to an unsigned long.
 *
 * *result always receives the value produced by strtoul().  When nptr
 * is non-NULL, *nptr is set to the first character not consumed.
 *
 * Returns 0 on success, EINVAL when no digits could be converted (or
 * the base is rejected), and ERANGE on overflow.  errno is cleared
 * before the conversion so that a stale value from an earlier call is
 * never reported, and a legitimate result of 0 counts as success.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	/* Pre-set in case strtoul() rejects the base without storing end. */
	char *end = (char *)hw_serial;
	int err;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;
	if (err == 0 && end == hw_serial)
		err = EINVAL;	/* no number formed */
	return (err);
}
901
902int
903ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
904{
905	char *end;
906
907	*result = strtoull(str, &end, base);
908	if (*result == 0)
909		return (errno);
910	return (0);
911}
912
913#ifdef illumos
914/* ARGSUSED */
915cyclic_id_t
916cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
917{
918	return (1);
919}
920
921/* ARGSUSED */
922void
923cyclic_remove(cyclic_id_t id)
924{
925}
926
927/* ARGSUSED */
928int
929cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
930{
931	return (1);
932}
933#endif
934
935/*
936 * =========================================================================
937 * kernel emulation setup & teardown
938 * =========================================================================
939 */
/*
 * Out-of-memory callback registered with umem_nofail_callback().
 *
 * Writes a fixed message to stderr with write() and aborts.  The
 * message is a static constant so nothing has to be built at this
 * point.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof counts the terminating NUL, which must not be written. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
949
/*
 * Bring up the userland kernel emulation.
 *
 * mode is passed on to spa_init(); its FWRITE bit also decides whether
 * hw_serial holds the real host id or 0.
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Allocation failures under UMEM_NOFAIL end in umem_out_of_memory(). */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* The real host id is used only when opened for writing. */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);

	/* Both random devices must open; they are closed in kernel_fini(). */
	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);

	system_taskq_init();

#ifdef illumos
	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
#endif

	spa_init(mode);

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
978
/*
 * Shut down the emulation: tear down the SPA layer and the system
 * taskq, then close the random devices opened by kernel_init().
 */
void
kernel_fini(void)
{
	spa_fini();

	system_taskq_fini();

	close(random_fd);
	close(urandom_fd);

	/* Back to the "not open" state that random_get_bytes_common() asserts against. */
	random_fd = -1;
	urandom_fd = -1;
}
992
993int
994z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
995{
996	int ret;
997	uLongf len = *dstlen;
998
999	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
1000		*dstlen = (size_t)len;
1001
1002	return (ret);
1003}
1004
1005int
1006z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
1007    int level)
1008{
1009	int ret;
1010	uLongf len = *dstlen;
1011
1012	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
1013		*dstlen = (size_t)len;
1014
1015	return (ret);
1016}
1017
1018uid_t
1019crgetuid(cred_t *cr)
1020{
1021	return (0);
1022}
1023
1024uid_t
1025crgetruid(cred_t *cr)
1026{
1027	return (0);
1028}
1029
1030gid_t
1031crgetgid(cred_t *cr)
1032{
1033	return (0);
1034}
1035
1036int
1037crgetngroups(cred_t *cr)
1038{
1039	return (0);
1040}
1041
1042gid_t *
1043crgetgroups(cred_t *cr)
1044{
1045	return (NULL);
1046}
1047
1048int
1049zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1050{
1051	return (0);
1052}
1053
1054int
1055zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1056{
1057	return (0);
1058}
1059
1060int
1061zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1062{
1063	return (0);
1064}
1065
1066ksiddomain_t *
1067ksid_lookupdomain(const char *dom)
1068{
1069	ksiddomain_t *kd;
1070
1071	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1072	kd->kd_name = spa_strdup(dom);
1073	return (kd);
1074}
1075
1076void
1077ksiddomain_rele(ksiddomain_t *ksid)
1078{
1079	spa_strfree(ksid->kd_name);
1080	umem_free(ksid, sizeof (ksiddomain_t));
1081}
1082
1083/*
1084 * Do not change the length of the returned string; it must be freed
1085 * with strfree().
1086 */
1087char *
1088kmem_asprintf(const char *fmt, ...)
1089{
1090	int size;
1091	va_list adx;
1092	char *buf;
1093
1094	va_start(adx, fmt);
1095	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1096	va_end(adx);
1097
1098	buf = kmem_alloc(size, KM_SLEEP);
1099
1100	va_start(adx, fmt);
1101	size = vsnprintf(buf, size, fmt, adx);
1102	va_end(adx);
1103
1104	return (buf);
1105}
1106
1107/* ARGSUSED */
1108int
1109zfs_onexit_fd_hold(int fd, minor_t *minorp)
1110{
1111	*minorp = 0;
1112	return (0);
1113}
1114
/* No-op counterpart of zfs_onexit_fd_hold(). */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1120
1121/* ARGSUSED */
1122int
1123zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1124    uint64_t *action_handle)
1125{
1126	return (0);
1127}
1128
1129/* ARGSUSED */
1130int
1131zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1132{
1133	return (0);
1134}
1135
1136/* ARGSUSED */
1137int
1138zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1139{
1140	return (0);
1141}
1142
1143#ifdef __FreeBSD__
/* Stub: no minors are created in userland; always returns 0. */
/* ARGSUSED */
int
zvol_create_minors(const char *name)
{
	const int ok = 0;

	return (ok);
}
1150#endif
1151
1152#ifdef illumos
1153void
1154bioinit(buf_t *bp)
1155{
1156	bzero(bp, sizeof (buf_t));
1157}
1158
1159void
1160biodone(buf_t *bp)
1161{
1162	if (bp->b_iodone != NULL) {
1163		(*(bp->b_iodone))(bp);
1164		return;
1165	}
1166	ASSERT((bp->b_flags & B_DONE) == 0);
1167	bp->b_flags |= B_DONE;
1168}
1169
1170void
1171bioerror(buf_t *bp, int error)
1172{
1173	ASSERT(bp != NULL);
1174	ASSERT(error >= 0);
1175
1176	if (error != 0) {
1177		bp->b_flags |= B_ERROR;
1178	} else {
1179		bp->b_flags &= ~B_ERROR;
1180	}
1181	bp->b_error = error;
1182}
1183
1184
1185int
1186geterror(struct buf *bp)
1187{
1188	int error = 0;
1189
1190	if (bp->b_flags & B_ERROR) {
1191		error = bp->b_error;
1192		if (!error)
1193			error = EIO;
1194	}
1195	return (error);
1196}
1197#endif
1198