kernel.c revision 265740
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25 */
26
27#include <assert.h>
28#include <fcntl.h>
29#include <poll.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <zlib.h>
34#include <sys/spa.h>
35#include <sys/stat.h>
36#include <sys/processor.h>
37#include <sys/zfs_context.h>
38#include <sys/rrwlock.h>
39#include <sys/zmod.h>
40#include <sys/utsname.h>
41#include <sys/systeminfo.h>
42
43/*
44 * Emulation of kernel services in userland.
45 */
46
47int aok;
48uint64_t physmem;
49vnode_t *rootdir = (vnode_t *)0xabcd1234;
50char hw_serial[HW_HOSTID_LEN];
51#ifdef illumos
52kmutex_t cpu_lock;
53#endif
54
55struct utsname utsname = {
56	"userland", "libzpool", "1", "1", "na"
57};
58
59/* this only exists to have its address taken */
60struct proc p0;
61
62/*
63 * =========================================================================
64 * threads
65 * =========================================================================
66 */
67/*ARGSUSED*/
68kthread_t *
69zk_thread_create(void (*func)(), void *arg)
70{
71	thread_t tid;
72
73	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
74	    &tid) == 0);
75
76	return ((void *)(uintptr_t)tid);
77}
78
79/*
80 * =========================================================================
81 * kstats
82 * =========================================================================
83 */
84/*ARGSUSED*/
85kstat_t *
86kstat_create(char *module, int instance, char *name, char *class,
87    uchar_t type, ulong_t ndata, uchar_t ks_flag)
88{
89	return (NULL);
90}
91
92/*ARGSUSED*/
93void
94kstat_install(kstat_t *ksp)
95{}
96
97/*ARGSUSED*/
98void
99kstat_delete(kstat_t *ksp)
100{}
101
102/*
103 * =========================================================================
104 * mutexes
105 * =========================================================================
106 */
107void
108zmutex_init(kmutex_t *mp)
109{
110	mp->m_owner = NULL;
111	mp->initialized = B_TRUE;
112	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
113}
114
115void
116zmutex_destroy(kmutex_t *mp)
117{
118	ASSERT(mp->initialized == B_TRUE);
119	ASSERT(mp->m_owner == NULL);
120	(void) _mutex_destroy(&(mp)->m_lock);
121	mp->m_owner = (void *)-1UL;
122	mp->initialized = B_FALSE;
123}
124
125int
126zmutex_owned(kmutex_t *mp)
127{
128	ASSERT(mp->initialized == B_TRUE);
129
130	return (mp->m_owner == curthread);
131}
132
133void
134mutex_enter(kmutex_t *mp)
135{
136	ASSERT(mp->initialized == B_TRUE);
137	ASSERT(mp->m_owner != (void *)-1UL);
138	ASSERT(mp->m_owner != curthread);
139	VERIFY(mutex_lock(&mp->m_lock) == 0);
140	ASSERT(mp->m_owner == NULL);
141	mp->m_owner = curthread;
142}
143
144int
145mutex_tryenter(kmutex_t *mp)
146{
147	ASSERT(mp->initialized == B_TRUE);
148	ASSERT(mp->m_owner != (void *)-1UL);
149	if (0 == mutex_trylock(&mp->m_lock)) {
150		ASSERT(mp->m_owner == NULL);
151		mp->m_owner = curthread;
152		return (1);
153	} else {
154		return (0);
155	}
156}
157
158void
159mutex_exit(kmutex_t *mp)
160{
161	ASSERT(mp->initialized == B_TRUE);
162	ASSERT(mutex_owner(mp) == curthread);
163	mp->m_owner = NULL;
164	VERIFY(mutex_unlock(&mp->m_lock) == 0);
165}
166
167void *
168mutex_owner(kmutex_t *mp)
169{
170	ASSERT(mp->initialized == B_TRUE);
171	return (mp->m_owner);
172}
173
174/*
175 * =========================================================================
176 * rwlocks
177 * =========================================================================
178 */
179/*ARGSUSED*/
180void
181rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
182{
183	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
184	rwlp->rw_owner = NULL;
185	rwlp->initialized = B_TRUE;
186	rwlp->rw_count = 0;
187}
188
189void
190rw_destroy(krwlock_t *rwlp)
191{
192	ASSERT(rwlp->rw_count == 0);
193	rwlock_destroy(&rwlp->rw_lock);
194	rwlp->rw_owner = (void *)-1UL;
195	rwlp->initialized = B_FALSE;
196}
197
198void
199rw_enter(krwlock_t *rwlp, krw_t rw)
200{
201	//ASSERT(!RW_LOCK_HELD(rwlp));
202	ASSERT(rwlp->initialized == B_TRUE);
203	ASSERT(rwlp->rw_owner != (void *)-1UL);
204	ASSERT(rwlp->rw_owner != curthread);
205
206	if (rw == RW_READER) {
207		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
208		ASSERT(rwlp->rw_count >= 0);
209		atomic_add_int(&rwlp->rw_count, 1);
210	} else {
211		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
212		ASSERT(rwlp->rw_count == 0);
213		rwlp->rw_count = -1;
214		rwlp->rw_owner = curthread;
215	}
216}
217
218void
219rw_exit(krwlock_t *rwlp)
220{
221	ASSERT(rwlp->initialized == B_TRUE);
222	ASSERT(rwlp->rw_owner != (void *)-1UL);
223
224	if (rwlp->rw_owner == curthread) {
225		/* Write locked. */
226		ASSERT(rwlp->rw_count == -1);
227		rwlp->rw_count = 0;
228		rwlp->rw_owner = NULL;
229	} else {
230		/* Read locked. */
231		ASSERT(rwlp->rw_count > 0);
232		atomic_add_int(&rwlp->rw_count, -1);
233	}
234	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
235}
236
237int
238rw_tryenter(krwlock_t *rwlp, krw_t rw)
239{
240	int rv;
241
242	ASSERT(rwlp->initialized == B_TRUE);
243	ASSERT(rwlp->rw_owner != (void *)-1UL);
244	ASSERT(rwlp->rw_owner != curthread);
245
246	if (rw == RW_READER)
247		rv = rw_tryrdlock(&rwlp->rw_lock);
248	else
249		rv = rw_trywrlock(&rwlp->rw_lock);
250
251	if (rv == 0) {
252		ASSERT(rwlp->rw_owner == NULL);
253		if (rw == RW_READER) {
254			ASSERT(rwlp->rw_count >= 0);
255			atomic_add_int(&rwlp->rw_count, 1);
256		} else {
257			ASSERT(rwlp->rw_count == 0);
258			rwlp->rw_count = -1;
259			rwlp->rw_owner = curthread;
260		}
261		return (1);
262	}
263
264	return (0);
265}
266
267/*ARGSUSED*/
268int
269rw_tryupgrade(krwlock_t *rwlp)
270{
271	ASSERT(rwlp->initialized == B_TRUE);
272	ASSERT(rwlp->rw_owner != (void *)-1UL);
273
274	return (0);
275}
276
277int
278rw_lock_held(krwlock_t *rwlp)
279{
280
281	return (rwlp->rw_count != 0);
282}
283
284/*
285 * =========================================================================
286 * condition variables
287 * =========================================================================
288 */
289/*ARGSUSED*/
290void
291cv_init(kcondvar_t *cv, char *name, int type, void *arg)
292{
293	VERIFY(cond_init(cv, name, NULL) == 0);
294}
295
296void
297cv_destroy(kcondvar_t *cv)
298{
299	VERIFY(cond_destroy(cv) == 0);
300}
301
302void
303cv_wait(kcondvar_t *cv, kmutex_t *mp)
304{
305	ASSERT(mutex_owner(mp) == curthread);
306	mp->m_owner = NULL;
307	int ret = cond_wait(cv, &mp->m_lock);
308	VERIFY(ret == 0 || ret == EINTR);
309	mp->m_owner = curthread;
310}
311
312clock_t
313cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
314{
315	int error;
316	struct timespec ts;
317	struct timeval tv;
318	clock_t delta;
319
320	abstime += ddi_get_lbolt();
321top:
322	delta = abstime - ddi_get_lbolt();
323	if (delta <= 0)
324		return (-1);
325
326	if (gettimeofday(&tv, NULL) != 0)
327		assert(!"gettimeofday() failed");
328
329	ts.tv_sec = tv.tv_sec + delta / hz;
330	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
331	ASSERT(ts.tv_nsec >= 0);
332
333	if (ts.tv_nsec >= NANOSEC) {
334		ts.tv_sec++;
335		ts.tv_nsec -= NANOSEC;
336	}
337
338	ASSERT(mutex_owner(mp) == curthread);
339	mp->m_owner = NULL;
340	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
341	mp->m_owner = curthread;
342
343	if (error == EINTR)
344		goto top;
345
346	if (error == ETIMEDOUT)
347		return (-1);
348
349	ASSERT(error == 0);
350
351	return (1);
352}
353
354/*ARGSUSED*/
355clock_t
356cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
357    int flag)
358{
359	int error;
360	timestruc_t ts;
361	hrtime_t delta;
362
363	ASSERT(flag == 0);
364
365top:
366	delta = tim - gethrtime();
367	if (delta <= 0)
368		return (-1);
369
370	ts.tv_sec = delta / NANOSEC;
371	ts.tv_nsec = delta % NANOSEC;
372
373	ASSERT(mutex_owner(mp) == curthread);
374	mp->m_owner = NULL;
375	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
376	mp->m_owner = curthread;
377
378	if (error == ETIMEDOUT)
379		return (-1);
380
381	if (error == EINTR)
382		goto top;
383
384	ASSERT(error == 0);
385
386	return (1);
387}
388
389void
390cv_signal(kcondvar_t *cv)
391{
392	VERIFY(cond_signal(cv) == 0);
393}
394
395void
396cv_broadcast(kcondvar_t *cv)
397{
398	VERIFY(cond_broadcast(cv) == 0);
399}
400
401/*
402 * =========================================================================
403 * vnode operations
404 * =========================================================================
405 */
406/*
407 * Note: for the xxxat() versions of these functions, we assume that the
408 * starting vp is always rootdir (which is true for spa_directory.c, the only
409 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
410 * them by adding '/' in front of the path.
411 */
412
413/*ARGSUSED*/
414int
415vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
416{
417	int fd;
418	vnode_t *vp;
419	int old_umask;
420	char realpath[MAXPATHLEN];
421	struct stat64 st;
422
423	/*
424	 * If we're accessing a real disk from userland, we need to use
425	 * the character interface to avoid caching.  This is particularly
426	 * important if we're trying to look at a real in-kernel storage
427	 * pool from userland, e.g. via zdb, because otherwise we won't
428	 * see the changes occurring under the segmap cache.
429	 * On the other hand, the stupid character device returns zero
430	 * for its size.  So -- gag -- we open the block device to get
431	 * its size, and remember it for subsequent VOP_GETATTR().
432	 */
433	if (strncmp(path, "/dev/", 5) == 0) {
434		char *dsk;
435		fd = open64(path, O_RDONLY);
436		if (fd == -1)
437			return (errno);
438		if (fstat64(fd, &st) == -1) {
439			close(fd);
440			return (errno);
441		}
442		close(fd);
443		(void) sprintf(realpath, "%s", path);
444		dsk = strstr(path, "/dsk/");
445		if (dsk != NULL)
446			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
447			    dsk + 1);
448	} else {
449		(void) sprintf(realpath, "%s", path);
450		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
451			return (errno);
452	}
453
454	if (flags & FCREAT)
455		old_umask = umask(0);
456
457	/*
458	 * The construct 'flags - FREAD' conveniently maps combinations of
459	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
460	 */
461	fd = open64(realpath, flags - FREAD, mode);
462
463	if (flags & FCREAT)
464		(void) umask(old_umask);
465
466	if (fd == -1)
467		return (errno);
468
469	if (fstat64(fd, &st) == -1) {
470		close(fd);
471		return (errno);
472	}
473
474	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
475
476	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
477
478	vp->v_fd = fd;
479	vp->v_size = st.st_size;
480	vp->v_path = spa_strdup(path);
481
482	return (0);
483}
484
485/*ARGSUSED*/
486int
487vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
488    int x3, vnode_t *startvp, int fd)
489{
490	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
491	int ret;
492
493	ASSERT(startvp == rootdir);
494	(void) sprintf(realpath, "/%s", path);
495
496	/* fd ignored for now, need if want to simulate nbmand support */
497	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
498
499	umem_free(realpath, strlen(path) + 2);
500
501	return (ret);
502}
503
504/*ARGSUSED*/
505int
506vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
507	int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
508{
509	ssize_t iolen, split;
510
511	if (uio == UIO_READ) {
512		iolen = pread64(vp->v_fd, addr, len, offset);
513	} else {
514		/*
515		 * To simulate partial disk writes, we split writes into two
516		 * system calls so that the process can be killed in between.
517		 */
518		int sectors = len >> SPA_MINBLOCKSHIFT;
519		split = (sectors > 0 ? rand() % sectors : 0) <<
520		    SPA_MINBLOCKSHIFT;
521		iolen = pwrite64(vp->v_fd, addr, split, offset);
522		iolen += pwrite64(vp->v_fd, (char *)addr + split,
523		    len - split, offset + split);
524	}
525
526	if (iolen == -1)
527		return (errno);
528	if (residp)
529		*residp = len - iolen;
530	else if (iolen != len)
531		return (EIO);
532	return (0);
533}
534
535void
536vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
537{
538	close(vp->v_fd);
539	spa_strfree(vp->v_path);
540	umem_free(vp, sizeof (vnode_t));
541}
542
543/*
544 * At a minimum we need to update the size since vdev_reopen()
545 * will no longer call vn_openat().
546 */
547int
548fop_getattr(vnode_t *vp, vattr_t *vap)
549{
550	struct stat64 st;
551
552	if (fstat64(vp->v_fd, &st) == -1) {
553		close(vp->v_fd);
554		return (errno);
555	}
556
557	vap->va_size = st.st_size;
558	return (0);
559}
560
561#ifdef ZFS_DEBUG
562
563/*
564 * =========================================================================
565 * Figure out which debugging statements to print
566 * =========================================================================
567 */
568
/* Comma-separated debug selectors, or NULL when debugging is off. */
static char *dprintf_string;
/* Non-zero when every debug statement should be printed. */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete element of the
 * comma-separated selector list in dprintf_string.
 *
 * List format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 on an exact element match and 0 otherwise (including when
 * no list is set).
 */
int
dprintf_find_string(const char *string)
{
	const char *cursor;
	int len = strlen(string);

	for (cursor = dprintf_string; cursor != NULL; ) {
		if (strncmp(cursor, string, len) == 0) {
			char next = cursor[len];

			/* Only accept a match that ends the element. */
			if (next == ',' || next == '\0')
				return (1);
		}

		/* Step to the element after the next comma, if any. */
		cursor = strchr(cursor, ',');
		if (cursor != NULL)
			cursor++;
	}

	return (0);
}
593
594void
595dprintf_setup(int *argc, char **argv)
596{
597	int i, j;
598
599	/*
600	 * Debugging can be specified two ways: by setting the
601	 * environment variable ZFS_DEBUG, or by including a
602	 * "debug=..."  argument on the command line.  The command
603	 * line setting overrides the environment variable.
604	 */
605
606	for (i = 1; i < *argc; i++) {
607		int len = strlen("debug=");
608		/* First look for a command line argument */
609		if (strncmp("debug=", argv[i], len) == 0) {
610			dprintf_string = argv[i] + len;
611			/* Remove from args */
612			for (j = i; j < *argc; j++)
613				argv[j] = argv[j+1];
614			argv[j] = NULL;
615			(*argc)--;
616		}
617	}
618
619	if (dprintf_string == NULL) {
620		/* Look for ZFS_DEBUG environment variable */
621		dprintf_string = getenv("ZFS_DEBUG");
622	}
623
624	/*
625	 * Are we just turning on all debugging?
626	 */
627	if (dprintf_find_string("on"))
628		dprintf_print_all = 1;
629}
630
/* Stub sysctl handler: does nothing and reports success. */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	int rc = 0;

	return (rc);
}
636
637/*
638 * =========================================================================
639 * debug printfs
640 * =========================================================================
641 */
642void
643__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
644{
645	const char *newfile;
646	va_list adx;
647
648	/*
649	 * Get rid of annoying "../common/" prefix to filename.
650	 */
651	newfile = strrchr(file, '/');
652	if (newfile != NULL) {
653		newfile = newfile + 1; /* Get rid of leading / */
654	} else {
655		newfile = file;
656	}
657
658	if (dprintf_print_all ||
659	    dprintf_find_string(newfile) ||
660	    dprintf_find_string(func)) {
661		/* Print out just the function name if requested */
662		flockfile(stdout);
663		if (dprintf_find_string("pid"))
664			(void) printf("%d ", getpid());
665		if (dprintf_find_string("tid"))
666			(void) printf("%ul ", thr_self());
667#if 0
668		if (dprintf_find_string("cpu"))
669			(void) printf("%u ", getcpuid());
670#endif
671		if (dprintf_find_string("time"))
672			(void) printf("%llu ", gethrtime());
673		if (dprintf_find_string("long"))
674			(void) printf("%s, line %d: ", newfile, line);
675		(void) printf("%s: ", func);
676		va_start(adx, fmt);
677		(void) vprintf(fmt, adx);
678		va_end(adx);
679		funlockfile(stdout);
680	}
681}
682
683#endif /* ZFS_DEBUG */
684
685/*
686 * =========================================================================
687 * cmn_err() and panic()
688 * =========================================================================
689 */
690static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
691static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
692
/*
 * Print "error: <formatted message>\n" to stderr and abort the
 * process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
702
/* printf-style front end to vpanic(), which aborts the process. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
712
713void
714vcmn_err(int ce, const char *fmt, va_list adx)
715{
716	if (ce == CE_PANIC)
717		vpanic(fmt, adx);
718	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
719		(void) fprintf(stderr, "%s", ce_prefix[ce]);
720		(void) vfprintf(stderr, fmt, adx);
721		(void) fprintf(stderr, "%s", ce_suffix[ce]);
722	}
723}
724
/* printf-style front end to vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
735
736/*
737 * =========================================================================
738 * kobj interfaces
739 * =========================================================================
740 */
741struct _buf *
742kobj_open_file(char *name)
743{
744	struct _buf *file;
745	vnode_t *vp;
746
747	/* set vp as the _fd field of the file */
748	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
749	    -1) != 0)
750		return ((void *)-1UL);
751
752	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
753	file->_fd = (intptr_t)vp;
754	return (file);
755}
756
757int
758kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
759{
760	ssize_t resid;
761
762	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
763	    UIO_SYSSPACE, 0, 0, 0, &resid);
764
765	return (size - resid);
766}
767
768void
769kobj_close_file(struct _buf *file)
770{
771	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
772	umem_free(file, sizeof (struct _buf));
773}
774
775int
776kobj_get_filesize(struct _buf *file, uint64_t *size)
777{
778	struct stat64 st;
779	vnode_t *vp = (vnode_t *)file->_fd;
780
781	if (fstat64(vp->v_fd, &st) == -1) {
782		vn_close(vp, 0, NULL, NULL);
783		return (errno);
784	}
785	*size = st.st_size;
786	return (0);
787}
788
789/*
790 * =========================================================================
791 * misc routines
792 * =========================================================================
793 */
794
795void
796delay(clock_t ticks)
797{
798	poll(0, 0, ticks * (1000 / hz));
799}
800
801#if 0
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 *
 * Binary search: at each step, if anything remains above the lower
 * 'shift' bits, the answer lies at least 'shift' positions higher.
 */
int
highbit64(uint64_t i)
{
	int h = 1;
	int shift;

	if (i == 0)
		return (0);

	for (shift = 32; shift != 0; shift >>= 1) {
		if ((i >> shift) != 0) {
			h += shift;
			i >>= shift;
		}
	}

	return (h);
}
833#endif
834
/* Descriptors for /dev/random and /dev/urandom; -1 while closed. */
static int random_fd = -1, urandom_fd = -1;

/*
 * Fill ptr[0..len) with bytes read from fd.  Always returns 0.
 *
 * A read interrupted by a signal (EINTR) is retried.  Any other read
 * failure, or an unexpected end of file, trips the VERIFY.  The check
 * uses VERIFY, as the rest of this file does for calls that must not
 * fail, so that a failed read can never be applied to ptr and resid as
 * though it were a byte count.
 */
static int
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
{
	size_t resid = len;
	ssize_t bytes;

	ASSERT(fd != -1);

	while (resid != 0) {
		bytes = read(fd, ptr, resid);
		if (bytes == -1 && errno == EINTR)
			continue;
		VERIFY(bytes > 0);
		ptr += bytes;
		resid -= bytes;
	}

	return (0);
}
854
855int
856random_get_bytes(uint8_t *ptr, size_t len)
857{
858	return (random_get_bytes_common(ptr, len, random_fd));
859}
860
861int
862random_get_pseudo_bytes(uint8_t *ptr, size_t len)
863{
864	return (random_get_bytes_common(ptr, len, urandom_fd));
865}
866
/*
 * Convert the string hw_serial to an unsigned long in the given base.
 *
 * The converted value is stored in *result.  When nptr is non-NULL it
 * receives a pointer to the first character that was not converted.
 *
 * Returns 0 on success, or the errno value set by strtoul() (ERANGE on
 * overflow, for example).  errno is cleared before the call so that a
 * legitimate result of 0 is not reported with an error left over from
 * some earlier call.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
877
878int
879ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
880{
881	char *end;
882
883	*result = strtoull(str, &end, base);
884	if (*result == 0)
885		return (errno);
886	return (0);
887}
888
889#ifdef illumos
890/* ARGSUSED */
891cyclic_id_t
892cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
893{
894	return (1);
895}
896
897/* ARGSUSED */
898void
899cyclic_remove(cyclic_id_t id)
900{
901}
902
903/* ARGSUSED */
904int
905cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
906{
907	return (1);
908}
909#endif
910
911/*
912 * =========================================================================
913 * kernel emulation setup & teardown
914 * =========================================================================
915 */
/*
 * Allocation-failure callback registered with umem_nofail_callback():
 * report the condition on stderr and abort to produce a core dump.
 *
 * The message is emitted with write().  Only the text is written;
 * sizeof (errmsg) also counts the terminating NUL, which must not be
 * sent to the terminal.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
925
926void
927kernel_init(int mode)
928{
929	extern uint_t rrw_tsd_key;
930
931	umem_nofail_callback(umem_out_of_memory);
932
933	physmem = sysconf(_SC_PHYS_PAGES);
934
935	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
936	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
937
938	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
939	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);
940
941	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
942	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
943
944	system_taskq_init();
945
946#ifdef illumos
947	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
948#endif
949
950	spa_init(mode);
951
952	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
953}
954
955void
956kernel_fini(void)
957{
958	spa_fini();
959
960	system_taskq_fini();
961
962	close(random_fd);
963	close(urandom_fd);
964
965	random_fd = -1;
966	urandom_fd = -1;
967}
968
969int
970z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
971{
972	int ret;
973	uLongf len = *dstlen;
974
975	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
976		*dstlen = (size_t)len;
977
978	return (ret);
979}
980
981int
982z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
983    int level)
984{
985	int ret;
986	uLongf len = *dstlen;
987
988	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
989		*dstlen = (size_t)len;
990
991	return (ret);
992}
993
994uid_t
995crgetuid(cred_t *cr)
996{
997	return (0);
998}
999
1000uid_t
1001crgetruid(cred_t *cr)
1002{
1003	return (0);
1004}
1005
1006gid_t
1007crgetgid(cred_t *cr)
1008{
1009	return (0);
1010}
1011
1012int
1013crgetngroups(cred_t *cr)
1014{
1015	return (0);
1016}
1017
1018gid_t *
1019crgetgroups(cred_t *cr)
1020{
1021	return (NULL);
1022}
1023
1024int
1025zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1026{
1027	return (0);
1028}
1029
1030int
1031zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1032{
1033	return (0);
1034}
1035
1036int
1037zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1038{
1039	return (0);
1040}
1041
1042ksiddomain_t *
1043ksid_lookupdomain(const char *dom)
1044{
1045	ksiddomain_t *kd;
1046
1047	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1048	kd->kd_name = spa_strdup(dom);
1049	return (kd);
1050}
1051
1052void
1053ksiddomain_rele(ksiddomain_t *ksid)
1054{
1055	spa_strfree(ksid->kd_name);
1056	umem_free(ksid, sizeof (ksiddomain_t));
1057}
1058
1059/*
1060 * Do not change the length of the returned string; it must be freed
1061 * with strfree().
1062 */
1063char *
1064kmem_asprintf(const char *fmt, ...)
1065{
1066	int size;
1067	va_list adx;
1068	char *buf;
1069
1070	va_start(adx, fmt);
1071	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1072	va_end(adx);
1073
1074	buf = kmem_alloc(size, KM_SLEEP);
1075
1076	va_start(adx, fmt);
1077	size = vsnprintf(buf, size, fmt, adx);
1078	va_end(adx);
1079
1080	return (buf);
1081}
1082
1083/* ARGSUSED */
1084int
1085zfs_onexit_fd_hold(int fd, minor_t *minorp)
1086{
1087	*minorp = 0;
1088	return (0);
1089}
1090
/* Stub: nothing to release. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* intentionally empty */
}
1096
1097/* ARGSUSED */
1098int
1099zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1100    uint64_t *action_handle)
1101{
1102	return (0);
1103}
1104
1105/* ARGSUSED */
1106int
1107zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1108{
1109	return (0);
1110}
1111
1112/* ARGSUSED */
1113int
1114zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1115{
1116	return (0);
1117}
1118
1119#ifdef __FreeBSD__
/* Stub: no minors are created.  Always returns 0. */
/* ARGSUSED */
int
zvol_create_minors(const char *name)
{
	int rc = 0;

	return (rc);
}
1126#endif
1127
1128#ifdef illumos
1129void
1130bioinit(buf_t *bp)
1131{
1132	bzero(bp, sizeof (buf_t));
1133}
1134
1135void
1136biodone(buf_t *bp)
1137{
1138	if (bp->b_iodone != NULL) {
1139		(*(bp->b_iodone))(bp);
1140		return;
1141	}
1142	ASSERT((bp->b_flags & B_DONE) == 0);
1143	bp->b_flags |= B_DONE;
1144}
1145
1146void
1147bioerror(buf_t *bp, int error)
1148{
1149	ASSERT(bp != NULL);
1150	ASSERT(error >= 0);
1151
1152	if (error != 0) {
1153		bp->b_flags |= B_ERROR;
1154	} else {
1155		bp->b_flags &= ~B_ERROR;
1156	}
1157	bp->b_error = error;
1158}
1159
1160
1161int
1162geterror(struct buf *bp)
1163{
1164	int error = 0;
1165
1166	if (bp->b_flags & B_ERROR) {
1167		error = bp->b_error;
1168		if (!error)
1169			error = EIO;
1170	}
1171	return (error);
1172}
1173#endif
1174