kernel.c revision 321529
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25 */
26
27#include <assert.h>
28#include <fcntl.h>
29#include <poll.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <zlib.h>
34#include <libgen.h>
35#include <sys/spa.h>
36#include <sys/stat.h>
37#include <sys/processor.h>
38#include <sys/zfs_context.h>
39#include <sys/rrwlock.h>
40#include <sys/zmod.h>
41#include <sys/utsname.h>
42#include <sys/systeminfo.h>
43
44/*
45 * Emulation of kernel services in userland.
46 */
47
48#ifndef __FreeBSD__
49int aok;
50#endif
51uint64_t physmem;
52vnode_t *rootdir = (vnode_t *)0xabcd1234;
53char hw_serial[HW_HOSTID_LEN];
54#ifdef illumos
55kmutex_t cpu_lock;
56#endif
57
58/* If set, all blocks read will be copied to the specified directory. */
59char *vn_dumpdir = NULL;
60
61struct utsname utsname = {
62	"userland", "libzpool", "1", "1", "na"
63};
64
65/* this only exists to have its address taken */
66struct proc p0;
67
68/*
69 * =========================================================================
70 * threads
71 * =========================================================================
72 */
73/*ARGSUSED*/
74kthread_t *
75zk_thread_create(void (*func)(), void *arg)
76{
77	thread_t tid;
78
79	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
80	    &tid) == 0);
81
82	return ((void *)(uintptr_t)tid);
83}
84
85/*
86 * =========================================================================
87 * kstats
88 * =========================================================================
89 */
90/*ARGSUSED*/
91kstat_t *
92kstat_create(char *module, int instance, char *name, char *class,
93    uchar_t type, ulong_t ndata, uchar_t ks_flag)
94{
95	return (NULL);
96}
97
98/*ARGSUSED*/
99void
100kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
101{}
102
103/*ARGSUSED*/
104void
105kstat_install(kstat_t *ksp)
106{}
107
108/*ARGSUSED*/
109void
110kstat_delete(kstat_t *ksp)
111{}
112
113/*
114 * =========================================================================
115 * mutexes
116 * =========================================================================
117 */
118void
119zmutex_init(kmutex_t *mp)
120{
121	mp->m_owner = NULL;
122	mp->initialized = B_TRUE;
123	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
124}
125
126void
127zmutex_destroy(kmutex_t *mp)
128{
129	ASSERT(mp->initialized == B_TRUE);
130	ASSERT(mp->m_owner == NULL);
131	(void) _mutex_destroy(&(mp)->m_lock);
132	mp->m_owner = (void *)-1UL;
133	mp->initialized = B_FALSE;
134}
135
136int
137zmutex_owned(kmutex_t *mp)
138{
139	ASSERT(mp->initialized == B_TRUE);
140
141	return (mp->m_owner == curthread);
142}
143
144void
145mutex_enter(kmutex_t *mp)
146{
147	ASSERT(mp->initialized == B_TRUE);
148	ASSERT(mp->m_owner != (void *)-1UL);
149	ASSERT(mp->m_owner != curthread);
150	VERIFY(mutex_lock(&mp->m_lock) == 0);
151	ASSERT(mp->m_owner == NULL);
152	mp->m_owner = curthread;
153}
154
155int
156mutex_tryenter(kmutex_t *mp)
157{
158	ASSERT(mp->initialized == B_TRUE);
159	ASSERT(mp->m_owner != (void *)-1UL);
160	if (0 == mutex_trylock(&mp->m_lock)) {
161		ASSERT(mp->m_owner == NULL);
162		mp->m_owner = curthread;
163		return (1);
164	} else {
165		return (0);
166	}
167}
168
169void
170mutex_exit(kmutex_t *mp)
171{
172	ASSERT(mp->initialized == B_TRUE);
173	ASSERT(mutex_owner(mp) == curthread);
174	mp->m_owner = NULL;
175	VERIFY(mutex_unlock(&mp->m_lock) == 0);
176}
177
178void *
179mutex_owner(kmutex_t *mp)
180{
181	ASSERT(mp->initialized == B_TRUE);
182	return (mp->m_owner);
183}
184
185/*
186 * =========================================================================
187 * rwlocks
188 * =========================================================================
189 */
190/*ARGSUSED*/
191void
192rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
193{
194	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
195	rwlp->rw_owner = NULL;
196	rwlp->initialized = B_TRUE;
197	rwlp->rw_count = 0;
198}
199
200void
201rw_destroy(krwlock_t *rwlp)
202{
203	ASSERT(rwlp->rw_count == 0);
204	rwlock_destroy(&rwlp->rw_lock);
205	rwlp->rw_owner = (void *)-1UL;
206	rwlp->initialized = B_FALSE;
207}
208
209void
210rw_enter(krwlock_t *rwlp, krw_t rw)
211{
212	//ASSERT(!RW_LOCK_HELD(rwlp));
213	ASSERT(rwlp->initialized == B_TRUE);
214	ASSERT(rwlp->rw_owner != (void *)-1UL);
215	ASSERT(rwlp->rw_owner != curthread);
216
217	if (rw == RW_READER) {
218		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
219		ASSERT(rwlp->rw_count >= 0);
220		atomic_add_int(&rwlp->rw_count, 1);
221	} else {
222		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
223		ASSERT(rwlp->rw_count == 0);
224		rwlp->rw_count = -1;
225		rwlp->rw_owner = curthread;
226	}
227}
228
229void
230rw_exit(krwlock_t *rwlp)
231{
232	ASSERT(rwlp->initialized == B_TRUE);
233	ASSERT(rwlp->rw_owner != (void *)-1UL);
234
235	if (rwlp->rw_owner == curthread) {
236		/* Write locked. */
237		ASSERT(rwlp->rw_count == -1);
238		rwlp->rw_count = 0;
239		rwlp->rw_owner = NULL;
240	} else {
241		/* Read locked. */
242		ASSERT(rwlp->rw_count > 0);
243		atomic_add_int(&rwlp->rw_count, -1);
244	}
245	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
246}
247
248int
249rw_tryenter(krwlock_t *rwlp, krw_t rw)
250{
251	int rv;
252
253	ASSERT(rwlp->initialized == B_TRUE);
254	ASSERT(rwlp->rw_owner != (void *)-1UL);
255	ASSERT(rwlp->rw_owner != curthread);
256
257	if (rw == RW_READER)
258		rv = rw_tryrdlock(&rwlp->rw_lock);
259	else
260		rv = rw_trywrlock(&rwlp->rw_lock);
261
262	if (rv == 0) {
263		ASSERT(rwlp->rw_owner == NULL);
264		if (rw == RW_READER) {
265			ASSERT(rwlp->rw_count >= 0);
266			atomic_add_int(&rwlp->rw_count, 1);
267		} else {
268			ASSERT(rwlp->rw_count == 0);
269			rwlp->rw_count = -1;
270			rwlp->rw_owner = curthread;
271		}
272		return (1);
273	}
274
275	return (0);
276}
277
278/*ARGSUSED*/
279int
280rw_tryupgrade(krwlock_t *rwlp)
281{
282	ASSERT(rwlp->initialized == B_TRUE);
283	ASSERT(rwlp->rw_owner != (void *)-1UL);
284
285	return (0);
286}
287
288int
289rw_lock_held(krwlock_t *rwlp)
290{
291
292	return (rwlp->rw_count != 0);
293}
294
295/*
296 * =========================================================================
297 * condition variables
298 * =========================================================================
299 */
300/*ARGSUSED*/
301void
302cv_init(kcondvar_t *cv, char *name, int type, void *arg)
303{
304	VERIFY(cond_init(cv, name, NULL) == 0);
305}
306
307void
308cv_destroy(kcondvar_t *cv)
309{
310	VERIFY(cond_destroy(cv) == 0);
311}
312
313void
314cv_wait(kcondvar_t *cv, kmutex_t *mp)
315{
316	ASSERT(mutex_owner(mp) == curthread);
317	mp->m_owner = NULL;
318	int ret = cond_wait(cv, &mp->m_lock);
319	VERIFY(ret == 0 || ret == EINTR);
320	mp->m_owner = curthread;
321}
322
323clock_t
324cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
325{
326	int error;
327	struct timespec ts;
328	struct timeval tv;
329	clock_t delta;
330
331	abstime += ddi_get_lbolt();
332top:
333	delta = abstime - ddi_get_lbolt();
334	if (delta <= 0)
335		return (-1);
336
337	if (gettimeofday(&tv, NULL) != 0)
338		assert(!"gettimeofday() failed");
339
340	ts.tv_sec = tv.tv_sec + delta / hz;
341	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
342	ASSERT(ts.tv_nsec >= 0);
343
344	if (ts.tv_nsec >= NANOSEC) {
345		ts.tv_sec++;
346		ts.tv_nsec -= NANOSEC;
347	}
348
349	ASSERT(mutex_owner(mp) == curthread);
350	mp->m_owner = NULL;
351	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
352	mp->m_owner = curthread;
353
354	if (error == EINTR)
355		goto top;
356
357	if (error == ETIMEDOUT)
358		return (-1);
359
360	ASSERT(error == 0);
361
362	return (1);
363}
364
365/*ARGSUSED*/
366clock_t
367cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
368    int flag)
369{
370	int error;
371	timespec_t ts;
372	hrtime_t delta;
373
374	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
375
376top:
377	delta = tim;
378	if (flag & CALLOUT_FLAG_ABSOLUTE)
379		delta -= gethrtime();
380
381	if (delta <= 0)
382		return (-1);
383
384	clock_gettime(CLOCK_REALTIME, &ts);
385	ts.tv_sec += delta / NANOSEC;
386	ts.tv_nsec += delta % NANOSEC;
387	if (ts.tv_nsec >= NANOSEC) {
388		ts.tv_sec++;
389		ts.tv_nsec -= NANOSEC;
390	}
391
392	ASSERT(mutex_owner(mp) == curthread);
393	mp->m_owner = NULL;
394	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
395	mp->m_owner = curthread;
396
397	if (error == ETIMEDOUT)
398		return (-1);
399
400	if (error == EINTR)
401		goto top;
402
403	ASSERT(error == 0);
404
405	return (1);
406}
407
408void
409cv_signal(kcondvar_t *cv)
410{
411	VERIFY(cond_signal(cv) == 0);
412}
413
414void
415cv_broadcast(kcondvar_t *cv)
416{
417	VERIFY(cond_broadcast(cv) == 0);
418}
419
420/*
421 * =========================================================================
422 * vnode operations
423 * =========================================================================
424 */
425/*
426 * Note: for the xxxat() versions of these functions, we assume that the
427 * starting vp is always rootdir (which is true for spa_directory.c, the only
428 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
429 * them by adding '/' in front of the path.
430 */
431
432/*ARGSUSED*/
433int
434vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
435{
436	int fd;
437	int dump_fd;
438	vnode_t *vp;
439	int old_umask;
440	char realpath[MAXPATHLEN];
441	struct stat64 st;
442
443	/*
444	 * If we're accessing a real disk from userland, we need to use
445	 * the character interface to avoid caching.  This is particularly
446	 * important if we're trying to look at a real in-kernel storage
447	 * pool from userland, e.g. via zdb, because otherwise we won't
448	 * see the changes occurring under the segmap cache.
449	 * On the other hand, the stupid character device returns zero
450	 * for its size.  So -- gag -- we open the block device to get
451	 * its size, and remember it for subsequent VOP_GETATTR().
452	 */
453	if (strncmp(path, "/dev/", 5) == 0) {
454		char *dsk;
455		fd = open64(path, O_RDONLY);
456		if (fd == -1)
457			return (errno);
458		if (fstat64(fd, &st) == -1) {
459			close(fd);
460			return (errno);
461		}
462		close(fd);
463		(void) sprintf(realpath, "%s", path);
464		dsk = strstr(path, "/dsk/");
465		if (dsk != NULL)
466			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
467			    dsk + 1);
468	} else {
469		(void) sprintf(realpath, "%s", path);
470		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
471			return (errno);
472	}
473
474	if (flags & FCREAT)
475		old_umask = umask(0);
476
477	/*
478	 * The construct 'flags - FREAD' conveniently maps combinations of
479	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
480	 */
481	fd = open64(realpath, flags - FREAD, mode);
482
483	if (flags & FCREAT)
484		(void) umask(old_umask);
485
486	if (vn_dumpdir != NULL) {
487		char dumppath[MAXPATHLEN];
488		(void) snprintf(dumppath, sizeof (dumppath),
489		    "%s/%s", vn_dumpdir, basename(realpath));
490		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
491		if (dump_fd == -1)
492			return (errno);
493	} else {
494		dump_fd = -1;
495	}
496
497	if (fd == -1)
498		return (errno);
499
500	if (fstat64(fd, &st) == -1) {
501		close(fd);
502		return (errno);
503	}
504
505	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
506
507	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
508
509	vp->v_fd = fd;
510	vp->v_size = st.st_size;
511	vp->v_path = spa_strdup(path);
512	vp->v_dump_fd = dump_fd;
513
514	return (0);
515}
516
517/*ARGSUSED*/
518int
519vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
520    int x3, vnode_t *startvp, int fd)
521{
522	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
523	int ret;
524
525	ASSERT(startvp == rootdir);
526	(void) sprintf(realpath, "/%s", path);
527
528	/* fd ignored for now, need if want to simulate nbmand support */
529	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
530
531	umem_free(realpath, strlen(path) + 2);
532
533	return (ret);
534}
535
536/*ARGSUSED*/
537int
538vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
539    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
540{
541	ssize_t iolen, split;
542
543	if (uio == UIO_READ) {
544		iolen = pread64(vp->v_fd, addr, len, offset);
545		if (vp->v_dump_fd != -1) {
546			int status =
547			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
548			ASSERT(status != -1);
549		}
550	} else {
551		/*
552		 * To simulate partial disk writes, we split writes into two
553		 * system calls so that the process can be killed in between.
554		 */
555		int sectors = len >> SPA_MINBLOCKSHIFT;
556		split = (sectors > 0 ? rand() % sectors : 0) <<
557		    SPA_MINBLOCKSHIFT;
558		iolen = pwrite64(vp->v_fd, addr, split, offset);
559		iolen += pwrite64(vp->v_fd, (char *)addr + split,
560		    len - split, offset + split);
561	}
562
563	if (iolen == -1)
564		return (errno);
565	if (residp)
566		*residp = len - iolen;
567	else if (iolen != len)
568		return (EIO);
569	return (0);
570}
571
572void
573vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
574{
575	close(vp->v_fd);
576	if (vp->v_dump_fd != -1)
577		close(vp->v_dump_fd);
578	spa_strfree(vp->v_path);
579	umem_free(vp, sizeof (vnode_t));
580}
581
582/*
583 * At a minimum we need to update the size since vdev_reopen()
584 * will no longer call vn_openat().
585 */
586int
587fop_getattr(vnode_t *vp, vattr_t *vap)
588{
589	struct stat64 st;
590
591	if (fstat64(vp->v_fd, &st) == -1) {
592		close(vp->v_fd);
593		return (errno);
594	}
595
596	vap->va_size = st.st_size;
597	return (0);
598}
599
600#ifdef ZFS_DEBUG
601
602/*
603 * =========================================================================
604 * Figure out which debugging statements to print
605 * =========================================================================
606 */
607
/* Comma-separated list of debug selectors; NULL when debugging is off. */
static char *dprintf_string;
/* Non-zero when every debug statement should be printed. */
static int dprintf_print_all;

/*
 * Return 1 if string appears as a complete element of the
 * comma-separated dprintf_string list, otherwise 0.
 *
 * List format: file1.c,function_name1,file2.c,file3.c
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *cursor;

	for (cursor = dprintf_string; cursor != NULL; ) {
		char *comma;

		/* Match only when the element ends right after string. */
		if (strncmp(cursor, string, len) == 0 &&
		    (cursor[len] == ',' || cursor[len] == '\0'))
			return (1);

		/* Advance to the element after the next comma, if any. */
		comma = strchr(cursor, ',');
		cursor = (comma != NULL) ? comma + 1 : NULL;
	}

	return (0);
}
632
633void
634dprintf_setup(int *argc, char **argv)
635{
636	int i, j;
637
638	/*
639	 * Debugging can be specified two ways: by setting the
640	 * environment variable ZFS_DEBUG, or by including a
641	 * "debug=..."  argument on the command line.  The command
642	 * line setting overrides the environment variable.
643	 */
644
645	for (i = 1; i < *argc; i++) {
646		int len = strlen("debug=");
647		/* First look for a command line argument */
648		if (strncmp("debug=", argv[i], len) == 0) {
649			dprintf_string = argv[i] + len;
650			/* Remove from args */
651			for (j = i; j < *argc; j++)
652				argv[j] = argv[j+1];
653			argv[j] = NULL;
654			(*argc)--;
655		}
656	}
657
658	if (dprintf_string == NULL) {
659		/* Look for ZFS_DEBUG environment variable */
660		dprintf_string = getenv("ZFS_DEBUG");
661	}
662
663	/*
664	 * Are we just turning on all debugging?
665	 */
666	if (dprintf_find_string("on"))
667		dprintf_print_all = 1;
668
669	if (dprintf_string != NULL)
670		zfs_flags |= ZFS_DEBUG_DPRINTF;
671}
672
/* sysctl stub: performs no work and always returns 0. */
int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
	const int rv = 0;

	return (rv);
}
678
679/*
680 * =========================================================================
681 * debug printfs
682 * =========================================================================
683 */
684void
685__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
686{
687	const char *newfile;
688	va_list adx;
689
690	/*
691	 * Get rid of annoying "../common/" prefix to filename.
692	 */
693	newfile = strrchr(file, '/');
694	if (newfile != NULL) {
695		newfile = newfile + 1; /* Get rid of leading / */
696	} else {
697		newfile = file;
698	}
699
700	if (dprintf_print_all ||
701	    dprintf_find_string(newfile) ||
702	    dprintf_find_string(func)) {
703		/* Print out just the function name if requested */
704		flockfile(stdout);
705		if (dprintf_find_string("pid"))
706			(void) printf("%d ", getpid());
707		if (dprintf_find_string("tid"))
708			(void) printf("%lu ", thr_self());
709#if 0
710		if (dprintf_find_string("cpu"))
711			(void) printf("%u ", getcpuid());
712#endif
713		if (dprintf_find_string("time"))
714			(void) printf("%llu ", gethrtime());
715		if (dprintf_find_string("long"))
716			(void) printf("%s, line %d: ", newfile, line);
717		(void) printf("%s: ", func);
718		va_start(adx, fmt);
719		(void) vprintf(fmt, adx);
720		va_end(adx);
721		funlockfile(stdout);
722	}
723}
724
725#endif /* ZFS_DEBUG */
726
727/*
728 * =========================================================================
729 * cmn_err() and panic()
730 * =========================================================================
731 */
732static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
733static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
734
/*
 * Print "error: <formatted message>\n" to stderr and abort the process.
 * Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	FILE *out = stderr;

	(void) fprintf(out, "error: ");
	(void) vfprintf(out, fmt, adx);
	(void) fprintf(out, "\n");

	abort();	/* think of it as a "user-level crash dump" */
}
744
/*
 * printf-style front end to vpanic(): formats the message and aborts.
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
754
755void
756vcmn_err(int ce, const char *fmt, va_list adx)
757{
758	if (ce == CE_PANIC)
759		vpanic(fmt, adx);
760	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
761		(void) fprintf(stderr, "%s", ce_prefix[ce]);
762		(void) vfprintf(stderr, fmt, adx);
763		(void) fprintf(stderr, "%s", ce_suffix[ce]);
764	}
765}
766
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
777
778/*
779 * =========================================================================
780 * kobj interfaces
781 * =========================================================================
782 */
783struct _buf *
784kobj_open_file(char *name)
785{
786	struct _buf *file;
787	vnode_t *vp;
788
789	/* set vp as the _fd field of the file */
790	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
791	    -1) != 0)
792		return ((void *)-1UL);
793
794	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
795	file->_fd = (intptr_t)vp;
796	return (file);
797}
798
799int
800kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
801{
802	ssize_t resid;
803
804	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
805	    UIO_SYSSPACE, 0, 0, 0, &resid);
806
807	return (size - resid);
808}
809
810void
811kobj_close_file(struct _buf *file)
812{
813	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
814	umem_free(file, sizeof (struct _buf));
815}
816
817int
818kobj_get_filesize(struct _buf *file, uint64_t *size)
819{
820	struct stat64 st;
821	vnode_t *vp = (vnode_t *)file->_fd;
822
823	if (fstat64(vp->v_fd, &st) == -1) {
824		vn_close(vp, 0, NULL, NULL);
825		return (errno);
826	}
827	*size = st.st_size;
828	return (0);
829}
830
831/*
832 * =========================================================================
833 * misc routines
834 * =========================================================================
835 */
836
837void
838delay(clock_t ticks)
839{
840	poll(0, 0, ticks * (1000 / hz));
841}
842
843#if 0
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 *
 * Implemented as a binary search: at each step, if anything remains above
 * the lower `step' bits, those bits are skipped and counted.
 */
int
highbit64(uint64_t i)
{
	static const int steps[] = { 32, 16, 8, 4, 2, 1 };
	int h = 1;
	unsigned k;

	if (i == 0)
		return (0);

	for (k = 0; k < sizeof (steps) / sizeof (steps[0]); k++) {
		if ((i >> steps[k]) != 0) {
			h += steps[k];
			i >>= steps[k];
		}
	}

	return (h);
}
875#endif
876
/* Descriptors for /dev/random and /dev/urandom; opened by kernel_init(). */
static int random_fd = -1, urandom_fd = -1;

/*
 * Fill ptr[0..len) with bytes read from fd, looping over short reads.
 *
 * A read interrupted by a signal (EINTR) is retried.  Any other read
 * error, or end-of-file before len bytes were obtained, trips the VERIFY
 * rather than looping forever or corrupting the cursor.  Returns 0.
 */
static int
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
{
	size_t resid = len;
	ssize_t bytes;

	ASSERT(fd != -1);

	while (resid != 0) {
		bytes = read(fd, ptr, resid);
		if (bytes == -1 && errno == EINTR)
			continue;
		/* 0 means EOF, -1 means error: neither can make progress. */
		VERIFY(bytes > 0);
		ptr += bytes;
		resid -= bytes;
	}

	return (0);
}
896
897int
898random_get_bytes(uint8_t *ptr, size_t len)
899{
900	return (random_get_bytes_common(ptr, len, random_fd));
901}
902
903int
904random_get_pseudo_bytes(uint8_t *ptr, size_t len)
905{
906	return (random_get_bytes_common(ptr, len, urandom_fd));
907}
908
/*
 * Convert the string hw_serial to an unsigned long in the given base.
 *
 * *result always receives the value produced by strtoul().  If nptr is
 * non-NULL, *nptr receives a pointer to the first unconverted character.
 *
 * Returns 0 on success, the errno set by strtoul() (e.g. ERANGE) on a
 * conversion error, or EINVAL when no digits could be converted.  errno
 * is cleared before the conversion so that a legitimate result of 0 is
 * not reported as a stale error.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;
	int err;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;

	if (err != 0)
		return (err);
	if (end == hw_serial)
		return (EINVAL);	/* no number formed */
	return (0);
}
919
920int
921ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
922{
923	char *end;
924
925	*result = strtoull(str, &end, base);
926	if (*result == 0)
927		return (errno);
928	return (0);
929}
930
931#ifdef illumos
932/* ARGSUSED */
933cyclic_id_t
934cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
935{
936	return (1);
937}
938
939/* ARGSUSED */
940void
941cyclic_remove(cyclic_id_t id)
942{
943}
944
945/* ARGSUSED */
946int
947cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
948{
949	return (1);
950}
951#endif
952
953/*
954 * =========================================================================
955 * kernel emulation setup & teardown
956 * =========================================================================
957 */
/*
 * umem "nofail" callback, registered by kernel_init(): report the
 * condition on stderr and abort so that a core dump is produced.
 * Never actually returns.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	/*
	 * Write the text only, not the terminating NUL.  The result is
	 * ignored because we abort regardless.
	 */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
967
968void
969kernel_init(int mode)
970{
971	extern uint_t rrw_tsd_key;
972
973	umem_nofail_callback(umem_out_of_memory);
974
975	physmem = sysconf(_SC_PHYS_PAGES);
976
977	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
978	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
979
980	(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
981	    (mode & FWRITE) ? (unsigned long)gethostid() : 0);
982
983	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
984	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
985
986	system_taskq_init();
987
988#ifdef illumos
989	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
990#endif
991
992	spa_init(mode);
993
994	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
995}
996
997void
998kernel_fini(void)
999{
1000	spa_fini();
1001
1002	system_taskq_fini();
1003
1004	close(random_fd);
1005	close(urandom_fd);
1006
1007	random_fd = -1;
1008	urandom_fd = -1;
1009}
1010
1011int
1012z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
1013{
1014	int ret;
1015	uLongf len = *dstlen;
1016
1017	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
1018		*dstlen = (size_t)len;
1019
1020	return (ret);
1021}
1022
1023int
1024z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
1025    int level)
1026{
1027	int ret;
1028	uLongf len = *dstlen;
1029
1030	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
1031		*dstlen = (size_t)len;
1032
1033	return (ret);
1034}
1035
1036uid_t
1037crgetuid(cred_t *cr)
1038{
1039	return (0);
1040}
1041
1042uid_t
1043crgetruid(cred_t *cr)
1044{
1045	return (0);
1046}
1047
1048gid_t
1049crgetgid(cred_t *cr)
1050{
1051	return (0);
1052}
1053
1054int
1055crgetngroups(cred_t *cr)
1056{
1057	return (0);
1058}
1059
1060gid_t *
1061crgetgroups(cred_t *cr)
1062{
1063	return (NULL);
1064}
1065
1066int
1067zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1068{
1069	return (0);
1070}
1071
1072int
1073zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1074{
1075	return (0);
1076}
1077
1078int
1079zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1080{
1081	return (0);
1082}
1083
1084ksiddomain_t *
1085ksid_lookupdomain(const char *dom)
1086{
1087	ksiddomain_t *kd;
1088
1089	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1090	kd->kd_name = spa_strdup(dom);
1091	return (kd);
1092}
1093
1094void
1095ksiddomain_rele(ksiddomain_t *ksid)
1096{
1097	spa_strfree(ksid->kd_name);
1098	umem_free(ksid, sizeof (ksiddomain_t));
1099}
1100
1101/*
1102 * Do not change the length of the returned string; it must be freed
1103 * with strfree().
1104 */
1105char *
1106kmem_asprintf(const char *fmt, ...)
1107{
1108	int size;
1109	va_list adx;
1110	char *buf;
1111
1112	va_start(adx, fmt);
1113	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1114	va_end(adx);
1115
1116	buf = kmem_alloc(size, KM_SLEEP);
1117
1118	va_start(adx, fmt);
1119	size = vsnprintf(buf, size, fmt, adx);
1120	va_end(adx);
1121
1122	return (buf);
1123}
1124
1125/* ARGSUSED */
1126int
1127zfs_onexit_fd_hold(int fd, minor_t *minorp)
1128{
1129	*minorp = 0;
1130	return (0);
1131}
1132
/* onexit stub: intentionally does nothing. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* nothing to release */
}
1138
1139/* ARGSUSED */
1140int
1141zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1142    uint64_t *action_handle)
1143{
1144	return (0);
1145}
1146
1147/* ARGSUSED */
1148int
1149zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1150{
1151	return (0);
1152}
1153
1154/* ARGSUSED */
1155int
1156zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1157{
1158	return (0);
1159}
1160
1161#ifdef __FreeBSD__
/* zvol stub: no minors are created; always returns 0. */
/* ARGSUSED */
int
zvol_create_minors(const char *name)
{
	const int rv = 0;

	return (rv);
}
1168#endif
1169
1170#ifdef illumos
1171void
1172bioinit(buf_t *bp)
1173{
1174	bzero(bp, sizeof (buf_t));
1175}
1176
1177void
1178biodone(buf_t *bp)
1179{
1180	if (bp->b_iodone != NULL) {
1181		(*(bp->b_iodone))(bp);
1182		return;
1183	}
1184	ASSERT((bp->b_flags & B_DONE) == 0);
1185	bp->b_flags |= B_DONE;
1186}
1187
1188void
1189bioerror(buf_t *bp, int error)
1190{
1191	ASSERT(bp != NULL);
1192	ASSERT(error >= 0);
1193
1194	if (error != 0) {
1195		bp->b_flags |= B_ERROR;
1196	} else {
1197		bp->b_flags &= ~B_ERROR;
1198	}
1199	bp->b_error = error;
1200}
1201
1202
1203int
1204geterror(struct buf *bp)
1205{
1206	int error = 0;
1207
1208	if (bp->b_flags & B_ERROR) {
1209		error = bp->b_error;
1210		if (!error)
1211			error = EIO;
1212	}
1213	return (error);
1214}
1215#endif
1216