1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
25 */
26
27#include <assert.h>
28#include <fcntl.h>
29#include <libgen.h>
30#include <poll.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <limits.h>
35#include <libzutil.h>
36#include <sys/crypto/icp.h>
37#include <sys/processor.h>
38#include <sys/rrwlock.h>
39#include <sys/spa.h>
40#include <sys/stat.h>
41#include <sys/systeminfo.h>
42#include <sys/time.h>
43#include <sys/utsname.h>
44#include <sys/zfs_context.h>
45#include <sys/zfs_onexit.h>
46#include <sys/zfs_vfsops.h>
47#include <sys/zstd/zstd.h>
48#include <sys/zvol.h>
49#include <zfs_fletcher.h>
50#include <zlib.h>
51
52/*
53 * Emulation of kernel services in userland.
54 */
55
56uint64_t physmem;
57uint32_t hostid;
58struct utsname hw_utsname;
59
60/* If set, all blocks read will be copied to the specified directory. */
61char *vn_dumpdir = NULL;
62
63/* this only exists to have its address taken */
64struct proc p0;
65
66/*
67 * =========================================================================
68 * threads
69 * =========================================================================
70 *
71 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
72 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
73 * the expected stack depth while small enough to avoid exhausting address
74 * space with high thread counts.
75 */
76#define	TS_STACK_MIN	MAX(PTHREAD_STACK_MIN, 32768)
77#define	TS_STACK_MAX	(256 * 1024)
78
/*
 * Record handed from zk_thread_create() to the new thread: the function to
 * run and the argument to pass to it.
 */
struct zk_thread_wrapper {
	void (*func)(void *);
	void *arg;
};

/*
 * pthread start routine.  'arg' points at a heap-allocated
 * struct zk_thread_wrapper; its contents are copied to the stack, the heap
 * copy is freed, and only then is the wrapped function invoked.
 * Always returns NULL.
 */
static void *
zk_thread_wrapper(void *arg)
{
	struct zk_thread_wrapper *heap_copy = arg;
	struct zk_thread_wrapper local = *heap_copy;

	free(heap_copy);
	local.func(local.arg);

	return (NULL);
}
93
94kthread_t *
95zk_thread_create(const char *name, void (*func)(void *), void *arg,
96    size_t stksize, int state)
97{
98	pthread_attr_t attr;
99	pthread_t tid;
100	char *stkstr;
101	struct zk_thread_wrapper *ztw;
102	int detachstate = PTHREAD_CREATE_DETACHED;
103
104	VERIFY0(pthread_attr_init(&attr));
105
106	if (state & TS_JOINABLE)
107		detachstate = PTHREAD_CREATE_JOINABLE;
108
109	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
110
111	/*
112	 * We allow the default stack size in user space to be specified by
113	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
114	 * the convenience of observing and debugging stack overruns in
115	 * user space.  Explicitly specified stack sizes will be honored.
116	 * The usage of ZFS_STACK_SIZE is discussed further in the
117	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
118	 */
119	if (stksize == 0) {
120		stkstr = getenv("ZFS_STACK_SIZE");
121
122		if (stkstr == NULL)
123			stksize = TS_STACK_MAX;
124		else
125			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
126	}
127
128	VERIFY3S(stksize, >, 0);
129	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
130
131	/*
132	 * If this ever fails, it may be because the stack size is not a
133	 * multiple of system page size.
134	 */
135	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
136	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
137
138	VERIFY(ztw = malloc(sizeof (*ztw)));
139	ztw->func = func;
140	ztw->arg = arg;
141	VERIFY0(pthread_create(&tid, &attr, zk_thread_wrapper, ztw));
142	VERIFY0(pthread_attr_destroy(&attr));
143
144	pthread_setname_np(tid, name);
145
146	return ((void *)(uintptr_t)tid);
147}
148
149/*
150 * =========================================================================
151 * kstats
152 * =========================================================================
153 */
154kstat_t *
155kstat_create(const char *module, int instance, const char *name,
156    const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
157{
158	(void) module, (void) instance, (void) name, (void) class, (void) type,
159	    (void) ndata, (void) ks_flag;
160	return (NULL);
161}
162
163void
164kstat_install(kstat_t *ksp)
165{
166	(void) ksp;
167}
168
169void
170kstat_delete(kstat_t *ksp)
171{
172	(void) ksp;
173}
174
175void
176kstat_set_raw_ops(kstat_t *ksp,
177    int (*headers)(char *buf, size_t size),
178    int (*data)(char *buf, size_t size, void *data),
179    void *(*addr)(kstat_t *ksp, loff_t index))
180{
181	(void) ksp, (void) headers, (void) data, (void) addr;
182}
183
184/*
185 * =========================================================================
186 * mutexes
187 * =========================================================================
188 */
189
190void
191mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
192{
193	(void) name, (void) type, (void) cookie;
194	VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
195	memset(&mp->m_owner, 0, sizeof (pthread_t));
196}
197
/*
 * Destroy the pthread mutex embedded in 'mp'.  VERIFY0 checks that
 * pthread_mutex_destroy() returned 0.
 */
void
mutex_destroy(kmutex_t *mp)
{
	VERIFY0(pthread_mutex_destroy(&mp->m_lock));
}
203
/*
 * Acquire 'mp' (blocking) and record the calling thread as its owner.
 */
void
mutex_enter(kmutex_t *mp)
{
	VERIFY0(pthread_mutex_lock(&mp->m_lock));
	/* Only written once the lock is held. */
	mp->m_owner = pthread_self();
}
210
211int
212mutex_enter_check_return(kmutex_t *mp)
213{
214	int error = pthread_mutex_lock(&mp->m_lock);
215	if (error == 0)
216		mp->m_owner = pthread_self();
217	return (error);
218}
219
220int
221mutex_tryenter(kmutex_t *mp)
222{
223	int error = pthread_mutex_trylock(&mp->m_lock);
224	if (error == 0) {
225		mp->m_owner = pthread_self();
226		return (1);
227	} else {
228		VERIFY3S(error, ==, EBUSY);
229		return (0);
230	}
231}
232
/*
 * Release 'mp'.  The owner field is zeroed while the lock is still held,
 * then the pthread mutex is unlocked.
 */
void
mutex_exit(kmutex_t *mp)
{
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	VERIFY0(pthread_mutex_unlock(&mp->m_lock));
}
239
240/*
241 * =========================================================================
242 * rwlocks
243 * =========================================================================
244 */
245
246void
247rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
248{
249	(void) name, (void) type, (void) arg;
250	VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
251	rwlp->rw_readers = 0;
252	rwlp->rw_owner = 0;
253}
254
/*
 * Destroy the pthread rwlock embedded in 'rwlp'.
 */
void
rw_destroy(krwlock_t *rwlp)
{
	VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
}
260
261void
262rw_enter(krwlock_t *rwlp, krw_t rw)
263{
264	if (rw == RW_READER) {
265		VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
266		atomic_inc_uint(&rwlp->rw_readers);
267	} else {
268		VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
269		rwlp->rw_owner = pthread_self();
270	}
271}
272
/*
 * Release 'rwlp'.  The bookkeeping is undone first (reader count dropped if
 * RW_READ_HELD() reports a read hold, otherwise the writer owner cleared)
 * and then the pthread rwlock is unlocked.
 */
void
rw_exit(krwlock_t *rwlp)
{
	if (RW_READ_HELD(rwlp))
		atomic_dec_uint(&rwlp->rw_readers);
	else
		rwlp->rw_owner = 0;

	VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
}
283
284int
285rw_tryenter(krwlock_t *rwlp, krw_t rw)
286{
287	int error;
288
289	if (rw == RW_READER)
290		error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
291	else
292		error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
293
294	if (error == 0) {
295		if (rw == RW_READER)
296			atomic_inc_uint(&rwlp->rw_readers);
297		else
298			rwlp->rw_owner = pthread_self();
299
300		return (1);
301	}
302
303	VERIFY3S(error, ==, EBUSY);
304
305	return (0);
306}
307
/*
 * Return the emulated host id held in the global 'hostid'.  The zone
 * argument is ignored.
 */
uint32_t
zone_get_hostid(void *zonep)
{
	/*
	 * We're emulating the system's hostid in userland.
	 */
	(void) zonep;
	return (hostid);
}
317
/*
 * Reader-to-writer upgrade is not implemented here: the lock is left
 * untouched and 0 is always returned.
 */
int
rw_tryupgrade(krwlock_t *rwlp)
{
	(void) rwlp;
	return (0);
}
324
325/*
326 * =========================================================================
327 * condition variables
328 * =========================================================================
329 */
330
331void
332cv_init(kcondvar_t *cv, char *name, int type, void *arg)
333{
334	(void) name, (void) type, (void) arg;
335	VERIFY0(pthread_cond_init(cv, NULL));
336}
337
/*
 * Destroy the condition variable.
 */
void
cv_destroy(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_destroy(cv));
}
343
/*
 * Wait on 'cv' with 'mp' as the associated mutex.  The owner field of 'mp'
 * is cleared before the wait and set back to the calling thread once
 * pthread_cond_wait() returns.
 */
void
cv_wait(kcondvar_t *cv, kmutex_t *mp)
{
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
	mp->m_owner = pthread_self();
}
351
/*
 * Same as cv_wait(); signals are not treated specially here.
 * Always returns 1.
 */
int
cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
{
	cv_wait(cv, mp);
	return (1);
}
358
359int
360cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
361{
362	int error;
363	struct timeval tv;
364	struct timespec ts;
365	clock_t delta;
366
367	delta = abstime - ddi_get_lbolt();
368	if (delta <= 0)
369		return (-1);
370
371	VERIFY(gettimeofday(&tv, NULL) == 0);
372
373	ts.tv_sec = tv.tv_sec + delta / hz;
374	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
375	if (ts.tv_nsec >= NANOSEC) {
376		ts.tv_sec++;
377		ts.tv_nsec -= NANOSEC;
378	}
379
380	memset(&mp->m_owner, 0, sizeof (pthread_t));
381	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
382	mp->m_owner = pthread_self();
383
384	if (error == ETIMEDOUT)
385		return (-1);
386
387	VERIFY0(error);
388
389	return (1);
390}
391
392int
393cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
394    int flag)
395{
396	(void) res;
397	int error;
398	struct timeval tv;
399	struct timespec ts;
400	hrtime_t delta;
401
402	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
403
404	delta = tim;
405	if (flag & CALLOUT_FLAG_ABSOLUTE)
406		delta -= gethrtime();
407
408	if (delta <= 0)
409		return (-1);
410
411	VERIFY0(gettimeofday(&tv, NULL));
412
413	ts.tv_sec = tv.tv_sec + delta / NANOSEC;
414	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
415	if (ts.tv_nsec >= NANOSEC) {
416		ts.tv_sec++;
417		ts.tv_nsec -= NANOSEC;
418	}
419
420	memset(&mp->m_owner, 0, sizeof (pthread_t));
421	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
422	mp->m_owner = pthread_self();
423
424	if (error == ETIMEDOUT)
425		return (-1);
426
427	VERIFY0(error);
428
429	return (1);
430}
431
/*
 * Signal the condition variable via pthread_cond_signal().
 */
void
cv_signal(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_signal(cv));
}
437
/*
 * Broadcast on the condition variable via pthread_cond_broadcast().
 */
void
cv_broadcast(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_broadcast(cv));
}
443
444/*
445 * =========================================================================
446 * procfs list
447 * =========================================================================
448 */
449
/*
 * No-op: the arguments are ignored and nothing is formatted or emitted.
 */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{
	(void) m;
	(void) fmt;
}
455
456void
457procfs_list_install(const char *module,
458    const char *submodule,
459    const char *name,
460    mode_t mode,
461    procfs_list_t *procfs_list,
462    int (*show)(struct seq_file *f, void *p),
463    int (*show_header)(struct seq_file *f),
464    int (*clear)(procfs_list_t *procfs_list),
465    size_t procfs_list_node_off)
466{
467	(void) module, (void) submodule, (void) name, (void) mode, (void) show,
468	    (void) show_header, (void) clear;
469	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
470	list_create(&procfs_list->pl_list,
471	    procfs_list_node_off + sizeof (procfs_list_node_t),
472	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
473	procfs_list->pl_next_id = 1;
474	procfs_list->pl_node_offset = procfs_list_node_off;
475}
476
/*
 * No-op counterpart of procfs_list_install(); the list is left untouched.
 */
void
procfs_list_uninstall(procfs_list_t *procfs_list)
{
	(void) procfs_list;
}
482
/*
 * Destroy the list and lock created by procfs_list_install().  The list is
 * asserted to be empty first.
 */
void
procfs_list_destroy(procfs_list_t *procfs_list)
{
	ASSERT(list_is_empty(&procfs_list->pl_list));
	list_destroy(&procfs_list->pl_list);
	mutex_destroy(&procfs_list->pl_lock);
}
490
/*
 * Lvalue for the pln_id of the procfs_list_node_t embedded in 'obj' at
 * byte offset pl_node_offset.
 */
#define	NODE_ID(procfs_list, obj) \
		(((procfs_list_node_t *)(((char *)obj) + \
		(procfs_list)->pl_node_offset))->pln_id)

/*
 * Append 'p' to the list, stamping it with the next id.  pl_lock is
 * asserted to be held.
 */
void
procfs_list_add(procfs_list_t *procfs_list, void *p)
{
	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
	list_insert_tail(&procfs_list->pl_list, p);
}
502
503/*
504 * =========================================================================
505 * vnode operations
506 * =========================================================================
507 */
508
509/*
510 * =========================================================================
511 * Figure out which debugging statements to print
512 * =========================================================================
513 */
514
/* Comma-separated debug selector list; NULL when debugging is not set up. */
static char *dprintf_string;
/* Non-zero when the selector list contains "on". */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete element of the
 * comma-separated list in dprintf_string.
 * List format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 on an exact element match, 0 otherwise (including when
 * dprintf_string is NULL).
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *elem;
	char *comma;

	for (elem = dprintf_string; elem != NULL;
	    elem = (comma != NULL) ? comma + 1 : NULL) {
		/* Match only if the element ends right after 'string'. */
		if (strncmp(elem, string, len) == 0 &&
		    (elem[len] == ',' || elem[len] == '\0'))
			return (1);
		comma = strchr(elem, ',');
	}

	return (0);
}
539
540void
541dprintf_setup(int *argc, char **argv)
542{
543	int i, j;
544
545	/*
546	 * Debugging can be specified two ways: by setting the
547	 * environment variable ZFS_DEBUG, or by including a
548	 * "debug=..."  argument on the command line.  The command
549	 * line setting overrides the environment variable.
550	 */
551
552	for (i = 1; i < *argc; i++) {
553		int len = strlen("debug=");
554		/* First look for a command line argument */
555		if (strncmp("debug=", argv[i], len) == 0) {
556			dprintf_string = argv[i] + len;
557			/* Remove from args */
558			for (j = i; j < *argc; j++)
559				argv[j] = argv[j+1];
560			argv[j] = NULL;
561			(*argc)--;
562		}
563	}
564
565	if (dprintf_string == NULL) {
566		/* Look for ZFS_DEBUG environment variable */
567		dprintf_string = getenv("ZFS_DEBUG");
568	}
569
570	/*
571	 * Are we just turning on all debugging?
572	 */
573	if (dprintf_find_string("on"))
574		dprintf_print_all = 1;
575
576	if (dprintf_string != NULL)
577		zfs_flags |= ZFS_DEBUG_DPRINTF;
578}
579
580/*
581 * =========================================================================
582 * debug printfs
583 * =========================================================================
584 */
585void
586__dprintf(boolean_t dprint, const char *file, const char *func,
587    int line, const char *fmt, ...)
588{
589	/* Get rid of annoying "../common/" prefix to filename. */
590	const char *newfile = zfs_basename(file);
591
592	va_list adx;
593	if (dprint) {
594		/* dprintf messages are printed immediately */
595
596		if (!dprintf_print_all &&
597		    !dprintf_find_string(newfile) &&
598		    !dprintf_find_string(func))
599			return;
600
601		/* Print out just the function name if requested */
602		flockfile(stdout);
603		if (dprintf_find_string("pid"))
604			(void) printf("%d ", getpid());
605		if (dprintf_find_string("tid"))
606			(void) printf("%ju ",
607			    (uintmax_t)(uintptr_t)pthread_self());
608		if (dprintf_find_string("cpu"))
609			(void) printf("%u ", getcpuid());
610		if (dprintf_find_string("time"))
611			(void) printf("%llu ", gethrtime());
612		if (dprintf_find_string("long"))
613			(void) printf("%s, line %d: ", newfile, line);
614		(void) printf("dprintf: %s: ", func);
615		va_start(adx, fmt);
616		(void) vprintf(fmt, adx);
617		va_end(adx);
618		funlockfile(stdout);
619	} else {
620		/* zfs_dbgmsg is logged for dumping later */
621		size_t size;
622		char *buf;
623		int i;
624
625		size = 1024;
626		buf = umem_alloc(size, UMEM_NOFAIL);
627		i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
628
629		if (i < size) {
630			va_start(adx, fmt);
631			(void) vsnprintf(buf + i, size - i, fmt, adx);
632			va_end(adx);
633		}
634
635		__zfs_dbgmsg(buf);
636
637		umem_free(buf, size);
638	}
639}
640
641/*
642 * =========================================================================
643 * cmn_err() and panic()
644 * =========================================================================
645 */
/*
 * Text written before and after a message by vcmn_err(), indexed by the
 * 'ce' level.  Both tables have CE_IGNORE rows.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
648
/*
 * Print "error: " followed by the formatted message and a newline to
 * stderr, then abort().  Does not return.
 */
__attribute__((noreturn)) void
vpanic(const char *fmt, va_list adx)
{
	(void) fprintf(stderr, "error: ");
	(void) vfprintf(stderr, fmt, adx);
	(void) fprintf(stderr, "\n");

	abort();	/* think of it as a "user-level crash dump" */
}
658
/*
 * Variadic front end for vpanic().  Does not return; the va_end() below is
 * never reached because vpanic() is declared noreturn.
 */
__attribute__((noreturn)) void
panic(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vpanic(fmt, adx);
	va_end(adx);
}
668
669void
670vcmn_err(int ce, const char *fmt, va_list adx)
671{
672	if (ce == CE_PANIC)
673		vpanic(fmt, adx);
674	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
675		(void) fprintf(stderr, "%s", ce_prefix[ce]);
676		(void) vfprintf(stderr, fmt, adx);
677		(void) fprintf(stderr, "%s", ce_suffix[ce]);
678	}
679}
680
/*
 * Variadic front end for vcmn_err().
 */
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
}
690
691/*
692 * =========================================================================
693 * misc routines
694 * =========================================================================
695 */
696
697void
698delay(clock_t ticks)
699{
700	(void) poll(0, 0, ticks * (1000 / hz));
701}
702
/*
 * Find highest one bit set.
 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * The __builtin_clzll() function is supported by both GCC and Clang.
 */
int
highbit64(uint64_t i)
{
	const int nbits = NBBY * sizeof (uint64_t);

	/* __builtin_clzll() is undefined for 0, so handle it explicitly. */
	return (i != 0 ? nbits - __builtin_clzll(i) : 0);
}
716
/*
 * Find lowest one bit set.
 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
 * The __builtin_ffsll() function is supported by both GCC and Clang.
 */
int
lowbit64(uint64_t i)
{
	return (i != 0 ? __builtin_ffsll(i) : 0);
}
730
/*
 * Paths opened by random_init() and the resulting descriptors, which are
 * -1 whenever the devices are not open.
 */
const char *random_path = "/dev/random";
const char *urandom_path = "/dev/urandom";
static int random_fd = -1, urandom_fd = -1;
734
735void
736random_init(void)
737{
738	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
739	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
740}
741
742void
743random_fini(void)
744{
745	close(random_fd);
746	close(urandom_fd);
747
748	random_fd = -1;
749	urandom_fd = -1;
750}
751
752static int
753random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
754{
755	size_t resid = len;
756	ssize_t bytes;
757
758	ASSERT(fd != -1);
759
760	while (resid != 0) {
761		bytes = read(fd, ptr, resid);
762		ASSERT3S(bytes, >=, 0);
763		ptr += bytes;
764		resid -= bytes;
765	}
766
767	return (0);
768}
769
/*
 * Fill 'ptr' with 'len' bytes read from the descriptor opened on
 * random_path.  Returns the result of random_get_bytes_common().
 */
int
random_get_bytes(uint8_t *ptr, size_t len)
{
	return (random_get_bytes_common(ptr, len, random_fd));
}
775
/*
 * Fill 'ptr' with 'len' bytes read from the descriptor opened on
 * urandom_path.  Returns the result of random_get_bytes_common().
 */
int
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
{
	return (random_get_bytes_common(ptr, len, urandom_fd));
}
781
782int
783ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
784{
785	errno = 0;
786	*result = strtoull(str, nptr, base);
787	if (*result == 0)
788		return (errno);
789	return (0);
790}
791
/*
 * Return a pointer to the global hw_utsname.
 */
utsname_t *
utsname(void)
{
	return (&hw_utsname);
}
797
798/*
799 * =========================================================================
800 * kernel emulation setup & teardown
801 * =========================================================================
802 */
/*
 * Out-of-memory handler: report the condition on stderr and abort().
 * The return statement only satisfies the int signature.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] =
	    "out of memory -- generating core dump\n";

	(void) fprintf(stderr, "%s", errmsg);
	abort();
	return (0);
}
812
/*
 * Bring up the emulated kernel environment.  'mode' is passed to spa_init()
 * as a spa_mode_t and also decides whether a real host id is used.
 * kernel_fini() undoes most of this in reverse order.
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Route umem allocation failures to umem_out_of_memory(). */
	umem_nofail_callback(umem_out_of_memory);

	/* physmem is kept in pages. */
	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* The system host id is only looked up when opened for writing. */
	hostid = (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0;

	random_init();

	VERIFY0(uname(&hw_utsname));

	system_taskq_init();
	icp_init();

	zstd_init();

	spa_init((spa_mode_t)mode);

	fletcher_4_init();

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
842
/*
 * Tear down the subsystems started by kernel_init(), in the reverse of
 * their initialization order.
 */
void
kernel_fini(void)
{
	fletcher_4_fini();
	spa_fini();

	zstd_fini();

	icp_fini();
	system_taskq_fini();

	random_fini();
}
856
/*
 * Credential accessors.  The credential argument is ignored in every one
 * of them: the ids are reported as 0 and the supplementary group list as
 * empty (count 0, NULL array).
 */
uid_t
crgetuid(cred_t *cr)
{
	(void) cr;
	return (0);
}

uid_t
crgetruid(cred_t *cr)
{
	(void) cr;
	return (0);
}

gid_t
crgetgid(cred_t *cr)
{
	(void) cr;
	return (0);
}

int
crgetngroups(cred_t *cr)
{
	(void) cr;
	return (0);
}

gid_t *
crgetgroups(cred_t *cr)
{
	(void) cr;
	return (NULL);
}
891
892int
893zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
894{
895	(void) name, (void) cr;
896	return (0);
897}
898
899int
900zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
901{
902	(void) from, (void) to, (void) cr;
903	return (0);
904}
905
906int
907zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
908{
909	(void) name, (void) cr;
910	return (0);
911}
912
913int
914secpolicy_zfs(const cred_t *cr)
915{
916	(void) cr;
917	return (0);
918}
919
920int
921secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
922{
923	(void) cr, (void) proc;
924	return (0);
925}
926
927ksiddomain_t *
928ksid_lookupdomain(const char *dom)
929{
930	ksiddomain_t *kd;
931
932	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
933	kd->kd_name = spa_strdup(dom);
934	return (kd);
935}
936
937void
938ksiddomain_rele(ksiddomain_t *ksid)
939{
940	spa_strfree(ksid->kd_name);
941	umem_free(ksid, sizeof (ksiddomain_t));
942}
943
/*
 * Format into a newly allocated string via vasprintf() and return it.
 * A vasprintf() failure trips the VERIFY.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list args;
	char *out = NULL;
	int rc;

	/* Format from a copy so the caller's va_list is left untouched. */
	va_copy(args, adx);
	rc = vasprintf(&out, fmt, args);
	VERIFY(rc != -1);
	va_end(args);

	return (out);
}
956
/*
 * Variadic form of kmem_vasprintf(): format into a newly allocated string
 * via vasprintf() and return it.  A vasprintf() failure trips the VERIFY.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list args;
	char *out = NULL;
	int rc;

	va_start(args, fmt);
	rc = vasprintf(&out, fmt, args);
	VERIFY(rc != -1);
	va_end(args);

	return (out);
}
969
/*
 * snprintf() will return the number of characters that it would have
 * printed whenever it is limited by value of the size variable, rather than
 * the number of characters that it did print. This can cause misbehavior on
 * subsequent uses of the return value, so we define a safe version that will
 * return the number of characters actually printed, not counting the
 * terminating NUL character.  Subsequent use of this by the safe string
 * functions is safe whether it is snprintf(), strlcat() or strlcpy().
 */
int
kmem_scnprintf(char *restrict str, size_t size, const char *restrict fmt, ...)
{
	va_list args;
	int written;

	/* Make the 0 case a no-op so that we do not return -1 */
	if (size != 0) {
		va_start(args, fmt);
		written = vsnprintf(str, size, fmt, args);
		va_end(args);

		/* On truncation report what actually landed in 'str'. */
		if (written >= size)
			written = size - 1;

		return (written);
	}

	return (0);
}
998
999zfs_file_t *
1000zfs_onexit_fd_hold(int fd, minor_t *minorp)
1001{
1002	(void) fd;
1003	*minorp = 0;
1004	return (NULL);
1005}
1006
1007void
1008zfs_onexit_fd_rele(zfs_file_t *fp)
1009{
1010	(void) fp;
1011}
1012
1013int
1014zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1015    uintptr_t *action_handle)
1016{
1017	(void) minor, (void) func, (void) data, (void) action_handle;
1018	return (0);
1019}
1020
/*
 * Stubs: marking returns a zero cookie, unmarking ignores it, and the
 * fstrans check and the cache-reap query both return 0.
 */
fstrans_cookie_t
spl_fstrans_mark(void)
{
	return ((fstrans_cookie_t)0);
}

void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
	(void) cookie;
}

int
__spl_pf_fstrans_check(void)
{
	return (0);
}

int
kmem_cache_reap_active(void)
{
	return (0);
}
1044
1045void
1046zvol_create_minor(const char *name)
1047{
1048	(void) name;
1049}
1050
1051void
1052zvol_create_minors_recursive(const char *name)
1053{
1054	(void) name;
1055}
1056
1057void
1058zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
1059{
1060	(void) spa, (void) name, (void) async;
1061}
1062
1063void
1064zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
1065    boolean_t async)
1066{
1067	(void) spa, (void) oldname, (void) newname, (void) async;
1068}
1069
1070/*
1071 * Open file
1072 *
1073 * path - fully qualified path to file
1074 * flags - file attributes O_READ / O_WRITE / O_EXCL
1075 * fpp - pointer to return file pointer
1076 *
1077 * Returns 0 on success underlying error on failure.
1078 */
1079int
1080zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
1081{
1082	int fd = -1;
1083	int dump_fd = -1;
1084	int err;
1085	int old_umask = 0;
1086	zfs_file_t *fp;
1087	struct stat64 st;
1088
1089	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
1090		return (errno);
1091
1092	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
1093		flags |= O_DIRECT;
1094
1095	if (flags & O_CREAT)
1096		old_umask = umask(0);
1097
1098	fd = open64(path, flags, mode);
1099	if (fd == -1)
1100		return (errno);
1101
1102	if (flags & O_CREAT)
1103		(void) umask(old_umask);
1104
1105	if (vn_dumpdir != NULL) {
1106		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
1107		const char *inpath = zfs_basename(path);
1108
1109		(void) snprintf(dumppath, MAXPATHLEN,
1110		    "%s/%s", vn_dumpdir, inpath);
1111		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
1112		umem_free(dumppath, MAXPATHLEN);
1113		if (dump_fd == -1) {
1114			err = errno;
1115			close(fd);
1116			return (err);
1117		}
1118	} else {
1119		dump_fd = -1;
1120	}
1121
1122	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
1123
1124	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
1125	fp->f_fd = fd;
1126	fp->f_dump_fd = dump_fd;
1127	*fpp = fp;
1128
1129	return (0);
1130}
1131
1132void
1133zfs_file_close(zfs_file_t *fp)
1134{
1135	close(fp->f_fd);
1136	if (fp->f_dump_fd != -1)
1137		close(fp->f_dump_fd);
1138
1139	umem_free(fp, sizeof (zfs_file_t));
1140}
1141
1142/*
1143 * Stateful write - use os internal file pointer to determine where to
1144 * write and update on successful completion.
1145 *
1146 * fp -  pointer to file (pipe, socket, etc) to write to
1147 * buf - buffer to write
1148 * count - # of bytes to write
1149 * resid -  pointer to count of unwritten bytes  (if short write)
1150 *
1151 * Returns 0 on success errno on failure.
1152 */
1153int
1154zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
1155{
1156	ssize_t rc;
1157
1158	rc = write(fp->f_fd, buf, count);
1159	if (rc < 0)
1160		return (errno);
1161
1162	if (resid) {
1163		*resid = count - rc;
1164	} else if (rc != count) {
1165		return (EIO);
1166	}
1167
1168	return (0);
1169}
1170
1171/*
1172 * Stateless write - os internal file pointer is not updated.
1173 *
1174 * fp -  pointer to file (pipe, socket, etc) to write to
1175 * buf - buffer to write
1176 * count - # of bytes to write
1177 * off - file offset to write to (only valid for seekable types)
1178 * resid -  pointer to count of unwritten bytes
1179 *
1180 * Returns 0 on success errno on failure.
1181 */
1182int
1183zfs_file_pwrite(zfs_file_t *fp, const void *buf,
1184    size_t count, loff_t pos, ssize_t *resid)
1185{
1186	ssize_t rc, split, done;
1187	int sectors;
1188
1189	/*
1190	 * To simulate partial disk writes, we split writes into two
1191	 * system calls so that the process can be killed in between.
1192	 * This is used by ztest to simulate realistic failure modes.
1193	 */
1194	sectors = count >> SPA_MINBLOCKSHIFT;
1195	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
1196	rc = pwrite64(fp->f_fd, buf, split, pos);
1197	if (rc != -1) {
1198		done = rc;
1199		rc = pwrite64(fp->f_fd, (char *)buf + split,
1200		    count - split, pos + split);
1201	}
1202#ifdef __linux__
1203	if (rc == -1 && errno == EINVAL) {
1204		/*
1205		 * Under Linux, this most likely means an alignment issue
1206		 * (memory or disk) due to O_DIRECT, so we abort() in order
1207		 * to catch the offender.
1208		 */
1209		abort();
1210	}
1211#endif
1212
1213	if (rc < 0)
1214		return (errno);
1215
1216	done += rc;
1217
1218	if (resid) {
1219		*resid = count - done;
1220	} else if (done != count) {
1221		return (EIO);
1222	}
1223
1224	return (0);
1225}
1226
1227/*
1228 * Stateful read - use os internal file pointer to determine where to
1229 * read and update on successful completion.
1230 *
1231 * fp -  pointer to file (pipe, socket, etc) to read from
1232 * buf - buffer to write
1233 * count - # of bytes to read
1234 * resid -  pointer to count of unread bytes (if short read)
1235 *
1236 * Returns 0 on success errno on failure.
1237 */
1238int
1239zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
1240{
1241	int rc;
1242
1243	rc = read(fp->f_fd, buf, count);
1244	if (rc < 0)
1245		return (errno);
1246
1247	if (resid) {
1248		*resid = count - rc;
1249	} else if (rc != count) {
1250		return (EIO);
1251	}
1252
1253	return (0);
1254}
1255
1256/*
1257 * Stateless read - os internal file pointer is not updated.
1258 *
1259 * fp -  pointer to file (pipe, socket, etc) to read from
1260 * buf - buffer to write
1261 * count - # of bytes to write
1262 * off - file offset to read from (only valid for seekable types)
1263 * resid -  pointer to count of unwritten bytes (if short write)
1264 *
1265 * Returns 0 on success errno on failure.
1266 */
1267int
1268zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
1269    ssize_t *resid)
1270{
1271	ssize_t rc;
1272
1273	rc = pread64(fp->f_fd, buf, count, off);
1274	if (rc < 0) {
1275#ifdef __linux__
1276		/*
1277		 * Under Linux, this most likely means an alignment issue
1278		 * (memory or disk) due to O_DIRECT, so we abort() in order to
1279		 * catch the offender.
1280		 */
1281		if (errno == EINVAL)
1282			abort();
1283#endif
1284		return (errno);
1285	}
1286
1287	if (fp->f_dump_fd != -1) {
1288		int status;
1289
1290		status = pwrite64(fp->f_dump_fd, buf, rc, off);
1291		ASSERT(status != -1);
1292	}
1293
1294	if (resid) {
1295		*resid = count - rc;
1296	} else if (rc != count) {
1297		return (EIO);
1298	}
1299
1300	return (0);
1301}
1302
1303/*
1304 * lseek - set / get file pointer
1305 *
1306 * fp -  pointer to file (pipe, socket, etc) to read from
1307 * offp - value to seek to, returns current value plus passed offset
1308 * whence - see man pages for standard lseek whence values
1309 *
1310 * Returns 0 on success errno on failure (ESPIPE for non seekable types)
1311 */
1312int
1313zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
1314{
1315	loff_t rc;
1316
1317	rc = lseek(fp->f_fd, *offp, whence);
1318	if (rc < 0)
1319		return (errno);
1320
1321	*offp = rc;
1322
1323	return (0);
1324}
1325
1326/*
1327 * Get file attributes
1328 *
1329 * filp - file pointer
1330 * zfattr - pointer to file attr structure
1331 *
1332 * Currently only used for fetching size and file mode
1333 *
1334 * Returns 0 on success or error code of underlying getattr call on failure.
1335 */
1336int
1337zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
1338{
1339	struct stat64 st;
1340
1341	if (fstat64_blk(fp->f_fd, &st) == -1)
1342		return (errno);
1343
1344	zfattr->zfa_size = st.st_size;
1345	zfattr->zfa_mode = st.st_mode;
1346
1347	return (0);
1348}
1349
1350/*
1351 * Sync file to disk
1352 *
1353 * filp - file pointer
1354 * flags - O_SYNC and or O_DSYNC
1355 *
1356 * Returns 0 on success or error code of underlying sync call on failure.
1357 */
1358int
1359zfs_file_fsync(zfs_file_t *fp, int flags)
1360{
1361	(void) flags;
1362
1363	if (fsync(fp->f_fd) < 0)
1364		return (errno);
1365
1366	return (0);
1367}
1368
1369/*
1370 * fallocate - allocate or free space on disk
1371 *
1372 * fp - file pointer
1373 * mode (non-standard options for hole punching etc)
1374 * offset - offset to start allocating or freeing from
1375 * len - length to free / allocate
1376 *
1377 * OPTIONAL
1378 */
1379int
1380zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
1381{
1382#ifdef __linux__
1383	return (fallocate(fp->f_fd, mode, offset, len));
1384#else
1385	(void) fp, (void) mode, (void) offset, (void) len;
1386	return (EOPNOTSUPP);
1387#endif
1388}
1389
1390/*
1391 * Request current file pointer offset
1392 *
1393 * fp - pointer to file
1394 *
1395 * Returns current file offset.
1396 */
1397loff_t
1398zfs_file_off(zfs_file_t *fp)
1399{
1400	return (lseek(fp->f_fd, SEEK_CUR, 0));
1401}
1402
/*
 * unlink file
 *
 * path - fully qualified file path
 *
 * Returns 0 on success; on failure the return value of remove() is passed
 * through unchanged (not an errno value).
 *
 * OPTIONAL
 */
int
zfs_file_unlink(const char *path)
{
	return (remove(path));
}
1417
/*
 * Get reference to file pointer
 *
 * fd - input file descriptor
 *
 * Unsupported in user space: calling this aborts the process.  The return
 * statement is never reached.
 */
zfs_file_t *
zfs_file_get(int fd)
{
	(void) fd;
	abort();
	return (NULL);
}
1433/*
1434 * Drop reference to file pointer
1435 *
1436 * fp - pointer to file struct
1437 *
1438 * Unsupported in user space.
1439 */
1440void
1441zfs_file_put(zfs_file_t *fp)
1442{
1443	abort();
1444	(void) fp;
1445}
1446
/*
 * No-op: both names are ignored.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	(void) oldname;
	(void) newname;
}
1452
/*
 * OS-specific spa hooks.  Each one ignores 'spa' and does nothing.
 */
void
spa_import_os(spa_t *spa)
{
	(void) spa;
}

void
spa_export_os(spa_t *spa)
{
	(void) spa;
}

void
spa_activate_os(spa_t *spa)
{
	(void) spa;
}

void
spa_deactivate_os(spa_t *spa)
{
	(void) spa;
}
1476