1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 1995 Terrence R. Lambert
5 * All rights reserved.
6 *
7 * Copyright (c) 1990, 1993
8 *	The Regents of the University of California.  All rights reserved.
9 * (c) UNIX System Laboratories, Inc.
10 * All or some portions of this file are derived from material licensed
11 * to the University of California by American Telephone and Telegraph
12 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
13 * the permission of UNIX System Laboratories, Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 */
43
44#ifndef _SYS_KERNEL_H_
45#define	_SYS_KERNEL_H_
46
47#include <sys/linker_set.h>
48
49#ifdef _KERNEL
50
51/* for intrhook and sysinit linked list below */
52#include <sys/queue.h>
53
54/* for timestamping SYSINITs; other files may assume this is included here */
55#include <sys/tslog.h>
56
57/* Global variables for the kernel. */
58
59/* 1.1 */
/* NOTE(review): presumably the path/name of the running kernel -- confirm at the definition. */
extern char kernelname[MAXPATHLEN];

extern int tick;			/* usec per tick (1000000 / hz) */
extern int hz;				/* system clock's frequency */
extern int psratio;			/* ratio: prof / stat */
extern int stathz;			/* statistics clock's frequency */
extern int profhz;			/* profiling clock's frequency */
extern int profprocs;			/* number of processes being profiled */
/* NOTE(review): presumably the count of clock ticks since boot; volatile because it changes asynchronously -- confirm. */
extern volatile int ticks;
69
70#endif /* _KERNEL */
71
72/*
73 * Enumerated types for known system startup interfaces.
74 *
75 * Startup occurs in ascending numeric order; the list entries are
76 * sorted prior to attempting startup to guarantee order.  Items
77 * of the same level are arbitrated for order based on the 'order'
78 * element.
79 *
80 * These numbers are arbitrary and are chosen ONLY for ordering; the
81 * enumeration values are explicit rather than implicit to provide
82 * for binary compatibility with inserted elements.
83 *
 * The SI_SUB_LAST value must have the highest numeric value.
85 */
/* Subsystem identifiers: the primary sort key for startup entries. */
enum sysinit_sub_id {
	SI_SUB_DUMMY			= 0x00000000, /* never run; linker placeholder */
	SI_SUB_DONE			= 0x00000001, /* entry already processed */
	SI_SUB_TUNABLES			= 0x00700000, /* establish tunable values */
	SI_SUB_COPYRIGHT		= 0x00800001, /* first use of the console */
	SI_SUB_VM			= 0x01000000, /* virtual memory system init */
	SI_SUB_COUNTER			= 0x01100000, /* counter(9) is initialized */
	SI_SUB_KMEM			= 0x01800000, /* kernel memory */
	/* Hypervisor detection and virtualization support setup. */
	SI_SUB_HYPERVISOR		= 0x01a40000,
	SI_SUB_WITNESS			= 0x01a80000, /* witness initialization */
	SI_SUB_MTX_POOL_DYNAMIC		= 0x01ac0000, /* dynamic mutex pool */
	SI_SUB_LOCK			= 0x01b00000, /* various locks */
	SI_SUB_EVENTHANDLER		= 0x01c00000, /* eventhandler init */
	SI_SUB_VNET_PRELINK		= 0x01e00000, /* vnet init before modules */
	SI_SUB_KLD			= 0x02000000, /* KLD and module setup */
	SI_SUB_KHELP			= 0x02080000, /* khelp modules */
	SI_SUB_CPU			= 0x02100000, /* CPU resource(s) */
	SI_SUB_RACCT			= 0x02110000, /* resource accounting */
	SI_SUB_KDTRACE			= 0x02140000, /* kernel dtrace hooks */
	SI_SUB_RANDOM			= 0x02160000, /* random number generator */
	SI_SUB_MAC			= 0x02180000, /* TrustedBSD MAC subsystem */
	SI_SUB_MAC_POLICY		= 0x021c0000, /* TrustedBSD MAC policies */
	SI_SUB_MAC_LATE			= 0x021d0000, /* TrustedBSD MAC subsystem */
	SI_SUB_VNET			= 0x021e0000, /* vnet 0 */
	SI_SUB_INTRINSIC		= 0x02200000, /* proc 0 */
	SI_SUB_VM_CONF			= 0x02300000, /* config VM, set limits */
	SI_SUB_DDB_SERVICES		= 0x02380000, /* capture, scripting, etc. */
	SI_SUB_RUN_QUEUE		= 0x02400000, /* set up run queue */
	SI_SUB_KTRACE			= 0x02480000, /* ktrace */
	SI_SUB_OPENSOLARIS		= 0x02490000, /* OpenSolaris compatibility */
	SI_SUB_AUDIT			= 0x024c0000, /* audit */
	SI_SUB_CREATE_INIT		= 0x02500000, /* create init process */
	SI_SUB_SCHED_IDLE		= 0x02600000, /* required idle procs */
	SI_SUB_MBUF			= 0x02700000, /* mbuf subsystem */
	SI_SUB_INTR			= 0x02800000, /* interrupt threads */
	SI_SUB_TASKQ			= 0x02880000, /* task queues */
	SI_SUB_EPOCH			= 0x02888000, /* epoch subsystem */
#ifdef EARLY_AP_STARTUP
	SI_SUB_SMP			= 0x02900000, /* start the APs */
#endif
	SI_SUB_SOFTINTR			= 0x02a00000, /* start soft interrupt thread */
	SI_SUB_DEVFS			= 0x02f00000, /* devfs ready for devices */
	SI_SUB_INIT_IF			= 0x03000000, /* prep for net interfaces */
	SI_SUB_NETGRAPH			= 0x03010000, /* let Netgraph initialize */
	SI_SUB_DTRACE			= 0x03020000, /* DTrace subsystem */
	SI_SUB_DTRACE_PROVIDER		= 0x03048000, /* DTrace providers */
	SI_SUB_DTRACE_ANON		= 0x0308c000, /* DTrace anon enabling */
	SI_SUB_DRIVERS			= 0x03100000, /* let drivers initialize */
	SI_SUB_CONFIGURE		= 0x03800000, /* configure devices */
	SI_SUB_VFS			= 0x04000000, /* virtual filesystem */
	SI_SUB_CLOCKS			= 0x04800000, /* real time and stat clocks */
	SI_SUB_SYSV_SHM			= 0x06400000, /* System V shared memory */
	SI_SUB_SYSV_SEM			= 0x06800000, /* System V semaphores */
	SI_SUB_SYSV_MSG			= 0x06c00000, /* System V message queues */
	SI_SUB_P1003_1B			= 0x06e00000, /* P1003.1B realtime */
	SI_SUB_PSEUDO			= 0x07000000, /* pseudo devices */
	SI_SUB_EXEC			= 0x07400000, /* execve() handlers */
	SI_SUB_PROTO_BEGIN		= 0x08000000, /* VNET initialization */
	SI_SUB_PROTO_PFIL		= 0x08100000, /* initialize pfil before FWs */
	SI_SUB_PROTO_IF			= 0x08400000, /* interfaces */
	SI_SUB_PROTO_DOMAININIT		= 0x08600000, /* domain registration system */
	SI_SUB_PROTO_MC			= 0x08700000, /* multicast */
	SI_SUB_PROTO_DOMAIN		= 0x08800000, /* domains (address families?) */
	SI_SUB_PROTO_FIREWALL		= 0x08806000, /* firewalls */
	SI_SUB_PROTO_IFATTACHDOMAIN	= 0x08808000, /* domain dependent data init */
	SI_SUB_PROTO_END		= 0x08ffffff, /* VNET helper functions */
	SI_SUB_KPROF			= 0x09000000, /* kernel profiling */
	SI_SUB_KICK_SCHEDULER		= 0x0a000000, /* start the timeout events */
	SI_SUB_INT_CONFIG_HOOKS		= 0x0a800000, /* interrupts enabled config */
	SI_SUB_ROOT_CONF		= 0x0b000000, /* find root devices */
	SI_SUB_INTRINSIC_POST		= 0x0d000000, /* proc 0 cleanup */
	SI_SUB_SYSCALLS			= 0x0d800000, /* register system calls */
	SI_SUB_VNET_DONE		= 0x0dc00000, /* vnet registration complete */
	SI_SUB_KTHREAD_INIT		= 0x0e000000, /* init process */
	SI_SUB_KTHREAD_PAGE		= 0x0e400000, /* pageout daemon */
	SI_SUB_KTHREAD_VM		= 0x0e800000, /* vm daemon */
	SI_SUB_KTHREAD_BUF		= 0x0ea00000, /* buffer daemon */
	SI_SUB_KTHREAD_UPDATE		= 0x0ec00000, /* update daemon */
	SI_SUB_KTHREAD_IDLE		= 0x0ee00000, /* idle procs */
#ifndef EARLY_AP_STARTUP
	SI_SUB_SMP			= 0x0f000000, /* start the APs */
#endif
	SI_SUB_RACCTD			= 0x0f100000, /* start racctd */
	SI_SUB_LAST			= 0x0fffffff  /* final initialization */
};
175
176/*
177 * Some enumerated orders; "ANY" sorts last.
178 */
/* Secondary sort key: position of an entry within its subsystem. */
enum sysinit_elem_order {
	SI_ORDER_FIRST	 = 0x00000000,	/* first */
	SI_ORDER_SECOND	 = 0x00000001,	/* second */
	SI_ORDER_THIRD	 = 0x00000002,	/* third */
	SI_ORDER_FOURTH	 = 0x00000003,	/* fourth */
	SI_ORDER_FIFTH	 = 0x00000004,	/* fifth */
	SI_ORDER_SIXTH	 = 0x00000005,	/* sixth */
	SI_ORDER_SEVENTH = 0x00000006,	/* seventh */
	SI_ORDER_EIGHTH	 = 0x00000007,	/* eighth */
	SI_ORDER_MIDDLE	 = 0x01000000,	/* somewhere in the middle */
	SI_ORDER_ANY	 = 0x0fffffff	/* last */
};
191
192/*
193 * A system initialization call instance
194 *
 * At the moment there is one instance of sysinit.  We probably do not
 * want two, which is why this code is if'd out, but we definitely want
 * to distinguish SYSINITs which take non-constant data pointers from
 * SYSINITs which take constant data pointers.
199 *
200 * The C_* macros take functions expecting const void * arguments
201 * while the non-C_* macros take functions expecting just void * arguments.
202 *
203 * With -Wcast-qual on, the compiler issues warnings:
204 *	- if we pass non-const data or functions taking non-const data
205 *	  to a C_* macro.
206 *
207 *	- if we pass const data to the normal macros
208 *
209 * However, no warning is issued if we pass a function taking const data
210 * through a normal non-const macro.  This is ok because the function is
211 * saying it won't modify the data so we don't care whether the data is
212 * modifiable or not.
213 */
214
/* Startup function taking a modifiable argument (used by the non-C_ macros). */
typedef void (*sysinit_nfunc_t)(void *);
/* Startup function taking a const argument (used by the C_ macros). */
typedef void (*sysinit_cfunc_t)(const void *);

/*
 * One startup (or shutdown) entry.  Instances are created statically by
 * the C_SYSINIT/C_SYSUNINIT macros using positional initializers, so the
 * member order here must match the initializer order in those macros.
 */
struct sysinit {
	enum sysinit_sub_id	subsystem;	/* subsystem identifier*/
	enum sysinit_elem_order	order;		/* init order within subsystem*/
	STAILQ_ENTRY(sysinit)	next;		/* singly-linked list */
	sysinit_cfunc_t func;			/* function; always stored as the const-argument type */
	const void	*udata;			/* multiplexer/argument */
};
225
226/*
227 * Default: no special processing
228 *
 * The C_ version of SYSINIT is for data pointers to const
 * data (and functions taking data pointers to const data).
231 * At the moment it is no different from SYSINIT and thus
232 * still results in warnings.
233 *
234 * The casts are necessary to have the compiler produce the
235 * correct warnings when -Wcast-qual is used.
236 *
237 */
238#ifdef TSLOG
/*
 * Per-SYSINIT record used when TSLOG is enabled: it carries the real
 * function and argument plus a name, so that sysinit_tslog_shim can log
 * around the call.
 */
struct sysinit_tslog {
	sysinit_cfunc_t func;	/* the real startup function */
	const void * data;	/* argument handed to func */
	const char * name;	/* label emitted in the TSLOG records */
};
244static inline void
245sysinit_tslog_shim(const void * data)
246{
247	const struct sysinit_tslog * x = data;
248
249	TSRAW(curthread, TS_ENTER, "SYSINIT", x->name);
250	(x->func)(x->data);
251	TSRAW(curthread, TS_EXIT, "SYSINIT", x->name);
252}
/*
 * TSLOG variant of C_SYSINIT.
 *
 * Defines two static objects named after 'uniquifier':
 *  - <uniquifier>_sys_init_tslog, holding func, ident and the stringified
 *    uniquifier (used as the log name);
 *  - <uniquifier>_sys_init, a struct sysinit whose function is
 *    sysinit_tslog_shim and whose udata points at the tslog record.
 * The struct sysinit is then placed in the sysinit_set linker set.
 * No trailing semicolon is supplied; the invocation provides it.
 */
#define	C_SYSINIT(uniquifier, subsystem, order, func, ident)	\
	static struct sysinit_tslog uniquifier ## _sys_init_tslog = {	\
		func,						\
		(ident),					\
		#uniquifier					\
	};							\
	static struct sysinit uniquifier ## _sys_init = {	\
		subsystem,					\
		order,						\
		{ NULL },					\
		sysinit_tslog_shim,				\
		&uniquifier ## _sys_init_tslog			\
	};							\
	DATA_WSET(sysinit_set,uniquifier ## _sys_init)
267#else
/*
 * Non-TSLOG variant of C_SYSINIT.
 *
 * Defines a static struct sysinit named <uniquifier>_sys_init, filled
 * positionally with subsystem, order, an empty list link, func and ident,
 * and places it in the sysinit_set linker set.  func must already have
 * type sysinit_cfunc_t.  No trailing semicolon is supplied; the
 * invocation provides it.
 */
#define	C_SYSINIT(uniquifier, subsystem, order, func, ident)	\
	static struct sysinit uniquifier ## _sys_init = {	\
		subsystem,					\
		order,						\
		{ NULL },					\
		func,						\
		(ident)						\
	};							\
	DATA_WSET(sysinit_set,uniquifier ## _sys_init)
277#endif
278
/*
 * Register a startup function that takes a non-const 'void *' argument.
 *
 * Thin wrapper around C_SYSINIT: func is cast through sysinit_nfunc_t to
 * sysinit_cfunc_t and ident is cast to 'void *'.  Both arguments are
 * parenthesized before being cast so that the casts apply to the whole
 * argument expression rather than only to its first operand.
 */
#define	SYSINIT(uniquifier, subsystem, order, func, ident)	\
	C_SYSINIT(uniquifier, subsystem, order,			\
	(sysinit_cfunc_t)(sysinit_nfunc_t)(func), (void *)(ident))
282
/*
 * Called on module unload: no special processing
 *
 * Defines a static struct sysinit named <uniquifier>_sys_uninit, filled
 * positionally with subsystem, order, an empty list link, func and ident,
 * and places it in the sysuninit_set linker set.  func must already have
 * type sysinit_cfunc_t.  No trailing semicolon is supplied; the
 * invocation provides it.
 */
#define	C_SYSUNINIT(uniquifier, subsystem, order, func, ident)	\
	static struct sysinit uniquifier ## _sys_uninit = {	\
		subsystem,					\
		order,						\
		{ NULL },					\
		func,						\
		(ident)						\
	};							\
	DATA_WSET(sysuninit_set,uniquifier ## _sys_uninit)
295
/*
 * Register an unload-time function that takes a non-const 'void *'
 * argument.
 *
 * Thin wrapper around C_SYSUNINIT: func is cast through sysinit_nfunc_t
 * to sysinit_cfunc_t and ident is cast to 'void *'.  Both arguments are
 * parenthesized before being cast so that the casts apply to the whole
 * argument expression rather than only to its first operand.
 */
#define	SYSUNINIT(uniquifier, subsystem, order, func, ident)	\
	C_SYSUNINIT(uniquifier, subsystem, order,		\
	(sysinit_cfunc_t)(sysinit_nfunc_t)(func), (void *)(ident))
299
/*
 * NOTE(review): defined elsewhere.  Judging by the signature, 'set' and
 * 'set_end' bound an array of struct sysinit pointers (such as a linker
 * set) to be added to the pending startup entries -- confirm against the
 * definition, including whether set_end is exclusive.
 */
void	sysinit_add(struct sysinit **set, struct sysinit **set_end);
301
302#ifdef _KERNEL
303
304/*
305 * Infrastructure for tunable 'constants'.  Value may be specified at compile
306 * time or kernel load time.  Rules relating tunables together can be placed
307 * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY.
308 *
309 * WARNING: developers should never use the reserved suffixes specified in
310 * loader.conf(5) for any tunables or conflicts will result.
311 */
312
/*
 * int
 * please avoid using for new tunables!
 *
 * tunable_int_init is the handler TUNABLE_INT registers; it is called with
 * a pointer to the struct tunable_int the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_int_init(void *);
struct tunable_int {
	const char *path;	/* tunable name (first macro argument) */
	int *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_int and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_int_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to int.
 */
#define	TUNABLE_INT(path, var)					\
	static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init,	\
	    &__CONCAT(__tunable_int_, __LINE__))

/* Immediate lookup: expands to getenv_int((path), (var)) and yields its result. */
#define	TUNABLE_INT_FETCH(path, var)	getenv_int((path), (var))
332
/*
 * long
 *
 * tunable_long_init is the handler TUNABLE_LONG registers; it is called
 * with a pointer to the struct tunable_long the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_long_init(void *);
struct tunable_long {
	const char *path;	/* tunable name (first macro argument) */
	long *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_long and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_long_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to long.
 */
#define	TUNABLE_LONG(path, var)					\
	static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\
	    &__CONCAT(__tunable_long_, __LINE__))

/* Immediate lookup: expands to getenv_long((path), (var)) and yields its result. */
#define	TUNABLE_LONG_FETCH(path, var)	getenv_long((path), (var))
351
/*
 * unsigned long
 *
 * tunable_ulong_init is the handler TUNABLE_ULONG registers; it is called
 * with a pointer to the struct tunable_ulong the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_ulong_init(void *);
struct tunable_ulong {
	const char *path;	/* tunable name (first macro argument) */
	unsigned long *var;	/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_ulong and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_ulong_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to unsigned long.
 */
#define	TUNABLE_ULONG(path, var)				\
	static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \
	    &__CONCAT(__tunable_ulong_, __LINE__))

/* Immediate lookup: expands to getenv_ulong((path), (var)) and yields its result. */
#define	TUNABLE_ULONG_FETCH(path, var)	getenv_ulong((path), (var))
370
/*
 * int64_t
 *
 * tunable_int64_init is the handler TUNABLE_INT64 registers; it is called
 * with a pointer to the struct tunable_int64 the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_int64_init(void *);
struct tunable_int64 {
	const char *path;	/* tunable name (first macro argument) */
	int64_t *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_int64 and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_int64_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to int64_t.
 */
#define	TUNABLE_INT64(path, var)				\
	static struct tunable_int64 __CONCAT(__tunable_int64_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int64_init, \
	    &__CONCAT(__tunable_int64_, __LINE__))

/* Immediate lookup: expands to getenv_int64((path), (var)) and yields its result. */
#define	TUNABLE_INT64_FETCH(path, var)	getenv_int64((path), (var))
389
/*
 * uint64_t
 *
 * tunable_uint64_init is the handler TUNABLE_UINT64 registers; it is
 * called with a pointer to the struct tunable_uint64 the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_uint64_init(void *);
struct tunable_uint64 {
	const char *path;	/* tunable name (first macro argument) */
	uint64_t *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_uint64 and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_uint64_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to uint64_t.
 */
#define	TUNABLE_UINT64(path, var)				\
	static struct tunable_uint64 __CONCAT(__tunable_uint64_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_uint64_init, \
	    &__CONCAT(__tunable_uint64_, __LINE__))

/* Immediate lookup: expands to getenv_uint64((path), (var)) and yields its result. */
#define	TUNABLE_UINT64_FETCH(path, var)	getenv_uint64((path), (var))
408
/*
 * quad
 *
 * tunable_quad_init is the handler TUNABLE_QUAD registers; it is called
 * with a pointer to the struct tunable_quad the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_quad_init(void *);
struct tunable_quad {
	const char *path;	/* tunable name (first macro argument) */
	quad_t *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_quad and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_quad_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to quad_t.
 */
#define	TUNABLE_QUAD(path, var)					\
	static struct tunable_quad __CONCAT(__tunable_quad_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_quad_init, \
	    &__CONCAT(__tunable_quad_, __LINE__))

/* Immediate lookup: expands to getenv_quad((path), (var)) and yields its result. */
#define	TUNABLE_QUAD_FETCH(path, var)	getenv_quad((path), (var))
427
/*
 * bool
 *
 * tunable_bool_init is the handler TUNABLE_BOOL registers; it is called
 * with a pointer to the struct tunable_bool the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it looks up
 * 'path' in the kernel environment and stores the result through 'var'.
 */
extern void tunable_bool_init(void *);
struct tunable_bool {
	const char *path;	/* tunable name (first macro argument) */
	bool *var;		/* destination variable (second macro argument) */
};
/*
 * Define a static struct tunable_bool and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_bool_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.  'var' must be a pointer to bool.
 */
#define	TUNABLE_BOOL(path, var) \
	static struct tunable_bool __CONCAT(__tunable_bool_, __LINE__) = { \
		(path),						\
		(var),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_bool_init, \
	    &__CONCAT(__tunable_bool_, __LINE__))

/* Immediate lookup: expands to getenv_bool((path), (var)) and yields its result. */
#define	TUNABLE_BOOL_FETCH(path, var)	getenv_bool((path), (var))
446
/*
 * string
 *
 * tunable_str_init is the handler TUNABLE_STR registers; it is called with
 * a pointer to the struct tunable_str the macro defines.
 * NOTE(review): the handler is defined elsewhere; presumably it copies the
 * value of 'path' from the kernel environment into 'var', bounded by
 * 'size' -- confirm, including NUL-termination behaviour.
 */
extern void tunable_str_init(void *);
struct tunable_str {
	const char *path;	/* tunable name (first macro argument) */
	char *var;		/* destination buffer (second macro argument) */
	int size;		/* third macro argument; presumably the capacity of var in bytes */
};
/*
 * Define a static struct tunable_str and a SYSINIT at SI_SUB_TUNABLES /
 * SI_ORDER_MIDDLE that passes it to tunable_str_init.  Both identifiers
 * are built from __LINE__ via __CONCAT, so invocations presumably must not
 * share a source line.
 */
#define	TUNABLE_STR(path, var, size)				\
	static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \
		(path),						\
		(var),						\
		(size),						\
	};							\
	SYSINIT(__CONCAT(__Tunable_init_, __LINE__),		\
	    SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init,	\
	    &__CONCAT(__tunable_str_, __LINE__))

/* Immediate lookup: expands to getenv_string((path), (var), (size)) and yields its result. */
#define	TUNABLE_STR_FETCH(path, var, size)			\
	getenv_string((path), (var), (size))
465
466#endif /* _KERNEL */
467
/* Callback type for an interrupt config hook; receives one opaque pointer. */
typedef void (*ich_func_t)(void *_arg);

/*
 * One interrupt config hook, as passed to the config_intrhook_*()
 * functions.  The ICHS_* constants are defined next to ich_state and are
 * presumably the values that field takes -- confirm against the
 * implementation.
 */
struct intr_config_hook {
	STAILQ_ENTRY(intr_config_hook) ich_links;	/* singly-linked tail queue linkage */
	uintptr_t	ich_state;			/* hook state; see ICHS_* below */
#define ICHS_QUEUED	0x1
#define ICHS_RUNNING	0x2
#define	ICHS_DONE	0x3
	ich_func_t	ich_func;			/* callback to invoke */
	void		*ich_arg;			/* presumably the argument passed to ich_func */
};
479
/*
 * Interrupt config hook API; the implementations live elsewhere.
 * NOTE(review): semantics below are inferred from the names and
 * signatures -- confirm against config_intrhook(9) / the implementation.
 *  - establish:    register 'hook'; the int result is presumably 0 on
 *                  success or an errno value.
 *  - disestablish: remove a previously established 'hook'.
 *  - drain:        presumably wait for / remove 'hook' and report which
 *                  happened via the int result.
 *  - oneshot:      arrange for _func(_arg) to be called once without the
 *                  caller supplying a struct intr_config_hook.
 */
int	config_intrhook_establish(struct intr_config_hook *hook);
void	config_intrhook_disestablish(struct intr_config_hook *hook);
int	config_intrhook_drain(struct intr_config_hook *hook);
void	config_intrhook_oneshot(ich_func_t _func, void *_arg);
484
485#endif /* !_SYS_KERNEL_H_*/
486