/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		}
		else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", pcommstr, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int	prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
					  vm_map_page_mask(map)),
			vm_map_round_page(addr+len,
					  vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(map)),
			   vm_map_round_page(addr+len,
					     vm_map_page_mask(map)),
			   VM_PROT_READ | VM_PROT_WRITE,
			   FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr+len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
			     vm_map_trunc_page(addr,
					       vm_map_page_mask(map)),
			     vm_map_round_page(addr+len,
					       vm_map_page_mask(map)),
			     FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
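
/*
 * Typical pairing of vslock()/vsunlock() (a minimal sketch, for
 * illustration only; "copyout_wired_example" and its arguments are
 * hypothetical, not part of this file's API): kernel code that must
 * not fault while touching a user range wires it first, performs the
 * copy, then unwires it.
 *
 *	int
 *	copyout_wired_example(user_addr_t uaddr, void *kbuf, user_size_t len)
 *	{
 *		int error;
 *
 *		error = vslock(uaddr, len);		// wire the user range
 *		if (error)
 *			return error;			// ENOMEM/EACCES/EINVAL
 *		error = copyout(kbuf, uaddr, len);	// copy while wired
 *		(void) vsunlock(uaddr, len, 1);		// unwire; "1" = dirtied hint
 *		return error;
 *	}
 */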

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}
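
/*
 * Illustrative (hypothetical) use of the accessors above: fetching and
 * storing a pointer-sized user value without caring whether the current
 * process is 32-bit or 64-bit.  Note the historical wart that fulong()
 * signals a fault with an in-band -1, indistinguishable from a stored
 * value of -1.
 *
 *	user_addr_t	slot = ...;	// user address of a pointer-sized slot
 *	int64_t		value;
 *
 *	value = fulong(slot);			// -1 on copyin failure
 *	if (sulong(slot, value) != 0)
 *		...handle copyout failure...
 */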

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 * 			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 * 		KERN_FAILURE	The port name does not denote a task, or the
 * 				task has no associated BSD process
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
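
/*
 * User-space view of the trap above (sketch; the prototype matches the
 * one exported through <mach/mach_traps.h>, error handling is
 * illustrative):
 *
 *	int pid;
 *	kern_return_t kr;
 *
 *	kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr != KERN_SUCCESS)
 *		...port does not name a task with a BSD process...
 */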

/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(kauth_cred_getruid(targetcred) != myuid) ||
			(kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
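
/*
 * Caller-side pattern for the trap above (sketch): a debugger obtaining
 * the task port of its target.  Code-signing/entitlement interactions
 * with the task access server are omitted; names are illustrative.
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr;
 *
 *	kr = task_for_pid(mach_task_self(), target_pid, &task);
 *	if (kr != KERN_SUCCESS)
 *		...denied by the posix check, MAC policy, or access server...
 */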

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}
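
/*
 * Caller-side sketch for the two syscalls above.  The user-visible
 * prototypes are assumed here to be "int pid_suspend(int)" and
 * "int pid_resume(int)" (private interfaces; treat as illustrative):
 *
 *	if (pid_suspend(target_pid) != 0)
 *		...EPERM, ESRCH or EINVAL per the checks above...
 *	...
 *	if (pid_resume(target_pid) != 0)
 *		...
 */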


static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
		|| (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
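
/*
 * Sketch of the dyld-side sequence described above (illustrative, not
 * dyld's actual source; assumes a libsyscall stub of the form
 * "int shared_region_check_np(uint64_t *start_address)"):
 *
 *	uint64_t base = 0;
 *
 *	if (shared_region_check_np(&base) == 0) {
 *		// something is mapped at "base": compare it against the
 *		// desired shared cache before using it
 *	} else {
 *		// no/empty shared region: map the cache via
 *		// shared_region_map_and_slide_np() or plain mmap()
 *	}
 */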


int
shared_region_copyin_mappings(
		struct proc			*p,
		user_addr_t			user_mappings,
		unsigned int			mappings_count,
		struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc				*p,
	int					fd,
	uint32_t				mappings_count,
	struct shared_file_mapping_np		*mappings,
	uint32_t				slide,
	user_addr_t				slide_start,
	user_addr_t				slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}


	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}

/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
	allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
	reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
		   &vm_page_free_target, 0, "Pageout daemon free target");
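
/*
 * Sketch of the calculation the comment above alludes to, as a user
 * process might perform it with sysctlbyname(3) (illustrative only;
 * both OIDs are defined in this file):
 *
 *	unsigned int free_target, free_count;
 *	size_t len = sizeof(unsigned int);
 *
 *	sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0);
 *	len = sizeof(unsigned int);
 *	sysctlbyname("vm.page_free_count", &free_count, &len, NULL, 0);
 *	// pages available before the pageout daemon wants more:
 *	long headroom = (long)free_count - (long)free_target;
 */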

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void vm_pageout_io_throttle(void) {
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low priority I/O type and the
	 * I/O we issued while in this cleaning operation collided with
	 * normal I/O operations, delay in order to mitigate the impact
	 * of this task on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
			  struct kas_info_args *uap,
			  int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int			selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t size;
	int			error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
		case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
			{
				uint64_t slide = vm_kernel_slide;

				if (sizeof(slide) != size) {
					return EINVAL;
				}

				if (IS_64BIT_PROCESS(p)) {
					user64_size_t size64 = (user64_size_t)size;
					error = copyout(&size64, sizep, sizeof(size64));
				} else {
					user32_size_t size32 = (user32_size_t)size;
					error = copyout(&size32, sizep, sizeof(size32));
				}
				if (error) {
					return error;
				}

				error = copyout(&slide, valuep, sizeof(slide));
				if (error) {
					return error;
				}
			}
			break;
		default:
			return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}